Message ID | f62608e88f125096c1f236a47e3ee670104c7b78.1628618950.git.jonathantanmy@google.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | In grep, no adding submodule ODB as alternates | expand |
On Tue, Aug 10, 2021 at 11:28:43AM -0700, Jonathan Tan wrote: > diff --git a/builtin/grep.c b/builtin/grep.c > index 9e61c7c993..5a40e18e47 100644 > --- a/builtin/grep.c > +++ b/builtin/grep.c > @@ -65,6 +65,9 @@ static int todo_done; > /* Has all work items been added? */ > static int all_work_added; > > +static struct repository **repos_to_free; > +static size_t repos_to_free_nr, repos_to_free_alloc; One thing I was curious about was whether it would make more sense to use an existing utility library in Git to handle this. But I think we kind of are, since ALLOC_GROW is used, so this is idiomatic for Git project. Ok, fine, I guess this is life at C ;) > + > /* This lock protects all the variables above. */ > static pthread_mutex_t grep_mutex; > > @@ -168,6 +171,17 @@ static void work_done(struct work_item *w) > grep_unlock(); > } > > +static void free_repos(void) > +{ > + int i; > + > + for (i = 0; i < repos_to_free_nr; i++) { > + repo_clear(repos_to_free[i]); > + free(repos_to_free[i]); > + } > + free(repos_to_free); > +} Should repos_to_free_nr be reset here? I guess it doesn't make sense to, since we'd be trying to use-after-free the initial repos_to_free head pointer too if we wanted to reuse this array. > + > static void *run(void *arg) > { > int hit = 0; > @@ -415,19 +429,24 @@ static int grep_submodule(struct grep_opt *opt, > const struct object_id *oid, > const char *filename, const char *path, int cached) > { > - struct repository subrepo; > + struct repository *subrepo; > struct repository *superproject = opt->repo; > const struct submodule *sub; > struct grep_opt subopt; > - int hit; > + int hit = 0; > > sub = submodule_from_path(superproject, null_oid(), path); > > if (!is_submodule_active(superproject, path)) > return 0; > > - if (repo_submodule_init(&subrepo, superproject, sub)) > + subrepo = xmalloc(sizeof(*subrepo)); > + if (repo_submodule_init(subrepo, superproject, sub)) { > + free(subrepo); > return 0; > + } > + ALLOC_GROW(repos_to_free, repos_to_free_nr + 1, repos_to_free_alloc); > + repos_to_free[repos_to_free_nr++] = subrepo; Is this the only place we add to the repos_to_free array? It looks like yes, so I guess this doesn't need a helper. > > /* > * NEEDSWORK: repo_read_gitmodules() might call > @@ -438,7 +457,7 @@ static int grep_submodule(struct grep_opt *opt, > * subrepo's odbs to the in-memory alternates list. > */ > obj_read_lock(); > - repo_read_gitmodules(&subrepo, 0); > + repo_read_gitmodules(subrepo, 0); > > /* > * NEEDSWORK: This adds the submodule's object directory to the list of > @@ -450,11 +469,11 @@ static int grep_submodule(struct grep_opt *opt, > * store is no longer global and instead is a member of the repository > * object. > */ > - add_submodule_odb_by_path(subrepo.objects->odb->path); > + add_submodule_odb_by_path(subrepo->objects->odb->path); > obj_read_unlock(); > > memcpy(&subopt, opt, sizeof(subopt)); > - subopt.repo = &subrepo; > + subopt.repo = subrepo; > > if (oid) { > enum object_type object_type; > @@ -464,9 +483,9 @@ static int grep_submodule(struct grep_opt *opt, > struct strbuf base = STRBUF_INIT; > > obj_read_lock(); > - object_type = oid_object_info(&subrepo, oid, NULL); > + object_type = oid_object_info(subrepo, oid, NULL); > obj_read_unlock(); > - data = read_object_with_reference(&subrepo, > + data = read_object_with_reference(subrepo, > oid, tree_type, > &size, NULL); > if (!data) > @@ -484,7 +503,6 @@ static int grep_submodule(struct grep_opt *opt, > hit = grep_cache(&subopt, pathspec, cached); > } > > - repo_clear(&subrepo); > return hit; > } > > @@ -1182,5 +1200,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix) > run_pager(&opt, prefix); > clear_pathspec(&pathspec); > free_grep_patterns(&opt); > + free_repos(); > return !hit; > } The rest of the diff seems to be a pretty clear object-becomes-reference conversion, so... Reviewed-by: Emily Shaffer <emilyshaffer@google.com>
> > +static struct repository **repos_to_free; > > +static size_t repos_to_free_nr, repos_to_free_alloc; > > One thing I was curious about was whether it would make more sense to > use an existing utility library in Git to handle this. But I think we > kind of are, since ALLOC_GROW is used, so this is idiomatic for Git > project. Ok, fine, I guess this is life at C ;) Yeah, this *is* the utility library :-) > > +static void free_repos(void) > > +{ > > + int i; > > + > > + for (i = 0; i < repos_to_free_nr; i++) { > > + repo_clear(repos_to_free[i]); > > + free(repos_to_free[i]); > > + } > > + free(repos_to_free); > > +} > > Should repos_to_free_nr be reset here? I guess it doesn't make sense to, > since we'd be trying to use-after-free the initial repos_to_free head > pointer too if we wanted to reuse this array. Hmm...I guess that if I'm going through the trouble of clearing the repos instead of just letting all the memory be collected at the end of a process, I should also reset _nr and _alloc. I'll make that change.
diff --git a/builtin/grep.c b/builtin/grep.c index 9e61c7c993..5a40e18e47 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -65,6 +65,9 @@ static int todo_done; /* Has all work items been added? */ static int all_work_added; +static struct repository **repos_to_free; +static size_t repos_to_free_nr, repos_to_free_alloc; + /* This lock protects all the variables above. */ static pthread_mutex_t grep_mutex; @@ -168,6 +171,17 @@ static void work_done(struct work_item *w) grep_unlock(); } +static void free_repos(void) +{ + int i; + + for (i = 0; i < repos_to_free_nr; i++) { + repo_clear(repos_to_free[i]); + free(repos_to_free[i]); + } + free(repos_to_free); +} + static void *run(void *arg) { int hit = 0; @@ -415,19 +429,24 @@ static int grep_submodule(struct grep_opt *opt, const struct object_id *oid, const char *filename, const char *path, int cached) { - struct repository subrepo; + struct repository *subrepo; struct repository *superproject = opt->repo; const struct submodule *sub; struct grep_opt subopt; - int hit; + int hit = 0; sub = submodule_from_path(superproject, null_oid(), path); if (!is_submodule_active(superproject, path)) return 0; - if (repo_submodule_init(&subrepo, superproject, sub)) + subrepo = xmalloc(sizeof(*subrepo)); + if (repo_submodule_init(subrepo, superproject, sub)) { + free(subrepo); return 0; + } + ALLOC_GROW(repos_to_free, repos_to_free_nr + 1, repos_to_free_alloc); + repos_to_free[repos_to_free_nr++] = subrepo; /* * NEEDSWORK: repo_read_gitmodules() might call @@ -438,7 +457,7 @@ static int grep_submodule(struct grep_opt *opt, * subrepo's odbs to the in-memory alternates list. */ obj_read_lock(); - repo_read_gitmodules(&subrepo, 0); + repo_read_gitmodules(subrepo, 0); /* * NEEDSWORK: This adds the submodule's object directory to the list of @@ -450,11 +469,11 @@ static int grep_submodule(struct grep_opt *opt, * store is no longer global and instead is a member of the repository * object. */ - add_submodule_odb_by_path(subrepo.objects->odb->path); + add_submodule_odb_by_path(subrepo->objects->odb->path); obj_read_unlock(); memcpy(&subopt, opt, sizeof(subopt)); - subopt.repo = &subrepo; + subopt.repo = subrepo; if (oid) { enum object_type object_type; @@ -464,9 +483,9 @@ static int grep_submodule(struct grep_opt *opt, struct strbuf base = STRBUF_INIT; obj_read_lock(); - object_type = oid_object_info(&subrepo, oid, NULL); + object_type = oid_object_info(subrepo, oid, NULL); obj_read_unlock(); - data = read_object_with_reference(&subrepo, + data = read_object_with_reference(subrepo, oid, tree_type, &size, NULL); if (!data) @@ -484,7 +503,6 @@ static int grep_submodule(struct grep_opt *opt, hit = grep_cache(&subopt, pathspec, cached); } - repo_clear(&subrepo); return hit; } @@ -1182,5 +1200,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix) run_pager(&opt, prefix); clear_pathspec(&pathspec); free_grep_patterns(&opt); + free_repos(); return !hit; }
Currently, struct repository objects corresponding to submodules are allocated on the stack in grep_submodule(). This currently works because they will not be used once grep_submodule() exits, but a subsequent patch will require these structs to be accessible for longer (perhaps even in another thread). Allocate them on the heap and clear them only at the very end. Signed-off-by: Jonathan Tan <jonathantanmy@google.com> --- builtin/grep.c | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-)