Message ID | 20240322000304.76810-4-thalia@archibald.dev (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | fast-import: tighten parsing of paths | expand |
On Fri, Mar 22, 2024 at 12:03:33AM +0000, Thalia Archibald wrote: > These strbufs are owned. Release them at the end of their scopes. > > Signed-off-by: Thalia Archibald <thalia@archibald.dev> > --- > builtin/fast-import.c | 29 ++++++++++++++++++----------- > 1 file changed, 18 insertions(+), 11 deletions(-) > > diff --git a/builtin/fast-import.c b/builtin/fast-import.c > index 1b3d6784c1..d6f998f363 100644 > --- a/builtin/fast-import.c > +++ b/builtin/fast-import.c > @@ -2364,6 +2364,7 @@ static void file_change_m(const char *p, struct branch *b) > /* Git does not track empty, non-toplevel directories. */ > if (S_ISDIR(mode) && is_empty_tree_oid(&oid) && *path.buf) { > tree_content_remove(&b->branch_tree, path.buf, NULL, 0); > + strbuf_release(&path); > return; > } Oh, now you get to my comment in the preceding patch. With this patch we're now in a somewhat weird in-between state where the buffers are still static, but we release their memory after each call. So we kind of get the worst of both worlds: static variables without being able to reuse the buffer's memory. If we were to change this then we should definitely mark the buffers as non-static. If so, it would be great to demonstrate that this does not significantly impact performance. The same is true for all the other instances. 
Patrick > @@ -2409,11 +2410,11 @@ static void file_change_m(const char *p, struct branch *b) > command_buf.buf); > } > > - if (!*path.buf) { > + if (*path.buf) > + tree_content_set(&b->branch_tree, path.buf, &oid, mode, NULL); > + else > tree_content_replace(&b->branch_tree, &oid, mode, NULL); > - return; > - } > - tree_content_set(&b->branch_tree, path.buf, &oid, mode, NULL); > + strbuf_release(&path); > } > > static void file_change_d(const char *p, struct branch *b) > @@ -2422,6 +2423,7 @@ static void file_change_d(const char *p, struct branch *b) > > parse_path_eol(&path, p, "path"); > tree_content_remove(&b->branch_tree, path.buf, NULL, 1); > + strbuf_release(&path); > } > > static void file_change_cr(const char *p, struct branch *b, int rename) > @@ -2440,17 +2442,18 @@ static void file_change_cr(const char *p, struct branch *b, int rename) > tree_content_get(&b->branch_tree, source.buf, &leaf, 1); > if (!leaf.versions[1].mode) > die("Path %s not in branch", source.buf); > - if (!*dest.buf) { /* C "path/to/subdir" "" */ > + if (*dest.buf) > + tree_content_set(&b->branch_tree, dest.buf, > + &leaf.versions[1].oid, > + leaf.versions[1].mode, > + leaf.tree); > + else /* C "path/to/subdir" "" */ > tree_content_replace(&b->branch_tree, > &leaf.versions[1].oid, > leaf.versions[1].mode, > leaf.tree); > - return; > - } > - tree_content_set(&b->branch_tree, dest.buf, > - &leaf.versions[1].oid, > - leaf.versions[1].mode, > - leaf.tree); > + strbuf_release(&source); > + strbuf_release(&dest); > } > > static void note_change_n(const char *p, struct branch *b, unsigned char *old_fanout) > @@ -2804,6 +2807,7 @@ static void parse_new_commit(const char *arg) > free(author); > free(committer); > free(encoding); > + strbuf_release(&msg); > > if (!store_object(OBJ_COMMIT, &new_data, NULL, &b->oid, next_mark)) > b->pack_id = pack_id; > @@ -2886,6 +2890,7 @@ static void parse_new_tag(const char *arg) > strbuf_addch(&new_data, '\n'); > strbuf_addbuf(&new_data, &msg); > 
free(tagger); > + strbuf_release(&msg); > > if (store_object(OBJ_TAG, &new_data, NULL, &t->oid, next_mark)) > t->pack_id = MAX_PACK_ID; > @@ -3171,6 +3176,7 @@ static void print_ls(int mode, const unsigned char *hash, const char *path) > strbuf_addch(&line, '\n'); > } > cat_blob_write(line.buf, line.len); > + strbuf_release(&line); > } > > static void parse_ls(const char *p, struct branch *b) > @@ -3206,6 +3212,7 @@ static void parse_ls(const char *p, struct branch *b) > release_tree_content_recursive(leaf.tree); > if (!b || root != &b->branch_tree) > release_tree_entry(root); > + strbuf_release(&path); > } > > static void checkpoint(void) > -- > 2.44.0 > > >
(Resending as plain text)

On Mar 28, 2024, at 01:21, Patrick Steinhardt <ps@pks.im> wrote:
> I was about to propose that we should likely also change all of these
> static variables to be local instead. I don't think that we use the
> variables after the function calls. But now that I see that we do it
> like this in all of these helpers I think what's going on is that this
> is a memory optimization to avoid reallocating buffers all the time.
>
> Ugly, but so be it. We could refactor the code to pass in scratch
> buffers from the outside to remove those static variables. But that
> certainly would be a bigger change and thus likely outside of the scope
> of this patch series.

> Oh, now you get to my comment in the preceding patch. With this patch
> we're now in a somewhat weird in-between state where the buffers are
> still static, but we release their memory after each call. So we kind of
> get the worst of both worlds: static variables without being able to
> reuse the buffer's memory.
>
> If we were to change this then we should definitely mark the buffers as
> non-static. If so, it would be great to demonstrate that this does not
> significantly impact performance.
>
> The same is true for all the other instances.

I had glossed over the fact that they're `static`, since I've grown accustomed to Rust, where this sort of non-reentrant code is discouraged. However, this pattern is great for fast-import, because all of its data is simply freed when it exits at the end of the stream. I dropped this patch in v2.

I don't think it's worth hoisting these `strbuf`s out. It would only reduce it from 5 to 2 total static `strbuf`s for paths, but would make ownership less clear.

Thalia
diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 1b3d6784c1..d6f998f363 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -2364,6 +2364,7 @@ static void file_change_m(const char *p, struct branch *b) /* Git does not track empty, non-toplevel directories. */ if (S_ISDIR(mode) && is_empty_tree_oid(&oid) && *path.buf) { tree_content_remove(&b->branch_tree, path.buf, NULL, 0); + strbuf_release(&path); return; } @@ -2409,11 +2410,11 @@ static void file_change_m(const char *p, struct branch *b) command_buf.buf); } - if (!*path.buf) { + if (*path.buf) + tree_content_set(&b->branch_tree, path.buf, &oid, mode, NULL); + else tree_content_replace(&b->branch_tree, &oid, mode, NULL); - return; - } - tree_content_set(&b->branch_tree, path.buf, &oid, mode, NULL); + strbuf_release(&path); } static void file_change_d(const char *p, struct branch *b) @@ -2422,6 +2423,7 @@ static void file_change_d(const char *p, struct branch *b) parse_path_eol(&path, p, "path"); tree_content_remove(&b->branch_tree, path.buf, NULL, 1); + strbuf_release(&path); } static void file_change_cr(const char *p, struct branch *b, int rename) @@ -2440,17 +2442,18 @@ static void file_change_cr(const char *p, struct branch *b, int rename) tree_content_get(&b->branch_tree, source.buf, &leaf, 1); if (!leaf.versions[1].mode) die("Path %s not in branch", source.buf); - if (!*dest.buf) { /* C "path/to/subdir" "" */ + if (*dest.buf) + tree_content_set(&b->branch_tree, dest.buf, + &leaf.versions[1].oid, + leaf.versions[1].mode, + leaf.tree); + else /* C "path/to/subdir" "" */ tree_content_replace(&b->branch_tree, &leaf.versions[1].oid, leaf.versions[1].mode, leaf.tree); - return; - } - tree_content_set(&b->branch_tree, dest.buf, - &leaf.versions[1].oid, - leaf.versions[1].mode, - leaf.tree); + strbuf_release(&source); + strbuf_release(&dest); } static void note_change_n(const char *p, struct branch *b, unsigned char *old_fanout) @@ -2804,6 +2807,7 @@ static void parse_new_commit(const 
char *arg) free(author); free(committer); free(encoding); + strbuf_release(&msg); if (!store_object(OBJ_COMMIT, &new_data, NULL, &b->oid, next_mark)) b->pack_id = pack_id; @@ -2886,6 +2890,7 @@ static void parse_new_tag(const char *arg) strbuf_addch(&new_data, '\n'); strbuf_addbuf(&new_data, &msg); free(tagger); + strbuf_release(&msg); if (store_object(OBJ_TAG, &new_data, NULL, &t->oid, next_mark)) t->pack_id = MAX_PACK_ID; @@ -3171,6 +3176,7 @@ static void print_ls(int mode, const unsigned char *hash, const char *path) strbuf_addch(&line, '\n'); } cat_blob_write(line.buf, line.len); + strbuf_release(&line); } static void parse_ls(const char *p, struct branch *b) @@ -3206,6 +3212,7 @@ static void parse_ls(const char *p, struct branch *b) release_tree_content_recursive(leaf.tree); if (!b || root != &b->branch_tree) release_tree_entry(root); + strbuf_release(&path); } static void checkpoint(void)
These strbufs are owned. Release them at the end of their scopes. Signed-off-by: Thalia Archibald <thalia@archibald.dev> --- builtin/fast-import.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-)