Message ID | e9c3f5ac5c6b7a01ee9e43730889d8066a270271.1667575142.git.gitgitgadget@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | rebase: stop setting GIT_REFLOG_ACTION | expand |
On Fri, Nov 04, 2022 at 03:19:01PM +0000, Phillip Wood via GitGitGadget wrote: > --- > sequencer.c | 45 +++++++++++++++++++++++++-------------------- > sequencer.h | 6 ++++++ > 2 files changed, 31 insertions(+), 20 deletions(-) The end result is much cleaner, thanks. Are there any spots left in the code that still expect to read GIT_REFLOG_ACTION from their environment? I did a quick look around and based on my understanding of the sequencer code this should all be OK. But some analysis in the patch message is appreciated. Thanks, Taylor
On Fri, Nov 04 2022, Phillip Wood via GitGitGadget wrote: > From: Phillip Wood <phillip.wood@dunelm.org.uk> > +static const char *sequencer_reflog_action(struct replay_opts *opts) > +{ > + if (!opts->reflog_action) { > + opts->reflog_action = getenv(GIT_REFLOG_ACTION); > + opts->reflog_action = > + xstrdup(opts->reflog_action ? opts->reflog_action > + : action_name(opts)); > + } > + > + return opts->reflog_action; > +} We always return an xstrdup'd here, so this should be a "char *" return value, not a "const char *"., but > __attribute__((format (printf, 3, 4))) > static const char *reflog_message(struct replay_opts *opts, > const char *sub_action, const char *fmt, ...) > { > va_list ap; > static struct strbuf buf = STRBUF_INIT; > - char *reflog_action = getenv(GIT_REFLOG_ACTION); > > va_start(ap, fmt); > strbuf_reset(&buf); Here we just reset the strbuf... > - strbuf_addstr(&buf, reflog_action ? reflog_action : action_name(opts)); > + strbuf_addstr(&buf, sequencer_reflog_action(opts)); Here we leak the freshly xstrdup'd value, mostly untested, but shouldn't this instead be: diff --git a/sequencer.c b/sequencer.c index e23f6f0b718..58a97e04c67 100644 --- a/sequencer.c +++ b/sequencer.c @@ -3695,10 +3695,11 @@ static const char *reflog_message(struct replay_opts *opts, { va_list ap; static struct strbuf buf = STRBUF_INIT; + char *msg = sequencer_reflog_action(opts); va_start(ap, fmt); strbuf_reset(&buf); - strbuf_addstr(&buf, sequencer_reflog_action(opts)); + strbuf_attach(&buf, msg, strlen(msg), strlen(msg) + 1); if (sub_action) strbuf_addf(&buf, " (%s)", sub_action); if (fmt) { Of course that requires dropping the "const", per the above... > if (sub_action) > strbuf_addf(&buf, " (%s)", sub_action); > if (fmt) { > @@ -4497,7 +4511,7 @@ static int checkout_onto(struct repository *r, struct replay_opts *opts, > RESET_HEAD_RUN_POST_CHECKOUT_HOOK, > .head_msg = reflog_message(opts, "start", "checkout %s", > onto_name), > - .default_reflog_action = "rebase" > + .default_reflog_action = sequencer_reflog_action(opts) Here we'd before hand a fixed string to reset_head(), but now it's xstrdup()'d, but the corresponding free() on that side is missing. But aren't we always just returing "rebase" here still? > [...] > @@ -5116,7 +5121,7 @@ static int single_pick(struct repository *r, > TODO_PICK : TODO_REVERT; > item.commit = cmit; > > - setenv(GIT_REFLOG_ACTION, action_name(opts), 0); > + opts->reflog_message = sequencer_reflog_action(opts); > return do_pick_commit(r, &item, opts, 0, &check_todo); Here you're adding a new memory leak, which you can see if you run e.g. the 1st test of ./t1013-read-tree-submodule.sh before & after this change.
Hi Ævar On 07/11/2022 16:12, Ævar Arnfjörð Bjarmason wrote: > > On Fri, Nov 04 2022, Phillip Wood via GitGitGadget wrote: > >> From: Phillip Wood <phillip.wood@dunelm.org.uk> > >> +static const char *sequencer_reflog_action(struct replay_opts *opts) >> +{ >> + if (!opts->reflog_action) { >> + opts->reflog_action = getenv(GIT_REFLOG_ACTION); >> + opts->reflog_action = >> + xstrdup(opts->reflog_action ? opts->reflog_action >> + : action_name(opts)); >> + } >> + >> + return opts->reflog_action; >> +} > > We always return an xstrdup'd here, so this should be a "char *" return > value, not a "const char *"., but We always return the same string, it is only allocated on the first call. opt->reflog_action is freed at the end of the rebase in sequencer_remove_state(). >> __attribute__((format (printf, 3, 4))) >> static const char *reflog_message(struct replay_opts *opts, >> const char *sub_action, const char *fmt, ...) >> { >> va_list ap; >> static struct strbuf buf = STRBUF_INIT; >> - char *reflog_action = getenv(GIT_REFLOG_ACTION); >> >> va_start(ap, fmt); >> strbuf_reset(&buf); > > Here we just reset the strbuf... > >> - strbuf_addstr(&buf, reflog_action ? reflog_action : action_name(opts)); >> + strbuf_addstr(&buf, sequencer_reflog_action(opts)); > > Here we leak the freshly xstrdup'd value, mostly untested, but shouldn't > this instead be: > > diff --git a/sequencer.c b/sequencer.c > index e23f6f0b718..58a97e04c67 100644 > --- a/sequencer.c > +++ b/sequencer.c > @@ -3695,10 +3695,11 @@ static const char *reflog_message(struct replay_opts *opts, > { > va_list ap; > static struct strbuf buf = STRBUF_INIT; > + char *msg = sequencer_reflog_action(opts); > > va_start(ap, fmt); > strbuf_reset(&buf); > - strbuf_addstr(&buf, sequencer_reflog_action(opts)); > + strbuf_attach(&buf, msg, strlen(msg), strlen(msg) + 1); > if (sub_action) > strbuf_addf(&buf, " (%s)", sub_action); > if (fmt) { > > Of course that requires dropping the "const", per the above... > > >> if (sub_action) >> strbuf_addf(&buf, " (%s)", sub_action); >> if (fmt) { >> @@ -4497,7 +4511,7 @@ static int checkout_onto(struct repository *r, struct replay_opts *opts, >> RESET_HEAD_RUN_POST_CHECKOUT_HOOK, >> .head_msg = reflog_message(opts, "start", "checkout %s", >> onto_name), >> - .default_reflog_action = "rebase" >> + .default_reflog_action = sequencer_reflog_action(opts) > > Here we'd before hand a fixed string to reset_head(), but now it's > xstrdup()'d, but the corresponding free() on that side is missing. > > But aren't we always just returing "rebase" here still? Yes but I think it makes sense to call squencer_reflog_action() in case this changes in the future. Note I don't think this is leaking anything. >> [...] >> @@ -5116,7 +5121,7 @@ static int single_pick(struct repository *r, >> TODO_PICK : TODO_REVERT; >> item.commit = cmit; >> >> - setenv(GIT_REFLOG_ACTION, action_name(opts), 0); >> + opts->reflog_message = sequencer_reflog_action(opts); >> return do_pick_commit(r, &item, opts, 0, &check_todo); > > Here you're adding a new memory leak, which you can see if you run > e.g. the 1st test of ./t1013-read-tree-submodule.sh before & after this > change. I'm not sure how, opts->reflog_message will be a copy of opts->reflog_action which is freed at the end of the rebase. I'll have a proper look tomorrow to see if I'm missing something. Best Wishes Phillip
Hi Ævar On 07/11/2022 19:35, Phillip Wood wrote: >>> @@ -5116,7 +5121,7 @@ static int single_pick(struct repository *r, >>> TODO_PICK : TODO_REVERT; >>> item.commit = cmit; >>> - setenv(GIT_REFLOG_ACTION, action_name(opts), 0); >>> + opts->reflog_message = sequencer_reflog_action(opts); >>> return do_pick_commit(r, &item, opts, 0, &check_todo); >> >> Here you're adding a new memory leak, which you can see if you run >> e.g. the 1st test of ./t1013-read-tree-submodule.sh before & after this >> change. What's a read-tree test using rebase for? I find the submodule tests completely incomprehensible. It is calling test_submodule_switch_recursing_with_args() which does not call rebase directly but who knows what is going on in all the helper functions. Have you got a simple example of a test which shows a new leak? > I'm not sure how, opts->reflog_message will be a copy of > opts->reflog_action which is freed at the end of the rebase. I'll have a > proper look tomorrow to see if I'm missing something. So it is possible this is showing up because I think we only free the heap allocated members of opts in sequencer_remove_state() and that is not called when we stop for a conflict resolution, a break command, a failed exec or a rescheduled pick/reset etc. The way to fix that would be to refactor sequencer_remove_state() to create replay_opts_release() and call that from builtin/{revert,rebase}.c As that is unrelated to removing the setenv() calls which is the focus of this series I will not be doing that in this series. > Best Wishes > > Phillip
On Tue, Nov 08 2022, Phillip Wood wrote: > Hi Ævar > > On 07/11/2022 19:35, Phillip Wood wrote: >>>> @@ -5116,7 +5121,7 @@ static int single_pick(struct repository *r, >>>> TODO_PICK : TODO_REVERT; >>>> item.commit = cmit; >>>> - setenv(GIT_REFLOG_ACTION, action_name(opts), 0); >>>> + opts->reflog_message = sequencer_reflog_action(opts); >>>> return do_pick_commit(r, &item, opts, 0, &check_todo); >>> >>> Here you're adding a new memory leak, which you can see if you run >>> e.g. the 1st test of ./t1013-read-tree-submodule.sh before & after this >>> change. > > What's a read-tree test using rebase for? I find the submodule tests > completely incomprehensible. It is calling > test_submodule_switch_recursing_with_args() which does not call rebase > directly but who knows what is going on in all the helper functions. I don't know, I just worked by way backwards from the leak logs, so... > Have you got a simple example of a test which shows a new leak? ...yes, e.g. (after make SANITIZE=leak): ./t3425-rebase-topology-merges.sh -vixd Will, on "master", emit: Direct leak of 1408 byte(s) in 1 object(s) allocated from: #0 0x7ff891b5f545 in __interceptor_malloc ../../../../src/libsanitizer/lsan/lsan_interceptors.cpp:75 #1 0x6c45e8 in do_xmalloc wrapper.c:51 #2 0x6c4670 in xmalloc wrapper.c:72 #3 0x6037e2 in parse_options_concat parse-options-cb.c:188 #4 0x4c547c in run_sequencer builtin/revert.c:140 #5 0x4c5a4c in cmd_revert builtin/revert.c:247 #6 0x407a32 in run_builtin git.c:466 #7 0x407e0a in handle_builtin git.c:721 #8 0x40803d in run_argv git.c:788 #9 0x40850f in cmd_main git.c:923 #10 0x4eed6f in main common-main.c:57 #11 0x7ff8918b9209 in __libc_start_call_main ../sysdeps/nptl/libc_start_call_main.h:58 #12 0x7ff8918b92bb in __libc_start_main_impl ../csu/libc-start.c:389 #13 0x405fd0 in _start (git+0x405fd0) Direct leak of 4 byte(s) in 1 object(s) allocated from: #0 0x7ff891b5f545 in __interceptor_malloc ../../../../src/libsanitizer/lsan/lsan_interceptors.cpp:75 #1 0x7ff891929caa in __GI___strdup string/strdup.c:42 #2 0x6c4591 in xstrdup wrapper.c:39 #3 0x4c58f8 in run_sequencer builtin/revert.c:223 #4 0x4c5a4c in cmd_revert builtin/revert.c:247 #5 0x407a32 in run_builtin git.c:466 #6 0x407e0a in handle_builtin git.c:721 #7 0x40803d in run_argv git.c:788 #8 0x40850f in cmd_main git.c:923 #9 0x4eed6f in main common-main.c:57 #10 0x7ff8918b9209 in __libc_start_call_main ../sysdeps/nptl/libc_start_call_main.h:58 #11 0x7ff8918b92bb in __libc_start_main_impl ../csu/libc-start.c:389 #12 0x405fd0 in _start (git+0x405fd0) After we still have the first leak (which is unrelated), and the second, but have added this one: Direct leak of 7 byte(s) in 1 object(s) allocated from: #0 0x7f7cc51e5545 in __interceptor_malloc ../../../../src/libsanitizer/lsan/lsan_interceptors.cpp:75 #1 0x7f7cc4fafcaa in __GI___strdup string/strdup.c:42 #2 0x6c460b in xstrdup wrapper.c:39 #3 0x66df91 in sequencer_reflog_action sequencer.c:3685 #4 0x6725ad in single_pick sequencer.c:5124 #5 0x6728dd in sequencer_pick_revisions sequencer.c:5178 #6 0x4c5a17 in run_sequencer builtin/revert.c:237 #7 0x4c5aa9 in cmd_revert builtin/revert.c:247 #8 0x407a32 in run_builtin git.c:466 #9 0x407e0a in handle_builtin git.c:721 #10 0x40803d in run_argv git.c:788 #11 0x40850f in cmd_main git.c:923 #12 0x4eedcc in main common-main.c:57 #13 0x7f7cc4f3f209 in __libc_start_call_main ../sysdeps/nptl/libc_start_call_main.h:58 #14 0x7f7cc4f3f2bb in __libc_start_main_impl ../csu/libc-start.c:389 #15 0x405fd0 in _start (git+0x405fd0) But more to the point, if you run the test suite with e.g.: GIT_TEST_PASSING_SANITIZE_LEAK=check GIT_TEST_SANITIZE_LEAK_LOG=true You can find these raw reports in: grep -r sequencer test-results/*.leak Or, from my github.com/avar/git.git use this nice script/alias to summarize it (I haven't upstreamed this yet): $ git help scan-leaks-top 'scan-leaks-top' is aliased to '!f() { cd t && git cat-file blob avar/add-new-sanitize-leak-test-modes-follow-up:t/aggregate-leaks.perl | perl - | less -S; }; f' >> I'm not sure how, opts->reflog_message will be a copy of >> opts->reflog_action which is freed at the end of the rebase. I'll >> have a proper look tomorrow to see if I'm missing something. > > So it is possible this is showing up because I think we only free the > heap allocated members of opts in sequencer_remove_state() and that is > not called when we stop for a conflict resolution, a break command, a > failed exec or a rescheduled pick/reset etc. The way to fix that would > be to refactor sequencer_remove_state() to create > replay_opts_release() and call that from builtin/{revert,rebase}.c Yes, I think that's probably the root cause. I have a leak-fixing topic as a follow-up to my current one, which among other things tried to address this: https://github.com/avar/git/commit/7a150d1b7e2 I'd just forgot about it. That link currently says committed <24hrs ago, but I was just rebasing the topic for something unrelated, I hacked this up in mid-August. > As that is unrelated to removing the setenv() calls which is the focus > of this series I will not be doing that in this series. I'm fine with us leaving this for now, and saying that it's OK that we're adding some new leaks, if we're addressing the setenv/getenv issue, and that we can fix the root cause of the current leaks later. But let's be clear: It's not unrelated to your refactoring in this topic, we didn't have this leak before, and now we have it. These two patches are the cause of some new leaks we didn't have before. And, if we run this on my topic which narrowly attempted to fix these leaks e.g. that "t3425-rebase-topology-merges.sh" test will have just 1 leak in that failing test, v.s. 3 leaks with this topic (the "4 byte(s) in 1 object(s)" above). It's just a nice coincidence that our memory leaks are currently in such a sorry state overall that this isn't failing e.g. the linux-leaks CI, because the new leaks are being masked by tests that area already failing due to other pre-existing leaks. But all that being said I think the right move is for this topic to proceed, perhaps with an updated commit message noting some of this. It's really just running into the existing problem of replay_opts_release(). If that destructor isn't reliable (which it isn't) we can still make new use of it, and then fix how we call it later for all its callers. Which I've just tested: I cherry-pick that 7a150d1b7e2 and the few preceding commits it needs (dcc104aef89..7a150d1b7e2), and then apply this on top: diff --git a/builtin/revert.c b/builtin/revert.c index ee32c714a76..0abd805beed 100644 --- a/builtin/revert.c +++ b/builtin/revert.c @@ -250,6 +250,7 @@ int cmd_revert(int argc, const char **argv, const char *prefix) if (opts.revs) release_revisions(opts.revs); free(opts.revs); + replay_opts_release(&opts); return res; } The leaks above are down to just the unrelated parse_options_concat() leak. I.e. this really is just a case of us missing the destructor due to a more general issue. 1. https://lore.kernel.org/git/8eec228d-d392-523d-2415-149b946f642e@dunelm.org.uk/
diff --git a/sequencer.c b/sequencer.c index e658df7e8ff..e23f6f0b718 100644 --- a/sequencer.c +++ b/sequencer.c @@ -375,6 +375,7 @@ int sequencer_remove_state(struct replay_opts *opts) } free(opts->gpg_sign); + free(opts->reflog_action); free(opts->default_strategy); free(opts->strategy); for (i = 0; i < opts->xopts_nr; i++) @@ -1050,6 +1051,8 @@ static int run_git_commit(const char *defmsg, gpg_opt, gpg_opt); } + strvec_pushf(&cmd.env, GIT_REFLOG_ACTION "=%s", opts->reflog_message); + if (opts->committer_date_is_author_date) strvec_pushf(&cmd.env, "GIT_COMMITTER_DATE=%s", opts->ignore_date ? @@ -1589,8 +1592,8 @@ static int try_to_commit(struct repository *r, goto out; } - if (update_head_with_reflog(current_head, oid, - getenv("GIT_REFLOG_ACTION"), msg, &err)) { + if (update_head_with_reflog(current_head, oid, opts->reflog_message, + msg, &err)) { res = error("%s", err.buf); goto out; } @@ -3674,17 +3677,28 @@ static int do_label(struct repository *r, const char *name, int len) return ret; } +static const char *sequencer_reflog_action(struct replay_opts *opts) +{ + if (!opts->reflog_action) { + opts->reflog_action = getenv(GIT_REFLOG_ACTION); + opts->reflog_action = + xstrdup(opts->reflog_action ? opts->reflog_action + : action_name(opts)); + } + + return opts->reflog_action; +} + __attribute__((format (printf, 3, 4))) static const char *reflog_message(struct replay_opts *opts, const char *sub_action, const char *fmt, ...) { va_list ap; static struct strbuf buf = STRBUF_INIT; - char *reflog_action = getenv(GIT_REFLOG_ACTION); va_start(ap, fmt); strbuf_reset(&buf); - strbuf_addstr(&buf, reflog_action ? reflog_action : action_name(opts)); + strbuf_addstr(&buf, sequencer_reflog_action(opts)); if (sub_action) strbuf_addf(&buf, " (%s)", sub_action); if (fmt) { @@ -4497,7 +4511,7 @@ static int checkout_onto(struct repository *r, struct replay_opts *opts, RESET_HEAD_RUN_POST_CHECKOUT_HOOK, .head_msg = reflog_message(opts, "start", "checkout %s", onto_name), - .default_reflog_action = "rebase" + .default_reflog_action = sequencer_reflog_action(opts) }; if (reset_head(r, &ropts)) { apply_autostash(rebase_path_autostash()); @@ -4566,11 +4580,8 @@ static int pick_commits(struct repository *r, struct replay_opts *opts) { int res = 0, reschedule = 0; - char *prev_reflog_action; - /* Note that 0 for 3rd parameter of setenv means set only if not set */ - setenv(GIT_REFLOG_ACTION, action_name(opts), 0); - prev_reflog_action = xstrdup(getenv(GIT_REFLOG_ACTION)); + opts->reflog_message = sequencer_reflog_action(opts); if (opts->allow_ff) assert(!(opts->signoff || opts->no_commit || opts->record_origin || should_edit(opts) || @@ -4618,14 +4629,12 @@ static int pick_commits(struct repository *r, } if (item->command <= TODO_SQUASH) { if (is_rebase_i(opts)) - setenv(GIT_REFLOG_ACTION, reflog_message(opts, - command_to_string(item->command), NULL), - 1); + opts->reflog_message = reflog_message(opts, + command_to_string(item->command), NULL); + res = do_pick_commit(r, item, opts, is_final_fixup(todo_list), &check_todo); - if (is_rebase_i(opts)) - setenv(GIT_REFLOG_ACTION, prev_reflog_action, 1); if (is_rebase_i(opts) && res < 0) { /* Reschedule */ advise(_(rescheduled_advice), @@ -5050,8 +5059,6 @@ int sequencer_continue(struct repository *r, struct replay_opts *opts) if (read_populate_opts(opts)) return -1; if (is_rebase_i(opts)) { - char *previous_reflog_action; - if ((res = read_populate_todo(r, &todo_list, opts))) goto release_todo_list; @@ -5062,13 +5069,11 @@ int sequencer_continue(struct repository *r, struct replay_opts *opts) unlink(rebase_path_dropped()); } - previous_reflog_action = xstrdup(getenv(GIT_REFLOG_ACTION)); - setenv(GIT_REFLOG_ACTION, reflog_message(opts, "continue", NULL), 1); + opts->reflog_message = reflog_message(opts, "continue", NULL); if (commit_staged_changes(r, opts, &todo_list)) { res = -1; goto release_todo_list; } - setenv(GIT_REFLOG_ACTION, previous_reflog_action, 1); } else if (!file_exists(get_todo_path(opts))) return continue_single_pick(r, opts); else if ((res = read_populate_todo(r, &todo_list, opts))) @@ -5116,7 +5121,7 @@ static int single_pick(struct repository *r, TODO_PICK : TODO_REVERT; item.commit = cmit; - setenv(GIT_REFLOG_ACTION, action_name(opts), 0); + opts->reflog_message = sequencer_reflog_action(opts); return do_pick_commit(r, &item, opts, 0, &check_todo); } diff --git a/sequencer.h b/sequencer.h index 563fe599334..888c18aad71 100644 --- a/sequencer.h +++ b/sequencer.h @@ -63,6 +63,9 @@ struct replay_opts { char **xopts; size_t xopts_nr, xopts_alloc; + /* Reflog */ + char *reflog_action; + /* Used by fixup/squash */ struct strbuf current_fixups; int current_fixup_count; @@ -73,6 +76,9 @@ struct replay_opts { /* Only used by REPLAY_NONE */ struct rev_info *revs; + + /* Private use */ + const char *reflog_message; }; #define REPLAY_OPTS_INIT { .edit = -1, .action = -1, .current_fixups = STRBUF_INIT }