Message ID | dfb639373234a6b8d5f9110380a66ffccbe0b1d6.1637590855.git.gitgitgadget@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | Inspect reflog data programmatically in more tests | expand |
"Han-Wen Nienhuys via GitGitGadget" <gitgitgadget@gmail.com> writes: > From: Han-Wen Nienhuys <hanwen@google.com> > > Commit 523fa69c ("reflog: cleanse messages in the refs.c layer") standardizes > how write entries into the reflog. This commit standardizes how we get messages > out of the reflog. Before, the files backend implicitly added '\n' to the end of > reflog message on reading, which creates a subtle incompatibility with alternate > ref storage backends, such as reftable. > > We address this by stripping LF from the message before we pass it to the > user-provided callback. If this were truly "user-provided", then I'd argue that all backends should follow whatever the files backend has been doing forever---if the files added LF implicitly, others should, too, because that is pretty much what these "user-provided" callbacks have been expecting to see. In other words, it would be a bug for newer backends to behave differently. But I _think_ these callbacks are all under our control, and if that is the case, I am fine either way, even though I would have a strong preference not to have to change the API without a good reason, even if it is a purely internal one. So, let's go and see if we can find a good reason in the changes we can make to the callback functions ;-) > - end = strchr(logmsg, '\n'); > - if (end) > - *end = '\0'; > - We could argue that the lack of LF at the end from the API output made this caller simpler, which may be a plus. > diff --git a/reflog-walk.c b/reflog-walk.c > index 8ac4b284b6b..3ee59a98d2f 100644 > --- a/reflog-walk.c > +++ b/reflog-walk.c > @@ -244,8 +244,6 @@ void get_reflog_message(struct strbuf *sb, > > info = &commit_reflog->reflogs->items[commit_reflog->recno+1]; > len = strlen(info->message); > - if (len > 0) > - len--; /* strip away trailing newline */ Likewise. > @@ -284,10 +282,10 @@ void show_reflog_message(struct reflog_walk_info *reflog_info, int oneline, > info = &commit_reflog->reflogs->items[commit_reflog->recno+1]; > get_reflog_selector(&selector, reflog_info, dmode, force_date, 0); > if (oneline) { > - printf("%s: %s", selector.buf, info->message); > + printf("%s: %s\n", selector.buf, info->message); > } > else { > - printf("Reflog: %s (%s)\nReflog message: %s", > + printf("Reflog: %s (%s)\nReflog message: %s\n", > selector.buf, info->email, info->message); > } This is Meh either way. > diff --git a/refs/files-backend.c b/refs/files-backend.c > index 151b0056fe5..583bbc5f8b9 100644 > --- a/refs/files-backend.c > +++ b/refs/files-backend.c > @@ -1936,17 +1936,15 @@ static int show_one_reflog_ent(struct strbuf *sb, each_reflog_ent_fn fn, void *c > int tz; > const char *p = sb->buf; > > - /* old SP new SP name <email> SP time TAB msg LF */ > - if (!sb->len || sb->buf[sb->len - 1] != '\n' || > - parse_oid_hex(p, &ooid, &p) || *p++ != ' ' || > + /* old SP new SP name <email> SP time TAB msg */ > + if (!sb->len || parse_oid_hex(p, &ooid, &p) || *p++ != ' ' || > parse_oid_hex(p, &noid, &p) || *p++ != ' ' || > - !(email_end = strchr(p, '>')) || > - email_end[1] != ' ' || > + !(email_end = strchr(p, '>')) || email_end[1] != ' ' || > !(timestamp = parse_timestamp(email_end + 2, &message, 10)) || > !message || message[0] != ' ' || > - (message[1] != '+' && message[1] != '-') || > - !isdigit(message[2]) || !isdigit(message[3]) || > - !isdigit(message[4]) || !isdigit(message[5])) > + (message[1] != '+' && message[1] != '-') || !isdigit(message[2]) || > + !isdigit(message[3]) || !isdigit(message[4]) || > + !isdigit(message[5])) > return 0; /* corrupt? */ > email_end[1] = '\0'; > tz = strtol(message + 1, NULL, 10); > @@ -2038,6 +2036,7 @@ static int files_for_each_reflog_ent_reverse(struct ref_store *ref_store, > strbuf_splice(&sb, 0, 0, bp + 1, endp - (bp + 1)); > scanp = bp; > endp = bp + 1; > + strbuf_trim_trailing_newline(&sb); > ret = show_one_reflog_ent(&sb, fn, cb_data); > strbuf_reset(&sb); > if (ret) > @@ -2050,6 +2049,7 @@ static int files_for_each_reflog_ent_reverse(struct ref_store *ref_store, > * Process it, and we can end the loop. > */ > strbuf_splice(&sb, 0, 0, buf, endp - buf); > + strbuf_trim_trailing_newline(&sb); > ret = show_one_reflog_ent(&sb, fn, cb_data); > strbuf_reset(&sb); > break; > @@ -2099,7 +2099,7 @@ static int files_for_each_reflog_ent(struct ref_store *ref_store, > if (!logfp) > return -1; > > - while (!ret && !strbuf_getwholeline(&sb, logfp, '\n')) > + while (!ret && !strbuf_getline(&sb, logfp)) > ret = show_one_reflog_ent(&sb, fn, cb_data); > fclose(logfp); > strbuf_release(&sb); > @@ -3059,18 +3059,18 @@ static int expire_reflog_ent(struct object_id *ooid, struct object_id *noid, > if ((*cb->should_prune_fn)(ooid, noid, email, timestamp, tz, > message, policy_cb)) { > if (!cb->newlog) > - printf("would prune %s", message); > + printf("would prune %s\n", message); > else if (cb->flags & EXPIRE_REFLOGS_VERBOSE) > - printf("prune %s", message); > + printf("prune %s\n", message); > } else { > if (cb->newlog) { > - fprintf(cb->newlog, "%s %s %s %"PRItime" %+05d\t%s", > - oid_to_hex(ooid), oid_to_hex(noid), > - email, timestamp, tz, message); > + fprintf(cb->newlog, "%s %s %s %" PRItime " %+05d\t%s\n", > + oid_to_hex(ooid), oid_to_hex(noid), email, > + timestamp, tz, message); > oidcpy(&cb->last_kept_oid, noid); > } > if (cb->flags & EXPIRE_REFLOGS_VERBOSE) > - printf("keep %s", message); > + printf("keep %s\n", message); > } > return 0; > } Hmmmm. I'll defer my judgment to the end. > diff --git a/t/t1405-main-ref-store.sh b/t/t1405-main-ref-store.sh > index 49718b7ea7f..a600bedf2cd 100755 > --- a/t/t1405-main-ref-store.sh > +++ b/t/t1405-main-ref-store.sh > @@ -89,13 +89,12 @@ test_expect_success 'for_each_reflog()' ' > test_expect_success 'for_each_reflog_ent()' ' > $RUN for-each-reflog-ent HEAD >actual && > head -n1 actual | grep one && > - tail -n2 actual | head -n1 | grep recreate-main > + tail -n1 actual | grep recreate-main > ' > > test_expect_success 'for_each_reflog_ent_reverse()' ' > $RUN for-each-reflog-ent-reverse HEAD >actual && > - head -n1 actual | grep recreate-main && > - tail -n2 actual | head -n1 | grep one > + tail -n1 actual | grep one > ' > > test_expect_success 'reflog_exists(HEAD)' ' > diff --git a/t/t1406-submodule-ref-store.sh b/t/t1406-submodule-ref-store.sh > index 0a87058971e..b0365c1fee0 100755 > --- a/t/t1406-submodule-ref-store.sh > +++ b/t/t1406-submodule-ref-store.sh > @@ -74,13 +74,13 @@ test_expect_success 'for_each_reflog()' ' > test_expect_success 'for_each_reflog_ent()' ' > $RUN for-each-reflog-ent HEAD >actual && > head -n1 actual | grep first && > - tail -n2 actual | head -n1 | grep main.to.new > + tail -n1 actual | grep main.to.new > ' > > test_expect_success 'for_each_reflog_ent_reverse()' ' > $RUN for-each-reflog-ent-reverse HEAD >actual && > head -n1 actual | grep main.to.new && > - tail -n2 actual | head -n1 | grep first > + tail -n1 actual | grep first > ' > > test_expect_success 'reflog_exists(HEAD)' ' I got an impression from the proposed log message and the changes to the code (except for the refs/files-backend.c, which I only skimmed) that the idea is that the refs API stops adding LF at the end, and the callers got a matching change to compensate for the (now) missing LF. If that is the idea behind the change, why do we need to change any existing test? The only way any tests need to be modified due to such a change I can think of is when we forget to or failed to make compensating change to the callers of the API. Puzzled... Ah, the $RUN is hiding what is really going on; it is running the "test-tool ref-store" helper, and we did not adjust that helper. So if we make a compensating change to the test-tool then we do not have to have these changes at all? But that point may be moot. In any case, in order to lose 5 lines from show-branch.c, and 2 lines from reflog-walk.c, I see that we had to touch 30+ lines in refs/files-backend.c. I find it a bit hard to sell this as an improvement to the API, to be honest. Luckily, it looks to me that this step is mostly unreleated to the main thrust of these patches in the series, which is "reading .git/logs/ in the test would work only when testing files backend; use for-each-ref test helper to recreate what would have been read by such tests from the files backend's files and inspect that instead, and that would allow us test other backends for free". So I suspect that this step can be safely dropped? Thanks.
On Mon, Nov 22 2021, Han-Wen Nienhuys via GitGitGadget wrote: > From: Han-Wen Nienhuys <hanwen@google.com> > [...] > @@ -3059,18 +3059,18 @@ static int expire_reflog_ent(struct object_id *ooid, struct object_id *noid, > if ((*cb->should_prune_fn)(ooid, noid, email, timestamp, tz, > message, policy_cb)) { > if (!cb->newlog) > - printf("would prune %s", message); > + printf("would prune %s\n", message); > else if (cb->flags & EXPIRE_REFLOGS_VERBOSE) > - printf("prune %s", message); > + printf("prune %s\n", message); > } else { > if (cb->newlog) { > - fprintf(cb->newlog, "%s %s %s %"PRItime" %+05d\t%s", > - oid_to_hex(ooid), oid_to_hex(noid), > - email, timestamp, tz, message); > + fprintf(cb->newlog, "%s %s %s %" PRItime " %+05d\t%s\n", > + oid_to_hex(ooid), oid_to_hex(noid), email, > + timestamp, tz, message); > oidcpy(&cb->last_kept_oid, noid); > } > if (cb->flags & EXPIRE_REFLOGS_VERBOSE) > - printf("keep %s", message); > + printf("keep %s\n", message); > } > return 0; > } I haven't looked deeply into this topic as a whole, but FWIW this conflicts with a topic I've got locally and was going to submit sometime after the current batch of my own ref fixes in "next". That we've got verbose output at all in file-backend.c is a wart, and it should just be moved out if it entirely, this commit (stacked on top of various other fixes I've got in the area) does that: https://github.com/avar/git/commit/eff40d2d81b With that change the reftable code will need to handle far less of this, as it'll be handled in builtin/reflog.c, it just needs to behave properly in the appropriate "expire reflog?" callback. I.e. the backend tells us "does this expire?", the builtin/reflog.c code consumes that and does any verbose or non-verbose (or progress etc.) output. Now, the merge conflict between that and this looks rather trivial, but I can't help but wonder if we're going in the wrong direction here API-wise, or maybe "wrong direction" is too strong, but "sticking with the wrong patterN?". I think your cleanup works, but wouldn't a better thing be to move this to callbacks rather than tweaking the fprintf formats? I.e. in my version the whole body of this function has become: static int expire_reflog_ent(struct object_id *ooid, struct object_id *noid, const char *email, timestamp_t timestamp, int tz, const char *message, void *cb_data) { struct expire_reflog_cb *cb = cb_data; reflog_expiry_should_prune_fn *fn = cb->should_prune_fn; if (cb->rewrite) ooid = &cb->last_kept_oid; if (fn(ooid, noid, email, timestamp, tz, message, cb->policy_cb)) return 0; if (cb->dry_run) return 0; /* --dry-run */ fprintf(cb->newlog, "%s %s %s %"PRItime" %+05d\t%s", oid_to_hex(ooid), oid_to_hex(noid), email, timestamp, tz, message); oidcpy(&cb->last_kept_oid, noid); return 0; } The only file-backend specific part of it is that fprintf(). If we moved towards making that part of it be: write_reflog_entry_cb(oid_to_hex(ooid), oid_to_hex(noid), email, timestamp, tz, message); Then we could lift the whole of this API to a level that makes more sense for a backed to implement. The refs/files-backend.c shouldn't need to have one function calll the "should_prune_fn" *and* write out the data. Instead some code common to all backends should call the "should prune?", and then call the backend's "here's an entry for you to write" callback. But maybe I'm overthinking this whole thing. I'm just wondering if we have say a sqlite reflog backend as opposed to the file/reftable backend that wants to store this data in a schema. Such a a backend would need to unpack the data, as we're sprintf() formatting function parameters before it gets passed to the backends.
On Mon, Nov 22, 2021 at 11:27 PM Junio C Hamano <gitster@pobox.com> wrote: > > "Han-Wen Nienhuys via GitGitGadget" <gitgitgadget@gmail.com> writes: > > > From: Han-Wen Nienhuys <hanwen@google.com> > > > > Commit 523fa69c ("reflog: cleanse messages in the refs.c layer") standardizes > > how write entries into the reflog. This commit standardizes how we get messages > > out of the reflog. Before, the files backend implicitly added '\n' to the end of > > reflog message on reading, which creates a subtle incompatibility with alternate > > ref storage backends, such as reftable. > > > > We address this by stripping LF from the message before we pass it to the > > user-provided callback. > > If this were truly "user-provided", then I'd argue that all backends > should follow whatever the files backend has been doing forever---if > the files added LF implicitly, others should, too, because that is > pretty much what these "user-provided" callbacks have been expecting > to see. I think it's just wrong. If you pass `msg` to a storage API, you should get `msg` when you read it back, not (msg + "\n"). > Ah, the $RUN is hiding what is really going on; it is running the > "test-tool ref-store" helper, and we did not adjust that helper. So > if we make a compensating change to the test-tool then we do not > have to have these changes at all? But that point may be moot. > > In any case, in order to lose 5 lines from show-branch.c, and 2 > lines from reflog-walk.c, I see that we had to touch 30+ lines in > refs/files-backend.c. I find it a bit hard to sell this as an > improvement to the API, to be honest. The test-tool ref-store adds its own '\n', so you always get a blank line in the output. That serves no purpose, and leads to the tail-n2 | head -n1 in order to read the last log line. I think it's silly, and should be dropped.
On Tue, Nov 23, 2021 at 11:40 AM Ævar Arnfjörð Bjarmason <avarab@gmail.com> wrote: > I think your cleanup works, but wouldn't a better thing be to move this > to callbacks rather than tweaking the fprintf formats? sure. In the reftable glue, I have if (!(flags & EXPIRE_REFLOGS_DRY_RUN)) { /* XXX - skip writing records that were not changed. */ err = reftable_addition_commit(add); } else { /* XXX - print something */ } letting the callbacks do the printing means less work for reftable. > The refs/files-backend.c shouldn't need to have one function calll the > "should_prune_fn" *and* write out the data. Instead some code common to > all backends should call the "should prune?", and then call the > backend's "here's an entry for you to write" callback. not sure if that will work. For reftable, you have to write something (a tombstone) if you _do_ want to prune the entry. > But maybe I'm overthinking this whole thing. I'm just wondering if we > have say a sqlite reflog backend as opposed to the file/reftable backend > that wants to store this data in a schema. Such a a backend would need reftable also stores this in a schema: there are separate fields for e-mail, timezone, timestamp etc.
Han-Wen Nienhuys <hanwen@google.com> writes: > On Mon, Nov 22, 2021 at 11:27 PM Junio C Hamano <gitster@pobox.com> wrote: >> >> If this were truly "user-provided", then I'd argue that all backends >> should follow whatever the files backend has been doing forever---if >> the files added LF implicitly, others should, too, because that is >> pretty much what these "user-provided" callbacks have been expecting >> to see. > > I think it's just wrong. If you pass `msg` to a storage API, you > should get `msg` when you read it back, not (msg + "\n"). If you give a log message "git commit -m 'single line'", you get LF at the end of the commit message for free. This is no different. And you know that this is not a "storage API" that stores the input in verbatim after looking at refs.c::copy_reflog_msg(). >> Ah, the $RUN is hiding what is really going on; it is running the >> "test-tool ref-store" helper, and we did not adjust that helper. So >> if we make a compensating change to the test-tool then we do not >> have to have these changes at all? But that point may be moot. >> >> In any case, in order to lose 5 lines from show-branch.c, and 2 >> lines from reflog-walk.c, I see that we had to touch 30+ lines in >> refs/files-backend.c. I find it a bit hard to sell this as an >> improvement to the API, to be honest. > > The test-tool ref-store adds its own '\n', so you always get a blank > line in the output. That serves no purpose, and leads to the But that is only because test-tool is wrong, no? If we know that the API gives the trailing LF, what purpose does it serve to add another one? > tail-n2 | head -n1 > > in order to read the last log line. I think it's silly, and should be dropped. Yes, it is silly and should be dropped, but if you drop it on the tool side, then it may become even easier to do the "instead of reading from .git/refs/logs files, have the tool dump and inspect that" step, no? This being test-tool, I do not mind losing backward compatibility that forces us a silly "tail -n 2 | head -n 1" pipeline at all. But if silliness comes from the test-tool thta does not work well with the internal API, that is what should be fixed. Surely you can change the API and its current callers to compensate for its silliness, but I do not think that is a good direction to go in. Thanks.
On Tue, Nov 23, 2021 at 6:10 PM Junio C Hamano <gitster@pobox.com> wrote: > > Han-Wen Nienhuys <hanwen@google.com> writes: > > > On Mon, Nov 22, 2021 at 11:27 PM Junio C Hamano <gitster@pobox.com> wrote: > >> > >> If this were truly "user-provided", then I'd argue that all backends > >> should follow whatever the files backend has been doing forever---if > >> the files added LF implicitly, others should, too, because that is > >> pretty much what these "user-provided" callbacks have been expecting > >> to see. > > > > I think it's just wrong. If you pass `msg` to a storage API, you > > should get `msg` when you read it back, not (msg + "\n"). > > If you give a log message "git commit -m 'single line'", you get LF > at the end of the commit message for free. This is no different. > And you know that this is not a "storage API" that stores the input > in verbatim after looking at refs.c::copy_reflog_msg(). I'm talking about refs/refs-internal.h. It seems you want to add something like /* The ref backend should add a '\n' relative to the message supplied to the delete/symref/update functions. */ typedef int for_each_reflog_ent_fn(struct ref_store *ref_store, const char *refname, each_reflog_ent_fn fn, void *cb_data); ? -- Han-Wen Nienhuys - Google Munich I work 80%. Don't expect answers from me on Fridays. -- Google Germany GmbH, Erika-Mann-Strasse 33, 80636 Munich Registergericht und -nummer: Hamburg, HRB 86891 Sitz der Gesellschaft: Hamburg Geschäftsführer: Paul Manicle, Halimah DeLaine Prado -- Google Germany GmbH, Erika-Mann-Strasse 33, 80636 Munich Registergericht und -nummer: Hamburg, HRB 86891 Sitz der Gesellschaft: Hamburg Geschäftsführer: Paul Manicle, Halimah DeLaine Prado
Han-Wen Nienhuys <hanwen@google.com> writes: > I'm talking about refs/refs-internal.h. It seems you want to add something like > > /* The ref backend should add a '\n' relative to the message supplied > to the delete/symref/update functions. */ > typedef int for_each_reflog_ent_fn(struct ref_store *ref_store, > const char *refname, > each_reflog_ent_fn fn, > void *cb_data); > > ? Sorry, I do not follow. Doesn't the ref backend already ensure that the message is not an incomplete line? If you feed a single complete line when updating, I do not think the backend should add any extra LF relative to the given message: $ git update-ref -m 'creating a new branch manually ' refs/heads/newtest master $ git update-ref -m 'updating a new branch manually ' refs/heads/newtest master~1 $ git reflog refs/heads/newtest 4150a1677b refs/heads/newtest@{0}: updating a new branch manually 5f439a0ecf refs/heads/newtest@{1}: updating the reference I think what deserves such a comment more is the prototype for each_reflog_ent_fn describing what the parameters to that callback function, to help the callers of the iterator what to expect. That is the end-user facing part. /* * Callback to process a reflog entry found by the iteration functions (see * below) */ typedef int each_reflog_ent_fn( struct object_id *old_oid, struct object_id *new_oid, const char *committer, timestamp_t timestamp, int tz, const char *msg, void *cb_data); Currently it only says "Callback (see below)" but "below" has only comments about the difference between refs_for_each_reflog_ent() and refs_for_each_reflog_entreverse() functions, and does not talk about what "committer" looks like (i.e. does it give user.name equivalent, user.name plus <user.email>, or something else?), and what "msg" looks like (i.e. if a multi-line message was given when a ref was updated or created, do we get these lines intact? if it gets mangled, how? if the original was a single-liner incomplete line, do we lack the final LF?), and how tz is encoded. I think the rule for "msg" is that: a multi-line message, or a message on a single incomplete-line, are normalized into a single complete line, and callback gets a single complete line. Once these rules get specified tightly and fully, it is up to the ref(log) backends how to implement the interface. If files-backend wants to keep LF at the end of the message when storing (so that it can easily use it as record delimiter), it can do so and reuse the LF at the end of the message as part of the msg parameter to the callback. If another backend wants to drop LF at the end of the message when storing (to save space), it can do so as long as the callback function gets the LF just like the other backend.
On Tue, Nov 23, 2021 at 9:34 PM Junio C Hamano <gitster@pobox.com> wrote: > > Han-Wen Nienhuys <hanwen@google.com> writes: > > > I'm talking about refs/refs-internal.h. It seems you want to add something like > > > > /* The ref backend should add a '\n' relative to the message supplied > > to the delete/symref/update functions. */ > > typedef int for_each_reflog_ent_fn(struct ref_store *ref_store, > > const char *refname, > > each_reflog_ent_fn fn, > > void *cb_data); > > > > ? > > Sorry, I do not follow. Doesn't the ref backend already ensure that > the message is not an incomplete line? If you feed a single > complete line when updating, I do not think the backend should add > any extra LF relative to the given message: But it does, currently. As of commit 523fa69c36744ae6779e38614cb9bfb2be552923 Author: Junio C Hamano <gitster@pobox.com> .. - We expect that a reflog message consists of a single line. The file format used by the files backend may add a LF after the it is the job of the files ref backend to add a LF, and the input to the ref backend is a string without trailing LF. But the files backend then produces that message with a LF, when reading back the data, eg. $ git --version git version 2.34.0.rc2.393.gf8c9666880-goog $ GIT_TRACE_REFS="1" git branch -m bla blub .. 12:03:59.408705 refs/debug.c:162 rename_ref: refs/heads/bla -> refs/heads/blub "Branch: renamed refs/heads/bla to refs/heads/blub": 0 $ GIT_TRACE_REFS=1 git reflog show refs/heads/blub 12:04:23.277805 refs/debug.c:294 reflog_ent refs/heads/blub (ret 0): cd3e606211bb1cf8bc57f7d76bab98cc17a150bc -> cd3e606211bb1cf8bc57f7d76bab98cc17a150bc, Han-Wen Nienhuys <hanwen@google.com> 1637751839 "Branch: renamed refs/heads/bla to refs/heads/blub " > I think the rule for "msg" is that: > > a multi-line message, or a message on a single incomplete-line, > are normalized into a single complete line, and callback gets a > single complete line. > That is not how it works today. The files backend verbatimly dumps the message supplied to it. (Maybe it should crash if there is a '\n' in the message).
Han-Wen Nienhuys <hanwen@google.com> writes: >> ... Doesn't the ref backend already ensure that >> the message is not an incomplete line? If you feed a single >> complete line when updating, I do not think the backend should add >> any extra LF relative to the given message: > > But it does, currently. As of > > commit 523fa69c36744ae6779e38614cb9bfb2be552923 > Author: Junio C Hamano <gitster@pobox.com> > .. > - We expect that a reflog message consists of a single line. The > file format used by the files backend may add a LF after the > > it is the job of the files ref backend to add a LF, and the input to > the ref backend is a string without trailing LF. But the files backend > then produces that message with a LF, when reading back the data, eg. Ah, perhaps our confusion comes from the fact that I view the ref API as a whole and do not draw a fine line of distinction between the "ref API implementation common across backends" vs "what ref files backend does". But as the implementor of one backend, you obviously have to care. In any case, when I did that commit, I clearly meant that the normalization implemented by the patch belong to the common part to be used by all backends for uniform handling of refs. If a call to refs API in a repository that is configured to use reftable backend bypasses the normalization function introduced in that commit, that is a bug. So, yes "ref API ensures that the message is not an incomplete line to turn 0, 1, or multiple lines input into a single line", which is why the experiments you omitted from your quote (reproduced here) $ git update-ref -m 'creating a new branch manually ' refs/heads/newtest master $ git update-ref -m 'updating a new branch manually ' refs/heads/newtest master~1 $ git reflog refs/heads/newtest 4150a1677b refs/heads/newtest@{0}: updating a new branch manually 5f439a0ecf refs/heads/newtest@{1}: updating the reference to give a complete line when recording the reflog entries does not result in an extra LF shown on the output. >> I think the rule for "msg" is that: >> >> a multi-line message, or a message on a single incomplete-line, >> are normalized into a single complete line, and callback gets a >> single complete line. > > That is not how it works today. The files backend verbatimly dumps the > message supplied to it. (Maybe it should crash if there is a '\n' in > the message). As I said, if parts of refs API implementation forgets to call normalization, it is a bug. Is there different codepath other than the one that is exercised with the "git update -m ''" experiment above and you found such a bug there? It needs to be fixed. Thanks.
On Wed, Nov 24, 2021 at 7:53 PM Junio C Hamano <gitster@pobox.com> wrote: > > Han-Wen Nienhuys <hanwen@google.com> writes: > > >> ... Doesn't the ref backend already ensure that > >> the message is not an incomplete line? If you feed a single > >> complete line when updating, I do not think the backend should add > >> any extra LF relative to the given message: > > > > But it does, currently. As of > > > > commit 523fa69c36744ae6779e38614cb9bfb2be552923 > > Author: Junio C Hamano <gitster@pobox.com> > > .. > > - We expect that a reflog message consists of a single line. The > > file format used by the files backend may add a LF after the > > > > it is the job of the files ref backend to add a LF, and the input to > > the ref backend is a string without trailing LF. But the files backend > > then produces that message with a LF, when reading back the data, eg. > > Ah, perhaps our confusion comes from the fact that I view the ref > API as a whole and do not draw a fine line of distinction between > the "ref API implementation common across backends" vs "what ref > files backend does". But as the implementor of one backend, you > obviously have to care. Having the read function return something different than what the write gets still seems odd to me. How about having copy_reflog_msg() trim trailing space and then always add LF? The files backend can assert that the string ends in LF, and doesn't need to add LF itself.
Han-Wen Nienhuys <hanwen@google.com> writes: > $ GIT_TRACE_REFS="1" git branch -m bla blub > .. > 12:03:59.408705 refs/debug.c:162 rename_ref: refs/heads/bla -> > refs/heads/blub "Branch: renamed refs/heads/bla to refs/heads/blub": 0 > > $ GIT_TRACE_REFS=1 git reflog show refs/heads/blub > 12:04:23.277805 refs/debug.c:294 reflog_ent refs/heads/blub > (ret 0): cd3e606211bb1cf8bc57f7d76bab98cc17a150bc -> > cd3e606211bb1cf8bc57f7d76bab98cc17a150bc, Han-Wen Nienhuys > <hanwen@google.com> 1637751839 "Branch: renamed refs/heads/bla to > refs/heads/blub > " > >> I think the rule for "msg" is that: >> >> a multi-line message, or a message on a single incomplete-line, >> are normalized into a single complete line, and callback gets a >> single complete line. >> > > That is not how it works today. The files backend verbatimly dumps the > message supplied to it. (Maybe it should crash if there is a '\n' in > the message). I still am puzzled what you wanted to illustrate with the "git branch -m bla" trace. The call graph into the refs API looks like this: builtin/branch.c::cmd_branch() -> branch.c::create_branch() -> refs.c::ref_transaction_update() -> refs.c::ref_transaction_add_update() -> refs.c::ref_transaction_commit() and the message the user gave is passed through in "msg" variable without modification, when calling ref_transaction_update(), which in turn makes a call to ref_transaction_add_update(). It does this: struct ref_update *ref_transaction_add_update( struct ref_transaction *transaction, const char *refname, unsigned int flags, const struct object_id *new_oid, const struct object_id *old_oid, const char *msg) { struct ref_update *update; if (transaction->state != REF_TRANSACTION_OPEN) BUG("update called for transaction that is not open"); FLEX_ALLOC_STR(update, refname, refname); ALLOC_GROW(transaction->updates, transaction->nr + 1, transaction->alloc); transaction->updates[transaction->nr++] = update; update->flags = flags; if (flags & REF_HAVE_NEW) oidcpy(&update->new_oid, new_oid); if (flags & REF_HAVE_OLD) oidcpy(&update->old_oid, old_oid); update->msg = normalize_reflog_message(msg); return update; } And normalize_reflog_message() calls copy_reflog_msg() to squash runs of isspace() bytes (note: that class includes LF) into a single space, and runs rtrim(), so update->msg will get a single incomplete line. As I suspected in a separate message, I think my notion of what happens in the ref API implementation common to all backends and what happens in each backend, and hence my statements about the distinction, were much fuzzier than yours, so I should say that I consider that all of the above is happening in the common part across all backends. If normalize happens in ref-files, it should happen in reftable backend, too, automatically. The files-backend has no chance to even see an embedded LF in the message when the transaction gets committed and the backend is called, does it? So I am not sure why we should crash upon seeing a LF in the message. In any case, it seems that as a comment to clarify the end-user facing each_reflog_ent_fn() parameters, what you quoted above from my message seems correct to me, after following the callgraph like the above. A 0-line (i.e. incomplete, like your 'bla' given to "git branch"), 1-line (i.e. a single complete line, like the message I gave to "update-ref -m" in my earlier illustration), or multi-line message given when a reflog entry is created, is normalized and when the each_reflog_ent_fn() callback is called, it is shown to the callback function as a single complete line, with a LF at the end. Phrased without the explanation specific to this discussion, but with a bit more detail: The message given when a reflog entry was created, is normalized into a single line by turning each LF into a space, squeezing a run of multiple whitespaces into one space, and removing trailing whitespaces. The callback gets a single complete line in the 'msg' parameter. perhaps?
On Wed, Nov 24, 2021 at 8:27 PM Junio C Hamano <gitster@pobox.com> wrote: > > Han-Wen Nienhuys <hanwen@google.com> writes: > > > $ GIT_TRACE_REFS="1" git branch -m bla blub > > .. > > 12:03:59.408705 refs/debug.c:162 rename_ref: refs/heads/bla -> > > refs/heads/blub "Branch: renamed refs/heads/bla to refs/heads/blub": 0 > > > > $ GIT_TRACE_REFS=1 git reflog show refs/heads/blub > > 12:04:23.277805 refs/debug.c:294 reflog_ent refs/heads/blub > > (ret 0): cd3e606211bb1cf8bc57f7d76bab98cc17a150bc -> > > cd3e606211bb1cf8bc57f7d76bab98cc17a150bc, Han-Wen Nienhuys > > <hanwen@google.com> 1637751839 "Branch: renamed refs/heads/bla to > > refs/heads/blub > > " > > > >> I think the rule for "msg" is that: > >> > >> a multi-line message, or a message on a single incomplete-line, > >> are normalized into a single complete line, and callback gets a > >> single complete line. > >> > > > > That is not how it works today. The files backend verbatimly dumps the > > message supplied to it. (Maybe it should crash if there is a '\n' in > > the message). > > I still am puzzled what you wanted to illustrate with the "git > branch -m bla" trace. I'm trying to illustrate that (from the perspective of the ref backend API) one call inserts something without a '\n', but the call that reads the info back, gets the same data back with a '\n'. It looks confusing and inconsistent to me. It seems fine to decide that the message should always end in a LF, but then why not do that in the normalization routine, so it is shared across all backends? For the purpose of the debug support (GIT_TRACE_REFS=1), I would rather prefer if the message was always without LF, because the LF ruins the visual of the debug output, but that is a minor concern.
Han-Wen Nienhuys <hanwen@google.com> writes: >> Ah, perhaps our confusion comes from the fact that I view the ref >> API as a whole and do not draw a fine line of distinction between >> the "ref API implementation common across backends" vs "what ref >> files backend does". But as the implementor of one backend, you >> obviously have to care. > > Having the read function return something different than what the > write gets still seems odd to me. > How about having copy_reflog_msg() trim trailing space and then always add LF? > > The files backend can assert that the string ends in LF, and doesn't > need to add LF itself. Ehh, let's step back a bit. Is there anything in the common part and files backend in ref API that needs to be changed to fix some bug? I do not see anything broken that needs to be fixed by "assert that the string ends in LF and doesn't need to add LF itself" or by "always add LF".
On Wed, Nov 24, 2021 at 9:55 PM Junio C Hamano <gitster@pobox.com> wrote: > Ehh, let's step back a bit. > > Is there anything in the common part and files backend in ref API > that needs to be changed to fix some bug? I do not see anything > broken that needs to be fixed by "assert that the string ends in LF > and doesn't need to add LF itself" or by "always add LF". Clearly, you and I disagree about what is logical behavior here. I've reworked the series to fit with your opinion on the matter. PTAL.
Han-Wen Nienhuys <hanwen@google.com> writes: > On Wed, Nov 24, 2021 at 9:55 PM Junio C Hamano <gitster@pobox.com> wrote: >> Ehh, let's step back a bit. >> >> Is there anything in the common part and files backend in ref API >> that needs to be changed to fix some bug? I do not see anything >> broken that needs to be fixed by "assert that the string ends in LF >> and doesn't need to add LF itself" or by "always add LF". > > Clearly, you and I disagree about what is logical behavior here. > > I've reworked the series to fit with your opinion on the matter. PTAL. Thanks. I find both are logical and internally consistent, so I prefer a choice that requires fewer changes to what is already working.
diff --git a/builtin/show-branch.c b/builtin/show-branch.c index f1e8318592c..016ccdafd0f 100644 --- a/builtin/show-branch.c +++ b/builtin/show-branch.c @@ -761,7 +761,6 @@ int cmd_show_branch(int ac, const char **av, const char *prefix) char *logmsg; char *nth_desc; const char *msg; - char *end; timestamp_t timestamp; int tz; @@ -772,10 +771,6 @@ int cmd_show_branch(int ac, const char **av, const char *prefix) break; } - end = strchr(logmsg, '\n'); - if (end) - *end = '\0'; - msg = (*logmsg == '\0') ? "(none)" : logmsg; reflog_msg[i] = xstrfmt("(%s) %s", show_date(timestamp, tz, diff --git a/reflog-walk.c b/reflog-walk.c index 8ac4b284b6b..3ee59a98d2f 100644 --- a/reflog-walk.c +++ b/reflog-walk.c @@ -244,8 +244,6 @@ void get_reflog_message(struct strbuf *sb, info = &commit_reflog->reflogs->items[commit_reflog->recno+1]; len = strlen(info->message); - if (len > 0) - len--; /* strip away trailing newline */ strbuf_add(sb, info->message, len); } @@ -284,10 +282,10 @@ void show_reflog_message(struct reflog_walk_info *reflog_info, int oneline, info = &commit_reflog->reflogs->items[commit_reflog->recno+1]; get_reflog_selector(&selector, reflog_info, dmode, force_date, 0); if (oneline) { - printf("%s: %s", selector.buf, info->message); + printf("%s: %s\n", selector.buf, info->message); } else { - printf("Reflog: %s (%s)\nReflog message: %s", + printf("Reflog: %s (%s)\nReflog message: %s\n", selector.buf, info->email, info->message); } diff --git a/refs/files-backend.c b/refs/files-backend.c index 151b0056fe5..583bbc5f8b9 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -1936,17 +1936,15 @@ static int show_one_reflog_ent(struct strbuf *sb, each_reflog_ent_fn fn, void *c int tz; const char *p = sb->buf; - /* old SP new SP name <email> SP time TAB msg LF */ - if (!sb->len || sb->buf[sb->len - 1] != '\n' || - parse_oid_hex(p, &ooid, &p) || *p++ != ' ' || + /* old SP new SP name <email> SP time TAB msg */ + if (!sb->len || parse_oid_hex(p, &ooid, &p) || *p++ != ' ' || parse_oid_hex(p, &noid, &p) || *p++ != ' ' || - !(email_end = strchr(p, '>')) || - email_end[1] != ' ' || + !(email_end = strchr(p, '>')) || email_end[1] != ' ' || !(timestamp = parse_timestamp(email_end + 2, &message, 10)) || !message || message[0] != ' ' || - (message[1] != '+' && message[1] != '-') || - !isdigit(message[2]) || !isdigit(message[3]) || - !isdigit(message[4]) || !isdigit(message[5])) + (message[1] != '+' && message[1] != '-') || !isdigit(message[2]) || + !isdigit(message[3]) || !isdigit(message[4]) || + !isdigit(message[5])) return 0; /* corrupt? */ email_end[1] = '\0'; tz = strtol(message + 1, NULL, 10); @@ -2038,6 +2036,7 @@ static int files_for_each_reflog_ent_reverse(struct ref_store *ref_store, strbuf_splice(&sb, 0, 0, bp + 1, endp - (bp + 1)); scanp = bp; endp = bp + 1; + strbuf_trim_trailing_newline(&sb); ret = show_one_reflog_ent(&sb, fn, cb_data); strbuf_reset(&sb); if (ret) @@ -2050,6 +2049,7 @@ static int files_for_each_reflog_ent_reverse(struct ref_store *ref_store, * Process it, and we can end the loop. */ strbuf_splice(&sb, 0, 0, buf, endp - buf); + strbuf_trim_trailing_newline(&sb); ret = show_one_reflog_ent(&sb, fn, cb_data); strbuf_reset(&sb); break; @@ -2099,7 +2099,7 @@ static int files_for_each_reflog_ent(struct ref_store *ref_store, if (!logfp) return -1; - while (!ret && !strbuf_getwholeline(&sb, logfp, '\n')) + while (!ret && !strbuf_getline(&sb, logfp)) ret = show_one_reflog_ent(&sb, fn, cb_data); fclose(logfp); strbuf_release(&sb); @@ -3059,18 +3059,18 @@ static int expire_reflog_ent(struct object_id *ooid, struct object_id *noid, if ((*cb->should_prune_fn)(ooid, noid, email, timestamp, tz, message, policy_cb)) { if (!cb->newlog) - printf("would prune %s", message); + printf("would prune %s\n", message); else if (cb->flags & EXPIRE_REFLOGS_VERBOSE) - printf("prune %s", message); + printf("prune %s\n", message); } else { if (cb->newlog) { - fprintf(cb->newlog, "%s %s %s %"PRItime" %+05d\t%s", - oid_to_hex(ooid), oid_to_hex(noid), - email, timestamp, tz, message); + fprintf(cb->newlog, "%s %s %s %" PRItime " %+05d\t%s\n", + oid_to_hex(ooid), oid_to_hex(noid), email, + timestamp, tz, message); oidcpy(&cb->last_kept_oid, noid); } if (cb->flags & EXPIRE_REFLOGS_VERBOSE) - printf("keep %s", message); + printf("keep %s\n", message); } return 0; } diff --git a/t/t1405-main-ref-store.sh b/t/t1405-main-ref-store.sh index 49718b7ea7f..a600bedf2cd 100755 --- a/t/t1405-main-ref-store.sh +++ b/t/t1405-main-ref-store.sh @@ -89,13 +89,12 @@ test_expect_success 'for_each_reflog()' ' test_expect_success 'for_each_reflog_ent()' ' $RUN for-each-reflog-ent HEAD >actual && head -n1 actual | grep one && - tail -n2 actual | head -n1 | grep recreate-main + tail -n1 actual | grep recreate-main ' test_expect_success 'for_each_reflog_ent_reverse()' ' $RUN for-each-reflog-ent-reverse HEAD >actual && - head -n1 actual | grep recreate-main && - tail -n2 actual | head -n1 | grep one + tail -n1 actual | grep one ' test_expect_success 'reflog_exists(HEAD)' ' diff --git a/t/t1406-submodule-ref-store.sh b/t/t1406-submodule-ref-store.sh index 0a87058971e..b0365c1fee0 100755 --- a/t/t1406-submodule-ref-store.sh +++ b/t/t1406-submodule-ref-store.sh @@ -74,13 +74,13 @@ test_expect_success 'for_each_reflog()' ' test_expect_success 'for_each_reflog_ent()' ' $RUN for-each-reflog-ent HEAD >actual && head -n1 actual | grep first && - tail -n2 actual | head -n1 | grep main.to.new + tail -n1 actual | grep main.to.new ' test_expect_success 'for_each_reflog_ent_reverse()' ' $RUN for-each-reflog-ent-reverse HEAD >actual && head -n1 actual | grep main.to.new && - tail -n2 actual | head -n1 | grep first + tail -n1 actual | grep first ' test_expect_success 'reflog_exists(HEAD)' '