Message ID | pull.1420.v3.git.1669108102092.gitgitgadget@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v3] range-diff: support reading mbox files | expand |
Hi Dscho On 22/11/2022 09:08, Johannes Schindelin via GitGitGadget wrote: > [...] > Changes since v2: > > * Fixed two leaks > * Avoided dropping a patch just because it does not end in a "-- " line > * Empty lines in the middle of a diff are now treated as context lines > (newer GNU diff versions generate those) Oh, well spotted, I should have thought of that myself. > * We now warn when a parsed diff is incomplete (i.e. when the ranges in > the hunk header do not line up with the number of parsed lines), but > still follow Postel's Law i.e. being kind and accepting. This version looks good to me, thanks for working on it. Best Wishes Phillip > Changes since v1: > > * We no longer leak allocated memory in the struct patch instance > * Made strtost() a bit more stringent > * Postel [https://en.wikipedia.org/wiki/Postel%27s_Law]ized the mbox > parser substantially, together with a couple more cosmetic fixes, > based on Phillip Wood's excellent review of v1. > * Extended the test case to cover mboxes containing CR/LF and in-body > From: lines > > Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1420%2Fdscho%2Frange-diff-from-mbox-v3 > Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1420/dscho/range-diff-from-mbox-v3 > Pull-Request: https://github.com/gitgitgadget/git/pull/1420 > > Range-diff vs v2: > > 1: 485249ddfb3 ! 1: ac4d8704068 range-diff: support reading mbox files > @@ range-diff.c: struct patch_util { > + > + len = find_next_line(line, size); > + > -+ if (state == MBOX_BEFORE_HEADER || > -+ (state == MBOX_IN_DIFF && line[0] == 'F')) { > ++ if (state == MBOX_BEFORE_HEADER) { > ++parse_from_delimiter: > + if (!skip_prefix(line, "From ", &p)) > + continue; > + > ++ if (util) > ++ BUG("util already allocated"); > + util = xcalloc(1, sizeof(*util)); > + if (get_oid_hex(p, &util->oid) < 0) > + oidcpy(&util->oid, null_oid()); > @@ range-diff.c: struct patch_util { > + } > + } else if (state == MBOX_IN_DIFF) { > + switch (line[0]) { > -+ case '\0': > -+ continue; /* ignore empty lines after diff */ > ++ case '\0': /* newer GNU diff, an empty context line */ > + case '+': > + case '-': > + case ' ': > @@ range-diff.c: struct patch_util { > + strbuf_addch(&buf, '\n'); > + util->diffsize++; > + continue; > ++ default: > ++ if (old_count || new_count) > ++ warning(_("diff ended prematurely (-%d/+%d)"), > ++ (int)old_count, (int)new_count); > ++ break; > + } > + > + if (util) { > + string_list_append(list, buf.buf)->util = util; > ++ util = NULL; > + strbuf_reset(&buf); > + } > -+ util = xcalloc(1, sizeof(*util)); > -+ oidcpy(&util->oid, null_oid()); > -+ util->matching = -1; > -+ author = subject = NULL; > + state = MBOX_BEFORE_HEADER; > ++ goto parse_from_delimiter; > + } > + } > + strbuf_release(&contents); > @@ range-diff.c: int show_range_diff(const char *range1, const char *range2, > struct rev_info revs; > > + if (skip_prefix(arg, "mbox:", &path)) { > ++ free(copy); > + if (!strcmp(path, "-") || file_exists(path)) > + return 1; > + error_errno(_("not an mbox: '%s'"), path); > > > Documentation/git-range-diff.txt | 3 +- > range-diff.c | 338 ++++++++++++++++++++++++++++++- > t/t3206-range-diff.sh | 27 +++ > 3 files changed, 366 insertions(+), 2 deletions(-) > > diff --git a/Documentation/git-range-diff.txt b/Documentation/git-range-diff.txt > index 0b393715d70..e2c4661acde 100644 > --- a/Documentation/git-range-diff.txt > +++ b/Documentation/git-range-diff.txt > @@ -37,7 +37,8 @@ There are three ways to specify the commit ranges: > > - `<range1> <range2>`: Either commit range can be of the form > `<base>..<rev>`, `<rev>^!` or `<rev>^-<n>`. See `SPECIFYING RANGES` > - in linkgit:gitrevisions[7] for more details. > + in linkgit:gitrevisions[7] for more details. Alternatively, the > + patches can be provided as an mbox-formatted file via `mbox:<path>`. > > - `<rev1>...<rev2>`. This is equivalent to > `<rev2>..<rev1> <rev1>..<rev2>`. > diff --git a/range-diff.c b/range-diff.c > index 124dd678c38..c391ffa603b 100644 > --- a/range-diff.c > +++ b/range-diff.c > @@ -12,6 +12,7 @@ > #include "userdiff.h" > #include "apply.h" > #include "revision.h" > +#include "dir.h" > > struct patch_util { > /* For the search for an exact match */ > @@ -26,6 +27,313 @@ struct patch_util { > struct object_id oid; > }; > > +static inline int strtost(char const *s, size_t *result, const char **end) > +{ > + unsigned long u; > + char *p; > + > + errno = 0; > + /* negative values would be accepted by strtoul */ > + if (!isdigit(*s)) > + return -1; > + u = strtoul(s, &p, 10); > + if (errno || p == s) > + return -1; > + if (result) > + *result = u; > + *end = p; > + > + return 0; > +} > + > +static int parse_hunk_header(const char *p, > + size_t *old_count, size_t *new_count, > + const char **end) > +{ > + size_t o = 1, n = 1; > + > + if (!skip_prefix(p, "@@ -", &p) || > + strtost(p, NULL, &p) || > + /* The range is -<start>[,<count>], defaulting to count = 1 */ > + !(*p == ' ' || (*p == ',' && !strtost(p + 1, &o, &p))) || > + !skip_prefix(p, " +", &p) || > + strtost(p, NULL, &p) || > + /* The range is +<start>[,<count>], defaulting to count = 1 */ > + !(*p == ' ' || (*p == ',' && !strtost(p + 1, &n, &p))) || > + !skip_prefix(p, " @@", &p)) > + return -1; > + > + *old_count = o; > + *new_count = n; > + *end = p; > + > + return 0; > +} > + > +/* > + * This function finds the end of the line, replaces the newline character with > + * a NUL, and returns the offset of the start of the next line. > + * > + * If no newline character was found, it returns the offset of the trailing NUL > + * instead. > + */ > +static inline int find_next_line(const char *line, size_t size) > +{ > + char *eol; > + > + eol = memchr(line, '\n', size); > + if (!eol) > + return size; > + > + *eol = '\0'; > + > + return eol + 1 - line; > +} > + > +static int read_mbox(const char *path, struct string_list *list) > +{ > + struct strbuf buf = STRBUF_INIT, contents = STRBUF_INIT; > + struct strbuf long_subject = STRBUF_INIT; > + struct patch_util *util = NULL; > + enum { > + MBOX_BEFORE_HEADER, > + MBOX_IN_HEADER, > + MBOX_IN_COMMIT_MESSAGE, > + MBOX_AFTER_TRIPLE_DASH, > + MBOX_IN_DIFF > + } state = MBOX_BEFORE_HEADER; > + char *line, *current_filename = NULL; > + int len; > + size_t size, old_count = 0, new_count = 0; > + const char *author = NULL, *subject = NULL; > + > + if (!strcmp(path, "-")) { > + if (strbuf_read(&contents, STDIN_FILENO, 0) < 0) > + return error_errno(_("could not read stdin")); > + } else if (strbuf_read_file(&contents, path, 0) < 0) > + return error_errno(_("could not read '%s'"), path); > + > + line = contents.buf; > + size = contents.len; > + for (; size; size -= len, line += len) { > + const char *p; > + > + len = find_next_line(line, size); > + > + if (state == MBOX_BEFORE_HEADER) { > +parse_from_delimiter: > + if (!skip_prefix(line, "From ", &p)) > + continue; > + > + if (util) > + BUG("util already allocated"); > + util = xcalloc(1, sizeof(*util)); > + if (get_oid_hex(p, &util->oid) < 0) > + oidcpy(&util->oid, null_oid()); > + util->matching = -1; > + author = subject = NULL; > + > + state = MBOX_IN_HEADER; > + continue; > + } > + > + if (starts_with(line, "diff --git ")) { > + struct patch patch = { 0 }; > + struct strbuf root = STRBUF_INIT; > + int linenr = 0; > + int orig_len; > + > + state = MBOX_IN_DIFF; > + old_count = new_count = 0; > + strbuf_addch(&buf, '\n'); > + if (!util->diff_offset) > + util->diff_offset = buf.len; > + > + orig_len = len; > + /* `find_next_line()`'s replaced the LF with a NUL */ > + line[len - 1] = '\n'; > + len = len > 1 && line[len - 2] == '\r' ? > + error(_("cannot handle diff headers with " > + "CR/LF line endings")) : > + parse_git_diff_header(&root, &linenr, 1, line, > + len, size, &patch); > + if (len < 0) { > + error(_("could not parse git header '%.*s'"), > + orig_len, line); > + release_patch(&patch); > + free(util); > + free(current_filename); > + string_list_clear(list, 1); > + strbuf_release(&buf); > + strbuf_release(&contents); > + strbuf_release(&long_subject); > + return -1; > + } > + > + if (patch.old_name) > + skip_prefix(patch.old_name, "a/", > + (const char **)&patch.old_name); > + if (patch.new_name) > + skip_prefix(patch.new_name, "b/", > + (const char **)&patch.new_name); > + > + strbuf_addstr(&buf, " ## "); > + if (patch.is_new) > + strbuf_addf(&buf, "%s (new)", patch.new_name); > + else if (patch.is_delete) > + strbuf_addf(&buf, "%s (deleted)", patch.old_name); > + else if (patch.is_rename) > + strbuf_addf(&buf, "%s => %s", patch.old_name, patch.new_name); > + else > + strbuf_addstr(&buf, patch.new_name); > + > + free(current_filename); > + if (patch.is_delete) > + current_filename = xstrdup(patch.old_name); > + else > + current_filename = xstrdup(patch.new_name); > + > + if (patch.new_mode && patch.old_mode && > + patch.old_mode != patch.new_mode) > + strbuf_addf(&buf, " (mode change %06o => %06o)", > + patch.old_mode, patch.new_mode); > + > + strbuf_addstr(&buf, " ##\n"); > + util->diffsize++; > + release_patch(&patch); > + } else if (state == MBOX_IN_HEADER) { > + if (!line[0]) { > + state = MBOX_IN_COMMIT_MESSAGE; > + /* Look for an in-body From: */ > + if (skip_prefix(line + 1, "From: ", &p)) { > + size -= p - line; > + line += p - line; > + len = find_next_line(line, size); > + > + while (isspace(*p)) > + p++; > + author = p; > + } > + strbuf_addstr(&buf, " ## Metadata ##\n"); > + if (author) > + strbuf_addf(&buf, "Author: %s\n", author); > + strbuf_addstr(&buf, "\n ## Commit message ##\n"); > + if (subject) > + strbuf_addf(&buf, " %s\n\n", subject); > + } else if (skip_prefix(line, "From: ", &p)) { > + while (isspace(*p)) > + p++; > + author = p; > + } else if (skip_prefix(line, "Subject: ", &p)) { > + const char *q; > + > + while (isspace(*p)) > + p++; > + subject = p; > + > + if (starts_with(p, "[PATCH") && > + (q = strchr(p, ']'))) { > + q++; > + while (isspace(*q)) > + q++; > + subject = q; > + } > + > + if (len < size && line[len] == ' ') { > + /* handle long subject */ > + strbuf_reset(&long_subject); > + strbuf_addstr(&long_subject, subject); > + while (len < size && line[len] == ' ') { > + line += len; > + size -= len; > + len = find_next_line(line, size); > + strbuf_addstr(&long_subject, line); > + } > + subject = long_subject.buf; > + } > + } > + } else if (state == MBOX_IN_COMMIT_MESSAGE) { > + if (!line[0]) { > + strbuf_addch(&buf, '\n'); > + } else if (strcmp(line, "---")) { > + int tabs = 0; > + > + /* simulate tab expansion */ > + while (line[tabs] == '\t') > + tabs++; > + strbuf_addf(&buf, "%*s%s\n", > + 4 + 8 * tabs, "", line + tabs); > + } else { > + /* > + * Trim the trailing newline that is added > + * by `format-patch`. > + */ > + strbuf_trim_trailing_newline(&buf); > + state = MBOX_AFTER_TRIPLE_DASH; > + } > + } else if (state == MBOX_IN_DIFF) { > + switch (line[0]) { > + case '\0': /* newer GNU diff, an empty context line */ > + case '+': > + case '-': > + case ' ': > + /* A `-- ` line indicates the end of a diff */ > + if (!old_count && !new_count) > + break; > + if (old_count && line[0] != '+') > + old_count--; > + if (new_count && line[0] != '-') > + new_count--; > + /* fallthrough */ > + case '\\': > + strbuf_addstr(&buf, line); > + strbuf_addch(&buf, '\n'); > + util->diffsize++; > + continue; > + case '@': > + if (parse_hunk_header(line, &old_count, > + &new_count, &p)) > + break; > + > + strbuf_addstr(&buf, "@@"); > + if (current_filename && *p) > + strbuf_addf(&buf, " %s:", > + current_filename); > + strbuf_addstr(&buf, p); > + strbuf_addch(&buf, '\n'); > + util->diffsize++; > + continue; > + default: > + if (old_count || new_count) > + warning(_("diff ended prematurely (-%d/+%d)"), > + (int)old_count, (int)new_count); > + break; > + } > + > + if (util) { > + string_list_append(list, buf.buf)->util = util; > + util = NULL; > + strbuf_reset(&buf); > + } > + state = MBOX_BEFORE_HEADER; > + goto parse_from_delimiter; > + } > + } > + strbuf_release(&contents); > + > + if (util) { > + if (state == MBOX_IN_DIFF) > + string_list_append(list, buf.buf)->util = util; > + else > + free(util); > + } > + strbuf_release(&buf); > + strbuf_release(&long_subject); > + free(current_filename); > + > + return 0; > +} > + > /* > * Reads the patches into a string list, with the `util` field being populated > * as struct object_id (will need to be free()d). > @@ -41,6 +349,10 @@ static int read_patches(const char *range, struct string_list *list, > ssize_t len; > size_t size; > int ret = -1; > + const char *path; > + > + if (skip_prefix(range, "mbox:", &path)) > + return read_mbox(path, list); > > strvec_pushl(&cp.args, "log", "--no-color", "-p", "--no-merges", > "--reverse", "--date-order", "--decorate=no", > @@ -424,6 +736,19 @@ static void output_pair_header(struct diff_options *diffopt, > > strbuf_addch(buf, ' '); > pp_commit_easy(CMIT_FMT_ONELINE, commit, buf); > + } else { > + struct patch_util *util = b_util ? b_util : a_util; > + const char *needle = "\n ## Commit message ##\n"; > + const char *p = !util || !util->patch ? > + NULL : strstr(util->patch, needle); > + if (p) { > + if (status == '!') > + strbuf_addf(buf, "%s%s", color_reset, color); > + > + strbuf_addch(buf, ' '); > + p += strlen(needle); > + strbuf_add(buf, p, strchrnul(p, '\n') - p); > + } > } > strbuf_addf(buf, "%s\n", color_reset); > > @@ -554,6 +879,9 @@ int show_range_diff(const char *range1, const char *range2, > if (range_diff_opts->left_only && range_diff_opts->right_only) > res = error(_("options '%s' and '%s' cannot be used together"), "--left-only", "--right-only"); > > + if (!strcmp(range1, "mbox:-") && !strcmp(range2, "mbox:-")) > + res = error(_("only one mbox can be read from stdin")); > + > if (!res && read_patches(range1, &branch1, range_diff_opts->other_arg)) > res = error(_("could not parse log for '%s'"), range1); > if (!res && read_patches(range2, &branch2, range_diff_opts->other_arg)) > @@ -575,10 +903,18 @@ int show_range_diff(const char *range1, const char *range2, > int is_range_diff_range(const char *arg) > { > char *copy = xstrdup(arg); /* setup_revisions() modifies it */ > - const char *argv[] = { "", copy, "--", NULL }; > + const char *argv[] = { "", copy, "--", NULL }, *path; > int i, positive = 0, negative = 0; > struct rev_info revs; > > + if (skip_prefix(arg, "mbox:", &path)) { > + free(copy); > + if (!strcmp(path, "-") || file_exists(path)) > + return 1; > + error_errno(_("not an mbox: '%s'"), path); > + return 0; > + } > + > init_revisions(&revs, NULL); > if (setup_revisions(3, argv, &revs, NULL) == 1) { > for (i = 0; i < revs.pending.nr; i++) > diff --git a/t/t3206-range-diff.sh b/t/t3206-range-diff.sh > index 459beaf7d9c..89ef9a5ffc4 100755 > --- a/t/t3206-range-diff.sh > +++ b/t/t3206-range-diff.sh > @@ -783,6 +783,33 @@ test_expect_success 'ranges with pathspecs' ' > ! grep "$topic_oid" actual > ' > > +test_expect_success 'compare range vs mbox' ' > + git format-patch --stdout topic..mode-only-change >mbox && > + git range-diff topic...mode-only-change >expect && > + > + git range-diff mode-only-change..topic mbox:./mbox >actual && > + test_cmp expect actual && > + > + sed -e "/^From: .*/{ > + h > + s/.*/From: Bugs Bunny <bugs@bun.ny>/ > + :1 > + N > + /[ -z]$/b1 > + G > + }" <mbox >mbox.from && > + git range-diff mode-only-change..topic mbox:./mbox.from >actual.from && > + test_cmp expect actual.from && > + > + append_cr <mbox >mbox.cr && > + test_must_fail git range-diff \ > + mode-only-change..topic mbox:./mbox.cr 2>err && > + grep CR/LF err && > + > + git range-diff mode-only-change..topic mbox:- <mbox >actual.stdin && > + test_cmp expect actual.stdin > +' > + > test_expect_success 'submodule changes are shown irrespective of diff.submodule' ' > git init sub-repo && > test_commit -C sub-repo sub-first && > > base-commit: b75747829f4c277323c78b1c5973ad63ea038a2d
> +static inline int strtost(char const *s, size_t *result, const char **end) > +{ > + unsigned long u; > + char *p; > + > + errno = 0; Minor nit. If this is to be able to see the error condition from strtoul(), I think it should be done after the "!isdigit()" test, immediately before we make strtoul() call, to avoid clearing errno unnecessarily. > + /* negative values would be accepted by strtoul */ > + if (!isdigit(*s)) > + return -1; > + u = strtoul(s, &p, 10); > + if (errno || p == s) > + return -1; > +static int parse_hunk_header(const char *p, > + size_t *old_count, size_t *new_count, > + const char **end) > +{ > + size_t o = 1, n = 1; > + > + if (!skip_prefix(p, "@@ -", &p) || > + strtost(p, NULL, &p) || > + /* The range is -<start>[,<count>], defaulting to count = 1 */ > + !(*p == ' ' || (*p == ',' && !strtost(p + 1, &o, &p))) || > + !skip_prefix(p, " +", &p) || > + strtost(p, NULL, &p) || > + /* The range is +<start>[,<count>], defaulting to count = 1 */ > + !(*p == ' ' || (*p == ',' && !strtost(p + 1, &n, &p))) || > + !skip_prefix(p, " @@", &p)) > + return -1; > + > + *old_count = o; > + *new_count = n; > + *end = p; We care only about how long the hunk is, and do not care exactly where in the preimage it sits. The code looks correct, but for such a specialized "parser", the name of the function gives a false impression that it does a lot more. Finding it only slightly disturbing. > + * This function finds the end of the line, replaces the newline character with > + * a NUL, and returns the offset of the start of the next line. > + * > + * If no newline character was found, it returns the offset of the trailing NUL > + * instead. Pretty bog-standard "read each line" helper. I suspect we may want to consolidate multiple copies we may have elsewhere after the dust settles. Looking good. > +static inline int find_next_line(const char *line, size_t size) > +{ > + char *eol; > + > + eol = memchr(line, '\n', size); > + if (!eol) > + return size; > + > + *eol = '\0'; > + > + return eol + 1 - line; > +} > +static int read_mbox(const char *path, struct string_list *list) > +{ > + struct strbuf buf = STRBUF_INIT, contents = STRBUF_INIT; > + struct strbuf long_subject = STRBUF_INIT; > + struct patch_util *util = NULL; > + enum { > + MBOX_BEFORE_HEADER, > + MBOX_IN_HEADER, > + MBOX_IN_COMMIT_MESSAGE, > + MBOX_AFTER_TRIPLE_DASH, > + MBOX_IN_DIFF > + } state = MBOX_BEFORE_HEADER; > + char *line, *current_filename = NULL; > + int len; > + size_t size, old_count = 0, new_count = 0; > + const char *author = NULL, *subject = NULL; > + > + if (!strcmp(path, "-")) { > + if (strbuf_read(&contents, STDIN_FILENO, 0) < 0) > + return error_errno(_("could not read stdin")); > + } else if (strbuf_read_file(&contents, path, 0) < 0) > + return error_errno(_("could not read '%s'"), path); > + > + line = contents.buf; > + size = contents.len; > + for (; size; size -= len, line += len) { > + const char *p; > + > + len = find_next_line(line, size); > + > + if (state == MBOX_BEFORE_HEADER) { > +parse_from_delimiter: > + if (!skip_prefix(line, "From ", &p)) > + continue; > + > + if (util) > + BUG("util already allocated"); OK. The only transition that brings us into _BEFORE_HEADER state is from _IN_DIFF and we consume and clear the current util there before the transition happens, so this BUG() will trigger only when there is some programming error, not any data errors. Good. > + util = xcalloc(1, sizeof(*util)); > + if (get_oid_hex(p, &util->oid) < 0) > + oidcpy(&util->oid, null_oid()); > + util->matching = -1; > + author = subject = NULL; > + > + state = MBOX_IN_HEADER; > + continue; > + } > + > + if (starts_with(line, "diff --git ")) { > + struct patch patch = { 0 }; > + struct strbuf root = STRBUF_INIT; > + int linenr = 0; > + int orig_len; > + > + state = MBOX_IN_DIFF; > + old_count = new_count = 0; > + strbuf_addch(&buf, '\n'); > + if (!util->diff_offset) > + util->diff_offset = buf.len; > + > + orig_len = len; > + /* `find_next_line()`'s replaced the LF with a NUL */ > + line[len - 1] = '\n'; Does this work correctly when the input ended with an incomplete line that lacked the final LF? find_next_line() would have given the size of the remaining input, and the byte at line[len-1] is the last byte on the incomplete line that is not LF. > + len = len > 1 && line[len - 2] == '\r' ? > + error(_("cannot handle diff headers with " > + "CR/LF line endings")) : > + parse_git_diff_header(&root, &linenr, 1, line, > + len, size, &patch); Cute (in that it tries to use a single "len < 0" for all error conditions) but moderately hard to follow. > + if (len < 0) { > + error(_("could not parse git header '%.*s'"), > + orig_len, line); > + release_patch(&patch); > + free(util); > + free(current_filename); > + string_list_clear(list, 1); > + strbuf_release(&buf); > + strbuf_release(&contents); > + strbuf_release(&long_subject); > + return -1; > + } OK. > + if (patch.old_name) > + skip_prefix(patch.old_name, "a/", > + (const char **)&patch.old_name); > + if (patch.new_name) > + skip_prefix(patch.new_name, "b/", > + (const char **)&patch.new_name); Do we only accept "-p1" patches? From time to time we seem to hear on this list from folks in communities that do not use -p1 (aka a/ and b/) convention. > + } else if (state == MBOX_IN_HEADER) { > + if (!line[0]) { OK. After seeing a block of header lines, the first empty line signals the end of the headers and we transition into a new state. > + state = MBOX_IN_COMMIT_MESSAGE; As an in-body "From:" can have have another blank line or some in-body header other than "From: " before it at the beginning of an e-mail body, I do not think this is a good code structure. I would have expected that the first blank line would transition us into a new state (in-commit-messages state) without doing anything else and in that state: - Leading blank lines are skipped, and we will stay in the same state. - From:, Subject:, Date:, etc. are interpreted as in-body headers, and we will stay in the same state, expecting more in-body headers, - Everything else will bring us into "we are now really reading the log" state (do not lose that line that made us transition into the new state---that line is the first line of the body). would happen. > + } else if (state == MBOX_IN_COMMIT_MESSAGE) { > + if (!line[0]) { > + strbuf_addch(&buf, '\n'); > + } else if (strcmp(line, "---")) { > + int tabs = 0; > + > + /* simulate tab expansion */ > + while (line[tabs] == '\t') > + tabs++; > + strbuf_addf(&buf, "%*s%s\n", > + 4 + 8 * tabs, "", line + tabs); I am not sure if special casing the empty line above is correct. I am assuming that you are pretending as if you read an "git show -s" output after applying that patch, but "git log" gives a 4-space indent even for an empty line, I think. A quick sanity check $ git show -s | cat -e on the patch I am responding to tells me that it is the case. I'll stop here, as I see Phillip saw the reading of "diff --git" output well enough to notice that diff.suppressBlankEmpty is handled correctly and I'd trust his review. Thanks.
Another thing I forgot to mention. > Let's offer a way to read those patches from pre-prepared MBox files > instead when an argument "mbox:<filename>" is passed instead of a commit > range. > ... > + const char *path; > + > + if (skip_prefix(range, "mbox:", &path)) > + return read_mbox(path, list); Shouldn't this take the prefix into account, similar to how option of OPTION_FILENAME type does using parse-options.c::fix_filename()? A test that starts "range-diff" in a subdirectory and refer to a mbox file elsewhere may be a good way to prevent such a bug from happening and regression after the change hits a release.
diff --git a/Documentation/git-range-diff.txt b/Documentation/git-range-diff.txt index 0b393715d70..e2c4661acde 100644 --- a/Documentation/git-range-diff.txt +++ b/Documentation/git-range-diff.txt @@ -37,7 +37,8 @@ There are three ways to specify the commit ranges: - `<range1> <range2>`: Either commit range can be of the form `<base>..<rev>`, `<rev>^!` or `<rev>^-<n>`. See `SPECIFYING RANGES` - in linkgit:gitrevisions[7] for more details. + in linkgit:gitrevisions[7] for more details. Alternatively, the + patches can be provided as an mbox-formatted file via `mbox:<path>`. - `<rev1>...<rev2>`. This is equivalent to `<rev2>..<rev1> <rev1>..<rev2>`. diff --git a/range-diff.c b/range-diff.c index 124dd678c38..c391ffa603b 100644 --- a/range-diff.c +++ b/range-diff.c @@ -12,6 +12,7 @@ #include "userdiff.h" #include "apply.h" #include "revision.h" +#include "dir.h" struct patch_util { /* For the search for an exact match */ @@ -26,6 +27,313 @@ struct patch_util { struct object_id oid; }; +static inline int strtost(char const *s, size_t *result, const char **end) +{ + unsigned long u; + char *p; + + errno = 0; + /* negative values would be accepted by strtoul */ + if (!isdigit(*s)) + return -1; + u = strtoul(s, &p, 10); + if (errno || p == s) + return -1; + if (result) + *result = u; + *end = p; + + return 0; +} + +static int parse_hunk_header(const char *p, + size_t *old_count, size_t *new_count, + const char **end) +{ + size_t o = 1, n = 1; + + if (!skip_prefix(p, "@@ -", &p) || + strtost(p, NULL, &p) || + /* The range is -<start>[,<count>], defaulting to count = 1 */ + !(*p == ' ' || (*p == ',' && !strtost(p + 1, &o, &p))) || + !skip_prefix(p, " +", &p) || + strtost(p, NULL, &p) || + /* The range is +<start>[,<count>], defaulting to count = 1 */ + !(*p == ' ' || (*p == ',' && !strtost(p + 1, &n, &p))) || + !skip_prefix(p, " @@", &p)) + return -1; + + *old_count = o; + *new_count = n; + *end = p; + + return 0; +} + +/* + * This function finds the end of the line, replaces the newline character with + * a NUL, and returns the offset of the start of the next line. + * + * If no newline character was found, it returns the offset of the trailing NUL + * instead. + */ +static inline int find_next_line(const char *line, size_t size) +{ + char *eol; + + eol = memchr(line, '\n', size); + if (!eol) + return size; + + *eol = '\0'; + + return eol + 1 - line; +} + +static int read_mbox(const char *path, struct string_list *list) +{ + struct strbuf buf = STRBUF_INIT, contents = STRBUF_INIT; + struct strbuf long_subject = STRBUF_INIT; + struct patch_util *util = NULL; + enum { + MBOX_BEFORE_HEADER, + MBOX_IN_HEADER, + MBOX_IN_COMMIT_MESSAGE, + MBOX_AFTER_TRIPLE_DASH, + MBOX_IN_DIFF + } state = MBOX_BEFORE_HEADER; + char *line, *current_filename = NULL; + int len; + size_t size, old_count = 0, new_count = 0; + const char *author = NULL, *subject = NULL; + + if (!strcmp(path, "-")) { + if (strbuf_read(&contents, STDIN_FILENO, 0) < 0) + return error_errno(_("could not read stdin")); + } else if (strbuf_read_file(&contents, path, 0) < 0) + return error_errno(_("could not read '%s'"), path); + + line = contents.buf; + size = contents.len; + for (; size; size -= len, line += len) { + const char *p; + + len = find_next_line(line, size); + + if (state == MBOX_BEFORE_HEADER) { +parse_from_delimiter: + if (!skip_prefix(line, "From ", &p)) + continue; + + if (util) + BUG("util already allocated"); + util = xcalloc(1, sizeof(*util)); + if (get_oid_hex(p, &util->oid) < 0) + oidcpy(&util->oid, null_oid()); + util->matching = -1; + author = subject = NULL; + + state = MBOX_IN_HEADER; + continue; + } + + if (starts_with(line, "diff --git ")) { + struct patch patch = { 0 }; + struct strbuf root = STRBUF_INIT; + int linenr = 0; + int orig_len; + + state = MBOX_IN_DIFF; + old_count = new_count = 0; + strbuf_addch(&buf, '\n'); + if (!util->diff_offset) + util->diff_offset = buf.len; + + orig_len = len; + /* `find_next_line()`'s replaced the LF with a NUL */ + line[len - 1] = '\n'; + len = len > 1 && line[len - 2] == '\r' ? + error(_("cannot handle diff headers with " + "CR/LF line endings")) : + parse_git_diff_header(&root, &linenr, 1, line, + len, size, &patch); + if (len < 0) { + error(_("could not parse git header '%.*s'"), + orig_len, line); + release_patch(&patch); + free(util); + free(current_filename); + string_list_clear(list, 1); + strbuf_release(&buf); + strbuf_release(&contents); + strbuf_release(&long_subject); + return -1; + } + + if (patch.old_name) + skip_prefix(patch.old_name, "a/", + (const char **)&patch.old_name); + if (patch.new_name) + skip_prefix(patch.new_name, "b/", + (const char **)&patch.new_name); + + strbuf_addstr(&buf, " ## "); + if (patch.is_new) + strbuf_addf(&buf, "%s (new)", patch.new_name); + else if (patch.is_delete) + strbuf_addf(&buf, "%s (deleted)", patch.old_name); + else if (patch.is_rename) + strbuf_addf(&buf, "%s => %s", patch.old_name, patch.new_name); + else + strbuf_addstr(&buf, patch.new_name); + + free(current_filename); + if (patch.is_delete) + current_filename = xstrdup(patch.old_name); + else + current_filename = xstrdup(patch.new_name); + + if (patch.new_mode && patch.old_mode && + patch.old_mode != patch.new_mode) + strbuf_addf(&buf, " (mode change %06o => %06o)", + patch.old_mode, patch.new_mode); + + strbuf_addstr(&buf, " ##\n"); + util->diffsize++; + release_patch(&patch); + } else if (state == MBOX_IN_HEADER) { + if (!line[0]) { + state = MBOX_IN_COMMIT_MESSAGE; + /* Look for an in-body From: */ + if (skip_prefix(line + 1, "From: ", &p)) { + size -= p - line; + line += p - line; + len = find_next_line(line, size); + + while (isspace(*p)) + p++; + author = p; + } + strbuf_addstr(&buf, " ## Metadata ##\n"); + if (author) + strbuf_addf(&buf, "Author: %s\n", author); + strbuf_addstr(&buf, "\n ## Commit message ##\n"); + if (subject) + strbuf_addf(&buf, " %s\n\n", subject); + } else if (skip_prefix(line, "From: ", &p)) { + while (isspace(*p)) + p++; + author = p; + } else if (skip_prefix(line, "Subject: ", &p)) { + const char *q; + + while (isspace(*p)) + p++; + subject = p; + + if (starts_with(p, "[PATCH") && + (q = strchr(p, ']'))) { + q++; + while (isspace(*q)) + q++; + subject = q; + } + + if (len < size && line[len] == ' ') { + /* handle long subject */ + strbuf_reset(&long_subject); + strbuf_addstr(&long_subject, subject); + while (len < size && line[len] == ' ') { + line += len; + size -= len; + len = find_next_line(line, size); + strbuf_addstr(&long_subject, line); + } + subject = long_subject.buf; + } + } + } else if (state == MBOX_IN_COMMIT_MESSAGE) { + if (!line[0]) { + strbuf_addch(&buf, '\n'); + } else if (strcmp(line, "---")) { + int tabs = 0; + + /* simulate tab expansion */ + while (line[tabs] == '\t') + tabs++; + strbuf_addf(&buf, "%*s%s\n", + 4 + 8 * tabs, "", line + tabs); + } else { + /* + * Trim the trailing newline that is added + * by `format-patch`. + */ + strbuf_trim_trailing_newline(&buf); + state = MBOX_AFTER_TRIPLE_DASH; + } + } else if (state == MBOX_IN_DIFF) { + switch (line[0]) { + case '\0': /* newer GNU diff, an empty context line */ + case '+': + case '-': + case ' ': + /* A `-- ` line indicates the end of a diff */ + if (!old_count && !new_count) + break; + if (old_count && line[0] != '+') + old_count--; + if (new_count && line[0] != '-') + new_count--; + /* fallthrough */ + case '\\': + strbuf_addstr(&buf, line); + strbuf_addch(&buf, '\n'); + util->diffsize++; + continue; + case '@': + if (parse_hunk_header(line, &old_count, + &new_count, &p)) + break; + + strbuf_addstr(&buf, "@@"); + if (current_filename && *p) + strbuf_addf(&buf, " %s:", + current_filename); + strbuf_addstr(&buf, p); + strbuf_addch(&buf, '\n'); + util->diffsize++; + continue; + default: + if (old_count || new_count) + warning(_("diff ended prematurely (-%d/+%d)"), + (int)old_count, (int)new_count); + break; + } + + if (util) { + string_list_append(list, buf.buf)->util = util; + util = NULL; + strbuf_reset(&buf); + } + state = MBOX_BEFORE_HEADER; + goto parse_from_delimiter; + } + } + strbuf_release(&contents); + + if (util) { + if (state == MBOX_IN_DIFF) + string_list_append(list, buf.buf)->util = util; + else + free(util); + } + strbuf_release(&buf); + strbuf_release(&long_subject); + free(current_filename); + + return 0; +} + /* * Reads the patches into a string list, with the `util` field being populated * as struct object_id (will need to be free()d). @@ -41,6 +349,10 @@ static int read_patches(const char *range, struct string_list *list, ssize_t len; size_t size; int ret = -1; + const char *path; + + if (skip_prefix(range, "mbox:", &path)) + return read_mbox(path, list); strvec_pushl(&cp.args, "log", "--no-color", "-p", "--no-merges", "--reverse", "--date-order", "--decorate=no", @@ -424,6 +736,19 @@ static void output_pair_header(struct diff_options *diffopt, strbuf_addch(buf, ' '); pp_commit_easy(CMIT_FMT_ONELINE, commit, buf); + } else { + struct patch_util *util = b_util ? b_util : a_util; + const char *needle = "\n ## Commit message ##\n"; + const char *p = !util || !util->patch ? + NULL : strstr(util->patch, needle); + if (p) { + if (status == '!') + strbuf_addf(buf, "%s%s", color_reset, color); + + strbuf_addch(buf, ' '); + p += strlen(needle); + strbuf_add(buf, p, strchrnul(p, '\n') - p); + } } strbuf_addf(buf, "%s\n", color_reset); @@ -554,6 +879,9 @@ int show_range_diff(const char *range1, const char *range2, if (range_diff_opts->left_only && range_diff_opts->right_only) res = error(_("options '%s' and '%s' cannot be used together"), "--left-only", "--right-only"); + if (!strcmp(range1, "mbox:-") && !strcmp(range2, "mbox:-")) + res = error(_("only one mbox can be read from stdin")); + if (!res && read_patches(range1, &branch1, range_diff_opts->other_arg)) res = error(_("could not parse log for '%s'"), range1); if (!res && read_patches(range2, &branch2, range_diff_opts->other_arg)) @@ -575,10 +903,18 @@ int show_range_diff(const char *range1, const char *range2, int is_range_diff_range(const char *arg) { char *copy = xstrdup(arg); /* setup_revisions() modifies it */ - const char *argv[] = { "", copy, "--", NULL }; + const char *argv[] = { "", copy, "--", NULL }, *path; int i, positive = 0, negative = 0; struct rev_info revs; + if (skip_prefix(arg, "mbox:", &path)) { + free(copy); + if (!strcmp(path, "-") || file_exists(path)) + return 1; + error_errno(_("not an mbox: '%s'"), path); + return 0; + } + init_revisions(&revs, NULL); if (setup_revisions(3, argv, &revs, NULL) == 1) { for (i = 0; i < revs.pending.nr; i++) diff --git a/t/t3206-range-diff.sh b/t/t3206-range-diff.sh index 459beaf7d9c..89ef9a5ffc4 100755 --- a/t/t3206-range-diff.sh +++ b/t/t3206-range-diff.sh @@ -783,6 +783,33 @@ test_expect_success 'ranges with pathspecs' ' ! grep "$topic_oid" actual ' +test_expect_success 'compare range vs mbox' ' + git format-patch --stdout topic..mode-only-change >mbox && + git range-diff topic...mode-only-change >expect && + + git range-diff mode-only-change..topic mbox:./mbox >actual && + test_cmp expect actual && + + sed -e "/^From: .*/{ + h + s/.*/From: Bugs Bunny <bugs@bun.ny>/ + :1 + N + /[ -z]$/b1 + G + }" <mbox >mbox.from && + git range-diff mode-only-change..topic mbox:./mbox.from >actual.from && + test_cmp expect actual.from && + + append_cr <mbox >mbox.cr && + test_must_fail git range-diff \ + mode-only-change..topic mbox:./mbox.cr 2>err && + grep CR/LF err && + + git range-diff mode-only-change..topic mbox:- <mbox >actual.stdin && + test_cmp expect actual.stdin +' + test_expect_success 'submodule changes are shown irrespective of diff.submodule' ' git init sub-repo && test_commit -C sub-repo sub-first &&