Message ID | pull.1309.git.1659106382128.gitgitgadget@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | tests: replace mingw_test_cmp with a helper in C | expand |
Hi, On Fri, 29 Jul 2022, Johannes Schindelin via GitGitGadget wrote: > From: Johannes Schindelin <johannes.schindelin@gmx.de> > > This helper is more performant than running the `mingw_test_cmp` code > with with MSYS2's Bash. And a lot more readable. Ooops... s/with with/with/ Ciao, Dscho > > To accommodate t1050, which wants to compare files weighing in with 3MB > (falling outside of t1050's malloc limit of 1.5MB), we simply lift the > allocation limit by setting the environment variable GIT_ALLOC_LIMIT to > zero when calling the helper. > > Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> > --- > tests: replace mingw_test_cmp with a helper in C > > On the heels of sending a patch to fix a performance regression due to a > mis-use of test_cmp > [https://lore.kernel.org/git/b9203ea247776332e4b6f519aa27d541207adc2f.1659097724.git.gitgitgadget@gmail.com/], > I was curious to see whether Git for Windows had the same issue. And it > does not > [https://github.com/git-for-windows/git/runs/7556381815?check_suite_focus=true#step:5:127]: > it passes t5351 in 22 seconds, even while using test_cmp to compare pack > files > [https://github.com/git-for-windows/git/blob/3922f62f0d5991e9fe0a0817ebf89a91339c7705/t/t5351-unpack-large-objects.sh#L90]. > > The answer is of course that a test helper written in C is much faster > than writing the same in Bash, especially on Windows. This is especially > sad when said Bash code is only used on Windows. So I pulled out this > helper from the years-long effort to let Git for Windows use BusyBox' > ash to run the test suite. The result is this patch, which has been in > Git for Windows since June 2018. 
> > Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1309%2Fdscho%2Fmingw-test-cmp-v1 > Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1309/dscho/mingw-test-cmp-v1 > Pull-Request: https://github.com/gitgitgadget/git/pull/1309 > > Makefile | 1 + > t/helper/test-cmp.c | 73 +++++++++++++++++++++++++++++++++++++++++ > t/helper/test-tool.c | 1 + > t/helper/test-tool.h | 1 + > t/test-lib-functions.sh | 68 +------------------------------------- > t/test-lib.sh | 2 +- > 6 files changed, 78 insertions(+), 68 deletions(-) > create mode 100644 t/helper/test-cmp.c > > diff --git a/Makefile b/Makefile > index 1624471badc..45f108e43a1 100644 > --- a/Makefile > +++ b/Makefile > @@ -723,6 +723,7 @@ TEST_BUILTINS_OBJS += test-advise.o > TEST_BUILTINS_OBJS += test-bitmap.o > TEST_BUILTINS_OBJS += test-bloom.o > TEST_BUILTINS_OBJS += test-chmtime.o > +TEST_BUILTINS_OBJS += test-cmp.o > TEST_BUILTINS_OBJS += test-config.o > TEST_BUILTINS_OBJS += test-crontab.o > TEST_BUILTINS_OBJS += test-csprng.o > diff --git a/t/helper/test-cmp.c b/t/helper/test-cmp.c > new file mode 100644 > index 00000000000..1c646a54bf6 > --- /dev/null > +++ b/t/helper/test-cmp.c > @@ -0,0 +1,73 @@ > +#include "test-tool.h" > +#include "git-compat-util.h" > +#include "strbuf.h" > +#include "gettext.h" > +#include "parse-options.h" > +#include "run-command.h" > + > +#ifdef WIN32 > +#define NO_SUCH_DIR "\\\\.\\GLOBALROOT\\invalid" > +#else > +#define NO_SUCH_DIR "/dev/null" > +#endif > + > +static int run_diff(const char *path1, const char *path2) > +{ > + const char *argv[] = { > + "diff", "--no-index", NULL, NULL, NULL > + }; > + const char *env[] = { > + "GIT_PAGER=cat", > + "GIT_DIR=" NO_SUCH_DIR, > + "HOME=" NO_SUCH_DIR, > + NULL > + }; > + > + argv[2] = path1; > + argv[3] = path2; > + return run_command_v_opt_cd_env(argv, > + RUN_COMMAND_NO_STDIN | RUN_GIT_CMD, > + NULL, env); > +} > + > +int cmd__cmp(int argc, const char **argv) > +{ > + FILE *f0, *f1; > + struct strbuf b0 = 
STRBUF_INIT, b1 = STRBUF_INIT; > + > + if (argc != 3) > + die("Require exactly 2 arguments, got %d", argc); > + > + if (!(f0 = !strcmp(argv[1], "-") ? stdin : fopen(argv[1], "r"))) > + return error_errno("could not open '%s'", argv[1]); > + if (!(f1 = !strcmp(argv[2], "-") ? stdin : fopen(argv[2], "r"))) { > + fclose(f0); > + return error_errno("could not open '%s'", argv[2]); > + } > + > + for (;;) { > + int r0 = strbuf_getline(&b0, f0); > + int r1 = strbuf_getline(&b1, f1); > + > + if (r0 == EOF) { > + fclose(f0); > + fclose(f1); > + strbuf_release(&b0); > + strbuf_release(&b1); > + if (r1 == EOF) > + return 0; > +cmp_failed: > + if (!run_diff(argv[1], argv[2])) > + die("Huh? 'diff --no-index %s %s' succeeded", > + argv[1], argv[2]); > + return 1; > + } > + if (r1 == EOF || strbuf_cmp(&b0, &b1)) { > + fclose(f0); > + fclose(f1); > + strbuf_release(&b0); > + strbuf_release(&b1); > + goto cmp_failed; > + } > + } > +} > diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c > index 318fdbab0c3..3334de248a1 100644 > --- a/t/helper/test-tool.c > +++ b/t/helper/test-tool.c > @@ -18,6 +18,7 @@ static struct test_cmd cmds[] = { > { "bitmap", cmd__bitmap }, > { "bloom", cmd__bloom }, > { "chmtime", cmd__chmtime }, > + { "cmp", cmd__cmp }, > { "config", cmd__config }, > { "crontab", cmd__crontab }, > { "csprng", cmd__csprng }, > diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h > index bb799271631..e1104898cc3 100644 > --- a/t/helper/test-tool.h > +++ b/t/helper/test-tool.h > @@ -8,6 +8,7 @@ int cmd__advise_if_enabled(int argc, const char **argv); > int cmd__bitmap(int argc, const char **argv); > int cmd__bloom(int argc, const char **argv); > int cmd__chmtime(int argc, const char **argv); > +int cmd__cmp(int argc, const char **argv); > int cmd__config(int argc, const char **argv); > int cmd__crontab(int argc, const char **argv); > int cmd__csprng(int argc, const char **argv); > diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh > index 
8c44856eaec..28eddbc8e36 100644 > --- a/t/test-lib-functions.sh > +++ b/t/test-lib-functions.sh > @@ -1240,7 +1240,7 @@ test_expect_code () { > > test_cmp () { > test "$#" -ne 2 && BUG "2 param" > - eval "$GIT_TEST_CMP" '"$@"' > + GIT_ALLOC_LIMIT=0 eval "$GIT_TEST_CMP" '"$@"' > } > > # Check that the given config key has the expected value. > @@ -1541,72 +1541,6 @@ test_skip_or_die () { > error "$2" > } > > -# The following mingw_* functions obey POSIX shell syntax, but are actually > -# bash scripts, and are meant to be used only with bash on Windows. > - > -# A test_cmp function that treats LF and CRLF equal and avoids to fork > -# diff when possible. > -mingw_test_cmp () { > - # Read text into shell variables and compare them. If the results > - # are different, use regular diff to report the difference. > - local test_cmp_a= test_cmp_b= > - > - # When text came from stdin (one argument is '-') we must feed it > - # to diff. > - local stdin_for_diff= > - > - # Since it is difficult to detect the difference between an > - # empty input file and a failure to read the files, we go straight > - # to diff if one of the inputs is empty. 
> - if test -s "$1" && test -s "$2" > - then > - # regular case: both files non-empty > - mingw_read_file_strip_cr_ test_cmp_a <"$1" > - mingw_read_file_strip_cr_ test_cmp_b <"$2" > - elif test -s "$1" && test "$2" = - > - then > - # read 2nd file from stdin > - mingw_read_file_strip_cr_ test_cmp_a <"$1" > - mingw_read_file_strip_cr_ test_cmp_b > - stdin_for_diff='<<<"$test_cmp_b"' > - elif test "$1" = - && test -s "$2" > - then > - # read 1st file from stdin > - mingw_read_file_strip_cr_ test_cmp_a > - mingw_read_file_strip_cr_ test_cmp_b <"$2" > - stdin_for_diff='<<<"$test_cmp_a"' > - fi > - test -n "$test_cmp_a" && > - test -n "$test_cmp_b" && > - test "$test_cmp_a" = "$test_cmp_b" || > - eval "diff -u \"\$@\" $stdin_for_diff" > -} > - > -# $1 is the name of the shell variable to fill in > -mingw_read_file_strip_cr_ () { > - # Read line-wise using LF as the line separator > - # and use IFS to strip CR. > - local line > - while : > - do > - if IFS=$'\r' read -r -d $'\n' line > - then > - # good > - line=$line$'\n' > - else > - # we get here at EOF, but also if the last line > - # was not terminated by LF; in the latter case, > - # some text was read > - if test -z "$line" > - then > - # EOF, really > - break > - fi > - fi > - eval "$1=\$$1\$line" > - done > -} > - > # Like "env FOO=BAR some-program", but run inside a subshell, which means > # it also works for shell functions (though those functions cannot impact > # the environment outside of the test_env invocation). > diff --git a/t/test-lib.sh b/t/test-lib.sh > index 7726d1da88a..220c259e796 100644 > --- a/t/test-lib.sh > +++ b/t/test-lib.sh > @@ -1546,7 +1546,7 @@ case $uname_s in > test_set_prereq SED_STRIPS_CR > test_set_prereq GREP_STRIPS_CR > test_set_prereq WINDOWS > - GIT_TEST_CMP=mingw_test_cmp > + GIT_TEST_CMP="test-tool cmp" > ;; > *CYGWIN*) > test_set_prereq POSIXPERM > > base-commit: 23b219f8e3f2adfb0441e135f0a880e6124f766c > -- > gitgitgadget > >
"Johannes Schindelin via GitGitGadget" <gitgitgadget@gmail.com> writes: > + const char *argv[] = { > + "diff", "--no-index", NULL, NULL, NULL > + }; Don't we want to have "--" before the two paths? > + if (!(f0 = !strcmp(argv[1], "-") ? stdin : fopen(argv[1], "r"))) > + return error_errno("could not open '%s'", argv[1]); > + if (!(f1 = !strcmp(argv[2], "-") ? stdin : fopen(argv[2], "r"))) { > + fclose(f0); > + return error_errno("could not open '%s'", argv[2]); > + } It is tricky that you need to take "-" and treat it as the standard input stream in either argv[1] or argv[2] (but not both). If would be a different story in an end-user facing program, but because this is a test helper, feeding wrong input is developer's fault, and I do not mind lack of attention to detail of error checking to make sure we avoid comparing alternating lines of the standard input. > + for (;;) { > + int r0 = strbuf_getline(&b0, f0); > + int r1 = strbuf_getline(&b1, f1); > + > + if (r0 == EOF) { > + fclose(f0); > + fclose(f1); > + strbuf_release(&b0); > + strbuf_release(&b1); > + if (r1 == EOF) > + return 0; If both hit the EOF at the same time, we know they are the same, OK. > +cmp_failed: > + if (!run_diff(argv[1], argv[2])) If one of argv[] was "-", then this wouldn't work correctly, as the other file is read from the beginning but the "-" side have consumed the initial part of the input and we cannot unseek it. This bug needs to be fixed only if we expect a useful and reliable output from the helper. But otherwise the idea is sound. We compare them line by line, using strbuf_getline() to ignore differences in CRLF and LF that originates at 4d715ac0 (Windows: a test_cmp that is agnostic to random LF <> CRLF conversions, 2013-10-26). Only when we find the input different, we use "git diff --no-index" to make the difference (and unfortunately more, as it does not ignore CRLF <> LF differences) visible. > + die("Huh? 
'diff --no-index %s %s' succeeded", > + argv[1], argv[2]); Nice attention to (possibly irrelevant) detail here. I would have ignored the return value and reported "they are different" at this point, though. The line-by-line comparison we did was the authoritative one, and "git diff --no-index" is merely used for human readable output. In any case, "test-tool mingwcmp" would be a better name that highlights the spirit of 4d715ac0 to ignore CRLF <> LF issues. IOW, it does a lot more than "cmp" replacement, and we shouldn't mislead users/developers into thinking it is a plain "cmp" replacement. Thanks. > diff --git a/t/test-lib.sh b/t/test-lib.sh > index 7726d1da88a..220c259e796 100644 > --- a/t/test-lib.sh > +++ b/t/test-lib.sh > @@ -1546,7 +1546,7 @@ case $uname_s in > test_set_prereq SED_STRIPS_CR > test_set_prereq GREP_STRIPS_CR > test_set_prereq WINDOWS > - GIT_TEST_CMP=mingw_test_cmp > + GIT_TEST_CMP="test-tool cmp" > ;; > *CYGWIN*) > test_set_prereq POSIXPERM > > base-commit: 23b219f8e3f2adfb0441e135f0a880e6124f766c
Hi Junio, On Fri, 29 Jul 2022, Junio C Hamano wrote: > "Johannes Schindelin via GitGitGadget" <gitgitgadget@gmail.com> > writes: > > > + const char *argv[] = { > > + "diff", "--no-index", NULL, NULL, NULL > > + }; > > Don't we want to have "--" before the two paths? Yes! > > + if (!(f0 = !strcmp(argv[1], "-") ? stdin : fopen(argv[1], "r"))) > > + return error_errno("could not open '%s'", argv[1]); > > + if (!(f1 = !strcmp(argv[2], "-") ? stdin : fopen(argv[2], "r"))) { > > + fclose(f0); > > + return error_errno("could not open '%s'", argv[2]); > > + } > > It is tricky that you need to take "-" and treat it as the standard > input stream in either argv[1] or argv[2] (but not both). If would > be a different story in an end-user facing program, but because this > is a test helper, feeding wrong input is developer's fault, and I do > not mind lack of attention to detail of error checking to make sure > we avoid comparing alternating lines of the standard input. No, you're right, I've added a guard that prevents `test-tool cmp - -` from failing in obscure ways. > > + for (;;) { > > + int r0 = strbuf_getline(&b0, f0); > > + int r1 = strbuf_getline(&b1, f1); > > + > > + if (r0 == EOF) { > > + fclose(f0); > > + fclose(f1); > > + strbuf_release(&b0); > > + strbuf_release(&b1); > > + if (r1 == EOF) > > + return 0; > > If both hit the EOF at the same time, we know they are the same, OK. > > > +cmp_failed: > > + if (!run_diff(argv[1], argv[2])) > > If one of argv[] was "-", then this wouldn't work correctly, as the > other file is read from the beginning but the "-" side have consumed > the initial part of the input and we cannot unseek it. This bug > needs to be fixed only if we expect a useful and reliable output > from the helper. Right. I've added a clause that says that we cannot show the diff because `stdin` has been consumed already. > But otherwise the idea is sound. 
We compare them line by line, > using strbuf_getline() to ignore differences in CRLF and LF that > originates at 4d715ac0 (Windows: a test_cmp that is agnostic to > random LF <> CRLF conversions, 2013-10-26). Only when we find the > input different, we use "git diff --no-index" to make the difference > (and unfortunately more, as it does not ignore CRLF <> LF > differences) visible. > > > + die("Huh? 'diff --no-index %s %s' succeeded", > > + argv[1], argv[2]); > > Nice attention to (possibly irrelevant) detail here. I would have > ignored the return value and reported "they are different" at this > point, though. The line-by-line comparison we did was the > authoritative one, and "git diff --no-index" is merely used for > human readable output. > > In any case, "test-tool mingwcmp" would be a better name that > highlights the spirit of 4d715ac0 to ignore CRLF <> LF issues. IOW, > it does a lot more than "cmp" replacement, and we shouldn't mislead > users/developers into thinking it is a plain "cmp" replacement. Fair point. The Unix tool `cmp` does not care about line endings at all, so when you come from a Unix background you will expect the same to be true for `test-tool cmp`. On the other hand, you will expect the same to be true for `test_cmp`, too, which is not the case, and the root cause of why I had to come up with 32ed3314c10 (t5351: avoid using `test_cmp` for binary data, 2022-07-29). Having said that, I agree that the test tool name should reflect better what the subcommand does. I do dislike the proposed name `mingwcmp`. Not only because it is misleading, as the purpose is not to compare MINGW-specific files but instead the purpose is to compare text files (and, in fact, the tool works just fine on Linux and macOS, too). 
But also because it would contribute to just how much of a second-class citizen the MINGW-based build is in Git land: From choosing to implement large parts, including the entire test suite as well as the performance benchmarks, in POSIX scripts (which plays to Windows' weaknesses in a big way) to massively favoring spawned processes over multi-threading (which plays to Linux' strengths and to Windows' weaknesses), to a still-inherent assumption that the underlying filesystem is case-sensitive (think: branch names), to an implicit agreement in the core Git community that patch contributions need not take care of working well on Windows (but that that's the job "of Windows folk" instead). This is kind of at odds with the fact that we must assume that half of Git's users are Windows-based (we can only assume, based on surveys, because we successfully avoid any kind of even opt-in telemetry that would give us hard data). I definitely want to stay away from making that second-citizenry even worse. So I am going with the name `test-tool text-cmp` instead. Thank you for your review, Dscho > > Thanks. > > > diff --git a/t/test-lib.sh b/t/test-lib.sh > > index 7726d1da88a..220c259e796 100644 > > --- a/t/test-lib.sh > > +++ b/t/test-lib.sh > > @@ -1546,7 +1546,7 @@ case $uname_s in > > test_set_prereq SED_STRIPS_CR > > test_set_prereq GREP_STRIPS_CR > > test_set_prereq WINDOWS > > - GIT_TEST_CMP=mingw_test_cmp > > + GIT_TEST_CMP="test-tool cmp" > > ;; > > *CYGWIN*) > > test_set_prereq POSIXPERM > > > > base-commit: 23b219f8e3f2adfb0441e135f0a880e6124f766c >
Am 06.09.22 um 15:10 schrieb Johannes Schindelin: > Hi Junio, > > On Fri, 29 Jul 2022, Junio C Hamano wrote: > >> "Johannes Schindelin via GitGitGadget" <gitgitgadget@gmail.com> >> writes: >> >>> + const char *argv[] = { >>> + "diff", "--no-index", NULL, NULL, NULL >>> + }; >> >> Don't we want to have "--" before the two paths? > > Yes! > >>> + if (!(f0 = !strcmp(argv[1], "-") ? stdin : fopen(argv[1], "r"))) >>> + return error_errno("could not open '%s'", argv[1]); >>> + if (!(f1 = !strcmp(argv[2], "-") ? stdin : fopen(argv[2], "r"))) { >>> + fclose(f0); >>> + return error_errno("could not open '%s'", argv[2]); >>> + } >> >> It is tricky that you need to take "-" and treat it as the standard >> input stream in either argv[1] or argv[2] (but not both). If would >> be a different story in an end-user facing program, but because this >> is a test helper, feeding wrong input is developer's fault, and I do >> not mind lack of attention to detail of error checking to make sure >> we avoid comparing alternating lines of the standard input. "git diff --no-index - -" also doesn't complain, by the way. > No, you're right, I've added a guard that prevents `test-tool cmp - -` > from failing in obscure ways. > >>> + for (;;) { >>> + int r0 = strbuf_getline(&b0, f0); >>> + int r1 = strbuf_getline(&b1, f1); >>> + >>> + if (r0 == EOF) { >>> + fclose(f0); >>> + fclose(f1); >>> + strbuf_release(&b0); >>> + strbuf_release(&b1); >>> + if (r1 == EOF) >>> + return 0; >> >> If both hit the EOF at the same time, we know they are the same, OK. >> >>> +cmp_failed: >>> + if (!run_diff(argv[1], argv[2])) >> >> If one of argv[] was "-", then this wouldn't work correctly, as the >> other file is read from the beginning but the "-" side have consumed >> the initial part of the input and we cannot unseek it. This bug >> needs to be fixed only if we expect a useful and reliable output >> from the helper. > > Right. 
I've added a clause that says that we cannot show the diff because > `stdin` has been consumed already. > >> But otherwise the idea is sound. We compare them line by line, >> using strbuf_getline() to ignore differences in CRLF and LF that >> originates at 4d715ac0 (Windows: a test_cmp that is agnostic to >> random LF <> CRLF conversions, 2013-10-26). Only when we find the >> input different, we use "git diff --no-index" to make the difference >> (and unfortunately more, as it does not ignore CRLF <> LF >> differences) visible. Why not use "git diff --no-index --ignore-cr-at-eol"? Do you even need to wrap it? René
René Scharfe <l.s.r@web.de> writes: > "git diff --no-index - -" also doesn't complain, by the way. True, but in this case hopefully it is worth to call it out, as both this code that uses "diff --no-index" and "diff --no-index" itself came from the same author ;-) I think "git diff --no-index - -" should just exit 0 after slurping all its input (i.e. allow it to be placed downstream of a pipe without blocking the upstream), but it is also fine to exit with 0 without reading a single byte from the standard input. Of course the latter is easier to implement ;-) >>> But otherwise the idea is sound. We compare them line by line, >>> using strbuf_getline() to ignore differences in CRLF and LF that >>> originates at 4d715ac0 (Windows: a test_cmp that is agnostic to >>> random LF <> CRLF conversions, 2013-10-26). Only when we find the >>> input different, we use "git diff --no-index" to make the difference >>> (and unfortunately more, as it does not ignore CRLF <> LF >>> differences) visible. > > Why not use "git diff --no-index --ignore-cr-at-eol"? Do you even need > to wrap it? Hmph. That surely sounds sensible if it works, and I offhand do not see why it shouldn't work.
Am 07.09.22 um 18:25 schrieb Junio C Hamano: > René Scharfe <l.s.r@web.de> writes: > >>>> But otherwise the idea is sound. We compare them line by line, >>>> using strbuf_getline() to ignore differences in CRLF and LF that >>>> originates at 4d715ac0 (Windows: a test_cmp that is agnostic to >>>> random LF <> CRLF conversions, 2013-10-26). Only when we find the >>>> input different, we use "git diff --no-index" to make the difference >>>> (and unfortunately more, as it does not ignore CRLF <> LF >>>> differences) visible. >> >> Why not use "git diff --no-index --ignore-cr-at-eol"? Do you even need >> to wrap it? > > Hmph. That surely sounds sensible if it works, and I offhand do not > see why it shouldn't work. Using git diff in test_cmp and using test_cmp to check if git diff works would become a cyclical dependency. Only doing that on one platform limits the potential blind spot to platform-specific bugs, though. Enough to go wrapper-less? Not sure. René
On Thu, Sep 08 2022, René Scharfe wrote: > Am 07.09.22 um 18:25 schrieb Junio C Hamano: >> René Scharfe <l.s.r@web.de> writes: >> >>>>> But otherwise the idea is sound. We compare them line by line, >>>>> using strbuf_getline() to ignore differences in CRLF and LF that >>>>> originates at 4d715ac0 (Windows: a test_cmp that is agnostic to >>>>> random LF <> CRLF conversions, 2013-10-26). Only when we find the >>>>> input different, we use "git diff --no-index" to make the difference >>>>> (and unfortunately more, as it does not ignore CRLF <> LF >>>>> differences) visible. >>> >>> Why not use "git diff --no-index --ignore-cr-at-eol"? Do you even need >>> to wrap it? >> >> Hmph. That surely sounds sensible if it works, and I offhand do not >> see why it shouldn't work. > > Using git diff in test_cmp and using test_cmp to check if git diff works > would become a cyclical dependency. Only doing that on one platform > limits the potential blind spot to platform-specific bugs, though. > Enough to go wrapper-less? Not sure. I don't see how being wrapper-less is less of a "cyclical dependency" than using "git diff" directly. If we are to postulate some bug where "git diff" thwarts us for the use of "test_cmp" it's going to be *very* broken. I don't see how such a "git diff" would pass the rest of the test suite (some of which involves comparing its exact output), but still be functional enough to work as a GIT_TEST_CMP. Even one where it just returns 0 unconditionally wouldn't pass, as we rely on "! test_cmp" in some cases. And any such breakage we imagine might just as well affect a wrapper for it, and I'd think that would be the more likely of two unlikely possibilities, as that wrapper would only be used for the test suite, whereas "git diff" is more widely tested. In any case, as long as we preserve the ability to set a GIT_TEST_CMP=cmp we can sanity check any such wrapper or dogfooding of "git diff" with an external program.
Needing to deal just with "git diff" and "cmp" would be a step forward, as we'd be able to drop the current shellscript "mingw" wrapper, as well as the special support for a "diff" that doesn't understand "-u".
Hi Junio, On Wed, 7 Sep 2022, Junio C Hamano wrote: > René Scharfe <l.s.r@web.de> writes: > > > "git diff --no-index - -" also doesn't complain, by the way. > > True, but in this case hopefully it is worth to call it out, as both > this code that uses "diff --no-index" and "diff --no-index" itself > came from the same author ;-) > > I think "git diff --no-index - -" should just exit 0 after slurping > all its input (i.e. allow it to be placed downstream of a pipe > without blocking the upstream), but it is also fine to exit with 0 > without reading a single byte from the standard input. Of course > the latter is easier to implement ;-) > > >>> But otherwise the idea is sound. We compare them line by line, > >>> using strbuf_getline() to ignore differences in CRLF and LF that > >>> originates at 4d715ac0 (Windows: a test_cmp that is agnostic to > >>> random LF <> CRLF conversions, 2013-10-26). Only when we find the > >>> input different, we use "git diff --no-index" to make the difference > >>> (and unfortunately more, as it does not ignore CRLF <> LF > >>> differences) visible. > > > > Why not use "git diff --no-index --ignore-cr-at-eol"? Do you even need > > to wrap it? > > Hmph. That surely sounds sensible if it works, and I offhand do not > see why it shouldn't work. Is this a reversal of the stance you took in your reply in https://lore.kernel.org/git/7vps7xrfxa.fsf@assigned-by-dhcp.cox.net/ to my suggestion to replace `cmp` by `diff --no-index` (in that mail referred to as patch [7/8])? If I recall correctly, you clarified outside of that thread that "I do not think it is a good enough reason to make the tests slower" was you being concerned about employing the entire diff machinery instead of doing a simple byte-for-byte comparison. And while it is no longer _that_ simple a comparison (it now special-handles Carriage Returns), the speed and simplicity concern is still valid: `test-tool text-cmp` is vastly simpler (and provably faster) than `diff --no-index`. 
Just because it is easier to review a one-liner to switch from essentially `cmp` to `git diff --no-index --ignore-cr-at-eol` does not mean that it is reasonable: it would cause us to blast out that much more CO2, just for our one-time convenience. Or for that matter, it would willfully slow down the `windows-test` jobs that are already mired in sloooow performance (mostly due to running a shell script-based test suite). Can you please let me make the Windows situation better rather than worse? Thank you, Dscho
Johannes Schindelin <Johannes.Schindelin@gmx.de> writes: >> > Why not use "git diff --no-index --ignore-cr-at-eol"? Do you even need >> > to wrap it? >> >> Hmph. That surely sounds sensible if it works, and I offhand do not >> see why it shouldn't work. > > Is this a reversal of the stance you took in your reply in > https://lore.kernel.org/git/7vps7xrfxa.fsf@assigned-by-dhcp.cox.net/ to my > suggestion to replace `cmp` by `diff --no-index` (in that mail referred to > as patch [7/8])? cox.net? It is a lifetime ago and the world has changed. Hopefully that "diff --no-index" has matured a lot to earn more confidence by us than it had back then. > If I recall correctly, you clarified outside of that thread that "I do not > think it is a good enough reason to make the tests slower" was you being > concerned about employing the entire diff machinery instead of doing a > simple byte-for-byte comparison. Is it still relevant, now that we are talking about text-cmp that ignores cr-at-eol, to bring a random remark about byte-for-byte comparison from more than 10 years ago? > Just because it is easier to review a one-liner to switch from essentially > `cmp` to `git diff --no-index --ignore-cr-at-eol` does not mean that it is > reasonable: it would cause us to blast out that much more CO2, just for > our one-time convenience. Measurement, in tons per year, is needed, or please stop talking about CO2. It is not that funny. Developer cycle time is easier to measure and more meaningful. It would be much faster to run a byte-for-byte-with-ignore-cr-at-eol comparison than running "diff --no-index --ignore-cr-at-eol" on files with meaningful sizes. But comparison between "expect" and "actual", which are typically at most several lines long? Wouldn't the overhead to spawn a process dwarf everything else, especially on Windows, whether it is your byte-for-byte-with-ignore-cr-at-eol program or "git diff"?
diff --git a/Makefile b/Makefile index 1624471badc..45f108e43a1 100644 --- a/Makefile +++ b/Makefile @@ -723,6 +723,7 @@ TEST_BUILTINS_OBJS += test-advise.o TEST_BUILTINS_OBJS += test-bitmap.o TEST_BUILTINS_OBJS += test-bloom.o TEST_BUILTINS_OBJS += test-chmtime.o +TEST_BUILTINS_OBJS += test-cmp.o TEST_BUILTINS_OBJS += test-config.o TEST_BUILTINS_OBJS += test-crontab.o TEST_BUILTINS_OBJS += test-csprng.o diff --git a/t/helper/test-cmp.c b/t/helper/test-cmp.c new file mode 100644 index 00000000000..1c646a54bf6 --- /dev/null +++ b/t/helper/test-cmp.c @@ -0,0 +1,73 @@ +#include "test-tool.h" +#include "git-compat-util.h" +#include "strbuf.h" +#include "gettext.h" +#include "parse-options.h" +#include "run-command.h" + +#ifdef WIN32 +#define NO_SUCH_DIR "\\\\.\\GLOBALROOT\\invalid" +#else +#define NO_SUCH_DIR "/dev/null" +#endif + +static int run_diff(const char *path1, const char *path2) +{ + const char *argv[] = { + "diff", "--no-index", NULL, NULL, NULL + }; + const char *env[] = { + "GIT_PAGER=cat", + "GIT_DIR=" NO_SUCH_DIR, + "HOME=" NO_SUCH_DIR, + NULL + }; + + argv[2] = path1; + argv[3] = path2; + return run_command_v_opt_cd_env(argv, + RUN_COMMAND_NO_STDIN | RUN_GIT_CMD, + NULL, env); +} + +int cmd__cmp(int argc, const char **argv) +{ + FILE *f0, *f1; + struct strbuf b0 = STRBUF_INIT, b1 = STRBUF_INIT; + + if (argc != 3) + die("Require exactly 2 arguments, got %d", argc); + + if (!(f0 = !strcmp(argv[1], "-") ? stdin : fopen(argv[1], "r"))) + return error_errno("could not open '%s'", argv[1]); + if (!(f1 = !strcmp(argv[2], "-") ? stdin : fopen(argv[2], "r"))) { + fclose(f0); + return error_errno("could not open '%s'", argv[2]); + } + + for (;;) { + int r0 = strbuf_getline(&b0, f0); + int r1 = strbuf_getline(&b1, f1); + + if (r0 == EOF) { + fclose(f0); + fclose(f1); + strbuf_release(&b0); + strbuf_release(&b1); + if (r1 == EOF) + return 0; +cmp_failed: + if (!run_diff(argv[1], argv[2])) + die("Huh? 
'diff --no-index %s %s' succeeded", + argv[1], argv[2]); + return 1; + } + if (r1 == EOF || strbuf_cmp(&b0, &b1)) { + fclose(f0); + fclose(f1); + strbuf_release(&b0); + strbuf_release(&b1); + goto cmp_failed; + } + } +} diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c index 318fdbab0c3..3334de248a1 100644 --- a/t/helper/test-tool.c +++ b/t/helper/test-tool.c @@ -18,6 +18,7 @@ static struct test_cmd cmds[] = { { "bitmap", cmd__bitmap }, { "bloom", cmd__bloom }, { "chmtime", cmd__chmtime }, + { "cmp", cmd__cmp }, { "config", cmd__config }, { "crontab", cmd__crontab }, { "csprng", cmd__csprng }, diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h index bb799271631..e1104898cc3 100644 --- a/t/helper/test-tool.h +++ b/t/helper/test-tool.h @@ -8,6 +8,7 @@ int cmd__advise_if_enabled(int argc, const char **argv); int cmd__bitmap(int argc, const char **argv); int cmd__bloom(int argc, const char **argv); int cmd__chmtime(int argc, const char **argv); +int cmd__cmp(int argc, const char **argv); int cmd__config(int argc, const char **argv); int cmd__crontab(int argc, const char **argv); int cmd__csprng(int argc, const char **argv); diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh index 8c44856eaec..28eddbc8e36 100644 --- a/t/test-lib-functions.sh +++ b/t/test-lib-functions.sh @@ -1240,7 +1240,7 @@ test_expect_code () { test_cmp () { test "$#" -ne 2 && BUG "2 param" - eval "$GIT_TEST_CMP" '"$@"' + GIT_ALLOC_LIMIT=0 eval "$GIT_TEST_CMP" '"$@"' } # Check that the given config key has the expected value. @@ -1541,72 +1541,6 @@ test_skip_or_die () { error "$2" } -# The following mingw_* functions obey POSIX shell syntax, but are actually -# bash scripts, and are meant to be used only with bash on Windows. - -# A test_cmp function that treats LF and CRLF equal and avoids to fork -# diff when possible. -mingw_test_cmp () { - # Read text into shell variables and compare them. If the results - # are different, use regular diff to report the difference. 
- local test_cmp_a= test_cmp_b= - - # When text came from stdin (one argument is '-') we must feed it - # to diff. - local stdin_for_diff= - - # Since it is difficult to detect the difference between an - # empty input file and a failure to read the files, we go straight - # to diff if one of the inputs is empty. - if test -s "$1" && test -s "$2" - then - # regular case: both files non-empty - mingw_read_file_strip_cr_ test_cmp_a <"$1" - mingw_read_file_strip_cr_ test_cmp_b <"$2" - elif test -s "$1" && test "$2" = - - then - # read 2nd file from stdin - mingw_read_file_strip_cr_ test_cmp_a <"$1" - mingw_read_file_strip_cr_ test_cmp_b - stdin_for_diff='<<<"$test_cmp_b"' - elif test "$1" = - && test -s "$2" - then - # read 1st file from stdin - mingw_read_file_strip_cr_ test_cmp_a - mingw_read_file_strip_cr_ test_cmp_b <"$2" - stdin_for_diff='<<<"$test_cmp_a"' - fi - test -n "$test_cmp_a" && - test -n "$test_cmp_b" && - test "$test_cmp_a" = "$test_cmp_b" || - eval "diff -u \"\$@\" $stdin_for_diff" -} - -# $1 is the name of the shell variable to fill in -mingw_read_file_strip_cr_ () { - # Read line-wise using LF as the line separator - # and use IFS to strip CR. - local line - while : - do - if IFS=$'\r' read -r -d $'\n' line - then - # good - line=$line$'\n' - else - # we get here at EOF, but also if the last line - # was not terminated by LF; in the latter case, - # some text was read - if test -z "$line" - then - # EOF, really - break - fi - fi - eval "$1=\$$1\$line" - done -} - # Like "env FOO=BAR some-program", but run inside a subshell, which means # it also works for shell functions (though those functions cannot impact # the environment outside of the test_env invocation). 
diff --git a/t/test-lib.sh b/t/test-lib.sh index 7726d1da88a..220c259e796 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -1546,7 +1546,7 @@ case $uname_s in test_set_prereq SED_STRIPS_CR test_set_prereq GREP_STRIPS_CR test_set_prereq WINDOWS - GIT_TEST_CMP=mingw_test_cmp + GIT_TEST_CMP="test-tool cmp" ;; *CYGWIN*) test_set_prereq POSIXPERM