Message ID | 20210112022703.1884-2-worldhello.net@gmail.com (mailing list archive) |
---|---|
State | Accepted |
Commit | 9901164d81dfc2e050860f7dd60e5590c3cfaa50 |
Headers | show |
Series | improvements for git-bundle | expand |
On Mon, Jan 11 2021, Jiang Xin wrote: > From: Jiang Xin <zhiyou.jx@alibaba-inc.com> > > Move git-bundle related functions from t5510 to a library, and this lib > will be shared with a new testcase t6020 which finds a known breakage of > "git-bundle". > [...] > + > +# Format the output of git commands to make a user-friendly and stable > +# text. We can easily prepare the expect text without having to worry > +# about future changes of the commit ID and spaces of the output. > +make_user_friendly_and_stable_output () { > + sed \ > + -e "s/${A%${A#???????}}[0-9a-f]*/<COMMIT-A>/g" \ > + -e "s/${B%${B#???????}}[0-9a-f]*/<COMMIT-B>/g" \ > + -e "s/${C%${C#???????}}[0-9a-f]*/<COMMIT-C>/g" \ > + -e "s/${D%${D#???????}}[0-9a-f]*/<COMMIT-D>/g" \ > + -e "s/${E%${E#???????}}[0-9a-f]*/<COMMIT-E>/g" \ > + -e "s/${F%${F#???????}}[0-9a-f]*/<COMMIT-F>/g" \ > + -e "s/${G%${G#???????}}[0-9a-f]*/<COMMIT-G>/g" \ > + -e "s/${H%${H#???????}}[0-9a-f]*/<COMMIT-H>/g" \ > + -e "s/${I%${I#???????}}[0-9a-f]*/<COMMIT-I>/g" \ > + -e "s/${J%${J#???????}}[0-9a-f]*/<COMMIT-J>/g" \ > + -e "s/${K%${K#???????}}[0-9a-f]*/<COMMIT-K>/g" \ > + -e "s/${L%${L#???????}}[0-9a-f]*/<COMMIT-L>/g" \ > + -e "s/${M%${M#???????}}[0-9a-f]*/<COMMIT-M>/g" \ > + -e "s/${N%${N#???????}}[0-9a-f]*/<COMMIT-N>/g" \ > + -e "s/${O%${O#???????}}[0-9a-f]*/<COMMIT-O>/g" \ > + -e "s/${P%${P#???????}}[0-9a-f]*/<COMMIT-P>/g" \ > + -e "s/${TAG1%${TAG1#???????}}[0-9a-f]*/<TAG-1>/g" \ > + -e "s/${TAG2%${TAG2#???????}}[0-9a-f]*/<TAG-2>/g" \ > + -e "s/${TAG3%${TAG3#???????}}[0-9a-f]*/<TAG-3>/g" \ > + -e "s/ *\$//" > +} On one of the gcc farm boxes, a i386 box (gcc45) this fails because sed gets killed after >500MB of memory use (I was just eyeballing it in htop) on the "reate bundle from special rev: main^!" test. This with GNU sed 4.2.2. I suspect this regex pattern creates some runaway behavior in sed that's since been fixed (or maybe it's the glibc regex engine?). The glibc is 2.19-18+deb8u10: + git bundle list-heads special-rev.bdl + make_user_friendly_and_stable_output + sed -e s/[0-9a-f]*/<COMMIT-A>/g -e s/[0-9a-f]*/<COMMIT-B>/g -e s/[0-9a-f]*/<COMMIT-C>/g -e s/[0-9a-f]*/<COMMIT-D>/g -e s/[0-9a-f]*/<COMMIT-E>/g -e s/[0-9a-f]*/<COMMIT-F>/g -e s/[0-9a-f]*/<COMMIT-G>/g -e s/[0-9a-f]*/<COMMIT-H>/g -e s/[0-9a-f]*/<COMMIT-I>/g -e s/[0-9a-f]*/<COMMIT-J>/g -e s/[0-9a-f]*/<COMMIT-K>/g -e s/[0-9a-f]*/<COMMIT-L>/g -e s/[0-9a-f]*/<COMMIT-M>/g -e s/[0-9a-f]*/<COMMIT-N>/g -e s/[0-9a-f]*/<COMMIT-O>/g -e s/[0-9a-f]*/<COMMIT-P>/g -e s/[0-9a-f]*/<TAG-1>/g -e s/[0-9a-f]*/<TAG-2>/g -e s/[0-9a-f]*/<TAG-3>/g -e s/ *$// sed: couldn't re-allocate memory
Ævar Arnfjörð Bjarmason <avarab@gmail.com> 于2021年5月27日周四 上午2:51写道: > > > On Mon, Jan 11 2021, Jiang Xin wrote: > > > From: Jiang Xin <zhiyou.jx@alibaba-inc.com> > > > > Move git-bundle related functions from t5510 to a library, and this > > lib > > will be shared with a new testcase t6020 which finds a known > > breakage of > > "git-bundle". > > [...] > > + > > +# Format the output of git commands to make a user-friendly and > > stable > > +# text. We can easily prepare the expect text without having to > > worry > > +# about future changes of the commit ID and spaces of the output. > > +make_user_friendly_and_stable_output () { > > + sed \ > > + -e "s/${A%${A#???????}}[0-9a-f]*/<COMMIT-A>/g" \ > > + -e "s/${B%${B#???????}}[0-9a-f]*/<COMMIT-B>/g" \ > > + -e "s/${C%${C#???????}}[0-9a-f]*/<COMMIT-C>/g" \ > > + -e "s/${D%${D#???????}}[0-9a-f]*/<COMMIT-D>/g" \ > > + -e "s/${E%${E#???????}}[0-9a-f]*/<COMMIT-E>/g" \ > > + -e "s/${F%${F#???????}}[0-9a-f]*/<COMMIT-F>/g" \ > > + -e "s/${G%${G#???????}}[0-9a-f]*/<COMMIT-G>/g" \ > > + -e "s/${H%${H#???????}}[0-9a-f]*/<COMMIT-H>/g" \ > > + -e "s/${I%${I#???????}}[0-9a-f]*/<COMMIT-I>/g" \ > > + -e "s/${J%${J#???????}}[0-9a-f]*/<COMMIT-J>/g" \ > > + -e "s/${K%${K#???????}}[0-9a-f]*/<COMMIT-K>/g" \ > > + -e "s/${L%${L#???????}}[0-9a-f]*/<COMMIT-L>/g" \ > > + -e "s/${M%${M#???????}}[0-9a-f]*/<COMMIT-M>/g" \ > > + -e "s/${N%${N#???????}}[0-9a-f]*/<COMMIT-N>/g" \ > > + -e "s/${O%${O#???????}}[0-9a-f]*/<COMMIT-O>/g" \ > > + -e "s/${P%${P#???????}}[0-9a-f]*/<COMMIT-P>/g" \ > > + -e "s/${TAG1%${TAG1#???????}}[0-9a-f]*/<TAG-1>/g" \ > > + -e "s/${TAG2%${TAG2#???????}}[0-9a-f]*/<TAG-2>/g" \ > > + -e "s/${TAG3%${TAG3#???????}}[0-9a-f]*/<TAG-3>/g" \ > > + -e "s/ *\$//" > > +} > > On one of the gcc farm boxes, a i386 box (gcc45) this fails because > sed > gets killed after >500MB of memory use (I was just eyeballing it in > htop) on the "reate bundle from special rev: main^!" test. This with > GNU > sed 4.2.2. > > I suspect this regex pattern creates some runaway behavior in sed > that's > since been fixed (or maybe it's the glibc regex engine?). The glibc is > 2.19-18+deb8u10: > > + git bundle list-heads special-rev.bdl > + make_user_friendly_and_stable_output > + sed -e s/[0-9a-f]*/<COMMIT-A>/g -e s/[0-9a-f]*/<COMMIT-B>/g -e > s/[0-9a-f]*/<COMMIT-C>/g -e s/[0-9a-f]*/<COMMIT-D>/g -e > s/[0-9a-f]*/<COMMIT-E>/g -e s/[0-9a-f]*/<COMMIT-F>/g -e > s/[0-9a-f]*/<COMMIT-G>/g -e s/[0-9a-f]*/<COMMIT-H>/g -e > s/[0-9a-f]*/<COMMIT-I>/g -e s/[0-9a-f]*/<COMMIT-J>/g -e > s/[0-9a-f]*/<COMMIT-K>/g -e s/[0-9a-f]*/<COMMIT-L>/g -e > s/[0-9a-f]*/<COMMIT-M>/g -e s/[0-9a-f]*/<COMMIT-N>/g -e > s/[0-9a-f]*/<COMMIT-O>/g -e s/[0-9a-f]*/<COMMIT-P>/g -e > s/[0-9a-f]*/<TAG-1>/g -e s/[0-9a-f]*/<TAG-2>/g -e > s/[0-9a-f]*/<TAG-3>/g -e s/ *$// > sed: couldn't re-allocate memory I wrote a program on macOS to check memory footprint for sed and perl. See: https://github.com/jiangxin/compare-sed-perl Test result: $ go build && ./compare-sed-perl Command: sed ..., MaxRSS: 901120 Command: gsed ..., MaxRSS: 2056192 Command: perl ..., MaxRSS: 2269184 It seems that sed (both the builtin version on macOS and GNU sed v4.8) has less memory consumed than perl. Can you run this program on the i386 box (gcc45) to check memory consumed by sed and perl? If this issue can be resolved by replacing sed with perl, the following patch may help: --- >8 --- From: Jiang Xin <zhiyou.jx@alibaba-inc.com> Date: Thu, 27 May 2021 14:31:49 +0800 Subject: [PATCH] test: use perl for complex text replacement Ævar reported that the function `make_user_friendly_and_stable_output()` failed on a i386 box (gcc45) in the gcc farm boxes with error: sed: couldn't re-allocate memory It seems that sed (GNU sed 4.2.2) gets killed after >500MB of memory use on the "create bundle from special rev: main^!" test. Call perl instead of sed for complex text replacement. Reported-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com> --- t/t5411/common-functions.sh | 27 ++++++++++++------------ t/t5548-push-porcelain.sh | 20 +++++++++--------- t/t6020-bundle-misc.sh | 42 ++++++++++++++++++------------------- 3 files changed, 45 insertions(+), 44 deletions(-) diff --git a/t/t5411/common-functions.sh b/t/t5411/common-functions.sh index 6694858e18..b6d33bdfdc 100644 --- a/t/t5411/common-functions.sh +++ b/t/t5411/common-functions.sh @@ -39,19 +39,20 @@ create_commits_in () { # remove some locale error messages. The emitted human-readable errors are # redundant to the more machine-readable output the tests already assert. make_user_friendly_and_stable_output () { - sed \ - -e "s/ *\$//" \ - -e "s/ */ /g" \ - -e "s/'/\"/g" \ - -e "s/ / /g" \ - -e "s/$A/<COMMIT-A>/g" \ - -e "s/$B/<COMMIT-B>/g" \ - -e "s/$TAG/<TAG-v123>/g" \ - -e "s/$ZERO_OID/<ZERO-OID>/g" \ - -e "s/$(echo $A | cut -c1-7)[0-9a-f]*/<OID-A>/g" \ - -e "s/$(echo $B | cut -c1-7)[0-9a-f]*/<OID-B>/g" \ - -e "s#To $URL_PREFIX/upstream.git#To <URL/of/upstream.git>#" \ - -e "/^error: / d" + perl -ne " + s/ *\$//; + s/ */ /g; + s/'/\"/g; + s/ / /g; + s/$A/<COMMIT-A>/g; + s/$B/<COMMIT-B>/g; + s/$TAG/<TAG-v123>/g; + s/$ZERO_OID/<ZERO-OID>/g; + s/$(echo $A | cut -c1-7)[0-9a-f]*/<OID-A>/g; + s/$(echo $B | cut -c1-7)[0-9a-f]*/<OID-B>/g; + s#To $URL_PREFIX/upstream.git#To <URL/of/upstream.git>#; + next if /^error: .*$/; + print" } filter_out_user_friendly_and_stable_output () { diff --git a/t/t5548-push-porcelain.sh b/t/t5548-push-porcelain.sh index 5a761f3642..95e216973d 100755 --- a/t/t5548-push-porcelain.sh +++ b/t/t5548-push-porcelain.sh @@ -44,16 +44,16 @@ create_commits_in () { # without having to worry about future changes of the commit ID and spaces # of the output. make_user_friendly_and_stable_output () { - sed \ - -e "s/ *\$//" \ - -e "s/ */ /g" \ - -e "s/ / /g" \ - -e "s/$A/<COMMIT-A>/g" \ - -e "s/$B/<COMMIT-B>/g" \ - -e "s/$ZERO_OID/<ZERO-OID>/g" \ - -e "s/$(echo $A | cut -c1-7)[0-9a-f]*/<OID-A>/g" \ - -e "s/$(echo $B | cut -c1-7)[0-9a-f]*/<OID-B>/g" \ - -e "s#To $URL_PREFIX/upstream.git#To <URL/of/upstream.git>#" + perl -pe " + s/ *\$//; + s/ */ /g; + s/ / /g; + s/$A/<COMMIT-A>/g; + s/$B/<COMMIT-B>/g; + s/$ZERO_OID/<ZERO-OID>/g; + s/$(echo $A | cut -c1-7)[0-9a-f]*/<OID-A>/g; + s/$(echo $B | cut -c1-7)[0-9a-f]*/<OID-B>/g; + s#To $URL_PREFIX/upstream.git#To <URL/of/upstream.git>#" } setup_upstream_and_workbench () { diff --git a/t/t6020-bundle-misc.sh b/t/t6020-bundle-misc.sh index 881f72fd44..f284be820f 100755 --- a/t/t6020-bundle-misc.sh +++ b/t/t6020-bundle-misc.sh @@ -84,27 +84,27 @@ test_commit_setvar () { # text. We can easily prepare the expect text without having to worry # about future changes of the commit ID and spaces of the output. make_user_friendly_and_stable_output () { - sed \ - -e "s/${A%${A#???????}}[0-9a-f]*/<COMMIT-A>/g" \ - -e "s/${B%${B#???????}}[0-9a-f]*/<COMMIT-B>/g" \ - -e "s/${C%${C#???????}}[0-9a-f]*/<COMMIT-C>/g" \ - -e "s/${D%${D#???????}}[0-9a-f]*/<COMMIT-D>/g" \ - -e "s/${E%${E#???????}}[0-9a-f]*/<COMMIT-E>/g" \ - -e "s/${F%${F#???????}}[0-9a-f]*/<COMMIT-F>/g" \ - -e "s/${G%${G#???????}}[0-9a-f]*/<COMMIT-G>/g" \ - -e "s/${H%${H#???????}}[0-9a-f]*/<COMMIT-H>/g" \ - -e "s/${I%${I#???????}}[0-9a-f]*/<COMMIT-I>/g" \ - -e "s/${J%${J#???????}}[0-9a-f]*/<COMMIT-J>/g" \ - -e "s/${K%${K#???????}}[0-9a-f]*/<COMMIT-K>/g" \ - -e "s/${L%${L#???????}}[0-9a-f]*/<COMMIT-L>/g" \ - -e "s/${M%${M#???????}}[0-9a-f]*/<COMMIT-M>/g" \ - -e "s/${N%${N#???????}}[0-9a-f]*/<COMMIT-N>/g" \ - -e "s/${O%${O#???????}}[0-9a-f]*/<COMMIT-O>/g" \ - -e "s/${P%${P#???????}}[0-9a-f]*/<COMMIT-P>/g" \ - -e "s/${TAG1%${TAG1#???????}}[0-9a-f]*/<TAG-1>/g" \ - -e "s/${TAG2%${TAG2#???????}}[0-9a-f]*/<TAG-2>/g" \ - -e "s/${TAG3%${TAG3#???????}}[0-9a-f]*/<TAG-3>/g" \ - -e "s/ *\$//" + perl -pe " + s/${A%${A#???????}}[0-9a-f]*/<COMMIT-A>/g; + s/${B%${B#???????}}[0-9a-f]*/<COMMIT-B>/g; + s/${C%${C#???????}}[0-9a-f]*/<COMMIT-C>/g; + s/${D%${D#???????}}[0-9a-f]*/<COMMIT-D>/g; + s/${E%${E#???????}}[0-9a-f]*/<COMMIT-E>/g; + s/${F%${F#???????}}[0-9a-f]*/<COMMIT-F>/g; + s/${G%${G#???????}}[0-9a-f]*/<COMMIT-G>/g; + s/${H%${H#???????}}[0-9a-f]*/<COMMIT-H>/g; + s/${I%${I#???????}}[0-9a-f]*/<COMMIT-I>/g; + s/${J%${J#???????}}[0-9a-f]*/<COMMIT-J>/g; + s/${K%${K#???????}}[0-9a-f]*/<COMMIT-K>/g; + s/${L%${L#???????}}[0-9a-f]*/<COMMIT-L>/g; + s/${M%${M#???????}}[0-9a-f]*/<COMMIT-M>/g; + s/${N%${N#???????}}[0-9a-f]*/<COMMIT-N>/g; + s/${O%${O#???????}}[0-9a-f]*/<COMMIT-O>/g; + s/${P%${P#???????}}[0-9a-f]*/<COMMIT-P>/g; + s/${TAG1%${TAG1#???????}}[0-9a-f]*/<TAG-1>/g; + s/${TAG2%${TAG2#???????}}[0-9a-f]*/<TAG-2>/g; + s/${TAG3%${TAG3#???????}}[0-9a-f]*/<TAG-3>/g; + s/ *\$//" } # (C) (D, pull/1/head, topic/1)
On Thu, May 27 2021, Jiang Xin wrote: > Ævar Arnfjörð Bjarmason <avarab@gmail.com> 于2021年5月27日周四 > 上午2:51写道: >> >> >> On Mon, Jan 11 2021, Jiang Xin wrote: >> >> > From: Jiang Xin <zhiyou.jx@alibaba-inc.com> >> > >> > Move git-bundle related functions from t5510 to a library, and this >> > lib >> > will be shared with a new testcase t6020 which finds a known >> > breakage of >> > "git-bundle". >> > [...] >> > + >> > +# Format the output of git commands to make a user-friendly and >> > stable >> > +# text. We can easily prepare the expect text without having to >> > worry >> > +# about future changes of the commit ID and spaces of the output. >> > +make_user_friendly_and_stable_output () { >> > + sed \ >> > + -e "s/${A%${A#???????}}[0-9a-f]*/<COMMIT-A>/g" \ >> > + -e "s/${B%${B#???????}}[0-9a-f]*/<COMMIT-B>/g" \ >> > + -e "s/${C%${C#???????}}[0-9a-f]*/<COMMIT-C>/g" \ >> > + -e "s/${D%${D#???????}}[0-9a-f]*/<COMMIT-D>/g" \ >> > + -e "s/${E%${E#???????}}[0-9a-f]*/<COMMIT-E>/g" \ >> > + -e "s/${F%${F#???????}}[0-9a-f]*/<COMMIT-F>/g" \ >> > + -e "s/${G%${G#???????}}[0-9a-f]*/<COMMIT-G>/g" \ >> > + -e "s/${H%${H#???????}}[0-9a-f]*/<COMMIT-H>/g" \ >> > + -e "s/${I%${I#???????}}[0-9a-f]*/<COMMIT-I>/g" \ >> > + -e "s/${J%${J#???????}}[0-9a-f]*/<COMMIT-J>/g" \ >> > + -e "s/${K%${K#???????}}[0-9a-f]*/<COMMIT-K>/g" \ >> > + -e "s/${L%${L#???????}}[0-9a-f]*/<COMMIT-L>/g" \ >> > + -e "s/${M%${M#???????}}[0-9a-f]*/<COMMIT-M>/g" \ >> > + -e "s/${N%${N#???????}}[0-9a-f]*/<COMMIT-N>/g" \ >> > + -e "s/${O%${O#???????}}[0-9a-f]*/<COMMIT-O>/g" \ >> > + -e "s/${P%${P#???????}}[0-9a-f]*/<COMMIT-P>/g" \ >> > + -e "s/${TAG1%${TAG1#???????}}[0-9a-f]*/<TAG-1>/g" \ >> > + -e "s/${TAG2%${TAG2#???????}}[0-9a-f]*/<TAG-2>/g" \ >> > + -e "s/${TAG3%${TAG3#???????}}[0-9a-f]*/<TAG-3>/g" \ >> > + -e "s/ *\$//" >> > +} >> >> On one of the gcc farm boxes, a i386 box (gcc45) this fails because >> sed >> gets killed after >500MB of memory use (I was just eyeballing it in >> htop) on the "reate bundle from special rev: main^!" test. This with >> GNU >> sed 4.2.2. >> >> I suspect this regex pattern creates some runaway behavior in sed >> that's >> since been fixed (or maybe it's the glibc regex engine?). The glibc is >> 2.19-18+deb8u10: >> >> + git bundle list-heads special-rev.bdl >> + make_user_friendly_and_stable_output >> + sed -e s/[0-9a-f]*/<COMMIT-A>/g -e s/[0-9a-f]*/<COMMIT-B>/g -e >> s/[0-9a-f]*/<COMMIT-C>/g -e s/[0-9a-f]*/<COMMIT-D>/g -e >> s/[0-9a-f]*/<COMMIT-E>/g -e s/[0-9a-f]*/<COMMIT-F>/g -e >> s/[0-9a-f]*/<COMMIT-G>/g -e s/[0-9a-f]*/<COMMIT-H>/g -e >> s/[0-9a-f]*/<COMMIT-I>/g -e s/[0-9a-f]*/<COMMIT-J>/g -e >> s/[0-9a-f]*/<COMMIT-K>/g -e s/[0-9a-f]*/<COMMIT-L>/g -e >> s/[0-9a-f]*/<COMMIT-M>/g -e s/[0-9a-f]*/<COMMIT-N>/g -e >> s/[0-9a-f]*/<COMMIT-O>/g -e s/[0-9a-f]*/<COMMIT-P>/g -e >> s/[0-9a-f]*/<TAG-1>/g -e s/[0-9a-f]*/<TAG-2>/g -e >> s/[0-9a-f]*/<TAG-3>/g -e s/ *$// >> sed: couldn't re-allocate memory > > I wrote a program on macOS to check memory footprint for sed and perl. > See: > > https://github.com/jiangxin/compare-sed-perl Interesting use of Go for as a /usr/bin/time -v replacement :) After changing your int64 to int32 and digging up how to cross-compile Go I get similar results, it's because your test has actual short SHA-1s in the "-e 's///g'"'s, but notice how in the trace I have it's e.g. "s/[0-9a-f]*/<COMMIT-A>/g". That's the problem, so that Go command won't reproduce it. Anyway, changing the test to emit to "input" first and running this shows it: avar@gcc45:/run/user/1632/git/t/trash directory.t6020-bundle-misc$ /usr/bin/time -v sed -e 's/[0-9a-f]*/<COMMIT-A>/g' -e 's/[0-9a-f]*/<COMMIT-B>/g' -e 's/[0-9a-f]*/<COMMIT-C>/g' -e 's/[0-9a-f]*/<COMMIT-D>/g' -e 's/[0-9a-f]*/<COMMIT-E>/g' -e 's/[0-9a-f]*/<COMMIT-F>/g' -e 's/[0-9a-f]*/<COMMIT-G>/g' -e 's/[0-9a-f]*/<COMMIT-H>/g' -e 's/[0-9a-f]*/<COMMIT-I>/g' -e 's/[0-9a-f]*/<COMMIT-J>/g' -e 's/[0-9a-f]*/<COMMIT-K>/g' -e 's/[0-9a-f]*/<COMMIT-L>/g' -e 's/[0-9a-f]*/<COMMIT-M>/g' -e 's/[0-9a-f]*/<COMMIT-N>/g' -e 's/[0-9a-f]*/<COMMIT-O>/g' -e 's/[0-9a-f]*/<COMMIT-P>/g' -e 's/[0-9a-f]*/<TAG-1>/g' -e 's/[0-9a-f]*/<TAG-2>/g' -e 's/[0-9a-f]*/<TAG-3>/g' -e 's/ *$//' <input sed: couldn't re-allocate memory Command exited with non-zero status 4 Command being timed: "sed -e s/[0-9a-f]*/<COMMIT-A>/g -e s/[0-9a-f]*/<COMMIT-B>/g -e s/[0-9a-f]*/<COMMIT-C>/g -e s/[0-9a-f]*/<COMMIT-D>/g -e s/[0-9a-f]*/<COMMIT-E>/g -e s/[0-9a-f]*/<COMMIT-F>/g -e s/[0-9a-f]*/<COMMIT-G>/g -e s/[0-9a-f]*/<COMMIT-H>/g -e s/[0-9a-f]*/<COMMIT-I>/g -e s/[0-9a-f]*/<COMMIT-J>/g -e s/[0-9a-f]*/<COMMIT-K>/g -e s/[0-9a-f]*/<COMMIT-L>/g -e s/[0-9a-f]*/<COMMIT-M>/g -e s/[0-9a-f]*/<COMMIT-N>/g -e s/[0-9a-f]*/<COMMIT-O>/g -e s/[0-9a-f]*/<COMMIT-P>/g -e s/[0-9a-f]*/<TAG-1>/g -e s/[0-9a-f]*/<TAG-2>/g -e s/[0-9a-f]*/<TAG-3>/g -e s/ *$//" User time (seconds): 130.00 System time (seconds): 2.42 Percent of CPU this job got: 100% Elapsed (wall clock) time (h:mm:ss or m:ss): 2:12.41 Average shared text size (kbytes): 0 Average unshared data size (kbytes): 0 Average stack size (kbytes): 0 Average total size (kbytes): 0 Maximum resident set size (kbytes): 1030968 Average resident set size (kbytes): 0 Major (requiring I/O) page faults: 0 Minor (reclaiming a frame) page faults: 257333 Voluntary context switches: 1 Involuntary context switches: 12578 Swaps: 0 File system inputs: 0 File system outputs: 0 Socket messages sent: 0 Socket messages received: 0 Signals delivered: 0 Page size (bytes): 4096 Exit status: 4 But no, the issue as it turns out is not Perl v.s. Sed, it's that there's some bug in the shellscript / tooling version (happens with both dash 0.5.7-4 and bash 4.3-11+deb8u2 on that box) where those expansions like ${A%${A#??????0?}} resolve to nothing. So if we make that: cat >input && cat input >&2 && sed -e "s/${A%${A#??????0?}}[0-9a-f]*/<COMMIT-A>/g" <input >input.tmp && mv input.tmp input && cat input >&2 && sed -e "s/${B%${B#???????}}[0-9a-f]*/<COMMIT-B>/g" <input >input.tmp && mv input.tmp input && cat input >&2 && We get things like: + sed -e s/[0-9a-f]*/<COMMIT-A>/g + mv input.tmp input + cat input <COMMIT-A> <COMMIT-A>r<COMMIT-A>s<COMMIT-A>/<COMMIT-A>h<COMMIT-A>s<COMMIT-A>/<COMMIT-A>m<COMMIT-A>i<COMMIT-A>n<COMMIT-A> + sed -e s/[0-9a-f]*/<COMMIT-B>/g + mv input.tmp input + cat input <COMMIT-B><<COMMIT-B>C<COMMIT-B>O<COMMIT-B>M<COMMIT-B>M<COMMIT-B>I<COMMIT-B>T<COMMIT-B>-<COMMIT-B>A<COMMIT-B>><COMMIT-B> <COMMIT-B><<COMMIT-B>C<COMMIT-B>O<COMMIT-B>M<COMMIT-B>M<COMMIT-B>I<COMMIT-B>T<COMMIT-B>-<COMMIT-B>A<COMMIT-B>><COMMIT-B>r<COMMIT-B><<COMMIT-B>C<COMMIT-B>O<COMMIT-B>M<COMMIT-B>M<COMMIT-B>I<COMMIT-B>T<COMMIT-B>-<COMMIT-B>A<COMMIT-B>><COMMIT-B>s<COMMIT-B><<COMMIT-B>C<COMMIT-B>O<COMMIT-B>M<COMMIT-B>M<COMMIT-B>I<COMMIT-B>T<COMMIT-B>-<COMMIT-B>A<COMMIT-B>><COMMIT-B>/<COMMIT-B><<COMMIT-B>C<COMMIT-B>O<COMMIT-B>M<COMMIT-B>M<COMMIT-B>I<COMMIT-B>T<COMMIT-B>-<COMMIT-B>A<COMMIT-B>><COMMIT-B>h<COMMIT-B><<COMMIT-B>C<COMMIT-B>O<COMMIT-B>M<COMMIT-B>M<COMMIT-B>I<COMMIT-B>T<COMMIT-B>-<COMMIT-B>A<COMMIT-B>><COMMIT-B>s<COMMIT-B><<COMMIT-B>C<COMMIT-B>O<COMMIT-B>M<COMMIT-B>M<COMMIT-B>I<COMMIT-B>T<COMMIT-B>-<COMMIT-B>A<COMMIT-B>><COMMIT-B>/<COMMIT-B><<COMMIT-B>C<COMMIT-B>O<COMMIT-B>M<COMMIT-B>M<COMMIT-B>I<COMMIT-B>T<COMMIT-B>-<COMMIT-B>A<COMMIT-B>><COMMIT-B>m<COMMIT-B><<COMMIT-B>C<COMMIT-B>O<COMMIT-B>M<COMMIT-B>M<COMMIT-B>I<COMMIT-B>T<COMMIT-B>-<COMMIT-B>A<COMMIT-B>><COMMIT-B>i<COMMIT-B><<COMMIT-B>C<COMMIT-B>O<COMMIT-B>M<COMMIT-B>M<COMMIT-B>I<COMMIT-B>T<COMMIT-B>-<COMMIT-B>A<COMMIT-B>><COMMIT-B>n<COMMIT-B><<COMMIT-B>C<COMMIT-B>O<COMMIT-B>M<COMMIT-B>M<COMMIT-B>I<COMMIT-B>T<COMMIT-B>-<COMMIT-B>A<COMMIT-B>><COMMIT-B> [...] etc. I.e. it's the sed expression itself that's the issue. I.e. you should be able to reproduce this locally with something like: echo 0 | sed -e 's/[0-9]*/<BEGIN>0<END>/g' -e 's/[0-9]*/<BEGIN>0<END>/g' -e 's/[0-9]*/<BEGIN>0<END>/g' -e 's/[0-9]*/<BEGIN>0<END>/g' -e 's/[0-9]*/<BEGIN>0<END>/g' -e 's/[0-9]*/<BEGIN>0<END>/g' -e 's/[0-9]*/<BEGIN>0<END>/g' -e 's/[0-9]*/<BEGIN>0<END>/g' If not just copy the -e a few more times. Anyway, looking at this whole test file with fresh eyes this pattern seems very strange. You duplicated most of test_commit with this test_commit_setvar. It's a bit more verbosity but why not just use: test_commit ... A=$(git rev-parse HEAD) Or teach test_commit a --rev-parse option or something and: A=$(test_commit ...) This make_user_friendly_and_stable_output then actually loses information, e.g. sometimes the bundle output you're testing emits trailing spaces, but the normalization function overzelously trims that. I think this whole thing would be much simpler with the above and then something like: @@ -146,7 +126,8 @@ test_expect_success 'setup' ' # branch main: merge commit I & J git checkout main && - test_commit_setvar --merge I topic/1 "Merge commit I" && + git merge --no-edit --no-ff -m"Merge commit I" topic/1 && + I=$(git rev-parse HEAD) && test_commit_setvar --merge J refs/pull/2/head "Merge commit J" && # branch main: commit K @@ -172,18 +153,18 @@ test_expect_success 'create bundle from special rev: main^!' ' git bundle list-heads special-rev.bdl | make_user_friendly_and_stable_output >actual && - cat >expect <<-\EOF && - <COMMIT-P> refs/heads/main + cat >expect <<-EOF && + $P refs/heads/main EOF test_cmp expect actual && Or just add a --merge option to test_commit itself.
On Thu, May 27, 2021 at 02:19:04PM +0200, Ævar Arnfjörð Bjarmason wrote: > Anyway, looking at this whole test file with fresh eyes this pattern > seems very strange. You duplicated most of test_commit with this > test_commit_setvar. It's a bit more verbosity but why not just use: > > test_commit ... > A=$(git rev-parse HEAD) > > Or teach test_commit a --rev-parse option or something and: > > A=$(test_commit ...) The latter would be nice (and maybe even could be the default, as the stdout is usually just going to --verbose output anyway). But I don't think it works, because the $() is a subshell, so we lose the side effect of test_tick incrementing the timestamp counter. I wasn't following this thread carefully, but your suggestion to just do "A=$(git rev-parse HEAD)" seems quite readable to me. -Peff
Ævar Arnfjörð Bjarmason wrote: > > On Thu, May 27 2021, Jiang Xin wrote: > > > Ævar Arnfjörð Bjarmason <avarab@gmail.com> 于2021年5月27日周四 > > 上午2:51写道: > >> > >> > >> On Mon, Jan 11 2021, Jiang Xin wrote: > >> > >> > From: Jiang Xin <zhiyou.jx@alibaba-inc.com> > >> > > >> > Move git-bundle related functions from t5510 to a library, and this > >> > lib > >> > will be shared with a new testcase t6020 which finds a known > >> > breakage of > >> > "git-bundle". > >> > [...] > >> > + > >> > +# Format the output of git commands to make a user-friendly and > >> > stable > >> > +# text. We can easily prepare the expect text without having to > >> > worry > >> > +# about future changes of the commit ID and spaces of the output. > >> > +make_user_friendly_and_stable_output () { > >> > + sed \ > >> > + -e "s/${A%${A#???????}}[0-9a-f]*/<COMMIT-A>/g" \ > >> > + -e "s/${B%${B#???????}}[0-9a-f]*/<COMMIT-B>/g" \ > >> > + -e "s/${C%${C#???????}}[0-9a-f]*/<COMMIT-C>/g" \ > >> > + -e "s/${D%${D#???????}}[0-9a-f]*/<COMMIT-D>/g" \ > >> > + -e "s/${E%${E#???????}}[0-9a-f]*/<COMMIT-E>/g" \ > >> > + -e "s/${F%${F#???????}}[0-9a-f]*/<COMMIT-F>/g" \ > >> > + -e "s/${G%${G#???????}}[0-9a-f]*/<COMMIT-G>/g" \ > >> > + -e "s/${H%${H#???????}}[0-9a-f]*/<COMMIT-H>/g" \ > >> > + -e "s/${I%${I#???????}}[0-9a-f]*/<COMMIT-I>/g" \ > >> > + -e "s/${J%${J#???????}}[0-9a-f]*/<COMMIT-J>/g" \ > >> > + -e "s/${K%${K#???????}}[0-9a-f]*/<COMMIT-K>/g" \ > >> > + -e "s/${L%${L#???????}}[0-9a-f]*/<COMMIT-L>/g" \ > >> > + -e "s/${M%${M#???????}}[0-9a-f]*/<COMMIT-M>/g" \ > >> > + -e "s/${N%${N#???????}}[0-9a-f]*/<COMMIT-N>/g" \ > >> > + -e "s/${O%${O#???????}}[0-9a-f]*/<COMMIT-O>/g" \ > >> > + -e "s/${P%${P#???????}}[0-9a-f]*/<COMMIT-P>/g" \ > >> > + -e "s/${TAG1%${TAG1#???????}}[0-9a-f]*/<TAG-1>/g" \ > >> > + -e "s/${TAG2%${TAG2#???????}}[0-9a-f]*/<TAG-2>/g" \ > >> > + -e "s/${TAG3%${TAG3#???????}}[0-9a-f]*/<TAG-3>/g" \ > >> > + -e "s/ *\$//" > >> > +} > >> > >> On one of the gcc farm boxes, a i386 box (gcc45) this fails because > >> sed > >> gets killed after >500MB of memory use (I was just eyeballing it in > >> htop) on the "reate bundle from special rev: main^!" test. This with > >> GNU > >> sed 4.2.2. > >> > >> I suspect this regex pattern creates some runaway behavior in sed > >> that's > >> since been fixed (or maybe it's the glibc regex engine?). The glibc is > >> 2.19-18+deb8u10: > >> > >> + git bundle list-heads special-rev.bdl > >> + make_user_friendly_and_stable_output > >> + sed -e s/[0-9a-f]*/<COMMIT-A>/g -e s/[0-9a-f]*/<COMMIT-B>/g -e > >> s/[0-9a-f]*/<COMMIT-C>/g -e s/[0-9a-f]*/<COMMIT-D>/g -e > >> s/[0-9a-f]*/<COMMIT-E>/g -e s/[0-9a-f]*/<COMMIT-F>/g -e > >> s/[0-9a-f]*/<COMMIT-G>/g -e s/[0-9a-f]*/<COMMIT-H>/g -e > >> s/[0-9a-f]*/<COMMIT-I>/g -e s/[0-9a-f]*/<COMMIT-J>/g -e > >> s/[0-9a-f]*/<COMMIT-K>/g -e s/[0-9a-f]*/<COMMIT-L>/g -e > >> s/[0-9a-f]*/<COMMIT-M>/g -e s/[0-9a-f]*/<COMMIT-N>/g -e > >> s/[0-9a-f]*/<COMMIT-O>/g -e s/[0-9a-f]*/<COMMIT-P>/g -e > >> s/[0-9a-f]*/<TAG-1>/g -e s/[0-9a-f]*/<TAG-2>/g -e > >> s/[0-9a-f]*/<TAG-3>/g -e s/ *$// > >> sed: couldn't re-allocate memory > > > > I wrote a program on macOS to check memory footprint for sed and perl. > > See: > > > > https://github.com/jiangxin/compare-sed-perl > > Interesting use of Go for as a /usr/bin/time -v replacement :) Here's a Ruby version: https://dpaste.com/FYT2QKHJE I'm not sure if will be useful in this particular case, but Ruby code always ends up simpler ;)
Ævar Arnfjörð Bjarmason <avarab@gmail.com> 于2021年5月27日周四 下午8:49写道: > > But no, the issue as it turns out is not Perl v.s. Sed, it's that > there's some bug in the shellscript / tooling version (happens with both > dash 0.5.7-4 and bash 4.3-11+deb8u2 on that box) where those expansions > like ${A%${A#??????0?}} resolve to nothing. That's the root cause. It can be reproduced by running the following test script: ``` #!/bin/sh # test script: test.sh test_commit_setvar () { var=$1 && oid=1234567890123456789012345678901234567890 && eval $var=$oid } test_commit_setvar A echo "A: $A" echo "Abbrev of A: ${A%${A#???????}}" ``` By running different version of dash, we can see that dash 0.5.7 fail the test: ``` $ /opt/dash/0.5.11/bin/dash test.sh A: 1234567890123456789012345678901234567890 Abbrev of A: 1234567 $ /opt/dash/0.5.7/bin/dash test.sh A: 1234567890123456789012345678901234567890 Abbrev of A: ``` This issue can be fixed using the following example: ``` #!/bin/sh test_commit_setvar () { var=$1 && oid=1234567890123456789012345678901234567890 && suffix=${oid#???????} && oid=${oid%$suffix} && eval $var=$oid } test_commit_setvar A echo "Abbrev of A: $A" ``` > Anyway, looking at this whole test file with fresh eyes this pattern > seems very strange. You duplicated most of test_commit with this > test_commit_setvar. It's a bit more verbosity but why not just use: > > test_commit ... > A=$(git rev-parse HEAD) The function "test_commit()" in "test-lib-function.sh" always creates tags and it cannot make merge commit. So I rewrite a new function which reuse the scaffold of "test_commit". BTW, sorry for the late reply, will send patch later. -- Jiang Xin
Felipe Contreras <felipe.contreras@gmail.com> 于2021年5月28日周五 上午3:19写道: > > Ævar Arnfjörð Bjarmason wrote: > > > > On Thu, May 27 2021, Jiang Xin wrote: > > > I wrote a program on macOS to check memory footprint for sed and perl. > > > See: > > > > > > https://github.com/jiangxin/compare-sed-perl > > > > Interesting use of Go for as a /usr/bin/time -v replacement :) > > Here's a Ruby version: > https://dpaste.com/FYT2QKHJE Nice, it's much simpler. > I'm not sure if will be useful in this particular case, but Ruby code > always ends up simpler ;) > > -- > Felipe Contreras
On Tue, Jun 01 2021, Jiang Xin wrote: > Ævar Arnfjörð Bjarmason <avarab@gmail.com> 于2021年5月27日周四 下午8:49写道: >> >> But no, the issue as it turns out is not Perl v.s. Sed, it's that >> there's some bug in the shellscript / tooling version (happens with both >> dash 0.5.7-4 and bash 4.3-11+deb8u2 on that box) where those expansions >> like ${A%${A#??????0?}} resolve to nothing. > > That's the root cause. It can be reproduced by running the following > test script: > > ``` > #!/bin/sh > # test script: test.sh > > test_commit_setvar () { > var=$1 && > oid=1234567890123456789012345678901234567890 && > eval $var=$oid > } > > test_commit_setvar A > echo "A: $A" > echo "Abbrev of A: ${A%${A#???????}}" > ``` > > By running different version of dash, we can see that dash 0.5.7 fail the test: > > ``` > $ /opt/dash/0.5.11/bin/dash test.sh > A: 1234567890123456789012345678901234567890 > Abbrev of A: 1234567 > > $ /opt/dash/0.5.7/bin/dash test.sh > A: 1234567890123456789012345678901234567890 > Abbrev of A: > ``` > > This issue can be fixed using the following example: > > ``` > #!/bin/sh > > test_commit_setvar () { > var=$1 && > oid=1234567890123456789012345678901234567890 && > suffix=${oid#???????} && > oid=${oid%$suffix} && > eval $var=$oid > } > > test_commit_setvar A > echo "Abbrev of A: $A" > ``` *nod* >> Anyway, looking at this whole test file with fresh eyes this pattern >> seems very strange. You duplicated most of test_commit with this >> test_commit_setvar. It's a bit more verbosity but why not just use: >> >> test_commit ... >> A=$(git rev-parse HEAD) > > The function "test_commit()" in "test-lib-function.sh" always creates > tags and it cannot make merge commit. So I rewrite a new function > which reuse the scaffold of "test_commit". It's had a --no-tag since 3803a3a099 (t: add --no-tag option to test_commit, 2021-02-09). I also have patches in "next" to add more options, you can just add more, having a --merge and maybe a way to tell it to eval the rev-parse into a given variable seem like sensible additions. > BTW, sorry for the late reply, will send patch later. My main point was that looking at this I think it's very much over the complexity v.s. benefit line on the "complexity" side. Even if there wasn't a --no-tag just using "test_commit" with a "git tag -d" and "commit_X=$(git rev-parse HEAD)" is less magical and more readable. I.e. the mostly copy/pasted from test-lib-functions.sh function is ~70 lines, the whole setup function is 50 lines. And as I noted with the whitespace getting lost in the munging the end result is actually less reliable than just doing a test_cmp with $(git rev-parse ...) instead of <COMMIT-XYZ>. If you were trying to avoid the whitespace warnings then see the "'s/Z$//'" pattern in t0000-basic.sh for how we've usually tested that, i.e. had a "Z" at the end mark intentional whitespace for test_cmp-alike. There's a big value in the test suite being mostly consistent (which it somewhat isn't, but we're hopefully getting there). I.e. the goal isn't to optimize each test file to be as small as possible, but to e.g. have the next person maintaining it not wondering where <COMMIT-P> comes from, understanding some test_commit-alike that eval's variables into existence, how it's subtly different (if at all) from test_commit etc.
Ævar Arnfjörð Bjarmason <avarab@gmail.com> 于2021年6月1日周二 下午8:04写道: > > > On Tue, Jun 01 2021, Jiang Xin wrote: > > > Ævar Arnfjörð Bjarmason <avarab@gmail.com> 于2021年5月27日周四 下午8:49写道: > >> > >> But no, the issue as it turns out is not Perl v.s. Sed, it's that > >> there's some bug in the shellscript / tooling version (happens with both > >> dash 0.5.7-4 and bash 4.3-11+deb8u2 on that box) where those expansions > >> like ${A%${A#??????0?}} resolve to nothing. > > > > That's the root cause. It can be reproduced by running the following > > test script: > > > > ``` > > #!/bin/sh > > # test script: test.sh > > > > test_commit_setvar () { > > var=$1 && > > oid=1234567890123456789012345678901234567890 && > > eval $var=$oid > > } > > > > test_commit_setvar A > > echo "A: $A" > > echo "Abbrev of A: ${A%${A#???????}}" > > ``` > > > > By running different version of dash, we can see that dash 0.5.7 fail the test: > > > > ``` > > $ /opt/dash/0.5.11/bin/dash test.sh > > A: 1234567890123456789012345678901234567890 > > Abbrev of A: 1234567 > > > > $ /opt/dash/0.5.7/bin/dash test.sh > > A: 1234567890123456789012345678901234567890 > > Abbrev of A: > > ``` > > > > This issue can be fixed using the following example: > > > > ``` > > #!/bin/sh > > > > test_commit_setvar () { > > var=$1 && > > oid=1234567890123456789012345678901234567890 && > > suffix=${oid#???????} && > > oid=${oid%$suffix} && > > eval $var=$oid > > } > > > > test_commit_setvar A > > echo "Abbrev of A: $A" > > ``` > > *nod* > > >> Anyway, looking at this whole test file with fresh eyes this pattern > >> seems very strange. You duplicated most of test_commit with this > >> test_commit_setvar. It's a bit more verbosity but why not just use: > >> > >> test_commit ... > >> A=$(git rev-parse HEAD) > > > > The function "test_commit()" in "test-lib-function.sh" always creates > > tags and it cannot make merge commit. So I rewrite a new function > > which reuse the scaffold of "test_commit". > > It's had a --no-tag since 3803a3a099 (t: add --no-tag option to > test_commit, 2021-02-09). I also have patches in "next" to add more > options, you can just add more, having a --merge and maybe a way to tell > it to eval the rev-parse into a given variable seem like sensible > additions. > > > BTW, sorry for the late reply, will send patch later. > > My main point was that looking at this I think it's very much over the > complexity v.s. benefit line on the "complexity" side. > > Even if there wasn't a --no-tag just using "test_commit" with a "git tag > -d" and "commit_X=$(git rev-parse HEAD)" is less magical and more > readable. > > I.e. the mostly copy/pasted from test-lib-functions.sh function is ~70 > lines, the whole setup function is 50 lines. > > And as I noted with the whitespace getting lost in the munging the end > result is actually less reliable than just doing a test_cmp with $(git > rev-parse ...) instead of <COMMIT-XYZ>. > > If you were trying to avoid the whitespace warnings then see the > "'s/Z$//'" pattern in t0000-basic.sh for how we've usually tested that, > i.e. had a "Z" at the end mark intentional whitespace for > test_cmp-alike. > > There's a big value in the test suite being mostly consistent (which it > somewhat isn't, but we're hopefully getting there). I.e. the goal isn't > to optimize each test file to be as small as possible, but to e.g. have > the next person maintaining it not wondering where <COMMIT-P> comes > from, understanding some test_commit-alike that eval's variables into > existence, how it's subtly different (if at all) from test_commit etc. Will send a patch for quick fix for t6020 which is broken on older version of bash. After changes on "test_commit()" of "test-lib-function.sh" has been merge to master branch, I will try to refactor t6020 again to remove `test_commit_setvar()` and reuse `test_commit()`.
diff --git a/t/t5510-fetch.sh b/t/t5510-fetch.sh index 2013051a64..1e398380eb 100755 --- a/t/t5510-fetch.sh +++ b/t/t5510-fetch.sh @@ -6,22 +6,10 @@ test_description='Per branch config variables affects "git fetch". ' . ./test-lib.sh +. "$TEST_DIRECTORY"/test-bundle-functions.sh D=$(pwd) -test_bundle_object_count () { - git verify-pack -v "$1" >verify.out && - test "$2" = $(grep "^$OID_REGEX " verify.out | wc -l) -} - -convert_bundle_to_pack () { - while read x && test -n "$x" - do - :; - done - cat -} - test_expect_success setup ' echo >file original && git add file && @@ -312,9 +300,7 @@ test_expect_success 'unbundle 1' ' test_expect_success 'bundle 1 has only 3 files ' ' cd "$D" && - convert_bundle_to_pack <bundle1 >bundle.pack && - git index-pack bundle.pack && - test_bundle_object_count bundle.pack 3 + test_bundle_object_count bundle1 3 ' test_expect_success 'unbundle 2' ' @@ -329,9 +315,7 @@ test_expect_success 'bundle does not prerequisite objects' ' git add file2 && git commit -m add.file2 file2 && git bundle create bundle3 -1 HEAD && - convert_bundle_to_pack <bundle3 >bundle.pack && - git index-pack bundle.pack && - test_bundle_object_count bundle.pack 3 + test_bundle_object_count bundle3 3 ' test_expect_success 'bundle should be able to create a full history' ' @@ -884,9 +868,7 @@ test_expect_success 'all boundary commits are excluded' ' git merge otherside && ad=$(git log --no-walk --format=%ad HEAD) && git bundle create twoside-boundary.bdl main --since="$ad" && - convert_bundle_to_pack <twoside-boundary.bdl >twoside-boundary.pack && - pack=$(git index-pack --fix-thin --stdin <twoside-boundary.pack) && - test_bundle_object_count .git/objects/pack/pack-${pack##pack }.pack 3 + test_bundle_object_count --thin twoside-boundary.bdl 3 ' test_expect_success 'fetch --prune prints the remotes url' ' diff --git a/t/t6020-bundle-misc.sh b/t/t6020-bundle-misc.sh new file mode 100755 index 0000000000..201f34b5c3 --- /dev/null +++ b/t/t6020-bundle-misc.sh @@ -0,0 +1,394 @@ +#!/bin/sh +# +# Copyright (c) 2021 Jiang Xin +# + +test_description='Test git-bundle' + +GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main +export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME + +. ./test-lib.sh +. "$TEST_DIRECTORY"/test-bundle-functions.sh + +# Create a commit or tag and set the variable with the object ID. +test_commit_setvar () { + notick= + signoff= + indir= + merge= + tag= + var= + + while test $# != 0 + do + case "$1" in + --merge) + merge=t + ;; + --tag) + tag=t + ;; + --notick) + notick=t + ;; + --signoff) + signoff="$1" + ;; + -C) + shift + indir="$1" + ;; + -*) + echo >&2 "error: unknown option $1" + return 1 + ;; + *) + break + ;; + esac + shift + done + if test $# -lt 2 + then + echo >&2 "error: test_commit_setvar must have at least 2 arguments" + return 1 + fi + var=$1 + shift + indir=${indir:+"$indir"/} + if test -z "$notick" + then + test_tick + fi && + if test -n "$merge" + then + git ${indir:+ -C "$indir"} merge --no-edit --no-ff \ + ${2:+-m "$2"} "$1" && + oid=$(git ${indir:+ -C "$indir"} rev-parse HEAD) + elif test -n "$tag" + then + git ${indir:+ -C "$indir"} tag -m "$1" "$1" "${2:-HEAD}" && + oid=$(git ${indir:+ -C "$indir"} rev-parse "$1") + else + file=${2:-"$1.t"} && + echo "${3-$1}" >"$indir$file" && + git ${indir:+ -C "$indir"} add "$file" && + git ${indir:+ -C "$indir"} commit $signoff -m "$1" && + oid=$(git ${indir:+ -C "$indir"} rev-parse HEAD) + fi && + eval $var=$oid +} + +# Format the output of git commands to make a user-friendly and stable +# text. We can easily prepare the expect text without having to worry +# about future changes of the commit ID and spaces of the output. +make_user_friendly_and_stable_output () { + sed \ + -e "s/${A%${A#???????}}[0-9a-f]*/<COMMIT-A>/g" \ + -e "s/${B%${B#???????}}[0-9a-f]*/<COMMIT-B>/g" \ + -e "s/${C%${C#???????}}[0-9a-f]*/<COMMIT-C>/g" \ + -e "s/${D%${D#???????}}[0-9a-f]*/<COMMIT-D>/g" \ + -e "s/${E%${E#???????}}[0-9a-f]*/<COMMIT-E>/g" \ + -e "s/${F%${F#???????}}[0-9a-f]*/<COMMIT-F>/g" \ + -e "s/${G%${G#???????}}[0-9a-f]*/<COMMIT-G>/g" \ + -e "s/${H%${H#???????}}[0-9a-f]*/<COMMIT-H>/g" \ + -e "s/${I%${I#???????}}[0-9a-f]*/<COMMIT-I>/g" \ + -e "s/${J%${J#???????}}[0-9a-f]*/<COMMIT-J>/g" \ + -e "s/${K%${K#???????}}[0-9a-f]*/<COMMIT-K>/g" \ + -e "s/${L%${L#???????}}[0-9a-f]*/<COMMIT-L>/g" \ + -e "s/${M%${M#???????}}[0-9a-f]*/<COMMIT-M>/g" \ + -e "s/${N%${N#???????}}[0-9a-f]*/<COMMIT-N>/g" \ + -e "s/${O%${O#???????}}[0-9a-f]*/<COMMIT-O>/g" \ + -e "s/${P%${P#???????}}[0-9a-f]*/<COMMIT-P>/g" \ + -e "s/${TAG1%${TAG1#???????}}[0-9a-f]*/<TAG-1>/g" \ + -e "s/${TAG2%${TAG2#???????}}[0-9a-f]*/<TAG-2>/g" \ + -e "s/${TAG3%${TAG3#???????}}[0-9a-f]*/<TAG-3>/g" \ + -e "s/ *\$//" +} + +# (C) (D, pull/1/head, topic/1) +# o --- o +# / \ (L) +# / \ o (H, topic/2) (M, tag:v2) +# / (F) \ / (N, tag:v3) +# / o --------- o (G, pull/2/head) o --- o --- o (release) +# / / \ \ / \ +# o --- o --- o -------- o -- o ------------------ o ------- o --- o (main) +# (A) (B) (E, tag:v1) (I) (J) (K) (O) (P) +# +test_expect_success 'setup' ' + # Try to make a stable fixed width for abbreviated commit ID, + # this fixed-width oid will be replaced with "<OID>". + git config core.abbrev 7 && + + # branch main: commit A & B + test_commit_setvar A "Commit A" main.txt && + test_commit_setvar B "Commit B" main.txt && + + # branch topic/1: commit C & D, refs/pull/1/head + git checkout -b topic/1 && + test_commit_setvar C "Commit C" topic-1.txt && + test_commit_setvar D "Commit D" topic-1.txt && + git update-ref refs/pull/1/head HEAD && + + # branch topic/1: commit E, tag v1 + git checkout main && + test_commit_setvar E "Commit E" main.txt && + test_commit_setvar --tag TAG1 v1 && + + # branch topic/2: commit F & G, refs/pull/2/head + git checkout -b topic/2 && + test_commit_setvar F "Commit F" topic-2.txt && + test_commit_setvar G "Commit G" topic-2.txt && + git update-ref refs/pull/2/head HEAD && + test_commit_setvar H "Commit H" topic-2.txt && + + # branch main: merge commit I & J + git checkout main && + test_commit_setvar --merge I topic/1 "Merge commit I" && + test_commit_setvar --merge J refs/pull/2/head "Merge commit J" && + + # branch main: commit K + git checkout main && + test_commit_setvar K "Commit K" main.txt && + + # branch release: + git checkout -b release && + test_commit_setvar L "Commit L" release.txt && + test_commit_setvar M "Commit M" release.txt && + test_commit_setvar --tag TAG2 v2 && + test_commit_setvar N "Commit N" release.txt && + test_commit_setvar --tag TAG3 v3 && + + # branch main: merge commit O, commit P + git checkout main && + test_commit_setvar --merge O tags/v2 "Merge commit O" && + test_commit_setvar P "Commit P" main.txt +' + +test_expect_failure 'create bundle from special rev: main^!' ' + git bundle create special-rev.bdl "main^!" && + + git bundle list-heads special-rev.bdl | + make_user_friendly_and_stable_output >actual && + cat >expect <<-\EOF && + <COMMIT-P> refs/heads/main + EOF + test_i18ncmp expect actual && + + git bundle verify special-rev.bdl | + make_user_friendly_and_stable_output >actual && + cat >expect <<-\EOF && + The bundle contains this ref: + <COMMIT-P> refs/heads/main + The bundle requires this ref: + <COMMIT-O> + EOF + test_i18ncmp expect actual && + + test_bundle_object_count special-rev.bdl 3 +' + +test_expect_success 'create bundle with --max-count option' ' + git bundle create max-count.bdl --max-count 1 \ + main \ + "^release" \ + refs/tags/v1 \ + refs/pull/1/head \ + refs/pull/2/head && + + git bundle verify max-count.bdl | + make_user_friendly_and_stable_output >actual && + cat >expect <<-\EOF && + The bundle contains these 2 refs: + <COMMIT-P> refs/heads/main + <TAG-1> refs/tags/v1 + The bundle requires this ref: + <COMMIT-O> + EOF + test_i18ncmp expect actual && + + test_bundle_object_count max-count.bdl 4 +' + +test_expect_success 'create bundle with --since option' ' + git log -1 --pretty="%ad" $M >actual && + cat >expect <<-\EOF && + Thu Apr 7 15:26:13 2005 -0700 + EOF + test_cmp expect actual && + + git bundle create since.bdl \ + --since "Thu Apr 7 15:27:00 2005 -0700" \ + --all && + + git bundle verify since.bdl | + make_user_friendly_and_stable_output >actual && + cat >expect <<-\EOF && + The bundle contains these 5 refs: + <COMMIT-P> refs/heads/main + <COMMIT-N> refs/heads/release + <TAG-2> refs/tags/v2 + <TAG-3> refs/tags/v3 + <COMMIT-P> HEAD + The bundle requires these 2 refs: + <COMMIT-M> + <COMMIT-K> + EOF + test_i18ncmp expect actual && + + test_bundle_object_count --thin since.bdl 13 +' + +test_expect_success 'create bundle 1 - no prerequisites' ' + git bundle create 1.bdl topic/1 topic/2 && + + cat >expect <<-\EOF && + The bundle contains these 2 refs: + <COMMIT-D> refs/heads/topic/1 + <COMMIT-H> refs/heads/topic/2 + The bundle records a complete history. + EOF + + # verify bundle, which has no prerequisites + git bundle verify 1.bdl | + make_user_friendly_and_stable_output >actual && + test_i18ncmp expect actual && + + test_bundle_object_count 1.bdl 24 +' + +test_expect_success 'create bundle 2 - has prerequisites' ' + git bundle create 2.bdl \ + --ignore-missing \ + ^topic/deleted \ + ^$D \ + ^topic/2 \ + release && + + cat >expect <<-\EOF && + The bundle contains this ref: + <COMMIT-N> refs/heads/release + The bundle requires these 3 refs: + <COMMIT-D> + <COMMIT-E> + <COMMIT-G> + EOF + + git bundle verify 2.bdl | + make_user_friendly_and_stable_output >actual && + test_i18ncmp expect actual && + + test_bundle_object_count 2.bdl 16 +' + +test_expect_success 'fail to verify bundle without prerequisites' ' + git init --bare test1.git && + + cat >expect <<-\EOF && + error: Repository lacks these prerequisite commits: + error: <COMMIT-D> + error: <COMMIT-E> + error: <COMMIT-G> + EOF + + test_must_fail git -C test1.git bundle verify ../2.bdl 2>&1 | + make_user_friendly_and_stable_output >actual && + test_i18ncmp expect actual +' + +test_expect_success 'create bundle 3 - two refs, same object' ' + git bundle create --version=3 3.bdl \ + ^release \ + ^topic/1 \ + ^topic/2 \ + main \ + HEAD && + + cat >expect <<-\EOF && + The bundle contains these 2 refs: + <COMMIT-P> refs/heads/main + <COMMIT-P> HEAD + The bundle requires these 2 refs: + <COMMIT-M> + <COMMIT-K> + EOF + + git bundle verify 3.bdl | + make_user_friendly_and_stable_output >actual && + test_i18ncmp expect actual && + + test_bundle_object_count 3.bdl 4 +' + +test_expect_success 'create bundle 4 - with tags' ' + git bundle create 4.bdl \ + ^main \ + ^release \ + ^topic/1 \ + ^topic/2 \ + --all && + + cat >expect <<-\EOF && + The bundle contains these 3 refs: + <TAG-1> refs/tags/v1 + <TAG-2> refs/tags/v2 + <TAG-3> refs/tags/v3 + The bundle records a complete history. + EOF + + git bundle verify 4.bdl | + make_user_friendly_and_stable_output >actual && + test_i18ncmp expect actual && + + test_bundle_object_count 4.bdl 3 +' + +test_expect_success 'clone from bundle' ' + git clone --mirror 1.bdl mirror.git && + git -C mirror.git show-ref | + make_user_friendly_and_stable_output >actual && + cat >expect <<-\EOF && + <COMMIT-D> refs/heads/topic/1 + <COMMIT-H> refs/heads/topic/2 + EOF + test_cmp expect actual && + + git -C mirror.git fetch ../2.bdl "+refs/*:refs/*" && + git -C mirror.git show-ref | + make_user_friendly_and_stable_output >actual && + cat >expect <<-\EOF && + <COMMIT-N> refs/heads/release + <COMMIT-D> refs/heads/topic/1 + <COMMIT-H> refs/heads/topic/2 + EOF + test_cmp expect actual && + + git -C mirror.git fetch ../3.bdl "+refs/*:refs/*" && + git -C mirror.git show-ref | + make_user_friendly_and_stable_output >actual && + cat >expect <<-\EOF && + <COMMIT-P> refs/heads/main + <COMMIT-N> refs/heads/release + <COMMIT-D> refs/heads/topic/1 + <COMMIT-H> refs/heads/topic/2 + EOF + test_cmp expect actual && + + git -C mirror.git fetch ../4.bdl "+refs/*:refs/*" && + git -C mirror.git show-ref | + make_user_friendly_and_stable_output >actual && + cat >expect <<-\EOF && + <COMMIT-P> refs/heads/main + <COMMIT-N> refs/heads/release + <COMMIT-D> refs/heads/topic/1 + <COMMIT-H> refs/heads/topic/2 + <TAG-1> refs/tags/v1 + <TAG-2> refs/tags/v2 + <TAG-3> refs/tags/v3 + EOF + test_cmp expect actual +' + +test_done diff --git a/t/test-bundle-functions.sh b/t/test-bundle-functions.sh new file mode 100644 index 0000000000..cf7ed818b2 --- /dev/null +++ b/t/test-bundle-functions.sh @@ -0,0 +1,42 @@ +# Library of git-bundle related functions. + +# Display the pack data contained in the bundle file, bypassing the +# header that contains the signature, prerequisites and references. +convert_bundle_to_pack () { + while read x && test -n "$x" + do + :; + done + cat +} + +# Check count of objects in a bundle file. +# We can use "--thin" opiton to check thin pack, which must be fixed by +# command `git-index-pack --fix-thin --stdin`. +test_bundle_object_count () { + thin= + if test "$1" = "--thin" + then + thin=t + shift + fi + if test $# -ne 2 + then + echo >&2 "args should be: <bundle> <count>" + return 1 + fi + bundle=$1 + pack=$bundle.pack + convert_bundle_to_pack <"$bundle" >"$pack" && + if test -n "$thin" + then + mv "$pack" "$bundle.thin.pack" && + git index-pack --stdin --fix-thin "$pack" <"$bundle.thin.pack" + else + git index-pack "$pack" + fi || return 1 + count=$(git show-index <"${pack%pack}idx" | wc -l) && + test $2 = $count && return 0 + echo >&2 "error: object count for $bundle is $count, not $2" + return 1 +}