diff mbox series

[v2,26/26] t/perf: add performance tests for multi-pack reuse

Message ID 94e5ae4cf6e0f53d4141fc486f32d73d168cf993.1702592604.git.me@ttaylorr.com (mailing list archive)
State Accepted
Commit ba47d88795e12193e7b0fffc5130757a5517a5da
Headers show
Series pack-objects: multi-pack verbatim reuse | expand

Commit Message

Taylor Blau Dec. 14, 2023, 10:24 p.m. UTC
To ensure that we don't regress either the size or runtime performance
of multi-pack reuse, add a performance test to measure both of these.

The test partitions the objects in GIT_TEST_PERF_LARGE_REPO into 1, 10,
and 100 packs, and then performs a "clone" at each stage twice: once
with single-pack reuse and once with multi-pack reuse.

Note that the `repack_into_n_chunks()` function in this new test script
differs from the existing `repack_into_n()`. The former partitions the
repository's history into N roughly equal-sized chunks of consecutive
commits (plus a final pack for any leftover objects), while the latter
produces N packs of five commits each (plus their objects), and then
another pack with the remainder.

On git.git, I can produce the following results on my machine:

    Test                                                            this tree
    --------------------------------------------------------------------------------
    5332.3: clone for 1-pack scenario (single-pack reuse)           1.57(2.99+0.15)
    5332.4: clone size for 1-pack scenario (single-pack reuse)               231.8M
    5332.5: clone for 1-pack scenario (multi-pack reuse)            1.79(2.96+0.21)
    5332.6: clone size for 1-pack scenario (multi-pack reuse)                231.7M
    5332.9: clone for 10-pack scenario (single-pack reuse)          3.89(16.75+0.35)
    5332.10: clone size for 10-pack scenario (single-pack reuse)             209.9M
    5332.11: clone for 10-pack scenario (multi-pack reuse)          1.56(2.99+0.17)
    5332.12: clone size for 10-pack scenario (multi-pack reuse)              224.4M
    5332.15: clone for 100-pack scenario (single-pack reuse)        8.24(54.31+0.59)
    5332.16: clone size for 100-pack scenario (single-pack reuse)            278.3M
    5332.17: clone for 100-pack scenario (multi-pack reuse)         2.13(2.44+0.33)
    5332.18: clone size for 100-pack scenario (multi-pack reuse)             357.9M

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 t/perf/p5332-multi-pack-reuse.sh | 81 ++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)
 create mode 100755 t/perf/p5332-multi-pack-reuse.sh
diff mbox series

Patch

diff --git a/t/perf/p5332-multi-pack-reuse.sh b/t/perf/p5332-multi-pack-reuse.sh
new file mode 100755
index 0000000000..5c6c575d62
--- /dev/null
+++ b/t/perf/p5332-multi-pack-reuse.sh
@@ -0,0 +1,81 @@ 
+#!/bin/sh
+
+test_description='tests pack performance with multi-pack reuse'
+
+. ./perf-lib.sh
+. "${TEST_DIRECTORY}/perf/lib-pack.sh"
+
+packdir=.git/objects/pack
+
+test_perf_large_repo
+
+find_pack () {
+	for idx in $packdir/pack-*.idx
+	do
+		if git show-index <$idx | grep -q "$1"
+		then
+			basename $idx
+		fi || return 1
+	done
+}
+
+repack_into_n_chunks () {
+	git repack -adk &&
+
+	test "$1" -eq 1 && return ||
+
+	find $packdir -type f | sort >packs.before &&
+
+	# partition the repository into $1 chunks of consecutive commits, and
+	# then create $1 packs with the objects reachable from each chunk
+	# (excluding any objects reachable from the previous chunks)
+	sz="$(($(git rev-list --count --all) / $1))"
+	for rev in $(git rev-list --all | awk "NR % $sz == 0" | tac)
+	do
+		pack="$(echo "$rev" | git pack-objects --revs \
+			--honor-pack-keep --delta-base-offset $packdir/pack)" &&
+		touch $packdir/pack-$pack.keep || return 1
+	done
+
+	# grab any remaining objects not packed by the previous step(s)
+	git pack-objects --revs --all --honor-pack-keep --delta-base-offset \
+		$packdir/pack &&
+
+	find $packdir -type f | sort >packs.after &&
+
+	# and install the whole thing
+	for f in $(comm -12 packs.before packs.after)
+	do
+		rm -f "$f" || return 1
+	done
+	rm -fr $packdir/*.keep
+}
+
+for nr_packs in 1 10 100
+do
+	test_expect_success "create $nr_packs-pack scenario" '
+		repack_into_n_chunks $nr_packs
+	'
+
+	test_expect_success "setup bitmaps for $nr_packs-pack scenario" '
+		find $packdir -type f -name "*.idx" | sed -e "s/.*\/\(.*\)$/+\1/g" |
+		git multi-pack-index write --stdin-packs --bitmap \
+			--preferred-pack="$(find_pack $(git rev-parse HEAD))"
+	'
+
+	for reuse in single multi
+	do
+		test_perf "clone for $nr_packs-pack scenario ($reuse-pack reuse)" "
+			git for-each-ref --format='%(objectname)' refs/heads refs/tags >in &&
+			git -c pack.allowPackReuse=$reuse pack-objects \
+				--revs --delta-base-offset --use-bitmap-index \
+				--stdout <in >result
+		"
+
+		test_size "clone size for $nr_packs-pack scenario ($reuse-pack reuse)" '
+			wc -c <result
+		'
+	done
+done
+
+test_done