diff mbox series

[v2] builtin/repack.c: invalidate MIDX only when necessary

Message ID 87a3b7a5a2f091e2a23c163a7d86effbbbedfa3a.1598371475.git.me@ttaylorr.com (mailing list archive)
State Accepted
Commit e08f7bb09369d739ef03e1ac0180140a68e70982
Headers show
Series [v2] builtin/repack.c: invalidate MIDX only when necessary | expand

Commit Message

Taylor Blau Aug. 25, 2020, 4:04 p.m. UTC
In 525e18c04b (midx: clear midx on repack, 2018-07-12), 'git repack'
learned to remove a multi-pack-index file if it added or removed a pack
from the object store.

This mechanism is a little over-eager, since it is only necessary to
drop a MIDX if 'git repack' removes a pack that the MIDX references.
Adding a pack outside of the MIDX does not require invalidating the
MIDX, and likewise for removing a pack the MIDX does not know about.

Teach 'git repack' to check for this by loading the MIDX, and checking
whether the to-be-removed pack is known to the MIDX. This requires a
slightly odd alternation to a test in t5319, which is explained with a
comment. A new test is added to show that the MIDX is left alone when
both packs known to it are marked as .keep, but two packs unknown to it
are removed and combined into one new pack.

Helped-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
Range-diff against v1:
1:  ef9186a8df ! 1:  87a3b7a5a2 builtin/repack.c: invalidate MIDX only when necessary
    @@ Commit message
         Teach 'git repack' to check for this by loading the MIDX, and checking
         whether the to-be-removed pack is known to the MIDX. This requires a
         slightly odd alternation to a test in t5319, which is explained with a
    -    comment.
    +    comment. A new test is added to show that the MIDX is left alone when
    +    both packs known to it are marked as .keep, but two packs unknown to it
    +    are removed and combined into one new pack.
     
    +    Helped-by: Derrick Stolee <dstolee@microsoft.com>
         Signed-off-by: Taylor Blau <me@ttaylorr.com>
     
      ## builtin/repack.c ##
    @@ t/t5319-multi-pack-index.sh: test_expect_success 'repack with the --no-progress
      	test_path_is_missing $objdir/pack/multi-pack-index
      '
      
    -+test_expect_success 'repack preserves multi-pack-index when deleting unknown packs' '
    -+	git multi-pack-index write &&
    -+	cp $objdir/pack/multi-pack-index $objdir/pack/multi-pack-index.bak &&
    -+	test_when_finished "rm -f $objdir/pack/multi-pack-index.bak" &&
    -+
    -+	# Write a new pack that is unknown to the multi-pack-index.
    -+	git hash-object -w </dev/null >blob &&
    -+	git pack-objects $objdir/pack/pack <blob &&
    -+
    -+	GIT_TEST_MULTI_PACK_INDEX=0 git -c core.multiPackIndex repack -d &&
    -+	test_cmp_bin $objdir/pack/multi-pack-index \
    -+		$objdir/pack/multi-pack-index.bak
    ++test_expect_success 'repack preserves multi-pack-index when creating packs' '
    ++	git init preserve &&
    ++	test_when_finished "rm -fr preserve" &&
    ++	(
    ++		cd preserve &&
    ++		packdir=.git/objects/pack &&
    ++		midx=$packdir/multi-pack-index &&
    ++
    ++		test_commit 1 &&
    ++		pack1=$(git pack-objects --all $packdir/pack) &&
    ++		touch $packdir/pack-$pack1.keep &&
    ++		test_commit 2 &&
    ++		pack2=$(git pack-objects --revs $packdir/pack) &&
    ++		touch $packdir/pack-$pack2.keep &&
    ++
    ++		git multi-pack-index write &&
    ++		cp $midx $midx.bak &&
    ++
    ++		cat >pack-input <<-EOF &&
    ++		HEAD
    ++		^HEAD~1
    ++		EOF
    ++		test_commit 3 &&
    ++		pack3=$(git pack-objects --revs $packdir/pack <pack-input) &&
    ++		test_commit 4 &&
    ++		pack4=$(git pack-objects --revs $packdir/pack <pack-input) &&
    ++
    ++		GIT_TEST_MULTI_PACK_INDEX=0 git -c core.multiPackIndex repack -ad &&
    ++		ls -la $packdir &&
    ++		test_path_is_file $packdir/pack-$pack1.pack &&
    ++		test_path_is_file $packdir/pack-$pack2.pack &&
    ++		test_path_is_missing $packdir/pack-$pack3.pack &&
    ++		test_path_is_missing $packdir/pack-$pack4.pack &&
    ++		test_cmp_bin $midx.bak $midx
    ++	)
     +'
     +
      compare_results_with_midx "after repack"

 builtin/repack.c            | 12 +++++-----
 t/t5319-multi-pack-index.sh | 44 +++++++++++++++++++++++++++++++++++--
 2 files changed, 47 insertions(+), 9 deletions(-)

Comments

Derrick Stolee Aug. 26, 2020, 8:51 p.m. UTC | #1
On 8/25/2020 12:04 PM, Taylor Blau wrote:
> In 525e18c04b (midx: clear midx on repack, 2018-07-12), 'git repack'
> learned to remove a multi-pack-index file if it added or removed a pack
> from the object store.
> 
> This mechanism is a little over-eager, since it is only necessary to
> drop a MIDX if 'git repack' removes a pack that the MIDX references.
> Adding a pack outside of the MIDX does not require invalidating the
> MIDX, and likewise for removing a pack the MIDX does not know about.
> 
> Teach 'git repack' to check for this by loading the MIDX, and checking
> whether the to-be-removed pack is known to the MIDX. This requires a
> slightly odd alternation to a test in t5319, which is explained with a
> comment. A new test is added to show that the MIDX is left alone when
> both packs known to it are marked as .keep, but two packs unknown to it
> are removed and combined into one new pack.
> 
> Helped-by: Derrick Stolee <dstolee@microsoft.com>
> Signed-off-by: Taylor Blau <me@ttaylorr.com>
> ---
> Range-diff against v1:

I know this thread went in a new direction about pack-redundant and
dashed-git, but this version looks good to me. I wanted to make sure
it wasn't lost in the shuffle.

Thanks,
-Stolee
Junio C Hamano Aug. 26, 2020, 8:54 p.m. UTC | #2
Derrick Stolee <stolee@gmail.com> writes:

> On 8/25/2020 12:04 PM, Taylor Blau wrote:
>> In 525e18c04b (midx: clear midx on repack, 2018-07-12), 'git repack'
>> learned to remove a multi-pack-index file if it added or removed a pack
>> from the object store.
>> 
>> This mechanism is a little over-eager, since it is only necessary to
>> drop a MIDX if 'git repack' removes a pack that the MIDX references.
>> Adding a pack outside of the MIDX does not require invalidating the
>> MIDX, and likewise for removing a pack the MIDX does not know about.
>> 
>> Teach 'git repack' to check for this by loading the MIDX, and checking
>> whether the to-be-removed pack is known to the MIDX. This requires a
>> slightly odd alternation to a test in t5319, which is explained with a
>> comment. A new test is added to show that the MIDX is left alone when
>> both packs known to it are marked as .keep, but two packs unknown to it
>> are removed and combined into one new pack.
>> 
>> Helped-by: Derrick Stolee <dstolee@microsoft.com>
>> Signed-off-by: Taylor Blau <me@ttaylorr.com>
>> ---
>> Range-diff against v1:
>
> I know this thread went in a new direction about pack-redundant and
> dashed-git, but this version looks good to me. I wanted to make sure
> it wasn't lost in the shuffle.

Very much appreciated.  I was wondering if I'll see a final reroll
or v2 already had all we wanted.

Thanks.
diff mbox series

Patch

diff --git a/builtin/repack.c b/builtin/repack.c
index 04c5ceaf7e..98fac03946 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -133,7 +133,11 @@  static void get_non_kept_pack_filenames(struct string_list *fname_list,
 static void remove_redundant_pack(const char *dir_name, const char *base_name)
 {
 	struct strbuf buf = STRBUF_INIT;
-	strbuf_addf(&buf, "%s/%s.pack", dir_name, base_name);
+	struct multi_pack_index *m = get_multi_pack_index(the_repository);
+	strbuf_addf(&buf, "%s.pack", base_name);
+	if (m && midx_contains_pack(m, buf.buf))
+		clear_midx_file(the_repository);
+	strbuf_insertf(&buf, 0, "%s/", dir_name);
 	unlink_pack_path(buf.buf, 1);
 	strbuf_release(&buf);
 }
@@ -286,7 +290,6 @@  int cmd_repack(int argc, const char **argv, const char *prefix)
 	int keep_unreachable = 0;
 	struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
 	int no_update_server_info = 0;
-	int midx_cleared = 0;
 	struct pack_objects_args po_args = {NULL};
 
 	struct option builtin_repack_options[] = {
@@ -439,11 +442,6 @@  int cmd_repack(int argc, const char **argv, const char *prefix)
 		for (ext = 0; ext < ARRAY_SIZE(exts); ext++) {
 			char *fname, *fname_old;
 
-			if (!midx_cleared) {
-				clear_midx_file(the_repository);
-				midx_cleared = 1;
-			}
-
 			fname = mkpathdup("%s/pack-%s%s", packdir,
 						item->string, exts[ext].name);
 			if (!file_exists(fname)) {
diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh
index 43b1b5b2af..f340b376bc 100755
--- a/t/t5319-multi-pack-index.sh
+++ b/t/t5319-multi-pack-index.sh
@@ -382,12 +382,52 @@  test_expect_success 'repack with the --no-progress option' '
 	test_line_count = 0 err
 '
 
-test_expect_success 'repack removes multi-pack-index' '
+test_expect_success 'repack removes multi-pack-index when deleting packs' '
 	test_path_is_file $objdir/pack/multi-pack-index &&
-	GIT_TEST_MULTI_PACK_INDEX=0 git repack -adf &&
+	# Set GIT_TEST_MULTI_PACK_INDEX to 0 to avoid writing a new
+	# multi-pack-index after repacking, but set "core.multiPackIndex" to
+	# true so that "git repack" can read the existing MIDX.
+	GIT_TEST_MULTI_PACK_INDEX=0 git -c core.multiPackIndex repack -adf &&
 	test_path_is_missing $objdir/pack/multi-pack-index
 '
 
+test_expect_success 'repack preserves multi-pack-index when creating packs' '
+	git init preserve &&
+	test_when_finished "rm -fr preserve" &&
+	(
+		cd preserve &&
+		packdir=.git/objects/pack &&
+		midx=$packdir/multi-pack-index &&
+
+		test_commit 1 &&
+		pack1=$(git pack-objects --all $packdir/pack) &&
+		touch $packdir/pack-$pack1.keep &&
+		test_commit 2 &&
+		pack2=$(git pack-objects --revs $packdir/pack) &&
+		touch $packdir/pack-$pack2.keep &&
+
+		git multi-pack-index write &&
+		cp $midx $midx.bak &&
+
+		cat >pack-input <<-EOF &&
+		HEAD
+		^HEAD~1
+		EOF
+		test_commit 3 &&
+		pack3=$(git pack-objects --revs $packdir/pack <pack-input) &&
+		test_commit 4 &&
+		pack4=$(git pack-objects --revs $packdir/pack <pack-input) &&
+
+		GIT_TEST_MULTI_PACK_INDEX=0 git -c core.multiPackIndex repack -ad &&
+		ls -la $packdir &&
+		test_path_is_file $packdir/pack-$pack1.pack &&
+		test_path_is_file $packdir/pack-$pack2.pack &&
+		test_path_is_missing $packdir/pack-$pack3.pack &&
+		test_path_is_missing $packdir/pack-$pack4.pack &&
+		test_cmp_bin $midx.bak $midx
+	)
+'
+
 compare_results_with_midx "after repack"
 
 test_expect_success 'multi-pack-index and pack-bitmap' '