diff mbox series

[v4,4/4] unbundle: introduce option VERIFY_BUNDLE_FSCK_FOLLOW_FETCH

Message ID 68b9bca9f8b19897997c2adc9a278ac5052e75cd.1717057290.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series object checking related additions and fixes for bundles in fetches | expand

Commit Message

Xing Xin May 30, 2024, 8:21 a.m. UTC
From: Xing Xin <xingxin.xx@bytedance.com>

This commit introduces a new option `VERIFY_BUNDLE_FSCK_FOLLOW_FETCH` to
`verify_bundle_flags`. In `bundle.c:unbundle`, this new option controls
whether broken object checks should be enabled by invoking
`fetch-pack.c:fetch_pack_fsck_objects`. Note that the option
`VERIFY_BUNDLE_FSCK_ALWAYS` takes precedence over
`VERIFY_BUNDLE_FSCK_FOLLOW_FETCH`.

This flag is now used in the fetching process by:

- `transport.c:fetch_refs_from_bundle` for direct bundle fetches.
- `bundle-uri.c:unbundle_from_file` for bundle-uri enabled fetches.

This addition ensures consistent logic for object verification during
fetch operations. Tests have been added to confirm functionality in the
scenarios mentioned above.

Signed-off-by: Xing Xin <xingxin.xx@bytedance.com>
---
 bundle-uri.c                |  2 +-
 bundle.c                    |  7 +++++++
 bundle.h                    |  1 +
 t/t5558-clone-bundle-uri.sh | 35 ++++++++++++++++++++++++++++++++++-
 t/t5607-clone-bundle.sh     | 33 +++++++++++++++++++++++++++++++++
 transport.c                 |  2 +-
 6 files changed, 77 insertions(+), 3 deletions(-)

Comments

Patrick Steinhardt June 6, 2024, 12:06 p.m. UTC | #1
On Thu, May 30, 2024 at 08:21:30AM +0000, Xing Xin via GitGitGadget wrote:
> From: Xing Xin <xingxin.xx@bytedance.com>

Same here, the important part is not that we introduce the flag, but
that we start using it in `unbundle_from_file()`.

> diff --git a/bundle-uri.c b/bundle-uri.c
> index 066ff788104..e7ebac6ce57 100644
> --- a/bundle-uri.c
> +++ b/bundle-uri.c
> @@ -373,7 +373,7 @@ static int unbundle_from_file(struct repository *r, const char *file)
>  	 * the prerequisite commits.
>  	 */
>  	if ((result = unbundle(r, &header, bundle_fd, NULL,
> -			       VERIFY_BUNDLE_QUIET | VERIFY_BUNDLE_FSCK_ALWAYS)))
> +			       VERIFY_BUNDLE_QUIET | VERIFY_BUNDLE_FSCK_FOLLOW_FETCH)))
>  		return 1;
>  
>  	/*

One thing that is a bit weird is that we first change `unbundle()` to
use `FSCK_ALWAYS` in a preceding patch, and then convert it to use
`FSCK_FOLLOW_FETCH` in the same series. It could be restructured a bit
to first introduce the flags, only, while not modifying any of the
callsites yet. Passing the respective flags would then be done in a
separate commit.

Patrick
Xing Xin June 11, 2024, 6:46 a.m. UTC | #2
At 2024-06-06 20:06:47, "Patrick Steinhardt" <ps@pks.im> wrote:
>On Thu, May 30, 2024 at 08:21:30AM +0000, Xing Xin via GitGitGadget wrote:
>> From: Xing Xin <xingxin.xx@bytedance.com>
>
>Same here, the important part is not that we introduce the flag, but
>that we start using it in `unbundle_from_file()`.
>
>> diff --git a/bundle-uri.c b/bundle-uri.c
>> index 066ff788104..e7ebac6ce57 100644
>> --- a/bundle-uri.c
>> +++ b/bundle-uri.c
>> @@ -373,7 +373,7 @@ static int unbundle_from_file(struct repository *r, const char *file)
>>  	 * the prerequisite commits.
>>  	 */
>>  	if ((result = unbundle(r, &header, bundle_fd, NULL,
>> -			       VERIFY_BUNDLE_QUIET | VERIFY_BUNDLE_FSCK_ALWAYS)))
>> +			       VERIFY_BUNDLE_QUIET | VERIFY_BUNDLE_FSCK_FOLLOW_FETCH)))
>>  		return 1;
>>  
>>  	/*
>
>One thing that is a bit weird is that we first change `unbundle()` to
>use `FSCK_ALWAYS` in a preceding patch, and then convert it to use
>`FSCK_FOLLOW_FETCH` in the same series. It could be restructured a bit
>to first introduce the flags, only, while not modifying any of the
>callsites yet. Passing the respective flags would then be done in a
>separate commit.

This makes sense to me, thanks!

Xing Xin
diff mbox series

Patch

diff --git a/bundle-uri.c b/bundle-uri.c
index 066ff788104..e7ebac6ce57 100644
--- a/bundle-uri.c
+++ b/bundle-uri.c
@@ -373,7 +373,7 @@  static int unbundle_from_file(struct repository *r, const char *file)
 	 * the prerequisite commits.
 	 */
 	if ((result = unbundle(r, &header, bundle_fd, NULL,
-			       VERIFY_BUNDLE_QUIET | VERIFY_BUNDLE_FSCK_ALWAYS)))
+			       VERIFY_BUNDLE_QUIET | VERIFY_BUNDLE_FSCK_FOLLOW_FETCH)))
 		return 1;
 
 	/*
diff --git a/bundle.c b/bundle.c
index 26574e74bdd..53ac73834ea 100644
--- a/bundle.c
+++ b/bundle.c
@@ -17,6 +17,7 @@ 
 #include "list-objects-filter-options.h"
 #include "connected.h"
 #include "write-or-die.h"
+#include "fetch-pack.h"
 
 static const char v2_bundle_signature[] = "# v2 git bundle\n";
 static const char v3_bundle_signature[] = "# v3 git bundle\n";
@@ -615,6 +616,7 @@  int unbundle(struct repository *r, struct bundle_header *header,
 	     enum verify_bundle_flags flags)
 {
 	struct child_process ip = CHILD_PROCESS_INIT;
+	int fsck_objects = 0;
 
 	if (verify_bundle(r, header, flags))
 		return -1;
@@ -626,6 +628,11 @@  int unbundle(struct repository *r, struct bundle_header *header,
 		strvec_push(&ip.args, "--promisor=from-bundle");
 
 	if (flags & VERIFY_BUNDLE_FSCK_ALWAYS)
+		fsck_objects = 1;
+	else if (flags & VERIFY_BUNDLE_FSCK_FOLLOW_FETCH)
+		fsck_objects = fetch_pack_fsck_objects();
+
+	if (fsck_objects)
 		strvec_push(&ip.args, "--fsck-objects");
 
 	if (extra_index_pack_args) {
diff --git a/bundle.h b/bundle.h
index cf23c8615d3..a39d8ea1a7e 100644
--- a/bundle.h
+++ b/bundle.h
@@ -34,6 +34,7 @@  enum verify_bundle_flags {
 	VERIFY_BUNDLE_VERBOSE = (1 << 0),
 	VERIFY_BUNDLE_QUIET = (1 << 1),
 	VERIFY_BUNDLE_FSCK_ALWAYS = (1 << 2),
+	VERIFY_BUNDLE_FSCK_FOLLOW_FETCH = (1 << 3),
 };
 
 int verify_bundle(struct repository *r, struct bundle_header *header,
diff --git a/t/t5558-clone-bundle-uri.sh b/t/t5558-clone-bundle-uri.sh
index 8f4f802e4f1..48be1b18802 100755
--- a/t/t5558-clone-bundle-uri.sh
+++ b/t/t5558-clone-bundle-uri.sh
@@ -30,7 +30,21 @@  test_expect_success 'create bundle' '
 		git bundle create B.bundle topic &&
 
 		# Create a bundle with reference pointing to non-existent object.
-		sed "s/$(git rev-parse A)/$(git rev-parse B)/" <A.bundle >bad-header.bundle
+		sed "s/$(git rev-parse A)/$(git rev-parse B)/" <A.bundle >bad-header.bundle &&
+
+		cat >data <<-EOF &&
+		tree $(git rev-parse HEAD^{tree})
+		parent $(git rev-parse HEAD)
+		author A U Thor
+		committer A U Thor
+
+		commit: this is a commit with bad emails
+
+		EOF
+		git hash-object --literally -t commit -w --stdin <data >commit &&
+		git branch bad $(cat commit) &&
+		git bundle create bad-object.bundle bad &&
+		git update-ref -d refs/heads/bad
 	)
 '
 
@@ -52,6 +66,25 @@  test_expect_success 'clone with bundle that has bad header' '
 	! grep "refs/bundles/" refs
 '
 
+test_expect_success 'clone with bundle that has bad object' '
+	# Unbundle succeeds if fsckObjects is not configured.
+	git clone --bundle-uri="clone-from/bad-object.bundle" \
+		clone-from clone-bad-object-no-fsck &&
+	git -C clone-bad-object-no-fsck for-each-ref --format="%(refname)" >refs &&
+	grep "refs/bundles/" refs >actual &&
+	cat >expect <<-\EOF &&
+	refs/bundles/bad
+	EOF
+	test_cmp expect actual &&
+
+	# Unbundle fails with fsckObjects set true, but clone can still proceed.
+	git -c fetch.fsckObjects=true clone --bundle-uri="clone-from/bad-object.bundle" \
+		clone-from clone-bad-object-fsck 2>err &&
+	test_grep "missingEmail" err &&
+	git -C clone-bad-object-fsck for-each-ref --format="%(refname)" >refs &&
+	! grep "refs/bundles/" refs
+'
+
 test_expect_success 'clone with path bundle and non-default hash' '
 	test_when_finished "rm -rf clone-path-non-default-hash" &&
 	GIT_DEFAULT_HASH=sha256 git clone --bundle-uri="clone-from/B.bundle" \
diff --git a/t/t5607-clone-bundle.sh b/t/t5607-clone-bundle.sh
index 0d1e92d9963..5182efc0b45 100755
--- a/t/t5607-clone-bundle.sh
+++ b/t/t5607-clone-bundle.sh
@@ -138,6 +138,39 @@  test_expect_success 'fetch SHA-1 from bundle' '
 	git fetch --no-tags foo/tip.bundle "$(cat hash)"
 '
 
+test_expect_success 'clone bundle with different fsckObjects configurations' '
+	test_create_repo bundle-fsck &&
+	(
+		cd bundle-fsck &&
+		test_commit first &&
+		cat >data <<-EOF &&
+		tree $(git rev-parse HEAD^{tree})
+		parent $(git rev-parse HEAD)
+		author A U Thor
+		committer A U Thor
+
+		commit: this is a commit with bad emails
+
+		EOF
+		git hash-object --literally -t commit -w --stdin <data >commit &&
+		git branch bad $(cat commit) &&
+		git bundle create bad.bundle bad
+	) &&
+
+	git clone bundle-fsck/bad.bundle bundle-no-fsck &&
+
+	git -c fetch.fsckObjects=false -c transfer.fsckObjects=true \
+		clone bundle-fsck/bad.bundle bundle-fetch-no-fsck &&
+
+	test_must_fail git -c fetch.fsckObjects=true \
+		clone bundle-fsck/bad.bundle bundle-fetch-fsck 2>err &&
+	test_grep "missingEmail" err &&
+
+	test_must_fail git -c transfer.fsckObjects=true \
+		clone bundle-fsck/bad.bundle bundle-transfer-fsck 2>err &&
+	test_grep "missingEmail" err
+'
+
 test_expect_success 'git bundle uses expected default format' '
 	git bundle create bundle HEAD^.. &&
 	cat >expect <<-EOF &&
diff --git a/transport.c b/transport.c
index 1b3d61ffcec..6cd5683bb45 100644
--- a/transport.c
+++ b/transport.c
@@ -184,7 +184,7 @@  static int fetch_refs_from_bundle(struct transport *transport,
 	if (!data->get_refs_from_bundle_called)
 		get_refs_from_bundle_inner(transport);
 	ret = unbundle(the_repository, &data->header, data->fd,
-		       &extra_index_pack_args, VERIFY_BUNDLE_FSCK_ALWAYS);
+		       &extra_index_pack_args, VERIFY_BUNDLE_FSCK_FOLLOW_FETCH);
 	transport->hash_algo = data->header.hash_algo;
 	return ret;
 }