diff mbox series

[2/2] apply: do not use the_repository

Message ID 36b44eb4c18cfd805ccecd8df695b0d5ee9c409f.1716877921.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series Remove some usages of the_repository | expand

Commit Message

Philip Peterson May 28, 2024, 6:32 a.m. UTC
From: Philip Peterson <philip.c.peterson@gmail.com>

Because usage of the global the_repository is deprecated, remove
the usage of it in favor of a passed arg representing the repository.

Signed-off-by: Philip Peterson <philip.c.peterson@gmail.com>
---
 apply.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

Comments

Patrick Steinhardt May 28, 2024, 7:28 a.m. UTC | #1
On Tue, May 28, 2024 at 06:32:01AM +0000, Philip Peterson via GitGitGadget wrote:
> diff --git a/apply.c b/apply.c
> index 901b67e6255..364c05fbd06 100644
> --- a/apply.c
> +++ b/apply.c
> @@ -3218,13 +3218,13 @@ static int apply_binary(struct apply_state *state,
>  		return 0; /* deletion patch */
>  	}
>  
> -	if (has_object(the_repository, &oid, 0)) {
> +	if (has_object(state->repo, &oid, 0)) {
>  		/* We already have the postimage */
>  		enum object_type type;
>  		unsigned long size;
>  		char *result;
>  
> -		result = repo_read_object_file(the_repository, &oid, &type,
> +		result = repo_read_object_file(state->repo, &oid, &type,
>  					       &size);
>  		if (!result)
>  			return error(_("the necessary postimage %s for "

We call `get_oid_hex()` in this function, which ultimately ends up
accessing `the_repository` via `the_hash_algo`. We should likely convert
those to `repo_get_oid()`.

There are also other accesses to `the_hash_algo` in this function, which
should be converted to use `state->repo->hash_algo`, as well.

> @@ -3539,9 +3539,9 @@ static int three_way_merge(struct apply_state *state,
>  
>  	/* resolve trivial cases first */
>  	if (oideq(base, ours))
> -		return resolve_to(image, theirs);
> +		return resolve_to(state->repo, image, theirs);
>  	else if (oideq(base, theirs) || oideq(ours, theirs))
> -		return resolve_to(image, ours);
> +		return resolve_to(state->repo, image, ours);
>  
>  	read_mmblob(&base_file, base);
>  	read_mmblob(&our_file, ours);

The calls to `read_mmblob()` also end up accessing `the_repository`.

Other than that the patches look good to me. Thanks for working on this!

Patrick
Junio C Hamano May 28, 2024, 4:33 p.m. UTC | #2
Patrick Steinhardt <ps@pks.im> writes:

> On Tue, May 28, 2024 at 06:32:01AM +0000, Philip Peterson via GitGitGadget wrote:
>> diff --git a/apply.c b/apply.c
>> index 901b67e6255..364c05fbd06 100644
>> --- a/apply.c
>> +++ b/apply.c
>> @@ -3218,13 +3218,13 @@ static int apply_binary(struct apply_state *state,
>>  		return 0; /* deletion patch */
>>  	}
>>  
>> -	if (has_object(the_repository, &oid, 0)) {
>> +	if (has_object(state->repo, &oid, 0)) {
>>  		/* We already have the postimage */
>>  		enum object_type type;
>>  		unsigned long size;
>>  		char *result;
>>  
>> -		result = repo_read_object_file(the_repository, &oid, &type,
>> +		result = repo_read_object_file(state->repo, &oid, &type,
>>  					       &size);
>>  		if (!result)
>>  			return error(_("the necessary postimage %s for "
>
> We call `get_oid_hex()` in this function, which ultimately ends up
> accessing `the_repository` via `the_hash_algo`. We should likely convert
> those to `repo_get_oid()`.
>
> There are also other accesses to `the_hash_algo` in this function, which
> should be converted to use `state->repo->hash_algo`, as well.

We as a more experienced developers should come up with a way to
help developers who are less familiar with the API set, so that they
can chip in this effort to wean ourselves off of globals.

Would a bug like the ones you pointed out be easily caught by say
running "GIT_TEST_DEFAULT_HASH=sha256 make test"?

By the way, for commands like "git apply" that can and does work
outside a repository, a change to use state->repo instead of
the_repository is only half a story.  Dealing with cases where there
is no repository is the other half.  I _think_ we have drawn a
reasonable line to punt and refuse binary patches and three-way
fallback outside a repository, but there may be use cases that
benefit from being able to do these things in a tarball extract.
Patrick Steinhardt May 28, 2024, 5:22 p.m. UTC | #3
On Tue, May 28, 2024 at 09:33:07AM -0700, Junio C Hamano wrote:
> Patrick Steinhardt <ps@pks.im> writes:
> 
> > On Tue, May 28, 2024 at 06:32:01AM +0000, Philip Peterson via GitGitGadget wrote:
> >> diff --git a/apply.c b/apply.c
> >> index 901b67e6255..364c05fbd06 100644
> >> --- a/apply.c
> >> +++ b/apply.c
> >> @@ -3218,13 +3218,13 @@ static int apply_binary(struct apply_state *state,
> >>  		return 0; /* deletion patch */
> >>  	}
> >>  
> >> -	if (has_object(the_repository, &oid, 0)) {
> >> +	if (has_object(state->repo, &oid, 0)) {
> >>  		/* We already have the postimage */
> >>  		enum object_type type;
> >>  		unsigned long size;
> >>  		char *result;
> >>  
> >> -		result = repo_read_object_file(the_repository, &oid, &type,
> >> +		result = repo_read_object_file(state->repo, &oid, &type,
> >>  					       &size);
> >>  		if (!result)
> >>  			return error(_("the necessary postimage %s for "
> >
> > We call `get_oid_hex()` in this function, which ultimately ends up
> > accessing `the_repository` via `the_hash_algo`. We should likely convert
> > those to `repo_get_oid()`.
> >
> > There are also other accesses to `the_hash_algo` in this function, which
> > should be converted to use `state->repo->hash_algo`, as well.
> 
> We as a more experienced developers should come up with a way to
> help developers who are less familiar with the API set, so that they
> can chip in this effort to wean ourselves off of globals.
> 
> Would a bug like the ones you pointed out be easily caught by say
> running "GIT_TEST_DEFAULT_HASH=sha256 make test"?

No, I don't think so.

I was also thinking about different approaches to this problem overall.
Ideally, I would want to catch this on the programmatic level so that we
do not even have to detect this at runtime. And I think this should be
feasible by introducing something similar to the USE_THE_INDEX_VARIABLE
macro, which we have recently removed.

If we had a USE_THE_REPOSITORY_VARIABLE macro that guards declarations
of:

  - `the_repository`

  - `the_hash_algo`

  - Functions that implicitly rely on either of the above.

Then you could prove that a given code unit does not rely on any of the
above anymore by not declaring that macro.

In fact, these patches prompted me to give this a go this morning. And
while the changes are of course trivial by themselves, I quickly
discovered that they are also pointless because we almost always rely on
either of the above.

The most important reason of this is "hash.h", where `struct object_id`
falls back to `the_hash_algo` in case its own hash algorithm wasn't set.
Ideally, this would never be the case. But a quick test shows that there
are many places where we do rely on the fallback value, mostly around
null OIDs. Fixing this would be a necessary prerequisite.

Another important benefit of the macro would be that we stop working
against a moving target. New files shouldn't declare it, and once a file
has been refactored and the corresponding macro was removed we would
know that we don't add new dependencies on either of the above by
accident.

Patrick
Junio C Hamano May 28, 2024, 5:37 p.m. UTC | #4
Patrick Steinhardt <ps@pks.im> writes:

> If we had a USE_THE_REPOSITORY_VARIABLE macro that guards declarations
> of:
>
>   - `the_repository`
>
>   - `the_hash_algo`
>
>   - Functions that implicitly rely on either of the above.
>
> Then you could prove that a given code unit does not rely on any of the
> above anymore by not declaring that macro.

;-)
diff mbox series

Patch

diff --git a/apply.c b/apply.c
index 901b67e6255..364c05fbd06 100644
--- a/apply.c
+++ b/apply.c
@@ -3218,13 +3218,13 @@  static int apply_binary(struct apply_state *state,
 		return 0; /* deletion patch */
 	}
 
-	if (has_object(the_repository, &oid, 0)) {
+	if (has_object(state->repo, &oid, 0)) {
 		/* We already have the postimage */
 		enum object_type type;
 		unsigned long size;
 		char *result;
 
-		result = repo_read_object_file(the_repository, &oid, &type,
+		result = repo_read_object_file(state->repo, &oid, &type,
 					       &size);
 		if (!result)
 			return error(_("the necessary postimage %s for "
@@ -3278,7 +3278,7 @@  static int apply_fragments(struct apply_state *state, struct image *img, struct
 	return 0;
 }
 
-static int read_blob_object(struct strbuf *buf, const struct object_id *oid, unsigned mode)
+static int read_blob_object(struct repository *r, struct strbuf *buf, const struct object_id *oid, unsigned mode)
 {
 	if (S_ISGITLINK(mode)) {
 		strbuf_grow(buf, 100);
@@ -3288,7 +3288,7 @@  static int read_blob_object(struct strbuf *buf, const struct object_id *oid, uns
 		unsigned long sz;
 		char *result;
 
-		result = repo_read_object_file(the_repository, oid, &type,
+		result = repo_read_object_file(r, oid, &type,
 					       &sz);
 		if (!result)
 			return -1;
@@ -3298,11 +3298,11 @@  static int read_blob_object(struct strbuf *buf, const struct object_id *oid, uns
 	return 0;
 }
 
-static int read_file_or_gitlink(const struct cache_entry *ce, struct strbuf *buf)
+static int read_file_or_gitlink(struct repository *r, const struct cache_entry *ce, struct strbuf *buf)
 {
 	if (!ce)
 		return 0;
-	return read_blob_object(buf, &ce->oid, ce->ce_mode);
+	return read_blob_object(r, buf, &ce->oid, ce->ce_mode);
 }
 
 static struct patch *in_fn_table(struct apply_state *state, const char *name)
@@ -3443,12 +3443,12 @@  static int load_patch_target(struct apply_state *state,
 			     unsigned expected_mode)
 {
 	if (state->cached || state->check_index) {
-		if (read_file_or_gitlink(ce, buf))
+		if (read_file_or_gitlink(state->repo, ce, buf))
 			return error(_("failed to read %s"), name);
 	} else if (name) {
 		if (S_ISGITLINK(expected_mode)) {
 			if (ce)
-				return read_file_or_gitlink(ce, buf);
+				return read_file_or_gitlink(state->repo, ce, buf);
 			else
 				return SUBMODULE_PATCH_WITHOUT_INDEX;
 		} else if (has_symlink_leading_path(name, strlen(name))) {
@@ -3510,14 +3510,14 @@  static int load_preimage(struct apply_state *state,
 	return 0;
 }
 
-static int resolve_to(struct image *image, const struct object_id *result_id)
+static int resolve_to(struct repository *r, struct image *image, const struct object_id *result_id)
 {
 	unsigned long size;
 	enum object_type type;
 
 	clear_image(image);
 
-	image->buf = repo_read_object_file(the_repository, result_id, &type,
+	image->buf = repo_read_object_file(r, result_id, &type,
 					   &size);
 	if (!image->buf || type != OBJ_BLOB)
 		die("unable to read blob object %s", oid_to_hex(result_id));
@@ -3539,9 +3539,9 @@  static int three_way_merge(struct apply_state *state,
 
 	/* resolve trivial cases first */
 	if (oideq(base, ours))
-		return resolve_to(image, theirs);
+		return resolve_to(state->repo, image, theirs);
 	else if (oideq(base, theirs) || oideq(ours, theirs))
-		return resolve_to(image, ours);
+		return resolve_to(state->repo, image, ours);
 
 	read_mmblob(&base_file, base);
 	read_mmblob(&our_file, ours);
@@ -3636,8 +3636,8 @@  static int try_threeway(struct apply_state *state,
 	/* Preimage the patch was prepared for */
 	if (patch->is_new)
 		write_object_file("", 0, OBJ_BLOB, &pre_oid);
-	else if (repo_get_oid(the_repository, patch->old_oid_prefix, &pre_oid) ||
-		 read_blob_object(&buf, &pre_oid, patch->old_mode))
+	else if (repo_get_oid(state->repo, patch->old_oid_prefix, &pre_oid) ||
+		 read_blob_object(state->repo, &buf, &pre_oid, patch->old_mode))
 		return error(_("repository lacks the necessary blob to perform 3-way merge."));
 
 	if (state->apply_verbosity > verbosity_silent && patch->direct_to_threeway)
@@ -4164,7 +4164,7 @@  static int build_fake_ancestor(struct apply_state *state, struct patch *list)
 			else
 				return error(_("sha1 information is lacking or "
 					       "useless for submodule %s"), name);
-		} else if (!repo_get_oid_blob(the_repository, patch->old_oid_prefix, &oid)) {
+		} else if (!repo_get_oid_blob(state->repo, patch->old_oid_prefix, &oid)) {
 			; /* ok */
 		} else if (!patch->lines_added && !patch->lines_deleted) {
 			/* mode-only change: update the current */