diff mbox series

[v2,1/4] promisor-remote: read partialClone config here

Message ID 07290cba86fda73ee329a47db8e524b32dba25af.1623111879.git.jonathantanmy@google.com (mailing list archive)
State Superseded
Headers show
Series First steps towards partial clone submodules | expand

Commit Message

Jonathan Tan June 8, 2021, 12:25 a.m. UTC
Currently, the reading of config related to promisor remotes is done in
two places: once in setup.c (which sets the global variable
repository_format_partial_clone, to be read by the code in
promisor-remote.c), and once in promisor-remote.c. This means that care
must be taken to ensure that repository_format_partial_clone is set
before any code in promisor-remote.c accesses it.

To simplify the code, move all such config reading to promisor-remote.c.
By doing this, it will be easier to see when
repository_format_partial_clone is written and, thus, to reason about
the code. This will be especially helpful in a subsequent commit, which
modifies this code.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
 cache.h           |  1 -
 promisor-remote.c | 14 +++++++++-----
 promisor-remote.h |  6 ------
 setup.c           | 10 +++++++---
 4 files changed, 16 insertions(+), 15 deletions(-)

Comments

Junio C Hamano June 8, 2021, 3:18 a.m. UTC | #1
Jonathan Tan <jonathantanmy@google.com> writes:

> Currently, the reading of config related to promisor remotes is done in
> two places: once in setup.c (which sets the global variable
> repository_format_partial_clone, to be read by the code in
> promisor-remote.c), and once in promisor-remote.c. This means that care
> must be taken to ensure that repository_format_partial_clone is set
> before any code in promisor-remote.c accesses it.

The above is very true, but I am puzzled by the chosen direction of
the code movement.

Given that the value in the field repository_format.partial_clone
comes from an extension, and an extension that is not understood by
the version of Git that is running MUST abort the execution of Git,
wouldn't it be guaranteed that, in a correctly written program, the
.partial_clone field must already be set up correctly before
anything else, including those in promissor-remote.c, accesses it?

> To simplify the code, move all such config reading to promisor-remote.c.
> By doing this, it will be easier to see when
> repository_format_partial_clone is written and, thus, to reason about
> the code. This will be especially helpful in a subsequent commit, which
> modifies this code.

So, I am not sure if this simplifies the code the way we want to
read our code.  Doing a thing in one place is indeed simpler than
doing it in two places, but it looks like promisor-remote code
should be using the repository-format data more, not the other way
around, at least to me.

Perhaps I am missing some other motivation, though.

Thanks.

>
> Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
> ---
>  cache.h           |  1 -
>  promisor-remote.c | 14 +++++++++-----
>  promisor-remote.h |  6 ------
>  setup.c           | 10 +++++++---
>  4 files changed, 16 insertions(+), 15 deletions(-)
>
> diff --git a/cache.h b/cache.h
> index ba04ff8bd3..dbdcec8601 100644
> --- a/cache.h
> +++ b/cache.h
> @@ -1061,7 +1061,6 @@ extern int repository_format_worktree_config;
>  struct repository_format {
>  	int version;
>  	int precious_objects;
> -	char *partial_clone; /* value of extensions.partialclone */
>  	int worktree_config;
>  	int is_bare;
>  	int hash_algo;
> diff --git a/promisor-remote.c b/promisor-remote.c
> index da3f2ca261..c0e5061dfe 100644
> --- a/promisor-remote.c
> +++ b/promisor-remote.c
> @@ -7,11 +7,6 @@
>  
>  static char *repository_format_partial_clone;
>  
> -void set_repository_format_partial_clone(char *partial_clone)
> -{
> -	repository_format_partial_clone = xstrdup_or_null(partial_clone);
> -}
> -
>  static int fetch_objects(const char *remote_name,
>  			 const struct object_id *oids,
>  			 int oid_nr)
> @@ -99,6 +94,15 @@ static int promisor_remote_config(const char *var, const char *value, void *data
>  	size_t namelen;
>  	const char *subkey;
>  
> +	if (!strcmp(var, "extensions.partialclone")) {
> +		/*
> +		 * NULL value is handled in handle_extension_v0 in setup.c.
> +		 */
> +		if (value)
> +			repository_format_partial_clone = xstrdup(value);
> +		return 0;
> +	}
> +
>  	if (parse_config_key(var, "remote", &name, &namelen, &subkey) < 0)
>  		return 0;
>  
> diff --git a/promisor-remote.h b/promisor-remote.h
> index c7a14063c5..687210ab87 100644
> --- a/promisor-remote.h
> +++ b/promisor-remote.h
> @@ -32,10 +32,4 @@ int promisor_remote_get_direct(struct repository *repo,
>  			       const struct object_id *oids,
>  			       int oid_nr);
>  
> -/*
> - * This should be used only once from setup.c to set the value we got
> - * from the extensions.partialclone config option.
> - */
> -void set_repository_format_partial_clone(char *partial_clone);
> -
>  #endif /* PROMISOR_REMOTE_H */
> diff --git a/setup.c b/setup.c
> index 59e2facd9d..d60b6bc554 100644
> --- a/setup.c
> +++ b/setup.c
> @@ -470,7 +470,13 @@ static enum extension_result handle_extension_v0(const char *var,
>  		} else if (!strcmp(ext, "partialclone")) {
>  			if (!value)
>  				return config_error_nonbool(var);
> -			data->partial_clone = xstrdup(value);
> +			/*
> +			 * This config variable will be read together with the
> +			 * other relevant config variables in
> +			 * promisor_remote_config() in promisor_remote.c, so we
> +			 * do not need to read it here. Just report that this
> +			 * extension is known.
> +			 */
>  			return EXTENSION_OK;
>  		} else if (!strcmp(ext, "worktreeconfig")) {
>  			data->worktree_config = git_config_bool(var, value);
> @@ -566,7 +572,6 @@ static int check_repository_format_gently(const char *gitdir, struct repository_
>  	}
>  
>  	repository_format_precious_objects = candidate->precious_objects;
> -	set_repository_format_partial_clone(candidate->partial_clone);
>  	repository_format_worktree_config = candidate->worktree_config;
>  	string_list_clear(&candidate->unknown_extensions, 0);
>  	string_list_clear(&candidate->v1_only_extensions, 0);
> @@ -650,7 +655,6 @@ void clear_repository_format(struct repository_format *format)
>  	string_list_clear(&format->unknown_extensions, 0);
>  	string_list_clear(&format->v1_only_extensions, 0);
>  	free(format->work_tree);
> -	free(format->partial_clone);
>  	init_repository_format(format);
>  }
Elijah Newren June 8, 2021, 5:28 p.m. UTC | #2
On Mon, Jun 7, 2021 at 5:26 PM Jonathan Tan <jonathantanmy@google.com> wrote:
>
> Currently, the reading of config related to promisor remotes is done in
> two places: once in setup.c (which sets the global variable
> repository_format_partial_clone, to be read by the code in
> promisor-remote.c), and once in promisor-remote.c. This means that care
> must be taken to ensure that repository_format_partial_clone is set
> before any code in promisor-remote.c accesses it.
>
> To simplify the code, move all such config reading to promisor-remote.c.
> By doing this, it will be easier to see when
> repository_format_partial_clone is written and, thus, to reason about
> the code. This will be especially helpful in a subsequent commit, which
> modifies this code.
>
> Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
> ---
>  cache.h           |  1 -
>  promisor-remote.c | 14 +++++++++-----
>  promisor-remote.h |  6 ------
>  setup.c           | 10 +++++++---
>  4 files changed, 16 insertions(+), 15 deletions(-)
>
> diff --git a/cache.h b/cache.h
> index ba04ff8bd3..dbdcec8601 100644
> --- a/cache.h
> +++ b/cache.h
> @@ -1061,7 +1061,6 @@ extern int repository_format_worktree_config;
>  struct repository_format {
>         int version;
>         int precious_objects;
> -       char *partial_clone; /* value of extensions.partialclone */
>         int worktree_config;
>         int is_bare;
>         int hash_algo;
> diff --git a/promisor-remote.c b/promisor-remote.c
> index da3f2ca261..c0e5061dfe 100644
> --- a/promisor-remote.c
> +++ b/promisor-remote.c
> @@ -7,11 +7,6 @@
>
>  static char *repository_format_partial_clone;
>
> -void set_repository_format_partial_clone(char *partial_clone)
> -{
> -       repository_format_partial_clone = xstrdup_or_null(partial_clone);
> -}
> -
>  static int fetch_objects(const char *remote_name,
>                          const struct object_id *oids,
>                          int oid_nr)
> @@ -99,6 +94,15 @@ static int promisor_remote_config(const char *var, const char *value, void *data
>         size_t namelen;
>         const char *subkey;
>
> +       if (!strcmp(var, "extensions.partialclone")) {
> +               /*
> +                * NULL value is handled in handle_extension_v0 in setup.c.
> +                */
> +               if (value)
> +                       repository_format_partial_clone = xstrdup(value);
> +               return 0;
> +       }

This is actually slightly hard to parse out.  I was trying to figure
out where repository_format_partial_clone was initialized, and it's
not handled when value is NULL in handle_extension_v0; it's the fact
that repository_format_partial_clone is declared a static global
variable.

But in the next patch you make it a member of struct
promisor_remote_config, and instead rely on the xcalloc call in
promisor_remote_init().

That means everything is properly initialized and you haven't made any
mistakes here, but the logic is a bit hard to follow.  Perhaps it'd be
nicer to just write this as

+       if (!strcmp(var, "extensions.partialclone")) {
+               repository_format_partial_clone = xstrdup_or_null(value);
+               return 0;
+       }

which makes the code shorter and easier to follow, at least for me.
Jonathan Tan June 9, 2021, 4:26 a.m. UTC | #3
> Jonathan Tan <jonathantanmy@google.com> writes:
> 
> > Currently, the reading of config related to promisor remotes is done in
> > two places: once in setup.c (which sets the global variable
> > repository_format_partial_clone, to be read by the code in
> > promisor-remote.c), and once in promisor-remote.c. This means that care
> > must be taken to ensure that repository_format_partial_clone is set
> > before any code in promisor-remote.c accesses it.
> 
> The above is very true, but I am puzzled by the chosen direction of
> the code movement.
> 
> Given that the value in the field repository_format.partial_clone
> comes from an extension, and an extension that is not understood by
> the version of Git that is running MUST abort the execution of Git,
> wouldn't it be guaranteed that, in a correctly written program, the
> .partial_clone field must already be set up correctly before
> anything else, including those in promissor-remote.c, accesses it?
> 
> > To simplify the code, move all such config reading to promisor-remote.c.
> > By doing this, it will be easier to see when
> > repository_format_partial_clone is written and, thus, to reason about
> > the code. This will be especially helpful in a subsequent commit, which
> > modifies this code.
> 
> So, I am not sure if this simplifies the code the way we want to
> read our code.  Doing a thing in one place is indeed simpler than
> doing it in two places, but it looks like promisor-remote code
> should be using the repository-format data more, not the other way
> around, at least to me.
> 
> Perhaps I am missing some other motivation, though.
> 
> Thanks.

I'm reluctant to add more fields to struct repository_format. Right
now, the way it is used is to hold any information we gathered (e.g.
hash type) while determining if a repo is one that we can handle. Any
information we still need is copied somewhere else, and the struct
itself is immediately freed.

If we were to use it for promisor remote config, we would have to
read config into struct repository_format's fields and copy those fields
into struct repository in setup.c, and then access the same fields in
promisor-remote.c. It seems more straightforward to just do everything
in promisor-remote.c - for example, if we needed to change the type of
one of those fields, we would just need to change it in one file instead
of two.

I acknowledge that there still remains the duplication that setup.c
needs to know that extensions.partialClone is a valid extension, and
that promsior-remote.c needs to interpret extensions.partialClone.

Having said that, I don't feel very strongly about keeping everything in
promisor-remote.c, so I can move it into setup.c if that's the reviewer
consensus.
Jonathan Tan June 9, 2021, 4:44 a.m. UTC | #4
> > @@ -99,6 +94,15 @@ static int promisor_remote_config(const char *var, const char *value, void *data
> >         size_t namelen;
> >         const char *subkey;
> >
> > +       if (!strcmp(var, "extensions.partialclone")) {
> > +               /*
> > +                * NULL value is handled in handle_extension_v0 in setup.c.
> > +                */
> > +               if (value)
> > +                       repository_format_partial_clone = xstrdup(value);
> > +               return 0;
> > +       }
> 
> This is actually slightly hard to parse out.  I was trying to figure
> out where repository_format_partial_clone was initialized, and it's
> not handled when value is NULL in handle_extension_v0; it's the fact
> that repository_format_partial_clone is declared a static global
> variable.
> 
> But in the next patch you make it a member of struct
> promisor_remote_config, and instead rely on the xcalloc call in
> promisor_remote_init().
> 
> That means everything is properly initialized and you haven't made any
> mistakes here, but the logic is a bit hard to follow.  Perhaps it'd be
> nicer to just write this as
> 
> +       if (!strcmp(var, "extensions.partialclone")) {
> +               repository_format_partial_clone = xstrdup_or_null(value);
> +               return 0;
> +       }
> 
> which makes the code shorter and easier to follow, at least for me.

Hmm...is your concern about the case in which
repository_format_partial_clone is uninitialized, or about ignoring a
potential NULL value? If the former, I don't see how your suggestion
fixes things, since extensions.partialclone may never have been in the
config in the first place (and would thus leave
repository_format_partial_clone uninitialized, if it weren't for the
fact that it is in static storage and thus initialized to 0). If the
latter, I guess I should be more detailed about how it's being handled
in setup.c (or maybe just leave out the comment altogether - the code
here can handle a NULL repository_format_partial_clone for some reason).
Elijah Newren June 9, 2021, 5:34 a.m. UTC | #5
On Tue, Jun 8, 2021 at 9:44 PM Jonathan Tan <jonathantanmy@google.com> wrote:
>
> > > @@ -99,6 +94,15 @@ static int promisor_remote_config(const char *var, const char *value, void *data
> > >         size_t namelen;
> > >         const char *subkey;
> > >
> > > +       if (!strcmp(var, "extensions.partialclone")) {
> > > +               /*
> > > +                * NULL value is handled in handle_extension_v0 in setup.c.
> > > +                */
> > > +               if (value)
> > > +                       repository_format_partial_clone = xstrdup(value);
> > > +               return 0;
> > > +       }
> >
> > This is actually slightly hard to parse out.  I was trying to figure
> > out where repository_format_partial_clone was initialized, and it's
> > not handled when value is NULL in handle_extension_v0; it's the fact
> > that repository_format_partial_clone is declared a static global
> > variable.
> >
> > But in the next patch you make it a member of struct
> > promisor_remote_config, and instead rely on the xcalloc call in
> > promisor_remote_init().
> >
> > That means everything is properly initialized and you haven't made any
> > mistakes here, but the logic is a bit hard to follow.  Perhaps it'd be
> > nicer to just write this as
> >
> > +       if (!strcmp(var, "extensions.partialclone")) {
> > +               repository_format_partial_clone = xstrdup_or_null(value);
> > +               return 0;
> > +       }
> >
> > which makes the code shorter and easier to follow, at least for me.
>
> Hmm...is your concern about the case in which
> repository_format_partial_clone is uninitialized, or about ignoring a
> potential NULL value? If the former, I don't see how your suggestion
> fixes things, since extensions.partialclone may never have been in the
> config in the first place (and would thus leave
> repository_format_partial_clone uninitialized, if it weren't for the
> fact that it is in static storage and thus initialized to 0). If the
> latter, I guess I should be more detailed about how it's being handled
> in setup.c (or maybe just leave out the comment altogether - the code
> here can handle a NULL repository_format_partial_clone for some reason).

My comment was about the latter; I was trying to understand what the
comment meant relative to that case, and how and where that case would
be handled in the code.  With that frame of reference, the comment
seemed misleading to me...though perhaps the comment was intended to
answer some other question entirely.
Junio C Hamano June 9, 2021, 9:30 a.m. UTC | #6
Jonathan Tan <jonathantanmy@google.com> writes:

> I'm reluctant to add more fields to struct repository_format.

I am only suggesting to add one new member to either struct
repository or struct repo_settings, so that it becomes crystal clear
that struct repository_format is about each single repository, not
the global the_repository.  Other things that partial repository
support needs to keep *and* do not directly come from extensions
would not belong to repository_format and should not be added there,
but what we read from extensions.* for each repository belongs to
each instance of in-core repository structure and should be
discoverable starting from "struct repository", no?
Jonathan Tan June 9, 2021, 5:16 p.m. UTC | #7
> Jonathan Tan <jonathantanmy@google.com> writes:
> 
> > I'm reluctant to add more fields to struct repository_format.
> 
> I am only suggesting to add one new member to either struct
> repository or struct repo_settings, so that it becomes crystal clear
> that struct repository_format is about each single repository, not
> the global the_repository.  Other things that partial repository
> support needs to keep *and* do not directly come from extensions
> would not belong to repository_format and should not be added there,
> but what we read from extensions.* for each repository belongs to
> each instance of in-core repository structure and should be
> discoverable starting from "struct repository", no?

Ah, I see. I'll try this.
Jonathan Tan June 10, 2021, 5:25 p.m. UTC | #8
> > Hmm...is your concern about the case in which
> > repository_format_partial_clone is uninitialized, or about ignoring a
> > potential NULL value? If the former, I don't see how your suggestion
> > fixes things, since extensions.partialclone may never have been in the
> > config in the first place (and would thus leave
> > repository_format_partial_clone uninitialized, if it weren't for the
> > fact that it is in static storage and thus initialized to 0). If the
> > latter, I guess I should be more detailed about how it's being handled
> > in setup.c (or maybe just leave out the comment altogether - the code
> > here can handle a NULL repository_format_partial_clone for some reason).
> 
> My comment was about the latter; I was trying to understand what the
> comment meant relative to that case, and how and where that case would
> be handled in the code.  With that frame of reference, the comment
> seemed misleading to me...though perhaps the comment was intended to
> answer some other question entirely.

Junio suggested [1] that repository_format_partial_clone be handled when
the repo format is validated, so this part of the code can just make use
of the repository_format_partial_clone value in struct repository and
not read the config itself. So I believe that this part is now
obsolete (but you can take a look at patches 1 and 2 to verify, if you
want).

[1] https://lore.kernel.org/git/xmqqeedbidvy.fsf@gitster.g/
diff mbox series

Patch

diff --git a/cache.h b/cache.h
index ba04ff8bd3..dbdcec8601 100644
--- a/cache.h
+++ b/cache.h
@@ -1061,7 +1061,6 @@  extern int repository_format_worktree_config;
 struct repository_format {
 	int version;
 	int precious_objects;
-	char *partial_clone; /* value of extensions.partialclone */
 	int worktree_config;
 	int is_bare;
 	int hash_algo;
diff --git a/promisor-remote.c b/promisor-remote.c
index da3f2ca261..c0e5061dfe 100644
--- a/promisor-remote.c
+++ b/promisor-remote.c
@@ -7,11 +7,6 @@ 
 
 static char *repository_format_partial_clone;
 
-void set_repository_format_partial_clone(char *partial_clone)
-{
-	repository_format_partial_clone = xstrdup_or_null(partial_clone);
-}
-
 static int fetch_objects(const char *remote_name,
 			 const struct object_id *oids,
 			 int oid_nr)
@@ -99,6 +94,15 @@  static int promisor_remote_config(const char *var, const char *value, void *data
 	size_t namelen;
 	const char *subkey;
 
+	if (!strcmp(var, "extensions.partialclone")) {
+		/*
+		 * NULL value is handled in handle_extension_v0 in setup.c.
+		 */
+		if (value)
+			repository_format_partial_clone = xstrdup(value);
+		return 0;
+	}
+
 	if (parse_config_key(var, "remote", &name, &namelen, &subkey) < 0)
 		return 0;
 
diff --git a/promisor-remote.h b/promisor-remote.h
index c7a14063c5..687210ab87 100644
--- a/promisor-remote.h
+++ b/promisor-remote.h
@@ -32,10 +32,4 @@  int promisor_remote_get_direct(struct repository *repo,
 			       const struct object_id *oids,
 			       int oid_nr);
 
-/*
- * This should be used only once from setup.c to set the value we got
- * from the extensions.partialclone config option.
- */
-void set_repository_format_partial_clone(char *partial_clone);
-
 #endif /* PROMISOR_REMOTE_H */
diff --git a/setup.c b/setup.c
index 59e2facd9d..d60b6bc554 100644
--- a/setup.c
+++ b/setup.c
@@ -470,7 +470,13 @@  static enum extension_result handle_extension_v0(const char *var,
 		} else if (!strcmp(ext, "partialclone")) {
 			if (!value)
 				return config_error_nonbool(var);
-			data->partial_clone = xstrdup(value);
+			/*
+			 * This config variable will be read together with the
+			 * other relevant config variables in
+			 * promisor_remote_config() in promisor_remote.c, so we
+			 * do not need to read it here. Just report that this
+			 * extension is known.
+			 */
 			return EXTENSION_OK;
 		} else if (!strcmp(ext, "worktreeconfig")) {
 			data->worktree_config = git_config_bool(var, value);
@@ -566,7 +572,6 @@  static int check_repository_format_gently(const char *gitdir, struct repository_
 	}
 
 	repository_format_precious_objects = candidate->precious_objects;
-	set_repository_format_partial_clone(candidate->partial_clone);
 	repository_format_worktree_config = candidate->worktree_config;
 	string_list_clear(&candidate->unknown_extensions, 0);
 	string_list_clear(&candidate->v1_only_extensions, 0);
@@ -650,7 +655,6 @@  void clear_repository_format(struct repository_format *format)
 	string_list_clear(&format->unknown_extensions, 0);
 	string_list_clear(&format->v1_only_extensions, 0);
 	free(format->work_tree);
-	free(format->partial_clone);
 	init_repository_format(format);
 }