diff mbox series

[v4] remote: allow specifying refs to prefetch

Message ID pull.1782.v4.git.1728073292874.gitgitgadget@gmail.com (mailing list archive)
State New
Headers show
Series [v4] remote: allow specifying refs to prefetch | expand

Commit Message

Shubham Kanodia Oct. 4, 2024, 8:21 p.m. UTC
From: Shubham Kanodia <shubham.kanodia10@gmail.com>

When using 'git fetch --prefetch', all fetchable refs are prefetched
by default. In large repositories with many refs, this can lead to
unnecessary network traffic and increased disk space use.

Introduce a new configuration option 'remote.<name>.prefetchref' that
allows users to specify specific ref patterns to be prefetched during
a 'git fetch --prefetch' operation.

The 'prefetchref' option accepts a space-separated list of ref
patterns (e.g., 'refs/heads/main !refs/heads/feature/*'). When the
'--prefetch' option is used with 'git fetch', only the refs matching
these patterns will be prefetched.

Signed-off-by: Shubham Kanodia <shubham.kanodia10@gmail.com>
---
    remote: introduce config to set prefetch refs

Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1782%2Fpastelsky%2Fsk%2Fremote-prefetchref-v4
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1782/pastelsky/sk/remote-prefetchref-v4
Pull-Request: https://github.com/gitgitgadget/git/pull/1782

Range-diff vs v3:

 1:  717d5957c47 ! 1:  e3d8aee1ea8 remote: introduce config to set prefetch refs
     @@ Metadata
      Author: Shubham Kanodia <shubham.kanodia10@gmail.com>
      
       ## Commit message ##
     -    remote: introduce config to set prefetch refs
     +    remote: allow specifying refs to prefetch
      
     -    This commit introduces a new configuration option,
     -    remote.<name>.prefetchref, which allows users to specify specific
     -    ref patterns to be prefetched during a git fetch --prefetch
     -    operation.
     +    When using 'git fetch --prefetch', all fetchable refs are prefetched
     +    by default. In large repositories with many refs, this can lead to
     +    unnecessary network traffic and increased disk space use.
      
     -    The new option accepts a space-separated list of ref patterns.
     -    When the --prefetch option is used with git fetch, only the refs
     -    matching these patterns will be prefetched, instead of the
     -    default behavior of prefetching all fetchable refs.
     +    Introduce a new configuration option 'remote.<name>.prefetchref' that
     +    allows users to specify specific ref patterns to be prefetched during
     +    a 'git fetch --prefetch' operation.
      
     -    Example usage in .git/config:
     -    [remote "origin"]
     -        prefetchref = "refs/heads/main refs/heads/feature/*"
     -
     -    This change allows users to optimize their prefetch operations, potentially
     -    reducing network traffic and improving performance for large repositories
     -    with many refs.
     +    The 'prefetchref' option accepts a space-separated list of ref
     +    patterns (e.g., 'refs/heads/main !refs/heads/feature/*'). When the
     +    '--prefetch' option is used with 'git fetch', only the refs matching
     +    these patterns will be prefetched.
      
          Signed-off-by: Shubham Kanodia <shubham.kanodia10@gmail.com>
      
     @@ Documentation/config/remote.txt: remote.<name>.fetch::
       	linkgit:git-fetch[1].
       
      +remote.<name>.prefetchref::
     -+    Specify the refs to be prefetched when fetching from this remote.
     -+    The value is a space-separated list of ref patterns (e.g., "refs/heads/master !refs/heads/develop*").
     -+    This can be used to optimize fetch operations by specifying exactly which refs should be prefetched.
     ++	Specify the refs to be prefetched when fetching from this
     ++	remote. The value is a space-separated list of ref patterns
     ++	(e.g., "refs/heads/main !refs/heads/develop*"). This can be
     ++	used to optimize fetch operations by specifying exactly which
     ++	refs should be prefetched.
      +
       remote.<name>.push::
       	The default set of "refspec" for linkgit:git-push[1]. See
       	linkgit:git-push[1].
      
       ## builtin/fetch.c ##
     -@@
     - #include "trace.h"
     - #include "trace2.h"
     - #include "bundle-uri.h"
     -+#include "wildmatch.h"
     - 
     - #define FORCED_UPDATES_DELAY_WARNING_IN_MS (10 * 1000)
     - 
      @@ builtin/fetch.c: static void filter_prefetch_refspec(struct refspec *rs)
       	}
       }
       
     ++static int pattern_matches_ref(const char *pattern, const char *refname)
     ++{
     ++	if (strchr(pattern, '*'))
     ++		return match_refspec_name_with_pattern(pattern, refname, NULL, NULL) != 0;
     ++	return strcmp(pattern, refname) == 0;
     ++}
     ++
      +static int matches_prefetch_refs(const char *refname, const struct string_list *prefetch_refs)
      +{
     -+	int i;
     -+	int has_positive = 0;
     -+	int matched_positive = 0;
     -+	int matched_negative = 0;
     ++	int has_positive = 0, matched_positive = 0, matched_negative = 0;
      +
     -+	for (i = 0; i < prefetch_refs->nr; i++) {
     ++	for (int i = 0; i < prefetch_refs->nr; i++) {
      +		const char *pattern = prefetch_refs->items[i].string;
      +		int is_negative = (*pattern == '!');
     ++		if (is_negative) pattern++;
     ++		else has_positive = 1;
      +
     -+		if (is_negative)
     -+			pattern++;
     -+		else
     -+			has_positive = 1;
     -+
     -+		if (wildmatch(pattern, refname, 0) == 0) {
     -+			if (is_negative)
     -+				matched_negative = 1;
     -+			else
     -+				matched_positive = 1;
     ++		if (pattern_matches_ref(pattern, refname)) {
     ++			if (is_negative) matched_negative = 1;
     ++			else matched_positive = 1;
      +		}
      +	}
      +
     -+	if (!has_positive)
     -+		return !matched_negative;
     -+
     -+	return matched_positive && !matched_negative;
     -+}
     -+
     -+
     -+static void ref_remove(struct ref **head, struct ref *to_remove)
     -+{
     -+	struct ref **pp, *p;
     -+
     -+	for (pp = head; (p = *pp) != NULL; pp = &p->next) {
     -+		if (p == to_remove) {
     -+			*pp = p->next;
     -+			return;
     -+		}
     -+	}
     ++	return has_positive ? (matched_positive && !matched_negative) : !matched_negative;
      +}
      +
       static struct ref *get_ref_map(struct remote *remote,
       			       const struct ref *remote_refs,
       			       struct refspec *rs,
      @@ builtin/fetch.c: static struct ref *get_ref_map(struct remote *remote,
     + 	struct hashmap existing_refs;
       	int existing_refs_populated = 0;
       
     ++	struct ref *prefetch_filtered_ref_map = NULL, **ref_map_tail = &prefetch_filtered_ref_map;
     ++	struct ref *next;
     ++
       	filter_prefetch_refspec(rs);
      +
       	if (remote)
     @@ builtin/fetch.c: static struct ref *get_ref_map(struct remote *remote,
      +	 * Filter out advertised refs that we don't want to fetch during
      +	 * prefetch if a prefetchref config is set
      +	 */
     ++
      +	if (prefetch && remote->prefetch_refs.nr) {
     -+		struct ref *ref, *next;
     -+		for (ref = ref_map; ref; ref = next) {
     -+			next = ref->next;
     ++		prefetch_filtered_ref_map = NULL;
     ++		ref_map_tail = &prefetch_filtered_ref_map;
     ++
     ++		for (rm = ref_map; rm; rm = next) {
     ++			next = rm->next;
     ++			rm->next = NULL;
      +
     -+			if (!matches_prefetch_refs(ref->name, &remote->prefetch_refs)) {
     -+					ref_remove(&ref_map, ref);
     -+					free_one_ref(ref);
     ++			if (matches_prefetch_refs(rm->name, &remote->prefetch_refs)) {
     ++				*ref_map_tail = rm;
     ++				ref_map_tail = &rm->next;
     ++			} else {
     ++				free_one_ref(rm);
      +			}
      +		}
     ++		ref_map = prefetch_filtered_ref_map;
      +	}
      +
       	ref_map = ref_remove_duplicates(ref_map);
     @@ remote.c: static int handle_config(const char *key, const char *value,
       	else if (!strcmp(subkey, "url")) {
       		if (!value)
       			return config_error_nonbool(key);
     +@@ remote.c: struct strvec *push_url_of_remote(struct remote *remote)
     + 	return remote->pushurl.nr ? &remote->pushurl : &remote->url;
     + }
     + 
     +-static int match_name_with_pattern(const char *key, const char *name,
     ++int match_refspec_name_with_pattern(const char *key, const char *name,
     + 				   const char *value, char **result)
     + {
     + 	const char *kstar = strchr(key, '*');
     +@@ remote.c: static int refspec_match(const struct refspec_item *refspec,
     + 			 const char *name)
     + {
     + 	if (refspec->pattern)
     +-		return match_name_with_pattern(refspec->src, name, NULL, NULL);
     ++		return match_refspec_name_with_pattern(refspec->src, name, NULL, NULL);
     + 
     + 	return !strcmp(refspec->src, name);
     + }
     +@@ remote.c: static int query_matches_negative_refspec(struct refspec *rs, struct refspec_ite
     + 			const char *key = refspec->dst ? refspec->dst : refspec->src;
     + 			const char *value = refspec->src;
     + 
     +-			if (match_name_with_pattern(key, needle, value, &expn_name))
     ++			if (match_refspec_name_with_pattern(key, needle, value, &expn_name))
     + 				string_list_append_nodup(&reversed, expn_name);
     + 		} else if (refspec->matching) {
     + 			/* For the special matching refspec, any query should match */
     +@@ remote.c: static void query_refspecs_multiple(struct refspec *rs,
     + 		if (!refspec->dst || refspec->negative)
     + 			continue;
     + 		if (refspec->pattern) {
     +-			if (match_name_with_pattern(key, needle, value, result))
     ++			if (match_refspec_name_with_pattern(key, needle, value, result))
     + 				string_list_append_nodup(results, *result);
     + 		} else if (!strcmp(needle, key)) {
     + 			string_list_append(results, value);
     +@@ remote.c: int query_refspecs(struct refspec *rs, struct refspec_item *query)
     + 		if (!refspec->dst || refspec->negative)
     + 			continue;
     + 		if (refspec->pattern) {
     +-			if (match_name_with_pattern(key, needle, value, result)) {
     ++			if (match_refspec_name_with_pattern(key, needle, value, result)) {
     + 				query->force = refspec->force;
     + 				return 0;
     + 			}
     +@@ remote.c: static char *get_ref_match(const struct refspec *rs, const struct ref *ref,
     + 			const char *dst_side = item->dst ? item->dst : item->src;
     + 			int match;
     + 			if (direction == FROM_SRC)
     +-				match = match_name_with_pattern(item->src, ref->name, dst_side, &name);
     ++				match = match_refspec_name_with_pattern(item->src, ref->name, dst_side, &name);
     + 			else
     +-				match = match_name_with_pattern(dst_side, ref->name, item->src, &name);
     ++				match = match_refspec_name_with_pattern(dst_side, ref->name, item->src, &name);
     + 			if (match) {
     + 				matching_refs = i;
     + 				break;
     +@@ remote.c: static struct ref *get_expanded_map(const struct ref *remote_refs,
     + 
     + 		if (strchr(ref->name, '^'))
     + 			continue; /* a dereference item */
     +-		if (match_name_with_pattern(refspec->src, ref->name,
     ++		if (match_refspec_name_with_pattern(refspec->src, ref->name,
     + 					    refspec->dst, &expn_name) &&
     + 		    !ignore_symref_update(expn_name, &scratch)) {
     + 			struct ref *cpy = copy_ref(ref);
      
       ## remote.h ##
      @@
     @@ remote.h: struct remote {
       	/*
       	 * The setting for whether to fetch tags (as a separate rule from the
       	 * configured refspecs);
     +@@ remote.h: int count_refspec_match(const char *, struct ref *refs, struct ref **matched_ref
     + 
     + int check_ref_type(const struct ref *ref, int flags);
     + 
     ++int match_refspec_name_with_pattern(const char *key, const char *name,
     ++					const char *value, char **result);
     ++
     + /*
     +  * Free a single ref and its peer, or an entire list of refs and their peers,
     +  * respectively.
      
       ## t/t7900-maintenance.sh ##
      @@ t/t7900-maintenance.sh: test_expect_success 'prefetch multiple remotes' '
     @@ t/t7900-maintenance.sh: test_expect_success 'prefetch multiple remotes' '
      +		git checkout -b feature && test_commit feature-commit-2 &&
      +		git checkout -b wip/test && test_commit wip-test-commit-2 &&
      +		git checkout -b topic/x && test_commit topic-x-commit-2 &&
     -+		git push -f origin feature wip/test topic/x&&
     ++		git push -f origin feature wip/test topic/x &&
      +		cd .. &&
      +
      +		git config remote.remote2.prefetchref "refs/heads/feature" &&
     -+		fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" &&
     -+		GIT_TRACE2_EVENT="$(pwd)/prefetch-positive.txt" git maintenance run --task=prefetch 2>/dev/null &&
     ++		fetchargs="--prefetch --prune --no-tags --no-write-fetch-head \
     ++			--recurse-submodules=no --quiet" &&
     ++		GIT_TRACE2_EVENT="$(pwd)/prefetch-positive.txt" \
     ++			git maintenance run --task=prefetch 2>/dev/null &&
      +		test_subcommand git fetch remote2 $fetchargs <prefetch-positive.txt &&
      +
      +		git rev-parse refs/prefetch/remotes/remote2/feature &&
     @@ t/t7900-maintenance.sh: test_expect_success 'prefetch multiple remotes' '
      +		cd .. &&
      +
      +		git config remote.remote3.prefetchref "!refs/heads/wip/*" &&
     -+		fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" &&
     -+		GIT_TRACE2_EVENT="$(pwd)/prefetch-negative.txt" git maintenance run --task=prefetch 2>/dev/null &&
     ++		fetchargs="--prefetch --prune --no-tags --no-write-fetch-head \
     ++			--recurse-submodules=no --quiet" &&
     ++		GIT_TRACE2_EVENT="$(pwd)/prefetch-negative.txt" \
     ++			git maintenance run --task=prefetch 2>/dev/null &&
      +		test_subcommand git fetch remote3 $fetchargs <prefetch-negative.txt &&
      +		git rev-parse refs/prefetch/remotes/remote3/feature &&
      +		git rev-parse refs/prefetch/remotes/remote3/topic/x &&
     @@ t/t7900-maintenance.sh: test_expect_success 'prefetch multiple remotes' '
      +		git push -f origin feature topic/x topic/y &&
      +		cd .. &&
      +
     -+		git config remote.remote4.prefetchref "refs/heads/topic/* !refs/heads/topic/y" &&
     -+		fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" &&
     -+		GIT_TRACE2_EVENT="$(pwd)/prefetch-mixed.txt" git maintenance run --task=prefetch 2>/dev/null &&
     ++		git config remote.remote4.prefetchref \
     ++			"refs/heads/topic/* !refs/heads/topic/y" &&
     ++		fetchargs="--prefetch --prune --no-tags --no-write-fetch-head \
     ++			--recurse-submodules=no --quiet" &&
     ++		GIT_TRACE2_EVENT="$(pwd)/prefetch-mixed.txt" \
     ++			git maintenance run --task=prefetch 2>/dev/null &&
      +		test_subcommand git fetch remote4 $fetchargs <prefetch-mixed.txt &&
      +
      +		test_must_fail git rev-parse refs/prefetch/remotes/remote4/feature &&


 Documentation/config/remote.txt |  7 +++
 builtin/fetch.c                 | 53 ++++++++++++++++++++
 remote.c                        | 24 ++++++----
 remote.h                        |  6 +++
 t/t7900-maintenance.sh          | 85 +++++++++++++++++++++++++++++++++
 5 files changed, 167 insertions(+), 8 deletions(-)


base-commit: 2e7b89e038c0c888acf61f1b4ee5a43d4dd5e94c

Comments

Shubham Kanodia Nov. 4, 2024, 8:47 a.m. UTC | #1
On Sat, Oct 5, 2024 at 1:51 AM Shubham Kanodia via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: Shubham Kanodia <shubham.kanodia10@gmail.com>
>
> When using 'git fetch --prefetch', all fetchable refs are prefetched
> by default. In large repositories with many refs, this can lead to
> unnecessary network traffic and increased disk space use.
>
> Introduce a new configuration option 'remote.<name>.prefetchref' that
> allows users to specify specific ref patterns to be prefetched during
> a 'git fetch --prefetch' operation.
>
> The 'prefetchref' option accepts a space-separated list of ref
> patterns (e.g., 'refs/heads/main !refs/heads/feature/*'). When the
> '--prefetch' option is used with 'git fetch', only the refs matching
> these patterns will be prefetched.
>
> Signed-off-by: Shubham Kanodia <shubham.kanodia10@gmail.com>
> ---
>     remote: introduce config to set prefetch refs
>
> Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-1782%2Fpastelsky%2Fsk%2Fremote-prefetchref-v4
> Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-1782/pastelsky/sk/remote-prefetchref-v4
> Pull-Request: https://github.com/gitgitgadget/git/pull/1782
>
> Range-diff vs v3:
>
>  1:  717d5957c47 ! 1:  e3d8aee1ea8 remote: introduce config to set prefetch refs
>      @@ Metadata
>       Author: Shubham Kanodia <shubham.kanodia10@gmail.com>
>
>        ## Commit message ##
>      -    remote: introduce config to set prefetch refs
>      +    remote: allow specifying refs to prefetch
>
>      -    This commit introduces a new configuration option,
>      -    remote.<name>.prefetchref, which allows users to specify specific
>      -    ref patterns to be prefetched during a git fetch --prefetch
>      -    operation.
>      +    When using 'git fetch --prefetch', all fetchable refs are prefetched
>      +    by default. In large repositories with many refs, this can lead to
>      +    unnecessary network traffic and increased disk space use.
>
>      -    The new option accepts a space-separated list of ref patterns.
>      -    When the --prefetch option is used with git fetch, only the refs
>      -    matching these patterns will be prefetched, instead of the
>      -    default behavior of prefetching all fetchable refs.
>      +    Introduce a new configuration option 'remote.<name>.prefetchref' that
>      +    allows users to specify specific ref patterns to be prefetched during
>      +    a 'git fetch --prefetch' operation.
>
>      -    Example usage in .git/config:
>      -    [remote "origin"]
>      -        prefetchref = "refs/heads/main refs/heads/feature/*"
>      -
>      -    This change allows users to optimize their prefetch operations, potentially
>      -    reducing network traffic and improving performance for large repositories
>      -    with many refs.
>      +    The 'prefetchref' option accepts a space-separated list of ref
>      +    patterns (e.g., 'refs/heads/main !refs/heads/feature/*'). When the
>      +    '--prefetch' option is used with 'git fetch', only the refs matching
>      +    these patterns will be prefetched.
>
>           Signed-off-by: Shubham Kanodia <shubham.kanodia10@gmail.com>
>
>      @@ Documentation/config/remote.txt: remote.<name>.fetch::
>         linkgit:git-fetch[1].
>
>       +remote.<name>.prefetchref::
>      -+    Specify the refs to be prefetched when fetching from this remote.
>      -+    The value is a space-separated list of ref patterns (e.g., "refs/heads/master !refs/heads/develop*").
>      -+    This can be used to optimize fetch operations by specifying exactly which refs should be prefetched.
>      ++ Specify the refs to be prefetched when fetching from this
>      ++ remote. The value is a space-separated list of ref patterns
>      ++ (e.g., "refs/heads/main !refs/heads/develop*"). This can be
>      ++ used to optimize fetch operations by specifying exactly which
>      ++ refs should be prefetched.
>       +
>        remote.<name>.push::
>         The default set of "refspec" for linkgit:git-push[1]. See
>         linkgit:git-push[1].
>
>        ## builtin/fetch.c ##
>      -@@
>      - #include "trace.h"
>      - #include "trace2.h"
>      - #include "bundle-uri.h"
>      -+#include "wildmatch.h"
>      -
>      - #define FORCED_UPDATES_DELAY_WARNING_IN_MS (10 * 1000)
>      -
>       @@ builtin/fetch.c: static void filter_prefetch_refspec(struct refspec *rs)
>         }
>        }
>
>      ++static int pattern_matches_ref(const char *pattern, const char *refname)
>      ++{
>      ++ if (strchr(pattern, '*'))
>      ++         return match_refspec_name_with_pattern(pattern, refname, NULL, NULL) != 0;
>      ++ return strcmp(pattern, refname) == 0;
>      ++}
>      ++
>       +static int matches_prefetch_refs(const char *refname, const struct string_list *prefetch_refs)
>       +{
>      -+ int i;
>      -+ int has_positive = 0;
>      -+ int matched_positive = 0;
>      -+ int matched_negative = 0;
>      ++ int has_positive = 0, matched_positive = 0, matched_negative = 0;
>       +
>      -+ for (i = 0; i < prefetch_refs->nr; i++) {
>      ++ for (int i = 0; i < prefetch_refs->nr; i++) {
>       +         const char *pattern = prefetch_refs->items[i].string;
>       +         int is_negative = (*pattern == '!');
>      ++         if (is_negative) pattern++;
>      ++         else has_positive = 1;
>       +
>      -+         if (is_negative)
>      -+                 pattern++;
>      -+         else
>      -+                 has_positive = 1;
>      -+
>      -+         if (wildmatch(pattern, refname, 0) == 0) {
>      -+                 if (is_negative)
>      -+                         matched_negative = 1;
>      -+                 else
>      -+                         matched_positive = 1;
>      ++         if (pattern_matches_ref(pattern, refname)) {
>      ++                 if (is_negative) matched_negative = 1;
>      ++                 else matched_positive = 1;
>       +         }
>       + }
>       +
>      -+ if (!has_positive)
>      -+         return !matched_negative;
>      -+
>      -+ return matched_positive && !matched_negative;
>      -+}
>      -+
>      -+
>      -+static void ref_remove(struct ref **head, struct ref *to_remove)
>      -+{
>      -+ struct ref **pp, *p;
>      -+
>      -+ for (pp = head; (p = *pp) != NULL; pp = &p->next) {
>      -+         if (p == to_remove) {
>      -+                 *pp = p->next;
>      -+                 return;
>      -+         }
>      -+ }
>      ++ return has_positive ? (matched_positive && !matched_negative) : !matched_negative;
>       +}
>       +
>        static struct ref *get_ref_map(struct remote *remote,
>                                const struct ref *remote_refs,
>                                struct refspec *rs,
>       @@ builtin/fetch.c: static struct ref *get_ref_map(struct remote *remote,
>      +  struct hashmap existing_refs;
>         int existing_refs_populated = 0;
>
>      ++ struct ref *prefetch_filtered_ref_map = NULL, **ref_map_tail = &prefetch_filtered_ref_map;
>      ++ struct ref *next;
>      ++
>         filter_prefetch_refspec(rs);
>       +
>         if (remote)
>      @@ builtin/fetch.c: static struct ref *get_ref_map(struct remote *remote,
>       +  * Filter out advertised refs that we don't want to fetch during
>       +  * prefetch if a prefetchref config is set
>       +  */
>      ++
>       + if (prefetch && remote->prefetch_refs.nr) {
>      -+         struct ref *ref, *next;
>      -+         for (ref = ref_map; ref; ref = next) {
>      -+                 next = ref->next;
>      ++         prefetch_filtered_ref_map = NULL;
>      ++         ref_map_tail = &prefetch_filtered_ref_map;
>      ++
>      ++         for (rm = ref_map; rm; rm = next) {
>      ++                 next = rm->next;
>      ++                 rm->next = NULL;
>       +
>      -+                 if (!matches_prefetch_refs(ref->name, &remote->prefetch_refs)) {
>      -+                                 ref_remove(&ref_map, ref);
>      -+                                 free_one_ref(ref);
>      ++                 if (matches_prefetch_refs(rm->name, &remote->prefetch_refs)) {
>      ++                         *ref_map_tail = rm;
>      ++                         ref_map_tail = &rm->next;
>      ++                 } else {
>      ++                         free_one_ref(rm);
>       +                 }
>       +         }
>      ++         ref_map = prefetch_filtered_ref_map;
>       + }
>       +
>         ref_map = ref_remove_duplicates(ref_map);
>      @@ remote.c: static int handle_config(const char *key, const char *value,
>         else if (!strcmp(subkey, "url")) {
>                 if (!value)
>                         return config_error_nonbool(key);
>      +@@ remote.c: struct strvec *push_url_of_remote(struct remote *remote)
>      +  return remote->pushurl.nr ? &remote->pushurl : &remote->url;
>      + }
>      +
>      +-static int match_name_with_pattern(const char *key, const char *name,
>      ++int match_refspec_name_with_pattern(const char *key, const char *name,
>      +                             const char *value, char **result)
>      + {
>      +  const char *kstar = strchr(key, '*');
>      +@@ remote.c: static int refspec_match(const struct refspec_item *refspec,
>      +                   const char *name)
>      + {
>      +  if (refspec->pattern)
>      +-         return match_name_with_pattern(refspec->src, name, NULL, NULL);
>      ++         return match_refspec_name_with_pattern(refspec->src, name, NULL, NULL);
>      +
>      +  return !strcmp(refspec->src, name);
>      + }
>      +@@ remote.c: static int query_matches_negative_refspec(struct refspec *rs, struct refspec_ite
>      +                  const char *key = refspec->dst ? refspec->dst : refspec->src;
>      +                  const char *value = refspec->src;
>      +
>      +-                 if (match_name_with_pattern(key, needle, value, &expn_name))
>      ++                 if (match_refspec_name_with_pattern(key, needle, value, &expn_name))
>      +                          string_list_append_nodup(&reversed, expn_name);
>      +          } else if (refspec->matching) {
>      +                  /* For the special matching refspec, any query should match */
>      +@@ remote.c: static void query_refspecs_multiple(struct refspec *rs,
>      +          if (!refspec->dst || refspec->negative)
>      +                  continue;
>      +          if (refspec->pattern) {
>      +-                 if (match_name_with_pattern(key, needle, value, result))
>      ++                 if (match_refspec_name_with_pattern(key, needle, value, result))
>      +                          string_list_append_nodup(results, *result);
>      +          } else if (!strcmp(needle, key)) {
>      +                  string_list_append(results, value);
>      +@@ remote.c: int query_refspecs(struct refspec *rs, struct refspec_item *query)
>      +          if (!refspec->dst || refspec->negative)
>      +                  continue;
>      +          if (refspec->pattern) {
>      +-                 if (match_name_with_pattern(key, needle, value, result)) {
>      ++                 if (match_refspec_name_with_pattern(key, needle, value, result)) {
>      +                          query->force = refspec->force;
>      +                          return 0;
>      +                  }
>      +@@ remote.c: static char *get_ref_match(const struct refspec *rs, const struct ref *ref,
>      +                  const char *dst_side = item->dst ? item->dst : item->src;
>      +                  int match;
>      +                  if (direction == FROM_SRC)
>      +-                         match = match_name_with_pattern(item->src, ref->name, dst_side, &name);
>      ++                         match = match_refspec_name_with_pattern(item->src, ref->name, dst_side, &name);
>      +                  else
>      +-                         match = match_name_with_pattern(dst_side, ref->name, item->src, &name);
>      ++                         match = match_refspec_name_with_pattern(dst_side, ref->name, item->src, &name);
>      +                  if (match) {
>      +                          matching_refs = i;
>      +                          break;
>      +@@ remote.c: static struct ref *get_expanded_map(const struct ref *remote_refs,
>      +
>      +          if (strchr(ref->name, '^'))
>      +                  continue; /* a dereference item */
>      +-         if (match_name_with_pattern(refspec->src, ref->name,
>      ++         if (match_refspec_name_with_pattern(refspec->src, ref->name,
>      +                                      refspec->dst, &expn_name) &&
>      +              !ignore_symref_update(expn_name, &scratch)) {
>      +                  struct ref *cpy = copy_ref(ref);
>
>        ## remote.h ##
>       @@
>      @@ remote.h: struct remote {
>         /*
>          * The setting for whether to fetch tags (as a separate rule from the
>          * configured refspecs);
>      +@@ remote.h: int count_refspec_match(const char *, struct ref *refs, struct ref **matched_ref
>      +
>      + int check_ref_type(const struct ref *ref, int flags);
>      +
>      ++int match_refspec_name_with_pattern(const char *key, const char *name,
>      ++                                 const char *value, char **result);
>      ++
>      + /*
>      +  * Free a single ref and its peer, or an entire list of refs and their peers,
>      +  * respectively.
>
>        ## t/t7900-maintenance.sh ##
>       @@ t/t7900-maintenance.sh: test_expect_success 'prefetch multiple remotes' '
>      @@ t/t7900-maintenance.sh: test_expect_success 'prefetch multiple remotes' '
>       +         git checkout -b feature && test_commit feature-commit-2 &&
>       +         git checkout -b wip/test && test_commit wip-test-commit-2 &&
>       +         git checkout -b topic/x && test_commit topic-x-commit-2 &&
>      -+         git push -f origin feature wip/test topic/x&&
>      ++         git push -f origin feature wip/test topic/x &&
>       +         cd .. &&
>       +
>       +         git config remote.remote2.prefetchref "refs/heads/feature" &&
>      -+         fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" &&
>      -+         GIT_TRACE2_EVENT="$(pwd)/prefetch-positive.txt" git maintenance run --task=prefetch 2>/dev/null &&
>      ++         fetchargs="--prefetch --prune --no-tags --no-write-fetch-head \
>      ++                 --recurse-submodules=no --quiet" &&
>      ++         GIT_TRACE2_EVENT="$(pwd)/prefetch-positive.txt" \
>      ++                 git maintenance run --task=prefetch 2>/dev/null &&
>       +         test_subcommand git fetch remote2 $fetchargs <prefetch-positive.txt &&
>       +
>       +         git rev-parse refs/prefetch/remotes/remote2/feature &&
>      @@ t/t7900-maintenance.sh: test_expect_success 'prefetch multiple remotes' '
>       +         cd .. &&
>       +
>       +         git config remote.remote3.prefetchref "!refs/heads/wip/*" &&
>      -+         fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" &&
>      -+         GIT_TRACE2_EVENT="$(pwd)/prefetch-negative.txt" git maintenance run --task=prefetch 2>/dev/null &&
>      ++         fetchargs="--prefetch --prune --no-tags --no-write-fetch-head \
>      ++                 --recurse-submodules=no --quiet" &&
>      ++         GIT_TRACE2_EVENT="$(pwd)/prefetch-negative.txt" \
>      ++                 git maintenance run --task=prefetch 2>/dev/null &&
>       +         test_subcommand git fetch remote3 $fetchargs <prefetch-negative.txt &&
>       +         git rev-parse refs/prefetch/remotes/remote3/feature &&
>       +         git rev-parse refs/prefetch/remotes/remote3/topic/x &&
>      @@ t/t7900-maintenance.sh: test_expect_success 'prefetch multiple remotes' '
>       +         git push -f origin feature topic/x topic/y &&
>       +         cd .. &&
>       +
>      -+         git config remote.remote4.prefetchref "refs/heads/topic/* !refs/heads/topic/y" &&
>      -+         fetchargs="--prefetch --prune --no-tags --no-write-fetch-head --recurse-submodules=no --quiet" &&
>      -+         GIT_TRACE2_EVENT="$(pwd)/prefetch-mixed.txt" git maintenance run --task=prefetch 2>/dev/null &&
>      ++         git config remote.remote4.prefetchref \
>      ++                 "refs/heads/topic/* !refs/heads/topic/y" &&
>      ++         fetchargs="--prefetch --prune --no-tags --no-write-fetch-head \
>      ++                 --recurse-submodules=no --quiet" &&
>      ++         GIT_TRACE2_EVENT="$(pwd)/prefetch-mixed.txt" \
>      ++                 git maintenance run --task=prefetch 2>/dev/null &&
>       +         test_subcommand git fetch remote4 $fetchargs <prefetch-mixed.txt &&
>       +
>       +         test_must_fail git rev-parse refs/prefetch/remotes/remote4/feature &&
>
>
>  Documentation/config/remote.txt |  7 +++
>  builtin/fetch.c                 | 53 ++++++++++++++++++++
>  remote.c                        | 24 ++++++----
>  remote.h                        |  6 +++
>  t/t7900-maintenance.sh          | 85 +++++++++++++++++++++++++++++++++
>  5 files changed, 167 insertions(+), 8 deletions(-)
>
> diff --git a/Documentation/config/remote.txt b/Documentation/config/remote.txt
> index 8efc53e836d..186f439ed7b 100644
> --- a/Documentation/config/remote.txt
> +++ b/Documentation/config/remote.txt
> @@ -33,6 +33,13 @@ remote.<name>.fetch::
>         The default set of "refspec" for linkgit:git-fetch[1]. See
>         linkgit:git-fetch[1].
>
> +remote.<name>.prefetchref::
> +       Specify the refs to be prefetched when fetching from this
> +       remote. The value is a space-separated list of ref patterns
> +       (e.g., "refs/heads/main !refs/heads/develop*"). This can be
> +       used to optimize fetch operations by specifying exactly which
> +       refs should be prefetched.
> +
>  remote.<name>.push::
>         The default set of "refspec" for linkgit:git-push[1]. See
>         linkgit:git-push[1].
> diff --git a/builtin/fetch.c b/builtin/fetch.c
> index b2b5aee5bf2..74603cfabe0 100644
> --- a/builtin/fetch.c
> +++ b/builtin/fetch.c
> @@ -485,6 +485,32 @@ static void filter_prefetch_refspec(struct refspec *rs)
>         }
>  }
>
> +static int pattern_matches_ref(const char *pattern, const char *refname)
> +{
> +       if (strchr(pattern, '*'))
> +               return match_refspec_name_with_pattern(pattern, refname, NULL, NULL) != 0;
> +       return strcmp(pattern, refname) == 0;
> +}
> +
> +static int matches_prefetch_refs(const char *refname, const struct string_list *prefetch_refs)
> +{
> +       int has_positive = 0, matched_positive = 0, matched_negative = 0;
> +
> +       for (int i = 0; i < prefetch_refs->nr; i++) {
> +               const char *pattern = prefetch_refs->items[i].string;
> +               int is_negative = (*pattern == '!');
> +               if (is_negative) pattern++;
> +               else has_positive = 1;
> +
> +               if (pattern_matches_ref(pattern, refname)) {
> +                       if (is_negative) matched_negative = 1;
> +                       else matched_positive = 1;
> +               }
> +       }
> +
> +       return has_positive ? (matched_positive && !matched_negative) : !matched_negative;
> +}
> +
>  static struct ref *get_ref_map(struct remote *remote,
>                                const struct ref *remote_refs,
>                                struct refspec *rs,
> @@ -501,7 +527,11 @@ static struct ref *get_ref_map(struct remote *remote,
>         struct hashmap existing_refs;
>         int existing_refs_populated = 0;
>
> +       struct ref *prefetch_filtered_ref_map = NULL, **ref_map_tail = &prefetch_filtered_ref_map;
> +       struct ref *next;
> +
>         filter_prefetch_refspec(rs);
> +
>         if (remote)
>                 filter_prefetch_refspec(&remote->fetch);
>
> @@ -610,6 +640,29 @@ static struct ref *get_ref_map(struct remote *remote,
>         else
>                 ref_map = apply_negative_refspecs(ref_map, &remote->fetch);
>
> +       /**
> +        * Filter out advertised refs that we don't want to fetch during
> +        * prefetch if a prefetchref config is set
> +        */
> +
> +       if (prefetch && remote->prefetch_refs.nr) {
> +               prefetch_filtered_ref_map = NULL;
> +               ref_map_tail = &prefetch_filtered_ref_map;
> +
> +               for (rm = ref_map; rm; rm = next) {
> +                       next = rm->next;
> +                       rm->next = NULL;
> +
> +                       if (matches_prefetch_refs(rm->name, &remote->prefetch_refs)) {
> +                               *ref_map_tail = rm;
> +                               ref_map_tail = &rm->next;
> +                       } else {
> +                               free_one_ref(rm);
> +                       }
> +               }
> +               ref_map = prefetch_filtered_ref_map;
> +       }
> +
>         ref_map = ref_remove_duplicates(ref_map);
>
>         for (rm = ref_map; rm; rm = rm->next) {
> diff --git a/remote.c b/remote.c
> index 8f3dee13186..6752c73370f 100644
> --- a/remote.c
> +++ b/remote.c
> @@ -141,6 +141,7 @@ static struct remote *make_remote(struct remote_state *remote_state,
>         ret->prune = -1;  /* unspecified */
>         ret->prune_tags = -1;  /* unspecified */
>         ret->name = xstrndup(name, len);
> +       string_list_init_dup(&ret->prefetch_refs);
>         refspec_init(&ret->push, REFSPEC_PUSH);
>         refspec_init(&ret->fetch, REFSPEC_FETCH);
>
> @@ -166,6 +167,7 @@ static void remote_clear(struct remote *remote)
>         free((char *)remote->uploadpack);
>         FREE_AND_NULL(remote->http_proxy);
>         FREE_AND_NULL(remote->http_proxy_authmethod);
> +       string_list_clear(&remote->prefetch_refs, 0);
>  }
>
>  static void add_merge(struct branch *branch, const char *name)
> @@ -456,6 +458,12 @@ static int handle_config(const char *key, const char *value,
>                 remote->prune = git_config_bool(key, value);
>         else if (!strcmp(subkey, "prunetags"))
>                 remote->prune_tags = git_config_bool(key, value);
> +       else if (!strcmp(subkey, "prefetchref")) {
> +               if (!value)
> +                       return config_error_nonbool(key);
> +               string_list_split(&remote->prefetch_refs, value, ' ', -1);
> +               return 0;
> +       }
>         else if (!strcmp(subkey, "url")) {
>                 if (!value)
>                         return config_error_nonbool(key);
> @@ -868,7 +876,7 @@ struct strvec *push_url_of_remote(struct remote *remote)
>         return remote->pushurl.nr ? &remote->pushurl : &remote->url;
>  }
>
> -static int match_name_with_pattern(const char *key, const char *name,
> +int match_refspec_name_with_pattern(const char *key, const char *name,
>                                    const char *value, char **result)
>  {
>         const char *kstar = strchr(key, '*');
> @@ -900,7 +908,7 @@ static int refspec_match(const struct refspec_item *refspec,
>                          const char *name)
>  {
>         if (refspec->pattern)
> -               return match_name_with_pattern(refspec->src, name, NULL, NULL);
> +               return match_refspec_name_with_pattern(refspec->src, name, NULL, NULL);
>
>         return !strcmp(refspec->src, name);
>  }
> @@ -969,7 +977,7 @@ static int query_matches_negative_refspec(struct refspec *rs, struct refspec_ite
>                         const char *key = refspec->dst ? refspec->dst : refspec->src;
>                         const char *value = refspec->src;
>
> -                       if (match_name_with_pattern(key, needle, value, &expn_name))
> +                       if (match_refspec_name_with_pattern(key, needle, value, &expn_name))
>                                 string_list_append_nodup(&reversed, expn_name);
>                 } else if (refspec->matching) {
>                         /* For the special matching refspec, any query should match */
> @@ -1014,7 +1022,7 @@ static void query_refspecs_multiple(struct refspec *rs,
>                 if (!refspec->dst || refspec->negative)
>                         continue;
>                 if (refspec->pattern) {
> -                       if (match_name_with_pattern(key, needle, value, result))
> +                       if (match_refspec_name_with_pattern(key, needle, value, result))
>                                 string_list_append_nodup(results, *result);
>                 } else if (!strcmp(needle, key)) {
>                         string_list_append(results, value);
> @@ -1043,7 +1051,7 @@ int query_refspecs(struct refspec *rs, struct refspec_item *query)
>                 if (!refspec->dst || refspec->negative)
>                         continue;
>                 if (refspec->pattern) {
> -                       if (match_name_with_pattern(key, needle, value, result)) {
> +                       if (match_refspec_name_with_pattern(key, needle, value, result)) {
>                                 query->force = refspec->force;
>                                 return 0;
>                         }
> @@ -1456,9 +1464,9 @@ static char *get_ref_match(const struct refspec *rs, const struct ref *ref,
>                         const char *dst_side = item->dst ? item->dst : item->src;
>                         int match;
>                         if (direction == FROM_SRC)
> -                               match = match_name_with_pattern(item->src, ref->name, dst_side, &name);
> +                               match = match_refspec_name_with_pattern(item->src, ref->name, dst_side, &name);
>                         else
> -                               match = match_name_with_pattern(dst_side, ref->name, item->src, &name);
> +                               match = match_refspec_name_with_pattern(dst_side, ref->name, item->src, &name);
>                         if (match) {
>                                 matching_refs = i;
>                                 break;
> @@ -2076,7 +2084,7 @@ static struct ref *get_expanded_map(const struct ref *remote_refs,
>
>                 if (strchr(ref->name, '^'))
>                         continue; /* a dereference item */
> -               if (match_name_with_pattern(refspec->src, ref->name,
> +               if (match_refspec_name_with_pattern(refspec->src, ref->name,
>                                             refspec->dst, &expn_name) &&
>                     !ignore_symref_update(expn_name, &scratch)) {
>                         struct ref *cpy = copy_ref(ref);
> diff --git a/remote.h b/remote.h
> index b901b56746d..9ffef206f23 100644
> --- a/remote.h
> +++ b/remote.h
> @@ -5,6 +5,7 @@
>  #include "hashmap.h"
>  #include "refspec.h"
>  #include "strvec.h"
> +#include "string-list.h"
>
>  struct option;
>  struct transport_ls_refs_options;
> @@ -77,6 +78,8 @@ struct remote {
>
>         struct refspec fetch;
>
> +       struct string_list prefetch_refs;
> +
>         /*
>          * The setting for whether to fetch tags (as a separate rule from the
>          * configured refspecs);
> @@ -207,6 +210,9 @@ int count_refspec_match(const char *, struct ref *refs, struct ref **matched_ref
>
>  int check_ref_type(const struct ref *ref, int flags);
>
> +int match_refspec_name_with_pattern(const char *key, const char *name,
> +                                       const char *value, char **result);
> +
>  /*
>   * Free a single ref and its peer, or an entire list of refs and their peers,
>   * respectively.
> diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh
> index abae7a97546..fc1b5d14e75 100755
> --- a/t/t7900-maintenance.sh
> +++ b/t/t7900-maintenance.sh
> @@ -245,6 +245,91 @@ test_expect_success 'prefetch multiple remotes' '
>         test_subcommand git fetch remote2 $fetchargs <skip-remote1.txt
>  '
>
> +test_expect_success 'prefetch with positive prefetch ref patterns' '
> +       test_create_repo filter-prefetch-positive &&
> +       (
> +               cd filter-prefetch-positive &&
> +               test_commit initial &&
> +               git clone . clone2 &&
> +               git remote add remote2 "file://$(pwd)/clone2" &&
> +
> +               cd clone2 &&
> +               git checkout -b feature && test_commit feature-commit-2 &&
> +               git checkout -b wip/test && test_commit wip-test-commit-2 &&
> +               git checkout -b topic/x && test_commit topic-x-commit-2 &&
> +               git push -f origin feature wip/test topic/x &&
> +               cd .. &&
> +
> +               git config remote.remote2.prefetchref "refs/heads/feature" &&
> +               fetchargs="--prefetch --prune --no-tags --no-write-fetch-head \
> +                       --recurse-submodules=no --quiet" &&
> +               GIT_TRACE2_EVENT="$(pwd)/prefetch-positive.txt" \
> +                       git maintenance run --task=prefetch 2>/dev/null &&
> +               test_subcommand git fetch remote2 $fetchargs <prefetch-positive.txt &&
> +
> +               git rev-parse refs/prefetch/remotes/remote2/feature &&
> +               test_must_fail git rev-parse refs/prefetch/remotes/remote2/wip/test &&
> +               test_must_fail git rev-parse refs/prefetch/remotes/remote2/topic/x
> +       )
> +'
> +
> +test_expect_success 'prefetch with negative prefetch ref patterns' '
> +       test_create_repo filter-prefetch-negative &&
> +       (
> +               cd filter-prefetch-negative &&
> +               test_commit initial &&
> +               git clone . clone3 &&
> +               git remote add remote3 "file://$(pwd)/clone3" &&
> +               cat .git/config &&
> +
> +               cd clone3 &&
> +               git checkout -b feature && test_commit feature-commit-3 &&
> +               git checkout -b wip/test && test_commit wip-test-commit-3 &&
> +               git checkout -b topic/x && test_commit topic-x-commit-3 &&
> +               git push -f origin feature wip/test topic/x &&
> +               cd .. &&
> +
> +               git config remote.remote3.prefetchref "!refs/heads/wip/*" &&
> +               fetchargs="--prefetch --prune --no-tags --no-write-fetch-head \
> +                       --recurse-submodules=no --quiet" &&
> +               GIT_TRACE2_EVENT="$(pwd)/prefetch-negative.txt" \
> +                       git maintenance run --task=prefetch 2>/dev/null &&
> +               test_subcommand git fetch remote3 $fetchargs <prefetch-negative.txt &&
> +               git rev-parse refs/prefetch/remotes/remote3/feature &&
> +               git rev-parse refs/prefetch/remotes/remote3/topic/x &&
> +               test_must_fail git rev-parse refs/prefetch/remotes/remote3/wip/test
> +       )
> +'
> +
> +test_expect_success 'prefetch with positive & negative prefetch ref patterns' '
> +       test_create_repo filter-prefetch-mixed &&
> +       (
> +               cd filter-prefetch-mixed &&
> +               test_commit initial &&
> +               git clone . clone4 &&
> +               git remote add remote4 "file://$(pwd)/clone4" &&
> +
> +               cd clone4 &&
> +               git checkout -b feature && test_commit feature-commit-4 &&
> +               git checkout -b topic/x && test_commit topic-x-commit-4 &&
> +               git checkout -b topic/y && test_commit topic-y-commit-4 &&
> +               git push -f origin feature topic/x topic/y &&
> +               cd .. &&
> +
> +               git config remote.remote4.prefetchref \
> +                       "refs/heads/topic/* !refs/heads/topic/y" &&
> +               fetchargs="--prefetch --prune --no-tags --no-write-fetch-head \
> +                       --recurse-submodules=no --quiet" &&
> +               GIT_TRACE2_EVENT="$(pwd)/prefetch-mixed.txt" \
> +                       git maintenance run --task=prefetch 2>/dev/null &&
> +               test_subcommand git fetch remote4 $fetchargs <prefetch-mixed.txt &&
> +
> +               test_must_fail git rev-parse refs/prefetch/remotes/remote4/feature &&
> +               test_must_fail git rev-parse refs/prefetch/remotes/remote4/topic/y &&
> +               git rev-parse refs/prefetch/remotes/remote4/topic/x
> +       )
> +'
> +
>  test_expect_success 'loose-objects task' '
>         # Repack everything so we know the state of the object dir
>         git repack -adk &&
>
> base-commit: 2e7b89e038c0c888acf61f1b4ee5a43d4dd5e94c
> --
> gitgitgadget

A friendly bump to revive this thread to see if the code looks good to
you Junio.
Patrick Steinhardt Nov. 5, 2024, 6:45 a.m. UTC | #2
On Fri, Oct 04, 2024 at 08:21:32PM +0000, Shubham Kanodia via GitGitGadget wrote:

I'm coming rather late to the party and simply want to review this so
that the thread gets revived. So my context may be lacking, please
forgive me if I am reopening things that were already discussed.

> diff --git a/Documentation/config/remote.txt b/Documentation/config/remote.txt
> index 8efc53e836d..186f439ed7b 100644
> --- a/Documentation/config/remote.txt
> +++ b/Documentation/config/remote.txt
> @@ -33,6 +33,13 @@ remote.<name>.fetch::
>  	The default set of "refspec" for linkgit:git-fetch[1]. See
>  	linkgit:git-fetch[1].
>  
> +remote.<name>.prefetchref::
> +	Specify the refs to be prefetched when fetching from this
> +	remote. The value is a space-separated list of ref patterns
> +	(e.g., "refs/heads/main !refs/heads/develop*"). This can be
> +	used to optimize fetch operations by specifying exactly which
> +	refs should be prefetched.

I'm a bit surprised that we use a space-separated list here instead of
accepting a multi-valued config like we do for "remote.<name>.fetch".
Shouldn't we use the format here to make things more consistent?

>  remote.<name>.push::
>  	The default set of "refspec" for linkgit:git-push[1]. See
>  	linkgit:git-push[1].
> diff --git a/builtin/fetch.c b/builtin/fetch.c
> index b2b5aee5bf2..74603cfabe0 100644
> --- a/builtin/fetch.c
> +++ b/builtin/fetch.c
> @@ -485,6 +485,32 @@ static void filter_prefetch_refspec(struct refspec *rs)
>  	}
>  }
>  
> +static int pattern_matches_ref(const char *pattern, const char *refname)
> +{
> +	if (strchr(pattern, '*'))
> +		return match_refspec_name_with_pattern(pattern, refname, NULL, NULL) != 0;
> +	return strcmp(pattern, refname) == 0;
> +}
> +
> +static int matches_prefetch_refs(const char *refname, const struct string_list *prefetch_refs)
> +{
> +	int has_positive = 0, matched_positive = 0, matched_negative = 0;
> +
> +	for (int i = 0; i < prefetch_refs->nr; i++) {
> +		const char *pattern = prefetch_refs->items[i].string;
> +		int is_negative = (*pattern == '!');
> +		if (is_negative) pattern++;
> +		else has_positive = 1;
> +
> +		if (pattern_matches_ref(pattern, refname)) {
> +			if (is_negative) matched_negative = 1;
> +			else matched_positive = 1;
> +		}
> +	}
> +
> +	return has_positive ? (matched_positive && !matched_negative) : !matched_negative;
> +}

This is essentially open-coding a bunch of logic around how we parse
refspecs. I'd propose to instead use the APIs we already have in this
area, namely those in "refspec.h".

>  static struct ref *get_ref_map(struct remote *remote,
>  			       const struct ref *remote_refs,
>  			       struct refspec *rs,
> @@ -501,7 +527,11 @@ static struct ref *get_ref_map(struct remote *remote,
>  	struct hashmap existing_refs;
>  	int existing_refs_populated = 0;
>  
> +	struct ref *prefetch_filtered_ref_map = NULL, **ref_map_tail = &prefetch_filtered_ref_map;
> +	struct ref *next;
> +

We don't typically have empty lines between variable declarations.

>  	filter_prefetch_refspec(rs);
> +
>  	if (remote)
>  		filter_prefetch_refspec(&remote->fetch);
>  
> @@ -610,6 +640,29 @@ static struct ref *get_ref_map(struct remote *remote,
>  	else
>  		ref_map = apply_negative_refspecs(ref_map, &remote->fetch);
>  
> +	/**
> +	 * Filter out advertised refs that we don't want to fetch during
> +	 * prefetch if a prefetchref config is set
> +	 */

Our comments typically start with `/*`, not `/**`.

> +	if (prefetch && remote->prefetch_refs.nr) {
> +		prefetch_filtered_ref_map = NULL;
> +		ref_map_tail = &prefetch_filtered_ref_map;

As far as I can see, both of these variables have already been
initialized beforehand.

> +		for (rm = ref_map; rm; rm = next) {
> +			next = rm->next;
> +			rm->next = NULL;
> +
> +			if (matches_prefetch_refs(rm->name, &remote->prefetch_refs)) {
> +				*ref_map_tail = rm;
> +				ref_map_tail = &rm->next;
> +			} else {
> +				free_one_ref(rm);
> +			}
> +		}
> +		ref_map = prefetch_filtered_ref_map;
> +	}
> +
>  	ref_map = ref_remove_duplicates(ref_map);
>  
>  	for (rm = ref_map; rm; rm = rm->next) {
> diff --git a/remote.c b/remote.c
> index 8f3dee13186..6752c73370f 100644
> --- a/remote.c
> +++ b/remote.c
> @@ -141,6 +141,7 @@ static struct remote *make_remote(struct remote_state *remote_state,
>  	ret->prune = -1;  /* unspecified */
>  	ret->prune_tags = -1;  /* unspecified */
>  	ret->name = xstrndup(name, len);
> +	string_list_init_dup(&ret->prefetch_refs);
>  	refspec_init(&ret->push, REFSPEC_PUSH);
>  	refspec_init(&ret->fetch, REFSPEC_FETCH);
>  
> @@ -166,6 +167,7 @@ static void remote_clear(struct remote *remote)
>  	free((char *)remote->uploadpack);
>  	FREE_AND_NULL(remote->http_proxy);
>  	FREE_AND_NULL(remote->http_proxy_authmethod);
> +	string_list_clear(&remote->prefetch_refs, 0);
>  }
>  
>  static void add_merge(struct branch *branch, const char *name)
> @@ -456,6 +458,12 @@ static int handle_config(const char *key, const char *value,
>  		remote->prune = git_config_bool(key, value);
>  	else if (!strcmp(subkey, "prunetags"))
>  		remote->prune_tags = git_config_bool(key, value);
> +	else if (!strcmp(subkey, "prefetchref")) {
> +		if (!value)
> +			return config_error_nonbool(key);
> +		string_list_split(&remote->prefetch_refs, value, ' ', -1);
> +		return 0;
> +	}
>  	else if (!strcmp(subkey, "url")) {
>  		if (!value)
>  			return config_error_nonbool(key);
> @@ -868,7 +876,7 @@ struct strvec *push_url_of_remote(struct remote *remote)
>  	return remote->pushurl.nr ? &remote->pushurl : &remote->url;
>  }
>  
> -static int match_name_with_pattern(const char *key, const char *name,
> +int match_refspec_name_with_pattern(const char *key, const char *name,
>  				   const char *value, char **result)
>  {
>  	const char *kstar = strchr(key, '*');

Is this rename really necessary? It is not mentioned in the commit
message, so this is surprising to me. If it really was necessary it
should be split out into a separate commit that also explains why you
think that this is a good idea.

> diff --git a/remote.h b/remote.h
> index b901b56746d..9ffef206f23 100644
> --- a/remote.h
> +++ b/remote.h
> @@ -5,6 +5,7 @@
>  #include "hashmap.h"
>  #include "refspec.h"
>  #include "strvec.h"
> +#include "string-list.h"
>  
>  struct option;
>  struct transport_ls_refs_options;
> @@ -77,6 +78,8 @@ struct remote {
>  
>  	struct refspec fetch;
>  
> +	struct string_list prefetch_refs;
> +
>  	/*
>  	 * The setting for whether to fetch tags (as a separate rule from the
>  	 * configured refspecs);
> @@ -207,6 +210,9 @@ int count_refspec_match(const char *, struct ref *refs, struct ref **matched_ref
>  
>  int check_ref_type(const struct ref *ref, int flags);
>  
> +int match_refspec_name_with_pattern(const char *key, const char *name,
> +					const char *value, char **result);

I think instead of exposing this function we should rather expose
`refspec_match()`, which is at a higher level and knows to handle the
cases for us where the refspec is a pattern and when it's not.

Patrick
Phillip Wood Nov. 5, 2024, 2:45 p.m. UTC | #3
Hi Shubham

On 04/10/2024 21:21, Shubham Kanodia via GitGitGadget wrote:
> From: Shubham Kanodia <shubham.kanodia10@gmail.com>
> 

I agree with the Patrick's comments on the implementation, I've left a 
couple of test comments below.

> +test_expect_success 'prefetch with positive prefetch ref patterns' '
> +	test_create_repo filter-prefetch-positive &&
> +	(
> +		cd filter-prefetch-positive &&
> +		test_commit initial &&
> +		git clone . clone2 &&
> +		git remote add remote2 "file://$(pwd)/clone2" &&
> +
> +		cd clone2 &&
> +		git checkout -b feature && test_commit feature-commit-2 &&
> +		git checkout -b wip/test && test_commit wip-test-commit-2 &&
> +		git checkout -b topic/x && test_commit topic-x-commit-2 &&
> +		git push -f origin feature wip/test topic/x &&
> +		cd .. &&

I think it would make sense to have a blank line before this rather than 
after it so "cd" is grouped with the commands executed in that directory.

> +		git config remote.remote2.prefetchref "refs/heads/feature" &&
> +		fetchargs="--prefetch --prune --no-tags --no-write-fetch-head \
> +			--recurse-submodules=no --quiet" &&
> +		GIT_TRACE2_EVENT="$(pwd)/prefetch-positive.txt" \
> +			git maintenance run --task=prefetch 2>/dev/null &&
> +		test_subcommand git fetch remote2 $fetchargs <prefetch-positive.txt &&

This seems to be testing what "git maintenance" runs which is not really 
related to testing the prefetch ref pattern matching. I think just

	git maintenance run --task=prefetch &&

would be sufficient. We certainly should not be redirecting stderr to 
/dev/null as that hides any error messages that are helpful when 
debugging test failures.

> +		git rev-parse refs/prefetch/remotes/remote2/feature &&
> +		test_must_fail git rev-parse refs/prefetch/remotes/remote2/wip/test &&
> +		test_must_fail git rev-parse refs/prefetch/remotes/remote2/topic/x

these are the important tests for checking the prefetch pattern 
matching. We should perhaps be using "git rev-parse --verify"

The test coverage looks good

Best Wishes

Phillip

> +	)
> +'
> +
> +test_expect_success 'prefetch with negative prefetch ref patterns' '
> +	test_create_repo filter-prefetch-negative &&
> +	(
> +		cd filter-prefetch-negative &&
> +		test_commit initial &&
> +		git clone . clone3 &&
> +		git remote add remote3 "file://$(pwd)/clone3" &&
> +		cat .git/config &&
> +
> +		cd clone3 &&
> +		git checkout -b feature && test_commit feature-commit-3 &&
> +		git checkout -b wip/test && test_commit wip-test-commit-3 &&
> +		git checkout -b topic/x && test_commit topic-x-commit-3 &&
> +		git push -f origin feature wip/test topic/x &&
> +		cd .. &&
> +
> +		git config remote.remote3.prefetchref "!refs/heads/wip/*" &&
> +		fetchargs="--prefetch --prune --no-tags --no-write-fetch-head \
> +			--recurse-submodules=no --quiet" &&
> +		GIT_TRACE2_EVENT="$(pwd)/prefetch-negative.txt" \
> +			git maintenance run --task=prefetch 2>/dev/null &&
> +		test_subcommand git fetch remote3 $fetchargs <prefetch-negative.txt &&
> +		git rev-parse refs/prefetch/remotes/remote3/feature &&
> +		git rev-parse refs/prefetch/remotes/remote3/topic/x &&
> +		test_must_fail git rev-parse refs/prefetch/remotes/remote3/wip/test
> +	)
> +'
> +
> +test_expect_success 'prefetch with positive & negative prefetch ref patterns' '
> +	test_create_repo filter-prefetch-mixed &&
> +	(
> +		cd filter-prefetch-mixed &&
> +		test_commit initial &&
> +		git clone . clone4 &&
> +		git remote add remote4 "file://$(pwd)/clone4" &&
> +
> +		cd clone4 &&
> +		git checkout -b feature && test_commit feature-commit-4 &&
> +		git checkout -b topic/x && test_commit topic-x-commit-4 &&
> +		git checkout -b topic/y && test_commit topic-y-commit-4 &&
> +		git push -f origin feature topic/x topic/y &&
> +		cd .. &&
> +
> +		git config remote.remote4.prefetchref \
> +			"refs/heads/topic/* !refs/heads/topic/y" &&
> +		fetchargs="--prefetch --prune --no-tags --no-write-fetch-head \
> +			--recurse-submodules=no --quiet" &&
> +		GIT_TRACE2_EVENT="$(pwd)/prefetch-mixed.txt" \
> +			git maintenance run --task=prefetch 2>/dev/null &&
> +		test_subcommand git fetch remote4 $fetchargs <prefetch-mixed.txt &&
> +
> +		test_must_fail git rev-parse refs/prefetch/remotes/remote4/feature &&
> +		test_must_fail git rev-parse refs/prefetch/remotes/remote4/topic/y &&
> +		git rev-parse refs/prefetch/remotes/remote4/topic/x
> +	)
> +'
> +
>   test_expect_success 'loose-objects task' '
>   	# Repack everything so we know the state of the object dir
>   	git repack -adk &&
> 
> base-commit: 2e7b89e038c0c888acf61f1b4ee5a43d4dd5e94c
Phillip Wood Nov. 5, 2024, 2:47 p.m. UTC | #4
On 05/11/2024 06:45, Patrick Steinhardt wrote:
> On Fri, Oct 04, 2024 at 08:21:32PM +0000, Shubham Kanodia via GitGitGadget wrote:
> 
>> diff --git a/Documentation/config/remote.txt b/Documentation/config/remote.txt
>> index 8efc53e836d..186f439ed7b 100644
>> --- a/Documentation/config/remote.txt
>> +++ b/Documentation/config/remote.txt
>> @@ -33,6 +33,13 @@ remote.<name>.fetch::
>>   	The default set of "refspec" for linkgit:git-fetch[1]. See
>>   	linkgit:git-fetch[1].
>>   
>> +remote.<name>.prefetchref::
>> +	Specify the refs to be prefetched when fetching from this
>> +	remote. The value is a space-separated list of ref patterns
>> +	(e.g., "refs/heads/main !refs/heads/develop*"). This can be
>> +	used to optimize fetch operations by specifying exactly which
>> +	refs should be prefetched.
> 
> I'm a bit surprised that we use a space-separated list here instead of
> accepting a multi-valued config like we do for "remote.<name>.fetch".
> Shouldn't we use the format here to make things more consistent?

I agree that would be a good idea. I also think that it would be helpful 
to expand the documentation to explain exactly how the patterns are 
matched. I think "remote.<name>.prefetch" would better match the 
existing "remote.<name>.fetch" and "remote.<push>.name" config key names.

Best Wishes

Phillip
Shubham Kanodia Nov. 5, 2024, 4:26 p.m. UTC | #5
On Tue, Nov 5, 2024 at 8:17 PM Phillip Wood <phillip.wood123@gmail.com> wrote:
>
> On 05/11/2024 06:45, Patrick Steinhardt wrote:
> > On Fri, Oct 04, 2024 at 08:21:32PM +0000, Shubham Kanodia via GitGitGadget wrote:
> >
> >> diff --git a/Documentation/config/remote.txt b/Documentation/config/remote.txt
> >> index 8efc53e836d..186f439ed7b 100644
> >> --- a/Documentation/config/remote.txt
> >> +++ b/Documentation/config/remote.txt
> >> @@ -33,6 +33,13 @@ remote.<name>.fetch::
> >>      The default set of "refspec" for linkgit:git-fetch[1]. See
> >>      linkgit:git-fetch[1].
> >>
> >> +remote.<name>.prefetchref::
> >> +    Specify the refs to be prefetched when fetching from this
> >> +    remote. The value is a space-separated list of ref patterns
> >> +    (e.g., "refs/heads/main !refs/heads/develop*"). This can be
> >> +    used to optimize fetch operations by specifying exactly which
> >> +    refs should be prefetched.
> >
> > I'm a bit surprised that we use a space-separated list here instead of
> > accepting a multi-valued config like we do for "remote.<name>.fetch".
> > Shouldn't we use the format here to make things more consistent?
>
> I agree that would be a good idea. I also think that it would be helpful
> to expand the documentation to explain exactly how the patterns are
> matched. I think "remote.<name>.prefetch" would better match the
> existing "remote.<name>.fetch" and "remote.<push>.name" config key names.
>
> Best Wishes
>
> Phillip
>

Thanks for taking the time to look at this!

Let me add context here based on a few things that have already been discussed
as a part of this thread or an earlier discussion on
https://lore.kernel.org/git/CAG=Um+1wTbXn_RN+LOCrpZpSNR_QF582PszxNyhz5anVHtBp+w@mail.gmail.com/

> I'm coming rather late to the party and simply want to review this so
> that the thread gets revived. So my context may be lacking, please
> forgive me if I am reopening things that were already discussed.

I don't have a particular preference here, and this was discussed in
an earlier thread
where Junio opined (https://lore.kernel.org/git/xmqq5xrcn2k1.fsf@gitster.g/—

> I agree that it is the right place to configure this as attributes
> to remotes.  It would make it handy if we could give a catch-all
> configuration, though.  For example:
>
> [remote "origin"]
>       prefetch = true
>        prefetchref = refs/heads/* refs/tags/*
> [remote "*"]
>        prefetch = false
>
> may toggle prefetch off for all remotes, except that the tags and
> the local branches of the remote "origin" are prefetched.  Instead
> of a multi-value configuration variable (like remote.*.fetch) where
> we need to worry about clearing convention, we can use a regular
> "last one wins" variable that is whitespace separated patterns, as
> such a pattern can never have a whitespace in it.

which is what my implementation is based on.

> This is essentially open-coding a bunch of logic around how we parse
> refspecs. I'd propose to instead use the APIs we already have in this
> area, namely those in "refspec.h".

Is there any that you would suggest? I don't see anything in `refspec.h` that
might cover a similar logic. I'd previously used `wildmatch.h`, but
that is really
only suitable for glob-style wildcards.

> Is this rename really necessary? It is not mentioned in the commit
> message, so this is surprising to me

What was previously `match_name_with_pattern` is now exposed out
from `remote.c` to be used outside. I thought given that its now applicable
outside `remote.c` it might be clearer to rename it to be more explicit
(match name of what? remote? refspec? etc)

However I agree that the higher level `refspec_match()` is a better
function to expose
anyway, and am happy to make that change.
Junio C Hamano Nov. 6, 2024, 12:39 a.m. UTC | #6
Shubham Kanodia <shubham.kanodia10@gmail.com> writes:

> I don't have a particular preference here, and this was discussed in
> an earlier thread
> where Junio opined (https://lore.kernel.org/git/xmqq5xrcn2k1.fsf@gitster.g/—
>
>> I agree that it is the right place to configure this as attributes
>> to remotes.  It would make it handy if we could give a catch-all
>> configuration, though.  For example:
>>
>> [remote "origin"]
>>       prefetch = true
>>        prefetchref = refs/heads/* refs/tags/*
>> [remote "*"]
>>        prefetch = false
>>
>> may toggle prefetch off for all remotes, except that the tags and
>> the local branches of the remote "origin" are prefetched.  Instead
>> of a multi-value configuration variable (like remote.*.fetch) where
>> we need to worry about clearing convention, we can use a regular
>> "last one wins" variable that is whitespace separated patterns, as
>> such a pattern can never have a whitespace in it.
> which is what my implementation is based on.

I am fine with space separated list or multi-valued variable.  The
only difference is that with multi-valued list, we'd need to worry
about ensuring that we have a way to "clear" the values we have seen
so far.  It has plenty of precedence and is not a rocket science.
The above, if I recall correctly, was solely about the need for
"catch-all default" (aka "*" remote) and not about multi-value vs
space separated last-one-wins value at all.  IOW, the above could
have been

    [remote "origin"]
	prefetch = true
	prefetchref = refs/heads/*
	prefetchref = refs/tags/*
    [remote "*"]
	prefetch = false

and conveyed exactly what I wanted to say in the message you quoted.

In any case, I somehow thought that we discarded the arrangement
with "*" wildcard as unworkable.  If I remember the discussion
before I left correctly, didn't it turn out to be troublesome to
have [remote "*"] section because existing code would need to
enumerate configured remotes, and we do not want to see "*" listed?
If we found a workable solution to that while I was away, that would
be great, but I haven't looked at what this latest round of the
series does to solve it (yet).  Perhaps teaching "git remote" and
"git fetch --all" to skip "*" while enumerating remotes was
sufficient?  I dunno.

Thanks.
Patrick Steinhardt Nov. 6, 2024, 6:46 a.m. UTC | #7
On Tue, Nov 05, 2024 at 09:56:52PM +0530, Shubham Kanodia wrote:
> On Tue, Nov 5, 2024 at 8:17 PM Phillip Wood <phillip.wood123@gmail.com> wrote:
> > On 05/11/2024 06:45, Patrick Steinhardt wrote:
> > > On Fri, Oct 04, 2024 at 08:21:32PM +0000, Shubham Kanodia via GitGitGadget wrote:
> > >> diff --git a/Documentation/config/remote.txt b/Documentation/config/remote.txt
> > >> index 8efc53e836d..186f439ed7b 100644
> > >> --- a/Documentation/config/remote.txt
> > >> +++ b/Documentation/config/remote.txt
> > >> @@ -33,6 +33,13 @@ remote.<name>.fetch::
> > >>      The default set of "refspec" for linkgit:git-fetch[1]. See
> > >>      linkgit:git-fetch[1].
> > >>
> > >> +remote.<name>.prefetchref::
> > >> +    Specify the refs to be prefetched when fetching from this
> > >> +    remote. The value is a space-separated list of ref patterns
> > >> +    (e.g., "refs/heads/main !refs/heads/develop*"). This can be
> > >> +    used to optimize fetch operations by specifying exactly which
> > >> +    refs should be prefetched.
> > >
> > > I'm a bit surprised that we use a space-separated list here instead of
> > > accepting a multi-valued config like we do for "remote.<name>.fetch".
> > > Shouldn't we use the format here to make things more consistent?
> >
> > I agree that would be a good idea. I also think that it would be helpful
> > to expand the documentation to explain exactly how the patterns are
> > matched. I think "remote.<name>.prefetch" would better match the
> > existing "remote.<name>.fetch" and "remote.<push>.name" config key names.
> >
> > Best Wishes
> >
> > Phillip
> >
> 
> Thanks for taking the time to look at this!
> 
> Let me add context here based on a few things that have already been discussed
> as a part of this thread or an earlier discussion on
> https://lore.kernel.org/git/CAG=Um+1wTbXn_RN+LOCrpZpSNR_QF582PszxNyhz5anVHtBp+w@mail.gmail.com/
> 
> > I'm coming rather late to the party and simply want to review this so
> > that the thread gets revived. So my context may be lacking, please
> > forgive me if I am reopening things that were already discussed.
> 
> I don't have a particular preference here, and this was discussed in
> an earlier thread
> where Junio opined (https://lore.kernel.org/git/xmqq5xrcn2k1.fsf@gitster.g/—

Fair enough, thanks for the pointer! The reason stated by Junio is that
having a space-separated list of refspecs makes it easier to reset the
refspec again, as we can simply use a "last-one-wins" pattern. And while
I understand that, I would claim that we already have a properly
established way to reset multi-valued lists by first assigning an empty
value:

    [remote "origin"]
        prefetchref = refs/heads/*
        prefetchref = refs/tags/*
        prefetchref =
        prefetchref = refs/heads/*

The final value would be only "refs/heads/*" due to the reset.

So overall I'm still leaning into the direction of making this a
multi-valued list for the sake of consistency with other refspec
configs. @Junio Let me know in case you disagree though.

> > I agree that it is the right place to configure this as attributes
> > to remotes.  It would make it handy if we could give a catch-all
> > configuration, though.  For example:
> >
> > [remote "origin"]
> >       prefetch = true
> >        prefetchref = refs/heads/* refs/tags/*
> > [remote "*"]
> >        prefetch = false
> >
> > may toggle prefetch off for all remotes, except that the tags and
> > the local branches of the remote "origin" are prefetched.  Instead
> > of a multi-value configuration variable (like remote.*.fetch) where
> > we need to worry about clearing convention, we can use a regular
> > "last one wins" variable that is whitespace separated patterns, as
> > such a pattern can never have a whitespace in it.
> 
> which is what my implementation is based on.
> 
> > This is essentially open-coding a bunch of logic around how we parse
> > refspecs. I'd propose to instead use the APIs we already have in this
> > area, namely those in "refspec.h".
> 
> Is there any that you would suggest? I don't see anything in `refspec.h` that
> might cover a similar logic. I'd previously used `wildmatch.h`, but
> that is really
> only suitable for glob-style wildcards.

The logic around refspecs is somewhat split up between different headers
indeed. "refspec.h" really only concerns parsing of refspecs, but does
not provide the interfaces to also use them to translate or match refs.

I think the closest to what you need is `omit_name_by_refspec()`: you
give it a refspec and a refname, and it returns whether any of the
refspec items matches that ref. This also takes into oaccount whether
the refspec item is negated ("!" prefix") or whether it is a pattern
(contains "*").

Ideally, we'd move interfaces that provide basic refspec functionality
into "refspec.h" so that they are easier to discover. But I'm fine with
making this a #leftoverbit, I don't want to push the burden on you.

Patrick
Patrick Steinhardt Nov. 6, 2024, 6:52 a.m. UTC | #8
On Tue, Nov 05, 2024 at 04:39:06PM -0800, Junio C Hamano wrote:
> Shubham Kanodia <shubham.kanodia10@gmail.com> writes:
> 
> > I don't have a particular preference here, and this was discussed in
> > an earlier thread
> > where Junio opined (https://lore.kernel.org/git/xmqq5xrcn2k1.fsf@gitster.g/—
> >
> >> I agree that it is the right place to configure this as attributes
> >> to remotes.  It would make it handy if we could give a catch-all
> >> configuration, though.  For example:
> >>
> >> [remote "origin"]
> >>       prefetch = true
> >>        prefetchref = refs/heads/* refs/tags/*
> >> [remote "*"]
> >>        prefetch = false
> >>
> >> may toggle prefetch off for all remotes, except that the tags and
> >> the local branches of the remote "origin" are prefetched.  Instead
> >> of a multi-value configuration variable (like remote.*.fetch) where
> >> we need to worry about clearing convention, we can use a regular
> >> "last one wins" variable that is whitespace separated patterns, as
> >> such a pattern can never have a whitespace in it.
> > which is what my implementation is based on.
> 
> I am fine with space separated list or multi-valued variable.  The
> only difference is that with multi-valued list, we'd need to worry
> about ensuring that we have a way to "clear" the values we have seen
> so far.  It has plenty of precedence and is not a rocket science.
> The above, if I recall correctly, was solely about the need for
> "catch-all default" (aka "*" remote) and not about multi-value vs
> space separated last-one-wins value at all.  IOW, the above could
> have been
> 
>     [remote "origin"]
> 	prefetch = true
> 	prefetchref = refs/heads/*
> 	prefetchref = refs/tags/*
>     [remote "*"]
> 	prefetch = false
> 
> and conveyed exactly what I wanted to say in the message you quoted.

Ah, I missed your mail here. I replied to this bit in a parallel email.

> In any case, I somehow thought that we discarded the arrangement
> with "*" wildcard as unworkable.  If I remember the discussion
> before I left correctly, didn't it turn out to be troublesome to
> have [remote "*"] section because existing code would need to
> enumerate configured remotes, and we do not want to see "*" listed?

I wouldn't say unworkable, but it certainly isn't as easy as just adding
the new syntax.

> If we found a workable solution to that while I was away, that would
> be great, but I haven't looked at what this latest round of the
> series does to solve it (yet).  Perhaps teaching "git remote" and
> "git fetch --all" to skip "*" while enumerating remotes was
> sufficient?  I dunno.

So yes, we'd have to teach Git to ignore "*" remotes in many places. I
would hope that it isn't all that involved and that we only need to
adjust a couple of places to ignore "*". But the remote logic is
somewhat outside of my area of expertise, so my hope might be misplaced.

If so, we might think about using a different syntax to achieve the same
thing.

Patrick
Junio C Hamano Nov. 6, 2024, 8:20 a.m. UTC | #9
Patrick Steinhardt <ps@pks.im> writes:

> So yes, we'd have to teach Git to ignore "*" remotes in many places. I
> would hope that it isn't all that involved and that we only need to
> adjust a couple of places to ignore "*". But the remote logic is
> somewhat outside of my area of expertise, so my hope might be misplaced.

I am hoping that it would not be too painful to exclude '*' from
enumeration, and also, "git fetch '*'" and other things that expect
the name of a remote do not say silly things like "ah, there is
"remote.*.<something>" configuration defined, so '*' must be a name
of a remote" ;-)

It might take certain refactoring, but if it results in a cleaner
codebase, that is also a welcome outcome.

Thanks.
Phillip Wood Nov. 6, 2024, 11:04 a.m. UTC | #10
On 06/11/2024 06:46, Patrick Steinhardt wrote:
> On Tue, Nov 05, 2024 at 09:56:52PM +0530, Shubham Kanodia wrote:
>>
>> Let me add context here based on a few things that have already been discussed
>> as a part of this thread or an earlier discussion on
>> https://lore.kernel.org/git/CAG=Um+1wTbXn_RN+LOCrpZpSNR_QF582PszxNyhz5anVHtBp+w@mail.gmail.com/
>>
>>> I'm coming rather late to the party and simply want to review this so
>>> that the thread gets revived. So my context may be lacking, please
>>> forgive me if I am reopening things that were already discussed.
>>
>> I don't have a particular preference here, and this was discussed in
>> an earlier thread
>> where Junio opined (https://lore.kernel.org/git/xmqq5xrcn2k1.fsf@gitster.g/—
> 
> Fair enough, thanks for the pointer! The reason stated by Junio is that
> having a space-separated list of refspecs makes it easier to reset the
> refspec again, as we can simply use a "last-one-wins" pattern. And while
> I understand that, I would claim that we already have a properly
> established way to reset multi-valued lists by first assigning an empty
> value:
> 
>      [remote "origin"]
>          prefetchref = refs/heads/*
>          prefetchref = refs/tags/*
>          prefetchref =
>          prefetchref = refs/heads/*
> 
> The final value would be only "refs/heads/*" due to the reset.
> 
> So overall I'm still leaning into the direction of making this a
> multi-valued list for the sake of consistency with other refspec
> configs. @Junio Let me know in case you disagree though.

It is also easier to manipulate the list with "git config" when it is 
multivalued as one can add values and "git config --add" and remove 
specific entries with "git config --unset <key> <pattern>". With a 
single entry to append a new value one has to resort to something like

	git config <key> "$(echo $(git config <key>) <new-value>)"

which is not very user friendly. Deleting a value is even less friendly.

Best Wishes

Phillip
diff mbox series

Patch

diff --git a/Documentation/config/remote.txt b/Documentation/config/remote.txt
index 8efc53e836d..186f439ed7b 100644
--- a/Documentation/config/remote.txt
+++ b/Documentation/config/remote.txt
@@ -33,6 +33,13 @@  remote.<name>.fetch::
 	The default set of "refspec" for linkgit:git-fetch[1]. See
 	linkgit:git-fetch[1].
 
+remote.<name>.prefetchref::
+	Specify the refs to be prefetched when fetching from this
+	remote. The value is a space-separated list of ref patterns
+	(e.g., "refs/heads/main !refs/heads/develop*"). This can be
+	used to optimize fetch operations by specifying exactly which
+	refs should be prefetched.
+
 remote.<name>.push::
 	The default set of "refspec" for linkgit:git-push[1]. See
 	linkgit:git-push[1].
diff --git a/builtin/fetch.c b/builtin/fetch.c
index b2b5aee5bf2..74603cfabe0 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -485,6 +485,32 @@  static void filter_prefetch_refspec(struct refspec *rs)
 	}
 }
 
+static int pattern_matches_ref(const char *pattern, const char *refname)
+{
+	if (strchr(pattern, '*'))
+		return match_refspec_name_with_pattern(pattern, refname, NULL, NULL) != 0;
+	return strcmp(pattern, refname) == 0;
+}
+
+static int matches_prefetch_refs(const char *refname, const struct string_list *prefetch_refs)
+{
+	int has_positive = 0, matched_positive = 0, matched_negative = 0;
+
+	for (int i = 0; i < prefetch_refs->nr; i++) {
+		const char *pattern = prefetch_refs->items[i].string;
+		int is_negative = (*pattern == '!');
+		if (is_negative) pattern++;
+		else has_positive = 1;
+
+		if (pattern_matches_ref(pattern, refname)) {
+			if (is_negative) matched_negative = 1;
+			else matched_positive = 1;
+		}
+	}
+
+	return has_positive ? (matched_positive && !matched_negative) : !matched_negative;
+}
+
 static struct ref *get_ref_map(struct remote *remote,
 			       const struct ref *remote_refs,
 			       struct refspec *rs,
@@ -501,7 +527,11 @@  static struct ref *get_ref_map(struct remote *remote,
 	struct hashmap existing_refs;
 	int existing_refs_populated = 0;
 
+	struct ref *prefetch_filtered_ref_map = NULL, **ref_map_tail = &prefetch_filtered_ref_map;
+	struct ref *next;
+
 	filter_prefetch_refspec(rs);
+
 	if (remote)
 		filter_prefetch_refspec(&remote->fetch);
 
@@ -610,6 +640,29 @@  static struct ref *get_ref_map(struct remote *remote,
 	else
 		ref_map = apply_negative_refspecs(ref_map, &remote->fetch);
 
+	/**
+	 * Filter out advertised refs that we don't want to fetch during
+	 * prefetch if a prefetchref config is set
+	 */
+
+	if (prefetch && remote->prefetch_refs.nr) {
+		prefetch_filtered_ref_map = NULL;
+		ref_map_tail = &prefetch_filtered_ref_map;
+
+		for (rm = ref_map; rm; rm = next) {
+			next = rm->next;
+			rm->next = NULL;
+
+			if (matches_prefetch_refs(rm->name, &remote->prefetch_refs)) {
+				*ref_map_tail = rm;
+				ref_map_tail = &rm->next;
+			} else {
+				free_one_ref(rm);
+			}
+		}
+		ref_map = prefetch_filtered_ref_map;
+	}
+
 	ref_map = ref_remove_duplicates(ref_map);
 
 	for (rm = ref_map; rm; rm = rm->next) {
diff --git a/remote.c b/remote.c
index 8f3dee13186..6752c73370f 100644
--- a/remote.c
+++ b/remote.c
@@ -141,6 +141,7 @@  static struct remote *make_remote(struct remote_state *remote_state,
 	ret->prune = -1;  /* unspecified */
 	ret->prune_tags = -1;  /* unspecified */
 	ret->name = xstrndup(name, len);
+	string_list_init_dup(&ret->prefetch_refs);
 	refspec_init(&ret->push, REFSPEC_PUSH);
 	refspec_init(&ret->fetch, REFSPEC_FETCH);
 
@@ -166,6 +167,7 @@  static void remote_clear(struct remote *remote)
 	free((char *)remote->uploadpack);
 	FREE_AND_NULL(remote->http_proxy);
 	FREE_AND_NULL(remote->http_proxy_authmethod);
+	string_list_clear(&remote->prefetch_refs, 0);
 }
 
 static void add_merge(struct branch *branch, const char *name)
@@ -456,6 +458,12 @@  static int handle_config(const char *key, const char *value,
 		remote->prune = git_config_bool(key, value);
 	else if (!strcmp(subkey, "prunetags"))
 		remote->prune_tags = git_config_bool(key, value);
+	else if (!strcmp(subkey, "prefetchref")) {
+		if (!value)
+			return config_error_nonbool(key);
+		string_list_split(&remote->prefetch_refs, value, ' ', -1);
+		return 0;
+	}
 	else if (!strcmp(subkey, "url")) {
 		if (!value)
 			return config_error_nonbool(key);
@@ -868,7 +876,7 @@  struct strvec *push_url_of_remote(struct remote *remote)
 	return remote->pushurl.nr ? &remote->pushurl : &remote->url;
 }
 
-static int match_name_with_pattern(const char *key, const char *name,
+int match_refspec_name_with_pattern(const char *key, const char *name,
 				   const char *value, char **result)
 {
 	const char *kstar = strchr(key, '*');
@@ -900,7 +908,7 @@  static int refspec_match(const struct refspec_item *refspec,
 			 const char *name)
 {
 	if (refspec->pattern)
-		return match_name_with_pattern(refspec->src, name, NULL, NULL);
+		return match_refspec_name_with_pattern(refspec->src, name, NULL, NULL);
 
 	return !strcmp(refspec->src, name);
 }
@@ -969,7 +977,7 @@  static int query_matches_negative_refspec(struct refspec *rs, struct refspec_ite
 			const char *key = refspec->dst ? refspec->dst : refspec->src;
 			const char *value = refspec->src;
 
-			if (match_name_with_pattern(key, needle, value, &expn_name))
+			if (match_refspec_name_with_pattern(key, needle, value, &expn_name))
 				string_list_append_nodup(&reversed, expn_name);
 		} else if (refspec->matching) {
 			/* For the special matching refspec, any query should match */
@@ -1014,7 +1022,7 @@  static void query_refspecs_multiple(struct refspec *rs,
 		if (!refspec->dst || refspec->negative)
 			continue;
 		if (refspec->pattern) {
-			if (match_name_with_pattern(key, needle, value, result))
+			if (match_refspec_name_with_pattern(key, needle, value, result))
 				string_list_append_nodup(results, *result);
 		} else if (!strcmp(needle, key)) {
 			string_list_append(results, value);
@@ -1043,7 +1051,7 @@  int query_refspecs(struct refspec *rs, struct refspec_item *query)
 		if (!refspec->dst || refspec->negative)
 			continue;
 		if (refspec->pattern) {
-			if (match_name_with_pattern(key, needle, value, result)) {
+			if (match_refspec_name_with_pattern(key, needle, value, result)) {
 				query->force = refspec->force;
 				return 0;
 			}
@@ -1456,9 +1464,9 @@  static char *get_ref_match(const struct refspec *rs, const struct ref *ref,
 			const char *dst_side = item->dst ? item->dst : item->src;
 			int match;
 			if (direction == FROM_SRC)
-				match = match_name_with_pattern(item->src, ref->name, dst_side, &name);
+				match = match_refspec_name_with_pattern(item->src, ref->name, dst_side, &name);
 			else
-				match = match_name_with_pattern(dst_side, ref->name, item->src, &name);
+				match = match_refspec_name_with_pattern(dst_side, ref->name, item->src, &name);
 			if (match) {
 				matching_refs = i;
 				break;
@@ -2076,7 +2084,7 @@  static struct ref *get_expanded_map(const struct ref *remote_refs,
 
 		if (strchr(ref->name, '^'))
 			continue; /* a dereference item */
-		if (match_name_with_pattern(refspec->src, ref->name,
+		if (match_refspec_name_with_pattern(refspec->src, ref->name,
 					    refspec->dst, &expn_name) &&
 		    !ignore_symref_update(expn_name, &scratch)) {
 			struct ref *cpy = copy_ref(ref);
diff --git a/remote.h b/remote.h
index b901b56746d..9ffef206f23 100644
--- a/remote.h
+++ b/remote.h
@@ -5,6 +5,7 @@ 
 #include "hashmap.h"
 #include "refspec.h"
 #include "strvec.h"
+#include "string-list.h"
 
 struct option;
 struct transport_ls_refs_options;
@@ -77,6 +78,8 @@  struct remote {
 
 	struct refspec fetch;
 
+	struct string_list prefetch_refs;
+
 	/*
 	 * The setting for whether to fetch tags (as a separate rule from the
 	 * configured refspecs);
@@ -207,6 +210,9 @@  int count_refspec_match(const char *, struct ref *refs, struct ref **matched_ref
 
 int check_ref_type(const struct ref *ref, int flags);
 
+int match_refspec_name_with_pattern(const char *key, const char *name,
+					const char *value, char **result);
+
 /*
  * Free a single ref and its peer, or an entire list of refs and their peers,
  * respectively.
diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh
index abae7a97546..fc1b5d14e75 100755
--- a/t/t7900-maintenance.sh
+++ b/t/t7900-maintenance.sh
@@ -245,6 +245,91 @@  test_expect_success 'prefetch multiple remotes' '
 	test_subcommand git fetch remote2 $fetchargs <skip-remote1.txt
 '
 
+test_expect_success 'prefetch with positive prefetch ref patterns' '
+	test_create_repo filter-prefetch-positive &&
+	(
+		cd filter-prefetch-positive &&
+		test_commit initial &&
+		git clone . clone2 &&
+		git remote add remote2 "file://$(pwd)/clone2" &&
+
+		cd clone2 &&
+		git checkout -b feature && test_commit feature-commit-2 &&
+		git checkout -b wip/test && test_commit wip-test-commit-2 &&
+		git checkout -b topic/x && test_commit topic-x-commit-2 &&
+		git push -f origin feature wip/test topic/x &&
+		cd .. &&
+
+		git config remote.remote2.prefetchref "refs/heads/feature" &&
+		fetchargs="--prefetch --prune --no-tags --no-write-fetch-head \
+			--recurse-submodules=no --quiet" &&
+		GIT_TRACE2_EVENT="$(pwd)/prefetch-positive.txt" \
+			git maintenance run --task=prefetch 2>/dev/null &&
+		test_subcommand git fetch remote2 $fetchargs <prefetch-positive.txt &&
+
+		git rev-parse refs/prefetch/remotes/remote2/feature &&
+		test_must_fail git rev-parse refs/prefetch/remotes/remote2/wip/test &&
+		test_must_fail git rev-parse refs/prefetch/remotes/remote2/topic/x
+	)
+'
+
+test_expect_success 'prefetch with negative prefetch ref patterns' '
+	test_create_repo filter-prefetch-negative &&
+	(
+		cd filter-prefetch-negative &&
+		test_commit initial &&
+		git clone . clone3 &&
+		git remote add remote3 "file://$(pwd)/clone3" &&
+		cat .git/config &&
+
+		cd clone3 &&
+		git checkout -b feature && test_commit feature-commit-3 &&
+		git checkout -b wip/test && test_commit wip-test-commit-3 &&
+		git checkout -b topic/x && test_commit topic-x-commit-3 &&
+		git push -f origin feature wip/test topic/x &&
+		cd .. &&
+
+		git config remote.remote3.prefetchref "!refs/heads/wip/*" &&
+		fetchargs="--prefetch --prune --no-tags --no-write-fetch-head \
+			--recurse-submodules=no --quiet" &&
+		GIT_TRACE2_EVENT="$(pwd)/prefetch-negative.txt" \
+			git maintenance run --task=prefetch 2>/dev/null &&
+		test_subcommand git fetch remote3 $fetchargs <prefetch-negative.txt &&
+		git rev-parse refs/prefetch/remotes/remote3/feature &&
+		git rev-parse refs/prefetch/remotes/remote3/topic/x &&
+		test_must_fail git rev-parse refs/prefetch/remotes/remote3/wip/test
+	)
+'
+
+test_expect_success 'prefetch with positive & negative prefetch ref patterns' '
+	test_create_repo filter-prefetch-mixed &&
+	(
+		cd filter-prefetch-mixed &&
+		test_commit initial &&
+		git clone . clone4 &&
+		git remote add remote4 "file://$(pwd)/clone4" &&
+
+		cd clone4 &&
+		git checkout -b feature && test_commit feature-commit-4 &&
+		git checkout -b topic/x && test_commit topic-x-commit-4 &&
+		git checkout -b topic/y && test_commit topic-y-commit-4 &&
+		git push -f origin feature topic/x topic/y &&
+		cd .. &&
+
+		git config remote.remote4.prefetchref \
+			"refs/heads/topic/* !refs/heads/topic/y" &&
+		fetchargs="--prefetch --prune --no-tags --no-write-fetch-head \
+			--recurse-submodules=no --quiet" &&
+		GIT_TRACE2_EVENT="$(pwd)/prefetch-mixed.txt" \
+			git maintenance run --task=prefetch 2>/dev/null &&
+		test_subcommand git fetch remote4 $fetchargs <prefetch-mixed.txt &&
+
+		test_must_fail git rev-parse refs/prefetch/remotes/remote4/feature &&
+		test_must_fail git rev-parse refs/prefetch/remotes/remote4/topic/y &&
+		git rev-parse refs/prefetch/remotes/remote4/topic/x
+	)
+'
+
 test_expect_success 'loose-objects task' '
 	# Repack everything so we know the state of the object dir
 	git repack -adk &&