diff mbox series

[v3,04/14] dir: select directories correctly

Message ID 0252c7ee15c3cb364a7610b63d74c193cbda0728.1632159937.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series Sparse-checkout: modify 'git add', 'git rm', and 'git mv' behavior | expand

Commit Message

Derrick Stolee Sept. 20, 2021, 5:45 p.m. UTC
From: Derrick Stolee <dstolee@microsoft.com>

When matching a path against a list of patterns, the patterns that
require a directory match previously did not work when a filename was
specified.
This was fine when all pattern-matching was done within methods such as
unpack_trees() that check a directory before recursing into the
contained files. However, other commands will start matching individual
files against pattern lists without that recursive approach.

We modify path_matches_dir_pattern() to take 'path_parent', a pointer to
a strbuf pointer, which is used to cache the parent directory of
'pathname' between multiple pattern matching tests. The strbuf is
allocated and filled lazily, only on the first pattern that has the
PATTERN_FLAG_MUSTBEDIR flag.

If we find that a path has a parent directory, we start by checking to
see if that parent directory matches the pattern. If so, then we do not
need to query the index for the type (which can be expensive). If we
find that the parent does not match, then we still must check the type
from the index for the given pathname.

Note that this does not affect cone mode pattern matching, but instead
the more general -- and slower -- full pattern set. Thus, this does not
affect the sparse index.

Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 dir.c | 36 ++++++++++++++++++++++++++++++++++--
 1 file changed, 34 insertions(+), 2 deletions(-)
diff mbox series

Patch

diff --git a/dir.c b/dir.c
index 652135df896..9ea6cfe61cb 100644
--- a/dir.c
+++ b/dir.c
@@ -1305,10 +1305,35 @@  int match_pathname(const char *pathname, int pathlen,
 
 static int path_matches_dir_pattern(const char *pathname,
 				    int pathlen,
+				    struct strbuf **path_parent,
 				    int *dtype,
 				    struct path_pattern *pattern,
 				    struct index_state *istate)
 {
+	if (!*path_parent) {
+		char *slash;
+		CALLOC_ARRAY(*path_parent, 1);
+		strbuf_add(*path_parent, pathname, pathlen);
+		slash = find_last_dir_sep((*path_parent)->buf);
+
+		if (slash)
+			strbuf_setlen(*path_parent, slash - (*path_parent)->buf);
+		else
+			strbuf_setlen(*path_parent, 0);
+	}
+
+	/*
+	 * If the parent directory matches the pattern, then we do not
+	 * need to check for dtype.
+	 */
+	if ((*path_parent)->len &&
+	    match_pathname((*path_parent)->buf, (*path_parent)->len,
+			   pattern->base,
+			   pattern->baselen ? pattern->baselen - 1 : 0,
+			   pattern->pattern, pattern->nowildcardlen,
+			   pattern->patternlen, pattern->flags))
+		return 1;
+
 	*dtype = resolve_dtype(*dtype, istate, pathname, pathlen);
 	if (*dtype != DT_DIR)
 		return 0;
@@ -1331,6 +1356,7 @@  static struct path_pattern *last_matching_pattern_from_list(const char *pathname
 {
 	struct path_pattern *res = NULL; /* undecided */
 	int i;
+	struct strbuf *path_parent = NULL;
 
 	if (!pl->nr)
 		return NULL;	/* undefined */
@@ -1340,8 +1366,8 @@  static struct path_pattern *last_matching_pattern_from_list(const char *pathname
 		const char *exclude = pattern->pattern;
 		int prefix = pattern->nowildcardlen;
 
-		if ((pattern->flags & PATTERN_FLAG_MUSTBEDIR) &&
-		    !path_matches_dir_pattern(pathname, pathlen,
+		if (pattern->flags & PATTERN_FLAG_MUSTBEDIR &&
+		    !path_matches_dir_pattern(pathname, pathlen, &path_parent,
 					      dtype, pattern, istate))
 			continue;
 
@@ -1367,6 +1393,12 @@  static struct path_pattern *last_matching_pattern_from_list(const char *pathname
 			break;
 		}
 	}
+
+	if (path_parent) {
+		strbuf_release(path_parent);
+		free(path_parent);
+	}
+
 	return res;
 }