Message ID | d47c7a1cf2a3fa8cfdcfc6be1ac800af123e7efc.1629842085.git.gitgitgadget@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | Sparse-checkout: modify 'git add', 'git rm', and 'git add' behavior | expand |
Am 24.08.21 um 23:54 schrieb Derrick Stolee via GitGitGadget: > From: Derrick Stolee <dstolee@microsoft.com> > > When matching a path against a list of patterns, the ones that require a > directory match previously did not work when a filename is specified. > This was fine when all pattern-matching was done within methods such as > unpack_trees() that check a directory before recursing into the > contained files. However, other commands will start matching individual > files against pattern lists without that recursive approach. > > We modify path_matches_dir_pattern() to take a strbuf 'path_parent' that > is used to store the parent directory of 'pathname' between multiple > pattern matching tests. This is loaded lazily, only on the first pattern > it finds that has the PATTERN_FLAG_MUSTBEDIR flag. > > If we find that a path has a parent directory, we start by checking to > see if that parent directory matches the pattern. If so, then we do not > need to query the index for the type (which can be expensive). If we > find that the parent does not match, then we still must check the type > from the index for the given pathname. > > Note that this does not affect cone mode pattern matching, but instead > the more general -- and slower -- full pattern set. Thus, this does not > affect the sparse index. > > Signed-off-by: Derrick Stolee <dstolee@microsoft.com> > --- > dir.c | 34 ++++++++++++++++++++++++++++++++-- > 1 file changed, 32 insertions(+), 2 deletions(-) > > diff --git a/dir.c b/dir.c > index 652135df896..fe5ee87bb5f 100644 > --- a/dir.c > +++ b/dir.c > @@ -1305,10 +1305,38 @@ int match_pathname(const char *pathname, int pathlen, > > static int path_matches_dir_pattern(const char *pathname, > int pathlen, > + struct strbuf *path_parent, > int *dtype, > struct path_pattern *pattern, > struct index_state *istate) > { > + /* > + * Use 'alloc' as an indicator that the string has not been > + * initialized, in case the parent is the root directory. > + */ This means the caller needs to take care to release the strbuf between calls for files from different directories. Seems a bit fragile. The current caller is only ever passing in the same pathname before throwing away the strbuf, so it's doing the right thing. > + if (!path_parent->alloc) { > + char *slash; > + strbuf_addstr(path_parent, pathname); > + slash = find_last_dir_sep(path_parent->buf); The caller has pathname, pathlen and basename. If basename is guaranteed to be a substring of pathname then the parent directory name length could be calculated without requiring a string copy or scan. IIUC if pathname and basename can be pointers to different objects then just checking if basename is between pathname and pathname + pathlen would already be undefined behavior. Using pathname, pathlen and dirlen instead would be safer for such calculations, as it enforces basename to be a substring. Seems like this would require a lot of function signature changes, though, as the call tree is quite high. :| > + > + if (slash) > + *slash = '\0'; This doesn't update path_parent->len... > + else > + strbuf_setlen(path_parent, 0); > + } > + > + /* > + * If the parent directory matches the pattern, then we do not > + * need to check for dtype. > + */ > + if (path_parent->len && > + match_pathname(path_parent->buf, path_parent->len, ... so this checks if "<dirname>\0<basename>" matches. Intended? > + pattern->base, > + pattern->baselen ? pattern->baselen - 1 : 0, > + pattern->pattern, pattern->nowildcardlen, > + pattern->patternlen, pattern->flags)) > + return 1; > + > *dtype = resolve_dtype(*dtype, istate, pathname, pathlen); > if (*dtype != DT_DIR) > return 0; > @@ -1331,6 +1359,7 @@ static struct path_pattern *last_matching_pattern_from_list(const char *pathname > { > struct path_pattern *res = NULL; /* undecided */ > int i; > + struct strbuf path_parent = STRBUF_INIT; > > if (!pl->nr) > return NULL; /* undefined */ > @@ -1340,8 +1369,8 @@ static struct path_pattern *last_matching_pattern_from_list(const char *pathname > const char *exclude = pattern->pattern; > int prefix = pattern->nowildcardlen; > > - if ((pattern->flags & PATTERN_FLAG_MUSTBEDIR) && > - !path_matches_dir_pattern(pathname, pathlen, > + if (pattern->flags & PATTERN_FLAG_MUSTBEDIR && > + !path_matches_dir_pattern(pathname, pathlen, &path_parent, "a & b && c" is equivalent to "(a & b) && c", but removing the parentheses here serves no apparent purpose and distracts a bit from the actual change, i.e. adding a parameter. > dtype, pattern, istate)) > continue; > > @@ -1367,6 +1396,7 @@ static struct path_pattern *last_matching_pattern_from_list(const char *pathname > break; > } > } > + strbuf_release(&path_parent); > return res; > } > >
diff --git a/dir.c b/dir.c index 652135df896..fe5ee87bb5f 100644 --- a/dir.c +++ b/dir.c @@ -1305,10 +1305,38 @@ int match_pathname(const char *pathname, int pathlen, static int path_matches_dir_pattern(const char *pathname, int pathlen, + struct strbuf *path_parent, int *dtype, struct path_pattern *pattern, struct index_state *istate) { + /* + * Use 'alloc' as an indicator that the string has not been + * initialized, in case the parent is the root directory. + */ + if (!path_parent->alloc) { + char *slash; + strbuf_addstr(path_parent, pathname); + slash = find_last_dir_sep(path_parent->buf); + + if (slash) + *slash = '\0'; + else + strbuf_setlen(path_parent, 0); + } + + /* + * If the parent directory matches the pattern, then we do not + * need to check for dtype. + */ + if (path_parent->len && + match_pathname(path_parent->buf, path_parent->len, + pattern->base, + pattern->baselen ? pattern->baselen - 1 : 0, + pattern->pattern, pattern->nowildcardlen, + pattern->patternlen, pattern->flags)) + return 1; + *dtype = resolve_dtype(*dtype, istate, pathname, pathlen); if (*dtype != DT_DIR) return 0; @@ -1331,6 +1359,7 @@ static struct path_pattern *last_matching_pattern_from_list(const char *pathname { struct path_pattern *res = NULL; /* undecided */ int i; + struct strbuf path_parent = STRBUF_INIT; if (!pl->nr) return NULL; /* undefined */ @@ -1340,8 +1369,8 @@ static struct path_pattern *last_matching_pattern_from_list(const char *pathname const char *exclude = pattern->pattern; int prefix = pattern->nowildcardlen; - if ((pattern->flags & PATTERN_FLAG_MUSTBEDIR) && - !path_matches_dir_pattern(pathname, pathlen, + if (pattern->flags & PATTERN_FLAG_MUSTBEDIR && + !path_matches_dir_pattern(pathname, pathlen, &path_parent, dtype, pattern, istate)) continue; @@ -1367,6 +1396,7 @@ static struct path_pattern *last_matching_pattern_from_list(const char *pathname break; } } + strbuf_release(&path_parent); return res; }