Message ID | 45cf57c9c40bebb7383b8aab19c82fc4e41d2cd3.1611596534.git.gitgitgadget@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Sparse Index | expand |
On Mon, Jan 25, 2021 at 9:42 AM Derrick Stolee via GitGitGadget <gitgitgadget@gmail.com> wrote: > > From: Derrick Stolee <dstolee@microsoft.com> > > As a first step to integrate 'git status' and 'git add' with the sparse > index, we must start integrating unpack_trees() with sparse directory > entries. These changes are currently impossible to trigger because > unpack_trees() calls ensure_full_index() if command_requires_full_index > is true. This is the case for all commands at the moment. As we expand > more commands to be sparse-aware, we might find that more changes are > required to unpack_trees(). The current changes will suffice for > 'status' and 'add'. > > unpack_trees() calls the traverse_trees() API using unpack_callback() > to decide if we should recurse into a subtree. We must add new abilities > to skip a subtree if it corresponds to a sparse directory entry. Makes sense. > It is important to be careful about the trailing directory separator > that exists in the sparse directory entries but not in the subtree > paths. The comment makes me wonder if leaving the trailing directory separator out would be better, as it'd allow direct comparisons. Of course, you have a better idea of what is easier or harder based on this decision. Is there any chance you have a quick list of the places that the code was simplified by this decision and a list of places like this one that were made slightly harder? > Signed-off-by: Derrick Stolee <dstolee@microsoft.com> > --- > dir.h | 2 +- > preload-index.c | 2 ++ > read-cache.c | 3 +++ > unpack-trees.c | 24 ++++++++++++++++++++++-- > 4 files changed, 28 insertions(+), 3 deletions(-) > > diff --git a/dir.h b/dir.h > index facfae47402..300305ec335 100644 > --- a/dir.h > +++ b/dir.h > @@ -503,7 +503,7 @@ static inline int ce_path_match(const struct index_state *istate, > char *seen) > { > return match_pathspec(istate, pathspec, ce->name, ce_namelen(ce), 0, seen, > - S_ISDIR(ce->ce_mode) || S_ISGITLINK(ce->ce_mode)); > + S_ISSPARSEDIR(ce) || S_ISDIR(ce->ce_mode) || S_ISGITLINK(ce->ce_mode)); I think this hunk becomes unnecessary if you use ce_mode = 040000 for sparse directory entries. > } > > static inline int dir_path_match(const struct index_state *istate, > diff --git a/preload-index.c b/preload-index.c > index ed6eaa47388..323fc8c5100 100644 > --- a/preload-index.c > +++ b/preload-index.c > @@ -54,6 +54,8 @@ static void *preload_thread(void *_data) > continue; > if (S_ISGITLINK(ce->ce_mode)) > continue; > + if (S_ISSPARSEDIR(ce)) > + continue; > if (ce_uptodate(ce)) > continue; > if (ce_skip_worktree(ce)) > diff --git a/read-cache.c b/read-cache.c > index 65679d70d7c..ab0c2b86ec0 100644 > --- a/read-cache.c > +++ b/read-cache.c > @@ -1572,6 +1572,9 @@ int refresh_index(struct index_state *istate, unsigned int flags, > if (ignore_submodules && S_ISGITLINK(ce->ce_mode)) > continue; > > + if (istate->sparse_index && S_ISSPARSEDIR(ce)) > + continue; > + > if (pathspec && !ce_path_match(istate, ce, pathspec, seen)) > filtered = 1; > > diff --git a/unpack-trees.c b/unpack-trees.c > index b324eec2a5d..90644856a80 100644 > --- a/unpack-trees.c > +++ b/unpack-trees.c > @@ -583,6 +583,13 @@ static void mark_ce_used(struct cache_entry *ce, struct unpack_trees_options *o) > { > ce->ce_flags |= CE_UNPACKED; > > + /* > + * If this is a sparse directory, don't advance cache_bottom. > + * That will be advanced later using the cache-tree data. > + */ > + if (S_ISSPARSEDIR(ce)) > + return; I don't grok the cache_bottom stuff -- in general, nothing specific about your patch. But since I don't grok that stuff, it means I don't understand how your comment here relates; you may want to ping another reviewer about this portion of the patch. > + > if (o->cache_bottom < o->src_index->cache_nr && > o->src_index->cache[o->cache_bottom] == ce) { > int bottom = o->cache_bottom; > @@ -980,6 +987,9 @@ static int do_compare_entry(const struct cache_entry *ce, > ce_len -= pathlen; > ce_name = ce->name + pathlen; > > + /* remove directory separator if a sparse directory entry */ > + if (S_ISSPARSEDIR(ce)) > + ce_len--; Here's where your comment about trailing separator comes in; makes sense. > return df_name_compare(ce_name, ce_len, S_IFREG, name, namelen, mode); > } > > @@ -989,6 +999,10 @@ static int compare_entry(const struct cache_entry *ce, const struct traverse_inf > if (cmp) > return cmp; > > + /* If ce is a sparse directory, then allow equality here. */ > + if (S_ISSPARSEDIR(ce)) > + return 0; > + This seems surprising to me. Is there a chance you are comparing sparse directory A with sparse directory B and you return with equality? Or sparse_directory A with regular file B? Do the callers still do the right thing? If your code change here is right, it seems like it deserves an extra comment either in the code or the commit message. > /* > * Even if the beginning compared identically, the ce should > * compare as bigger than a directory leading up to it! > @@ -1239,6 +1253,7 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str > struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, }; > struct unpack_trees_options *o = info->data; > const struct name_entry *p = names; > + unsigned recurse = 1; > > /* Find first entry with a real name (we could use "mask" too) */ > while (!p->mode) > @@ -1280,12 +1295,16 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str > } > } > src[0] = ce; > + > + if (S_ISSPARSEDIR(ce)) > + recurse = 0; > } > break; > } > } > > - if (unpack_nondirectories(n, mask, dirmask, src, names, info) < 0) > + if (recurse && > + unpack_nondirectories(n, mask, dirmask, src, names, info) < 0) > return -1; > > if (o->merge && src[0]) { > @@ -1315,7 +1334,8 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str > } > } > > - if (traverse_trees_recursive(n, dirmask, mask & ~dirmask, > + if (recurse && > + traverse_trees_recursive(n, dirmask, mask & ~dirmask, > names, info) < 0) > return -1; > return mask; The unpack_callback() code has some comparison to a cache-tree, but I'd assume that you'd need to update cache-tree.c somewhat to take advantage of these sparse directory entries. Am I wrong, and you just get cache-tree.c working with sparse directory entries for free? Or is this something coming in a later patch?
On 2/1/2021 3:50 PM, Elijah Newren wrote: > On Mon, Jan 25, 2021 at 9:42 AM Derrick Stolee via GitGitGadget > <gitgitgadget@gmail.com> wrote: >> It is important to be careful about the trailing directory separator >> that exists in the sparse directory entries but not in the subtree >> paths. > > The comment makes me wonder if leaving the trailing directory > separator out would be better, as it'd allow direct comparisons. Of > course, you have a better idea of what is easier or harder based on > this decision. Is there any chance you have a quick list of the > places that the code was simplified by this decision and a list of > places like this one that were made slightly harder? I'm going through all of your comments and making notes about areas to fix and clean up before starting a new series for full review. This question of the trailing slash is important, and I will take particular care about answering it as I rework the series. However, the questions in this patch poke at the right places... >> + /* remove directory separator if a sparse directory entry */ >> + if (S_ISSPARSEDIR(ce)) >> + ce_len--; > > Here's where your comment about trailing separator comes in; makes sense. > >> return df_name_compare(ce_name, ce_len, S_IFREG, name, namelen, mode); >> } >> >> @@ -989,6 +999,10 @@ static int compare_entry(const struct cache_entry *ce, const struct traverse_inf >> if (cmp) >> return cmp; >> >> + /* If ce is a sparse directory, then allow equality here. */ >> + if (S_ISSPARSEDIR(ce)) >> + return 0; >> + > > This seems surprising to me. Is there a chance you are comparing > sparse directory A with sparse directory B and you return with > equality? Or sparse_directory A with regular file B? Do the callers > still do the right thing? If your code change here is right, it seems > like it deserves an extra comment either in the code or the commit > message. Sometimes a caller is asking for the first index entry corresponding to a directory. In these cases, the input could be "A/B/C/". We want to ensure that a sparse directory entry corresponding exactly to that directory is correctly matched. If we place "A/B/C" in the index instead, this search becomes more complicated (I think; I will justify this more after thinking about it). At this point in time, we are just saying "We found the entry with equal path value!" and not failing with the check in the rest of the method: /* * Even if the beginning compared identically, the ce should * compare as bigger than a directory leading up to it! */ return ce_namelen(ce) > traverse_path_len(info, tree_entry_len(n)); >> - if (traverse_trees_recursive(n, dirmask, mask & ~dirmask, >> + if (recurse && >> + traverse_trees_recursive(n, dirmask, mask & ~dirmask, >> names, info) < 0) >> return -1; >> return mask; > > The unpack_callback() code has some comparison to a cache-tree, but > I'd assume that you'd need to update cache-tree.c somewhat to take > advantage of these sparse directory entries. Am I wrong, and you just > get cache-tree.c working with sparse directory entries for free? Or > is this something coming in a later patch? In the RFC, I integrate the cache-tree with the sparse-index at the very end. I will move that integration to be much earlier in the next submission, so it becomes part of the format discussion. Thanks, -Stolee
diff --git a/dir.h b/dir.h index facfae47402..300305ec335 100644 --- a/dir.h +++ b/dir.h @@ -503,7 +503,7 @@ static inline int ce_path_match(const struct index_state *istate, char *seen) { return match_pathspec(istate, pathspec, ce->name, ce_namelen(ce), 0, seen, - S_ISDIR(ce->ce_mode) || S_ISGITLINK(ce->ce_mode)); + S_ISSPARSEDIR(ce) || S_ISDIR(ce->ce_mode) || S_ISGITLINK(ce->ce_mode)); } static inline int dir_path_match(const struct index_state *istate, diff --git a/preload-index.c b/preload-index.c index ed6eaa47388..323fc8c5100 100644 --- a/preload-index.c +++ b/preload-index.c @@ -54,6 +54,8 @@ static void *preload_thread(void *_data) continue; if (S_ISGITLINK(ce->ce_mode)) continue; + if (S_ISSPARSEDIR(ce)) + continue; if (ce_uptodate(ce)) continue; if (ce_skip_worktree(ce)) diff --git a/read-cache.c b/read-cache.c index 65679d70d7c..ab0c2b86ec0 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1572,6 +1572,9 @@ int refresh_index(struct index_state *istate, unsigned int flags, if (ignore_submodules && S_ISGITLINK(ce->ce_mode)) continue; + if (istate->sparse_index && S_ISSPARSEDIR(ce)) + continue; + if (pathspec && !ce_path_match(istate, ce, pathspec, seen)) filtered = 1; diff --git a/unpack-trees.c b/unpack-trees.c index b324eec2a5d..90644856a80 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -583,6 +583,13 @@ static void mark_ce_used(struct cache_entry *ce, struct unpack_trees_options *o) { ce->ce_flags |= CE_UNPACKED; + /* + * If this is a sparse directory, don't advance cache_bottom. + * That will be advanced later using the cache-tree data. + */ + if (S_ISSPARSEDIR(ce)) + return; + if (o->cache_bottom < o->src_index->cache_nr && o->src_index->cache[o->cache_bottom] == ce) { int bottom = o->cache_bottom; @@ -980,6 +987,9 @@ static int do_compare_entry(const struct cache_entry *ce, ce_len -= pathlen; ce_name = ce->name + pathlen; + /* remove directory separator if a sparse directory entry */ + if (S_ISSPARSEDIR(ce)) + ce_len--; return df_name_compare(ce_name, ce_len, S_IFREG, name, namelen, mode); } @@ -989,6 +999,10 @@ static int compare_entry(const struct cache_entry *ce, const struct traverse_inf if (cmp) return cmp; + /* If ce is a sparse directory, then allow equality here. */ + if (S_ISSPARSEDIR(ce)) + return 0; + /* * Even if the beginning compared identically, the ce should * compare as bigger than a directory leading up to it! @@ -1239,6 +1253,7 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, }; struct unpack_trees_options *o = info->data; const struct name_entry *p = names; + unsigned recurse = 1; /* Find first entry with a real name (we could use "mask" too) */ while (!p->mode) @@ -1280,12 +1295,16 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str } } src[0] = ce; + + if (S_ISSPARSEDIR(ce)) + recurse = 0; } break; } } - if (unpack_nondirectories(n, mask, dirmask, src, names, info) < 0) + if (recurse && + unpack_nondirectories(n, mask, dirmask, src, names, info) < 0) return -1; if (o->merge && src[0]) { @@ -1315,7 +1334,8 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str } } - if (traverse_trees_recursive(n, dirmask, mask & ~dirmask, + if (recurse && + traverse_trees_recursive(n, dirmask, mask & ~dirmask, names, info) < 0) return -1; return mask;