diff mbox series

[v7,09/16] unpack-trees: unpack sparse directory entries

Message ID 237ccf4e43dd461250b3e6d609a475c1d675ea86.1624932294.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series Sparse-index: integrate with status | expand

Commit Message

Derrick Stolee June 29, 2021, 2:04 a.m. UTC
From: Derrick Stolee <dstolee@microsoft.com>

During unpack_callback(), index entries are compared against tree
entries. These are matched according to names and types. One goal is to
decide if we should recurse into subtrees or simply operate on one index
entry.

In the case of a sparse-directory entry, we do not want to recurse into
that subtree and instead simply compare the trees. In some cases, we
might want to perform a merge operation on the entry, such as during
'git checkout <commit>' which wants to replace a sparse tree entry with
the tree for that path at the target commit. We extend the logic within
unpack_single_entry() to create a sparse-directory entry in this case,
and then that is sent to call_unpack_fn().

There are some subtleties in this process. For instance, we need to
update find_cache_entry() to allow finding a sparse-directory entry that
exactly matches a given path. Use the new helper method
sparse_dir_matches_path() for this. We also need to ignore conflict
markers in the case that the entries correspond to directories and we
already have a sparse directory entry.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 unpack-trees.c | 105 +++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 97 insertions(+), 8 deletions(-)

Comments

Elijah Newren July 7, 2021, 10:25 p.m. UTC | #1
On Mon, Jun 28, 2021 at 7:05 PM Derrick Stolee via GitGitGadget
<gitgitgadget@gmail.com> wrote:
> -       else
> +
> +       /*
> +        * Check for a sparse-directory entry named "path/".
> +        * Due to the input p->path not having a trailing
> +        * slash, the negative 'pos' value overshoots the
> +        * expected position by at least one, hence "-2" here.

You added the qualifier "at least" to this comment since v5.  I think
it's slightly misleading because it sounds like -2 is the end of the
special handling of the "at least" one overshoot.  Perhaps if you
ended with... "hence '-2' instead of '-1' here, and we also need to
check below if we overshot more than one".

> +        */
> +       pos = -pos - 2;
> +
> +       if (pos < 0 || pos >= o->src_index->cache_nr)
>                 return NULL;
> +
> +       /*
> +        * We might have multiple entries between 'pos' and
> +        * the actual sparse-directory entry, so start walking
> +        * back until finding it or passing where it would be.

It might be helpful to add a quick comment about the scenario where
this comes up.  e.g.

    This arises due to lexicographic sort ordering and sparse
directory entries coming with a trailing slash, causing there to be
multiple entries between "subdir" and "subdir/" (such as anything
beginning with "subdir." or "subdir-").  We are trying to walk back
from "subdir/" to "subdir" here.


> +        */
> +       while (pos >= 0) {
> +               ce = o->src_index->cache[pos];
> +
> +               if (strncmp(ce->name, p->path, p->pathlen))
> +                       return NULL;
> +
> +               if (S_ISSPARSEDIR(ce->ce_mode) &&
> +                   sparse_dir_matches_path(ce, info, p))
> +                       return ce;
> +
> +               pos--;
> +       }
> +
> +       return NULL;
>  }
diff mbox series

Patch

diff --git a/unpack-trees.c b/unpack-trees.c
index d26386ce8b2..d141dffbd94 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -1052,13 +1052,15 @@  static struct cache_entry *create_ce_entry(const struct traverse_info *info,
 	const struct name_entry *n,
 	int stage,
 	struct index_state *istate,
-	int is_transient)
+	int is_transient,
+	int is_sparse_directory)
 {
 	size_t len = traverse_path_len(info, tree_entry_len(n));
+	size_t alloc_len = is_sparse_directory ? len + 1 : len;
 	struct cache_entry *ce =
 		is_transient ?
-		make_empty_transient_cache_entry(len, NULL) :
-		make_empty_cache_entry(istate, len);
+		make_empty_transient_cache_entry(alloc_len, NULL) :
+		make_empty_cache_entry(istate, alloc_len);
 
 	ce->ce_mode = create_ce_mode(n->mode);
 	ce->ce_flags = create_ce_flags(stage);
@@ -1067,6 +1069,13 @@  static struct cache_entry *create_ce_entry(const struct traverse_info *info,
 	/* len+1 because the cache_entry allocates space for NUL */
 	make_traverse_path(ce->name, len + 1, info, n->path, n->pathlen);
 
+	if (is_sparse_directory) {
+		ce->name[len] = '/';
+		ce->name[len + 1] = '\0';
+		ce->ce_namelen++;
+		ce->ce_flags |= CE_SKIP_WORKTREE;
+	}
+
 	return ce;
 }
 
@@ -1085,10 +1094,17 @@  static int unpack_single_entry(int n, unsigned long mask,
 	struct unpack_trees_options *o = info->data;
 	unsigned long conflicts = info->df_conflicts | dirmask;
 
-	/* Do we have *only* directories? Nothing to do */
 	if (mask == dirmask && !src[0])
 		return 0;
 
+	/*
+	 * When we have a sparse directory entry for src[0],
+	 * then this isn't necessarily a directory-file conflict.
+	 */
+	if (mask == dirmask && src[0] &&
+	    S_ISSPARSEDIR(src[0]->ce_mode))
+		conflicts = 0;
+
 	/*
 	 * Ok, we've filled in up to any potential index entry in src[0],
 	 * now do the rest.
@@ -1118,7 +1134,9 @@  static int unpack_single_entry(int n, unsigned long mask,
 		 * not stored in the index.  otherwise construct the
 		 * cache entry from the index aware logic.
 		 */
-		src[i + o->merge] = create_ce_entry(info, names + i, stage, &o->result, o->merge);
+		src[i + o->merge] = create_ce_entry(info, names + i, stage,
+						    &o->result, o->merge,
+						    bit & dirmask);
 	}
 
 	if (o->merge) {
@@ -1222,16 +1240,69 @@  static int find_cache_pos(struct traverse_info *info,
 	return -1;
 }
 
+/*
+ * Given a sparse directory entry 'ce', compare ce->name to
+ * info->name + '/' + p->path + '/' if info->name is non-empty.
+ * Compare ce->name to p->path + '/' otherwise. Note that
+ * ce->name must end in a trailing '/' because it is a sparse
+ * directory entry.
+ */
+static int sparse_dir_matches_path(const struct cache_entry *ce,
+				   struct traverse_info *info,
+				   const struct name_entry *p)
+{
+	assert(S_ISSPARSEDIR(ce->ce_mode));
+	assert(ce->name[ce->ce_namelen - 1] == '/');
+
+	if (info->namelen)
+		return ce->ce_namelen == info->namelen + p->pathlen + 2 &&
+		       ce->name[info->namelen] == '/' &&
+		       !strncmp(ce->name, info->name, info->namelen) &&
+		       !strncmp(ce->name + info->namelen + 1, p->path, p->pathlen);
+	return ce->ce_namelen == p->pathlen + 1 &&
+	       !strncmp(ce->name, p->path, p->pathlen);
+}
+
 static struct cache_entry *find_cache_entry(struct traverse_info *info,
 					    const struct name_entry *p)
 {
+	struct cache_entry *ce;
 	int pos = find_cache_pos(info, p->path, p->pathlen);
 	struct unpack_trees_options *o = info->data;
 
 	if (0 <= pos)
 		return o->src_index->cache[pos];
-	else
+
+	/*
+	 * Check for a sparse-directory entry named "path/".
+	 * Due to the input p->path not having a trailing
+	 * slash, the negative 'pos' value overshoots the
+	 * expected position by at least one, hence "-2" here.
+	 */
+	pos = -pos - 2;
+
+	if (pos < 0 || pos >= o->src_index->cache_nr)
 		return NULL;
+
+	/*
+	 * We might have multiple entries between 'pos' and
+	 * the actual sparse-directory entry, so start walking
+	 * back until finding it or passing where it would be.
+	 */
+	while (pos >= 0) {
+		ce = o->src_index->cache[pos];
+
+		if (strncmp(ce->name, p->path, p->pathlen))
+			return NULL;
+
+		if (S_ISSPARSEDIR(ce->ce_mode) &&
+		    sparse_dir_matches_path(ce, info, p))
+			return ce;
+
+		pos--;
+	}
+
+	return NULL;
 }
 
 static void debug_path(struct traverse_info *info)
@@ -1266,6 +1337,21 @@  static void debug_unpack_callback(int n,
 		debug_name_entry(i, names + i);
 }
 
+/*
+ * Returns true if and only if the given cache_entry is a
+ * sparse-directory entry that matches the given name_entry
+ * from the tree walk at the given traverse_info.
+ */
+static int is_sparse_directory_entry(struct cache_entry *ce,
+				     struct name_entry *name,
+				     struct traverse_info *info)
+{
+	if (!ce || !name || !S_ISSPARSEDIR(ce->ce_mode))
+		return 0;
+
+	return sparse_dir_matches_path(ce, info, name);
+}
+
 /*
  * Note that traverse_by_cache_tree() duplicates some logic in this function
  * without actually calling it. If you change the logic here you may need to
@@ -1352,9 +1438,12 @@  static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str
 			}
 		}
 
-		if (traverse_trees_recursive(n, dirmask, mask & ~dirmask,
-					     names, info) < 0)
+		if (!is_sparse_directory_entry(src[0], names, info) &&
+		    traverse_trees_recursive(n, dirmask, mask & ~dirmask,
+						    names, info) < 0) {
 			return -1;
+		}
+
 		return mask;
 	}