diff mbox series

[21/27] sparse-index: expand_to_path no-op if path exists

Message ID dfbafbde3f54333dc27a18e46b5b79573f015e60.1611596534.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series Sparse Index | expand

Commit Message

Derrick Stolee Jan. 25, 2021, 5:42 p.m. UTC
From: Derrick Stolee <dstolee@microsoft.com>

We need to check the file hashmap first, then look to see if the
directory signals a non-sparse directory entry. In such a case, we can
rely on the contents of the sparse-index.

We still use ensure_full_index() in the case that we hit a path that is
within a sparse-directory entry.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 name-hash.c    |  6 ++++++
 sparse-index.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+)

Comments

Elijah Newren Feb. 1, 2021, 10:34 p.m. UTC | #1
On Mon, Jan 25, 2021 at 9:42 AM Derrick Stolee via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: Derrick Stolee <dstolee@microsoft.com>
>
> We need to check the file hashmap first, then look to see if the
> directory signals a non-sparse directory entry. In such a case, we can
> rely on the contents of the sparse-index.
>
> We still use ensure_full_index() in the case that we hit a path that is
> within a sparse-directory entry.
>
> Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
> ---
>  name-hash.c    |  6 ++++++
>  sparse-index.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 59 insertions(+)
>
> diff --git a/name-hash.c b/name-hash.c
> index 641f6900a7c..cb0f316f652 100644
> --- a/name-hash.c
> +++ b/name-hash.c
> @@ -110,6 +110,12 @@ static void hash_index_entry(struct index_state *istate, struct cache_entry *ce)
>         if (ce->ce_flags & CE_HASHED)
>                 return;
>         ce->ce_flags |= CE_HASHED;
> +
> +       if (ce->ce_mode == CE_MODE_SPARSE_DIRECTORY) {
> +               add_dir_entry(istate, ce);
> +               return;
> +       }
> +
>         hashmap_entry_init(&ce->ent, memihash(ce->name, ce_namelen(ce)));
>         hashmap_add(&istate->name_hash, &ce->ent);
>
> diff --git a/sparse-index.c b/sparse-index.c
> index dd1a06dfdd3..bf8dce9a09b 100644
> --- a/sparse-index.c
> +++ b/sparse-index.c
> @@ -281,9 +281,62 @@ void ensure_full_index(struct index_state *istate)
>         trace2_region_leave("index", "ensure_full_index", istate->repo);
>  }
>
> +static int in_expand_to_path = 0;
> +
>  void expand_to_path(struct index_state *istate,
>                     const char *path, size_t pathlen, int icase)
>  {
> +       struct strbuf path_as_dir = STRBUF_INIT;
> +       int pos;
> +
> +       /* prevent extra recursion */
> +       if (in_expand_to_path)
> +               return;

Maybe "prevent extra expand_to_path() <-> index_file_exists()
recursion", just to be extra explicit?

> +
> +       if (!istate || !istate->sparse_index)
> +               return;
> +
> +       if (!istate->repo)
> +               istate->repo = the_repository;

So, we assume the_repository if istate->repo isn't set.  I guess given
the number of the_repository assumptions we have in the code, this
isn't a big deal.  And instead of a
USE_THE_REPOSITORY_COMPATIBILITY_MACROS we have a
NO_THE_REPOSITORY_COMPATIBILITY_MACROS, so there's nothing to mark
this either.

> +
> +       in_expand_to_path = 1;
> +
> +       /*
> +        * We only need to actually expand a region if the
> +        * following are both true:
> +        *
> +        * 1. 'path' is not already in the index.
> +        * 2. Some parent directory of 'path' is a sparse directory.
> +        */
> +
> +       strbuf_add(&path_as_dir, path, pathlen);
> +       strbuf_addch(&path_as_dir, '/');
> +
> +       /* in_expand_to_path prevents infinite recursion here */
> +       if (index_file_exists(istate, path, pathlen, icase))
> +               goto cleanup;

Shouldn't the editing of path_as_dir be done after the
index_file_exists() call?  In the case that the entry already exists,
writing to path_as_dir is wasted work.

> +       pos = index_name_pos(istate, path_as_dir.buf, path_as_dir.len);
> +
> +       if (pos < 0)
> +               pos = -pos - 1;
> +
> +       /*
> +        * Even if the path doesn't exist, if the value isn't exactly a
> +        * sparse-directory entry, then there is no need to expand the
> +        * index.
> +        */
> +       if (istate->cache[pos]->ce_mode != CE_MODE_SPARSE_DIRECTORY)
> +               goto cleanup;

This looked wrong to me until I tried to come up with a
counter-example.  Here you are relying on the fact that before the
comment, pos is going to be the index of a sparse directory entry --
either for path_as_dir or some ancestor directory.  It would be nice
if the comment mentioned that.

> +
> +       trace2_region_enter("index", "expand_to_path", istate->repo);
> +
>         /* for now, do the obviously-correct, slow thing */
>         ensure_full_index(istate);
> +
> +       trace2_region_leave("index", "expand_to_path", istate->repo);
> +
> +cleanup:
> +       strbuf_release(&path_as_dir);
> +       in_expand_to_path = 0;
>  }
> --
> gitgitgadget

Looks good otherwise.
diff mbox series

Patch

diff --git a/name-hash.c b/name-hash.c
index 641f6900a7c..cb0f316f652 100644
--- a/name-hash.c
+++ b/name-hash.c
@@ -110,6 +110,12 @@  static void hash_index_entry(struct index_state *istate, struct cache_entry *ce)
 	if (ce->ce_flags & CE_HASHED)
 		return;
 	ce->ce_flags |= CE_HASHED;
+
+	if (ce->ce_mode == CE_MODE_SPARSE_DIRECTORY) {
+		add_dir_entry(istate, ce);
+		return;
+	}
+
 	hashmap_entry_init(&ce->ent, memihash(ce->name, ce_namelen(ce)));
 	hashmap_add(&istate->name_hash, &ce->ent);
 
diff --git a/sparse-index.c b/sparse-index.c
index dd1a06dfdd3..bf8dce9a09b 100644
--- a/sparse-index.c
+++ b/sparse-index.c
@@ -281,9 +281,62 @@  void ensure_full_index(struct index_state *istate)
 	trace2_region_leave("index", "ensure_full_index", istate->repo);
 }
 
+static int in_expand_to_path = 0;
+
 void expand_to_path(struct index_state *istate,
 		    const char *path, size_t pathlen, int icase)
 {
+	struct strbuf path_as_dir = STRBUF_INIT;
+	int pos;
+
+	/* prevent extra recursion */
+	if (in_expand_to_path)
+		return;
+
+	if (!istate || !istate->sparse_index)
+		return;
+
+	if (!istate->repo)
+		istate->repo = the_repository;
+
+	in_expand_to_path = 1;
+
+	/*
+	 * We only need to actually expand a region if the
+	 * following are both true:
+	 *
+	 * 1. 'path' is not already in the index.
+	 * 2. Some parent directory of 'path' is a sparse directory.
+	 */
+
+	strbuf_add(&path_as_dir, path, pathlen);
+	strbuf_addch(&path_as_dir, '/');
+
+	/* in_expand_to_path prevents infinite recursion here */
+	if (index_file_exists(istate, path, pathlen, icase))
+		goto cleanup;
+
+	pos = index_name_pos(istate, path_as_dir.buf, path_as_dir.len);
+
+	if (pos < 0)
+		pos = -pos - 1;
+
+	/*
+	 * Even if the path doesn't exist, if the value isn't exactly a
+	 * sparse-directory entry, then there is no need to expand the
+	 * index.
+	 */
+	if (istate->cache[pos]->ce_mode != CE_MODE_SPARSE_DIRECTORY)
+		goto cleanup;
+
+	trace2_region_enter("index", "expand_to_path", istate->repo);
+
 	/* for now, do the obviously-correct, slow thing */
 	ensure_full_index(istate);
+
+	trace2_region_leave("index", "expand_to_path", istate->repo);
+
+cleanup:
+	strbuf_release(&path_as_dir);
+	in_expand_to_path = 0;
 }