diff mbox series

[v7,15/16] wt-status: expand added sparse directory entries

Message ID 717a3f49f97753149d5c435a83b3f1773dd4b1bb.1624932294.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series Sparse-index: integrate with status | expand

Commit Message

Derrick Stolee June 29, 2021, 2:04 a.m. UTC
From: Derrick Stolee <dstolee@microsoft.com>

It is difficult, but possible, to get into a state where we intend to
add a directory that is outside of the sparse-checkout definition. Add a
test to t1092-sparse-checkout-compatibility.sh that demonstrates this
using a combination of 'git reset --mixed' and 'git checkout --orphan'.

This test failed before because the output of 'git status
--porcelain=v2' would not match on the lines for folder1/:

* The sparse-checkout repo (with a full index) would output each path
  name that is intended to be added.

* The sparse-index repo would only output that "folder1/" is staged for
  addition.

The status should report the full list of files to be added, and so this
sparse-directory entry should be expanded to a full list when reaching
it inside the wt_status_collect_changes_initial() method. Use
read_tree_at() to assist.

Somehow, this loop over the cache entries was not guarded by
ensure_full_index() as intended.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 t/t1092-sparse-checkout-compatibility.sh | 33 +++++++++++++++
 wt-status.c                              | 51 ++++++++++++++++++++++++
 2 files changed, 84 insertions(+)

Comments

Elijah Newren July 9, 2021, 1:03 a.m. UTC | #1
On Mon, Jun 28, 2021 at 7:05 PM Derrick Stolee via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: Derrick Stolee <dstolee@microsoft.com>
>
> It is difficult, but possible, to get into a state where we intend to
> add a directory that is outside of the sparse-checkout definition. Add a
> test to t1092-sparse-checkout-compatibility.sh that demonstrates this
> using a combination of 'git reset --mixed' and 'git checkout --orphan'.
>
> This test failed before because the output of 'git status
> --porcelain=v2' would not match on the lines for folder1/:
>
> * The sparse-checkout repo (with a full index) would output each path
>   name that is intended to be added.
>
> * The sparse-index repo would only output that "folder1/" is staged for
>   addition.
>
> The status should report the full list of files to be added, and so this
> sparse-directory entry should be expanded to a full list when reaching
> it inside the wt_status_collect_changes_initial() method. Use
> read_tree_at() to assist.
>
> Somehow, this loop over the cache entries was not guarded by
> ensure_full_index() as intended.
>
> Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
> ---
>  t/t1092-sparse-checkout-compatibility.sh | 33 +++++++++++++++
>  wt-status.c                              | 51 ++++++++++++++++++++++++
>  2 files changed, 84 insertions(+)
>
> diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh
> index fed0440bafe..df217a2d10b 100755
> --- a/t/t1092-sparse-checkout-compatibility.sh
> +++ b/t/t1092-sparse-checkout-compatibility.sh
> @@ -545,4 +545,37 @@ test_expect_success 'sparse-index is not expanded' '
>         test_region ! index ensure_full_index trace2.txt
>  '
>
> +test_expect_success 'reset mixed and checkout orphan' '
> +       init_repos &&
> +
> +       test_all_match git checkout rename-out-to-in &&
> +
> +       # Sparse checkouts do not agree with full checkouts about
> +       # how to report a directory/file conflict during a reset.
> +       # This command would fail with test_all_match because the
> +       # full checkout reports "T folder1/0/1" while a sparse
> +       # checkout reports "D folder1/0/1". This matches because
> +       # the sparse checkouts skip "adding" the other side of
> +       # the conflict.

The same issue I highlighted last time is still present.  If you
insert an "exit 1" right here, then run
    ./t1092-sparse-checkout-compatibility.sh --ver --imm -x
until it stops, then
    cd t/trash directory.t1092-sparse-checkout-compatibility/sparse-checkout
    git ls-files -t | grep folder  # Note the files that are sparse
    git reset --mixed HEAD~1
    git ls-files -t | grep folder  # Note the files that are sparse --
there are some that aren't that should be
    git sparse-checkout reapply
    git ls-files -t | grep folder  # Note the files that are sparse

Granted, this is a bug with sparse-checkout without sparse-index, so
not something new to your series.  But since you are using comparisons
between regular sparse-checkouts and sparse-index to verify
correctness, this seems problematic to me.

> +       test_sparse_match git reset --mixed HEAD~1 &&
> +       test_sparse_match test-tool read-cache --table --expand &&
> +       test_sparse_match git status --porcelain=v2 &&
> +
> +       # At this point, sparse-checkouts behave differently
> +       # from the full-checkout.
> +       test_sparse_match git checkout --orphan new-branch &&
> +       test_sparse_match test-tool read-cache --table --expand &&
> +       test_sparse_match git status --porcelain=v2
> +'
> +
> +test_expect_success 'add everything with deep new file' '
> +       init_repos &&
> +
> +       run_on_sparse git sparse-checkout set deep/deeper1/deepest &&
> +
> +       run_on_all touch deep/deeper1/x &&
> +       test_all_match git add . &&
> +       test_all_match git status --porcelain=v2
> +'
> +
>  test_done
> diff --git a/wt-status.c b/wt-status.c
> index 96db3e74962..0317baef87e 100644
> --- a/wt-status.c
> +++ b/wt-status.c
> @@ -657,6 +657,36 @@ static void wt_status_collect_changes_index(struct wt_status *s)
>         clear_pathspec(&rev.prune_data);
>  }
>
> +static int add_file_to_list(const struct object_id *oid,
> +                           struct strbuf *base, const char *path,
> +                           unsigned int mode, void *context)
> +{
> +       struct string_list_item *it;
> +       struct wt_status_change_data *d;
> +       struct wt_status *s = context;
> +       struct strbuf full_name = STRBUF_INIT;
> +
> +       if (S_ISDIR(mode))
> +               return READ_TREE_RECURSIVE;
> +
> +       strbuf_add(&full_name, base->buf, base->len);
> +       strbuf_addstr(&full_name, path);
> +       it = string_list_insert(&s->change, full_name.buf);
> +       d = it->util;
> +       if (!d) {
> +               CALLOC_ARRAY(d, 1);
> +               it->util = d;
> +       }
> +
> +       d->index_status = DIFF_STATUS_ADDED;
> +       /* Leave {mode,oid}_head zero for adds. */
> +       d->mode_index = mode;
> +       oidcpy(&d->oid_index, oid);
> +       s->committable = 1;
> +       strbuf_release(&full_name);
> +       return 0;
> +}
> +
>  static void wt_status_collect_changes_initial(struct wt_status *s)
>  {
>         struct index_state *istate = s->repo->index;
> @@ -671,6 +701,27 @@ static void wt_status_collect_changes_initial(struct wt_status *s)
>                         continue;
>                 if (ce_intent_to_add(ce))
>                         continue;
> +               if (S_ISSPARSEDIR(ce->ce_mode)) {
> +                       /*
> +                        * This is a sparse directory entry, so we want to collect all
> +                        * of the added files within the tree. This requires recursively
> +                        * expanding the trees to find the elements that are new in this
> +                        * tree and marking them with DIFF_STATUS_ADDED.
> +                        */
> +                       struct strbuf base = STRBUF_INIT;
> +                       struct pathspec ps = { 0 };
> +                       struct tree *tree = lookup_tree(istate->repo, &ce->oid);
> +
> +                       ps.recursive = 1;
> +                       ps.has_wildcard = 1;
> +                       ps.max_depth = -1;
> +
> +                       strbuf_add(&base, ce->name, ce->ce_namelen);
> +                       read_tree_at(istate->repo, tree, &base, &ps,
> +                                    add_file_to_list, s);
> +                       continue;
> +               }
> +
>                 it = string_list_insert(&s->change, ce->name);
>                 d = it->util;
>                 if (!d) {
> --
> gitgitgadget
Derrick Stolee July 12, 2021, 1:56 p.m. UTC | #2
On 7/8/2021 9:03 PM, Elijah Newren wrote:
> On Mon, Jun 28, 2021 at 7:05 PM Derrick Stolee via GitGitGadget
> <gitgitgadget@gmail.com> wrote:
...
>> +test_expect_success 'reset mixed and checkout orphan' '
>> +       init_repos &&
>> +
>> +       test_all_match git checkout rename-out-to-in &&
>> +
>> +       # Sparse checkouts do not agree with full checkouts about
>> +       # how to report a directory/file conflict during a reset.
>> +       # This command would fail with test_all_match because the
>> +       # full checkout reports "T folder1/0/1" while a sparse
>> +       # checkout reports "D folder1/0/1". This matches because
>> +       # the sparse checkouts skip "adding" the other side of
>> +       # the conflict.
> 
> The same issue I highlighted last time is still present.  If you
> insert an "exit 1" right here, then run
>     ./t1092-sparse-checkout-compatibility.sh --ver --imm -x
> until it stops, then
>     cd t/trash directory.t1092-sparse-checkout-compatibility/sparse-checkout
>     git ls-files -t | grep folder  # Note the files that are sparse
>     git reset --mixed HEAD~1
>     git ls-files -t | grep folder  # Note the files that are sparse --
> there are some that aren't that should be
>     git sparse-checkout reapply
>     git ls-files -t | grep folder  # Note the files that are sparse
> 
> Granted, this is a bug with sparse-checkout without sparse-index, so
> not something new to your series.  But since you are using comparisons
> between regular sparse-checkouts and sparse-index to verify
> correctness, this seems problematic to me.

I'll add it to the pile, but I want to continue having this series
focus on making the sparse-index work quickly without a change in
behavior from a normal index. Changing the behavior of the sparse-
checkout feature should be a separate series.

Thanks,
-Stolee
Elijah Newren July 12, 2021, 7:32 p.m. UTC | #3
On Mon, Jul 12, 2021 at 6:56 AM Derrick Stolee <stolee@gmail.com> wrote:
>
> On 7/8/2021 9:03 PM, Elijah Newren wrote:
> > On Mon, Jun 28, 2021 at 7:05 PM Derrick Stolee via GitGitGadget
> > <gitgitgadget@gmail.com> wrote:
> ...
> >> +test_expect_success 'reset mixed and checkout orphan' '
> >> +       init_repos &&
> >> +
> >> +       test_all_match git checkout rename-out-to-in &&
> >> +
> >> +       # Sparse checkouts do not agree with full checkouts about
> >> +       # how to report a directory/file conflict during a reset.
> >> +       # This command would fail with test_all_match because the
> >> +       # full checkout reports "T folder1/0/1" while a sparse
> >> +       # checkout reports "D folder1/0/1". This matches because
> >> +       # the sparse checkouts skip "adding" the other side of
> >> +       # the conflict.
> >
> > The same issue I highlighted last time is still present.  If you
> > insert an "exit 1" right here, then run
> >     ./t1092-sparse-checkout-compatibility.sh --ver --imm -x
> > until it stops, then
> >     cd t/trash directory.t1092-sparse-checkout-compatibility/sparse-checkout
> >     git ls-files -t | grep folder  # Note the files that are sparse
> >     git reset --mixed HEAD~1
> >     git ls-files -t | grep folder  # Note the files that are sparse --
> > there are some that aren't that should be
> >     git sparse-checkout reapply
> >     git ls-files -t | grep folder  # Note the files that are sparse
> >
> > Granted, this is a bug with sparse-checkout without sparse-index, so
> > not something new to your series.  But since you are using comparisons
> > between regular sparse-checkouts and sparse-index to verify
> > correctness, this seems problematic to me.
>
> I'll add it to the pile, but I want to continue having this series
> focus on making the sparse-index work quickly without a change in
> behavior from a normal index. Changing the behavior of the sparse-
> checkout feature should be a separate series.

Hmm..perhaps there's some middle ground?  I appreciate that you want
to have this series focus on making the sparse-index work without
worrying about behavioral changes to sparse-checkout.  I'm concerned,
though, that testcases tend to be treated as documentation of intended
behavior, even when the tests are testing something else.  These tests
are clearly triggering buggy behavior, and I think your comments and
subsequent command may be affected by it.  I don't want to leave
future folks (even ourselves) to have to try to explain away why the
behavior expected in this test should not be expected.

Perhaps we can just add a comment that this testcase is triggering a
bug in both sparse-checkout and sparse-index but we're only checking
that the two match, and that once the bug is fix, the testcase itself
may need tweaking?
Derrick Stolee July 12, 2021, 7:41 p.m. UTC | #4
On 7/12/2021 3:32 PM, Elijah Newren wrote:
> On Mon, Jul 12, 2021 at 6:56 AM Derrick Stolee <stolee@gmail.com> wrote:
>>
>> On 7/8/2021 9:03 PM, Elijah Newren wrote:
>>> On Mon, Jun 28, 2021 at 7:05 PM Derrick Stolee via GitGitGadget
>>> <gitgitgadget@gmail.com> wrote:
>> ...
>>>> +test_expect_success 'reset mixed and checkout orphan' '
>>>> +       init_repos &&
>>>> +
>>>> +       test_all_match git checkout rename-out-to-in &&
>>>> +
>>>> +       # Sparse checkouts do not agree with full checkouts about
>>>> +       # how to report a directory/file conflict during a reset.
>>>> +       # This command would fail with test_all_match because the
>>>> +       # full checkout reports "T folder1/0/1" while a sparse
>>>> +       # checkout reports "D folder1/0/1". This matches because
>>>> +       # the sparse checkouts skip "adding" the other side of
>>>> +       # the conflict.
>>>
>>> The same issue I highlighted last time is still present.  If you
>>> insert an "exit 1" right here, then run
>>>     ./t1092-sparse-checkout-compatibility.sh --ver --imm -x
>>> until it stops, then
>>>     cd t/trash directory.t1092-sparse-checkout-compatibility/sparse-checkout
>>>     git ls-files -t | grep folder  # Note the files that are sparse
>>>     git reset --mixed HEAD~1
>>>     git ls-files -t | grep folder  # Note the files that are sparse --
>>> there are some that aren't that should be
>>>     git sparse-checkout reapply
>>>     git ls-files -t | grep folder  # Note the files that are sparse
>>>
>>> Granted, this is a bug with sparse-checkout without sparse-index, so
>>> not something new to your series.  But since you are using comparisons
>>> between regular sparse-checkouts and sparse-index to verify
>>> correctness, this seems problematic to me.
>>
>> I'll add it to the pile, but I want to continue having this series
>> focus on making the sparse-index work quickly without a change in
>> behavior from a normal index. Changing the behavior of the sparse-
>> checkout feature should be a separate series.
> 
> Hmm..perhaps there's some middle ground?  I appreciate that you want
> to have this series focus on making the sparse-index work without
> worrying about behavioral changes to sparse-checkout.  I'm concerned,
> though, that testcases tend to be treated as documentation of intended
> behavior, even when the tests are testing something else.  These tests
> are clearly triggering buggy behavior, and I think your comments and
> subsequent command may be affected by it.  I don't want to leave
> future folks (even ourselves) to have to try to explain away why the
> behavior expected in this test should not be expected.
> 
> Perhaps we can just add a comment that this testcase is triggering a
> bug in both sparse-checkout and sparse-index but we're only checking
> that the two match, and that once the bug is fix, the testcase itself
> may need tweaking?
 
I can get behind that approach: document the bug, but comment that it
_is_ a bug and should be changed in the future.

Thanks,
-Stolee
diff mbox series

Patch

diff --git a/t/t1092-sparse-checkout-compatibility.sh b/t/t1092-sparse-checkout-compatibility.sh
index fed0440bafe..df217a2d10b 100755
--- a/t/t1092-sparse-checkout-compatibility.sh
+++ b/t/t1092-sparse-checkout-compatibility.sh
@@ -545,4 +545,37 @@  test_expect_success 'sparse-index is not expanded' '
 	test_region ! index ensure_full_index trace2.txt
 '
 
+test_expect_success 'reset mixed and checkout orphan' '
+	init_repos &&
+
+	test_all_match git checkout rename-out-to-in &&
+
+	# Sparse checkouts do not agree with full checkouts about
+	# how to report a directory/file conflict during a reset.
+	# This command would fail with test_all_match because the
+	# full checkout reports "T folder1/0/1" while a sparse
+	# checkout reports "D folder1/0/1". This matches because
+	# the sparse checkouts skip "adding" the other side of
+	# the conflict.
+	test_sparse_match git reset --mixed HEAD~1 &&
+	test_sparse_match test-tool read-cache --table --expand &&
+	test_sparse_match git status --porcelain=v2 &&
+
+	# At this point, sparse-checkouts behave differently
+	# from the full-checkout.
+	test_sparse_match git checkout --orphan new-branch &&
+	test_sparse_match test-tool read-cache --table --expand &&
+	test_sparse_match git status --porcelain=v2
+'
+
+test_expect_success 'add everything with deep new file' '
+	init_repos &&
+
+	run_on_sparse git sparse-checkout set deep/deeper1/deepest &&
+
+	run_on_all touch deep/deeper1/x &&
+	test_all_match git add . &&
+	test_all_match git status --porcelain=v2
+'
+
 test_done
diff --git a/wt-status.c b/wt-status.c
index 96db3e74962..0317baef87e 100644
--- a/wt-status.c
+++ b/wt-status.c
@@ -657,6 +657,36 @@  static void wt_status_collect_changes_index(struct wt_status *s)
 	clear_pathspec(&rev.prune_data);
 }
 
+static int add_file_to_list(const struct object_id *oid,
+			    struct strbuf *base, const char *path,
+			    unsigned int mode, void *context)
+{
+	struct string_list_item *it;
+	struct wt_status_change_data *d;
+	struct wt_status *s = context;
+	struct strbuf full_name = STRBUF_INIT;
+
+	if (S_ISDIR(mode))
+		return READ_TREE_RECURSIVE;
+
+	strbuf_add(&full_name, base->buf, base->len);
+	strbuf_addstr(&full_name, path);
+	it = string_list_insert(&s->change, full_name.buf);
+	d = it->util;
+	if (!d) {
+		CALLOC_ARRAY(d, 1);
+		it->util = d;
+	}
+
+	d->index_status = DIFF_STATUS_ADDED;
+	/* Leave {mode,oid}_head zero for adds. */
+	d->mode_index = mode;
+	oidcpy(&d->oid_index, oid);
+	s->committable = 1;
+	strbuf_release(&full_name);
+	return 0;
+}
+
 static void wt_status_collect_changes_initial(struct wt_status *s)
 {
 	struct index_state *istate = s->repo->index;
@@ -671,6 +701,27 @@  static void wt_status_collect_changes_initial(struct wt_status *s)
 			continue;
 		if (ce_intent_to_add(ce))
 			continue;
+		if (S_ISSPARSEDIR(ce->ce_mode)) {
+			/*
+			 * This is a sparse directory entry, so we want to collect all
+			 * of the added files within the tree. This requires recursively
+			 * expanding the trees to find the elements that are new in this
+			 * tree and marking them with DIFF_STATUS_ADDED.
+			 */
+			struct strbuf base = STRBUF_INIT;
+			struct pathspec ps = { 0 };
+			struct tree *tree = lookup_tree(istate->repo, &ce->oid);
+
+			ps.recursive = 1;
+			ps.has_wildcard = 1;
+			ps.max_depth = -1;
+
+			strbuf_add(&base, ce->name, ce->ce_namelen);
+			read_tree_at(istate->repo, tree, &base, &ps,
+				     add_file_to_list, s);
+			continue;
+		}
+
 		it = string_list_insert(&s->change, ce->name);
 		d = it->util;
 		if (!d) {