[v2,7/9] fetch: fetch unpopulated, changed submodules

Message ID 20220215172318.73533-8-chooglen@google.com (mailing list archive)
State Superseded
Series fetch --recurse-submodules: fetch unpopulated submodules

Commit Message

Glen Choo Feb. 15, 2022, 5:23 p.m. UTC
"git fetch --recurse-submodules" only considers populated
submodules (i.e. submodules that can be found by iterating the index),
which makes "git fetch" behave differently based on which commit is
checked out. As a result, even if the user has initialized all submodules
correctly, they may not fetch the necessary submodule commits, and
commands like "git checkout --recurse-submodules" might fail.

Teach "git fetch" to fetch cloned, changed submodules regardless of
whether they are populated (this is in addition to the current behavior
of fetching populated submodules).

Since a submodule may be encountered multiple times (via the list of
populated submodules or via the list of changed submodules), maintain a
list of seen submodules to avoid fetching a submodule more than once.

Signed-off-by: Glen Choo <chooglen@google.com>
---
As I mentioned in the cover letter, I'm not entirely happy with the
name repo_has_absorbed_submodules() - it's not a standardized term AFAIK
and it's a little clunky.

"absorbed submodule" is just a stand-in for "submodule in .git/modules",
so if we have a better term for "submodule in .git/modules", let's use
that instead.

 Documentation/fetch-options.txt |  26 +++--
 Documentation/git-fetch.txt     |  10 +-
 builtin/fetch.c                 |  14 +--
 submodule.c                     | 134 +++++++++++++++++++---
 submodule.h                     |  12 +-
 t/t5526-fetch-submodules.sh     | 195 ++++++++++++++++++++++++++++++++
 6 files changed, 349 insertions(+), 42 deletions(-)

Comments

Jonathan Tan Feb. 15, 2022, 10:02 p.m. UTC | #1
Patches 4-6 look good.

Glen Choo <chooglen@google.com> writes:
> Teach "git fetch" to fetch cloned, changed submodules regardless of
> whether they are populated (this is in addition to the current behavior
> of fetching populated submodules).

I think you should add a note that the current behavior applies
regardless of what is being fetched. So, maybe something like:

  Teach "git fetch" to fetch cloned, changed submodules regardless of
  whether they are populated. This is in addition to the current behavior
  of fetching populated submodules (which happens regardless of what was
  fetched in the superproject, or even if nothing was fetched in the
  superproject).

> As I mentioned in the cover letter, I'm not entirely happy with the
> name repo_has_absorbed_submodules() - it's not a standardized term AFAIK
> and it's a little clunky.
> 
> "absorbed submodule" is just a stand-in for "submodule in .git/modules",
> so if we have a better term for "submodule in .git/modules", let's use
> that instead.

I think that this is OK if the doc comment is updated. I'll make the
suggestion in the appropriate place below.

> @@ -1248,14 +1261,28 @@ void check_for_new_submodule_commits(struct object_id *oid)
>  	oid_array_append(&ref_tips_after_fetch, oid);
>  }
>  
> +/*
> + * Returns 1 if the repo has absorbed submodule gitdirs, and 0
> + * otherwise. Like submodule_name_to_gitdir(), this checks
> + * $GIT_DIR/modules, not $GIT_COMMON_DIR.
> + */
> +static int repo_has_absorbed_submodules(struct repository *r)
> +{
> +	struct strbuf buf = STRBUF_INIT;
> +
> +	strbuf_repo_git_path(&buf, r, "modules/");
> +	return file_exists(buf.buf) && !is_empty_dir(buf.buf);
> +}

I think that if you replace the doc comment with something like:

  Returns 1 if there is at least one submodule gitdir in
  $GIT_DIR/modules, and 0 otherwise. (End users can move submodule
  gitdirs into $GIT_DIR/modules by running "git submodule
  absorbgitdirs".) Like submodule_name_to_gitdir()...

then it would be fine.

> @@ -1338,6 +1366,7 @@ struct submodule_parallel_fetch {
>  	int result;
>  
>  	struct string_list changed_submodule_names;
> +	struct string_list seen_submodule_names;

Could we unify the 2 lists instead of having 2 separate ones?

> @@ -1529,12 +1569,77 @@ get_fetch_task(struct submodule_parallel_fetch *spf,
>  	return NULL;
>  }
>  
> +static struct fetch_task *
> +get_fetch_task_from_changed(struct submodule_parallel_fetch *spf,
> +			    const char **default_argv, struct strbuf *err)
> +{
> +	for (; spf->changed_count < spf->changed_submodule_names.nr;
> +	     spf->changed_count++) {
> +		struct string_list_item item =
> +			spf->changed_submodule_names.items[spf->changed_count];
> +		struct changed_submodule_data *cs_data = item.util;
> +		struct fetch_task *task;
> +
> +		/*
> +		 * We might have already considered this submodule
> +		 * because we saw it in the index.
> +		 */
> +		if (string_list_lookup(&spf->seen_submodule_names, item.string))
> +			continue;
> +
> +		task = fetch_task_create(spf->r, cs_data->path,
> +					 cs_data->super_oid);
> +		if (!task)
> +			continue;
> +
> +		switch (get_fetch_recurse_config(task->sub, spf)) {
> +		default:
> +		case RECURSE_SUBMODULES_DEFAULT:
> +		case RECURSE_SUBMODULES_ON_DEMAND:
> +			*default_argv = "on-demand";
> +			break;
> +		case RECURSE_SUBMODULES_ON:
> +			*default_argv = "yes";
> +			break;
> +		case RECURSE_SUBMODULES_OFF:
> +			continue;
> +		}
> +
> +		task->repo = get_submodule_repo_for(spf->r, task->sub->path,
> +						    cs_data->super_oid);
> +		if (!task->repo) {
> +			fetch_task_release(task);
> +			free(task);
> +
> +			strbuf_addf(err, _("Could not access submodule '%s'\n"),
> +				    cs_data->path);
> +			continue;
> +		}
> +		if (!is_tree_submodule_active(spf->r, cs_data->super_oid,
> +					      task->sub->path))
> +			continue;
> +
> +		if (!spf->quiet)
> +			strbuf_addf(err,
> +				    _("Fetching submodule %s%s at commit %s\n"),
> +				    spf->prefix, task->sub->path,
> +				    find_unique_abbrev(cs_data->super_oid,
> +						       DEFAULT_ABBREV));
> +		spf->changed_count++;
> +		return task;
> +	}
> +	return NULL;
> +}

This is very similar to get_fetch_task_from_index(). Both:
 1. loop over something
 2. exit early if the submodule name is seen
 3. create the fetch task
 4. set the "recurse config"
 5. get the submodule repo
 6. if success, increment a counter
 7. if failure, check for some conditions and maybe append to err

Could a function be refactored that does 2-5?

> +test_expect_success "'--recurse-submodules=on-demand' should fetch submodule commits if the submodule is changed but the index has no submodules" '
> +	git -C downstream fetch --recurse-submodules &&

First of all, thanks for updating the test - it is much easier to
understand now.

About this line, we shouldn't use the code being tested to set up (we're
testing "fetch --recurse-submodules", so we shouldn't use the same
command to set up). Also, if we don't have confidence in the starting
state, it may be a sign to write it out more explicitly instead of
relying on a complicated command to do the right thing.

However, in this case, I don't think we need this. All we need is to see
that the test contains a new superproject commit that points to a new
submodule commit (and recursively). So we don't need this line.

> +	# Create new superproject commit with updated submodules
> +	add_upstream_commit &&
> +	(
> +		cd submodule &&
> +		(
> +			cd subdir/deepsubmodule &&
> +			git fetch &&
> +			git checkout -q FETCH_HEAD
> +		) &&
> +		git add subdir/deepsubmodule &&
> +		git commit -m "new deep submodule"
> +	) &&
> +	git add submodule &&
> +	git commit -m "new submodule" &&

I thought add_upstream_commit() would do this, but apparently it just
adds commits to the submodules (which works for the earlier tests that
just tested that the submodules were fetched, but not for this one). I
think that all this should go into its own function.

Also, I understand that "git fetch" is there to pick up the commit we
made in add_upstream_commit, but this indirection is unnecessary in a
test, I think. Can we not use add_upstream_commit and just create our
own in subdir/deepsubmodule? (This might conflict with subsequent tests
that use the old scheme, but I think that it should be fine.)

> +	# Fetch the new superproject commit
> +	(
> +		cd downstream &&
> +		git switch --recurse-submodules no-submodules &&
> +		git fetch --recurse-submodules=on-demand >../actual.out 2>../actual.err
> +	) &&
> +	test_must_be_empty actual.out &&
> +	git rev-parse --short HEAD >superhead &&
> +	git -C submodule rev-parse --short HEAD >subhead &&
> +	git -C deepsubmodule rev-parse --short HEAD >deephead &&

These >superhead lines would not be necessary if we had our own
function.

> +# In downstream, init "submodule2", but do not check it out while
> +# fetching. This lets us assert that unpopulated submodules can be
> +# fetched.

Firstly, "In upstream", I think? You want to fetch from it, so it has to
be upstream.

Secondly, is this test needed? I thought the case in which the worktree
has no submodules would be sufficient to test that unpopulated
submodules can be fetched.

> +test_expect_success "'--recurse-submodules' should fetch submodule commits in changed submodules and the index" '

[snip]

> +	git checkout super &&
> +	(
> +		cd downstream &&
> +		git fetch --recurse-submodules >../actual.out 2>../actual.err
> +	) &&
> +	test_must_be_empty actual.out &&
> +
> +	# Assert that the submodules in the super branch are fetched
> +	git rev-parse --short HEAD >superhead &&
> +	git -C submodule rev-parse --short HEAD >subhead &&
> +	git -C deepsubmodule rev-parse --short HEAD >deephead &&
> +	verify_fetch_result actual.err &&
> +	# grep for the exact line to check that the submodule is read
> +	# from the index, not from a commit
> +	grep "^Fetching submodule submodule\$" actual.err &&

Instead of a grep, I think this should be done by precisely specifying
what to fetch in the "git fetch" invocation, and then checking that the
submodule has commits that it didn't have before.

In addition, I think the following cases also need to be tested:
 - two fetched commits have submodules of the same name but different
   URL
 - a fetched commit and a commit in the index have submodules of the
   same name but different URL
Ævar Arnfjörð Bjarmason Feb. 15, 2022, 10:06 p.m. UTC | #2
On Wed, Feb 16 2022, Glen Choo wrote:

> +		switch (get_fetch_recurse_config(task->sub, spf)) {
> +		default:

Unfortunately get_fetch_recurse_config() returns "int", and the enum
members defined in submodule.h aren't of a named type, so we can't
get the benefit of a compiler check for exhaustive enum member
handling here...

> +		case RECURSE_SUBMODULES_DEFAULT:
> +		case RECURSE_SUBMODULES_ON_DEMAND:
> +			*default_argv = "on-demand";
> +			break;
> +		case RECURSE_SUBMODULES_ON:
> +			*default_argv = "yes";
> +			break;
> +		case RECURSE_SUBMODULES_OFF:
> +			continue;

...in any case there's a lot more of them, so just having this "default"
case seems to make sense...
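
To illustrate what that compiler check would take, here is a minimal
standalone sketch (the enum below is invented for the sketch and only
covers the four members used in this switch; the real constants live in
an unnamed enum in submodule.h): with a named return type and no
"default:" arm, -Wswitch (part of -Wall) flags any unhandled member.

  #include <stddef.h>

  enum fetch_recurse_mode {
      RECURSE_SUBMODULES_OFF,
      RECURSE_SUBMODULES_DEFAULT,
      RECURSE_SUBMODULES_ON_DEMAND,
      RECURSE_SUBMODULES_ON,
  };

  const char *recurse_mode_arg(enum fetch_recurse_mode mode)
  {
      switch (mode) {
      case RECURSE_SUBMODULES_DEFAULT:
      case RECURSE_SUBMODULES_ON_DEMAND:
          return "on-demand";
      case RECURSE_SUBMODULES_ON:
          return "yes";
      case RECURSE_SUBMODULES_OFF:
          return NULL; /* caller would skip the submodule */
      /*
       * No "default:" arm: adding an enum member without handling it
       * here triggers a -Wswitch warning.
       */
      }
      return NULL;
  }
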
Glen Choo Feb. 16, 2022, 5:46 a.m. UTC | #3
Jonathan Tan <jonathantanmy@google.com> writes:

> Glen Choo <chooglen@google.com> writes:
>> Teach "git fetch" to fetch cloned, changed submodules regardless of
>> whether they are populated (this is in addition to the current behavior
>> of fetching populated submodules).
>
> I think you should add a note that the current behavior applies
> regardless of what is being fetched. So, maybe something like:
>
>   Teach "git fetch" to fetch cloned, changed submodules regardless of
>   whether they are populated. This is in addition to the current behavior
>   of fetching populated submodules (which happens regardless of what was
>   fetched in the superproject, or even if nothing was fetched in the
>   superproject).

Makes sense, thanks. This description is true, though a bit misleading
in the [--recurse-submodules=on-demand] case - if nothing was fetched,
on-demand would still try to fetch submodules, though no submodules
would be fetched. I'll tweak it a little.

>> As I mentioned in the cover letter, I'm not entirely happy with the
>> name repo_has_absorbed_submodules() - it's not a standardized term AFAIK
>> and it's a little clunky.
>> 
>> "absorbed submodule" is just a stand-in for "submodule in .git/modules",
>> so if we have a better term for "submodule in .git/modules", let's use
>> that instead.
>
> I think that this is OK if the doc comment is updated. I'll make the
> suggestion in the appropriate place below.
>> +/*
>> + * Returns 1 if the repo has absorbed submodule gitdirs, and 0
>> + * otherwise. Like submodule_name_to_gitdir(), this checks
>> + * $GIT_DIR/modules, not $GIT_COMMON_DIR.
>> + */
>
> I think that if you replace the doc comment with something like:
>
>   Returns 1 if there is at least one submodule gitdir in
>   $GIT_DIR/modules, and 0 otherwise. (End users can move submodule
>   gitdirs into $GIT_DIR/modules by running "git submodule
>   absorbgitdirs".) Like submodule_name_to_gitdir()...
>
> then it would be fine.

Thanks! Sounds good.

>> @@ -1338,6 +1366,7 @@ struct submodule_parallel_fetch {
>>  	int result;
>>  
>>  	struct string_list changed_submodule_names;
>> +	struct string_list seen_submodule_names;
>
> Could we unify the 2 lists instead of having 2 separate ones?

If I understand the suggestion correctly (Can we combine the two lists
into a single changed_submodule_names list?) I don't think we can - at
least not without other changes. The intent behind seen_submodule_names
is to tell get_fetch_task_from_changed() which changed_submodule_names
items can be ignored. If we only used one list, we wouldn't be able to
tell whether we had already considered the submodule or not. If we
stored this info elsewhere, e.g. an extra field in
changed_submodule_data, then we could use a single list.
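
For illustration, a minimal standalone sketch of that alternative (the
type and function names below are invented; only the idea carries
over): each changed-submodule entry carries its own flag, set the first
time either loop hands the submodule out, so a single list doubles as
the "seen" record.

  #include <stddef.h>

  struct changed_entry {
      const char *name;
      /* set once a fetch task has been created for this submodule */
      unsigned handed_out : 1;
  };

  /* Return the next entry that has not been handed out yet, or NULL. */
  struct changed_entry *next_unseen(struct changed_entry *entries,
                                    size_t nr, size_t *cursor)
  {
      for (; *cursor < nr; (*cursor)++) {
          struct changed_entry *e = &entries[*cursor];

          if (e->handed_out)
              continue; /* already considered, e.g. via the index */
          e->handed_out = 1;
          (*cursor)++;
          return e;
      }
      return NULL;
  }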

>
>> @@ -1529,12 +1569,77 @@ get_fetch_task(struct submodule_parallel_fetch *spf,
>>  	return NULL;
>>  }
>>  
>> +static struct fetch_task *
>> +get_fetch_task_from_changed(struct submodule_parallel_fetch *spf,
>> +			    const char **default_argv, struct strbuf *err)
>> +{
>> +	for (; spf->changed_count < spf->changed_submodule_names.nr;
>> +	     spf->changed_count++) {
>> +		struct string_list_item item =
>> +			spf->changed_submodule_names.items[spf->changed_count];
>> +		struct changed_submodule_data *cs_data = item.util;
>> +		struct fetch_task *task;
>> +
>> +		/*
>> +		 * We might have already considered this submodule
>> +		 * because we saw it in the index.
>> +		 */
>> +		if (string_list_lookup(&spf->seen_submodule_names, item.string))
>> +			continue;
>> +
>> +		task = fetch_task_create(spf->r, cs_data->path,
>> +					 cs_data->super_oid);
>> +		if (!task)
>> +			continue;
>> +
>> +		switch (get_fetch_recurse_config(task->sub, spf)) {
>> +		default:
>> +		case RECURSE_SUBMODULES_DEFAULT:
>> +		case RECURSE_SUBMODULES_ON_DEMAND:
>> +			*default_argv = "on-demand";
>> +			break;
>> +		case RECURSE_SUBMODULES_ON:
>> +			*default_argv = "yes";
>> +			break;
>> +		case RECURSE_SUBMODULES_OFF:
>> +			continue;
>> +		}
>> +
>> +		task->repo = get_submodule_repo_for(spf->r, task->sub->path,
>> +						    cs_data->super_oid);
>> +		if (!task->repo) {
>> +			fetch_task_release(task);
>> +			free(task);
>> +
>> +			strbuf_addf(err, _("Could not access submodule '%s'\n"),
>> +				    cs_data->path);
>> +			continue;
>> +		}
>> +		if (!is_tree_submodule_active(spf->r, cs_data->super_oid,
>> +					      task->sub->path))
>> +			continue;
>> +
>> +		if (!spf->quiet)
>> +			strbuf_addf(err,
>> +				    _("Fetching submodule %s%s at commit %s\n"),
>> +				    spf->prefix, task->sub->path,
>> +				    find_unique_abbrev(cs_data->super_oid,
>> +						       DEFAULT_ABBREV));
>> +		spf->changed_count++;
>> +		return task;
>> +	}
>> +	return NULL;
>> +}
>
> This is very similar to get_fetch_task_from_index(). Both:
>  1. loop over something
>  2. exit early if the submodule name is seen
>  3. create the fetch task
>  4. set the "recurse config"
>  5. get the submodule repo
>  6. if success, increment a counter
>  7. if failure, check for some conditions and maybe append to err
>
> Could a function be refactored that does 2-5?

Hm, it makes sense. I don't see a reason for steps 2-5 to differ
between the two functions.
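
For concreteness, one possible shape for such a helper (the helper and
parameter names below are invented; the struct types are the ones the
patch already uses), folding steps 2-5 so that the two loops only
differ in how they iterate and how they report progress:

  struct submodule_parallel_fetch;
  struct fetch_task;
  struct object_id;
  struct strbuf;

  /*
   * Skip submodules whose names were already seen, create the fetch
   * task, resolve the recurse-submodules mode into *default_argv, and
   * attach the submodule repository. Returns NULL (appending to err
   * where appropriate) when the caller should move on to the next
   * candidate. "treeish" is NULL when the submodule comes from the
   * index, or a superproject commit when it comes from the changed
   * list.
   */
  struct fetch_task *create_fetch_task_if_needed(
          struct submodule_parallel_fetch *spf,
          const char *path, const struct object_id *treeish,
          const char **default_argv, struct strbuf *err);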

>
>> +test_expect_success "'--recurse-submodules=on-demand' should fetch submodule commits if the submodule is changed but the index has no submodules" '
>> +	git -C downstream fetch --recurse-submodules &&
>
> First of all, thanks for updating the test - it is much easier to
> understand now.
>
> About this line, we shouldn't use the code being tested to set up (we're
> testing "fetch --recurse-submodules", so we shouldn't use the same
> command to set up). Also, if we don't have confidence in the starting
> state, it may be a sign to write it out more explicitly instead of
> relying on a complicated command to do the right thing.

True. I think the easiest way to do this would be the
"porcelain-downstream" you suggested in v1. But as you mentioned...

> However, in this case, I don't think we need this. All we need is to see
> that the test contains a new superproject commit that points to a new
> submodule commit (and recursively). So we don't need this line.

this isn't necessary, so I don't know if this is worth the effort at the
moment. I'll tinker with it.

As for the line itself, you're right, we don't need this. The state of
the downstream was more important when we cared about the old branch
head (it's needed for test_cmp), but we no longer do.

>> +	# Create new superproject commit with updated submodules
>> +	add_upstream_commit &&
>> +	(
>> +		cd submodule &&
>> +		(
>> +			cd subdir/deepsubmodule &&
>> +			git fetch &&
>> +			git checkout -q FETCH_HEAD
>> +		) &&
>> +		git add subdir/deepsubmodule &&
>> +		git commit -m "new deep submodule"
>> +	) &&
>> +	git add submodule &&
>> +	git commit -m "new submodule" &&
>
> I thought add_upstream_commit() would do this, but apparently it just
> adds commits to the submodules (which works for the earlier tests that
> just tested that the submodules were fetched, but not for this one). I
> think that all this should go into its own function.
>
> Also, I understand that "git fetch" is there to pick up the commit we
> made in add_upstream_commit, but this indirection is unnecessary in a
> test, I think. Can we not use add_upstream_commit and just create our
> own in subdir/deepsubmodule? (This might conflict with subsequent tests
> that use the old scheme, but I think that it should be fine.)

I copy-pasted this from existing tests, but I'm not happy with how noisy
it is either. I'll tinker with this too.

>> +# In downstream, init "submodule2", but do not check it out while
>> +# fetching. This lets us assert that unpopulated submodules can be
>> +# fetched.
>
> Firstly, "In upstream", I think? You want to fetch from it, so it has to
> be upstream.

It is "in downstream" - we "git init" the upstream, but we need to "git
submodule update --init" (which wraps "git submodule init") in the
downstream. If we didn't init it in the downstream, downstream wouldn't
have the clone and wouldn't fetch.


> Secondly, is this test needed? I thought the case in which the worktree
> has no submodules would be sufficient to test that unpopulated
> submodules can be fetched.

I'd prefer to have this test because it tests the interaction between
populated and unpopulated submodules. e.g. in a previous iteration, I
only had the "no submodules" test, but accidentally reused the
submodule_parallel_fetch.count variable for both populated and
unpopulated submodules. The test suite didn't catch the bug - I only
noticed the bug by a stroke of luck.

>> +test_expect_success "'--recurse-submodules' should fetch submodule commits in changed submodules and the index" '
>
> [snip]
>
>> +	git checkout super &&
>> +	(
>> +		cd downstream &&
>> +		git fetch --recurse-submodules >../actual.out 2>../actual.err
>> +	) &&
>> +	test_must_be_empty actual.out &&
>> +
>> +	# Assert that the submodules in the super branch are fetched
>> +	git rev-parse --short HEAD >superhead &&
>> +	git -C submodule rev-parse --short HEAD >subhead &&
>> +	git -C deepsubmodule rev-parse --short HEAD >deephead &&
>> +	verify_fetch_result actual.err &&
>> +	# grep for the exact line to check that the submodule is read
>> +	# from the index, not from a commit
>> +	grep "^Fetching submodule submodule\$" actual.err &&
>
> Instead of a grep, I think this should be done by precisely specifying
> what to fetch in the "git fetch" invocation, and then checking that the
> submodule has commits that it didn't have before.

verify_fetch_result() already tells us that the submodule
has the new commit:

	if [ -f subhead ]; then
		grep "Fetching submodule submodule" $ACTUAL_ERR &&
		grep -E "\.\.$(cat subhead)\s+sub\s+-> origin/sub" $ACTUAL_ERR

but (by design) it does not tell us whether "git fetch" read the
.gitmodules config from the index or a commit. The additional grep,
anchored with "^" and "$", tells us that we read from the index because
it checks that the info message is not "Fetching submodule submodule at
commit <id>". We want to have this check because we want "git fetch" to
prefer the index
in the event that the submodule is both changed and populated.

> In addition, I think the following cases also need to be tested:
>  - two fetched commits have submodules of the same name but different
>    URL
>  - a fetched commit and a commit in the index have submodules of the
>    same name but different URL

It makes sense to test both cases. The former is a new edge case
introduced by this commit, while the latter was already a concern
before this commit. I _believe_ that we already handle the latter
gracefully, and
that the same logic can be used to handle the former, but I don't think
we have any tests proving either of these hypotheses.
Glen Choo Feb. 16, 2022, 9:11 a.m. UTC | #4
Glen Choo <chooglen@google.com> writes:

> Jonathan Tan <jonathantanmy@google.com> writes:
>
>> Glen Choo <chooglen@google.com> writes:
>>> +	# Create new superproject commit with updated submodules
>>> +	add_upstream_commit &&
>>> +	(
>>> +		cd submodule &&
>>> +		(
>>> +			cd subdir/deepsubmodule &&
>>> +			git fetch &&
>>> +			git checkout -q FETCH_HEAD
>>> +		) &&
>>> +		git add subdir/deepsubmodule &&
>>> +		git commit -m "new deep submodule"
>>> +	) &&
>>> +	git add submodule &&
>>> +	git commit -m "new submodule" &&
>>
>> I thought add_upstream_commit() would do this, but apparently it just
>> adds commits to the submodules (which works for the earlier tests that
>> just tested that the submodules were fetched, but not for this one). I
>> think that all this should go into its own function.

I'm testing out a function that does exactly what these lines do, i.e.
creates a superproject commit containing a submodule change, which in
turn contains a deepsubmodule change. That works pretty well and it
makes sense in the context of the tests.

>> Also, I understand that "git fetch" is there to pick up the commit we
>> made in add_upstream_commit, but this indirection is unnecessary in a
>> test, I think. Can we not use add_upstream_commit and just create our
>> own in subdir/deepsubmodule? (This might conflict with subsequent tests
>> that use the old scheme, but I think that it should be fine.)

We can avoid the "git fetch", but we first need to fix an inconsistency
in how the submodules are set up. Right now, we have:

  test_expect_success setup '
    mkdir deepsubmodule &&
    [...]
    mkdir submodule &&
    (
    [...]
      git submodule add "$pwd/deepsubmodule" subdir/deepsubmodule &&
      git commit -a -m new &&
      git branch -M sub
    ) &&
    git submodule add "$pwd/submodule" submodule &&
    [...]
    (
      cd downstream &&
      git submodule update --init --recursive
    )
  '

resulting in a directory structure like:

$pwd
|_submodule
  |_subdir
    |_deepsubmodule
|_deepsubmodule

and upstream/downstream dependencies like:

upstream                             downstream            
--------                             ----------
$pwd/deepsubmodule                   $pwd/downstream/submodule/subdir/deepsubmodule (SUT)
                                     $pwd/submodule/subdir/deepsubmodule


So we can't create the commit in submodule/subdir/deepsubmodule, because
that's not where our SUT would fetch from.

Instead, we could fix this by having a more consistent
upstream/downstream structure:

$pwd
|_submodule
  |_subdir
    |_deepsubmodule

upstream                             downstream            
--------                             ----------
$pwd/submodule/subdir/deepsubmodule  $pwd/downstream/submodule/subdir/deepsubmodule (SUT)

This seems more convenient to test, but before I commit to this, is
there a downside to this that I'm not seeing?
Ævar Arnfjörð Bjarmason Feb. 16, 2022, 9:39 a.m. UTC | #5
On Wed, Feb 16 2022, Glen Choo wrote:

> Glen Choo <chooglen@google.com> writes:
>
>> Jonathan Tan <jonathantanmy@google.com> writes:
>>
>>> Glen Choo <chooglen@google.com> writes:
>>>> +	# Create new superproject commit with updated submodules
>>>> +	add_upstream_commit &&
>>>> +	(
>>>> +		cd submodule &&
>>>> +		(
>>>> +			cd subdir/deepsubmodule &&
>>>> +			git fetch &&
>>>> +			git checkout -q FETCH_HEAD
>>>> +		) &&
>>>> +		git add subdir/deepsubmodule &&
>>>> +		git commit -m "new deep submodule"
>>>> +	) &&
>>>> +	git add submodule &&
>>>> +	git commit -m "new submodule" &&
>>>
>>> I thought add_upstream_commit() would do this, but apparently it just
>>> adds commits to the submodules (which works for the earlier tests that
>>> just tested that the submodules were fetched, but not for this one). I
>>> think that all this should go into its own function.
>
> I'm testing out a function that does exactly what these lines do, i.e.
> create a superproject commit containing a submodule change containing a
> deepsubmodule change. That works pretty well and it makes sense in the
> context of the tests.
>
>>> Also, I understand that "git fetch" is there to pick up the commit we
>>> made in add_upstream_commit, but this indirection is unnecessary in a
>>> test, I think. Can we not use add_upstream_commit and just create our
>>> own in subdir/deepsubmodule? (This might conflict with subsequent tests
>>> that use the old scheme, but I think that it should be fine.)
>
> We can avoid the "git fetch", but we first need to fix an inconsistency
> in how the submodules are set up. Right now, we have:
>
>   test_expect_success setup '
>     mkdir deepsubmodule &&
>     [...]
>     mkdir submodule &&
>     (
>     [...]
>       git submodule add "$pwd/deepsubmodule" subdir/deepsubmodule &&
>       git commit -a -m new &&
>       git branch -M sub
>     ) &&
>     git submodule add "$pwd/submodule" submodule &&
>     [...]
>     (
>       cd downstream &&
>       git submodule update --init --recursive
>     )
>   '
>
> resulting in a directory structure like:
>
> $pwd
> |_submodule
>   |_subdir
>     |_deepsubmodule
> |_deepsubmodule
>
> and upstream/downstream dependencies like:
>
> upstream                             downstream            
> --------                             ----------
> $pwd/deepsubmodule                   $pwd/downstream/submodule/subdir/deepsubmodule (SUT)
>                                      $pwd/submodule/subdir/deepsubmodule
>
>
> So we can't create the commit in submodule/subdir/deepsubmodule, because
> that's not where our SUT would fetch from.
>
> Instead, we could fix this by having a more consistent
> upstream/downstream structure:
>
> $pwd
> |_submodule
>   |_subdir
>     |_deepsubmodule
>
> upstream                             downstream            
> --------                             ----------
> $pwd/submodule/subdir/deepsubmodule  $pwd/downstream/submodule/subdir/deepsubmodule (SUT)
>
> This seems more convenient to test, but before I commit to this, is
> there a downside to this that I'm not seeing?

Won't this sort of arrangement create N copies of e.g. a zlib.git or
some other common library that might be used by N submodules.

But I haven't read all the context; I'm assuming you're talking about
how we store in-tree a/b and x/y/b submodules now: do we store those in
.git/ both as .git/modules/b.git or whatever? I can't remember ... :)

Whatever we do now, I do think the caveat I've noted above is
interesting when it comes to submodule design, e.g. if both git.git and
some-random-thing.git bring in the same sha1collisiondetection.git
from the same GitHub URL, should those be the same in our underlying
storage?

I think the answer to that would ideally be both "yes" and
"no".

I.e. "yes" because it's surely handy for "git fetch", now you don't need to
fetch the same stuff twice in the common case of just updating all our
recursive submodules.

And also "no" because maybe some users would really consider them
different. E.g. you may want to "cd git/" and adjust the git.git one
and create a branch there for some hotfix it needs, which would not be
needed/wanted by some-random-thing.git.

Hrm...
Glen Choo Feb. 16, 2022, 5:33 p.m. UTC | #6
Ævar Arnfjörð Bjarmason <avarab@gmail.com> writes:

> On Wed, Feb 16 2022, Glen Choo wrote:
>
>> Glen Choo <chooglen@google.com> writes:
>>
>>> Jonathan Tan <jonathantanmy@google.com> writes:
>>>
>>>> Glen Choo <chooglen@google.com> writes:
>>>>> +	# Create new superproject commit with updated submodules
>>>>> +	add_upstream_commit &&
>>>>> +	(
>>>>> +		cd submodule &&
>>>>> +		(
>>>>> +			cd subdir/deepsubmodule &&
>>>>> +			git fetch &&
>>>>> +			git checkout -q FETCH_HEAD
>>>>> +		) &&
>>>>> +		git add subdir/deepsubmodule &&
>>>>> +		git commit -m "new deep submodule"
>>>>> +	) &&
>>>>> +	git add submodule &&
>>>>> +	git commit -m "new submodule" &&
>>>>
>>>> I thought add_upstream_commit() would do this, but apparently it just
>>>> adds commits to the submodules (which works for the earlier tests that
>>>> just tested that the submodules were fetched, but not for this one). I
>>>> think that all this should go into its own function.
>>
>> I'm testing out a function that does exactly what these lines do, i.e.
>> create a superproject commit containing a submodule change containing a
>> deepsubmodule change. That works pretty well and it makes sense in the
>> context of the tests.
>>
>>>> Also, I understand that "git fetch" is there to pick up the commit we
>>>> made in add_upstream_commit, but this indirection is unnecessary in a
>>>> test, I think. Can we not use add_upstream_commit and just create our
>>>> own in subdir/deepsubmodule? (This might conflict with subsequent tests
>>>> that use the old scheme, but I think that it should be fine.)
>>
>> We can avoid the "git fetch", but we first need to fix an inconsistency
>> in how the submodules are set up. Right now, we have:
>>
>>   test_expect_success setup '
>>     mkdir deepsubmodule &&
>>     [...]
>>     mkdir submodule &&
>>     (
>>     [...]
>>       git submodule add "$pwd/deepsubmodule" subdir/deepsubmodule &&
>>       git commit -a -m new &&
>>       git branch -M sub
>>     ) &&
>>     git submodule add "$pwd/submodule" submodule &&
>>     [...]
>>     (
>>       cd downstream &&
>>       git submodule update --init --recursive
>>     )
>>   '
>>
>> resulting in a directory structure like:
>>
>> $pwd
>> |_submodule
>>   |_subdir
>>     |_deepsubmodule
>> |_deepsubmodule
>>
>> and upstream/downstream dependencies like:
>>
>> upstream                             downstream            
>> --------                             ----------
>> $pwd/deepsubmodule                   $pwd/downstream/submodule/subdir/deepsubmodule (SUT)
>>                                      $pwd/submodule/subdir/deepsubmodule
>>
>>
>> So we can't create the commit in submodule/subdir/deepsubmodule, because
>> that's not where our SUT would fetch from.
>>
>> Instead, we could fix this by having a more consistent
>> upstream/downstream structure:
>>
>> $pwd
>> |_submodule
>>   |_subdir
>>     |_deepsubmodule
>>
>> upstream                             downstream            
>> --------                             ----------
>> $pwd/submodule/subdir/deepsubmodule  $pwd/downstream/submodule/subdir/deepsubmodule (SUT)
>>
>> This seems more convenient to test, but before I commit to this, is
>> there a downside to this that I'm not seeing?
>
> Won't this sort of arrangement create N copies of e.g. a zlib.git or
> some other common library that might be used by N submodules.
>
> But I haven't read all the context, I'm assuming you're talking about
> how we store in-tree a/b and x/y/b submodules now, we store those in
> .git/ both as .git/modules/b.git or whatever? I can't remember ... :)

Ah, the problem I'm describing is much simpler; it's just "how do we
want our test setup (which has submodules) to look".

But we can also consider the question you are asking :)

> Whatever we do now I do think the caveat I've noted above is interesting
> when it comes to submodule design, e.g. if both git.git and
> some-random-thing.git both bring in the same sha1collisiondetection.git
> from the same github URL should those be the same in our underlying
> storage?
>
> I think the answer to that would ideally be both "yes" and
> "no".
>
> I.e. "yes" because it's surely handy for "git fetch", now you don't need to
> fetch the same stuff twice in the common case of just updating all our
> recursive submodules.

Hm, and it would save space on disk.

> And also "no" because maybe some users would really consider them
> different. E.g. you may want to "cd git/" and adjust the git.git one
> and create a branch there for some hotfix it needs, which would not be
> needed/wanted by some-random-thing.git.

I don't think we could say "yes" for all users, because the subset of
users you describe here will probably appreciate them being separate.

But I can imagine doing this manually, like a "git submodule dedupe"
that lets users who really need it opt into this risky setup where
submodules are shared. Does anyone really need it though? I'm not sure
yet.

Patch

diff --git a/Documentation/fetch-options.txt b/Documentation/fetch-options.txt
index e967ff1874..38dad13683 100644
--- a/Documentation/fetch-options.txt
+++ b/Documentation/fetch-options.txt
@@ -185,15 +185,23 @@  endif::git-pull[]
 ifndef::git-pull[]
 --recurse-submodules[=yes|on-demand|no]::
 	This option controls if and under what conditions new commits of
-	populated submodules should be fetched too. It can be used as a
-	boolean option to completely disable recursion when set to 'no' or to
-	unconditionally recurse into all populated submodules when set to
-	'yes', which is the default when this option is used without any
-	value. Use 'on-demand' to only recurse into a populated submodule
-	when the superproject retrieves a commit that updates the submodule's
-	reference to a commit that isn't already in the local submodule
-	clone. By default, 'on-demand' is used, unless
-	`fetch.recurseSubmodules` is set (see linkgit:git-config[1]).
+	submodules should be fetched too. When recursing through submodules,
+	`git fetch` always attempts to fetch "changed" submodules, that is, a
+	submodule that has commits that are referenced by a newly fetched
+	superproject commit but are missing in the local submodule clone. A
+	changed submodule can be fetched as long as it is present locally e.g.
+	in `$GIT_DIR/modules/` (see linkgit:gitsubmodules[7]); if the upstream
+	adds a new submodule, that submodule cannot be fetched until it is
+	cloned e.g. by `git submodule update`.
++
+When set to 'on-demand', only changed submodules are fetched. When set
+to 'yes', all populated submodules are fetched and submodules that are
+both unpopulated and changed are fetched. When set to 'no', submodules
+are never fetched.
++
+When unspecified, this uses the value of `fetch.recurseSubmodules` if it
+is set (see linkgit:git-config[1]), defaulting to 'on-demand' if unset.
+When this option is used without any value, it defaults to 'yes'.
 endif::git-pull[]
 
 -j::
diff --git a/Documentation/git-fetch.txt b/Documentation/git-fetch.txt
index 550c16ca61..e9d364669a 100644
--- a/Documentation/git-fetch.txt
+++ b/Documentation/git-fetch.txt
@@ -287,12 +287,10 @@  include::transfer-data-leaks.txt[]
 
 BUGS
 ----
-Using --recurse-submodules can only fetch new commits in already checked
-out submodules right now. When e.g. upstream added a new submodule in the
-just fetched commits of the superproject the submodule itself cannot be
-fetched, making it impossible to check out that submodule later without
-having to do a fetch again. This is expected to be fixed in a future Git
-version.
+Using --recurse-submodules can only fetch new commits in submodules that are
+present locally e.g. in `$GIT_DIR/modules/`. If the upstream adds a new
+submodule, that submodule cannot be fetched until it is cloned e.g. by `git
+submodule update`. This is expected to be fixed in a future Git version.
 
 SEE ALSO
 --------
diff --git a/builtin/fetch.c b/builtin/fetch.c
index f7abbc31ff..faaf89f637 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -2122,13 +2122,13 @@  int cmd_fetch(int argc, const char **argv, const char *prefix)
 			max_children = fetch_parallel_config;
 
 		add_options_to_argv(&options);
-		result = fetch_populated_submodules(the_repository,
-						    &options,
-						    submodule_prefix,
-						    recurse_submodules,
-						    recurse_submodules_default,
-						    verbosity < 0,
-						    max_children);
+		result = fetch_submodules(the_repository,
+					  &options,
+					  submodule_prefix,
+					  recurse_submodules,
+					  recurse_submodules_default,
+					  verbosity < 0,
+					  max_children);
 		strvec_clear(&options);
 	}
 
diff --git a/submodule.c b/submodule.c
index 22d8a1ca12..3558fddeb7 100644
--- a/submodule.c
+++ b/submodule.c
@@ -811,6 +811,16 @@  static const char *default_name_or_path(const char *path_or_name)
  * member of the changed submodule string_list_item.
  */
 struct changed_submodule_data {
+	/*
+	 * The first superproject commit in the rev walk that points to the
+	 * submodule.
+	 */
+	const struct object_id *super_oid;
+	/*
+	 * Path to the submodule in the superproject commit referenced
+	 * by 'super_oid'.
+	 */
+	char *path;
 	/* The submodule commits that have changed in the rev walk. */
 	struct oid_array new_commits;
 };
@@ -818,6 +828,7 @@  struct changed_submodule_data {
 static void changed_submodule_data_clear(struct changed_submodule_data *cs_data)
 {
 	oid_array_clear(&cs_data->new_commits);
+	free(cs_data->path);
 }
 
 static void collect_changed_submodules_cb(struct diff_queue_struct *q,
@@ -865,6 +876,8 @@  static void collect_changed_submodules_cb(struct diff_queue_struct *q,
 		if (!item->util)
 			item->util = xcalloc(1, sizeof(struct changed_submodule_data));
 		cs_data = item->util;
+		cs_data->super_oid = commit_oid;
+		cs_data->path = xstrdup(p->two->path);
 		oid_array_append(&cs_data->new_commits, &p->two->oid);
 	}
 }
@@ -1248,14 +1261,28 @@  void check_for_new_submodule_commits(struct object_id *oid)
 	oid_array_append(&ref_tips_after_fetch, oid);
 }
 
+/*
+ * Returns 1 if the repo has absorbed submodule gitdirs, and 0
+ * otherwise. Like submodule_name_to_gitdir(), this checks
+ * $GIT_DIR/modules, not $GIT_COMMON_DIR.
+ */
+static int repo_has_absorbed_submodules(struct repository *r)
+{
+	struct strbuf buf = STRBUF_INIT;
+
+	strbuf_repo_git_path(&buf, r, "modules/");
+	return file_exists(buf.buf) && !is_empty_dir(buf.buf);
+}
+
 static void calculate_changed_submodule_paths(struct repository *r,
 		struct string_list *changed_submodule_names)
 {
 	struct strvec argv = STRVEC_INIT;
 	struct string_list_item *name;
 
-	/* No need to check if there are no submodules configured */
-	if (!submodule_from_path(r, NULL, NULL))
+	/* No need to check if no submodules would be fetched */
+	if (!submodule_from_path(r, NULL, NULL) &&
+	    !repo_has_absorbed_submodules(r))
 		return;
 
 	strvec_push(&argv, "--"); /* argv[0] program name */
@@ -1328,7 +1355,8 @@  int submodule_touches_in_range(struct repository *r,
 }
 
 struct submodule_parallel_fetch {
-	int count;
+	int index_count;
+	int changed_count;
 	struct strvec args;
 	struct repository *r;
 	const char *prefix;
@@ -1338,6 +1366,7 @@  struct submodule_parallel_fetch {
 	int result;
 
 	struct string_list changed_submodule_names;
+	struct string_list seen_submodule_names;
 
 	/* Pending fetches by OIDs */
 	struct fetch_task **oid_fetch_tasks;
@@ -1348,6 +1377,7 @@  struct submodule_parallel_fetch {
 #define SPF_INIT { \
 	.args = STRVEC_INIT, \
 	.changed_submodule_names = STRING_LIST_INIT_DUP, \
+	.seen_submodule_names = STRING_LIST_INIT_DUP, \
 	.submodules_with_errors = STRBUF_INIT, \
 }
 
@@ -1462,11 +1492,12 @@  static struct repository *get_submodule_repo_for(struct repository *r,
 }
 
 static struct fetch_task *
-get_fetch_task(struct submodule_parallel_fetch *spf,
-	       const char **default_argv, struct strbuf *err)
+get_fetch_task_from_index(struct submodule_parallel_fetch *spf,
+			  const char **default_argv, struct strbuf *err)
 {
-	for (; spf->count < spf->r->index->cache_nr; spf->count++) {
-		const struct cache_entry *ce = spf->r->index->cache[spf->count];
+	for (; spf->index_count < spf->r->index->cache_nr; spf->index_count++) {
+		const struct cache_entry *ce =
+			spf->r->index->cache[spf->index_count];
 		struct fetch_task *task;
 
 		if (!S_ISGITLINK(ce->ce_mode))
@@ -1476,6 +1507,15 @@  get_fetch_task(struct submodule_parallel_fetch *spf,
 		if (!task)
 			continue;
 
+		/*
+		 * We might have already considered this submodule
+		 * because we saw it when iterating the changed
+		 * submodule names.
+		 */
+		if (string_list_lookup(&spf->seen_submodule_names,
+				       task->sub->name))
+			continue;
+
 		switch (get_fetch_recurse_config(task->sub, spf))
 		{
 		default:
@@ -1501,7 +1541,7 @@  get_fetch_task(struct submodule_parallel_fetch *spf,
 				strbuf_addf(err, _("Fetching submodule %s%s\n"),
 					    spf->prefix, ce->name);
 
-			spf->count++;
+			spf->index_count++;
 			return task;
 		} else {
 			struct strbuf empty_submodule_path = STRBUF_INIT;
@@ -1529,12 +1569,77 @@  get_fetch_task(struct submodule_parallel_fetch *spf,
 	return NULL;
 }
 
+static struct fetch_task *
+get_fetch_task_from_changed(struct submodule_parallel_fetch *spf,
+			    const char **default_argv, struct strbuf *err)
+{
+	for (; spf->changed_count < spf->changed_submodule_names.nr;
+	     spf->changed_count++) {
+		struct string_list_item item =
+			spf->changed_submodule_names.items[spf->changed_count];
+		struct changed_submodule_data *cs_data = item.util;
+		struct fetch_task *task;
+
+		/*
+		 * We might have already considered this submodule
+		 * because we saw it in the index.
+		 */
+		if (string_list_lookup(&spf->seen_submodule_names, item.string))
+			continue;
+
+		task = fetch_task_create(spf->r, cs_data->path,
+					 cs_data->super_oid);
+		if (!task)
+			continue;
+
+		switch (get_fetch_recurse_config(task->sub, spf)) {
+		default:
+		case RECURSE_SUBMODULES_DEFAULT:
+		case RECURSE_SUBMODULES_ON_DEMAND:
+			*default_argv = "on-demand";
+			break;
+		case RECURSE_SUBMODULES_ON:
+			*default_argv = "yes";
+			break;
+		case RECURSE_SUBMODULES_OFF:
+			continue;
+		}
+
+		task->repo = get_submodule_repo_for(spf->r, task->sub->path,
+						    cs_data->super_oid);
+		if (!task->repo) {
+			fetch_task_release(task);
+			free(task);
+
+			strbuf_addf(err, _("Could not access submodule '%s'\n"),
+				    cs_data->path);
+			continue;
+		}
+		if (!is_tree_submodule_active(spf->r, cs_data->super_oid,
+					      task->sub->path))
+			continue;
+
+		if (!spf->quiet)
+			strbuf_addf(err,
+				    _("Fetching submodule %s%s at commit %s\n"),
+				    spf->prefix, task->sub->path,
+				    find_unique_abbrev(cs_data->super_oid,
+						       DEFAULT_ABBREV));
+		spf->changed_count++;
+		return task;
+	}
+	return NULL;
+}
+
 static int get_next_submodule(struct child_process *cp, struct strbuf *err,
 			      void *data, void **task_cb)
 {
 	struct submodule_parallel_fetch *spf = data;
 	const char *default_argv = NULL;
-	struct fetch_task *task = get_fetch_task(spf, &default_argv, err);
+	struct fetch_task *task =
+		get_fetch_task_from_index(spf, &default_argv, err);
+	if (!task)
+		task = get_fetch_task_from_changed(spf, &default_argv, err);
 
 	if (task) {
 		struct strbuf submodule_prefix = STRBUF_INIT;
@@ -1555,6 +1660,7 @@  static int get_next_submodule(struct child_process *cp, struct strbuf *err,
 		*task_cb = task;
 
 		strbuf_release(&submodule_prefix);
+		string_list_insert(&spf->seen_submodule_names, task->sub->name);
 		return 1;
 	}
 
@@ -1669,11 +1775,11 @@  static int fetch_finish(int retvalue, struct strbuf *err,
 	return 0;
 }
 
-int fetch_populated_submodules(struct repository *r,
-			       const struct strvec *options,
-			       const char *prefix, int command_line_option,
-			       int default_option,
-			       int quiet, int max_parallel_jobs)
+int fetch_submodules(struct repository *r,
+		     const struct strvec *options,
+		     const char *prefix, int command_line_option,
+		     int default_option,
+		     int quiet, int max_parallel_jobs)
 {
 	int i;
 	struct submodule_parallel_fetch spf = SPF_INIT;
diff --git a/submodule.h b/submodule.h
index 784ceffc0e..61bebde319 100644
--- a/submodule.h
+++ b/submodule.h
@@ -88,12 +88,12 @@  int should_update_submodules(void);
  */
 const struct submodule *submodule_from_ce(const struct cache_entry *ce);
 void check_for_new_submodule_commits(struct object_id *oid);
-int fetch_populated_submodules(struct repository *r,
-			       const struct strvec *options,
-			       const char *prefix,
-			       int command_line_option,
-			       int default_option,
-			       int quiet, int max_parallel_jobs);
+int fetch_submodules(struct repository *r,
+		     const struct strvec *options,
+		     const char *prefix,
+		     int command_line_option,
+		     int default_option,
+		     int quiet, int max_parallel_jobs);
 unsigned is_submodule_modified(const char *path, int ignore_untracked);
 int submodule_uses_gitfile(const char *path);
 
diff --git a/t/t5526-fetch-submodules.sh b/t/t5526-fetch-submodules.sh
index cb18f0ac21..df44757468 100755
--- a/t/t5526-fetch-submodules.sh
+++ b/t/t5526-fetch-submodules.sh
@@ -399,6 +399,201 @@  test_expect_success "'--recurse-submodules=on-demand' recurses as deep as necess
 	verify_fetch_result actual.err
 '
 
+# Test that we can fetch submodules in other branches by running fetch
+# in a commit that has no submodules.
+test_expect_success 'setup downstream branch without submodules' '
+	(
+		cd downstream &&
+		git checkout --recurse-submodules -b no-submodules &&
+		rm .gitmodules &&
+		git rm submodule &&
+		git add .gitmodules &&
+		git commit -m "no submodules" &&
+		git checkout --recurse-submodules super
+	)
+'
+
+test_expect_success "'--recurse-submodules=on-demand' should fetch submodule commits if the submodule is changed but the index has no submodules" '
+	git -C downstream fetch --recurse-submodules &&
+	# Create new superproject commit with updated submodules
+	add_upstream_commit &&
+	(
+		cd submodule &&
+		(
+			cd subdir/deepsubmodule &&
+			git fetch &&
+			git checkout -q FETCH_HEAD
+		) &&
+		git add subdir/deepsubmodule &&
+		git commit -m "new deep submodule"
+	) &&
+	git add submodule &&
+	git commit -m "new submodule" &&
+
+	# Fetch the new superproject commit
+	(
+		cd downstream &&
+		git switch --recurse-submodules no-submodules &&
+		git fetch --recurse-submodules=on-demand >../actual.out 2>../actual.err
+	) &&
+	test_must_be_empty actual.out &&
+	git rev-parse --short HEAD >superhead &&
+	git -C submodule rev-parse --short HEAD >subhead &&
+	git -C deepsubmodule rev-parse --short HEAD >deephead &&
+	verify_fetch_result actual.err &&
+
+	# Assert that the fetch happened at the non-HEAD commits
+	grep "Fetching submodule submodule at commit $superhead" actual.err &&
+	grep "Fetching submodule submodule/subdir/deepsubmodule at commit $subhead" actual.err
+'
+
+test_expect_success "'--recurse-submodules' should fetch submodule commits if the submodule is changed but the index has no submodules" '
+	# Fetch any leftover commits from other tests.
+	git -C downstream fetch --recurse-submodules &&
+	# Create new superproject commit with updated submodules
+	add_upstream_commit &&
+	(
+		cd submodule &&
+		(
+			cd subdir/deepsubmodule &&
+			git fetch &&
+			git checkout -q FETCH_HEAD
+		) &&
+		git add subdir/deepsubmodule &&
+		git commit -m "new deep submodule"
+	) &&
+	git add submodule &&
+	git commit -m "new submodule" &&
+
+	# Fetch the new superproject commit
+	(
+		cd downstream &&
+		git switch --recurse-submodules no-submodules &&
+		git fetch --recurse-submodules >../actual.out 2>../actual.err
+	) &&
+	test_must_be_empty actual.out &&
+	git rev-parse --short HEAD >superhead &&
+	git -C submodule rev-parse --short HEAD >subhead &&
+	git -C deepsubmodule rev-parse --short HEAD >deephead &&
+	verify_fetch_result actual.err &&
+
+	# Assert that the fetch happened at the non-HEAD commits
+	grep "Fetching submodule submodule at commit $superhead" actual.err &&
+	grep "Fetching submodule submodule/subdir/deepsubmodule at commit $subhead" actual.err
+'
+
+test_expect_success "'--recurse-submodules' should ignore changed, inactive submodules" '
+	# Fetch any leftover commits from other tests.
+	git -C downstream fetch --recurse-submodules &&
+	# Create new superproject commit with updated submodules
+	add_upstream_commit &&
+	(
+		cd submodule &&
+		(
+			cd subdir/deepsubmodule &&
+			git fetch &&
+			git checkout -q FETCH_HEAD
+		) &&
+		git add subdir/deepsubmodule &&
+		git commit -m "new deep submodule"
+	) &&
+	git add submodule &&
+	git commit -m "new submodule" &&
+
+	# Fetch the new superproject commit
+	(
+		cd downstream &&
+		git switch --recurse-submodules no-submodules &&
+		git -c submodule.submodule.active=false fetch --recurse-submodules >../actual.out 2>../actual.err
+	) &&
+	test_must_be_empty actual.out &&
+	git rev-parse --short HEAD >superhead &&
+	# Neither should be fetched because the submodule is inactive
+	rm subhead &&
+	rm deephead &&
+	verify_fetch_result actual.err
+'
+
+# In downstream, init "submodule2", but do not check it out while
+# fetching. This lets us assert that unpopulated submodules can be
+# fetched.
+test_expect_success 'setup downstream branch with other submodule' '
+	mkdir submodule2 &&
+	(
+		cd submodule2 &&
+		git init &&
+		echo sub2content >sub2file &&
+		git add sub2file &&
+		git commit -a -m new &&
+		git branch -M sub2
+	) &&
+	git checkout -b super-sub2-only &&
+	git submodule add "$pwd/submodule2" submodule2 &&
+	git commit -m "add sub2" &&
+	git checkout super &&
+	(
+		cd downstream &&
+		git fetch --recurse-submodules origin &&
+		git checkout super-sub2-only &&
+		# Explicitly run "git submodule update" because sub2 is new
+		# and has not been cloned.
+		git submodule update --init &&
+		git checkout --recurse-submodules super
+	)
+'
+
+test_expect_success "'--recurse-submodules' should fetch submodule commits in changed submodules and the index" '
+	# Fetch any leftover commits from other tests.
+	git -C downstream fetch --recurse-submodules &&
+	# Create new commit in origin/super
+	add_upstream_commit &&
+	(
+		cd submodule &&
+		(
+			cd subdir/deepsubmodule &&
+			git fetch &&
+			git checkout -q FETCH_HEAD
+		) &&
+		git add subdir/deepsubmodule &&
+		git commit -m "new deep submodule"
+	) &&
+	git add submodule &&
+	git commit -m "new submodule" &&
+
+	# Create new commit in origin/super-sub2-only
+	git checkout super-sub2-only &&
+	(
+		cd submodule2 &&
+		test_commit --no-tag foo
+	) &&
+	git add submodule2 &&
+	git commit -m "new submodule2" &&
+
+	git checkout super &&
+	(
+		cd downstream &&
+		git fetch --recurse-submodules >../actual.out 2>../actual.err
+	) &&
+	test_must_be_empty actual.out &&
+
+	# Assert that the submodules in the super branch are fetched
+	git rev-parse --short HEAD >superhead &&
+	git -C submodule rev-parse --short HEAD >subhead &&
+	git -C deepsubmodule rev-parse --short HEAD >deephead &&
+	verify_fetch_result actual.err &&
+	# grep for the exact line to check that the submodule is read
+	# from the index, not from a commit
+	grep "^Fetching submodule submodule\$" actual.err &&
+
+	# Assert that super-sub2-only and submodule2 were fetched even
+	# though another branch is checked out
+	super_sub2_only_head=$(git rev-parse --short super-sub2-only) &&
+	grep -E "\.\.${super_sub2_only_head}\s+super-sub2-only\s+-> origin/super-sub2-only" actual.err &&
+	grep "Fetching submodule submodule2 at commit $super_sub2_only_head" actual.err &&
+	sub2head=$(git -C submodule2 rev-parse --short HEAD) &&
+	grep -E "\.\.${sub2head}\s+sub2\s+-> origin/sub2" actual.err
+'
+
 test_expect_success "'--recurse-submodules=on-demand' stops when no new submodule commits are found in the superproject (and ignores config)" '
 	add_upstream_commit &&
 	echo a >> file &&