Message ID | 20230207181706.363453-7-calvinwan@google.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | submodule: parallelize diff | expand |
On Tue, Feb 07 2023, Calvin Wan wrote: > diff --git a/diff-lib.c b/diff-lib.c > index 7101cfda3f..e18c886a80 100644 > --- a/diff-lib.c > +++ b/diff-lib.c > @@ -73,18 +73,24 @@ static int match_stat_with_submodule(struct diff_options *diffopt, > unsigned *dirty_submodule) > { > int changed = ie_match_stat(diffopt->repo->index, ce, st, ce_option); > - if (S_ISGITLINK(ce->ce_mode)) { > - struct diff_flags orig_flags = diffopt->flags; > - if (!diffopt->flags.override_submodule_config) > - set_diffopt_flags_from_submodule_config(diffopt, ce->name); > - if (diffopt->flags.ignore_submodules) > - changed = 0; > - else if (!diffopt->flags.ignore_dirty_submodules && > - (!changed || diffopt->flags.dirty_submodules)) > - *dirty_submodule = is_submodule_modified(ce->name, > - diffopt->flags.ignore_untracked_in_submodules); > - diffopt->flags = orig_flags; > + struct diff_flags orig_flags; > + > + if (!S_ISGITLINK(ce->ce_mode)) > + return changed; > + > + orig_flags = diffopt->flags; > + if (!diffopt->flags.override_submodule_config) > + set_diffopt_flags_from_submodule_config(diffopt, ce->name); > + if (diffopt->flags.ignore_submodules) { > + changed = 0; > + goto cleanup; > } > + if (!diffopt->flags.ignore_dirty_submodules && > + (!changed || diffopt->flags.dirty_submodules)) > + *dirty_submodule = is_submodule_modified(ce->name, > + diffopt->flags.ignore_untracked_in_submodules); > +cleanup: > + diffopt->flags = orig_flags; > return changed; > } Parallel to reviewing your topic I started wondering if we couldn't get rid of this "orig_flags" flip-flopping, i.e. can't we just set the specific flags we want in output parameters. Anyway, having looked at this closely I think this patch should be dropped entirely. I don't understand how this refactoring is meant to make the end result easier to read, reason about, or how it helps the subsequent patch. In addition to the above diff in 7/7 you do (and that's the change this is meant to help): static int match_stat_with_submodule(struct diff_options *diffopt, const struct cache_entry *ce, struct stat *st, unsigned ce_option, - unsigned *dirty_submodule) + unsigned *dirty_submodule, int *defer_submodule_status, + unsigned *ignore_untracked) { int changed = ie_match_stat(diffopt->repo->index, ce, st, ce_option); struct diff_flags orig_flags; + int defer = 0; if (!S_ISGITLINK(ce->ce_mode)) - return changed; + goto ret; orig_flags = diffopt->flags; if (!diffopt->flags.override_submodule_config) @@ -86,11 +92,20 @@ static int match_stat_with_submodule(struct diff_options *diffopt, goto cleanup; } if (!diffopt->flags.ignore_dirty_submodules && - (!changed || diffopt->flags.dirty_submodules)) - *dirty_submodule = is_submodule_modified(ce->name, + (!changed || diffopt->flags.dirty_submodules)) { + if (defer_submodule_status && *defer_submodule_status) { + defer = 1; + *ignore_untracked = diffopt->flags.ignore_untracked_in_submodules; + } else { + *dirty_submodule = is_submodule_modified(ce->name, diffopt->flags.ignore_untracked_in_submodules); + } + } cleanup: diffopt->flags = orig_flags; +ret: + if (defer_submodule_status) + *defer_submodule_status = defer; return changed; } But if I rebase out this 6/7 patch and solve the conflict for 7/7 it becomes: @@ -65,14 +66,20 @@ static int check_removed(const struct index_state *istate, const struct cache_en * Return 1 when changes are detected, 0 otherwise. If the DIRTY_SUBMODULES * option is set, the caller does not only want to know if a submodule is * modified at all but wants to know all the conditions that are met (new - * commits, untracked content and/or modified content). + * commits, untracked content and/or modified content). If + * defer_submodule_status bit is set, dirty_submodule will be left to the + * caller to set. defer_submodule_status can also be set to 0 in this + * function if there is no need to check if the submodule is modified. */ static int match_stat_with_submodule(struct diff_options *diffopt, const struct cache_entry *ce, struct stat *st, unsigned ce_option, - unsigned *dirty_submodule) + unsigned *dirty_submodule, int *defer_submodule_status, + unsigned *ignore_untracked) { int changed = ie_match_stat(diffopt->repo->index, ce, st, ce_option); + int defer = 0; + if (S_ISGITLINK(ce->ce_mode)) { struct diff_flags orig_flags = diffopt->flags; if (!diffopt->flags.override_submodule_config) @@ -80,11 +87,20 @@ static int match_stat_with_submodule(struct diff_options *diffopt, if (diffopt->flags.ignore_submodules) changed = 0; else if (!diffopt->flags.ignore_dirty_submodules && - (!changed || diffopt->flags.dirty_submodules)) - *dirty_submodule = is_submodule_modified(ce->name, - diffopt->flags.ignore_untracked_in_submodules); + (!changed || diffopt->flags.dirty_submodules)) { + if (defer_submodule_status && *defer_submodule_status) { + defer = 1; + *ignore_untracked = diffopt->flags.ignore_untracked_in_submodules; + } else { + *dirty_submodule = is_submodule_modified(ce->name, + diffopt->flags.ignore_untracked_in_submodules); + } + } diffopt->flags = orig_flags; } + + if (defer_submodule_status) + *defer_submodule_status = defer; return changed; } I can see how there's some room for *a* refactoring to reduce the subsequent diff, but not by mutch. But this commit didn't help at all. This whole "goto ret", and "goto cleanup" is just working around the fact that you pulled "orig_flags" out of the "if" scope. Normally the de-indentation would be worth it, but here it's not. The control flow becomes more complex to reason about as a result.
Hi Calvin On 07/02/2023 18:17, Calvin Wan wrote: > Flatten out the if statements in match_stat_with_submodule so the > logic is more readable and easier for future patches to add to. > orig_flags didn't need to be set if the cache entry wasn't a > GITLINK so defer setting it. > > Signed-off-by: Calvin Wan <calvinwan@google.com> > --- > diff-lib.c | 28 +++++++++++++++++----------- > 1 file changed, 17 insertions(+), 11 deletions(-) > > diff --git a/diff-lib.c b/diff-lib.c > index 7101cfda3f..e18c886a80 100644 > --- a/diff-lib.c > +++ b/diff-lib.c > @@ -73,18 +73,24 @@ static int match_stat_with_submodule(struct diff_options *diffopt, > unsigned *dirty_submodule) > { > int changed = ie_match_stat(diffopt->repo->index, ce, st, ce_option); > - if (S_ISGITLINK(ce->ce_mode)) { > - struct diff_flags orig_flags = diffopt->flags; > - if (!diffopt->flags.override_submodule_config) > - set_diffopt_flags_from_submodule_config(diffopt, ce->name); > - if (diffopt->flags.ignore_submodules) > - changed = 0; > - else if (!diffopt->flags.ignore_dirty_submodules && > - (!changed || diffopt->flags.dirty_submodules)) > - *dirty_submodule = is_submodule_modified(ce->name, > - diffopt->flags.ignore_untracked_in_submodules); > - diffopt->flags = orig_flags; > + struct diff_flags orig_flags; > + > + if (!S_ISGITLINK(ce->ce_mode)) > + return changed; > + > + orig_flags = diffopt->flags; > + if (!diffopt->flags.override_submodule_config) > + set_diffopt_flags_from_submodule_config(diffopt, ce->name); > + if (diffopt->flags.ignore_submodules) { > + changed = 0; > + goto cleanup; Looking ahead to patch 7 there are no new uses of the "cleanup" label so I think it would be simpler to leave the code as it was, rather than changing the "else if" below to "if" and adding the goto here. Best Wishes Phillip > } > + if (!diffopt->flags.ignore_dirty_submodules && > + (!changed || diffopt->flags.dirty_submodules)) > + *dirty_submodule = is_submodule_modified(ce->name, > + diffopt->flags.ignore_untracked_in_submodules); > +cleanup: > + diffopt->flags = orig_flags; > return changed; > } >
On 08/02/2023 08:18, Ævar Arnfjörð Bjarmason wrote: > > On Tue, Feb 07 2023, Calvin Wan wrote: > Anyway, having looked at this closely I think this patch should be > dropped entirely. I don't understand how this refactoring is meant to > make the end result easier to read, reason about, or how it helps the > subsequent patch. That's my feeling too c.f. <19f91fea-a2a9-7dc6-d940-cc10f384fe76@dunelm.org.uk>. This patch has improved since that comment on v4 but I still think we'd be better off without it. Best Wishes Phillip > In addition to the above diff in 7/7 you do (and that's the change this > is meant to help): > > static int match_stat_with_submodule(struct diff_options *diffopt, > const struct cache_entry *ce, > struct stat *st, unsigned ce_option, > - unsigned *dirty_submodule) > + unsigned *dirty_submodule, int *defer_submodule_status, > + unsigned *ignore_untracked) > { > int changed = ie_match_stat(diffopt->repo->index, ce, st, ce_option); > struct diff_flags orig_flags; > + int defer = 0; > > if (!S_ISGITLINK(ce->ce_mode)) > - return changed; > + goto ret; > > orig_flags = diffopt->flags; > if (!diffopt->flags.override_submodule_config) > @@ -86,11 +92,20 @@ static int match_stat_with_submodule(struct diff_options *diffopt, > goto cleanup; > } > if (!diffopt->flags.ignore_dirty_submodules && > - (!changed || diffopt->flags.dirty_submodules)) > - *dirty_submodule = is_submodule_modified(ce->name, > + (!changed || diffopt->flags.dirty_submodules)) { > + if (defer_submodule_status && *defer_submodule_status) { > + defer = 1; > + *ignore_untracked = diffopt->flags.ignore_untracked_in_submodules; > + } else { > + *dirty_submodule = is_submodule_modified(ce->name, > diffopt->flags.ignore_untracked_in_submodules); > + } > + } > cleanup: > diffopt->flags = orig_flags; > +ret: > + if (defer_submodule_status) > + *defer_submodule_status = defer; > return changed; > } > > But if I rebase out this 6/7 patch and solve the conflict for 7/7 it > becomes: > > @@ -65,14 +66,20 @@ static int check_removed(const struct index_state *istate, const struct cache_en > * Return 1 when changes are detected, 0 otherwise. If the DIRTY_SUBMODULES > * option is set, the caller does not only want to know if a submodule is > * modified at all but wants to know all the conditions that are met (new > - * commits, untracked content and/or modified content). > + * commits, untracked content and/or modified content). If > + * defer_submodule_status bit is set, dirty_submodule will be left to the > + * caller to set. defer_submodule_status can also be set to 0 in this > + * function if there is no need to check if the submodule is modified. > */ > static int match_stat_with_submodule(struct diff_options *diffopt, > const struct cache_entry *ce, > struct stat *st, unsigned ce_option, > - unsigned *dirty_submodule) > + unsigned *dirty_submodule, int *defer_submodule_status, > + unsigned *ignore_untracked) > { > int changed = ie_match_stat(diffopt->repo->index, ce, st, ce_option); > + int defer = 0; > + > if (S_ISGITLINK(ce->ce_mode)) { > struct diff_flags orig_flags = diffopt->flags; > if (!diffopt->flags.override_submodule_config) > @@ -80,11 +87,20 @@ static int match_stat_with_submodule(struct diff_options *diffopt, > if (diffopt->flags.ignore_submodules) > changed = 0; > else if (!diffopt->flags.ignore_dirty_submodules && > - (!changed || diffopt->flags.dirty_submodules)) > - *dirty_submodule = is_submodule_modified(ce->name, > - diffopt->flags.ignore_untracked_in_submodules); > + (!changed || diffopt->flags.dirty_submodules)) { > + if (defer_submodule_status && *defer_submodule_status) { > + defer = 1; > + *ignore_untracked = diffopt->flags.ignore_untracked_in_submodules; > + } else { > + *dirty_submodule = is_submodule_modified(ce->name, > + diffopt->flags.ignore_untracked_in_submodules); > + } > + } > diffopt->flags = orig_flags; > } > + > + if (defer_submodule_status) > + *defer_submodule_status = defer; > return changed; > } > > > I can see how there's some room for *a* refactoring to reduce the > subsequent diff, but not by mutch. > > But this commit didn't help at all. This whole "goto ret", and "goto > cleanup" is just working around the fact that you pulled "orig_flags" > out of the "if" scope. Normally the de-indentation would be worth it, > but here it's not. The control flow becomes more complex to reason about > as a result. >
I agree that this patch should be dropped. Thanks for catching this.
diff --git a/diff-lib.c b/diff-lib.c index 7101cfda3f..e18c886a80 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -73,18 +73,24 @@ static int match_stat_with_submodule(struct diff_options *diffopt, unsigned *dirty_submodule) { int changed = ie_match_stat(diffopt->repo->index, ce, st, ce_option); - if (S_ISGITLINK(ce->ce_mode)) { - struct diff_flags orig_flags = diffopt->flags; - if (!diffopt->flags.override_submodule_config) - set_diffopt_flags_from_submodule_config(diffopt, ce->name); - if (diffopt->flags.ignore_submodules) - changed = 0; - else if (!diffopt->flags.ignore_dirty_submodules && - (!changed || diffopt->flags.dirty_submodules)) - *dirty_submodule = is_submodule_modified(ce->name, - diffopt->flags.ignore_untracked_in_submodules); - diffopt->flags = orig_flags; + struct diff_flags orig_flags; + + if (!S_ISGITLINK(ce->ce_mode)) + return changed; + + orig_flags = diffopt->flags; + if (!diffopt->flags.override_submodule_config) + set_diffopt_flags_from_submodule_config(diffopt, ce->name); + if (diffopt->flags.ignore_submodules) { + changed = 0; + goto cleanup; } + if (!diffopt->flags.ignore_dirty_submodules && + (!changed || diffopt->flags.dirty_submodules)) + *dirty_submodule = is_submodule_modified(ce->name, + diffopt->flags.ignore_untracked_in_submodules); +cleanup: + diffopt->flags = orig_flags; return changed; }
Flatten out the if statements in match_stat_with_submodule so the logic is more readable and easier for future patches to add to. orig_flags didn't need to be set if the cache entry wasn't a GITLINK so defer setting it. Signed-off-by: Calvin Wan <calvinwan@google.com> --- diff-lib.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-)