diff mbox series

[6/7] show, log: provide a --remerge-diff capability

Message ID b75e73384fde4f23296bd02eb40455263f805706.1630376800.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series Add a new --remerge-diff capability to show & log | expand

Commit Message

Elijah Newren Aug. 31, 2021, 2:26 a.m. UTC
From: Elijah Newren <newren@gmail.com>

When this option is specified, we remerge all (two parent) merge commits
and diff the actual merge commit to the automatically created version,
in order to show how users removed conflict markers, resolved the
different conflict versions, and potentially added new changes outside
of conflict regions in order to resolve semantic merge problems (or,
possibly, just to hide other random changes).

This capability works by creating a temporary object directory and
marking it as the primary object store, so that any blobs or trees
created during the automatic merge, can be easily removed afterwards by
just deleting all objects from the temporary object directory.  We can
do this after handling each merge commit, in order to avoid the need to
worry about doing `git gc --auto` runs while running `git log
--remerge-diff`.

Signed-off-by: Elijah Newren <newren@gmail.com>
---
 builtin/log.c | 23 +++++++++++++++++
 diff-merges.c | 12 +++++++++
 log-tree.c    | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++
 revision.h    |  6 ++++-
 4 files changed, 109 insertions(+), 1 deletion(-)

Comments

Sergey Organov Aug. 31, 2021, 9:19 a.m. UTC | #1
"Elijah Newren via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: Elijah Newren <newren@gmail.com>

[...]

> diff --git a/diff-merges.c b/diff-merges.c
> index d897fd8a293..3a24c45b8e5 100644
> --- a/diff-merges.c
> +++ b/diff-merges.c
> @@ -17,6 +17,7 @@ static void suppress(struct rev_info *revs)
>  	revs->combined_all_paths = 0;
>  	revs->merges_imply_patch = 0;
>  	revs->merges_need_diff = 0;
> +	revs->remerge_diff = 0;
>  }
>  
>  static void set_separate(struct rev_info *revs)
> @@ -45,6 +46,12 @@ static void set_dense_combined(struct rev_info *revs)
>  	revs->dense_combined_merges = 1;
>  }
>  
> +static void set_remerge_diff(struct rev_info *revs)
> +{
> +	suppress(revs);
> +	revs->remerge_diff = 1;
> +}
> +
>  static diff_merges_setup_func_t func_by_opt(const char *optarg)
>  {
>  	if (!strcmp(optarg, "off") || !strcmp(optarg, "none"))
> @@ -57,6 +64,8 @@ static diff_merges_setup_func_t func_by_opt(const char *optarg)
>  		return set_combined;
>  	else if (!strcmp(optarg, "cc") || !strcmp(optarg, "dense-combined"))
>  		return set_dense_combined;
> +	else if (!strcmp(optarg, "r") || !strcmp(optarg, "remerge"))
> +		return set_remerge_diff;
>  	else if (!strcmp(optarg, "m") || !strcmp(optarg, "on"))
>  		return set_to_default;
>  	return NULL;
> @@ -113,6 +122,9 @@ int diff_merges_parse_opts(struct rev_info *revs, const char **argv)
>  	} else if (!strcmp(arg, "--cc")) {
>  		set_dense_combined(revs);
>  		revs->merges_imply_patch = 1;
> +	} else if (!strcmp(arg, "--remerge-diff")) {
> +		set_remerge_diff(revs);
> +		revs->merges_imply_patch = 1;
>  	} else if (!strcmp(arg, "--no-diff-merges")) {
>  		suppress(revs);
>  	} else if (!strcmp(arg, "--combined-all-paths")) {

The diff-merges options handling looks fine to me.

Thanks,
-- Sergey Organov
Ævar Arnfjörð Bjarmason Sept. 28, 2021, 8:01 a.m. UTC | #2
On Tue, Aug 31 2021, Elijah Newren via GitGitGadget wrote:

>  static int decoration_given;
>  static int use_mailmap_config = 1;
> +static struct tmp_objdir *tmp_objdir;
>  static const char *fmt_patch_subject_prefix = "PATCH";
>  static int fmt_patch_name_max = FORMAT_PATCH_NAME_MAX_DEFAULT;
>  static const char *fmt_pretty;

So here we make this static file-level etc...

> @@ -407,6 +410,17 @@ static int cmd_log_walk(struct rev_info *rev)
>  	int saved_nrl = 0;
>  	int saved_dcctc = 0;
>  
> +	if (rev->remerge_diff) {
> +		tmp_objdir = tmp_objdir_create();
> +		if (!tmp_objdir)
> +			die(_("unable to create temporary object directory"));
> +		tmp_objdir_make_primary(the_repository, tmp_objdir);
> +
> +		strbuf_init(&rev->remerge_objdir_location, 0);
> +		strbuf_addstr(&rev->remerge_objdir_location,
> +			      tmp_objdir_path(tmp_objdir));
> +	}
> +
>  	if (rev->early_output)
>  		setup_early_output();
>  
> @@ -449,6 +463,13 @@ static int cmd_log_walk(struct rev_info *rev)
>  	rev->diffopt.no_free = 0;
>  	diff_free(&rev->diffopt);
>  
> +	if (rev->remerge_diff) {
> +		strbuf_release(&rev->remerge_objdir_location);
> +		tmp_objdir_remove_as_primary(the_repository, tmp_objdir);
> +		tmp_objdir_destroy(tmp_objdir);
> +		tmp_objdir = NULL;

...but all of the "tmp_objdir" usage is in one function, can't the
variable be declared here instead?

We need to hand the "remerge_objdir_location" off to the "rev_info"
struct, but that seems separate from its lifetime.

Re my [1] & [2] I like Neeraj's "atexit cleanup" approach better,
perhaps that makes your cleanup in log-tree.c redundant or easier?

Per [2] it looks like you need to "hand off" the
"remerge_objdir_location", so having the struct live in tmp-objdir.h as
I suggested in [2] might make that work...

1. https://lore.kernel.org/git/87v92lxhh4.fsf@evledraar.gmail.com/
2. https://lore.kernel.org/git/87r1d9xh71.fsf@evledraar.gmail.com/
Elijah Newren Sept. 29, 2021, 4 a.m. UTC | #3
On Tue, Sep 28, 2021 at 1:05 AM Ævar Arnfjörð Bjarmason
<avarab@gmail.com> wrote:
>
> On Tue, Aug 31 2021, Elijah Newren via GitGitGadget wrote:
>
> >  static int decoration_given;
> >  static int use_mailmap_config = 1;
> > +static struct tmp_objdir *tmp_objdir;
> >  static const char *fmt_patch_subject_prefix = "PATCH";
> >  static int fmt_patch_name_max = FORMAT_PATCH_NAME_MAX_DEFAULT;
> >  static const char *fmt_pretty;
>
> So here we make this static file-level etc...
>
> > @@ -407,6 +410,17 @@ static int cmd_log_walk(struct rev_info *rev)
> >       int saved_nrl = 0;
> >       int saved_dcctc = 0;
> >
> > +     if (rev->remerge_diff) {
> > +             tmp_objdir = tmp_objdir_create();
> > +             if (!tmp_objdir)
> > +                     die(_("unable to create temporary object directory"));
> > +             tmp_objdir_make_primary(the_repository, tmp_objdir);
> > +
> > +             strbuf_init(&rev->remerge_objdir_location, 0);
> > +             strbuf_addstr(&rev->remerge_objdir_location,
> > +                           tmp_objdir_path(tmp_objdir));
> > +     }
> > +
> >       if (rev->early_output)
> >               setup_early_output();
> >
> > @@ -449,6 +463,13 @@ static int cmd_log_walk(struct rev_info *rev)
> >       rev->diffopt.no_free = 0;
> >       diff_free(&rev->diffopt);
> >
> > +     if (rev->remerge_diff) {
> > +             strbuf_release(&rev->remerge_objdir_location);
> > +             tmp_objdir_remove_as_primary(the_repository, tmp_objdir);
> > +             tmp_objdir_destroy(tmp_objdir);
> > +             tmp_objdir = NULL;
>
> ...but all of the "tmp_objdir" usage is in one function, can't the
> variable be declared here instead?

That's a very good point.

> We need to hand the "remerge_objdir_location" off to the "rev_info"
> struct, but that seems separate from its lifetime.

Given Peff's suggestion elsewhere, though, to destroy the tmp_objdir
after each merge and create a new one, I wonder if I should actually
be passing a tmp_objdir** to rev_info (allowing log-tree to do the
work of destroying and creating a new one after each merge, instead of
using the "remerge_objdir_location" to run a recursive delete of
files).  That'd still work with your idea to remove the statically
scoped variable, though.

> Re my [1] & [2] I like Neeraj's "atexit cleanup" approach better,
> perhaps that makes your cleanup in log-tree.c redundant or easier?

Having an atexit cleanup as a safety measure seems fine.  However, I
don't like avoiding the manual cleanup step and relying on atexit
cleanup; I'd go so far as to say I think that'd be a bug, at least for
my usage.  It presumes one-shot usage, whereas I'd rather move git to
being more library-like.

However, fully destroying the tmp_objdir probably makes the cleanup in
log-tree.c easier.

> Per [2] it looks like you need to "hand off" the
> "remerge_objdir_location", so having the struct live in tmp-objdir.h as
> I suggested in [2] might make that work...
>
> 1. https://lore.kernel.org/git/87v92lxhh4.fsf@evledraar.gmail.com/
> 2. https://lore.kernel.org/git/87r1d9xh71.fsf@evledraar.gmail.com/
diff mbox series

Patch

diff --git a/builtin/log.c b/builtin/log.c
index 3d7717ba5ca..6c7288716e6 100644
--- a/builtin/log.c
+++ b/builtin/log.c
@@ -35,6 +35,8 @@ 
 #include "repository.h"
 #include "commit-reach.h"
 #include "range-diff.h"
+#include "dir.h"
+#include "tmp-objdir.h"
 
 #define MAIL_DEFAULT_WRAP 72
 #define COVER_FROM_AUTO_MAX_SUBJECT_LEN 100
@@ -51,6 +53,7 @@  static int default_encode_email_headers = 1;
 static int decoration_style;
 static int decoration_given;
 static int use_mailmap_config = 1;
+static struct tmp_objdir *tmp_objdir;
 static const char *fmt_patch_subject_prefix = "PATCH";
 static int fmt_patch_name_max = FORMAT_PATCH_NAME_MAX_DEFAULT;
 static const char *fmt_pretty;
@@ -407,6 +410,17 @@  static int cmd_log_walk(struct rev_info *rev)
 	int saved_nrl = 0;
 	int saved_dcctc = 0;
 
+	if (rev->remerge_diff) {
+		tmp_objdir = tmp_objdir_create();
+		if (!tmp_objdir)
+			die(_("unable to create temporary object directory"));
+		tmp_objdir_make_primary(the_repository, tmp_objdir);
+
+		strbuf_init(&rev->remerge_objdir_location, 0);
+		strbuf_addstr(&rev->remerge_objdir_location,
+			      tmp_objdir_path(tmp_objdir));
+	}
+
 	if (rev->early_output)
 		setup_early_output();
 
@@ -449,6 +463,13 @@  static int cmd_log_walk(struct rev_info *rev)
 	rev->diffopt.no_free = 0;
 	diff_free(&rev->diffopt);
 
+	if (rev->remerge_diff) {
+		strbuf_release(&rev->remerge_objdir_location);
+		tmp_objdir_remove_as_primary(the_repository, tmp_objdir);
+		tmp_objdir_destroy(tmp_objdir);
+		tmp_objdir = NULL;
+	}
+
 	if (rev->diffopt.output_format & DIFF_FORMAT_CHECKDIFF &&
 	    rev->diffopt.flags.check_failed) {
 		return 02;
@@ -1943,6 +1964,8 @@  int cmd_format_patch(int argc, const char **argv, const char *prefix)
 		die(_("--name-status does not make sense"));
 	if (rev.diffopt.output_format & DIFF_FORMAT_CHECKDIFF)
 		die(_("--check does not make sense"));
+	if (rev.remerge_diff)
+		die(_("--remerge_diff does not make sense"));
 
 	if (!use_patch_format &&
 		(!rev.diffopt.output_format ||
diff --git a/diff-merges.c b/diff-merges.c
index d897fd8a293..3a24c45b8e5 100644
--- a/diff-merges.c
+++ b/diff-merges.c
@@ -17,6 +17,7 @@  static void suppress(struct rev_info *revs)
 	revs->combined_all_paths = 0;
 	revs->merges_imply_patch = 0;
 	revs->merges_need_diff = 0;
+	revs->remerge_diff = 0;
 }
 
 static void set_separate(struct rev_info *revs)
@@ -45,6 +46,12 @@  static void set_dense_combined(struct rev_info *revs)
 	revs->dense_combined_merges = 1;
 }
 
+static void set_remerge_diff(struct rev_info *revs)
+{
+	suppress(revs);
+	revs->remerge_diff = 1;
+}
+
 static diff_merges_setup_func_t func_by_opt(const char *optarg)
 {
 	if (!strcmp(optarg, "off") || !strcmp(optarg, "none"))
@@ -57,6 +64,8 @@  static diff_merges_setup_func_t func_by_opt(const char *optarg)
 		return set_combined;
 	else if (!strcmp(optarg, "cc") || !strcmp(optarg, "dense-combined"))
 		return set_dense_combined;
+	else if (!strcmp(optarg, "r") || !strcmp(optarg, "remerge"))
+		return set_remerge_diff;
 	else if (!strcmp(optarg, "m") || !strcmp(optarg, "on"))
 		return set_to_default;
 	return NULL;
@@ -113,6 +122,9 @@  int diff_merges_parse_opts(struct rev_info *revs, const char **argv)
 	} else if (!strcmp(arg, "--cc")) {
 		set_dense_combined(revs);
 		revs->merges_imply_patch = 1;
+	} else if (!strcmp(arg, "--remerge-diff")) {
+		set_remerge_diff(revs);
+		revs->merges_imply_patch = 1;
 	} else if (!strcmp(arg, "--no-diff-merges")) {
 		suppress(revs);
 	} else if (!strcmp(arg, "--combined-all-paths")) {
diff --git a/log-tree.c b/log-tree.c
index 6dc4412268b..ed69a4da140 100644
--- a/log-tree.c
+++ b/log-tree.c
@@ -1,4 +1,5 @@ 
 #include "cache.h"
+#include "commit-reach.h"
 #include "config.h"
 #include "diff.h"
 #include "object-store.h"
@@ -7,6 +8,7 @@ 
 #include "tag.h"
 #include "graph.h"
 #include "log-tree.h"
+#include "merge-ort.h"
 #include "reflog-walk.h"
 #include "refs.h"
 #include "string-list.h"
@@ -16,6 +18,7 @@ 
 #include "line-log.h"
 #include "help.h"
 #include "range-diff.h"
+#include "dir.h"
 
 static struct decoration name_decoration = { "object names" };
 static int decoration_loaded;
@@ -902,6 +905,60 @@  static int do_diff_combined(struct rev_info *opt, struct commit *commit)
 	return !opt->loginfo;
 }
 
+static int do_remerge_diff(struct rev_info *opt,
+			   struct commit_list *parents,
+			   struct object_id *oid,
+			   struct commit *commit)
+{
+	struct merge_options o;
+	struct commit_list *bases;
+	struct merge_result res;
+	struct pretty_print_context ctx = {0};
+	struct strbuf commit1 = STRBUF_INIT;
+	struct strbuf commit2 = STRBUF_INIT;
+
+	/* Setup merge options */
+	init_merge_options(&o, the_repository);
+	memset(&res, 0, sizeof(res));
+	o.show_rename_progress = 0;
+
+	ctx.abbrev = DEFAULT_ABBREV;
+	format_commit_message(parents->item,       "%h (%s)", &commit1, &ctx);
+	format_commit_message(parents->next->item, "%h (%s)", &commit2, &ctx);
+	o.branch1 = commit1.buf;
+	o.branch2 = commit2.buf;
+	o.record_conflict_msgs_in_tree = 1;
+
+	/* Parse the relevant commits and get the merge bases */
+	parse_commit_or_die(parents->item);
+	parse_commit_or_die(parents->next->item);
+	bases = get_merge_bases(parents->item, parents->next->item);
+
+	/* Re-merge the parents */
+	merge_incore_recursive(&o,
+			       bases, parents->item, parents->next->item,
+			       &res);
+
+	/* Show the diff */
+	diff_tree_oid(&res.tree->object.oid, oid, "", &opt->diffopt);
+	log_tree_diff_flush(opt);
+
+	/* Cleanup */
+	strbuf_release(&commit1);
+	strbuf_release(&commit2);
+	merge_finalize(&o, &res);
+
+	/* Clean up the temporary object directory */
+	if (opt->remerge_objdir_location.buf != NULL &&
+	    *opt->remerge_objdir_location.buf != '\0')
+		remove_dir_recursively(&opt->remerge_objdir_location,
+				       REMOVE_DIR_KEEP_TOPLEVEL);
+	else
+		BUG("unable to remove temporary object directory");
+
+	return !opt->loginfo;
+}
+
 /*
  * Show the diff of a commit.
  *
@@ -936,6 +993,18 @@  static int log_tree_diff(struct rev_info *opt, struct commit *commit, struct log
 	}
 
 	if (is_merge) {
+		int octopus = (parents->next->next != NULL);
+
+		if (opt->remerge_diff) {
+			if (octopus) {
+				show_log(opt);
+				fprintf(opt->diffopt.file,
+					"diff: warning: Skipping remerge-diff "
+					"for octopus merges.\n");
+				return 1;
+			}
+			return do_remerge_diff(opt, parents, oid, commit);
+		}
 		if (opt->combine_merges)
 			return do_diff_combined(opt, commit);
 		if (opt->separate_merges) {
diff --git a/revision.h b/revision.h
index fbb068da9fb..d6562c52252 100644
--- a/revision.h
+++ b/revision.h
@@ -198,7 +198,8 @@  struct rev_info {
 			combine_merges:1,
 			combined_all_paths:1,
 			dense_combined_merges:1,
-			first_parent_merges:1;
+			first_parent_merges:1,
+			remerge_diff:1;
 
 	/* Format info */
 	int		show_notes;
@@ -320,6 +321,9 @@  struct rev_info {
 
 	/* misc. flags related to '--no-kept-objects' */
 	unsigned keep_pack_cache_flags;
+
+	/* Location where temporary objects for remerge-diff are written. */
+	struct strbuf remerge_objdir_location;
 };
 
 int ref_excluded(struct string_list *, const char *path);