diff mbox series

[09/10] diff --color-moved-ws=allow-indentation-change: improve hash lookups

Message ID 41cdedd60907b966dffa6cf0c9825ffb448f4971.1623675889.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series diff --color-moved[-ws] speedups | expand

Commit Message

Phillip Wood June 14, 2021, 1:04 p.m. UTC
From: Phillip Wood <phillip.wood@dunelm.org.uk>

As libxdiff does not have a whitespace flag to ignore the indentation
the code for --color-moved-ws=allow-indentation-change uses
XDF_IGNORE_WHITESPACE and then filters out any hash lookups where
there are non-indentation changes. This is filtering is inefficient as
we have to perform another string comparison.

By using the offset data that we have already computed to skip the
indentation we can avoid using XDF_IGNORE_WHITESPACE and safely remove
the extra checks which improves the performance by 14% and paves the
way for the elimination of string comparisons in the next commit.

This change slightly increases the runtime of other --color-moved
modes. This could be avoided by using different comparison functions
for the different modes but after the changes in the next commit there
is no measurable benefit.

Before this change
Benchmark #1: bin-wrappers/git diff --diff-algorithm=myers --color-moved --no-color-moved-ws v2.28.0 v2.29.0
  Time (mean ± σ):      1.116 s ±  0.005 s    [User: 1.057 s, System: 0.056 s]
  Range (min … max):    1.109 s …  1.123 s    10 runs

Benchmark #2: bin-wrappers/git diff --diff-algorithm=myers --color-moved --color-moved-ws=allow-indentation-change v2.28.0 v2.29.0
  Time (mean ± σ):      1.216 s ±  0.005 s    [User: 1.155 s, System: 0.059 s]
  Range (min … max):    1.206 s …  1.223 s    10 runs

After this change
Benchmark #1: bin-wrappers/git diff --diff-algorithm=myers --color-moved --no-color-moved-ws v2.28.0 v2.29.0
  Time (mean ± σ):      1.147 s ±  0.005 s    [User: 1.085 s, System: 0.059 s]
  Range (min … max):    1.140 s …  1.154 s    10 runs

Benchmark #2: bin-wrappers/git diff --diff-algorithm=myers --color-moved --color-moved-ws=allow-indentation-change v2.28.0 v2.29.0
  Time (mean ± σ):      1.048 s ±  0.005 s    [User: 987.4 ms, System: 58.8 ms]
  Range (min … max):    1.043 s …  1.056 s    10 runs

Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
---
 diff.c | 66 +++++++++++++++++-----------------------------------------
 1 file changed, 19 insertions(+), 47 deletions(-)

Comments

Elijah Newren July 9, 2021, 3:36 p.m. UTC | #1
On Mon, Jun 14, 2021 at 6:06 AM Phillip Wood via GitGitGadget
<gitgitgadget@gmail.com> wrote:
>
> From: Phillip Wood <phillip.wood@dunelm.org.uk>
>
> As libxdiff does not have a whitespace flag to ignore the indentation
> the code for --color-moved-ws=allow-indentation-change uses
> XDF_IGNORE_WHITESPACE and then filters out any hash lookups where
> there are non-indentation changes. This is filtering is inefficient as

s/This is filtering is/This filtering is/
diff mbox series

Patch

diff --git a/diff.c b/diff.c
index ee58373f55f8..e6f3586b39bf 100644
--- a/diff.c
+++ b/diff.c
@@ -850,28 +850,15 @@  static void fill_es_indent_data(struct emitted_diff_symbol *es)
 }
 
 static int compute_ws_delta(const struct emitted_diff_symbol *a,
-			    const struct emitted_diff_symbol *b,
-			    int *out)
-{
-	int a_len = a->len,
-	    b_len = b->len,
-	    a_off = a->indent_off,
-	    a_width = a->indent_width,
-	    b_off = b->indent_off,
+			    const struct emitted_diff_symbol *b)
+{
+	int a_width = a->indent_width,
 	    b_width = b->indent_width;
 
-	if (a_width == INDENT_BLANKLINE && b_width == INDENT_BLANKLINE) {
-		*out = INDENT_BLANKLINE;
-		return 1;
-	}
-
-	if (a_len - a_off != b_len - b_off ||
-	    memcmp(a->line + a_off, b->line + b_off, a_len - a_off))
-		return 0;
-
-	*out = a_width - b_width;
+	if (a_width == INDENT_BLANKLINE && b_width == INDENT_BLANKLINE)
+		return INDENT_BLANKLINE;
 
-	return 1;
+	return a_width - b_width;
 }
 
 static int cmp_in_block_with_wsd(const struct diff_options *o,
@@ -917,26 +904,17 @@  static int moved_entry_cmp(const void *hashmap_cmp_fn_data,
 			   const void *keydata)
 {
 	const struct diff_options *diffopt = hashmap_cmp_fn_data;
-	const struct moved_entry *a, *b;
+	const struct emitted_diff_symbol *a, *b;
 	unsigned flags = diffopt->color_moved_ws_handling
 			 & XDF_WHITESPACE_FLAGS;
 
-	a = container_of(eptr, const struct moved_entry, ent);
-	b = container_of(entry_or_key, const struct moved_entry, ent);
-
-	if (diffopt->color_moved_ws_handling &
-	    COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE)
-		/*
-		 * As there is not specific white space config given,
-		 * we'd need to check for a new block, so ignore all
-		 * white space. The setup of the white space
-		 * configuration for the next block is done else where
-		 */
-		flags |= XDF_IGNORE_WHITESPACE;
+	a = container_of(eptr, const struct moved_entry, ent)->es;
+	b = container_of(entry_or_key, const struct moved_entry, ent)->es;
 
-	return !xdiff_compare_lines(a->es->line, a->es->len,
-				    b->es->line, b->es->len,
-				    flags);
+	return !xdiff_compare_lines(a->line + a->indent_off,
+				    a->len - a->indent_off,
+				    b->line + b->indent_off,
+				    b->len - b->indent_off, flags);
 }
 
 static struct moved_entry *prepare_entry(struct diff_options *o,
@@ -945,7 +923,8 @@  static struct moved_entry *prepare_entry(struct diff_options *o,
 	struct moved_entry *ret = xmalloc(sizeof(*ret));
 	struct emitted_diff_symbol *l = &o->emitted_symbols->buf[line_no];
 	unsigned flags = o->color_moved_ws_handling & XDF_WHITESPACE_FLAGS;
-	unsigned int hash = xdiff_hash_string(l->line, l->len, flags);
+	unsigned int hash = xdiff_hash_string(l->line + l->indent_off,
+					      l->len - l->indent_off, flags);
 
 	hashmap_entry_init(&ret->ent, hash);
 	ret->es = l;
@@ -1113,14 +1092,11 @@  static void mark_color_as_moved(struct diff_options *o,
 			hashmap_for_each_entry_from(hm, match, ent) {
 				ALLOC_GROW(pmb, pmb_nr + 1, pmb_alloc);
 				if (o->color_moved_ws_handling &
-				    COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) {
-					if (compute_ws_delta(l, match->es,
-							     &pmb[pmb_nr].wsd))
-						pmb[pmb_nr++].match = match;
-				} else {
+				    COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE)
+					pmb[pmb_nr].wsd = compute_ws_delta(l, match->es);
+				else
 					pmb[pmb_nr].wsd = 0;
-					pmb[pmb_nr++].match = match;
-				}
+				pmb[pmb_nr++].match = match;
 			}
 
 			if (adjust_last_block(o, n, block_length) &&
@@ -6240,10 +6216,6 @@  static void diff_flush_patch_all_file_pairs(struct diff_options *o)
 		if (o->color_moved) {
 			struct hashmap add_lines, del_lines;
 
-			if (o->color_moved_ws_handling &
-			    COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE)
-				o->color_moved_ws_handling |= XDF_IGNORE_WHITESPACE;
-
 			hashmap_init(&del_lines, moved_entry_cmp, o, 0);
 			hashmap_init(&add_lines, moved_entry_cmp, o, 0);