@@ -379,7 +379,6 @@ static const char *get_basename(const char *filename)
return base ? base + 1 : filename;
}
-MAYBE_UNUSED
static int find_basename_matches(struct diff_options *options,
int minimum_score,
int num_src)
@@ -727,12 +726,57 @@ void diffcore_rename(struct diff_options *options)
if (minimum_score == MAX_SCORE)
goto cleanup;
+ num_sources = rename_src_nr;
+
+ if (want_copies || break_idx) {
+ /*
+ * Cull sources:
+ * - remove ones corresponding to exact renames
+ */
+ trace2_region_enter("diff", "cull after exact", options->repo);
+ remove_unneeded_paths_from_src(want_copies);
+ trace2_region_leave("diff", "cull after exact", options->repo);
+ } else {
+ /* Determine minimum score to match basenames */
+ double factor = 0.5;
+ char *basename_factor = getenv("GIT_BASENAME_FACTOR");
+ int min_basename_score;
+
+ if (basename_factor)
+ factor = strtol(basename_factor, NULL, 10)/100.0;
+ assert(factor >= 0.0 && factor <= 1.0);
+ min_basename_score = minimum_score +
+ (int)(factor * (MAX_SCORE - minimum_score));
+
+ /*
+ * Cull sources:
+ * - remove ones involved in renames (found via exact match)
+ */
+ trace2_region_enter("diff", "cull after exact", options->repo);
+ remove_unneeded_paths_from_src(want_copies);
+ trace2_region_leave("diff", "cull after exact", options->repo);
+
+ /* Utilize file basenames to quickly find renames. */
+ trace2_region_enter("diff", "basename matches", options->repo);
+ rename_count += find_basename_matches(options,
+ min_basename_score,
+ rename_src_nr);
+ trace2_region_leave("diff", "basename matches", options->repo);
+
+ /*
+ * Cull sources, again:
+ * - remove ones involved in renames (found via basenames)
+ */
+ trace2_region_enter("diff", "cull basename", options->repo);
+ remove_unneeded_paths_from_src(want_copies);
+ trace2_region_leave("diff", "cull basename", options->repo);
+ }
+
/*
- * Calculate how many renames are left
+ * Calculate how many rename destinations are left
*/
num_destinations = (rename_dst_nr - rename_count);
- remove_unneeded_paths_from_src(want_copies);
- num_sources = rename_src_nr;
+ num_sources = rename_src_nr; /* rename_src_nr reflects lower number */
/* All done? */
if (!num_destinations || !num_sources)
@@ -764,7 +808,7 @@ void diffcore_rename(struct diff_options *options)
struct diff_score *m;
if (rename_dst[i].is_rename)
- continue; /* dealt with exact match already. */
+ continue; /* exact or basename match already handled */
m = &mx[dst_cnt * NUM_CANDIDATE_PER_DST];
for (j = 0; j < NUM_CANDIDATE_PER_DST; j++)
@@ -280,8 +280,8 @@ test_expect_success 'basename similarity vs best similarity' '
# subdir/file.txt is 89% similar to file.md, 78% similar to file.txt,
# but since same basenames are checked first...
cat >expected <<-\EOF &&
- R088 subdir/file.txt file.md
- A file.txt
+ A file.md
+ R078 subdir/file.txt file.txt
EOF
test_cmp expected actual
'