@@ -9,7 +9,9 @@ git-repack - Pack unpacked objects in a repository
SYNOPSIS
--------
[verse]
-'git repack' [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [-b] [-m] [--window=<n>] [--depth=<n>] [--threads=<n>] [--keep-pack=<pack-name>] [--write-midx]
+'git repack' [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [-b] [-m]
+ [--window=<n>] [--depth=<n>] [--threads=<n>] [--keep-pack=<pack-name>]
+ [--write-midx] [--name-hash-version=<n>]
DESCRIPTION
-----------
@@ -249,6 +251,36 @@ linkgit:git-multi-pack-index[1]).
Write a multi-pack index (see linkgit:git-multi-pack-index[1])
containing the non-redundant packs.
+--name-hash-version=<n>::
+ While performing delta compression, Git groups objects that may be
+ similar based on heuristics using the path to that object. While
+ grouping objects by an exact path match is good for paths with
+ many versions, there are benefits for finding delta pairs across
+ different full paths. Git collects objects by type and then by a
+ "name hash" of the path and then by size, hoping to group objects
+ that will compress well together.
++
+The default name hash version is `1`, which prioritizes hash locality by
+considering the final bytes of the path as providing the maximum magnitude
+to the hash function. This version excels at distinguishing short paths
+and finding renames across directories. However, the hash function depends
+primarily on the final 16 bytes of the path. If there are many paths in
+the repo that have the same final 16 bytes and differ only by parent
+directory, then this name-hash may lead to too many collisions and cause
+poor results. At the moment, this version is required when writing
+reachability bitmap files with `--write-bitmap-index`.
++
+The name hash version `2` has similar locality features as version `1`,
+except it considers each path component separately and overlays the hashes
+with a shift. This still prioritizes the final bytes of the path, but also
+"salts" the lower bits of the hash using the parent directory names. This
+method allows for some of the locality benefits of version `1` while
+breaking most of the collisions from a similarly-named file appearing in
+many different directories. At the moment, this version is not allowed
+when writing reachability bitmap files with `--write-bitmap-index` and it
+will be automatically changed to version `1`.
+
+
CONFIGURATION
-------------
@@ -39,7 +39,9 @@ static int run_update_server_info = 1;
static char *packdir, *packtmp_name, *packtmp;
static const char *const git_repack_usage[] = {
- N_("git repack [<options>]"),
+ N_("git repack [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [-b] [-m]\n"
+ "[--window=<n>] [--depth=<n>] [--threads=<n>] [--keep-pack=<pack-name>]\n"
+ "[--write-midx] [--name-hash-version=<n>]"),
NULL
};
@@ -58,7 +60,7 @@ struct pack_objects_args {
int no_reuse_object;
int quiet;
int local;
- int full_name_hash;
+ int name_hash_version;
struct list_objects_filter_options filter_options;
};
@@ -307,6 +309,8 @@ static void prepare_pack_objects(struct child_process *cmd,
strvec_pushf(&cmd->args, "--no-reuse-delta");
if (args->no_reuse_object)
strvec_pushf(&cmd->args, "--no-reuse-object");
+ if (args->name_hash_version)
+ strvec_pushf(&cmd->args, "--name-hash-version=%d", args->name_hash_version);
if (args->local)
strvec_push(&cmd->args, "--local");
if (args->quiet)
@@ -1204,6 +1208,8 @@ int cmd_repack(int argc,
N_("pass --no-reuse-delta to git-pack-objects")),
OPT_BOOL('F', NULL, &po_args.no_reuse_object,
N_("pass --no-reuse-object to git-pack-objects")),
+ OPT_INTEGER(0, "name-hash-version", &po_args.name_hash_version,
+ N_("specify the name hash version to use for grouping similar objects by path")),
OPT_NEGBIT('n', NULL, &run_update_server_info,
N_("do not run git-update-server-info"), 1),
OPT__QUIET(&po_args.quiet, N_("be quiet")),
@@ -45,7 +45,6 @@ rebase
remote
remote-ext
remote-fd
-repack
reset
restore
rev-parse
@@ -777,6 +777,12 @@ test_expect_success 'repack -ad cleans up old .tmp-* packs' '
test_must_be_empty tmpfiles
'
+test_expect_success '--name-hash-version option passes through to pack-objects' '
+ GIT_TRACE2_EVENT="$(pwd)/hash-trace.txt" \
+ git repack -a --name-hash-version=2 &&
+ test_subcommand_flex git pack-objects --name-hash-version=2 <hash-trace.txt
+'
+
test_expect_success 'setup for update-server-info' '
git init update-server-info &&
test_commit -C update-server-info message
@@ -1886,6 +1886,32 @@ test_subcommand () {
fi
}
+# Check that the given subcommand was run with the given set of
+# arguments in order (but with possible extra arguments).
+#
+# test_subcommand_flex [!] <command> <args>... < <trace>
+#
+# If the first parameter passed is !, this instead checks that
+# the given command was not called.
+#
+test_subcommand_flex () {
+ local negate=
+ if test "$1" = "!"
+ then
+ negate=t
+ shift
+ fi
+
+ local expr="$(printf '"%s".*' "$@")"
+
+ if test -n "$negate"
+ then
+ ! grep "\[$expr\]"
+ else
+ grep "\[$expr\]"
+ fi
+}
+
# Check that the given command was invoked as part of the
# trace2-format trace on stdin.
#