@@ -15,7 +15,7 @@ commitGraph.readChangedPaths::
commitGraph.changedPathsVersion::
Specifies the version of the changed-path Bloom filters that Git will read and
- write. May be -1, 0 or 1. Note that values greater than 1 may be
+ write. May be -1, 0, 1, or 2. Note that values greater than 1 may be
incompatible with older versions of Git which do not yet understand
those versions. Use caution when operating in a mixed-version
environment.
@@ -31,4 +31,7 @@ filters when instructed to write.
If 1, Git will only read version 1 Bloom filters, and will write version 1
Bloom filters.
+
+If 2, Git will only read version 2 Bloom filters, and will write version 2
+Bloom filters.
++
See linkgit:git-commit-graph[1] for more information.
@@ -100,7 +100,64 @@ int load_bloom_filter_from_graph(struct commit_graph *g,
* Not considered to be cryptographically secure.
* Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm
*/
-uint32_t murmur3_seeded(uint32_t seed, const char *data, size_t len)
+uint32_t murmur3_seeded_v2(uint32_t seed, const char *data, size_t len)
+{
+ const uint32_t c1 = 0xcc9e2d51;
+ const uint32_t c2 = 0x1b873593;
+ const uint32_t r1 = 15;
+ const uint32_t r2 = 13;
+ const uint32_t m = 5;
+ const uint32_t n = 0xe6546b64;
+ int i;
+ uint32_t k1 = 0;
+ const char *tail;
+
+ int len4 = len / sizeof(uint32_t);
+
+ uint32_t k;
+ for (i = 0; i < len4; i++) {
+ uint32_t byte1 = (uint32_t)(unsigned char)data[4*i];
+ uint32_t byte2 = ((uint32_t)(unsigned char)data[4*i + 1]) << 8;
+ uint32_t byte3 = ((uint32_t)(unsigned char)data[4*i + 2]) << 16;
+ uint32_t byte4 = ((uint32_t)(unsigned char)data[4*i + 3]) << 24;
+ k = byte1 | byte2 | byte3 | byte4;
+ k *= c1;
+ k = rotate_left(k, r1);
+ k *= c2;
+
+ seed ^= k;
+ seed = rotate_left(seed, r2) * m + n;
+ }
+
+ tail = (data + len4 * sizeof(uint32_t));
+
+ switch (len & (sizeof(uint32_t) - 1)) {
+ case 3:
+ k1 ^= ((uint32_t)(unsigned char)tail[2]) << 16;
+ /*-fallthrough*/
+ case 2:
+ k1 ^= ((uint32_t)(unsigned char)tail[1]) << 8;
+ /*-fallthrough*/
+ case 1:
+ k1 ^= ((uint32_t)(unsigned char)tail[0]) << 0;
+ k1 *= c1;
+ k1 = rotate_left(k1, r1);
+ k1 *= c2;
+ seed ^= k1;
+ break;
+ }
+
+ seed ^= (uint32_t)len;
+ seed ^= (seed >> 16);
+ seed *= 0x85ebca6b;
+ seed ^= (seed >> 13);
+ seed *= 0xc2b2ae35;
+ seed ^= (seed >> 16);
+
+ return seed;
+}
+
+static uint32_t murmur3_seeded_v1(uint32_t seed, const char *data, size_t len)
{
const uint32_t c1 = 0xcc9e2d51;
const uint32_t c2 = 0x1b873593;
@@ -165,8 +222,14 @@ void fill_bloom_key(const char *data,
int i;
const uint32_t seed0 = 0x293ae76f;
const uint32_t seed1 = 0x7e646e2c;
- const uint32_t hash0 = murmur3_seeded(seed0, data, len);
- const uint32_t hash1 = murmur3_seeded(seed1, data, len);
+ uint32_t hash0, hash1;
+ if (settings->hash_version == 2) {
+ hash0 = murmur3_seeded_v2(seed0, data, len);
+ hash1 = murmur3_seeded_v2(seed1, data, len);
+ } else {
+ hash0 = murmur3_seeded_v1(seed0, data, len);
+ hash1 = murmur3_seeded_v1(seed1, data, len);
+ }
key->hashes = (uint32_t *)xcalloc(settings->num_hashes, sizeof(uint32_t));
for (i = 0; i < settings->num_hashes; i++)
@@ -8,9 +8,11 @@ struct commit_graph;
struct bloom_filter_settings {
/*
* The version of the hashing technique being used.
- * We currently only support version = 1 which is
+ * The newest version is 2, which is
* the seeded murmur3 hashing technique implemented
- * in bloom.c.
+ * in bloom.c. Bloom filters of version 1 were created
+ * with prior versions of Git, which had a bug in the
+ * implementation of the hash function.
*/
uint32_t hash_version;
@@ -81,7 +83,7 @@ int load_bloom_filter_from_graph(struct commit_graph *g,
* Not considered to be cryptographically secure.
* Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm
*/
-uint32_t murmur3_seeded(uint32_t seed, const char *data, size_t len);
+uint32_t murmur3_seeded_v2(uint32_t seed, const char *data, size_t len);
void fill_bloom_key(const char *data,
size_t len,
@@ -344,7 +344,6 @@ static int graph_read_bloom_data(const unsigned char *chunk_start,
size_t chunk_size, void *data)
{
struct commit_graph *g = data;
- uint32_t hash_version;
if (chunk_size < BLOOMDATA_CHUNK_HEADER_SIZE) {
warning(_("ignoring too-small changed-path chunk"
@@ -356,10 +355,9 @@ static int graph_read_bloom_data(const unsigned char *chunk_start,
g->chunk_bloom_data = chunk_start;
g->chunk_bloom_data_size = chunk_size;
- hash_version = get_be32(chunk_start);
g->bloom_filter_settings = xmalloc(sizeof(struct bloom_filter_settings));
- g->bloom_filter_settings->hash_version = hash_version;
+ g->bloom_filter_settings->hash_version = get_be32(chunk_start);
g->bloom_filter_settings->num_hashes = get_be32(chunk_start + 4);
g->bloom_filter_settings->bits_per_entry = get_be32(chunk_start + 8);
g->bloom_filter_settings->max_changed_paths = DEFAULT_BLOOM_MAX_CHANGES;
@@ -2499,6 +2497,13 @@ int write_commit_graph(struct object_directory *odb,
}
if (!commit_graph_compatible(r))
return 0;
+ if (r->settings.commit_graph_changed_paths_version < -1
+ || r->settings.commit_graph_changed_paths_version > 2) {
+ warning(_("attempting to write a commit-graph, but "
+ "'commitGraph.changedPathsVersion' (%d) is not supported"),
+ r->settings.commit_graph_changed_paths_version);
+ return 0;
+ }
CALLOC_ARRAY(ctx, 1);
ctx->r = r;
@@ -2541,7 +2546,7 @@ int write_commit_graph(struct object_directory *odb,
g = ctx->r->objects->commit_graph;
/* We have changed-paths already. Keep them in the next graph */
- if (g && g->chunk_bloom_data) {
+ if (g && g->bloom_filter_settings) {
ctx->changed_paths = 1;
/* don't propagate the hash_version unless unspecified */
@@ -49,6 +49,7 @@ static void get_bloom_filter_for_commit(const struct object_id *commit_oid)
static const char *bloom_usage = "\n"
" test-tool bloom get_murmur3 <string>\n"
+" test-tool bloom get_murmur3_seven_highbit\n"
" test-tool bloom generate_filter <string> [<string>...]\n"
" test-tool bloom get_filter_for_commit <commit-hex>\n";
@@ -63,7 +64,13 @@ int cmd__bloom(int argc, const char **argv)
uint32_t hashed;
if (argc < 3)
usage(bloom_usage);
- hashed = murmur3_seeded(0, argv[2], strlen(argv[2]));
+ hashed = murmur3_seeded_v2(0, argv[2], strlen(argv[2]));
+ printf("Murmur3 Hash with seed=0:0x%08x\n", hashed);
+ }
+
+ if (!strcmp(argv[1], "get_murmur3_seven_highbit")) {
+ uint32_t hashed;
+ hashed = murmur3_seeded_v2(0, "\x99\xaa\xbb\xcc\xdd\xee\xff", 7);
printf("Murmur3 Hash with seed=0:0x%08x\n", hashed);
}
@@ -29,6 +29,14 @@ test_expect_success 'compute unseeded murmur3 hash for test string 2' '
test_cmp expect actual
'
+test_expect_success 'compute unseeded murmur3 hash for test string 3' '
+ cat >expect <<-\EOF &&
+ Murmur3 Hash with seed=0:0xa183ccfd
+ EOF
+ test-tool bloom get_murmur3_seven_highbit >actual &&
+ test_cmp expect actual
+'
+
test_expect_success 'compute bloom key for empty string' '
cat >expect <<-\EOF &&
Hashes:0x5615800c|0x5b966560|0x61174ab4|0x66983008|0x6c19155c|0x7199fab0|0x771ae004|
@@ -488,14 +488,49 @@ test_expect_success 'merge graph layers with incompatible Bloom settings' '
test_must_be_empty err
'
+# chosen to be the same under all Unicode normalization forms
+CENT=$(printf "\302\242")
+
+test_expect_success 'ensure Bloom filter with incompatible versions are ignored' '
+ rm "$repo/$graph" &&
+
+ git -C $repo log --oneline --no-decorate -- $CENT >expect &&
+
+ # Compute v1 Bloom filters for commits at the bottom.
+ git -C $repo rev-parse HEAD^ >in &&
+ git -C $repo commit-graph write --stdin-commits --changed-paths \
+ --split <in &&
+
+ # Compute v2 Bloomfilters for the rest of the commits at the top.
+ git -C $repo rev-parse HEAD >in &&
+ git -C $repo -c commitGraph.changedPathsVersion=2 commit-graph write \
+ --stdin-commits --changed-paths --split=no-merge <in &&
+
+ test_line_count = 2 $repo/$chain &&
+
+ git -C $repo log --oneline --no-decorate -- $CENT >actual 2>err &&
+ test_cmp expect actual &&
+
+ layer="$(head -n 1 $repo/$chain)" &&
+ cat >expect.err <<-EOF &&
+ warning: disabling Bloom filters for commit-graph layer $SQ$layer$SQ due to incompatible settings
+ EOF
+ test_cmp expect.err err &&
+
+ # Merge the two layers with incompatible bloom filter versions,
+ # ensuring that the v2 filters are used.
+ >trace2.txt &&
+ GIT_TRACE2_EVENT="$(pwd)/trace2.txt" \
+ git -C $repo -c commitGraph.changedPathsVersion=2 commit-graph write --reachable --changed-paths 2>err &&
+ grep "disabling Bloom filters for commit-graph layer .$layer." err &&
+ grep "{\"hash_version\":2,\"num_hashes\":7,\"bits_per_entry\":10,\"max_changed_paths\":512" trace2.txt
+'
+
get_first_changed_path_filter () {
test-tool read-graph bloom-filters >filters.dat &&
head -n 1 filters.dat
}
-# chosen to be the same under all Unicode normalization forms
-CENT=$(printf "\302\242")
-
test_expect_success 'set up repo with high bit path, version 1 changed-path' '
git init highbit1 &&
test_commit -C highbit1 c1 "$CENT" &&
@@ -539,6 +574,120 @@ test_expect_success 'version 1 changed-path used when version 1 requested' '
)
'
+test_expect_success 'version 1 changed-path not used when version 2 requested' '
+ (
+ cd highbit1 &&
+ git config --add commitGraph.changedPathsVersion 2 &&
+ test_bloom_filters_not_used "-- another$CENT"
+ )
+'
+
+test_expect_success 'version 1 changed-path used when autodetect requested' '
+ (
+ cd highbit1 &&
+ git config --add commitGraph.changedPathsVersion -1 &&
+ test_bloom_filters_used "-- another$CENT"
+ )
+'
+
+test_expect_success 'when writing another commit graph, preserve existing version 1 of changed-path' '
+ test_commit -C highbit1 c1double "$CENT$CENT" &&
+ git -C highbit1 commit-graph write --reachable --changed-paths &&
+ (
+ cd highbit1 &&
+ git config --add commitGraph.changedPathsVersion -1 &&
+ echo "options: bloom(1,10,7) read_generation_data" >expect &&
+ test-tool read-graph >full &&
+ grep options full >actual &&
+ test_cmp expect actual
+ )
+'
+
+test_expect_success 'set up repo with high bit path, version 2 changed-path' '
+ git init highbit2 &&
+ git -C highbit2 config --add commitGraph.changedPathsVersion 2 &&
+ test_commit -C highbit2 c2 "$CENT" &&
+ git -C highbit2 commit-graph write --reachable --changed-paths
+'
+
+test_expect_success 'check value of version 2 changed-path' '
+ (
+ cd highbit2 &&
+ echo "c01f" >expect &&
+ get_first_changed_path_filter >actual &&
+ test_cmp expect actual
+ )
+'
+
+test_expect_success 'setup make another commit' '
+ # "git log" does not use Bloom filters for root commits - see how, in
+ # revision.c, rev_compare_tree() (the only code path that eventually calls
+ # get_bloom_filter()) is only called by try_to_simplify_commit() when the commit
+ # has one parent. Therefore, make another commit so that we perform the tests on
+ # a non-root commit.
+ test_commit -C highbit2 anotherc2 "another$CENT"
+'
+
+test_expect_success 'version 2 changed-path used when version 2 requested' '
+ (
+ cd highbit2 &&
+ test_bloom_filters_used "-- another$CENT"
+ )
+'
+
+test_expect_success 'version 2 changed-path not used when version 1 requested' '
+ (
+ cd highbit2 &&
+ git config --add commitGraph.changedPathsVersion 1 &&
+ test_bloom_filters_not_used "-- another$CENT"
+ )
+'
+
+test_expect_success 'version 2 changed-path used when autodetect requested' '
+ (
+ cd highbit2 &&
+ git config --add commitGraph.changedPathsVersion -1 &&
+ test_bloom_filters_used "-- another$CENT"
+ )
+'
+
+test_expect_success 'when writing another commit graph, preserve existing version 2 of changed-path' '
+ test_commit -C highbit2 c2double "$CENT$CENT" &&
+ git -C highbit2 commit-graph write --reachable --changed-paths &&
+ (
+ cd highbit2 &&
+ git config --add commitGraph.changedPathsVersion -1 &&
+ echo "options: bloom(2,10,7) read_generation_data" >expect &&
+ test-tool read-graph >full &&
+ grep options full >actual &&
+ test_cmp expect actual
+ )
+'
+
+test_expect_success 'when writing commit graph, do not reuse changed-path of another version' '
+ git init doublewrite &&
+ test_commit -C doublewrite c "$CENT" &&
+ git -C doublewrite config --add commitGraph.changedPathsVersion 1 &&
+ git -C doublewrite commit-graph write --reachable --changed-paths &&
+ for v in -2 3
+ do
+ git -C doublewrite config --add commitGraph.changedPathsVersion $v &&
+ git -C doublewrite commit-graph write --reachable --changed-paths 2>err &&
+ cat >expect <<-EOF &&
+ warning: attempting to write a commit-graph, but ${SQ}commitGraph.changedPathsVersion${SQ} ($v) is not supported
+ EOF
+ test_cmp expect err || return 1
+ done &&
+ git -C doublewrite config --add commitGraph.changedPathsVersion 2 &&
+ git -C doublewrite commit-graph write --reachable --changed-paths &&
+ (
+ cd doublewrite &&
+ echo "c01f" >expect &&
+ get_first_changed_path_filter >actual &&
+ test_cmp expect actual
+ )
+'
+
corrupt_graph () {
test_when_finished "rm -rf $graph" &&
git commit-graph write --reachable --changed-paths &&