@@ -1,6 +1,7 @@
#include "git-compat-util.h"
#include "chunk-format.h"
#include "csum-file.h"
+#include "cache.h"
#define CHUNK_LOOKUP_WIDTH 12
void write_table_of_contents(struct hashfile *f,
@@ -47,3 +48,58 @@ int write_chunks(struct hashfile *f,
return 0;
}
+
+int read_table_of_contents(const unsigned char *mfile,
+ size_t mfile_size,
+ uint64_t toc_offset,
+ int toc_length,
+ struct read_chunk_info *chunks,
+ int nr,
+ void *data)
+{
+ uint32_t chunk_id;
+ const unsigned char *table_of_contents = mfile + toc_offset;
+
+ while (toc_length--) {
+ int i;
+ uint64_t chunk_offset, next_chunk_offset;
+
+ chunk_id = get_be32(table_of_contents);
+ chunk_offset = get_be64(table_of_contents + 4);
+
+ if (!chunk_id) {
+ error(_("terminating chunk id appears earlier than expected"));
+ return 1;
+ }
+
+ table_of_contents += CHUNK_LOOKUP_WIDTH;
+ next_chunk_offset = get_be64(table_of_contents + 4);
+
+ if (next_chunk_offset < chunk_offset ||
+ next_chunk_offset > mfile_size - the_hash_algo->rawsz) {
+ error(_("improper chunk offset(s) %"PRIx64" and %"PRIx64""),
+ chunk_offset, next_chunk_offset);
+ return 1;
+ }
+ for (i = 0; i < nr; i++) {
+ if (chunks[i].id == chunk_id) {
+ int result = chunks[i].read_fn(
+ mfile + chunk_offset,
+ next_chunk_offset - chunk_offset,
+ data);
+
+ if (result)
+ return result;
+ break;
+ }
+ }
+ }
+
+ chunk_id = get_be32(table_of_contents);
+ if (chunk_id) {
+ error(_("final chunk has non-zero id %"PRIx32""), chunk_id);
+ return 1;
+ }
+
+ return 0;
+}
@@ -46,4 +46,24 @@ int write_chunks(struct hashfile *f,
int nr,
void *data);
+/*
+ * When reading a table of contents, we find the chunk with matching 'id'
+ * then call its read_fn to populate the necessary 'data' based on the
+ * chunk start and size.
+ */
+typedef int (*chunk_read_fn)(const unsigned char *chunk_start,
+ size_t chunk_size, void *data);
+struct read_chunk_info {
+ uint32_t id;
+ chunk_read_fn read_fn;
+};
+
+int read_table_of_contents(const unsigned char *mfile,
+ size_t mfile_size,
+ uint64_t toc_offset,
+ int toc_length,
+ struct read_chunk_info *chunks,
+ int nr,
+ void *data);
+
#endif
@@ -289,15 +289,114 @@ static int verify_commit_graph_lite(struct commit_graph *g)
return 0;
}
+static int graph_read_oid_fanout(const unsigned char *chunk_start,
+ size_t chunk_size, void *data)
+{
+ struct commit_graph *g = (struct commit_graph *)data;
+ g->chunk_oid_fanout = (uint32_t*)chunk_start;
+ return 0;
+}
+
+static int graph_read_oid_lookup(const unsigned char *chunk_start,
+ size_t chunk_size, void *data)
+{
+ struct commit_graph *g = (struct commit_graph *)data;
+ g->chunk_oid_lookup = chunk_start;
+ g->num_commits = chunk_size / g->hash_len;
+ return 0;
+}
+
+static int graph_read_data(const unsigned char *chunk_start,
+ size_t chunk_size, void *data)
+{
+ struct commit_graph *g = (struct commit_graph *)data;
+ g->chunk_commit_data = chunk_start;
+ return 0;
+}
+
+static int graph_read_extra_edges(const unsigned char *chunk_start,
+ size_t chunk_size, void *data)
+{
+ struct commit_graph *g = (struct commit_graph *)data;
+ g->chunk_extra_edges = chunk_start;
+ return 0;
+}
+
+static int graph_read_base_graphs(const unsigned char *chunk_start,
+ size_t chunk_size, void *data)
+{
+ struct commit_graph *g = (struct commit_graph *)data;
+ g->chunk_base_graphs = chunk_start;
+ return 0;
+}
+
+static int graph_read_bloom_indices(const unsigned char *chunk_start,
+ size_t chunk_size, void *data)
+{
+ struct commit_graph *g = (struct commit_graph *)data;
+ g->chunk_bloom_indexes = chunk_start;
+ return 0;
+}
+
+static int graph_read_bloom_data(const unsigned char *chunk_start,
+ size_t chunk_size, void *data)
+{
+ struct commit_graph *g = (struct commit_graph *)data;
+ uint32_t hash_version;
+ g->chunk_bloom_data = chunk_start;
+ hash_version = get_be32(chunk_start);
+
+ if (hash_version != 1)
+ return 0;
+
+ g->bloom_filter_settings = xmalloc(sizeof(struct bloom_filter_settings));
+ g->bloom_filter_settings->hash_version = hash_version;
+ g->bloom_filter_settings->num_hashes = get_be32(chunk_start + 4);
+ g->bloom_filter_settings->bits_per_entry = get_be32(chunk_start + 8);
+ g->bloom_filter_settings->max_changed_paths = DEFAULT_BLOOM_MAX_CHANGES;
+
+ return 0;
+}
+
+static struct read_chunk_info read_chunks[] = {
+ [0] = {
+ GRAPH_CHUNKID_OIDFANOUT,
+ graph_read_oid_fanout
+ },
+ [1] = {
+ GRAPH_CHUNKID_OIDLOOKUP,
+ graph_read_oid_lookup
+ },
+ [2] = {
+ GRAPH_CHUNKID_DATA,
+ graph_read_data
+ },
+ [3] = {
+ GRAPH_CHUNKID_EXTRAEDGES,
+ graph_read_extra_edges
+ },
+ [4] = {
+ GRAPH_CHUNKID_BASE,
+ graph_read_base_graphs
+ },
+ [5] = {
+ GRAPH_CHUNKID_BLOOMINDEXES,
+ graph_read_bloom_indices
+ },
+ [6] = {
+ GRAPH_CHUNKID_BLOOMDATA,
+ graph_read_bloom_data
+ }
+};
+
struct commit_graph *parse_commit_graph(struct repository *r,
void *graph_map, size_t graph_size)
{
- const unsigned char *data, *chunk_lookup;
- uint32_t i;
+ const unsigned char *data;
struct commit_graph *graph;
- uint64_t next_chunk_offset;
uint32_t graph_signature;
unsigned char graph_version, hash_version;
+ int chunks_nr = MAX_NUM_CHUNKS;
if (!graph_map)
return NULL;
@@ -346,95 +445,14 @@ struct commit_graph *parse_commit_graph(struct repository *r,
return NULL;
}
- chunk_lookup = data + 8;
- next_chunk_offset = get_be64(chunk_lookup + 4);
- for (i = 0; i < graph->num_chunks; i++) {
- uint32_t chunk_id;
- uint64_t chunk_offset = next_chunk_offset;
- int chunk_repeated = 0;
-
- chunk_id = get_be32(chunk_lookup + 0);
-
- chunk_lookup += GRAPH_CHUNKLOOKUP_WIDTH;
- next_chunk_offset = get_be64(chunk_lookup + 4);
-
- if (chunk_offset > graph_size - the_hash_algo->rawsz) {
- error(_("commit-graph improper chunk offset %08x%08x"), (uint32_t)(chunk_offset >> 32),
- (uint32_t)chunk_offset);
- goto free_and_return;
- }
-
- switch (chunk_id) {
- case GRAPH_CHUNKID_OIDFANOUT:
- if (graph->chunk_oid_fanout)
- chunk_repeated = 1;
- else
- graph->chunk_oid_fanout = (uint32_t*)(data + chunk_offset);
- break;
-
- case GRAPH_CHUNKID_OIDLOOKUP:
- if (graph->chunk_oid_lookup)
- chunk_repeated = 1;
- else {
- graph->chunk_oid_lookup = data + chunk_offset;
- graph->num_commits = (next_chunk_offset - chunk_offset)
- / graph->hash_len;
- }
- break;
-
- case GRAPH_CHUNKID_DATA:
- if (graph->chunk_commit_data)
- chunk_repeated = 1;
- else
- graph->chunk_commit_data = data + chunk_offset;
- break;
+ /* limit the chunk-format list if we are ignoring Bloom filters */
+ if (!r->settings.commit_graph_read_changed_paths)
+ chunks_nr = 5;
- case GRAPH_CHUNKID_EXTRAEDGES:
- if (graph->chunk_extra_edges)
- chunk_repeated = 1;
- else
- graph->chunk_extra_edges = data + chunk_offset;
- break;
-
- case GRAPH_CHUNKID_BASE:
- if (graph->chunk_base_graphs)
- chunk_repeated = 1;
- else
- graph->chunk_base_graphs = data + chunk_offset;
- break;
-
- case GRAPH_CHUNKID_BLOOMINDEXES:
- if (graph->chunk_bloom_indexes)
- chunk_repeated = 1;
- else if (r->settings.commit_graph_read_changed_paths)
- graph->chunk_bloom_indexes = data + chunk_offset;
- break;
-
- case GRAPH_CHUNKID_BLOOMDATA:
- if (graph->chunk_bloom_data)
- chunk_repeated = 1;
- else if (r->settings.commit_graph_read_changed_paths) {
- uint32_t hash_version;
- graph->chunk_bloom_data = data + chunk_offset;
- hash_version = get_be32(data + chunk_offset);
-
- if (hash_version != 1)
- break;
-
- graph->bloom_filter_settings = xmalloc(sizeof(struct bloom_filter_settings));
- graph->bloom_filter_settings->hash_version = hash_version;
- graph->bloom_filter_settings->num_hashes = get_be32(data + chunk_offset + 4);
- graph->bloom_filter_settings->bits_per_entry = get_be32(data + chunk_offset + 8);
- graph->bloom_filter_settings->max_changed_paths = DEFAULT_BLOOM_MAX_CHANGES;
- }
- break;
- }
-
- if (chunk_repeated) {
- error(_("commit-graph chunk id %08x appears multiple times"), chunk_id);
- goto free_and_return;
- }
- }
+ if (read_table_of_contents(
+ graph->data, graph_size, GRAPH_HEADER_SIZE, graph->num_chunks,
+ read_chunks, chunks_nr, graph))
+ goto free_and_return;
if (graph->chunk_bloom_indexes && graph->chunk_bloom_data) {
init_bloom_filters();
@@ -54,6 +54,74 @@ static char *get_midx_filename(const char *object_dir)
return xstrfmt("%s/pack/multi-pack-index", object_dir);
}
+static int midx_read_pack_names(const unsigned char *chunk_start,
+ size_t chunk_size, void *data)
+{
+ struct multi_pack_index *m = (struct multi_pack_index *)data;
+ m->chunk_pack_names = chunk_start;
+ return 0;
+}
+
+static int midx_read_oid_fanout(const unsigned char *chunk_start,
+ size_t chunk_size, void *data)
+{
+ struct multi_pack_index *m = (struct multi_pack_index *)data;
+ m->chunk_oid_fanout = (uint32_t *)chunk_start;
+
+ if (chunk_size != 4 * 256) {
+ error(_("multi-pack-index OID fanout is of the wrong size"));
+ return 1;
+ }
+ return 0;
+}
+
+static int midx_read_oid_lookup(const unsigned char *chunk_start,
+ size_t chunk_size, void *data)
+{
+ struct multi_pack_index *m = (struct multi_pack_index *)data;
+ m->chunk_oid_lookup = chunk_start;
+ return 0;
+}
+
+static int midx_read_offsets(const unsigned char *chunk_start,
+ size_t chunk_size, void *data)
+{
+ struct multi_pack_index *m = (struct multi_pack_index *)data;
+ m->chunk_object_offsets = chunk_start;
+ return 0;
+}
+
+static int midx_read_large_offsets(const unsigned char *chunk_start,
+ size_t chunk_size, void *data)
+{
+ struct multi_pack_index *m = (struct multi_pack_index *)data;
+ m->chunk_large_offsets = chunk_start;
+ return 0;
+}
+
+static struct read_chunk_info read_chunks[] = {
+ [0] = {
+ MIDX_CHUNKID_PACKNAMES,
+ midx_read_pack_names
+ },
+ [1] = {
+ MIDX_CHUNKID_OIDFANOUT,
+ midx_read_oid_fanout
+ },
+ [2] = {
+ MIDX_CHUNKID_OIDLOOKUP,
+ midx_read_oid_lookup
+ },
+ [3] = {
+ MIDX_CHUNKID_OBJECTOFFSETS,
+ midx_read_offsets
+ },
+ [4] = {
+ MIDX_CHUNKID_LARGEOFFSETS,
+ midx_read_large_offsets
+ }
+};
+
struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local)
{
struct multi_pack_index *m = NULL;
@@ -114,48 +182,10 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local
m->num_packs = get_be32(m->data + MIDX_BYTE_NUM_PACKS);
- for (i = 0; i < m->num_chunks; i++) {
- uint32_t chunk_id = get_be32(m->data + MIDX_HEADER_SIZE +
- MIDX_CHUNKLOOKUP_WIDTH * i);
- uint64_t chunk_offset = get_be64(m->data + MIDX_HEADER_SIZE + 4 +
- MIDX_CHUNKLOOKUP_WIDTH * i);
-
- if (chunk_offset >= m->data_len)
- die(_("invalid chunk offset (too large)"));
-
- switch (chunk_id) {
- case MIDX_CHUNKID_PACKNAMES:
- m->chunk_pack_names = m->data + chunk_offset;
- break;
-
- case MIDX_CHUNKID_OIDFANOUT:
- m->chunk_oid_fanout = (uint32_t *)(m->data + chunk_offset);
- break;
-
- case MIDX_CHUNKID_OIDLOOKUP:
- m->chunk_oid_lookup = m->data + chunk_offset;
- break;
-
- case MIDX_CHUNKID_OBJECTOFFSETS:
- m->chunk_object_offsets = m->data + chunk_offset;
- break;
-
- case MIDX_CHUNKID_LARGEOFFSETS:
- m->chunk_large_offsets = m->data + chunk_offset;
- break;
-
- case 0:
- die(_("terminating multi-pack-index chunk id appears earlier than expected"));
- break;
-
- default:
- /*
- * Do nothing on unrecognized chunks, allowing future
- * extensions to add optional chunks.
- */
- break;
- }
- }
+ if (read_table_of_contents(m->data, midx_size, MIDX_HEADER_SIZE,
+ m->num_chunks, read_chunks,
+ MIDX_MAX_CHUNKS, m))
+ goto cleanup_fail;
if (!m->chunk_pack_names)
die(_("multi-pack-index missing required pack-name chunk"));
@@ -563,7 +563,7 @@ test_expect_success 'detect bad hash version' '
test_expect_success 'detect low chunk count' '
corrupt_graph_and_verify $GRAPH_BYTE_CHUNK_COUNT "\01" \
- "missing the .* chunk"
+ "final chunk has non-zero id"
'
test_expect_success 'detect missing OID fanout chunk' '
@@ -314,12 +314,12 @@ test_expect_success 'verify bad OID version' '
test_expect_success 'verify truncated chunk count' '
corrupt_midx_and_verify $MIDX_BYTE_CHUNK_COUNT "\01" $objdir \
- "missing required"
+ "final chunk has non-zero id"
'
test_expect_success 'verify extended chunk count' '
corrupt_midx_and_verify $MIDX_BYTE_CHUNK_COUNT "\07" $objdir \
- "terminating multi-pack-index chunk id appears earlier than expected"
+ "terminating chunk id appears earlier than expected"
'
test_expect_success 'verify missing required chunk' '
@@ -329,7 +329,7 @@ test_expect_success 'verify missing required chunk' '
test_expect_success 'verify invalid chunk offset' '
corrupt_midx_and_verify $MIDX_BYTE_CHUNK_OFFSET "\01" $objdir \
- "invalid chunk offset (too large)"
+ "improper chunk offset(s)"
'
test_expect_success 'verify packnames out of order' '