Message ID | 106dd51f75699fbf4fc1e46687124995f5ef0278.1607012215.git.gitgitgadget@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Refactor chunk-format into an API | expand |
On 12/3/2020 11:16 AM, Derrick Stolee via GitGitGadget wrote: > From: Derrick Stolee <dstolee@microsoft.com> > > The previous change introduced read_table_of_contents() in the > chunk-format API, but dropped the duplicate chunk check from the > commit-graph parsing logic. This was done to keep flexibility in the > chunk-format API. "keep flexibility" is bogus. This is the biggest YAGNI of this series. Instead, consider the patch below instead which restores duplicate checks for the commit-graph file AND adds them to the multi-pack-index file due to the shared API. This is also roughly half of the added lines from the previous patch. Thanks, -Stolee -- >8 -- From 0df4959d59d7f9df3e9f6326bb0acb7b84f84980 Mon Sep 17 00:00:00 2001 From: Derrick Stolee <dstolee@microsoft.com> Date: Mon, 7 Dec 2020 08:36:42 -0500 Subject: [PATCH] chunk-format: restore duplicate chunk checks Before refactoring into the chunk-format API, the commit-graph parsing logic included checks for duplicate chunks. It is unlikely that we would desire a chunk-based file format that allows duplicate chunk IDs in the table of contents, so add duplicate checks into read_table_of_contents(). Signed-off-by: Derrick Stolee <dstolee@microsoft.com> --- chunk-format.c | 15 ++++++++++++++- chunk-format.h | 3 +++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/chunk-format.c b/chunk-format.c index d888ef6ec73..a4891bbd28a 100644 --- a/chunk-format.c +++ b/chunk-format.c @@ -57,9 +57,13 @@ int read_table_of_contents(const unsigned char *mfile, int nr, void *data) { + int i; uint32_t chunk_id; const unsigned char *table_of_contents = mfile + toc_offset; + for (i = 0; i < nr; i++) + chunks[i].found = 0; + while (toc_length--) { int i; uint64_t chunk_offset, next_chunk_offset; @@ -83,7 +87,16 @@ int read_table_of_contents(const unsigned char *mfile, } for (i = 0; i < nr; i++) { if (chunks[i].id == chunk_id) { - int result = chunks[i].read_fn( + int result; + + if (chunks[i].found) { + error(_("duplicate chunk ID %"PRIx32" found"), + chunk_id); + return 1; + } + + chunks[i].found = 1; + result = chunks[i].read_fn( mfile + chunk_offset, next_chunk_offset - chunk_offset, data); diff --git a/chunk-format.h b/chunk-format.h index 7049800f734..de45797223a 100644 --- a/chunk-format.h +++ b/chunk-format.h @@ -56,6 +56,9 @@ typedef int (*chunk_read_fn)(const unsigned char *chunk_start, struct read_chunk_info { uint32_t id; chunk_read_fn read_fn; + + /* used internally */ + unsigned found:1; }; int read_table_of_contents(const unsigned char *mfile,
diff --git a/commit-graph.c b/commit-graph.c index 0a3ba147df..c0102fceba 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -289,10 +289,20 @@ static int verify_commit_graph_lite(struct commit_graph *g) return 0; } +static int report_duplicate(void) +{ + warning(_("duplicate chunk detected")); + return 1; +} + static int graph_read_oid_fanout(const unsigned char *chunk_start, size_t chunk_size, void *data) { struct commit_graph *g = (struct commit_graph *)data; + + if (g->chunk_oid_fanout) + return report_duplicate(); + g->chunk_oid_fanout = (uint32_t*)chunk_start; return 0; } @@ -301,6 +311,10 @@ static int graph_read_oid_lookup(const unsigned char *chunk_start, size_t chunk_size, void *data) { struct commit_graph *g = (struct commit_graph *)data; + + if (g->chunk_oid_lookup) + return report_duplicate(); + g->chunk_oid_lookup = chunk_start; g->num_commits = chunk_size / g->hash_len; return 0; @@ -310,6 +324,10 @@ static int graph_read_data(const unsigned char *chunk_start, size_t chunk_size, void *data) { struct commit_graph *g = (struct commit_graph *)data; + + if (g->chunk_commit_data) + return report_duplicate(); + g->chunk_commit_data = chunk_start; return 0; } @@ -318,6 +336,10 @@ static int graph_read_extra_edges(const unsigned char *chunk_start, size_t chunk_size, void *data) { struct commit_graph *g = (struct commit_graph *)data; + + if (g->chunk_extra_edges) + return report_duplicate(); + g->chunk_extra_edges = chunk_start; return 0; } @@ -326,6 +348,10 @@ static int graph_read_base_graphs(const unsigned char *chunk_start, size_t chunk_size, void *data) { struct commit_graph *g = (struct commit_graph *)data; + + if (g->chunk_base_graphs) + return report_duplicate(); + g->chunk_base_graphs = chunk_start; return 0; } @@ -334,6 +360,10 @@ static int graph_read_bloom_indices(const unsigned char *chunk_start, size_t chunk_size, void *data) { struct commit_graph *g = (struct commit_graph *)data; + + if (g->chunk_bloom_indexes) + return report_duplicate(); + g->chunk_bloom_indexes = chunk_start; return 0; } @@ -343,6 +373,10 @@ static int graph_read_bloom_data(const unsigned char *chunk_start, { struct commit_graph *g = (struct commit_graph *)data; uint32_t hash_version; + + if (g->chunk_bloom_data) + return report_duplicate(); + g->chunk_bloom_data = chunk_start; hash_version = get_be32(chunk_start);