diff mbox series

[v2,16/17] chunk-format: restore duplicate chunk checks

Message ID 669eeec707ab92a3e5983ad12baddc2c15012d43.1611759716.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series Refactor chunk-format into an API | expand

Commit Message

Derrick Stolee Jan. 27, 2021, 3:01 p.m. UTC
From: Derrick Stolee <dstolee@microsoft.com>

Before refactoring into the chunk-format API, the commit-graph parsing
logic included checks for duplicate chunks. It is unlikely that we would
desire a chunk-based file format that allows duplicate chunk IDs in the
table of contents, so add duplicate checks into
read_table_of_contents().

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
---
 chunk-format.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

Comments

Junio C Hamano Feb. 5, 2021, 12:05 a.m. UTC | #1
"Derrick Stolee via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: Derrick Stolee <dstolee@microsoft.com>
>
> Before refactoring into the chunk-format API, the commit-graph parsing
> logic included checks for duplicate chunks. It is unlikely that we would
> desire a chunk-based file format that allows duplicate chunk IDs in the
> table of contents, so add duplicate checks into
> read_table_of_contents().

Makes sense.  This answers a question I had while reading one of the
previous steps about the design, I think.

However...

> diff --git a/chunk-format.c b/chunk-format.c
> index 74501084cf8..1ee875df423 100644
> --- a/chunk-format.c
> +++ b/chunk-format.c
> @@ -14,6 +14,7 @@ struct chunk_info {
>  	chunk_write_fn write_fn;
>  
>  	const void *start;
> +	unsigned found:1;

This defines a .found member ...

> @@ -98,6 +99,7 @@ int read_table_of_contents(struct chunkfile *cf,
>  			   uint64_t toc_offset,
>  			   int toc_length)
>  {
> +	int i;
>  	uint32_t chunk_id;
>  	const unsigned char *table_of_contents = mfile + toc_offset;
>  
> @@ -124,6 +126,14 @@ int read_table_of_contents(struct chunkfile *cf,
>  			return -1;
>  		}
>  
> +		for (i = 0; i < cf->chunks_nr; i++) {
> +			if (cf->chunks[i].id == chunk_id) {
> +				error(_("duplicate chunk ID %"PRIx32" found"),
> +					chunk_id);
> +				return -1;
> +			}
> +		}
> +
>  		cf->chunks[cf->chunks_nr].id = chunk_id;
>  		cf->chunks[cf->chunks_nr].start = mfile + chunk_offset;
>  		cf->chunks[cf->chunks_nr].size = next_chunk_offset - chunk_offset;

... and no new code touches it.

The way duplicates are found is by having an inner loop that checks the
IDs of chunks we've seen so far (quadratic, but presumably that
would not matter as long as we'd be dealing with just half a dozen
chunk types).

Is the .found bit used for something else and needs to be added in a
different step?
Derrick Stolee Feb. 5, 2021, 12:31 p.m. UTC | #2
On 2/4/2021 7:05 PM, Junio C Hamano wrote:
> "Derrick Stolee via GitGitGadget" <gitgitgadget@gmail.com> writes:
>>  	const void *start;
>> +	unsigned found:1;
> 
> This defines a .found member ...
> ... and no new code touches it.
> 
> The way duplicates are found is by having an inner loop that checks the
> IDs of chunks we've seen so far (quadratic, but presumably that
> would not matter as long as we'd be dealing with just half a dozen
> chunk types).
> 
> Is the .found bit used for something else and needs to be added in a
> different step?

Nope. It is just noise that I should have caught and deleted.

Thanks,
-Stolee
diff mbox series

Patch

diff --git a/chunk-format.c b/chunk-format.c
index 74501084cf8..1ee875df423 100644
--- a/chunk-format.c
+++ b/chunk-format.c
@@ -14,6 +14,7 @@  struct chunk_info {
 	chunk_write_fn write_fn;
 
 	const void *start;
+	unsigned found:1;
 };
 
 struct chunkfile {
@@ -98,6 +99,7 @@  int read_table_of_contents(struct chunkfile *cf,
 			   uint64_t toc_offset,
 			   int toc_length)
 {
+	int i;
 	uint32_t chunk_id;
 	const unsigned char *table_of_contents = mfile + toc_offset;
 
@@ -124,6 +126,14 @@  int read_table_of_contents(struct chunkfile *cf,
 			return -1;
 		}
 
+		for (i = 0; i < cf->chunks_nr; i++) {
+			if (cf->chunks[i].id == chunk_id) {
+				error(_("duplicate chunk ID %"PRIx32" found"),
+					chunk_id);
+				return -1;
+			}
+		}
+
 		cf->chunks[cf->chunks_nr].id = chunk_id;
 		cf->chunks[cf->chunks_nr].start = mfile + chunk_offset;
 		cf->chunks[cf->chunks_nr].size = next_chunk_offset - chunk_offset;