diff mbox series

[v2,5/7] reftable: ensure that obj_id_len is >= 2 on writing

Message ID 2bd3d44ba57ddb43c09367b45a8f056233d465e9.1645106124.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series reftable: avoid reading and writing empty keys | expand

Commit Message

Han-Wen Nienhuys Feb. 17, 2022, 1:55 p.m. UTC
From: Han-Wen Nienhuys <hanwen@google.com>

When writing the same hash many times, we might decide to use a
length-1 object ID prefix for the ObjectID => ref table, which is out
of spec.

Signed-off-by: Han-Wen Nienhuys <hanwen@google.com>
---
 reftable/readwrite_test.c | 37 +++++++++++++++++++++++++++++++++++++
 reftable/writer.c         |  4 +++-
 2 files changed, 40 insertions(+), 1 deletion(-)

Comments

Junio C Hamano Feb. 18, 2022, 12:01 a.m. UTC | #1
"Han-Wen Nienhuys via GitGitGadget" <gitgitgadget@gmail.com> writes:

> diff --git a/reftable/writer.c b/reftable/writer.c
> index d54215a50dc..5e4e6e93416 100644
> --- a/reftable/writer.c
> +++ b/reftable/writer.c
> @@ -515,7 +515,9 @@ static void object_record_free(void *void_arg, void *key)
>  static int writer_dump_object_index(struct reftable_writer *w)
>  {
>  	struct write_record_arg closure = { .w = w };
> -	struct common_prefix_arg common = { NULL };
> +	struct common_prefix_arg common = {
> +		.max = 1,		/* obj_id_len should be >= 2. */
> +	};

It feels somewhat strange that we have to set .max to set the floor
for the minimum length, but given the way update_common() uses and
maintains the member, it is more like "max size we have seen so
far", and by pretending that we have already seen common prefix of
length 1, we'd force ourselves that we need at least 2 to
differentiate.

>  	if (w->obj_index_tree) {
>  		infix_walk(w->obj_index_tree, &update_common, &common);
>  	}
diff mbox series

Patch

diff --git a/reftable/readwrite_test.c b/reftable/readwrite_test.c
index fd5922e55f6..35142eb070e 100644
--- a/reftable/readwrite_test.c
+++ b/reftable/readwrite_test.c
@@ -667,6 +667,42 @@  static void test_write_empty_table(void)
 	strbuf_release(&buf);
 }
 
+static void test_write_object_id_min_length(void)
+{
+	struct reftable_write_options opts = {
+		.block_size = 75,
+	};
+	struct strbuf buf = STRBUF_INIT;
+	struct reftable_writer *w =
+		reftable_new_writer(&strbuf_add_void, &buf, &opts);
+	uint8_t hash[GIT_SHA1_RAWSZ] = {42};
+	struct reftable_ref_record ref = {
+		.update_index = 1,
+		.value_type = REFTABLE_REF_VAL1,
+		.value.val1 = hash,
+	};
+	int err;
+	int i;
+
+	reftable_writer_set_limits(w, 1, 1);
+
+	/* Write the same hash in many refs. If there is only 1 hash, the
+	 * disambiguating prefix is length 0 */
+	for (i = 0; i < 256; i++) {
+		char name[256];
+		snprintf(name, sizeof(name), "ref%05d", i);
+		ref.refname = name;
+		err = reftable_writer_add_ref(w, &ref);
+		EXPECT_ERR(err);
+	}
+
+	err = reftable_writer_close(w);
+	EXPECT_ERR(err);
+	EXPECT(writer_stats(w)->object_id_len == 2);
+	reftable_writer_free(w);
+	strbuf_release(&buf);
+}
+
 static void test_write_empty_key(void)
 {
 	struct reftable_write_options opts = { 0 };
@@ -772,5 +808,6 @@  int readwrite_test_main(int argc, const char *argv[])
 	RUN_TEST(test_write_empty_key);
 	RUN_TEST(test_write_empty_table);
 	RUN_TEST(test_log_overflow);
+	RUN_TEST(test_write_object_id_min_length);
 	return 0;
 }
diff --git a/reftable/writer.c b/reftable/writer.c
index d54215a50dc..5e4e6e93416 100644
--- a/reftable/writer.c
+++ b/reftable/writer.c
@@ -515,7 +515,9 @@  static void object_record_free(void *void_arg, void *key)
 static int writer_dump_object_index(struct reftable_writer *w)
 {
 	struct write_record_arg closure = { .w = w };
-	struct common_prefix_arg common = { NULL };
+	struct common_prefix_arg common = {
+		.max = 1,		/* obj_id_len should be >= 2. */
+	};
 	if (w->obj_index_tree) {
 		infix_walk(w->obj_index_tree, &update_common, &common);
 	}