diff mbox series

[v2,4/7] reftable: avoid writing empty keys at the block layer

Message ID ba036ee8543b2dc28ac046eb0c8c0aef9e751c80.1645106124.git.gitgitgadget@gmail.com (mailing list archive)
State Superseded
Headers show
Series reftable: avoid reading and writing empty keys | expand

Commit Message

Han-Wen Nienhuys Feb. 17, 2022, 1:55 p.m. UTC
From: Han-Wen Nienhuys <hanwen@google.com>

The public interface (reftable_writer) already ensures that keys are
written in strictly increasing order, and an empty key by definition
fails this check.

However, by also enforcing this at the block layer, it is easier to
verify that records (which are written into blocks) never have to
consider the possibility of empty keys.

Signed-off-by: Han-Wen Nienhuys <hanwen@google.com>
---
 reftable/block.c      | 27 +++++++++++++++++----------
 reftable/block_test.c |  5 +++++
 reftable/writer.c     |  3 +--
 3 files changed, 23 insertions(+), 12 deletions(-)

Comments

Junio C Hamano Feb. 17, 2022, 11:55 p.m. UTC | #1
"Han-Wen Nienhuys via GitGitGadget" <gitgitgadget@gmail.com> writes:

> @@ -105,8 +106,14 @@ int block_writer_add(struct block_writer *w, struct reftable_record *rec)
>  	int is_restart = 0;
>  	struct strbuf key = STRBUF_INIT;
>  	int n = 0;
> +	int err = -1;
>  
>  	reftable_record_key(rec, &key);
> +	if (!key.len) {
> +		err = REFTABLE_API_ERROR;
> +		goto done;
> +	}

OK; we get an API_ERROR when trying to write a bad one.  And ...

> @@ -332,6 +334,9 @@ int block_iter_next(struct block_iter *it, struct reftable_record *rec)
>  	if (n < 0)
>  		return -1;
>  
> +	if (!key.len)
> +		return REFTABLE_FORMAT_ERROR;

... we get a FORMAT_ERROR when the data we try to read is bad
(i.e. not our fault).  OK.

> @@ -358,6 +363,8 @@ int block_reader_first_key(struct block_reader *br, struct strbuf *key)
>  	int n = reftable_decode_key(key, &extra, empty, in);
>  	if (n < 0)
>  		return n;
> +	if (!key->len)
> +		return -1;

It is curious that this gets a different error out of the same
sequence, i.e. decode-key did not return an error but the length of
the key happens to be 0, not FORMAT_ERROR.

> diff --git a/reftable/writer.c b/reftable/writer.c
> index 944c2329ab5..d54215a50dc 100644
> --- a/reftable/writer.c
> +++ b/reftable/writer.c
> @@ -240,14 +240,13 @@ static int writer_add_record(struct reftable_writer *w,
>  
>  	writer_reinit_block_writer(w, reftable_record_type(rec));
>  	err = block_writer_add(w->block_writer, rec);
> -	if (err < 0) {
> +	if (err == -1) {
>  		/* we are writing into memory, so an error can only mean it
>  		 * doesn't fit. */
>  		err = REFTABLE_ENTRY_TOO_BIG_ERROR;
>  		goto done;
>  	}
>  
> -	err = 0;

Is this "doesn't fit" related to "we catch 0-length keys", or an
unrelated fix was included in this step by "rebase -i" mistake?
Han-Wen Nienhuys Feb. 21, 2022, 2:32 p.m. UTC | #2
On Fri, Feb 18, 2022 at 12:55 AM Junio C Hamano <gitster@pobox.com> wrote:
> > @@ -358,6 +363,8 @@ int block_reader_first_key(struct block_reader *br, struct strbuf *key)
> >       int n = reftable_decode_key(key, &extra, empty, in);
> >       if (n < 0)
> >               return n;
> > +     if (!key->len)
> > +             return -1;
>
> It is curious that this gets a different error out of the same
> sequence, i.e. decode-key did not return an error but the length of
> the key happens to be 0, not FORMAT_ERROR.

fixed.

> > --- a/reftable/writer.c
> > +++ b/reftable/writer.c
> > @@ -240,14 +240,13 @@ static int writer_add_record(struct reftable_writer *w,
> >
> >       writer_reinit_block_writer(w, reftable_record_type(rec));
> >       err = block_writer_add(w->block_writer, rec);
> > -     if (err < 0) {
> > +     if (err == -1) {
> >               /* we are writing into memory, so an error can only mean it
> >                * doesn't fit. */
> >               err = REFTABLE_ENTRY_TOO_BIG_ERROR;
> >               goto done;
> >       }
> >
> > -     err = 0;
>
> Is this "doesn't fit" related to "we catch 0-length keys", or an
> unrelated fix was included in this step by "rebase -i" mistake?

We don't want to reinterpret API_ERROR (from block_writer_add) as
ENTRY_TOO_BIG_ERROR, so we have to tweak the condition here.
diff mbox series

Patch

diff --git a/reftable/block.c b/reftable/block.c
index 2170748c5e9..4a095afe1e2 100644
--- a/reftable/block.c
+++ b/reftable/block.c
@@ -88,8 +88,9 @@  uint8_t block_writer_type(struct block_writer *bw)
 	return bw->buf[bw->header_off];
 }
 
-/* adds the reftable_record to the block. Returns -1 if it does not fit, 0 on
-   success */
+/* Adds the reftable_record to the block. Returns -1 if it does not fit, 0 on
+   success. Returns REFTABLE_API_ERROR if attempting to write a record with
+   empty key. */
 int block_writer_add(struct block_writer *w, struct reftable_record *rec)
 {
 	struct strbuf empty = STRBUF_INIT;
@@ -105,8 +106,14 @@  int block_writer_add(struct block_writer *w, struct reftable_record *rec)
 	int is_restart = 0;
 	struct strbuf key = STRBUF_INIT;
 	int n = 0;
+	int err = -1;
 
 	reftable_record_key(rec, &key);
+	if (!key.len) {
+		err = REFTABLE_API_ERROR;
+		goto done;
+	}
+
 	n = reftable_encode_key(&is_restart, out, last, key,
 				reftable_record_val_type(rec));
 	if (n < 0)
@@ -118,16 +125,11 @@  int block_writer_add(struct block_writer *w, struct reftable_record *rec)
 		goto done;
 	string_view_consume(&out, n);
 
-	if (block_writer_register_restart(w, start.len - out.len, is_restart,
-					  &key) < 0)
-		goto done;
-
-	strbuf_release(&key);
-	return 0;
-
+	err = block_writer_register_restart(w, start.len - out.len, is_restart,
+					    &key);
 done:
 	strbuf_release(&key);
-	return -1;
+	return err;
 }
 
 int block_writer_finish(struct block_writer *w)
@@ -332,6 +334,9 @@  int block_iter_next(struct block_iter *it, struct reftable_record *rec)
 	if (n < 0)
 		return -1;
 
+	if (!key.len)
+		return REFTABLE_FORMAT_ERROR;
+
 	string_view_consume(&in, n);
 	n = reftable_record_decode(rec, key, extra, in, it->br->hash_size);
 	if (n < 0)
@@ -358,6 +363,8 @@  int block_reader_first_key(struct block_reader *br, struct strbuf *key)
 	int n = reftable_decode_key(key, &extra, empty, in);
 	if (n < 0)
 		return n;
+	if (!key->len)
+		return -1;
 
 	return 0;
 }
diff --git a/reftable/block_test.c b/reftable/block_test.c
index fa2ee092ec0..cb88af4a563 100644
--- a/reftable/block_test.c
+++ b/reftable/block_test.c
@@ -42,6 +42,11 @@  static void test_block_read_write(void)
 	block_writer_init(&bw, BLOCK_TYPE_REF, block.data, block_size,
 			  header_off, hash_size(GIT_SHA1_FORMAT_ID));
 
+	rec.u.ref.refname = "";
+	rec.u.ref.value_type = REFTABLE_REF_DELETION;
+	n = block_writer_add(&bw, &rec);
+	EXPECT(n == REFTABLE_API_ERROR);
+
 	for (i = 0; i < N; i++) {
 		char name[100];
 		uint8_t hash[GIT_SHA1_RAWSZ];
diff --git a/reftable/writer.c b/reftable/writer.c
index 944c2329ab5..d54215a50dc 100644
--- a/reftable/writer.c
+++ b/reftable/writer.c
@@ -240,14 +240,13 @@  static int writer_add_record(struct reftable_writer *w,
 
 	writer_reinit_block_writer(w, reftable_record_type(rec));
 	err = block_writer_add(w->block_writer, rec);
-	if (err < 0) {
+	if (err == -1) {
 		/* we are writing into memory, so an error can only mean it
 		 * doesn't fit. */
 		err = REFTABLE_ENTRY_TOO_BIG_ERROR;
 		goto done;
 	}
 
-	err = 0;
 done:
 	strbuf_release(&key);
 	return err;