diff mbox series

[v2,3/8] refs/reftable: read references via `struct reftable_backend`

Message ID 1b50655202f311c6a6ded61d4d50b1f287761d84.1730792627.git.ps@pks.im (mailing list archive)
State New
Headers show
Series refs/reftable: reuse iterators when reading refs | expand

Commit Message

Patrick Steinhardt Nov. 5, 2024, 9:12 a.m. UTC
Refactor `read_ref_without_reload()` to accept a `struct reftable_backend`
as input instead of accepting a `struct reftable_stack`. This allows us
to implement an additional caching layer when reading refs where we can
reuse reftable iterators.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 refs/reftable-backend.c   | 110 ++++++++++++++++++++------------------
 reftable/reftable-stack.h |   3 ++
 reftable/stack.c          |   5 ++
 3 files changed, 67 insertions(+), 51 deletions(-)

Comments

Junio C Hamano Nov. 12, 2024, 7:26 a.m. UTC | #1
Patrick Steinhardt <ps@pks.im> writes:

> Refactor `read_ref_without_reload()` to accept a `struct reftable_backend`
> as input instead of accepting a `struct reftable_stack`. This allows us
> to implement an additional caching layer when reading refs where we can
> reuse reftable iterators.
>
> Signed-off-by: Patrick Steinhardt <ps@pks.im>
> ---
>  refs/reftable-backend.c   | 110 ++++++++++++++++++++------------------
>  reftable/reftable-stack.h |   3 ++
>  reftable/stack.c          |   5 ++
>  3 files changed, 67 insertions(+), 51 deletions(-)
>
> diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
> index 4a28dc8a9d..230adb690d 100644
> --- a/refs/reftable-backend.c
> +++ b/refs/reftable-backend.c
> @@ -51,6 +51,50 @@ static void reftable_backend_release(struct reftable_backend *be)
>  	be->stack = NULL;
>  }
>  
> +static int reftable_backend_read_ref(struct reftable_backend *be,
> +				     const char *refname,
> +				     struct object_id *oid,
> +				     struct strbuf *referent,
> +				     unsigned int *type)
> +{
> +	struct reftable_ref_record ref = {0};
> +	int ret;
> +
> +	ret = reftable_stack_read_ref(be->stack, refname, &ref);
> +	if (ret)
> +		goto done;
> +
> +	if (ref.value_type == REFTABLE_REF_SYMREF) {
> +		strbuf_reset(referent);
> +		strbuf_addstr(referent, ref.value.symref);
> +		*type |= REF_ISSYMREF;
> +	} else if (reftable_ref_record_val1(&ref)) {
> +		unsigned int hash_id;
> +
> +		switch (reftable_stack_hash_id(be->stack)) {

So, relative to the original, instead of relying on the repository
and its knowledge of what hash function is used, we ask the stack
what hash function is in use and use that instead.

> +		case REFTABLE_HASH_SHA1:
> +			hash_id = GIT_HASH_SHA1;
> +			break;
> +		case REFTABLE_HASH_SHA256:
> +			hash_id = GIT_HASH_SHA256;
> +			break;
> +		default:
> +			BUG("unhandled hash ID %d", reftable_stack_hash_id(be->stack));
> +		}
> +
> +		oidread(oid, reftable_ref_record_val1(&ref),
> +			&hash_algos[hash_id]);
> +	} else {
> +		/* We got a tombstone, which should not happen. */
> +		BUG("unhandled reference value type %d", ref.value_type);
> +	}
> +
> +done:
> +	assert(ret != REFTABLE_API_ERROR);
> +	reftable_ref_record_release(&ref);
> +	return ret;
> +}

Here is the original that got replaced.  Since ...

> -static int read_ref_without_reload(struct reftable_ref_store *refs,
> -				   struct reftable_stack *stack,
> -				   const char *refname,
> -				   struct object_id *oid,
> -				   struct strbuf *referent,
> -				   unsigned int *type)
> -{
> -	struct reftable_ref_record ref = {0};
> -	int ret;
> -
> -	ret = reftable_stack_read_ref(stack, refname, &ref);
> -	if (ret)
> -		goto done;
> -
> -	if (ref.value_type == REFTABLE_REF_SYMREF) {
> -		strbuf_reset(referent);
> -		strbuf_addstr(referent, ref.value.symref);
> -		*type |= REF_ISSYMREF;
> -	} else if (reftable_ref_record_val1(&ref)) {
> -		oidread(oid, reftable_ref_record_val1(&ref),
> -			refs->base.repo->hash_algo);

... we have access to "refs", which is a ref_store, that knows its
repository, it was just a few pointer references away to get the
hash id of the Git side.  But of course we use REFTABLE_HASH_*NAME*
to identify the algorithm at this layer, so we need to translate it
back to the ID on the Git side before asking oidread() to read it.

> -	} else {
> -		/* We got a tombstone, which should not happen. */
> -		BUG("unhandled reference value type %d", ref.value_type);
> -	}
> -
> -done:
> -	assert(ret != REFTABLE_API_ERROR);
> -	reftable_ref_record_release(&ref);
> -	return ret;
> -}

There is one thing that is curious about this step.

It isn't like we teach stack what hash it uses in this step---the
reftable_stack_hash_id() could have been implemented as early as
59343984 (reftable/system: stop depending on "hash.h", 2024-11-08).

Other than that this step introduces the first caller of
reftable_stack_hash_id() in the series, the remaining hunks of this
patch do not have to be part of this patch, but could have been a
separate step.  Not a suggestion to split it out, but merely an
observation (to make sure I am reading the code correctly).

> diff --git a/reftable/reftable-stack.h b/reftable/reftable-stack.h
> index 54787f2ef5..ae14270ea7 100644
> --- a/reftable/reftable-stack.h
> +++ b/reftable/reftable-stack.h
> @@ -149,4 +149,7 @@ struct reftable_compaction_stats {
>  struct reftable_compaction_stats *
>  reftable_stack_compaction_stats(struct reftable_stack *st);
>  
> +/* Return the hash of the stack. */
> +enum reftable_hash reftable_stack_hash_id(struct reftable_stack *st);
> +
>  #endif
> diff --git a/reftable/stack.c b/reftable/stack.c
> index 1fffd75630..d97b64a40d 100644
> --- a/reftable/stack.c
> +++ b/reftable/stack.c
> @@ -1791,3 +1791,8 @@ int reftable_stack_clean(struct reftable_stack *st)
>  	reftable_addition_destroy(add);
>  	return err;
>  }
> +
> +enum reftable_hash reftable_stack_hash_id(struct reftable_stack *st)
> +{
> +	return reftable_merged_table_hash_id(st->merged);
> +}
Patrick Steinhardt Nov. 12, 2024, 9:05 a.m. UTC | #2
On Tue, Nov 12, 2024 at 04:26:38PM +0900, Junio C Hamano wrote:
> Patrick Steinhardt <ps@pks.im> writes:
> 
> > Refactor `read_ref_without_reload()` to accept a `struct reftable_backend`
> > as input instead of accepting a `struct reftable_stack`. This allows us
> > to implement an additional caching layer when reading refs where we can
> > reuse reftable iterators.
> >
> > Signed-off-by: Patrick Steinhardt <ps@pks.im>
> > ---
> >  refs/reftable-backend.c   | 110 ++++++++++++++++++++------------------
> >  reftable/reftable-stack.h |   3 ++
> >  reftable/stack.c          |   5 ++
> >  3 files changed, 67 insertions(+), 51 deletions(-)
> >
> > diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
> > index 4a28dc8a9d..230adb690d 100644
> > --- a/refs/reftable-backend.c
> > +++ b/refs/reftable-backend.c
> > @@ -51,6 +51,50 @@ static void reftable_backend_release(struct reftable_backend *be)
> >  	be->stack = NULL;
> >  }
> >  
> > +static int reftable_backend_read_ref(struct reftable_backend *be,
> > +				     const char *refname,
> > +				     struct object_id *oid,
> > +				     struct strbuf *referent,
> > +				     unsigned int *type)
> > +{
> > +	struct reftable_ref_record ref = {0};
> > +	int ret;
> > +
> > +	ret = reftable_stack_read_ref(be->stack, refname, &ref);
> > +	if (ret)
> > +		goto done;
> > +
> > +	if (ref.value_type == REFTABLE_REF_SYMREF) {
> > +		strbuf_reset(referent);
> > +		strbuf_addstr(referent, ref.value.symref);
> > +		*type |= REF_ISSYMREF;
> > +	} else if (reftable_ref_record_val1(&ref)) {
> > +		unsigned int hash_id;
> > +
> > +		switch (reftable_stack_hash_id(be->stack)) {
> 
> So, relative to the original, instead of relying on the repository
> and its knowledge of what hash function is used, we ask the stack
> what hash function is in use and use that instead.
> 
> > +		case REFTABLE_HASH_SHA1:
> > +			hash_id = GIT_HASH_SHA1;
> > +			break;
> > +		case REFTABLE_HASH_SHA256:
> > +			hash_id = GIT_HASH_SHA256;
> > +			break;
> > +		default:
> > +			BUG("unhandled hash ID %d", reftable_stack_hash_id(be->stack));
> > +		}
> > +
> > +		oidread(oid, reftable_ref_record_val1(&ref),
> > +			&hash_algos[hash_id]);
> > +	} else {
> > +		/* We got a tombstone, which should not happen. */
> > +		BUG("unhandled reference value type %d", ref.value_type);
> > +	}
> > +
> > +done:
> > +	assert(ret != REFTABLE_API_ERROR);
> > +	reftable_ref_record_release(&ref);
> > +	return ret;
> > +}
> 
> Here is the original that got replaced.  Since ...
> 
> > -static int read_ref_without_reload(struct reftable_ref_store *refs,
> > -				   struct reftable_stack *stack,
> > -				   const char *refname,
> > -				   struct object_id *oid,
> > -				   struct strbuf *referent,
> > -				   unsigned int *type)
> > -{
> > -	struct reftable_ref_record ref = {0};
> > -	int ret;
> > -
> > -	ret = reftable_stack_read_ref(stack, refname, &ref);
> > -	if (ret)
> > -		goto done;
> > -
> > -	if (ref.value_type == REFTABLE_REF_SYMREF) {
> > -		strbuf_reset(referent);
> > -		strbuf_addstr(referent, ref.value.symref);
> > -		*type |= REF_ISSYMREF;
> > -	} else if (reftable_ref_record_val1(&ref)) {
> > -		oidread(oid, reftable_ref_record_val1(&ref),
> > -			refs->base.repo->hash_algo);
> 
> ... we have access to "refs", which is a ref_store, that knows its
> repository, it was just a few pointer references away to get the
> hash id of the Git side.  But of course we use REFTABLE_HASH_*NAME*
> to identify the algorithm at this layer, so we need to translate it
> back to the ID on the Git side before asking oidread() to read it.
> 
> > -	} else {
> > -		/* We got a tombstone, which should not happen. */
> > -		BUG("unhandled reference value type %d", ref.value_type);
> > -	}
> > -
> > -done:
> > -	assert(ret != REFTABLE_API_ERROR);
> > -	reftable_ref_record_release(&ref);
> > -	return ret;
> > -}
> 
> There is one thing that is curious about this step.
> 
> It isn't like we teach stack what hash it uses in this step---the
> reftable_stack_hash_id() could have been implemented as early as
> 59343984 (reftable/system: stop depending on "hash.h", 2024-11-08).

In theory we could've implemented it even earlier than that: the commit
only introduces the reftable-specific hashes, and we had the
Git-specific hashes available before that. That way we wouldn't even
have to translate between the different hashes in the first place.

> Other than that this step introduces the first caller of
> reftable_stack_hash_id() in the series, the remaining hunks of this
> patch do not have to be part of this patch, but could have been a
> separate step.  Not a suggestion to split it out, but merely an
> observation (to make sure I am reading the code correctly).

Yup, your understanding matches mine.

Patrick
diff mbox series

Patch

diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index 4a28dc8a9d..230adb690d 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -51,6 +51,50 @@  static void reftable_backend_release(struct reftable_backend *be)
 	be->stack = NULL;
 }
 
+static int reftable_backend_read_ref(struct reftable_backend *be,
+				     const char *refname,
+				     struct object_id *oid,
+				     struct strbuf *referent,
+				     unsigned int *type)
+{
+	struct reftable_ref_record ref = {0};
+	int ret;
+
+	ret = reftable_stack_read_ref(be->stack, refname, &ref);
+	if (ret)
+		goto done;
+
+	if (ref.value_type == REFTABLE_REF_SYMREF) {
+		strbuf_reset(referent);
+		strbuf_addstr(referent, ref.value.symref);
+		*type |= REF_ISSYMREF;
+	} else if (reftable_ref_record_val1(&ref)) {
+		unsigned int hash_id;
+
+		switch (reftable_stack_hash_id(be->stack)) {
+		case REFTABLE_HASH_SHA1:
+			hash_id = GIT_HASH_SHA1;
+			break;
+		case REFTABLE_HASH_SHA256:
+			hash_id = GIT_HASH_SHA256;
+			break;
+		default:
+			BUG("unhandled hash ID %d", reftable_stack_hash_id(be->stack));
+		}
+
+		oidread(oid, reftable_ref_record_val1(&ref),
+			&hash_algos[hash_id]);
+	} else {
+		/* We got a tombstone, which should not happen. */
+		BUG("unhandled reference value type %d", ref.value_type);
+	}
+
+done:
+	assert(ret != REFTABLE_API_ERROR);
+	reftable_ref_record_release(&ref);
+	return ret;
+}
+
 struct reftable_ref_store {
 	struct ref_store base;
 
@@ -243,38 +287,6 @@  static void fill_reftable_log_record(struct reftable_log_record *log, const stru
 	log->value.update.tz_offset = sign * atoi(tz_begin);
 }
 
-static int read_ref_without_reload(struct reftable_ref_store *refs,
-				   struct reftable_stack *stack,
-				   const char *refname,
-				   struct object_id *oid,
-				   struct strbuf *referent,
-				   unsigned int *type)
-{
-	struct reftable_ref_record ref = {0};
-	int ret;
-
-	ret = reftable_stack_read_ref(stack, refname, &ref);
-	if (ret)
-		goto done;
-
-	if (ref.value_type == REFTABLE_REF_SYMREF) {
-		strbuf_reset(referent);
-		strbuf_addstr(referent, ref.value.symref);
-		*type |= REF_ISSYMREF;
-	} else if (reftable_ref_record_val1(&ref)) {
-		oidread(oid, reftable_ref_record_val1(&ref),
-			refs->base.repo->hash_algo);
-	} else {
-		/* We got a tombstone, which should not happen. */
-		BUG("unhandled reference value type %d", ref.value_type);
-	}
-
-done:
-	assert(ret != REFTABLE_API_ERROR);
-	reftable_ref_record_release(&ref);
-	return ret;
-}
-
 static int reftable_be_config(const char *var, const char *value,
 			      const struct config_context *ctx,
 			      void *_opts)
@@ -855,7 +867,7 @@  static int reftable_be_read_raw_ref(struct ref_store *ref_store,
 	if (ret)
 		return ret;
 
-	ret = read_ref_without_reload(refs, be->stack, refname, oid, referent, type);
+	ret = reftable_backend_read_ref(be, refname, oid, referent, type);
 	if (ret < 0)
 		return ret;
 	if (ret > 0) {
@@ -1074,8 +1086,8 @@  static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 	if (ret)
 		goto done;
 
-	ret = read_ref_without_reload(refs, be->stack, "HEAD",
-				      &head_oid, &head_referent, &head_type);
+	ret = reftable_backend_read_ref(be, "HEAD", &head_oid,
+					&head_referent, &head_type);
 	if (ret < 0)
 		goto done;
 	ret = 0;
@@ -1143,8 +1155,8 @@  static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 			string_list_insert(&affected_refnames, new_update->refname);
 		}
 
-		ret = read_ref_without_reload(refs, be->stack, rewritten_ref,
-					      &current_oid, &referent, &u->type);
+		ret = reftable_backend_read_ref(be, rewritten_ref,
+						&current_oid, &referent, &u->type);
 		if (ret < 0)
 			goto done;
 		if (ret > 0 && !ref_update_expects_existing_old_ref(u)) {
@@ -1602,7 +1614,7 @@  struct write_create_symref_arg {
 
 struct write_copy_arg {
 	struct reftable_ref_store *refs;
-	struct reftable_stack *stack;
+	struct reftable_backend *be;
 	const char *oldname;
 	const char *newname;
 	const char *logmsg;
@@ -1627,7 +1639,7 @@  static int write_copy_table(struct reftable_writer *writer, void *cb_data)
 	if (split_ident_line(&committer_ident, committer_info, strlen(committer_info)))
 		BUG("failed splitting committer info");
 
-	if (reftable_stack_read_ref(arg->stack, arg->oldname, &old_ref)) {
+	if (reftable_stack_read_ref(arg->be->stack, arg->oldname, &old_ref)) {
 		ret = error(_("refname %s not found"), arg->oldname);
 		goto done;
 	}
@@ -1666,7 +1678,7 @@  static int write_copy_table(struct reftable_writer *writer, void *cb_data)
 	 * the old branch and the creation of the new branch, and we cannot do
 	 * two changes to a reflog in a single update.
 	 */
-	deletion_ts = creation_ts = reftable_stack_next_update_index(arg->stack);
+	deletion_ts = creation_ts = reftable_stack_next_update_index(arg->be->stack);
 	if (arg->delete_old)
 		creation_ts++;
 	reftable_writer_set_limits(writer, deletion_ts, creation_ts);
@@ -1709,8 +1721,8 @@  static int write_copy_table(struct reftable_writer *writer, void *cb_data)
 		memcpy(logs[logs_nr].value.update.old_hash, old_ref.value.val1, GIT_MAX_RAWSZ);
 		logs_nr++;
 
-		ret = read_ref_without_reload(arg->refs, arg->stack, "HEAD", &head_oid,
-					      &head_referent, &head_type);
+		ret = reftable_backend_read_ref(arg->be, "HEAD", &head_oid,
+						&head_referent, &head_type);
 		if (ret < 0)
 			goto done;
 		append_head_reflog = (head_type & REF_ISSYMREF) && !strcmp(head_referent.buf, arg->oldname);
@@ -1753,7 +1765,7 @@  static int write_copy_table(struct reftable_writer *writer, void *cb_data)
 	 * copy over all log entries from the old reflog. Last but not least,
 	 * when renaming we also have to delete all the old reflog entries.
 	 */
-	ret = reftable_stack_init_log_iterator(arg->stack, &it);
+	ret = reftable_stack_init_log_iterator(arg->be->stack, &it);
 	if (ret < 0)
 		goto done;
 
@@ -1826,7 +1838,6 @@  static int reftable_be_rename_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "rename_ref");
-	struct reftable_backend *be;
 	struct write_copy_arg arg = {
 		.refs = refs,
 		.oldname = oldrefname,
@@ -1840,11 +1851,10 @@  static int reftable_be_rename_ref(struct ref_store *ref_store,
 	if (ret < 0)
 		goto done;
 
-	ret = backend_for(&be, refs, newrefname, &newrefname, 1);
+	ret = backend_for(&arg.be, refs, newrefname, &newrefname, 1);
 	if (ret)
 		goto done;
-	arg.stack = be->stack;
-	ret = reftable_stack_add(be->stack, &write_copy_table, &arg);
+	ret = reftable_stack_add(arg.be->stack, &write_copy_table, &arg);
 
 done:
 	assert(ret != REFTABLE_API_ERROR);
@@ -1858,7 +1868,6 @@  static int reftable_be_copy_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "copy_ref");
-	struct reftable_backend *be;
 	struct write_copy_arg arg = {
 		.refs = refs,
 		.oldname = oldrefname,
@@ -1871,11 +1880,10 @@  static int reftable_be_copy_ref(struct ref_store *ref_store,
 	if (ret < 0)
 		goto done;
 
-	ret = backend_for(&be, refs, newrefname, &newrefname, 1);
+	ret = backend_for(&arg.be, refs, newrefname, &newrefname, 1);
 	if (ret)
 		goto done;
-	arg.stack = be->stack;
-	ret = reftable_stack_add(be->stack, &write_copy_table, &arg);
+	ret = reftable_stack_add(arg.be->stack, &write_copy_table, &arg);
 
 done:
 	assert(ret != REFTABLE_API_ERROR);
diff --git a/reftable/reftable-stack.h b/reftable/reftable-stack.h
index 54787f2ef5..ae14270ea7 100644
--- a/reftable/reftable-stack.h
+++ b/reftable/reftable-stack.h
@@ -149,4 +149,7 @@  struct reftable_compaction_stats {
 struct reftable_compaction_stats *
 reftable_stack_compaction_stats(struct reftable_stack *st);
 
+/* Return the hash of the stack. */
+enum reftable_hash reftable_stack_hash_id(struct reftable_stack *st);
+
 #endif
diff --git a/reftable/stack.c b/reftable/stack.c
index 1fffd75630..d97b64a40d 100644
--- a/reftable/stack.c
+++ b/reftable/stack.c
@@ -1791,3 +1791,8 @@  int reftable_stack_clean(struct reftable_stack *st)
 	reftable_addition_destroy(add);
 	return err;
 }
+
+enum reftable_hash reftable_stack_hash_id(struct reftable_stack *st)
+{
+	return reftable_merged_table_hash_id(st->merged);
+}