diff mbox series

[v2,04/10] packfile: inline cache_or_unpack_entry

Message ID 20240823224630.1180772-5-e@80x24.org (mailing list archive)
State New
Headers show
Series cat-file speedups | expand

Commit Message

Eric Wong Aug. 23, 2024, 10:46 p.m. UTC
We need to check delta_base_cache anyways to fill in the
`whence' field in `struct object_info'.  Inlining (and getting
rid of) cache_or_unpack_entry() makes it easier to only do the
hashmap lookup once and avoid a redundant lookup later on.

This code reorganization will also make an optimization to
use the cache entry directly easier to implement in the next
commit.

Signed-off-by: Eric Wong <e@80x24.org>
---
 packfile.c | 48 +++++++++++++++++++++---------------------------
 1 file changed, 21 insertions(+), 27 deletions(-)

Comments

Junio C Hamano Aug. 26, 2024, 5:09 p.m. UTC | #1
Eric Wong <e@80x24.org> writes:

> We need to check delta_base_cache anyways to fill in the
> `whence' field in `struct object_info'.  Inlining (and getting
> rid of) cache_or_unpack_entry() makes it easier to only do the
> hashmap lookup once and avoid a redundant lookup later on.
>
> This code reorganization will also make an optimization to
> use the cache entry directly easier to implement in the next
> commit.

"cache entry" -> "cached entry"; we tend to use "cache entry"
exclusively to mean an entry in the in-core index structure,
and not the cached objects held in the object layer.

>  {
>  	struct pack_window *w_curs = NULL;
> -	unsigned long size;
>  	off_t curpos = obj_offset;
>  	enum object_type type;
> +	struct delta_base_cache_entry *ent;
>  
>  	/*
>  	 * We always get the representation type, but only convert it to
>  	 * a "real" type later if the caller is interested.
>  	 */
> -	if (oi->contentp && !oi->content_limit) {
> -		*oi->contentp = cache_or_unpack_entry(r, p, obj_offset, oi->sizep,
> -						      &type);
> +	oi->whence = OI_PACKED;
> +	ent = get_delta_base_cache_entry(p, obj_offset);
> +	if (ent) {
> +		oi->whence = OI_DBCACHED;

OK.  This is very straight-forward.  It is packed but if we grabbed
it from the delta-base-cache, that is the only case we know it is
dbcached.

> +		type = ent->type;
> +		if (oi->sizep)
> +			*oi->sizep = ent->size;
> +		if (oi->contentp) {
> +			if (!oi->content_limit ||
> +					ent->size <= oi->content_limit)
> +				*oi->contentp = xmemdupz(ent->data, ent->size);
> +			else
> +				*oi->contentp = NULL; /* caller must stream */

This assignment of NULL is more explicit than the original; is it
because the original assumed that *(oi->contentp) is initialized to
NULL if oi->contentp asks us to give the contents?

> +	} else if (oi->contentp && !oi->content_limit) {
> +		*oi->contentp = unpack_entry(r, p, obj_offset, &type,
> +						oi->sizep);
>  		if (!*oi->contentp)
>  			type = OBJ_BAD;

Nice.  The code structure is still easy to follow, even though the
if/else cascade here are organized differently with more cases (used
to be "are we peeking the contents, or not?"---now it is "do this if
we can grab from the delta base cache, do one of these other things
if we have go to the packfile").
Eric Wong Oct. 6, 2024, 5:40 p.m. UTC | #2
Junio C Hamano <gitster@pobox.com> wrote:
> Eric Wong <e@80x24.org> writes:
> 
> > We need to check delta_base_cache anyways to fill in the
> > `whence' field in `struct object_info'.  Inlining (and getting
> > rid of) cache_or_unpack_entry() makes it easier to only do the
> > hashmap lookup once and avoid a redundant lookup later on.
> >
> > This code reorganization will also make an optimization to
> > use the cache entry directly easier to implement in the next
> > commit.
> 
> "cache entry" -> "cached entry"; we tend to use "cache entry"
> exclusively to mean an entry in the in-core index structure,
> and not the cached objects held in the object layer.

OK, will do for v3 (apologies for the delay, Real Life is awful :<).

> > +		type = ent->type;
> > +		if (oi->sizep)
> > +			*oi->sizep = ent->size;
> > +		if (oi->contentp) {
> > +			if (!oi->content_limit ||
> > +					ent->size <= oi->content_limit)
> > +				*oi->contentp = xmemdupz(ent->data, ent->size);
> > +			else
> > +				*oi->contentp = NULL; /* caller must stream */
> 
> This assignment of NULL is more explicit than the original; is it
> because the original assumed that *(oi->contentp) is initialized to
> NULL if oi->contentp asks us to give the contents?

The original code unconditionally assigned cache_or_unpack_entry() result
to *oi->contentp, and there's a bunch of non-cat-file callers which pass
a non-NULL *oi->contentp and expect packed_object_info() to NULL it.
diff mbox series

Patch

diff --git a/packfile.c b/packfile.c
index 8ec86d2d69..0a90a5ed67 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1444,23 +1444,6 @@  static void detach_delta_base_cache_entry(struct delta_base_cache_entry *ent)
 	free(ent);
 }
 
-static void *cache_or_unpack_entry(struct repository *r, struct packed_git *p,
-				   off_t base_offset, unsigned long *base_size,
-				   enum object_type *type)
-{
-	struct delta_base_cache_entry *ent;
-
-	ent = get_delta_base_cache_entry(p, base_offset);
-	if (!ent)
-		return unpack_entry(r, p, base_offset, type, base_size);
-
-	if (type)
-		*type = ent->type;
-	if (base_size)
-		*base_size = ent->size;
-	return xmemdupz(ent->data, ent->size);
-}
-
 static inline void release_delta_base_cache(struct delta_base_cache_entry *ent)
 {
 	free(ent->data);
@@ -1521,20 +1504,35 @@  int packed_object_info(struct repository *r, struct packed_git *p,
 		       off_t obj_offset, struct object_info *oi)
 {
 	struct pack_window *w_curs = NULL;
-	unsigned long size;
 	off_t curpos = obj_offset;
 	enum object_type type;
+	struct delta_base_cache_entry *ent;
 
 	/*
 	 * We always get the representation type, but only convert it to
 	 * a "real" type later if the caller is interested.
 	 */
-	if (oi->contentp && !oi->content_limit) {
-		*oi->contentp = cache_or_unpack_entry(r, p, obj_offset, oi->sizep,
-						      &type);
+	oi->whence = OI_PACKED;
+	ent = get_delta_base_cache_entry(p, obj_offset);
+	if (ent) {
+		oi->whence = OI_DBCACHED;
+		type = ent->type;
+		if (oi->sizep)
+			*oi->sizep = ent->size;
+		if (oi->contentp) {
+			if (!oi->content_limit ||
+					ent->size <= oi->content_limit)
+				*oi->contentp = xmemdupz(ent->data, ent->size);
+			else
+				*oi->contentp = NULL; /* caller must stream */
+		}
+	} else if (oi->contentp && !oi->content_limit) {
+		*oi->contentp = unpack_entry(r, p, obj_offset, &type,
+						oi->sizep);
 		if (!*oi->contentp)
 			type = OBJ_BAD;
 	} else {
+		unsigned long size;
 		type = unpack_object_header(p, &w_curs, &curpos, &size);
 
 		if (oi->sizep) {
@@ -1558,8 +1556,8 @@  int packed_object_info(struct repository *r, struct packed_git *p,
 
 		if (oi->contentp) {
 			if (oi->sizep && *oi->sizep <= oi->content_limit) {
-				*oi->contentp = cache_or_unpack_entry(r, p, obj_offset,
-								      oi->sizep, &type);
+				*oi->contentp = unpack_entry(r, p, obj_offset,
+							&type, oi->sizep);
 				if (!*oi->contentp)
 					type = OBJ_BAD;
 			} else {
@@ -1608,10 +1606,6 @@  int packed_object_info(struct repository *r, struct packed_git *p,
 		} else
 			oidclr(oi->delta_base_oid, the_repository->hash_algo);
 	}
-
-	oi->whence = in_delta_base_cache(p, obj_offset) ? OI_DBCACHED :
-							  OI_PACKED;
-
 out:
 	unuse_pack(&w_curs);
 	return type;