diff mbox series

[3/5] make object_directory.loose_objects_subdir_seen a bitmap

Message ID 20210627024718.25383-4-e@80x24.org (mailing list archive)
State New, archived
Headers show
Series optimizations for many odb alternates | expand

Commit Message

Eric Wong June 27, 2021, 2:47 a.m. UTC
There's no point in using 8 bits per-directory when 1 bit
will do.  This saves us 224 bytes per object directory, which
ends up being 22MB when dealing with 100K alternates.

Signed-off-by: Eric Wong <e@80x24.org>
---
 object-file.c  | 10 +++++++---
 object-store.h |  2 +-
 2 files changed, 8 insertions(+), 4 deletions(-)

Comments

René Scharfe June 27, 2021, 10:23 a.m. UTC | #1
Am 27.06.21 um 04:47 schrieb Eric Wong:
> There's no point in using 8 bits per-directory when 1 bit
> will do.  This saves us 224 bytes per object directory, which
> ends up being 22MB when dealing with 100K alternates.

The point was simplicity under the assumption that the number of
repositories is low -- for most users it's only one.  That obviously
doesn't hold for your use case anymore. :)

A compact representation should also reduce dcache misses, so this
should be a win for the single-repo case as well.

> Signed-off-by: Eric Wong <e@80x24.org>
> ---
>  object-file.c  | 10 +++++++---
>  object-store.h |  2 +-
>  2 files changed, 8 insertions(+), 4 deletions(-)
>
> diff --git a/object-file.c b/object-file.c
> index 6be43c2b60..2c8b9c05f9 100644
> --- a/object-file.c
> +++ b/object-file.c
> @@ -2463,12 +2463,16 @@ struct oid_array *odb_loose_cache(struct object_directory *odb,
>  {
>  	int subdir_nr = oid->hash[0];
>  	struct strbuf buf = STRBUF_INIT;
> +	size_t BM_SIZE = sizeof(odb->loose_objects_subdir_seen[0]) * CHAR_BIT;

With that name I'd expect the variable to contain the number of bytes or
bits in the whole bitmap.  And to not be a variable at all, but rather a
macro.  Perhaps word_bits?

bitsizeof() does the same and is slightly shorter.

> +	uint32_t *bitmap;

Ah, you call the array items bitmap, which they are.  Hmm.  I rather
think of the whole thing as a bitmap and its uint32_t elements as words.
Does it matter?  Not sure.

> +	uint32_t bit = 1 << (subdir_nr % BM_SIZE);

I'd call that mask, but bit is fine as well..

Anyway, it would look something like this:

	size_t word_bits = bitsizeof(odb->loose_objects_subdir_seen[0]);
	size_t word_index = subdir_nr / word_bits;
	size_t mask = 1 << (subdir_nr % word_bits);

>
>  	if (subdir_nr < 0 ||
> -	    subdir_nr >= ARRAY_SIZE(odb->loose_objects_subdir_seen))
> +	    subdir_nr >= ARRAY_SIZE(odb->loose_objects_subdir_seen) * BM_SIZE)

bitsizeof(odb->loose_objects_subdir_seen) would be easier to read and
understand, I think.

>  		BUG("subdir_nr out of range");
>
> -	if (odb->loose_objects_subdir_seen[subdir_nr])
> +	bitmap = &odb->loose_objects_subdir_seen[subdir_nr / BM_SIZE];
> +	if (*bitmap & bit)
>  		return &odb->loose_objects_cache[subdir_nr];
>
>  	strbuf_addstr(&buf, odb->path);
> @@ -2476,7 +2480,7 @@ struct oid_array *odb_loose_cache(struct object_directory *odb,
>  				    append_loose_object,
>  				    NULL, NULL,
>  				    &odb->loose_objects_cache[subdir_nr]);
> -	odb->loose_objects_subdir_seen[subdir_nr] = 1;
> +	*bitmap |= bit;
>  	strbuf_release(&buf);
>  	return &odb->loose_objects_cache[subdir_nr];
>  }
> diff --git a/object-store.h b/object-store.h
> index 20c1cedb75..8fcddf3e65 100644
> --- a/object-store.h
> +++ b/object-store.h
> @@ -22,7 +22,7 @@ struct object_directory {
>  	 *
>  	 * Be sure to call odb_load_loose_cache() before using.
>  	 */
> -	char loose_objects_subdir_seen[256];
> +	uint32_t loose_objects_subdir_seen[8]; /* 256 bits */

Perhaps	DIV_ROUND_UP(256, bitsizeof(uint32_t))?  The comment explains
it nicely already, though.

>  	struct oid_array loose_objects_cache[256];
>
>  	/*
>

Summary: Good idea, the implementation looks correct, I stumbled
over some of the names, bitsizeof() could be used.

René
Eric Wong June 28, 2021, 11:09 p.m. UTC | #2
René Scharfe <l.s.r@web.de> wrote:
> Am 27.06.21 um 04:47 schrieb Eric Wong:
> Anyway, it would look something like this:
> 
> 	size_t word_bits = bitsizeof(odb->loose_objects_subdir_seen[0]);
> 	size_t word_index = subdir_nr / word_bits;
> 	size_t mask = 1 << (subdir_nr % word_bits);

<snip> yeah, I missed bitsizeof :x

> > --- a/object-store.h
> > +++ b/object-store.h
> > @@ -22,7 +22,7 @@ struct object_directory {
> >  	 *
> >  	 * Be sure to call odb_load_loose_cache() before using.
> >  	 */
> > -	char loose_objects_subdir_seen[256];
> > +	uint32_t loose_objects_subdir_seen[8]; /* 256 bits */
> 
> Perhaps	DIV_ROUND_UP(256, bitsizeof(uint32_t))?  The comment explains
> it nicely already, though.

I think I'll keep my original there...  IMHO the macros
obfuscate the meaning for those less familiar with our codebase
(myself included :x)

> >  	struct oid_array loose_objects_cache[256];
> >
> >  	/*
> >
> 
> Summary: Good idea, the implementation looks correct, I stumbled
> over some of the names, bitsizeof() could be used.

Thanks for the review.

I'll squash up the following for v2 while awaiting feedback
for the rest of the series:

diff --git a/object-file.c b/object-file.c
index d33b84c4a4..6c397fb4f1 100644
--- a/object-file.c
+++ b/object-file.c
@@ -2463,16 +2463,17 @@ struct oidtree *odb_loose_cache(struct object_directory *odb,
 {
 	int subdir_nr = oid->hash[0];
 	struct strbuf buf = STRBUF_INIT;
-	size_t BM_SIZE = sizeof(odb->loose_objects_subdir_seen[0]) * CHAR_BIT;
+	size_t word_bits = bitsizeof(odb->loose_objects_subdir_seen[0]);
+	size_t word_index = subdir_nr / word_bits;
+	size_t mask = 1 << (subdir_nr % word_bits);
 	uint32_t *bitmap;
-	uint32_t bit = 1 << (subdir_nr % BM_SIZE);
 
 	if (subdir_nr < 0 ||
-	    subdir_nr >= ARRAY_SIZE(odb->loose_objects_subdir_seen) * BM_SIZE)
+	    subdir_nr >= bitsizeof(odb->loose_objects_subdir_seen))
 		BUG("subdir_nr out of range");
 
-	bitmap = &odb->loose_objects_subdir_seen[subdir_nr / BM_SIZE];
-	if (*bitmap & bit)
+	bitmap = &odb->loose_objects_subdir_seen[word_index];
+	if (*bitmap & mask)
 		return &odb->loose_objects_cache;
 
 	strbuf_addstr(&buf, odb->path);
@@ -2480,7 +2481,7 @@ struct oidtree *odb_loose_cache(struct object_directory *odb,
 				    append_loose_object,
 				    NULL, NULL,
 				    &odb->loose_objects_cache);
-	*bitmap |= bit;
+	*bitmap |= mask;
 	strbuf_release(&buf);
 	return &odb->loose_objects_cache;
 }
diff mbox series

Patch

diff --git a/object-file.c b/object-file.c
index 6be43c2b60..2c8b9c05f9 100644
--- a/object-file.c
+++ b/object-file.c
@@ -2463,12 +2463,16 @@  struct oid_array *odb_loose_cache(struct object_directory *odb,
 {
 	int subdir_nr = oid->hash[0];
 	struct strbuf buf = STRBUF_INIT;
+	size_t BM_SIZE = sizeof(odb->loose_objects_subdir_seen[0]) * CHAR_BIT;
+	uint32_t *bitmap;
+	uint32_t bit = 1 << (subdir_nr % BM_SIZE);
 
 	if (subdir_nr < 0 ||
-	    subdir_nr >= ARRAY_SIZE(odb->loose_objects_subdir_seen))
+	    subdir_nr >= ARRAY_SIZE(odb->loose_objects_subdir_seen) * BM_SIZE)
 		BUG("subdir_nr out of range");
 
-	if (odb->loose_objects_subdir_seen[subdir_nr])
+	bitmap = &odb->loose_objects_subdir_seen[subdir_nr / BM_SIZE];
+	if (*bitmap & bit)
 		return &odb->loose_objects_cache[subdir_nr];
 
 	strbuf_addstr(&buf, odb->path);
@@ -2476,7 +2480,7 @@  struct oid_array *odb_loose_cache(struct object_directory *odb,
 				    append_loose_object,
 				    NULL, NULL,
 				    &odb->loose_objects_cache[subdir_nr]);
-	odb->loose_objects_subdir_seen[subdir_nr] = 1;
+	*bitmap |= bit;
 	strbuf_release(&buf);
 	return &odb->loose_objects_cache[subdir_nr];
 }
diff --git a/object-store.h b/object-store.h
index 20c1cedb75..8fcddf3e65 100644
--- a/object-store.h
+++ b/object-store.h
@@ -22,7 +22,7 @@  struct object_directory {
 	 *
 	 * Be sure to call odb_load_loose_cache() before using.
 	 */
-	char loose_objects_subdir_seen[256];
+	uint32_t loose_objects_subdir_seen[8]; /* 256 bits */
 	struct oid_array loose_objects_cache[256];
 
 	/*