diff mbox series

[v2,5/5] packfile: use oidset for bad objects

Message ID fb64d00f-b44a-6a23-3ddd-027f1df83a22@web.de (mailing list archive)
State New, archived
Headers show
Series packfile: use oidset for bad objects | expand

Commit Message

René Scharfe Sept. 11, 2021, 8:43 p.m. UTC
Store the object ID of broken pack entries in an oidset instead of
keeping only their hashes in an unsorted array.  The resulting code is
shorter and easier to read.  It also handles the (hopefully) very rare
case of having a high number of bad objects better.

Helped-by: Jeff King <peff@peff.net>
Signed-off-by: René Scharfe <l.s.r@web.de>
---
 midx.c         | 10 +++-------
 object-store.h |  4 ++--
 packfile.c     | 28 ++++++----------------------
 3 files changed, 11 insertions(+), 31 deletions(-)

--
2.33.0
diff mbox series

Patch

diff --git a/midx.c b/midx.c
index 8cb063023c..76322b713c 100644
--- a/midx.c
+++ b/midx.c
@@ -307,13 +307,9 @@  int fill_midx_entry(struct repository * r,
 	if (!is_pack_valid(p))
 		return 0;

-	if (p->num_bad_objects) {
-		uint32_t i;
-		for (i = 0; i < p->num_bad_objects; i++)
-			if (hasheq(oid->hash,
-				   p->bad_object_sha1 + the_hash_algo->rawsz * i))
-				return 0;
-	}
+	if (oidset_size(&p->bad_objects) &&
+	    oidset_contains(&p->bad_objects, oid))
+		return 0;

 	e->offset = nth_midxed_offset(m, pos);
 	e->p = p;
diff --git a/object-store.h b/object-store.h
index b4dc6668aa..c7bead66f6 100644
--- a/object-store.h
+++ b/object-store.h
@@ -10,6 +10,7 @@ 
 #include "khash.h"
 #include "dir.h"
 #include "oidtree.h"
+#include "oidset.h"

 struct object_directory {
 	struct object_directory *next;
@@ -75,9 +76,8 @@  struct packed_git {
 	const void *index_data;
 	size_t index_size;
 	uint32_t num_objects;
-	uint32_t num_bad_objects;
 	uint32_t crc_offset;
-	unsigned char *bad_object_sha1;
+	struct oidset bad_objects;
 	int index_version;
 	time_t mtime;
 	int pack_fd;
diff --git a/packfile.c b/packfile.c
index 04080a558b..caba29c624 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1163,29 +1163,17 @@  int unpack_object_header(struct packed_git *p,

 void mark_bad_packed_object(struct packed_git *p, const struct object_id *oid)
 {
-	unsigned i;
-	const unsigned hashsz = the_hash_algo->rawsz;
-	for (i = 0; i < p->num_bad_objects; i++)
-		if (hasheq(oid->hash, p->bad_object_sha1 + hashsz * i))
-			return;
-	p->bad_object_sha1 = xrealloc(p->bad_object_sha1,
-				      st_mult(GIT_MAX_RAWSZ,
-					      st_add(p->num_bad_objects, 1)));
-	hashcpy(p->bad_object_sha1 + hashsz * p->num_bad_objects, oid->hash);
-	p->num_bad_objects++;
+	oidset_insert(&p->bad_objects, oid);
 }

 const struct packed_git *has_packed_and_bad(struct repository *r,
 					    const struct object_id *oid)
 {
 	struct packed_git *p;
-	unsigned i;

 	for (p = r->objects->packed_git; p; p = p->next)
-		for (i = 0; i < p->num_bad_objects; i++)
-			if (hasheq(oid->hash,
-				   p->bad_object_sha1 + the_hash_algo->rawsz * i))
-				return p;
+		if (oidset_contains(&p->bad_objects, oid))
+			return p;
 	return NULL;
 }

@@ -2016,13 +2004,9 @@  static int fill_pack_entry(const struct object_id *oid,
 {
 	off_t offset;

-	if (p->num_bad_objects) {
-		unsigned i;
-		for (i = 0; i < p->num_bad_objects; i++)
-			if (hasheq(oid->hash,
-				   p->bad_object_sha1 + the_hash_algo->rawsz * i))
-				return 0;
-	}
+	if (oidset_size(&p->bad_objects) &&
+	    oidset_contains(&p->bad_objects, oid))
+		return 0;

 	offset = find_pack_entry_one(oid->hash, p);
 	if (!offset)