diff mbox series

[v2,6/8] midx-write.c: support reading an existing MIDX with `packs_to_include`

Message ID fe187b1939d9c74782306cd3e4fcd2f25451ad69.1717023301.git.me@ttaylorr.com (mailing list archive)
State Accepted
Commit d6a8c58675a04abe2597179e3332bf031bd6e48a
Headers show
Series midx-write: miscellaneous clean-ups for incremental MIDXs | expand

Commit Message

Taylor Blau May 29, 2024, 10:55 p.m. UTC
Avoid unconditionally copying all packs from an existing MIDX into a new
MIDX by checking that packs added via `fill_packs_from_midx()` don't
appear in the `to_include` set, if one was provided.

Do so by calling `should_include_pack()` from both `add_pack_to_midx()`
and `fill_packs_from_midx()`.

In order to make this work, teach `should_include_pack()` a new
"exclude_from_midx" parameter, which allows skipping the first check.
This is done so that the caller in `fill_packs_from_midx()` doesn't
reject all of the packs it provided since they appear in an existing
MIDX by definition.

The sum total of this change is that we are now able to read and
reference objects in an existing MIDX even when given a non-NULL
`packs_to_include`. This is a prerequisite step for incremental MIDXs,
which need to load any existing MIDX (if one is present) in order to
determine whether or not an object already appears in an earlier portion
of the MIDX to avoid duplicating it across multiple portions.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
 midx-write.c | 42 +++++++++++-------------------------------
 1 file changed, 11 insertions(+), 31 deletions(-)

Comments

Jeff King May 30, 2024, 7:05 a.m. UTC | #1
On Wed, May 29, 2024 at 06:55:39PM -0400, Taylor Blau wrote:

>  static int should_include_pack(const struct write_midx_context *ctx,
> -			       const char *file_name)
> +			       const char *file_name,
> +			       int exclude_from_midx)

Hmm, so we grow a new flag...

>  {
> -	/*
> -	 * Note that at most one of ctx->m and ctx->to_include are set,
> -	 * so we are testing midx_contains_pack() and
> -	 * string_list_has_string() independently (guarded by the
> -	 * appropriate NULL checks).
> -	 *
> -	 * We could support passing to_include while reusing an existing
> -	 * MIDX, but don't currently since the reuse process drags
> -	 * forward all packs from an existing MIDX (without checking
> -	 * whether or not they appear in the to_include list).
> -	 *
> -	 * If we added support for that, these next two conditional
> -	 * should be performed independently (likely checking
> -	 * to_include before the existing MIDX).
> -	 */
> -	if (ctx->m && midx_contains_pack(ctx->m, file_name))
> +	if (exclude_from_midx && ctx->m && midx_contains_pack(ctx->m, file_name))
>  		return 0;
> -	else if (ctx->to_include &&
> -		 !string_list_has_string(ctx->to_include, file_name))
> +	if (ctx->to_include && !string_list_has_string(ctx->to_include,
> +						       file_name))
>  		return 0;

...that disables half of what the function is checking.

It makes me wonder if it should just be two functions (or if the callers
should just inline them, since they are themselves basically
one-liners). But I think this is getting into bike-shedding territory.
I'm OK with it as-is.

-Peff
diff mbox series

Patch

diff --git a/midx-write.c b/midx-write.c
index c93e1ae305..8b96c566e7 100644
--- a/midx-write.c
+++ b/midx-write.c
@@ -101,27 +101,13 @@  struct write_midx_context {
 };
 
 static int should_include_pack(const struct write_midx_context *ctx,
-			       const char *file_name)
+			       const char *file_name,
+			       int exclude_from_midx)
 {
-	/*
-	 * Note that at most one of ctx->m and ctx->to_include are set,
-	 * so we are testing midx_contains_pack() and
-	 * string_list_has_string() independently (guarded by the
-	 * appropriate NULL checks).
-	 *
-	 * We could support passing to_include while reusing an existing
-	 * MIDX, but don't currently since the reuse process drags
-	 * forward all packs from an existing MIDX (without checking
-	 * whether or not they appear in the to_include list).
-	 *
-	 * If we added support for that, these next two conditional
-	 * should be performed independently (likely checking
-	 * to_include before the existing MIDX).
-	 */
-	if (ctx->m && midx_contains_pack(ctx->m, file_name))
+	if (exclude_from_midx && ctx->m && midx_contains_pack(ctx->m, file_name))
 		return 0;
-	else if (ctx->to_include &&
-		 !string_list_has_string(ctx->to_include, file_name))
+	if (ctx->to_include && !string_list_has_string(ctx->to_include,
+						       file_name))
 		return 0;
 	return 1;
 }
@@ -135,7 +121,7 @@  static void add_pack_to_midx(const char *full_path, size_t full_path_len,
 	if (ends_with(file_name, ".idx")) {
 		display_progress(ctx->progress, ++ctx->pack_paths_checked);
 
-		if (!should_include_pack(ctx, file_name))
+		if (!should_include_pack(ctx, file_name, 1))
 			return;
 
 		ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc);
@@ -894,6 +880,9 @@  static int fill_packs_from_midx(struct write_midx_context *ctx,
 	uint32_t i;
 
 	for (i = 0; i < ctx->m->num_packs; i++) {
+		if (!should_include_pack(ctx, ctx->m->pack_names[i], 0))
+			continue;
+
 		ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc);
 
 		if (flags & MIDX_WRITE_REV_INDEX || preferred_pack_name) {
@@ -948,15 +937,7 @@  static int write_midx_internal(const char *object_dir,
 		die_errno(_("unable to create leading directories of %s"),
 			  midx_name.buf);
 
-	if (!packs_to_include) {
-		/*
-		 * Only reference an existing MIDX when not filtering which
-		 * packs to include, since all packs and objects are copied
-		 * blindly from an existing MIDX if one is present.
-		 */
-		ctx.m = lookup_multi_pack_index(the_repository, object_dir);
-	}
-
+	ctx.m = lookup_multi_pack_index(the_repository, object_dir);
 	if (ctx.m && !midx_checksum_valid(ctx.m)) {
 		warning(_("ignoring existing multi-pack-index; checksum mismatch"));
 		ctx.m = NULL;
@@ -965,6 +946,7 @@  static int write_midx_internal(const char *object_dir,
 	ctx.nr = 0;
 	ctx.alloc = ctx.m ? ctx.m->num_packs : 16;
 	ctx.info = NULL;
+	ctx.to_include = packs_to_include;
 	ALLOC_ARRAY(ctx.info, ctx.alloc);
 
 	if (ctx.m && fill_packs_from_midx(&ctx, preferred_pack_name,
@@ -981,8 +963,6 @@  static int write_midx_internal(const char *object_dir,
 	else
 		ctx.progress = NULL;
 
-	ctx.to_include = packs_to_include;
-
 	for_each_file_in_pack_dir(object_dir, add_pack_to_midx, &ctx);
 	stop_progress(&ctx.progress);