diff mbox series

[07/30] path-walk: allow consumer to specify object types

Message ID 2829fe3875438f3a9907f36d825d6c24952abded.1725935335.git.gitgitgadget@gmail.com (mailing list archive)
State New
Headers show
Series Path-walk API and applications | expand

Commit Message

Derrick Stolee Sept. 10, 2024, 2:28 a.m. UTC
From: Derrick Stolee <derrickstolee@github.com>

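This adds the ability to ask for the commits as a single list, and lets
the consumer choose which object types (commits, trees, blobs) the path_fn
is called on. As a result, the callback in 'git backfill' can now call
BUG() instead of silently returning when it is handed anything other than
blobs.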

Signed-off-by: Derrick Stolee <stolee@gmail.com>
---
 builtin/backfill.c |  2 +-
 path-walk.c        | 40 ++++++++++++++++++++++++++++++++++------
 path-walk.h        | 12 +++++++++++-
 3 files changed, 46 insertions(+), 8 deletions(-)
diff mbox series

Patch

diff --git a/builtin/backfill.c b/builtin/backfill.c
index 82a18e58a41..2a1b043f188 100644
--- a/builtin/backfill.c
+++ b/builtin/backfill.c
@@ -61,7 +61,7 @@  static int fill_missing_blobs(const char *path,
 	struct backfill_context *ctx = data;
 
 	if (type != OBJ_BLOB)
-		return 0;
+		BUG("fill_missing_blobs only takes blob objects");
 
 	for (size_t i = 0; i < list->nr; i++) {
 		off_t size = 0;
diff --git a/path-walk.c b/path-walk.c
index dc2390dd9ea..d70e6840fb5 100644
--- a/path-walk.c
+++ b/path-walk.c
@@ -83,6 +83,10 @@  static int add_children(struct path_walk_context *ctx,
 		if (S_ISGITLINK(entry.mode))
 			continue;
 
+		/* If the caller doesn't want blobs, then don't bother. */
+		if (!ctx->info->blobs && type == OBJ_BLOB)
+			continue;
+
 		if (type == OBJ_TREE) {
 			struct tree *child = lookup_tree(ctx->repo, &entry.oid);
 			o = child ? &child->object : NULL;
@@ -156,9 +160,11 @@  static int walk_path(struct path_walk_context *ctx,
 
 	list = strmap_get(&ctx->paths_to_lists, path);
 
-	/* Evaluate function pointer on this data. */
-	ret = ctx->info->path_fn(path, &list->oids, list->type,
-				 ctx->info->path_fn_data);
+	/* Evaluate function pointer on this data, if requested. */
+	if ((list->type == OBJ_TREE && ctx->info->trees) ||
+	    (list->type == OBJ_BLOB && ctx->info->blobs))
+		ret = ctx->info->path_fn(path, &list->oids, list->type,
+					ctx->info->path_fn_data);
 
 	/* Expand data for children. */
 	if (list->type == OBJ_TREE) {
@@ -200,6 +206,7 @@  int walk_objects_by_path(struct path_walk_info *info)
 	size_t commits_nr = 0, paths_nr = 0;
 	struct commit *c;
 	struct type_and_oid_list *root_tree_list;
+	struct type_and_oid_list *commit_list;
 	struct path_walk_context ctx = {
 		.repo = info->revs->repo,
 		.revs = info->revs,
@@ -210,28 +217,49 @@  int walk_objects_by_path(struct path_walk_info *info)
 
 	trace2_region_enter("path-walk", "commit-walk", info->revs->repo);
 
+	CALLOC_ARRAY(commit_list, 1);
+	commit_list->type = OBJ_COMMIT;
+
 	/* Insert a single list for the root tree into the paths. */
 	CALLOC_ARRAY(root_tree_list, 1);
 	root_tree_list->type = OBJ_TREE;
 	strmap_put(&ctx.paths_to_lists, root_path, root_tree_list);
-
 	if (prepare_revision_walk(info->revs))
 		die(_("failed to setup revision walk"));
 
 	while ((c = get_revision(info->revs))) {
-		struct object_id *oid = get_commit_tree_oid(c);
-		struct tree *t = lookup_tree(info->revs->repo, oid);
+		struct object_id *oid;
+		struct tree *t;
 		commits_nr++;
 
+		if (info->commits)
+			oid_array_append(&commit_list->oids,
+					 &c->object.oid);
+
+		/* If we only care about commits, then skip trees. */
+		if (!info->trees && !info->blobs)
+			continue;
+
+		oid = get_commit_tree_oid(c);
+		t = lookup_tree(info->revs->repo, oid);
+
 		if (t)
 			oid_array_append(&root_tree_list->oids, oid);
 		else
 			warning("could not find tree %s", oid_to_hex(oid));
+
 	}
 
 	trace2_data_intmax("path-walk", ctx.repo, "commits", commits_nr);
 	trace2_region_leave("path-walk", "commit-walk", info->revs->repo);
 
+	/* Track all commits. */
+	if (info->commits)
+		ret = info->path_fn("", &commit_list->oids, OBJ_COMMIT,
+				    info->path_fn_data);
+	oid_array_clear(&commit_list->oids);
+	free(commit_list);
+
 	string_list_append(&ctx.path_stack, root_path);
 
 	trace2_region_enter("path-walk", "path-walk", info->revs->repo);
diff --git a/path-walk.h b/path-walk.h
index bc1ebba5081..49b982dade6 100644
--- a/path-walk.h
+++ b/path-walk.h
@@ -32,6 +32,14 @@  struct path_walk_info {
 	path_fn path_fn;
 	void *path_fn_data;
 
+	/**
+	 * Select which object types the path_fn should be called on. When
+	 * 'blobs' is unset, the walk also skips blob entries in each tree.
+	 */
+	int commits;
+	int trees;
+	int blobs;
+
 	/**
 	 * Specify a sparse-checkout definition to match our paths to. Do not
 	 * walk outside of this sparse definition. If the patterns are in
@@ -43,7 +51,9 @@  struct path_walk_info {
 	struct pattern_list *pl;
 };
 
-#define PATH_WALK_INFO_INIT { 0 }
+#define PATH_WALK_INFO_INIT {   \
+	.blobs = 1,		\
+}
 
 /**
  * Given the configuration of 'info', walk the commits based on 'info->revs' and