diff mbox series

[v3,4/7] path-walk: allow consumer to specify object types

Message ID 42e71e6285f6a6976aee4ce04f1285bcb0305223.1733514359.git.gitgitgadget@gmail.com (mailing list archive)
State New
Headers show
Series PATH WALK I: The path-walk API | expand

Commit Message

Derrick Stolee Dec. 6, 2024, 7:45 p.m. UTC
From: Derrick Stolee <derrickstolee@github.com>

We add the ability to filter the object types in the path-walk API so
the callback function is called fewer times.

This adds the ability to ask for the commits in a list, as well. We
re-use the empty string as the path for this set of objects because
they are passed directly to the callback function instead of being part
of the 'path_stack'.

Future changes will add the ability to visit annotated tags.

Signed-off-by: Derrick Stolee <stolee@gmail.com>
---
 Documentation/technical/api-path-walk.txt |   9 ++
 path-walk.c                               |  33 ++++-
 path-walk.h                               |  14 +-
 t/helper/test-path-walk.c                 |  15 ++-
 t/t6601-path-walk.sh                      | 149 +++++++++++++++-------
 5 files changed, 170 insertions(+), 50 deletions(-)
diff mbox series

Patch

diff --git a/Documentation/technical/api-path-walk.txt b/Documentation/technical/api-path-walk.txt
index 662162ec70b..dce553b6114 100644
--- a/Documentation/technical/api-path-walk.txt
+++ b/Documentation/technical/api-path-walk.txt
@@ -39,6 +39,15 @@  It is also important that you do not specify the `--objects` flag for the
 the objects will be walked in a separate way based on those starting
 commits.
 
+`commits`, `blobs`, `trees`::
+	By default, these members are enabled and signal that the path-walk
+	API should call the `path_fn` on objects of these types. Specialized
+	applications could disable some options to make it simpler to walk
+	the objects or to have fewer calls to `path_fn`.
++
+While it is possible to walk only commits in this way, consumers would be
+better off using the revision walk API instead.
+
 Examples
 --------
 
diff --git a/path-walk.c b/path-walk.c
index 24cf04c1e7d..2ca08402367 100644
--- a/path-walk.c
+++ b/path-walk.c
@@ -98,6 +98,10 @@  static int add_children(struct path_walk_context *ctx,
 		if (S_ISGITLINK(entry.mode))
 			continue;
 
+		/* If the caller doesn't want blobs, then don't bother. */
+		if (!ctx->info->blobs && type == OBJ_BLOB)
+			continue;
+
 		if (type == OBJ_TREE) {
 			struct tree *child = lookup_tree(ctx->repo, &entry.oid);
 			o = child ? &child->object : NULL;
@@ -157,9 +161,11 @@  static int walk_path(struct path_walk_context *ctx,
 	if (!list->oids.nr)
 		return 0;
 
-	/* Evaluate function pointer on this data. */
-	ret = ctx->info->path_fn(path, &list->oids, list->type,
-				 ctx->info->path_fn_data);
+	/* Evaluate function pointer on this data, if requested. */
+	if ((list->type == OBJ_TREE && ctx->info->trees) ||
+	    (list->type == OBJ_BLOB && ctx->info->blobs))
+		ret = ctx->info->path_fn(path, &list->oids, list->type,
+					ctx->info->path_fn_data);
 
 	/* Expand data for children. */
 	if (list->type == OBJ_TREE) {
@@ -201,6 +207,7 @@  int walk_objects_by_path(struct path_walk_info *info)
 	size_t commits_nr = 0, paths_nr = 0;
 	struct commit *c;
 	struct type_and_oid_list *root_tree_list;
+	struct type_and_oid_list *commit_list;
 	struct path_walk_context ctx = {
 		.repo = info->revs->repo,
 		.revs = info->revs,
@@ -212,6 +219,9 @@  int walk_objects_by_path(struct path_walk_info *info)
 
 	trace2_region_enter("path-walk", "commit-walk", info->revs->repo);
 
+	CALLOC_ARRAY(commit_list, 1);
+	commit_list->type = OBJ_COMMIT;
+
 	/* Insert a single list for the root tree into the paths. */
 	CALLOC_ARRAY(root_tree_list, 1);
 	root_tree_list->type = OBJ_TREE;
@@ -222,10 +232,18 @@  int walk_objects_by_path(struct path_walk_info *info)
 		die(_("failed to setup revision walk"));
 
 	while ((c = get_revision(info->revs))) {
-		struct object_id *oid = get_commit_tree_oid(c);
+		struct object_id *oid;
 		struct tree *t;
 		commits_nr++;
 
+		if (info->commits)
+			oid_array_append(&commit_list->oids,
+					 &c->object.oid);
+
+		/* If we only care about commits, then skip trees. */
+		if (!info->trees && !info->blobs)
+			continue;
+
 		oid = get_commit_tree_oid(c);
 		t = lookup_tree(info->revs->repo, oid);
 
@@ -243,6 +261,13 @@  int walk_objects_by_path(struct path_walk_info *info)
 	trace2_data_intmax("path-walk", ctx.repo, "commits", commits_nr);
 	trace2_region_leave("path-walk", "commit-walk", info->revs->repo);
 
+	/* Track all commits. */
+	if (info->commits && commit_list->oids.nr)
+		ret = info->path_fn("", &commit_list->oids, OBJ_COMMIT,
+				    info->path_fn_data);
+	oid_array_clear(&commit_list->oids);
+	free(commit_list);
+
 	trace2_region_enter("path-walk", "path-walk", info->revs->repo);
 	while (!ret && ctx.path_stack.nr) {
 		char *path = ctx.path_stack.items[ctx.path_stack.nr - 1].string;
diff --git a/path-walk.h b/path-walk.h
index c9e94a98bc8..2d2afc29b47 100644
--- a/path-walk.h
+++ b/path-walk.h
@@ -30,9 +30,21 @@  struct path_walk_info {
 	 */
 	path_fn path_fn;
 	void *path_fn_data;
+
+	/**
+	 * Initialize which object types the path_fn should be called on. This
+	 * could also limit the walk to skip blobs if not set.
+	 */
+	int commits;
+	int trees;
+	int blobs;
 };
 
-#define PATH_WALK_INFO_INIT { 0 }
+#define PATH_WALK_INFO_INIT {   \
+	.blobs = 1,		\
+	.trees = 1,		\
+	.commits = 1,		\
+}
 
 /**
  * Given the configuration of 'info', walk the commits based on 'info->revs' and
diff --git a/t/helper/test-path-walk.c b/t/helper/test-path-walk.c
index def7c81ac4f..a57a05a6391 100644
--- a/t/helper/test-path-walk.c
+++ b/t/helper/test-path-walk.c
@@ -19,6 +19,8 @@  static const char * const path_walk_usage[] = {
 
 struct path_walk_test_data {
 	uintmax_t batch_nr;
+
+	uintmax_t commit_nr;
 	uintmax_t tree_nr;
 	uintmax_t blob_nr;
 };
@@ -33,6 +35,8 @@  static int emit_block(const char *path, struct oid_array *oids,
 		tdata->tree_nr += oids->nr;
 	else if (type == OBJ_BLOB)
 		tdata->blob_nr += oids->nr;
+	else if (type == OBJ_COMMIT)
+		tdata->commit_nr += oids->nr;
 	else
 		BUG("we do not understand this type");
 
@@ -54,6 +58,12 @@  int cmd__path_walk(int argc, const char **argv)
 	struct path_walk_info info = PATH_WALK_INFO_INIT;
 	struct path_walk_test_data data = { 0 };
 	struct option options[] = {
+		OPT_BOOL(0, "blobs", &info.blobs,
+			 N_("toggle inclusion of blob objects")),
+		OPT_BOOL(0, "commits", &info.commits,
+			 N_("toggle inclusion of commit objects")),
+		OPT_BOOL(0, "trees", &info.trees,
+			 N_("toggle inclusion of tree objects")),
 		OPT_END(),
 	};
 
@@ -75,9 +85,10 @@  int cmd__path_walk(int argc, const char **argv)
 
 	res = walk_objects_by_path(&info);
 
-	printf("trees:%" PRIuMAX "\n"
+	printf("commits:%" PRIuMAX "\n"
+	       "trees:%" PRIuMAX "\n"
 	       "blobs:%" PRIuMAX "\n",
-	       data.tree_nr, data.blob_nr);
+	       data.commit_nr, data.tree_nr, data.blob_nr);
 
 	release_revisions(&revs);
 	return res;
diff --git a/t/t6601-path-walk.sh b/t/t6601-path-walk.sh
index 4e052c09309..4a4939a1b02 100755
--- a/t/t6601-path-walk.sh
+++ b/t/t6601-path-walk.sh
@@ -33,22 +33,27 @@  test_expect_success 'all' '
 	test-tool path-walk -- --all >out &&
 
 	cat >expect <<-EOF &&
-	0:tree::$(git rev-parse topic^{tree})
-	0:tree::$(git rev-parse base^{tree})
-	0:tree::$(git rev-parse base~1^{tree})
-	0:tree::$(git rev-parse base~2^{tree})
-	1:tree:right/:$(git rev-parse topic:right)
-	1:tree:right/:$(git rev-parse base~1:right)
-	1:tree:right/:$(git rev-parse base~2:right)
-	2:blob:right/d:$(git rev-parse base~1:right/d)
-	3:blob:right/c:$(git rev-parse base~2:right/c)
-	3:blob:right/c:$(git rev-parse topic:right/c)
-	4:tree:left/:$(git rev-parse base:left)
-	4:tree:left/:$(git rev-parse base~2:left)
-	5:blob:left/b:$(git rev-parse base~2:left/b)
-	5:blob:left/b:$(git rev-parse base:left/b)
-	6:blob:a:$(git rev-parse base~2:a)
+	0:commit::$(git rev-parse topic)
+	0:commit::$(git rev-parse base)
+	0:commit::$(git rev-parse base~1)
+	0:commit::$(git rev-parse base~2)
+	1:tree::$(git rev-parse topic^{tree})
+	1:tree::$(git rev-parse base^{tree})
+	1:tree::$(git rev-parse base~1^{tree})
+	1:tree::$(git rev-parse base~2^{tree})
+	2:tree:right/:$(git rev-parse topic:right)
+	2:tree:right/:$(git rev-parse base~1:right)
+	2:tree:right/:$(git rev-parse base~2:right)
+	3:blob:right/d:$(git rev-parse base~1:right/d)
+	4:blob:right/c:$(git rev-parse base~2:right/c)
+	4:blob:right/c:$(git rev-parse topic:right/c)
+	5:tree:left/:$(git rev-parse base:left)
+	5:tree:left/:$(git rev-parse base~2:left)
+	6:blob:left/b:$(git rev-parse base~2:left/b)
+	6:blob:left/b:$(git rev-parse base:left/b)
+	7:blob:a:$(git rev-parse base~2:a)
 	blobs:6
+	commits:4
 	trees:9
 	EOF
 
@@ -59,19 +64,23 @@  test_expect_success 'topic only' '
 	test-tool path-walk -- topic >out &&
 
 	cat >expect <<-EOF &&
-	0:tree::$(git rev-parse topic^{tree})
-	0:tree::$(git rev-parse base~1^{tree})
-	0:tree::$(git rev-parse base~2^{tree})
-	1:tree:right/:$(git rev-parse topic:right)
-	1:tree:right/:$(git rev-parse base~1:right)
-	1:tree:right/:$(git rev-parse base~2:right)
-	2:blob:right/d:$(git rev-parse base~1:right/d)
-	3:blob:right/c:$(git rev-parse base~2:right/c)
-	3:blob:right/c:$(git rev-parse topic:right/c)
-	4:tree:left/:$(git rev-parse base~2:left)
-	5:blob:left/b:$(git rev-parse base~2:left/b)
-	6:blob:a:$(git rev-parse base~2:a)
+	0:commit::$(git rev-parse topic)
+	0:commit::$(git rev-parse base~1)
+	0:commit::$(git rev-parse base~2)
+	1:tree::$(git rev-parse topic^{tree})
+	1:tree::$(git rev-parse base~1^{tree})
+	1:tree::$(git rev-parse base~2^{tree})
+	2:tree:right/:$(git rev-parse topic:right)
+	2:tree:right/:$(git rev-parse base~1:right)
+	2:tree:right/:$(git rev-parse base~2:right)
+	3:blob:right/d:$(git rev-parse base~1:right/d)
+	4:blob:right/c:$(git rev-parse base~2:right/c)
+	4:blob:right/c:$(git rev-parse topic:right/c)
+	5:tree:left/:$(git rev-parse base~2:left)
+	6:blob:left/b:$(git rev-parse base~2:left/b)
+	7:blob:a:$(git rev-parse base~2:a)
 	blobs:5
+	commits:3
 	trees:7
 	EOF
 
@@ -82,15 +91,66 @@  test_expect_success 'topic, not base' '
 	test-tool path-walk -- topic --not base >out &&
 
 	cat >expect <<-EOF &&
+	0:commit::$(git rev-parse topic)
+	1:tree::$(git rev-parse topic^{tree})
+	2:tree:right/:$(git rev-parse topic:right)
+	3:blob:right/d:$(git rev-parse topic:right/d)
+	4:blob:right/c:$(git rev-parse topic:right/c)
+	5:tree:left/:$(git rev-parse topic:left)
+	6:blob:left/b:$(git rev-parse topic:left/b)
+	7:blob:a:$(git rev-parse topic:a)
+	blobs:4
+	commits:1
+	trees:3
+	EOF
+
+	test_cmp_sorted expect out
+'
+
+test_expect_success 'topic, not base, only blobs' '
+	test-tool path-walk --no-trees --no-commits \
+		-- topic --not base >out &&
+
+	cat >expect <<-EOF &&
+	commits:0
+	trees:0
+	0:blob:right/d:$(git rev-parse topic:right/d)
+	1:blob:right/c:$(git rev-parse topic:right/c)
+	2:blob:left/b:$(git rev-parse topic:left/b)
+	3:blob:a:$(git rev-parse topic:a)
+	blobs:4
+	EOF
+
+	test_cmp_sorted expect out
+'
+
+# No, this doesn't make a lot of sense for the path-walk API,
+# but it is possible to do.
+test_expect_success 'topic, not base, only commits' '
+	test-tool path-walk --no-blobs --no-trees \
+		-- topic --not base >out &&
+
+	cat >expect <<-EOF &&
+	0:commit::$(git rev-parse topic)
+	commits:1
+	trees:0
+	blobs:0
+	EOF
+
+	test_cmp_sorted expect out
+'
+
+test_expect_success 'topic, not base, only trees' '
+	test-tool path-walk --no-blobs --no-commits \
+		-- topic --not base >out &&
+
+	cat >expect <<-EOF &&
+	commits:0
 	0:tree::$(git rev-parse topic^{tree})
 	1:tree:right/:$(git rev-parse topic:right)
-	2:blob:right/d:$(git rev-parse topic:right/d)
-	3:blob:right/c:$(git rev-parse topic:right/c)
-	4:tree:left/:$(git rev-parse topic:left)
-	5:blob:left/b:$(git rev-parse topic:left/b)
-	6:blob:a:$(git rev-parse topic:a)
-	blobs:4
+	2:tree:left/:$(git rev-parse topic:left)
 	trees:3
+	blobs:0
 	EOF
 
 	test_cmp_sorted expect out
@@ -100,17 +160,20 @@  test_expect_success 'topic, not base, boundary' '
 	test-tool path-walk -- --boundary topic --not base >out &&
 
 	cat >expect <<-EOF &&
-	0:tree::$(git rev-parse topic^{tree})
-	0:tree::$(git rev-parse base~1^{tree})
-	1:tree:right/:$(git rev-parse topic:right)
-	1:tree:right/:$(git rev-parse base~1:right)
-	2:blob:right/d:$(git rev-parse base~1:right/d)
-	3:blob:right/c:$(git rev-parse base~1:right/c)
-	3:blob:right/c:$(git rev-parse topic:right/c)
-	4:tree:left/:$(git rev-parse base~1:left)
-	5:blob:left/b:$(git rev-parse base~1:left/b)
-	6:blob:a:$(git rev-parse base~1:a)
+	0:commit::$(git rev-parse topic)
+	0:commit::$(git rev-parse base~1)
+	1:tree::$(git rev-parse topic^{tree})
+	1:tree::$(git rev-parse base~1^{tree})
+	2:tree:right/:$(git rev-parse topic:right)
+	2:tree:right/:$(git rev-parse base~1:right)
+	3:blob:right/d:$(git rev-parse base~1:right/d)
+	4:blob:right/c:$(git rev-parse base~1:right/c)
+	4:blob:right/c:$(git rev-parse topic:right/c)
+	5:tree:left/:$(git rev-parse base~1:left)
+	6:blob:left/b:$(git rev-parse base~1:left/b)
+	7:blob:a:$(git rev-parse base~1:a)
 	blobs:5
+	commits:2
 	trees:5
 	EOF