@@ -15,7 +15,8 @@ SYNOPSIS
[--revs [--unpacked | --all]] [--keep-pack=<pack-name>]
[--cruft] [--cruft-expiration=<time>]
[--stdout [--filter=<filter-spec>] | <base-name>]
- [--shallow] [--keep-true-parents] [--[no-]sparse] < <object-list>
+ [--shallow] [--keep-true-parents] [--[no-]sparse]
+ [--path-walk] < <object-list>
DESCRIPTION
@@ -345,6 +346,16 @@ raise an error.
Restrict delta matches based on "islands". See DELTA ISLANDS
below.
+--path-walk::
+ By default, `git pack-objects` walks objects in an order that
+ presents trees and blobs in an order unrelated to the path they
+ appear relative to a commit's root tree. The `--path-walk` option
+ enables a different walking algorithm that organizes trees and
+ blobs by path. This has the potential to improve delta compression
+ especially in the presence of filenames that cause collisions in
+ Git's default name-hash algorithm. Due to changing how the objects
+ are walked, this option is not compatible with `--delta-islands`,
+ `--shallow`, or `--filter`.
DELTA ISLANDS
-------------
@@ -69,4 +69,5 @@ Examples
--------
See example usages in:
- `t/helper/test-path-walk.c`
+ `t/helper/test-path-walk.c`,
+ `builtin/pack-objects.c`
@@ -39,6 +39,9 @@
#include "promisor-remote.h"
#include "pack-mtimes.h"
#include "parse-options.h"
+#include "blob.h"
+#include "tree.h"
+#include "path-walk.h"
/*
* Objects we are going to pack are collected in the `to_pack` structure.
@@ -215,6 +218,7 @@ static int delta_search_threads;
static int pack_to_stdout;
static int sparse;
static int thin;
+static int path_walk;
static int num_preferred_base;
static struct progress *progress_state;
@@ -4143,6 +4147,105 @@ static void mark_bitmap_preferred_tips(void)
}
}
+static inline int is_oid_interesting(struct repository *repo,
+ struct object_id *oid)
+{
+ struct object *o = lookup_object(repo, oid);
+ return o && !(o->flags & UNINTERESTING);
+}
+
+static int add_objects_by_path(const char *path,
+ struct oid_array *oids,
+ enum object_type type,
+ void *data)
+{
+ struct object_entry **delta_list;
+ size_t oe_start = to_pack.nr_objects;
+ size_t oe_end;
+ unsigned int sub_list_size;
+ unsigned int *processed = data;
+
+ /*
+ * First, add all objects to the packing data, including the ones
+ * marked UNINTERESTING (translated to 'exclude') as they can be
+ * used as delta bases.
+ */
+ for (size_t i = 0; i < oids->nr; i++) {
+ int exclude;
+ struct object_info oi = OBJECT_INFO_INIT;
+ struct object_id *oid = &oids->oid[i];
+
+ /* Skip objects that do not exist locally. */
+ if (exclude_promisor_objects &&
+ oid_object_info_extended(the_repository, oid, &oi,
+ OBJECT_INFO_FOR_PREFETCH) < 0)
+ continue;
+
+ exclude = !is_oid_interesting(the_repository, oid);
+
+ if (exclude && !thin)
+ continue;
+
+ add_object_entry(oid, type, path, exclude);
+ }
+
+ oe_end = to_pack.nr_objects;
+
+ /* We can skip delta calculations if it is a no-op. */
+ if (oe_end == oe_start || !window)
+ return 0;
+
+ sub_list_size = 0;
+ ALLOC_ARRAY(delta_list, oe_end - oe_start);
+
+ for (size_t i = 0; i < oe_end - oe_start; i++) {
+ struct object_entry *entry = to_pack.objects + oe_start + i;
+
+ if (!should_attempt_deltas(entry))
+ continue;
+
+ delta_list[sub_list_size++] = entry;
+ }
+
+ /*
+ * Find delta bases among this list of objects that all match the same
+ * path. This causes the delta compression to be interleaved in the
+ * object walk, which can lead to confusing progress indicators. This is
+ * also incompatible with threaded delta calculations. In the future,
+ * consider creating a list of regions in the full to_pack.objects array
+ * that could be picked up by the threaded delta computation.
+ */
+ if (sub_list_size && window) {
+ QSORT(delta_list, sub_list_size, type_size_sort);
+ find_deltas(delta_list, &sub_list_size, window, depth, processed);
+ }
+
+ free(delta_list);
+ return 0;
+}
+
+static void get_object_list_path_walk(struct rev_info *revs)
+{
+ struct path_walk_info info = PATH_WALK_INFO_INIT;
+ unsigned int processed = 0;
+
+ info.revs = revs;
+ info.path_fn = add_objects_by_path;
+ info.path_fn_data = &processed;
+ revs->tag_objects = 1;
+
+ /*
+ * Allow the --[no-]sparse option to be interesting here, if only
+ * for testing purposes. Paths with no interesting objects will not
+ * contribute to the resulting pack, but only create noisy preferred
+ * base objects.
+ */
+ info.prune_all_uninteresting = sparse;
+
+ if (walk_objects_by_path(&info))
+ die(_("failed to pack objects via path-walk"));
+}
+
static void get_object_list(struct rev_info *revs, int ac, const char **av)
{
struct setup_revision_opt s_r_opt = {
@@ -4189,7 +4292,7 @@ static void get_object_list(struct rev_info *revs, int ac, const char **av)
warn_on_object_refname_ambiguity = save_warning;
- if (use_bitmap_index && !get_object_list_from_bitmap(revs))
+ if (use_bitmap_index && !path_walk && !get_object_list_from_bitmap(revs))
return;
if (use_delta_islands)
@@ -4198,15 +4301,19 @@ static void get_object_list(struct rev_info *revs, int ac, const char **av)
if (write_bitmap_index)
mark_bitmap_preferred_tips();
- if (prepare_revision_walk(revs))
- die(_("revision walk setup failed"));
- mark_edges_uninteresting(revs, show_edge, sparse);
-
if (!fn_show_object)
fn_show_object = show_object;
- traverse_commit_list(revs,
- show_commit, fn_show_object,
- NULL);
+
+ if (path_walk) {
+ get_object_list_path_walk(revs);
+ } else {
+ if (prepare_revision_walk(revs))
+ die(_("revision walk setup failed"));
+ mark_edges_uninteresting(revs, show_edge, sparse);
+ traverse_commit_list(revs,
+ show_commit, fn_show_object,
+ NULL);
+ }
if (unpack_unreachable_expiration) {
revs->ignore_missing_links = 1;
@@ -4404,6 +4511,8 @@ int cmd_pack_objects(int argc,
N_("use the sparse reachability algorithm")),
OPT_BOOL(0, "thin", &thin,
N_("create thin packs")),
+ OPT_BOOL(0, "path-walk", &path_walk,
+ N_("use the path-walk API to walk objects when possible")),
OPT_BOOL(0, "shallow", &shallow,
N_("create packs suitable for shallow fetches")),
OPT_BOOL(0, "honor-pack-keep", &ignore_packed_keep_on_disk,
@@ -4484,7 +4593,27 @@ int cmd_pack_objects(int argc,
window = 0;
strvec_push(&rp, "pack-objects");
- if (thin) {
+
+ if (path_walk && filter_options.choice) {
+ warning(_("cannot use --filter with --path-walk"));
+ path_walk = 0;
+ }
+ if (path_walk && use_delta_islands) {
+ warning(_("cannot use delta islands with --path-walk"));
+ path_walk = 0;
+ }
+ if (path_walk && shallow) {
+ warning(_("cannot use --shallow with --path-walk"));
+ path_walk = 0;
+ }
+ if (path_walk) {
+ strvec_push(&rp, "--boundary");
+ /*
+ * We must disable the bitmaps because we are removing
+ * the --objects / --objects-edge[-aggressive] options.
+ */
+ use_bitmap_index = 0;
+ } else if (thin) {
use_internal_rev_list = 1;
strvec_push(&rp, shallow
? "--objects-edge-aggressive"
@@ -674,4 +674,21 @@ do
'
done
+# Basic "repack everything" test
+test_expect_success '--path-walk pack everything' '
+ git -C server rev-parse HEAD >in &&
+ git -C server pack-objects --stdout --revs --path-walk <in >out.pack &&
+ git -C server index-pack --stdin <out.pack
+'
+
+# Basic "thin pack" test
+test_expect_success '--path-walk thin pack' '
+ cat >in <<-EOF &&
+ $(git -C server rev-parse HEAD)
+ ^$(git -C server rev-parse HEAD~2)
+ EOF
+ git -C server pack-objects --thin --stdout --revs --path-walk <in >out.pack &&
+ git -C server index-pack --fix-thin --stdin <out.pack
+'
+
test_done