@@ -19,6 +19,10 @@
#include "tree-walk.h"
#include "pseudo-merge.h"
#include "oid-array.h"
+#include "config.h"
+#include "alloc.h"
+#include "refs.h"
+#include "strmap.h"
struct bitmapped_commit {
struct commit *commit;
@@ -443,6 +447,7 @@ static int fill_bitmap_tree(struct bitmap *bitmap,
}
static int reused_bitmaps_nr;
+static int reused_pseudo_merge_bitmaps_nr;
static int fill_bitmap_commit(struct bb_commit *ent,
struct commit *commit,
@@ -467,7 +472,7 @@ static int fill_bitmap_commit(struct bb_commit *ent,
struct bitmap *remapped = bitmap_new();
if (commit->object.flags & BITMAP_PSEUDO_MERGE)
- old = NULL;
+ old = pseudo_merge_bitmap_for_commit(old_bitmap, c);
else
old = bitmap_for_commit(old_bitmap, c);
/*
@@ -478,7 +483,10 @@ static int fill_bitmap_commit(struct bb_commit *ent,
if (old && !rebuild_bitmap(mapping, old, remapped)) {
bitmap_or(ent->bitmap, remapped);
bitmap_free(remapped);
- reused_bitmaps_nr++;
+ if (commit->object.flags & BITMAP_PSEUDO_MERGE)
+ reused_pseudo_merge_bitmaps_nr++;
+ else
+ reused_bitmaps_nr++;
continue;
}
bitmap_free(remapped);
@@ -604,6 +612,9 @@ int bitmap_writer_build(struct packing_data *to_pack)
the_repository);
trace2_data_intmax("pack-bitmap-write", the_repository,
"building_bitmaps_reused", reused_bitmaps_nr);
+ trace2_data_intmax("pack-bitmap-write", the_repository,
+ "building_bitmaps_pseudo_merge_reused",
+ reused_pseudo_merge_bitmaps_nr);
stop_progress(&writer.progress);
@@ -1316,6 +1316,37 @@ static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git,
return cb.base;
}
+struct ewah_bitmap *pseudo_merge_bitmap_for_commit(struct bitmap_index *bitmap_git,
+ struct commit *commit)
+{
+ struct commit_list *p;
+ struct bitmap *parents;
+ struct pseudo_merge *match = NULL;
+
+ if (!bitmap_git->pseudo_merges.nr)
+ return NULL;
+
+ parents = bitmap_new();
+
+ for (p = commit->parents; p; p = p->next) {
+ int pos = bitmap_position(bitmap_git, &p->item->object.oid);
+ if (pos < 0 || pos >= bitmap_num_objects(bitmap_git))
+ goto done;
+
+ bitmap_set(parents, pos);
+ }
+
+ match = pseudo_merge_for_parents(&bitmap_git->pseudo_merges,
+ parents);
+
+done:
+ bitmap_free(parents);
+ if (match)
+ return pseudo_merge_bitmap(&bitmap_git->pseudo_merges, match);
+
+ return NULL;
+}
+
static void unsatisfy_all_pseudo_merges(struct bitmap_index *bitmap_git)
{
uint32_t i;
@@ -2808,6 +2839,7 @@ void free_bitmap_index(struct bitmap_index *b)
*/
close_midx_revindex(b->midx);
}
+ free_pseudo_merge_map(&b->pseudo_merges);
free(b);
}
@@ -119,6 +119,8 @@ int rebuild_bitmap(const uint32_t *reposition,
struct bitmap *dest);
struct ewah_bitmap *bitmap_for_commit(struct bitmap_index *bitmap_git,
struct commit *commit);
+struct ewah_bitmap *pseudo_merge_bitmap_for_commit(struct bitmap_index *bitmap_git,
+ struct commit *commit);
void bitmap_writer_select_commits(struct commit **indexed_commits,
unsigned int indexed_commits_nr);
int bitmap_writer_build(struct packing_data *to_pack);
@@ -682,3 +682,58 @@ int cascade_pseudo_merges(const struct pseudo_merge_map *pm,
return ret;
}
+
+struct pseudo_merge *pseudo_merge_for_parents(const struct pseudo_merge_map *pm,
+ struct bitmap *parents)
+{
+ struct pseudo_merge *match = NULL;
+ size_t i;
+
+ if (!pm->nr)
+ return NULL;
+
+ /*
+ * NOTE: this loop is quadratic in the worst-case (where no
+ * matching pseudo-merge bitmaps are found), but in practice
+ * this is OK for a few reasons:
+ *
+ * - Rejecting pseudo-merge bitmaps that do not match the
+ * given commit is done quickly (i.e. `bitmap_equals_ewah()`
+ * returns early when we know the two bitmaps aren't equal.
+ *
+ * - Already matched pseudo-merge bitmaps (which we track with
+ * the `->satisfied` bit here) are skipped as potential
+ * candidates.
+ *
+ * - The number of pseudo-merges should be small (in the
+ * hundreds for most repositories).
+ *
+ * If in the future this semi-quadratic behavior does become a
+ * problem, another approach would be to keep track of which
+ * pseudo-merges are still "viable" after enumerating the
+ * pseudo-merge commit's parents:
+ *
+ * - A pseudo-merge bitmap becomes non-viable when the bit(s)
+ * corresponding to one or more parent(s) of the given
+ * commit are not set in a candidate pseudo-merge's commits
+ * bitmap.
+ *
+ * - After processing all bits, enumerate the remaining set of
+ * viable pseudo-merge bitmaps, and check that their
+ * popcount() matches the number of parents in the given
+ * commit.
+ */
+ for (i = 0; i < pm->nr; i++) {
+ struct pseudo_merge *candidate = use_pseudo_merge(pm, &pm->v[i]);
+ if (!candidate || candidate->satisfied)
+ continue;
+ if (!bitmap_equals_ewah(parents, candidate->commits))
+ continue;
+
+ match = candidate;
+ match->satisfied = 1;
+ break;
+ }
+
+ return match;
+}
@@ -208,4 +208,11 @@ int cascade_pseudo_merges(const struct pseudo_merge_map *pm,
struct bitmap *result,
struct bitmap *roots);
+/*
+ * Returns a pseudo-merge which contains the exact set of commits
+ * listed in the "parents" bitamp, or NULL if none could be found.
+ */
+struct pseudo_merge *pseudo_merge_for_parents(const struct pseudo_merge_map *pm,
+ struct bitmap *parents);
+
#endif
@@ -22,6 +22,10 @@ test_pseudo_merges_cascades () {
test_trace2_data bitmap pseudo_merges_cascades "$1"
}
+test_pseudo_merges_reused () {
+ test_trace2_data pack-bitmap-write building_bitmaps_pseudo_merge_reused "$1"
+}
+
tag_everything () {
git rev-list --all --no-object-names >in &&
perl -lne '
@@ -322,4 +326,64 @@ test_expect_success 'pseudo-merge overlap stale traversal' '
)
'
+test_expect_success 'pseudo-merge reuse' '
+ git init pseudo-merge-reuse &&
+ (
+ cd pseudo-merge-reuse &&
+
+ stable="1641013200" && # 2022-01-01
+ unstable="1672549200" && # 2023-01-01
+
+ for date in $stable $unstable
+ do
+ test_commit_bulk --date "$date +0000" 128 &&
+ test_tick || return 1
+ done &&
+
+ tag_everything &&
+
+ git \
+ -c bitmapPseudoMerge.test.pattern="refs/tags/" \
+ -c bitmapPseudoMerge.test.maxMerges=1 \
+ -c bitmapPseudoMerge.test.threshold=now \
+ -c bitmapPseudoMerge.test.stableThreshold=$(($unstable - 1)) \
+ -c bitmapPseudoMerge.test.stableSize=512 \
+ repack -adb &&
+
+ test_pseudo_merges >merges &&
+ test_line_count = 2 merges &&
+
+ test_pseudo_merge_commits 0 >stable-oids.before &&
+ test_pseudo_merge_commits 1 >unstable-oids.before &&
+
+ : >trace2.txt &&
+ GIT_TRACE2_EVENT=$PWD/trace2.txt git \
+ -c bitmapPseudoMerge.test.pattern="refs/tags/" \
+ -c bitmapPseudoMerge.test.maxMerges=2 \
+ -c bitmapPseudoMerge.test.threshold=now \
+ -c bitmapPseudoMerge.test.stableThreshold=$(($unstable - 1)) \
+ -c bitmapPseudoMerge.test.stableSize=512 \
+ repack -adb &&
+
+ test_pseudo_merges_reused 1 <trace2.txt &&
+
+ test_pseudo_merges >merges &&
+ test_line_count = 3 merges &&
+
+ test_pseudo_merge_commits 0 >stable-oids.after &&
+ for i in 1 2
+ do
+ test_pseudo_merge_commits $i || return 1
+ done >unstable-oids.after &&
+
+ sort -u <stable-oids.before >expect &&
+ sort -u <stable-oids.after >actual &&
+ test_cmp expect actual &&
+
+ sort -u <unstable-oids.before >expect &&
+ sort -u <unstable-oids.after >actual &&
+ test_cmp expect actual
+ )
+'
+
test_done
This patch implements support for reusing existing pseudo-merge commits when writing bitmaps when there is an existing pseudo-merge bitmap which has exactly the same set of parents as one that we are about to write. Note that unstable pseudo-merges are likely to change between consecutive repacks, and so are generally poor candidates for reuse. However, stable pseudo-merges (see the configuration option 'bitmapPseudoMerge.<name>.stableThreshold') are by definition unlikely to change between runs (as they represent long-running branches). Because there is no index from a *set* of pseudo-merge parents to a matching pseudo-merge bitmap, we have to construct the bitmap corresponding to the set of parents for each pending pseudo-merge commit and see if a matching bitmap exists. This is technically quadratic in the number of pseudo-merges, but is OK in practice for a couple of reasons: - non-matching pseudo-merge bitmaps are rejected quickly as soon as they differ in a single bit - already-matched pseudo-merge bitmaps are discarded from subsequent rounds of search - the number of pseudo-merges is generally small, even for large repositories In order to do this, implement (a) a function that finds a matching pseudo-merge given some uncompressed bitset describing its parents, (b) a function that computes the bitset of parents for a given pseudo-merge commit, and (c) call that function before computing the set of reachable objects for some pending pseudo-merge. Signed-off-by: Taylor Blau <me@ttaylorr.com> --- pack-bitmap-write.c | 15 ++++++-- pack-bitmap.c | 32 +++++++++++++++++ pack-bitmap.h | 2 ++ pseudo-merge.c | 55 ++++++++++++++++++++++++++++ pseudo-merge.h | 7 ++++ t/t5333-pseudo-merge-bitmaps.sh | 64 +++++++++++++++++++++++++++++++++ 6 files changed, 173 insertions(+), 2 deletions(-)