Message ID | 9831c23eadbcd990ca09628e5846056e4879ee3d.1678468864.git.gitgitgadget@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | ref-filter: ahead/behind counting, faster --merged option | expand |
On Fri, Mar 10 2023, Derrick Stolee via GitGitGadget wrote: > From: Derrick Stolee <derrickstolee@github.com> > [...] > +void ahead_behind(struct commit **commits, size_t commits_nr, > + struct ahead_behind_count *counts, size_t counts_nr) > +{ > + struct prio_queue queue = { compare_commits_by_gen_then_commit_date }; I see we have some of this already in this file, so maybe we should leave it for a subsequent cleanup, but as DEVOPTS=extra-all notes here we're using a positional initializer. We could instead be more explicit, and do: { .compare = compare_commits_by_gen_then_commit_date } But as noted, maybe for asubsequent cleanup... > + size_t width = (commits_nr + BITS_IN_EWORD - 1) / BITS_IN_EWORD; > + size_t i; Nit: Consider dropping this "size_t i" line, and instead... > + > + if (!commits_nr || !counts_nr) > + return; > + > + for (i = 0; i < counts_nr; i++) { ...just make this "for (size_t i = 0; ...", ditto the 2x ones below. > +struct ahead_behind_count { > + /** > + * As input, the *_index members indicate which positions in > + * the 'tips' array correspond to the tip and base of this > + * comparison. > + */ > + size_t tip_index; > + size_t base_index; > + > + /** > + * These values store the computed counts for each side of the > + * symmetric difference: > + * > + * 'ahead' stores the number of commits reachable from the tip > + * and not reachable from the base. > + * > + * 'behind' stores the number of commits reachable from the base > + * and not reachable from the tip. > + */ > + unsigned int ahead; > + unsigned int behind; Even though this is the tip of the iceberg in terms of our codebase overall, can't we just use "size_t" for counts in new APIs? Trying to squash this into the end-state seems to work: diff --git a/commit-reach.h b/commit-reach.h index 2269fab8261..108651213d9 100644 --- a/commit-reach.h +++ b/commit-reach.h @@ -123,8 +123,8 @@ struct ahead_behind_count { * 'behind' stores the number of commits reachable from the base * and not reachable from the tip. */ - unsigned int ahead; - unsigned int behind; + size_t ahead; + size_t behind; }; /* diff --git a/ref-filter.c b/ref-filter.c index cdc054beede..b328db696bf 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -2013,7 +2013,7 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err) if (ref->counts) { const struct ahead_behind_count *count; count = ref->counts[ahead_behind_atoms++]; - v->s = xstrfmt("%d %d", count->ahead, count->behind); + v->s = xstrfmt("%"PRIuMAX" " "%"PRIuMAX, count->ahead, count->behind); } else { /* Not a commit. */ v->s = xstrdup("");
Ævar Arnfjörð Bjarmason <avarab@gmail.com> writes: >> + /** >> + * These values store the computed counts for each side of the >> + * symmetric difference: >> + * >> + * 'ahead' stores the number of commits reachable from the tip >> + * and not reachable from the base. >> + * >> + * 'behind' stores the number of commits reachable from the base >> + * and not reachable from the tip. >> + */ >> + unsigned int ahead; >> + unsigned int behind; > > Even though this is the tip of the iceberg in terms of our codebase > overall, can't we just use "size_t" for counts in new APIs? I personally do not see a point in becoming so dogmatic. Plain (possibly) 32-bit integers have their places in the code.
On 3/15/2023 12:03 PM, Junio C Hamano wrote: > Ævar Arnfjörð Bjarmason <avarab@gmail.com> writes: > >>> + /** >>> + * These values store the computed counts for each side of the >>> + * symmetric difference: >>> + * >>> + * 'ahead' stores the number of commits reachable from the tip >>> + * and not reachable from the base. >>> + * >>> + * 'behind' stores the number of commits reachable from the base >>> + * and not reachable from the tip. >>> + */ >>> + unsigned int ahead; >>> + unsigned int behind; >> >> Even though this is the tip of the iceberg in terms of our codebase >> overall, can't we just use "size_t" for counts in new APIs? > > I personally do not see a point in becoming so dogmatic. Plain > (possibly) 32-bit integers have their places in the code. In particular, we have 32-bit limits on the commit-graph due to it being unreasonable to have billions of commits in a repository. Thanks, -Stolee
diff --git a/commit-reach.c b/commit-reach.c index 2e33c599a82..338ca8084b2 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -8,6 +8,7 @@ #include "revision.h" #include "tag.h" #include "commit-reach.h" +#include "ewah/ewok.h" /* Remember to update object flag allocation in object.h */ #define PARENT1 (1u<<16) @@ -941,3 +942,98 @@ struct commit_list *get_reachable_subset(struct commit **from, int nr_from, return found_commits; } + +define_commit_slab(bit_arrays, struct bitmap *); +static struct bit_arrays bit_arrays; + +static void insert_no_dup(struct prio_queue *queue, struct commit *c) +{ + if (c->object.flags & PARENT2) + return; + prio_queue_put(queue, c); + c->object.flags |= PARENT2; +} + +static struct bitmap *init_bit_array(struct commit *c, int width) +{ + struct bitmap **bitmap = bit_arrays_at(&bit_arrays, c); + if (!*bitmap) + *bitmap = bitmap_word_alloc(width); + return *bitmap; +} + +static void free_bit_array(struct commit *c) +{ + struct bitmap **bitmap = bit_arrays_at(&bit_arrays, c); + if (!*bitmap) + return; + bitmap_free(*bitmap); + *bitmap = NULL; +} + +void ahead_behind(struct commit **commits, size_t commits_nr, + struct ahead_behind_count *counts, size_t counts_nr) +{ + struct prio_queue queue = { compare_commits_by_gen_then_commit_date }; + size_t width = (commits_nr + BITS_IN_EWORD - 1) / BITS_IN_EWORD; + size_t i; + + if (!commits_nr || !counts_nr) + return; + + for (i = 0; i < counts_nr; i++) { + counts[i].ahead = 0; + counts[i].behind = 0; + } + + ensure_generations_valid(commits, commits_nr); + + init_bit_arrays(&bit_arrays); + + for (i = 0; i < commits_nr; i++) { + struct commit *c = commits[i]; + struct bitmap *bitmap = init_bit_array(c, width); + + bitmap_set(bitmap, i); + insert_no_dup(&queue, c); + } + + while (queue_has_nonstale(&queue)) { + struct commit *c = prio_queue_get(&queue); + struct commit_list *p; + struct bitmap *bitmap_c = init_bit_array(c, width); + + for (i = 0; i < counts_nr; i++) { + int reach_from_tip = !!bitmap_get(bitmap_c, counts[i].tip_index); + int reach_from_base = !!bitmap_get(bitmap_c, counts[i].base_index); + + if (reach_from_tip ^ reach_from_base) { + if (reach_from_base) + counts[i].behind++; + else + counts[i].ahead++; + } + } + + for (p = c->parents; p; p = p->next) { + struct bitmap *bitmap_p; + + parse_commit(p->item); + + bitmap_p = init_bit_array(p->item, width); + bitmap_or(bitmap_p, bitmap_c); + + if (bitmap_popcount(bitmap_p) == commits_nr) + p->item->object.flags |= STALE; + + insert_no_dup(&queue, p->item); + } + + free_bit_array(c); + } + + /* STALE is used here, PARENT2 is used by insert_no_dup(). */ + repo_clear_commit_marks(the_repository, PARENT2 | STALE); + clear_bit_arrays(&bit_arrays); + clear_prio_queue(&queue); +} diff --git a/commit-reach.h b/commit-reach.h index 148b56fea50..f871b5dcce9 100644 --- a/commit-reach.h +++ b/commit-reach.h @@ -104,4 +104,34 @@ struct commit_list *get_reachable_subset(struct commit **from, int nr_from, struct commit **to, int nr_to, unsigned int reachable_flag); +struct ahead_behind_count { + /** + * As input, the *_index members indicate which positions in + * the 'tips' array correspond to the tip and base of this + * comparison. + */ + size_t tip_index; + size_t base_index; + + /** + * These values store the computed counts for each side of the + * symmetric difference: + * + * 'ahead' stores the number of commits reachable from the tip + * and not reachable from the base. + * + * 'behind' stores the number of commits reachable from the base + * and not reachable from the tip. + */ + unsigned int ahead; + unsigned int behind; +}; + +/* + * Given an array of commits and an array of ahead_behind_count pairs, + * compute the ahead/behind counts for each pair. + */ +void ahead_behind(struct commit **commits, size_t commits_nr, + struct ahead_behind_count *counts, size_t counts_nr); + #endif