@@ -1,3 +1,7 @@
+commitGraph.maxNewFilters::
+ Specifies the default value for the `--max-new-filters` option of `git
+ commit-graph write` (see linkgit:git-commit-graph[1]).
+
commitGraph.readChangedPaths::
If true, then git will use the changed-path Bloom filters in the
commit-graph file (if it exists, and they are present). Defaults to
@@ -67,6 +67,10 @@ this option is given, future commit-graph writes will automatically assume
that this option was intended. Use `--no-changed-paths` to stop storing this
data.
+
+With the `--max-new-filters=<n>` option, generate at most `n` new Bloom
+filters (if `--changed-paths` is specified). If `n` is `-1`, no limit is
+enforced. Overrides the `commitGraph.maxNewFilters` configuration.
++
With the `--split[=<strategy>]` option, write the commit-graph as a
chain of multiple commit-graph files stored in
`<dir>/info/commit-graphs`. Commit-graph layers are merged based on the
@@ -51,6 +51,21 @@ static int load_bloom_filter_from_graph(struct commit_graph *g,
else
start_index = 0;
+ if ((start_index == end_index) &&
+ (g->bloom_large.word_alloc && !bitmap_get(&g->bloom_large, lex_pos))) {
+ /*
+ * If the filter is zero-length, either (1) the filter has no
+ * changes, (2) the filter has too many changes, or (3) it
+ * wasn't computed (e.g., due to '--max-new-filters').
+ *
+ * If either (1) or (2) is the case, the 'large' bit will be set
+ * for this Bloom filter. If it is unset, then it wasn't
+ * computed. In that case, return nothing, since we don't have
+ * that filter in the graph.
+ */
+ return 0;
+ }
+
filter->len = end_index - start_index;
filter->data = (unsigned char *)(g->chunk_bloom_data +
sizeof(unsigned char) * start_index +
@@ -13,7 +13,8 @@ static char const * const builtin_commit_graph_usage[] = {
N_("git commit-graph verify [--object-dir <objdir>] [--shallow] [--[no-]progress]"),
N_("git commit-graph write [--object-dir <objdir>] [--append] "
"[--split[=<strategy>]] [--reachable|--stdin-packs|--stdin-commits] "
- "[--changed-paths] [--[no-]progress] <split options>"),
+ "[--changed-paths] [--[no-]max-new-filters <n>] [--[no-]progress] "
+ "<split options>"),
NULL
};
@@ -25,7 +26,8 @@ static const char * const builtin_commit_graph_verify_usage[] = {
static const char * const builtin_commit_graph_write_usage[] = {
N_("git commit-graph write [--object-dir <objdir>] [--append] "
"[--split[=<strategy>]] [--reachable|--stdin-packs|--stdin-commits] "
- "[--changed-paths] [--[no-]progress] <split options>"),
+ "[--changed-paths] [--[no-]max-new-filters <n>] [--[no-]progress] "
+ "<split options>"),
NULL
};
@@ -162,6 +164,23 @@ static int read_one_commit(struct oidset *commits, struct progress *progress,
return 0;
}
+/*
+ * parse-options callback for `--[no-]max-new-filters`.
+ *
+ * Stores the parsed limit in the int that opt->value points at. The negated
+ * form (unset != 0) stores -1. Otherwise arg must be a complete base-10
+ * integer that fits in an int: an empty argument, trailing characters after
+ * the number, or a value outside the int range is rejected, and the
+ * destination is left untouched in that case.
+ *
+ * Returns 0 on success, or the result of error() on malformed input.
+ */
+static int write_option_max_new_filters(const struct option *opt,
+					const char *arg,
+					int unset)
+{
+	int *to = opt->value;
+	char *end;
+	long parsed;
+
+	if (unset) {
+		*to = -1;
+		return 0;
+	}
+
+	parsed = strtol(arg, &end, 10);
+	/*
+	 * end == arg:  no digits were consumed (e.g. `--max-new-filters=`),
+	 *              which strtol() reports as 0 rather than as an error.
+	 * *end:        characters remain after the number.
+	 * round-trip:  the long result does not fit in the int destination.
+	 */
+	if (end == arg || *end || parsed != (int)parsed)
+		return error(_("%s expects a numerical value"),
+			     optname(opt, opt->flags));
+
+	*to = (int)parsed;
+	return 0;
+}
+
static int graph_write(int argc, const char **argv)
{
struct string_list pack_indexes = STRING_LIST_INIT_NODUP;
@@ -197,6 +216,9 @@ static int graph_write(int argc, const char **argv)
N_("maximum ratio between two levels of a split commit-graph")),
OPT_EXPIRY_DATE(0, "expire-time", &write_opts.expire_time,
N_("only expire files older than a given date-time")),
+ OPT_CALLBACK_F(0, "max-new-filters", &write_opts.max_new_filters,
+ NULL, N_("maximum number of changed-path Bloom filters to compute"),
+ 0, write_option_max_new_filters),
OPT_END(),
};
@@ -205,6 +227,7 @@ static int graph_write(int argc, const char **argv)
write_opts.size_multiple = 2;
write_opts.max_commits = 0;
write_opts.expire_time = 0;
+ write_opts.max_new_filters = -1;
trace2_cmd_mode("write");
@@ -270,6 +293,16 @@ static int graph_write(int argc, const char **argv)
return result;
}
+/*
+ * Config callback for the commit-graph builtin.
+ *
+ * Stores the integer value of `commitGraph.maxNewFilters` in
+ * write_opts.max_new_filters and returns 0. Every other key is forwarded,
+ * together with the caller's cb pointer, to git_default_config(), whose
+ * return value is passed through.
+ *
+ * NOTE(review): graph_write() assigns write_opts.max_new_filters = -1 among
+ * its defaults, while this callback is run from cmd_commit_graph(). If that
+ * assignment happens after the config has been read, the configured value
+ * is discarded -- confirm the ordering.
+ */
+static int git_commit_graph_config(const char *var, const char *value, void *cb)
+{
+	/* Anything that is not our key belongs to the default handler. */
+	if (strcmp(var, "commitgraph.maxnewfilters"))
+		return git_default_config(var, value, cb);
+
+	write_opts.max_new_filters = git_config_int(var, value);
+	return 0;
+}
+
int cmd_commit_graph(int argc, const char **argv, const char *prefix)
{
static struct option builtin_commit_graph_options[] = {
@@ -283,7 +316,7 @@ int cmd_commit_graph(int argc, const char **argv, const char *prefix)
usage_with_options(builtin_commit_graph_usage,
builtin_commit_graph_options);
- git_config(git_default_config, NULL);
+ git_config(git_commit_graph_config, &opts);
argc = parse_options(argc, argv, prefix,
builtin_commit_graph_options,
builtin_commit_graph_usage,
@@ -948,7 +948,8 @@ struct tree *get_commit_tree_in_graph(struct repository *r, const struct commit
}
static int get_bloom_filter_large_in_graph(struct commit_graph *g,
- const struct commit *c)
+ const struct commit *c,
+ uint32_t max_changed_paths)
{
uint32_t graph_pos = commit_graph_position(c);
if (graph_pos == COMMIT_NOT_FROM_GRAPH)
@@ -1475,6 +1476,7 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx)
int i;
struct progress *progress = NULL;
int *sorted_commits;
+ int max_new_filters;
init_bloom_filters();
ctx->bloom_large = bitmap_word_alloc(ctx->commits.nr / BITS_IN_EWORD + 1);
@@ -1491,10 +1493,15 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx)
ctx->order_by_pack ? commit_pos_cmp : commit_gen_cmp,
&ctx->commits);
+ max_new_filters = ctx->opts->max_new_filters >= 0 ?
+ ctx->opts->max_new_filters : ctx->commits.nr;
+
for (i = 0; i < ctx->commits.nr; i++) {
int pos = sorted_commits[i];
struct commit *c = ctx->commits.list[pos];
- if (get_bloom_filter_large_in_graph(ctx->r->objects->commit_graph, c)) {
+ if (get_bloom_filter_large_in_graph(ctx->r->objects->commit_graph,
+ c,
+ ctx->bloom_settings->max_changed_paths)) {
bitmap_set(ctx->bloom_large, pos);
ctx->count_bloom_filter_known_large++;
} else {
@@ -1502,7 +1509,7 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx)
struct bloom_filter *filter = get_or_compute_bloom_filter(
ctx->r,
c,
- 1,
+ ctx->count_bloom_filter_computed < max_new_filters,
ctx->bloom_settings,
&computed);
if (computed) {
@@ -1512,7 +1519,8 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx)
ctx->count_bloom_filter_found_large++;
}
}
- ctx->total_bloom_filter_data_size += sizeof(unsigned char) * filter->len;
+ if (filter)
+ ctx->total_bloom_filter_data_size += sizeof(unsigned char) * filter->len;
}
display_progress(progress, i + 1);
}
@@ -115,6 +115,7 @@ struct commit_graph_opts {
int max_commits;
timestamp_t expire_time;
enum commit_graph_split_flags flags;
+ int max_new_filters;
};
/*
@@ -286,4 +286,23 @@ test_expect_success 'Bloom generation does not recompute too-large filters' '
)
'
+test_expect_success 'Bloom generation is limited by --max-new-filters' '
+ (
+ cd limits &&
+ test_commit c2 filter &&
+ test_commit c3 filter &&
+ test_commit c4 no-filter &&
+ test_bloom_filters_computed "--reachable --changed-paths --split=replace --max-new-filters=2" \
+ 2 0 2
+ )
+'
+
+test_expect_success 'Bloom generation backfills previously-skipped filters' '
+ (
+ cd limits &&
+ test_bloom_filters_computed "--reachable --changed-paths --split=replace --max-new-filters=1" \
+ 2 0 1
+ )
+'
+
test_done
Introduce a command-line flag and configuration variable to fill in the 'max_new_filters' variable introduced by the previous patch. The command-line option '--max-new-filters' takes precedence over 'commitGraph.maxNewFilters', which is the default value. '--no-max-new-filters' can also be provided, which sets the value back to '-1', indicating that an unlimited number of new Bloom filters may be generated. (OPT_INTEGER only allows setting the '--no-' variant back to '0', hence a custom callback was used instead). Signed-off-by: Taylor Blau <me@ttaylorr.com> --- Documentation/config/commitgraph.txt | 4 +++ Documentation/git-commit-graph.txt | 4 +++ bloom.c | 15 +++++++++++ builtin/commit-graph.c | 39 +++++++++++++++++++++++++--- commit-graph.c | 16 +++++++++--- commit-graph.h | 1 + t/t4216-log-bloom.sh | 19 ++++++++++++++ 7 files changed, 91 insertions(+), 7 deletions(-)