@@ -1297,6 +1297,73 @@ d0 | main | atexit | | 0.111026 | |
In this example, the "test1" timer was started 4 times and ran for
0.000393 seconds.
+Counter Events::
+
+ Trace2 also provides global counters as an alternative to regions
+ and data events. These make it possible to count an activity of
+ interest, such a call to a library routine, during the program
+ and get a single counter event at the end.
++
+At the end of the program, a single summary event is emitted; this
+value is aggregated across all threads. These events have "summary"
+as their thread name.
++
+For some counters, individual (per-thread) counter events are also
+generated. This may be helpful in understanding how work is balanced
+between threads in some circumstances.
++
+----------------
+static void *load_cache_entries_thread(void *_data)
+{
+ struct load_cache_entries_thread_data *p = _data;
+ int i;
+
+ trace2_thread_start("load_cache_entries");
+ ...
+ trace2_thread_exit();
+}
+
+static unsigned long load_cache_entry_block(struct index_state *istate,
+ struct mem_pool *ce_mem_pool, int offset, int nr, const char *mmap,
+ unsigned long start_offset, const struct cache_entry *previous_ce)
+{
+ int i;
+ unsigned long src_offset = start_offset;
+
+ trace2_counter_increment(TRACE2_COUNTER_ID_TEST2, nr);
+
+ for (i = offset; i < offset + nr; i++) {
+ ...
+ }
+}
+----------------
++
+----------------
+$ export GIT_TRACE2_PERF_BRIEF=1
+$ export GIT_TRACE2_PERF=~/log.perf
+$ git status
+...
+$ cat ~/log.perf
+...
+d0 | main | exit | | 53.977680 | | | code:0
+d0 | th12:load_cache_entries | counter | | 53.977708 | | test | name:test2 value:193220
+d0 | th11:load_cache_entries | counter | | 53.977708 | | test | name:test2 value:193221
+d0 | th10:load_cache_entries | counter | | 53.977708 | | test | name:test2 value:193221
+d0 | th09:load_cache_entries | counter | | 53.977708 | | test | name:test2 value:193221
+d0 | th08:load_cache_entries | counter | | 53.977708 | | test | name:test2 value:193221
+d0 | th07:load_cache_entries | counter | | 53.977708 | | test | name:test2 value:193221
+d0 | th06:load_cache_entries | counter | | 53.977708 | | test | name:test2 value:193221
+d0 | th05:load_cache_entries | counter | | 53.977708 | | test | name:test2 value:193221
+d0 | th04:load_cache_entries | counter | | 53.977708 | | test | name:test2 value:193221
+d0 | th03:load_cache_entries | counter | | 53.977708 | | test | name:test2 value:193221
+d0 | th02:load_cache_entries | counter | | 53.977708 | | test | name:test2 value:193221
+d0 | summary | counter | | 53.977708 | | test | name:test2 value:2125430
+d0 | main | atexit | | 53.977708 | | | code:0
+----------------
++
+This example shows the value computed by each of the 11
+`load_cache_entries` threads and the total across all threads.
+
== Future Work
=== Relationship to the Existing Trace Api (api-trace.txt)
@@ -1020,6 +1020,7 @@ LIB_OBJS += trace.o
LIB_OBJS += trace2.o
LIB_OBJS += trace2/tr2_cfg.o
LIB_OBJS += trace2/tr2_cmd_name.o
+LIB_OBJS += trace2/tr2_ctr.o
LIB_OBJS += trace2/tr2_dst.o
LIB_OBJS += trace2/tr2_sid.o
LIB_OBJS += trace2/tr2_tmr.o
@@ -270,7 +270,7 @@ static int ut_009timer(int argc, const char **argv)
const char *usage_error =
"expect <count> <ms_delay> <threads>";
- struct ut_009_data data = { 0, 0 };
+ struct ut_009_data data = { 0 };
int nr_threads = 0;
int k;
pthread_t *pids = NULL;
@@ -301,6 +301,90 @@ static int ut_009timer(int argc, const char **argv)
return 0;
}
+/*
+ * Single-threaded counter test. Add several values to the TEST1 counter.
+ * The test script can verify that an aggregate Trace2 "counter" event is
+ * emitted containing the sum of the values provided.
+ */
+static int ut_010counter(int argc, const char **argv)
+{
+ const char *usage_error =
+ "expect <v1> [<v2> [...]]";
+ int value;
+ int k;
+
+ if (argc < 1)
+ die("%s", usage_error);
+
+ for (k = 0; k < argc; k++) {
+ if (get_i(&value, argv[k]))
+ die("invalid value[%s] -- %s",
+ argv[k], usage_error);
+ trace2_counter_add(TRACE2_COUNTER_ID_TEST1, value);
+ }
+
+ return 0;
+}
+
+struct ut_011_data {
+ int v1, v2;
+};
+
+static void *ut_011counter_thread_proc(void *_ut_011_data)
+{
+ struct ut_011_data *data = _ut_011_data;
+
+ trace2_thread_start("ut_011");
+
+ trace2_counter_add(TRACE2_COUNTER_ID_TEST2, data->v1);
+ trace2_counter_add(TRACE2_COUNTER_ID_TEST2, data->v2);
+
+ trace2_thread_exit();
+ return NULL;
+}
+
+/*
+ * Multi-threaded counter test. Create several threads that each
+ * increment the TEST2 global counter. The test script can verify
+ * that an individual Trace2 "counter" event for each thread and an
+ * aggregate "counter" event are generated.
+ */
+static int ut_011counter(int argc, const char **argv)
+{
+ const char *usage_error =
+ "expect <v1> <v2> <threads>";
+
+ struct ut_011_data data = { 0, 0 };
+ int nr_threads = 0;
+ int k;
+ pthread_t *pids = NULL;
+
+ if (argc != 3)
+ die("%s", usage_error);
+ if (get_i(&data.v1, argv[0]))
+ die("%s", usage_error);
+ if (get_i(&data.v2, argv[1]))
+ die("%s", usage_error);
+ if (get_i(&nr_threads, argv[2]))
+ die("%s", usage_error);
+
+ CALLOC_ARRAY(pids, nr_threads);
+
+ for (k = 0; k < nr_threads; k++) {
+ if (pthread_create(&pids[k], NULL, ut_011counter_thread_proc, &data))
+ die("failed to create thread[%d]", k);
+ }
+
+ for (k = 0; k < nr_threads; k++) {
+ if (pthread_join(pids[k], NULL))
+ die("failed to join thread[%d]", k);
+ }
+
+ free(pids);
+
+ return 0;
+}
+
/*
* Usage:
* test-tool trace2 <ut_name_1> <ut_usage_1>
@@ -320,6 +404,8 @@ static struct unit_test ut_table[] = {
{ ut_007bug, "007bug", "" },
{ ut_008timer, "008timer", "<count> <ms_delay>" },
{ ut_009timer, "009timer", "<count> <ms_delay> <threads>" },
+ { ut_010counter, "010counter","<v1> [<v2> [<v3> [...]]]" },
+ { ut_011counter, "011counter","<v1> <v2> <threads>" },
};
/* clang-format on */
@@ -219,4 +219,46 @@ test_expect_success 'test stopwatch timers - summary and threads' '
have_timer_event "summary" "test2" 15 actual
'
+# Exercise the global counter "test" in a loop and confirm that we get an
+# event with the sum.
+#
+
+have_counter_event () {
+ thread=$1
+ name=$2
+ value=$3
+ file=$4
+
+ pattern="d0|${thread}|counter||_T_ABS_||test"
+ pattern="${pattern}|name:${name}"
+ pattern="${pattern} value:${value}"
+
+ grep "${pattern}" ${file}
+
+ return $?
+}
+
+test_expect_success 'test global counters - global, single-thead' '
+ test_when_finished "rm trace.perf actual" &&
+ test_config_global trace2.perfBrief 1 &&
+ test_config_global trace2.perfTarget "$(pwd)/trace.perf" &&
+ test-tool trace2 010counter 2 3 5 7 11 13 &&
+ perl "$TEST_DIRECTORY/t0211/scrub_perf.perl" <trace.perf >actual &&
+
+ have_counter_event "summary" "test1" 41 actual
+'
+
+test_expect_success 'test global counters - global+threads' '
+ test_when_finished "rm trace.perf actual" &&
+ test_config_global trace2.perfBrief 1 &&
+ test_config_global trace2.perfTarget "$(pwd)/trace.perf" &&
+ test-tool trace2 011counter 5 10 3 &&
+ perl "$TEST_DIRECTORY/t0211/scrub_perf.perl" <trace.perf >actual &&
+
+ have_counter_event "th01:ut_011" "test2" 15 actual &&
+ have_counter_event "th02:ut_011" "test2" 15 actual &&
+ have_counter_event "th03:ut_011" "test2" 15 actual &&
+ have_counter_event "summary" "test2" 45 actual
+'
+
test_done
@@ -368,4 +368,45 @@ test_expect_success 'test stopwatch timers - global+threads' '
have_timer_event "summary" "test2" 15 trace.event
'
+# Exercise the global counter in a loop and confirm that we get the
+# expected sum in an event record.
+#
+
+have_counter_event () {
+ thread=$1
+ name=$2
+ value=$3
+ file=$4
+
+ pattern="\"event\":\"counter\""
+ pattern="${pattern}.*\"thread\":\"${thread}\""
+ pattern="${pattern}.*\"name\":\"${name}\""
+ pattern="${pattern}.*\"value\":${value}"
+
+ grep "${pattern}" ${file}
+
+ return $?
+}
+
+test_expect_success 'test global counter - global, single-thread' '
+ test_when_finished "rm trace.event" &&
+ test_config_global trace2.eventBrief 1 &&
+ test_config_global trace2.eventTarget "$(pwd)/trace.event" &&
+ test-tool trace2 010counter 2 3 5 7 11 13 &&
+
+ have_counter_event "summary" "test1" 41 trace.event
+'
+
+test_expect_success 'test global counter - global+threads' '
+ test_when_finished "rm trace.event" &&
+ test_config_global trace2.eventBrief 1 &&
+ test_config_global trace2.eventTarget "$(pwd)/trace.event" &&
+ test-tool trace2 011counter 5 10 3 &&
+
+ have_counter_event "th01:ut_011" "test2" 15 trace.event &&
+ have_counter_event "th02:ut_011" "test2" 15 trace.event &&
+ have_counter_event "th03:ut_011" "test2" 15 trace.event &&
+ have_counter_event "summary" "test2" 45 trace.event
+'
+
test_done
@@ -8,6 +8,7 @@
#include "version.h"
#include "trace2/tr2_cfg.h"
#include "trace2/tr2_cmd_name.h"
+#include "trace2/tr2_ctr.h"
#include "trace2/tr2_dst.h"
#include "trace2/tr2_sid.h"
#include "trace2/tr2_sysenv.h"
@@ -114,6 +115,41 @@ static void tr2main_emit_thread_timers(uint64_t us_elapsed_absolute)
us_elapsed_absolute);
}
+static void tr2main_emit_summary_counters(uint64_t us_elapsed_absolute)
+{
+ struct tr2_tgt *tgt_j;
+ int j;
+ struct tr2_counter_block merged = { { { 0 } } };
+
+ /*
+ * Sum across all of the per-thread counter data into
+ * a single composite block of counter values.
+ */
+ tr2tls_aggregate_counter_blocks(&merged);
+
+ /*
+ * Emit "summary" counter events for each composite counter value
+ * that had activity.
+ */
+ for_each_wanted_builtin (j, tgt_j)
+ if (tgt_j->pfn_counter)
+ tr2_emit_counter_block(tgt_j->pfn_counter,
+ us_elapsed_absolute,
+ &merged, "summary");
+}
+
+static void tr2main_emit_thread_counters(uint64_t us_elapsed_absolute)
+{
+ struct tr2_tgt *tgt_j;
+ int j;
+
+ for_each_wanted_builtin (j, tgt_j)
+ if (tgt_j->pfn_counter)
+ tr2tls_emit_counter_blocks_by_thread(
+ tgt_j->pfn_counter,
+ us_elapsed_absolute);
+}
+
static int tr2main_exit_code;
/*
@@ -144,6 +180,9 @@ static void tr2main_atexit_handler(void)
tr2main_emit_thread_timers(us_elapsed_absolute);
tr2main_emit_summary_timers(us_elapsed_absolute);
+ tr2main_emit_thread_counters(us_elapsed_absolute);
+ tr2main_emit_summary_counters(us_elapsed_absolute);
+
for_each_wanted_builtin (j, tgt_j)
if (tgt_j->pfn_atexit)
tgt_j->pfn_atexit(us_elapsed_absolute,
@@ -897,3 +936,14 @@ void trace2_timer_stop(enum trace2_timer_id tid)
tr2_stop_timer(tid);
}
+
+void trace2_counter_add(enum trace2_counter_id cid, uint64_t value)
+{
+ if (!trace2_enabled)
+ return;
+
+ if (cid < 0 || cid >= TRACE2_NUMBER_OF_COUNTERS)
+ BUG("invalid counter id: %d", cid);
+
+ tr2_counter_increment(cid, value);
+}
@@ -52,6 +52,7 @@ struct json_writer;
* [] trace2_data* -- emit region/thread/repo data messages.
* [] trace2_printf* -- legacy trace[1] messages.
* [] trace2_timer* -- start/stop stopwatch timer (messages are deferred).
+ * [] trace2_counter* -- global counters (messages are deferrred).
*/
/*
@@ -573,4 +574,36 @@ enum trace2_timer_id {
void trace2_timer_start(enum trace2_timer_id tid);
void trace2_timer_stop(enum trace2_timer_id tid);
+/*
+ * Define the set of global counters.
+ *
+ * We can add more at any time, but they must be defined at compile
+ * time (to avoid the need to dynamically allocate and synchronize
+ * them between different threads).
+ *
+ * These must start at 0 and be contiguous (because we them elsewhere
+ * as array indexes).
+ *
+ * Any value added to this enum must also be added to the counter
+ * definitions array. See `trace2/tr2_ctr.c:tr2_counter_def_block[]`.
+ */
+enum trace2_counter_id {
+ /*
+ * Define two counters for testing. See `t/helper/test-trace2.c`.
+ * These can be used for ad hoc testing, but should not be used
+ * for permanent analysis code.
+ */
+ TRACE2_COUNTER_ID_TEST1 = 0, /* emits summary event only */
+ TRACE2_COUNTER_ID_TEST2, /* emits summary and thread events */
+
+
+ /* Add additional counter definitions before here. */
+ TRACE2_NUMBER_OF_COUNTERS
+};
+
+/*
+ * Increment global counter by value.
+ */
+void trace2_counter_add(enum trace2_counter_id cid, uint64_t value);
+
#endif /* TRACE2_H */
new file mode 100644
@@ -0,0 +1,67 @@
+#include "cache.h"
+#include "thread-utils.h"
+#include "trace2/tr2_tls.h"
+#include "trace2/tr2_ctr.h"
+
+/*
+ * Define metadata for each global counter. This list must match the
+ * set defined in "enum trace2_counter_id".
+ */
+struct tr2_counter_def {
+ const char *category;
+ const char *name;
+
+ unsigned int want_thread_events:1;
+};
+
+static struct tr2_counter_def tr2_counter_def_block[TRACE2_NUMBER_OF_COUNTERS] = {
+ [TRACE2_COUNTER_ID_TEST1] = { "test", "test1", 0 },
+ [TRACE2_COUNTER_ID_TEST2] = { "test", "test2", 1 },
+};
+
+void tr2_counter_increment(enum trace2_counter_id cid, uint64_t value)
+{
+ struct tr2tls_thread_ctx *ctx = tr2tls_get_self();
+ struct tr2_counter *c = &ctx->counters.counter[cid];
+
+ c->value += value;
+}
+
+void tr2_merge_counter_block(struct tr2_counter_block *merged,
+ const struct tr2_counter_block *src)
+{
+ enum trace2_counter_id cid;
+
+ for (cid = 0; cid < TRACE2_NUMBER_OF_COUNTERS; cid++) {
+ struct tr2_counter *c_merged = &merged->counter[cid];
+ const struct tr2_counter *c = &src->counter[cid];
+
+ c_merged->is_aggregate = 1;
+
+ c_merged->value += c->value;
+ }
+
+ merged->is_aggregate = 1;
+}
+
+void tr2_emit_counter_block(tr2_tgt_evt_counter_t *pfn,
+ uint64_t us_elapsed_absolute,
+ const struct tr2_counter_block *blk,
+ const char *thread_name)
+{
+ enum trace2_counter_id cid;
+
+ for (cid = 0; cid < TRACE2_NUMBER_OF_COUNTERS; cid++) {
+ const struct tr2_counter *c = &blk->counter[cid];
+ const struct tr2_counter_def *d = &tr2_counter_def_block[cid];
+
+ if (!c->value)
+ continue; /* counter was not used */
+
+ if (!d->want_thread_events && !c->is_aggregate)
+ continue; /* per-thread events not wanted */
+
+ pfn(us_elapsed_absolute, thread_name, d->category, d->name,
+ c->value);
+ }
+}
new file mode 100644
@@ -0,0 +1,79 @@
+#ifndef TR2_CTR_H
+#define TR2_CTR_H
+
+#include "trace2.h"
+#include "trace2/tr2_tgt.h"
+
+/*
+ * Define a mechanism to allow global "counters".
+ *
+ * Counters can be used count interesting activity that does not fit
+ * the "region and data" model, such as code called from many
+ * different regions and/or where you want to count a number of items,
+ * but don't have control of when the last item will be processed,
+ * such as counter the number of calls to `lstat()`.
+ *
+ * Counters differ from Trace2 "data" events. Data events are emitted
+ * immediately and are appropriate for documenting loop counters and
+ * etc. Counter values are accumulated during the program and the final
+ * counter value event is emitted at program exit.
+ *
+ * To make this model efficient, we define a compile-time fixed set
+ * of counters and counter ids. This lets us avoid the complexities
+ * of dynamically allocating a counter and sharing that definition
+ * with other threads.
+ *
+ * We define (at compile time) a set of "counter ids" to access the
+ * various counters inside of a fixed size "counter block".
+ *
+ * A counter defintion table provides the counter category and name
+ * so we can eliminate those arguments from the public counter API.
+ * These are defined in a parallel tabel in `tr2_ctr.c`.
+ *
+ * Each thread has a private block of counters in its thread local
+ * storage data so no locks are required for a thread to increment
+ * it's version of the counter. At program exit, the counter blocks
+ * from all of the per-thread counters are added together to give the
+ * final summary value for the each global counter.
+ */
+
+/*
+ * The definition of an individual counter.
+ */
+struct tr2_counter {
+ uint64_t value;
+
+ unsigned int is_aggregate:1;
+};
+
+/*
+ * Compile time fixed block of all defined counters.
+ */
+struct tr2_counter_block {
+ struct tr2_counter counter[TRACE2_NUMBER_OF_COUNTERS];
+
+ unsigned int is_aggregate:1;
+};
+
+/*
+ * Add "value" to the global counter.
+ */
+void tr2_counter_increment(enum trace2_counter_id cid, uint64_t value);
+
+/*
+ * Accumulate counter data from the source block into the merged block.
+ */
+void tr2_merge_counter_block(struct tr2_counter_block *merged,
+ const struct tr2_counter_block *src);
+
+/*
+ * Send counter data for all counters in this block to the target.
+ *
+ * This will generate an event record for each counter that had activity.
+ */
+void tr2_emit_counter_block(tr2_tgt_evt_counter_t *pfn,
+ uint64_t us_elapsed_absolute,
+ const struct tr2_counter_block *blk,
+ const char *thread_name);
+
+#endif /* TR2_CTR_H */
@@ -1,5 +1,6 @@
#include "cache.h"
#include "thread-utils.h"
+#include "trace2/tr2_ctr.h"
#include "trace2/tr2_tls.h"
#include "trace2/tr2_tmr.h"
@@ -233,3 +234,31 @@ void tr2_emit_timers_by_thread(tr2_tgt_evt_timer_t *pfn,
ctx = next;
}
}
+
+void tr2tls_aggregate_counter_blocks(struct tr2_counter_block *merged)
+{
+ struct tr2tls_thread_ctx *ctx = tr2tls_ctx_list;
+
+ while (ctx) {
+ struct tr2tls_thread_ctx *next = ctx->next_ctx;
+
+ tr2_merge_counter_block(merged, &ctx->counters);
+
+ ctx = next;
+ }
+}
+
+void tr2tls_emit_counter_blocks_by_thread(tr2_tgt_evt_counter_t *pfn,
+ uint64_t us_elapsed_absolute)
+{
+ struct tr2tls_thread_ctx *ctx = tr2tls_ctx_list;
+
+ while (ctx) {
+ struct tr2tls_thread_ctx *next = ctx->next_ctx;
+
+ tr2_emit_counter_block(pfn, us_elapsed_absolute, &ctx->counters,
+ ctx->thread_name);
+
+ ctx = next;
+ }
+}
@@ -2,6 +2,7 @@
#define TR2_TLS_H
#include "strbuf.h"
+#include "trace2/tr2_ctr.h"
#include "trace2/tr2_tmr.h"
struct tr2tls_thread_ctx {
@@ -12,10 +13,25 @@ struct tr2tls_thread_ctx {
int thread_id;
struct tr2_timer_block timers;
+ struct tr2_counter_block counters;
char thread_name[FLEX_ARRAY];
};
+/*
+ * Iterate over the global list of threads and aggregate the
+ * counter data into the given counter block. The resulting block
+ * will contain the global counter sums.
+ */
+void tr2tls_aggregate_counter_blocks(struct tr2_counter_block *merged);
+
+/*
+ * Iterate over the global list of threads and emit "per-thread"
+ * counter data for each.
+ */
+void tr2tls_emit_counter_blocks_by_thread(tr2_tgt_evt_counter_t *pfn,
+ uint64_t us_elapsed_absolute);
+
/*
* Iterate over the global list of threads and aggregate the timer
* data into the given timer block. The resulting block will contain