@@ -518,6 +518,11 @@ include::itrace.txt[]
The known limitations include exception handing such as
setjmp/longjmp will have calls/returns not match.
+--merge-callchains::
+ Enable merging deferred user callchains if available. This is the
+ default behavior. If you want to see separate CALLCHAIN_DEFERRED
+ records for some reason, use --no-merge-callchains explicitly.
+
:GMEXAMPLECMD: script
:GMEXAMPLESUBCMD:
include::guest-files.txt[]
@@ -4032,6 +4032,7 @@ int cmd_script(int argc, const char **argv)
bool header_only = false;
bool script_started = false;
bool unsorted_dump = false;
+ bool merge_deferred_callchains = true;
char *rec_script_path = NULL;
char *rep_script_path = NULL;
struct perf_session *session;
@@ -4185,6 +4186,8 @@ int cmd_script(int argc, const char **argv)
"Guest code can be found in hypervisor process"),
OPT_BOOLEAN('\0', "stitch-lbr", &script.stitch_lbr,
"Enable LBR callgraph stitching approach"),
+ OPT_BOOLEAN('\0', "merge-callchains", &merge_deferred_callchains,
+ "Enable merge deferred user callchains"),
OPTS_EVSWITCH(&script.evswitch),
OPT_END()
};
@@ -4440,7 +4443,7 @@ int cmd_script(int argc, const char **argv)
script.tool.throttle = process_throttle_event;
script.tool.unthrottle = process_throttle_event;
script.tool.ordering_requires_timestamps = true;
- script.tool.merge_deferred_callchains = false;
+ script.tool.merge_deferred_callchains = merge_deferred_callchains;
session = perf_session__new(&data, &script.tool);
if (IS_ERR(session))
return PTR_ERR(session);
@@ -1832,3 +1832,27 @@ int sample__for_each_callchain_node(struct thread *thread, struct evsel *evsel,
}
return 0;
}
+
+int sample__merge_deferred_callchain(struct perf_sample *sample_orig,
+ struct perf_sample *sample_callchain)
+{
+ u64 nr_orig = sample_orig->callchain->nr - 1;
+ u64 nr_deferred = sample_callchain->callchain->nr;
+ struct ip_callchain *callchain;
+
+ callchain = calloc(1 + nr_orig + nr_deferred, sizeof(u64));
+ if (callchain == NULL) {
+ sample_orig->deferred_callchain = false;
+ return -ENOMEM;
+ }
+
+ callchain->nr = nr_orig + nr_deferred;
+ /* copy except for the last PERF_CONTEXT_USER_DEFERRED */
+ memcpy(callchain->ips, sample_orig->callchain->ips, nr_orig * sizeof(u64));
+ /* copy deferred use callchains */
+ memcpy(&callchain->ips[nr_orig], sample_callchain->callchain->ips,
+ nr_deferred * sizeof(u64));
+
+ sample_orig->callchain = callchain;
+ return 0;
+}
@@ -317,4 +317,7 @@ int sample__for_each_callchain_node(struct thread *thread, struct evsel *evsel,
struct perf_sample *sample, int max_stack,
bool symbols, callchain_iter_fn cb, void *data);
+int sample__merge_deferred_callchain(struct perf_sample *sample_orig,
+ struct perf_sample *sample_callchain);
+
#endif /* __PERF_CALLCHAIN_H */
@@ -82,6 +82,7 @@ void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
evlist->ctl_fd.ack = -1;
evlist->ctl_fd.pos = -1;
evlist->nr_br_cntr = -1;
+ INIT_LIST_HEAD(&evlist->deferred_samples);
}
struct evlist *evlist__new(void)
@@ -84,6 +84,7 @@ struct evlist {
int pos; /* index at evlist core object to check signals */
} ctl_fd;
struct event_enable_timer *eet;
+ struct list_head deferred_samples;
};
struct evsel_str_handler {
@@ -1266,6 +1266,56 @@ static int evlist__deliver_sample(struct evlist *evlist, const struct perf_tool
per_thread);
}
+struct deferred_event {
+ struct list_head list;
+ union perf_event *event;
+};
+
+static int evlist__deliver_deferred_samples(struct evlist *evlist,
+ const struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct deferred_event *de, *tmp;
+ struct evsel *evsel;
+ int ret = 0;
+
+ if (!tool->merge_deferred_callchains) {
+ evsel = evlist__id2evsel(evlist, sample->id);
+ return tool->callchain_deferred(tool, event, sample,
+ evsel, machine);
+ }
+
+ list_for_each_entry_safe(de, tmp, &evlist->deferred_samples, list) {
+ struct perf_sample orig_sample;
+
+ ret = evlist__parse_sample(evlist, de->event, &orig_sample);
+ if (ret < 0) {
+ pr_err("failed to parse original sample\n");
+ break;
+ }
+
+ if (sample->tid != orig_sample.tid)
+ continue;
+
+ evsel = evlist__id2evsel(evlist, orig_sample.id);
+ sample__merge_deferred_callchain(&orig_sample, sample);
+ ret = evlist__deliver_sample(evlist, tool, de->event,
+ &orig_sample, evsel, machine);
+
+ if (orig_sample.deferred_callchain)
+ free(orig_sample.callchain);
+
+ list_del(&de->list);
+ free(de);
+
+ if (ret)
+ break;
+ }
+ return ret;
+}
+
static int machines__deliver_event(struct machines *machines,
struct evlist *evlist,
union perf_event *event,
@@ -1294,6 +1344,16 @@ static int machines__deliver_event(struct machines *machines,
return 0;
}
dump_sample(evsel, event, sample, perf_env__arch(machine->env));
+ if (sample->deferred_callchain && tool->merge_deferred_callchains) {
+ struct deferred_event *de = malloc(sizeof(*de));
+
+ if (de == NULL)
+ return -ENOMEM;
+
+ de->event = event;
+ list_add_tail(&de->list, &evlist->deferred_samples);
+ return 0;
+ }
return evlist__deliver_sample(evlist, tool, event, sample, evsel, machine);
case PERF_RECORD_MMAP:
return tool->mmap(tool, event, sample, machine);
@@ -1353,7 +1413,8 @@ static int machines__deliver_event(struct machines *machines,
return tool->aux_output_hw_id(tool, event, sample, machine);
case PERF_RECORD_CALLCHAIN_DEFERRED:
dump_deferred_callchain(evsel, event, sample);
- return tool->callchain_deferred(tool, event, sample, evsel, machine);
+ return evlist__deliver_deferred_samples(evlist, tool, event,
+ sample, machine);
default:
++evlist->stats.nr_unknown_events;
return -1;
@@ -238,6 +238,7 @@ void perf_tool__init(struct perf_tool *tool, bool ordered_events)
tool->cgroup_events = false;
tool->no_warn = false;
tool->show_feat_hdr = SHOW_FEAT_NO_HEADER;
+ tool->merge_deferred_callchains = true;
tool->sample = process_event_sample_stub;
tool->mmap = process_event_stub;
@@ -87,6 +87,7 @@ struct perf_tool {
bool cgroup_events;
bool no_warn;
bool dont_split_sample_group;
+ bool merge_deferred_callchains;
enum show_feature_header show_feat_hdr;
};