@@ -709,7 +709,11 @@ stat.*::
intel-pt.*::
+ Variables that affect Intel PT.
+
intel-pt.cache-divisor::
+ If set, the decoder instruction cache size is based on DSO size
+ divided by this number. Default is 64.
intel-pt.mispred-all::
If set, Intel PT decoder will set the mispred flag on all
@@ -721,6 +725,20 @@ intel-pt.*::
the maximum is exceeded there will be a "Never-ending loop"
error. The default is 100000.
+ intel-pt.max_nonturbo_ratio::
+ The kernel provides /sys/bus/event_source/devices/intel_pt/max_nonturbo_ratio
+ which can be zero in a virtual machine. The decoder needs this
+ information to correctly interpret timing packets, so the value
+ can be provided by this variable in that case. Note in the absence
+ of VMCS TSC Scaling, this is probably the same as the host value.
+
+ intel-pt.tsc_art_ratio::
+ The kernel provides /sys/bus/event_source/devices/intel_pt/tsc_art_ratio
+ which can be 0:0 in a virtual machine. The decoder needs this
+ information to correctly interpret timing packets, so the value
+ can be provided by this variable in that case. The value is two
+ unsigned decimal numbers separated by a colon (n:d), the same
+ form as the 0:0 above. Note in the absence of VMCS TSC Scaling,
+ this is probably the same as the host value.
+
auxtrace.*::
auxtrace.dumpdir::
@@ -24,6 +24,7 @@
#include "../../../util/parse-events.h"
#include "../../../util/pmu.h"
#include "../../../util/debug.h"
+#include "../../../util/config.h"
#include "../../../util/auxtrace.h"
#include "../../../util/perf_api_probe.h"
#include "../../../util/record.h"
@@ -328,15 +329,60 @@ intel_pt_info_priv_size(struct auxtrace_record *itr, struct evlist *evlist)
return ptr->priv_size;
}
+struct tsc_art_ratio { /* handed to the intel-pt.tsc_art_ratio config callback:
+                        * where to store the two numbers of an "n:d" value */
+ u32 *n; /* target for the number before the colon */
+ u32 *d; /* target for the number after the colon */
+};
+
+/*
+ * perf_config() callback for the "intel-pt.tsc_art_ratio" variable.
+ *
+ * @var:   full name of the config variable being visited
+ * @value: its value, expected as "<n>:<d>" (two unsigned decimal numbers)
+ * @data:  struct tsc_art_ratio naming where the two numbers are stored
+ *
+ * Any other variable is ignored. Returns 0 on success or for an unrelated
+ * variable, -EINVAL if the value is missing or not of the form "<n>:<d>".
+ * The targets are written only when both numbers were parsed.
+ */
+static int intel_pt_tsc_art_ratio(const char *var, const char *value, void *data)
+{
+	struct tsc_art_ratio *r = data;
+	unsigned int n, d;
+
+	if (strcmp(var, "intel-pt.tsc_art_ratio"))
+		return 0;
+
+	/*
+	 * Parse into locals first: scanning straight into the targets would
+	 * let a half-valid value such as "3:" update only one of them before
+	 * the error is reported.
+	 * NOTE(review): the NULL check is defensive; whether perf_config()
+	 * can pass a NULL value is not visible here.
+	 */
+	if (!value || sscanf(value, "%u:%u", &n, &d) != 2)
+		return -EINVAL;
+
+	*r->n = n;
+	*r->d = d;
+	return 0;
+}
+
+/*
+ * Take the TSC/CTC ratio from the "intel-pt.tsc_art_ratio" perf config
+ * variable.
+ *
+ * @n: receives the first number of the configured "n:d" value
+ * @d: receives the second number of the configured "n:d" value
+ *
+ * Both outputs start at 0 and are put back to 0 if perf_config() reports an
+ * error, so a caller testing for a zero ratio never sees a partly parsed
+ * value.
+ */
+void intel_pt_tsc_ctc_ratio_from_config(u32 *n, u32 *d)
+{
+	struct tsc_art_ratio data = { .n = n, .d = d };
+
+	*n = 0;
+	*d = 0;
+	/* NOTE(review): assumes perf_config() returns < 0 on failure - confirm */
+	if (perf_config(intel_pt_tsc_art_ratio, &data) < 0) {
+		*n = 0;
+		*d = 0;
+	}
+}
+
static void intel_pt_tsc_ctc_ratio(u32 *n, u32 *d)
{
unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
__get_cpuid(0x15, &eax, &ebx, &ecx, &edx);
+ if (!eax || !ebx) { /* leaf 0x15 left eax or ebx at 0: take the ratio from the perf config instead */
+ intel_pt_tsc_ctc_ratio_from_config(n, d);
+ return;
+ }
*n = ebx;
*d = eax;
}
+/*
+ * perf_config() callback: when @var is "intel-pt.max_nonturbo_ratio", scan
+ * @value as an unsigned decimal into the unsigned int that @data points to.
+ * Returns -EINVAL if no number could be scanned, otherwise 0; any other
+ * variable is ignored and also yields 0.
+ */
+static int intel_pt_max_nonturbo_ratio(const char *var, const char *value, void *data)
+{
+	unsigned int *ratio = data;
+
+	if (strcmp(var, "intel-pt.max_nonturbo_ratio") != 0)
+		return 0;
+
+	return sscanf(value, "%u", ratio) == 1 ? 0 : -EINVAL;
+}
+
+void intel_pt_max_nonturbo_ratio_from_config(unsigned int *max_non_turbo_ratio)
+{ /* Run the intel-pt.max_nonturbo_ratio config callback over the perf config
+   * with *max_non_turbo_ratio as the parse target. Nothing here initialises
+   * the target, so the caller must give it a defined value beforehand. */
+ perf_config(intel_pt_max_nonturbo_ratio, max_non_turbo_ratio);
+}
+
static int intel_pt_info_fill(struct auxtrace_record *itr,
struct perf_session *session,
struct perf_record_auxtrace_info *auxtrace_info,
@@ -350,7 +396,7 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
bool cap_user_time_zero = false, per_cpu_mmaps;
u64 tsc_bit, mtc_bit, mtc_freq_bits, cyc_bit, noretcomp_bit;
u32 tsc_ctc_ratio_n, tsc_ctc_ratio_d;
- unsigned long max_non_turbo_ratio;
+ unsigned int max_non_turbo_ratio;
size_t filter_str_len;
const char *filter;
int event_trace;
@@ -374,8 +420,10 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
intel_pt_tsc_ctc_ratio(&tsc_ctc_ratio_n, &tsc_ctc_ratio_d);
if (perf_pmu__scan_file(intel_pt_pmu, "max_nonturbo_ratio",
- "%lu", &max_non_turbo_ratio) != 1)
+ "%u", &max_non_turbo_ratio) != 1)
max_non_turbo_ratio = 0;
+ if (!max_non_turbo_ratio)
+ intel_pt_max_nonturbo_ratio_from_config(&max_non_turbo_ratio);
if (perf_pmu__scan_file(intel_pt_pmu, "caps/event_trace",
"%d", &event_trace) != 1)
event_trace = 0;
@@ -3934,6 +3934,9 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
INTEL_PT_CYC_BIT);
}
+ if (!pt->tsc_ctc_ratio_n || !pt->tsc_ctc_ratio_d)
+ intel_pt_tsc_ctc_ratio_from_config(&pt->tsc_ctc_ratio_n, &pt->tsc_ctc_ratio_d);
+
if (intel_pt_has(auxtrace_info, INTEL_PT_MAX_NONTURBO_RATIO)) {
pt->max_non_turbo_ratio =
auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO];
@@ -3942,6 +3945,9 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
INTEL_PT_MAX_NONTURBO_RATIO);
}
+ if (!pt->max_non_turbo_ratio)
+ intel_pt_max_nonturbo_ratio_from_config(&pt->max_non_turbo_ratio);
+
info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1;
info_end = (void *)auxtrace_info + auxtrace_info->header.size;
@@ -7,6 +7,8 @@
#ifndef INCLUDE__PERF_INTEL_PT_H__
#define INCLUDE__PERF_INTEL_PT_H__
+#include <linux/types.h>
+
#define INTEL_PT_PMU_NAME "intel_pt"
enum {
@@ -44,4 +46,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
struct perf_event_attr *intel_pt_pmu_default_config(struct perf_pmu *pmu);
+void intel_pt_tsc_ctc_ratio_from_config(u32 *n, u32 *d);
+void intel_pt_max_nonturbo_ratio_from_config(unsigned int *max_non_turbo_ratio);
+
#endif
Parameters needed to correctly interpret timing packets might be missing
in a virtual machine because the CPUID leaf or MSR is not supported by the
hypervisor / KVM.

Add perf config variables to overcome that for max_nonturbo_ratio (missing
from MSR_PLATFORM_INFO) and tsc_art_ratio (missing from CPUID leaf 0x15),
which were seen to be missing from QEMU / KVM.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
---
 tools/perf/Documentation/perf-config.txt | 18 ++++++++
 tools/perf/arch/x86/util/intel-pt.c      | 52 +++++++++++++++++++++++-
 tools/perf/util/intel-pt.c               |  6 +++
 tools/perf/util/intel-pt.h               |  5 +++
 4 files changed, 79 insertions(+), 2 deletions(-)