Message ID | 20240103031409.2504051-9-dapeng1.mi@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | pmu test bugs fix and improvements | expand |
On Wed, Jan 03, 2024, Dapeng Mi wrote: > If HW supports GLOBAL_CTRL MSR, enabling and disabling PMCs are moved in > __precise_count_loop(). Thus, instructions and branches events can be > verified against a precise count instead of a rough range. > > Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com> > --- > x86/pmu.c | 26 ++++++++++++++++++++++++++ > 1 file changed, 26 insertions(+) > > diff --git a/x86/pmu.c b/x86/pmu.c > index 88b89ad889b9..b764827c1c3d 100644 > --- a/x86/pmu.c > +++ b/x86/pmu.c > @@ -25,6 +25,10 @@ > "nop; nop; nop; nop; nop; nop; nop;\n\t" \ > "loop 1b;\n\t" > > +/*Enable GLOBAL_CTRL + disable GLOBAL_CTRL instructions */ > +#define PRECISE_EXTRA_INSTRNS (2 + 4) > +#define PRECISE_LOOP_INSTRNS (N * LOOP_INSTRNS + PRECISE_EXTRA_INSTRNS) > +#define PRECISE_LOOP_BRANCHES (N) > #define PRECISE_LOOP_ASM \ > "wrmsr;\n\t" \ > "mov %%ecx, %%edi; mov %%ebx, %%ecx;\n\t" \ > @@ -107,6 +111,24 @@ static inline void loop(u64 cntrs) > __precise_count_loop(cntrs); > } > > +static void adjust_events_range(struct pmu_event *gp_events, int branch_idx) > +{ > + /* > + * If HW supports GLOBAL_CTRL MSR, enabling and disabling PMCs are > + * moved in __precise_count_loop(). Thus, instructions and branches > + * events can be verified against a precise count instead of a rough > + * range. 
> + */ > + if (this_cpu_has_perf_global_ctrl()) { > + /* instructions event */ > + gp_events[0].min = PRECISE_LOOP_INSTRNS; > + gp_events[0].max = PRECISE_LOOP_INSTRNS; > + /* branches event */ > + gp_events[branch_idx].min = PRECISE_LOOP_BRANCHES; > + gp_events[branch_idx].max = PRECISE_LOOP_BRANCHES; > + } > +} > + > volatile uint64_t irq_received; > > static void cnt_overflow(isr_regs_t *regs) > @@ -771,6 +793,7 @@ static void check_invalid_rdpmc_gp(void) > > int main(int ac, char **av) > { > + int branch_idx; > setup_vm(); > handle_irq(PMI_VECTOR, cnt_overflow); > buf = malloc(N*64); > @@ -784,13 +807,16 @@ int main(int ac, char **av) > } > gp_events = (struct pmu_event *)intel_gp_events; > gp_events_size = sizeof(intel_gp_events)/sizeof(intel_gp_events[0]); > + branch_idx = 5; This hardcoded index (and the one that follows for AMD) is hacky and, more importantly, error-prone, especially when the code gets refactored later. Please use a proper mechanism instead, such as a macro. E.g., INTEL_ARCH_BRANCHES_RETIRED_INDEX in pmu_counters_test.c might be a good example to follow. > report_prefix_push("Intel"); > set_ref_cycle_expectations(); > } else { > gp_events_size = sizeof(amd_gp_events)/sizeof(amd_gp_events[0]); > gp_events = (struct pmu_event *)amd_gp_events; > + branch_idx = 2; > report_prefix_push("AMD"); > } > + adjust_events_range(gp_events, branch_idx); > > printf("PMU version: %d\n", pmu.version); > printf("GP counters: %d\n", pmu.nr_gp_counters); > -- > 2.34.1 >
On 3/27/2024 2:14 PM, Mingwei Zhang wrote: > On Wed, Jan 03, 2024, Dapeng Mi wrote: >> If HW supports GLOBAL_CTRL MSR, enabling and disabling PMCs are moved in >> __precise_count_loop(). Thus, instructions and branches events can be >> verified against a precise count instead of a rough range. >> >> Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com> >> --- >> x86/pmu.c | 26 ++++++++++++++++++++++++++ >> 1 file changed, 26 insertions(+) >> >> diff --git a/x86/pmu.c b/x86/pmu.c >> index 88b89ad889b9..b764827c1c3d 100644 >> --- a/x86/pmu.c >> +++ b/x86/pmu.c >> @@ -25,6 +25,10 @@ >> "nop; nop; nop; nop; nop; nop; nop;\n\t" \ >> "loop 1b;\n\t" >> >> +/*Enable GLOBAL_CTRL + disable GLOBAL_CTRL instructions */ >> +#define PRECISE_EXTRA_INSTRNS (2 + 4) >> +#define PRECISE_LOOP_INSTRNS (N * LOOP_INSTRNS + PRECISE_EXTRA_INSTRNS) >> +#define PRECISE_LOOP_BRANCHES (N) >> #define PRECISE_LOOP_ASM \ >> "wrmsr;\n\t" \ >> "mov %%ecx, %%edi; mov %%ebx, %%ecx;\n\t" \ >> @@ -107,6 +111,24 @@ static inline void loop(u64 cntrs) >> __precise_count_loop(cntrs); >> } >> >> +static void adjust_events_range(struct pmu_event *gp_events, int branch_idx) >> +{ >> + /* >> + * If HW supports GLOBAL_CTRL MSR, enabling and disabling PMCs are >> + * moved in __precise_count_loop(). Thus, instructions and branches >> + * events can be verified against a precise count instead of a rough >> + * range. 
>> + */ >> + if (this_cpu_has_perf_global_ctrl()) { >> + /* instructions event */ >> + gp_events[0].min = PRECISE_LOOP_INSTRNS; >> + gp_events[0].max = PRECISE_LOOP_INSTRNS; >> + /* branches event */ >> + gp_events[branch_idx].min = PRECISE_LOOP_BRANCHES; >> + gp_events[branch_idx].max = PRECISE_LOOP_BRANCHES; >> + } >> +} >> + >> volatile uint64_t irq_received; >> >> static void cnt_overflow(isr_regs_t *regs) >> @@ -771,6 +793,7 @@ static void check_invalid_rdpmc_gp(void) >> >> int main(int ac, char **av) >> { >> + int branch_idx; >> setup_vm(); >> handle_irq(PMI_VECTOR, cnt_overflow); >> buf = malloc(N*64); >> @@ -784,13 +807,16 @@ int main(int ac, char **av) >> } >> gp_events = (struct pmu_event *)intel_gp_events; >> gp_events_size = sizeof(intel_gp_events)/sizeof(intel_gp_events[0]); >> + branch_idx = 5; > This (and the follow up one) hardcoded index is hacky and more > importantly, error prone especially when code get refactored later. > Please use a proper way via macro? Eg., checking > INTEL_ARCH_BRANCHES_RETIRED_INDEX in pmu_counters_test.c might be a good > one. Yeah, I would define an enum to enumerate these indexes. Thanks. >> report_prefix_push("Intel"); >> set_ref_cycle_expectations(); >> } else { >> gp_events_size = sizeof(amd_gp_events)/sizeof(amd_gp_events[0]); >> gp_events = (struct pmu_event *)amd_gp_events; >> + branch_idx = 2; >> report_prefix_push("AMD"); >> } >> + adjust_events_range(gp_events, branch_idx); >> >> printf("PMU version: %d\n", pmu.version); >> printf("GP counters: %d\n", pmu.nr_gp_counters); >> -- >> 2.34.1 >>
diff --git a/x86/pmu.c b/x86/pmu.c index 88b89ad889b9..b764827c1c3d 100644 --- a/x86/pmu.c +++ b/x86/pmu.c @@ -25,6 +25,10 @@ "nop; nop; nop; nop; nop; nop; nop;\n\t" \ "loop 1b;\n\t" +/*Enable GLOBAL_CTRL + disable GLOBAL_CTRL instructions */ +#define PRECISE_EXTRA_INSTRNS (2 + 4) +#define PRECISE_LOOP_INSTRNS (N * LOOP_INSTRNS + PRECISE_EXTRA_INSTRNS) +#define PRECISE_LOOP_BRANCHES (N) #define PRECISE_LOOP_ASM \ "wrmsr;\n\t" \ "mov %%ecx, %%edi; mov %%ebx, %%ecx;\n\t" \ @@ -107,6 +111,24 @@ static inline void loop(u64 cntrs) __precise_count_loop(cntrs); } +static void adjust_events_range(struct pmu_event *gp_events, int branch_idx) +{ + /* + * If HW supports GLOBAL_CTRL MSR, enabling and disabling PMCs are + * moved in __precise_count_loop(). Thus, instructions and branches + * events can be verified against a precise count instead of a rough + * range. + */ + if (this_cpu_has_perf_global_ctrl()) { + /* instructions event */ + gp_events[0].min = PRECISE_LOOP_INSTRNS; + gp_events[0].max = PRECISE_LOOP_INSTRNS; + /* branches event */ + gp_events[branch_idx].min = PRECISE_LOOP_BRANCHES; + gp_events[branch_idx].max = PRECISE_LOOP_BRANCHES; + } +} + volatile uint64_t irq_received; static void cnt_overflow(isr_regs_t *regs) @@ -771,6 +793,7 @@ static void check_invalid_rdpmc_gp(void) int main(int ac, char **av) { + int branch_idx; setup_vm(); handle_irq(PMI_VECTOR, cnt_overflow); buf = malloc(N*64); @@ -784,13 +807,16 @@ int main(int ac, char **av) } gp_events = (struct pmu_event *)intel_gp_events; gp_events_size = sizeof(intel_gp_events)/sizeof(intel_gp_events[0]); + branch_idx = 5; report_prefix_push("Intel"); set_ref_cycle_expectations(); } else { gp_events_size = sizeof(amd_gp_events)/sizeof(amd_gp_events[0]); gp_events = (struct pmu_event *)amd_gp_events; + branch_idx = 2; report_prefix_push("AMD"); } + adjust_events_range(gp_events, branch_idx); printf("PMU version: %d\n", pmu.version); printf("GP counters: %d\n", pmu.nr_gp_counters);
If HW supports the GLOBAL_CTRL MSR, enabling and disabling of PMCs is moved into __precise_count_loop(). Thus, the instructions and branches events can be verified against a precise count instead of a rough range. Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com> --- x86/pmu.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+)