
[kvm-unit-tests,v3,08/11] x86: pmu: Improve instruction and branches events verification

Message ID 20240103031409.2504051-9-dapeng1.mi@linux.intel.com (mailing list archive)
State New, archived
Series pmu test bugs fix and improvements

Commit Message

Mi, Dapeng Jan. 3, 2024, 3:14 a.m. UTC
If the HW supports the GLOBAL_CTRL MSR, the enabling and disabling of
PMCs is moved into __precise_count_loop(). Thus, the instructions and
branches events can be verified against a precise count instead of a
rough range.

Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
---
 x86/pmu.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

Comments

Mingwei Zhang March 27, 2024, 6:14 a.m. UTC | #1
On Wed, Jan 03, 2024, Dapeng Mi wrote:
> If the HW supports the GLOBAL_CTRL MSR, the enabling and disabling of
> PMCs is moved into __precise_count_loop(). Thus, the instructions and
> branches events can be verified against a precise count instead of a
> rough range.
> 
> Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
> ---
>  x86/pmu.c | 26 ++++++++++++++++++++++++++
>  1 file changed, 26 insertions(+)
> 
> diff --git a/x86/pmu.c b/x86/pmu.c
> index 88b89ad889b9..b764827c1c3d 100644
> --- a/x86/pmu.c
> +++ b/x86/pmu.c
> @@ -25,6 +25,10 @@
>  	"nop; nop; nop; nop; nop; nop; nop;\n\t"	\
>  	"loop 1b;\n\t"
>  
> +/* Enable GLOBAL_CTRL + disable GLOBAL_CTRL instructions */
> +#define PRECISE_EXTRA_INSTRNS  (2 + 4)
> +#define PRECISE_LOOP_INSTRNS   (N * LOOP_INSTRNS + PRECISE_EXTRA_INSTRNS)
> +#define PRECISE_LOOP_BRANCHES  (N)
>  #define PRECISE_LOOP_ASM						\
>  	"wrmsr;\n\t"							\
>  	"mov %%ecx, %%edi; mov %%ebx, %%ecx;\n\t"			\
> @@ -107,6 +111,24 @@ static inline void loop(u64 cntrs)
>  		__precise_count_loop(cntrs);
>  }
>  
> +static void adjust_events_range(struct pmu_event *gp_events, int branch_idx)
> +{
> +	/*
> +	 * If the HW supports the GLOBAL_CTRL MSR, enabling and disabling
> +	 * PMCs is moved into __precise_count_loop(). Thus, the instructions
> +	 * and branches events can be verified against a precise count
> +	 * instead of a rough range.
> +	 */
> +	if (this_cpu_has_perf_global_ctrl()) {
> +		/* instructions event */
> +		gp_events[0].min = PRECISE_LOOP_INSTRNS;
> +		gp_events[0].max = PRECISE_LOOP_INSTRNS;
> +		/* branches event */
> +		gp_events[branch_idx].min = PRECISE_LOOP_BRANCHES;
> +		gp_events[branch_idx].max = PRECISE_LOOP_BRANCHES;
> +	}
> +}
> +
>  volatile uint64_t irq_received;
>  
>  static void cnt_overflow(isr_regs_t *regs)
> @@ -771,6 +793,7 @@ static void check_invalid_rdpmc_gp(void)
>  
>  int main(int ac, char **av)
>  {
> +	int branch_idx;
>  	setup_vm();
>  	handle_irq(PMI_VECTOR, cnt_overflow);
>  	buf = malloc(N*64);
> @@ -784,13 +807,16 @@ int main(int ac, char **av)
>  		}
>  		gp_events = (struct pmu_event *)intel_gp_events;
>  		gp_events_size = sizeof(intel_gp_events)/sizeof(intel_gp_events[0]);
> +		branch_idx = 5;

This (and the follow-up one) hardcoded index is hacky and, more
importantly, error-prone, especially when the code gets refactored
later. Could you use a proper approach via a macro? E.g.,
INTEL_ARCH_BRANCHES_RETIRED_INDEX in pmu_counters_test.c might be a
good model.
>  		report_prefix_push("Intel");
>  		set_ref_cycle_expectations();
>  	} else {
>  		gp_events_size = sizeof(amd_gp_events)/sizeof(amd_gp_events[0]);
>  		gp_events = (struct pmu_event *)amd_gp_events;
> +		branch_idx = 2;
>  		report_prefix_push("AMD");
>  	}
> +	adjust_events_range(gp_events, branch_idx);
>  
>  	printf("PMU version:         %d\n", pmu.version);
>  	printf("GP counters:         %d\n", pmu.nr_gp_counters);
> -- 
> 2.34.1
>
Mi, Dapeng March 27, 2024, 8:59 a.m. UTC | #2
On 3/27/2024 2:14 PM, Mingwei Zhang wrote:
> On Wed, Jan 03, 2024, Dapeng Mi wrote:
>> [...]
>>   		}
>>   		gp_events = (struct pmu_event *)intel_gp_events;
>>   		gp_events_size = sizeof(intel_gp_events)/sizeof(intel_gp_events[0]);
>> +		branch_idx = 5;
> This (and the follow-up one) hardcoded index is hacky and, more
> importantly, error-prone, especially when the code gets refactored
> later. Could you use a proper approach via a macro? E.g.,
> INTEL_ARCH_BRANCHES_RETIRED_INDEX in pmu_counters_test.c might be a
> good model.

Yeah, I would define an enum to enumerate these indexes. Thanks.
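
Such an enum might look like the sketch below. The names are
illustrative assumptions, not code from the posted series; the values
mirror the hardcoded 5 (Intel) and 2 (AMD) above:

/*
 * Sketch only: illustrative names, not part of the posted series.
 * Keeping these next to intel_gp_events[]/amd_gp_events[] means any
 * reordering of the tables has to update the indexes in one place.
 */
enum {
	INTEL_INSTRUCTIONS_IDX = 0,	/* intel_gp_events[]: instructions */
	INTEL_BRANCHES_IDX     = 5,	/* intel_gp_events[]: branches */
};

enum {
	AMD_INSTRUCTIONS_IDX = 0,	/* amd_gp_events[]: instructions */
	AMD_BRANCHES_IDX     = 2,	/* amd_gp_events[]: branches */
};

main() would then assign branch_idx = INTEL_BRANCHES_IDX or
AMD_BRANCHES_IDX instead of the bare 5 and 2.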



Patch

diff --git a/x86/pmu.c b/x86/pmu.c
index 88b89ad889b9..b764827c1c3d 100644
--- a/x86/pmu.c
+++ b/x86/pmu.c
@@ -25,6 +25,10 @@
 	"nop; nop; nop; nop; nop; nop; nop;\n\t"	\
 	"loop 1b;\n\t"
 
+/* Enable GLOBAL_CTRL + disable GLOBAL_CTRL instructions */
+#define PRECISE_EXTRA_INSTRNS  (2 + 4)
+#define PRECISE_LOOP_INSTRNS   (N * LOOP_INSTRNS + PRECISE_EXTRA_INSTRNS)
+#define PRECISE_LOOP_BRANCHES  (N)
 #define PRECISE_LOOP_ASM						\
 	"wrmsr;\n\t"							\
 	"mov %%ecx, %%edi; mov %%ebx, %%ecx;\n\t"			\
@@ -107,6 +111,24 @@ static inline void loop(u64 cntrs)
 		__precise_count_loop(cntrs);
 }
 
+static void adjust_events_range(struct pmu_event *gp_events, int branch_idx)
+{
+	/*
+	 * If the HW supports the GLOBAL_CTRL MSR, enabling and disabling
+	 * PMCs is moved into __precise_count_loop(). Thus, the instructions
+	 * and branches events can be verified against a precise count
+	 * instead of a rough range.
+	 */
+	if (this_cpu_has_perf_global_ctrl()) {
+		/* instructions event */
+		gp_events[0].min = PRECISE_LOOP_INSTRNS;
+		gp_events[0].max = PRECISE_LOOP_INSTRNS;
+		/* branches event */
+		gp_events[branch_idx].min = PRECISE_LOOP_BRANCHES;
+		gp_events[branch_idx].max = PRECISE_LOOP_BRANCHES;
+	}
+}
+
 volatile uint64_t irq_received;
 
 static void cnt_overflow(isr_regs_t *regs)
@@ -771,6 +793,7 @@ static void check_invalid_rdpmc_gp(void)
 
 int main(int ac, char **av)
 {
+	int branch_idx;
 	setup_vm();
 	handle_irq(PMI_VECTOR, cnt_overflow);
 	buf = malloc(N*64);
@@ -784,13 +807,16 @@ int main(int ac, char **av)
 		}
 		gp_events = (struct pmu_event *)intel_gp_events;
 		gp_events_size = sizeof(intel_gp_events)/sizeof(intel_gp_events[0]);
+		branch_idx = 5;
 		report_prefix_push("Intel");
 		set_ref_cycle_expectations();
 	} else {
 		gp_events_size = sizeof(amd_gp_events)/sizeof(amd_gp_events[0]);
 		gp_events = (struct pmu_event *)amd_gp_events;
+		branch_idx = 2;
 		report_prefix_push("AMD");
 	}
+	adjust_events_range(gp_events, branch_idx);
 
 	printf("PMU version:         %d\n", pmu.version);
 	printf("GP counters:         %d\n", pmu.nr_gp_counters);