
[kvm-unit-tests,PATCHv7,2/3] arm: pmu: Check cycle count increases

Message ID 1478125337-11770-3-git-send-email-wei@redhat.com (mailing list archive)
State New, archived

Commit Message

Wei Huang Nov. 2, 2016, 10:22 p.m. UTC
Ensure that reads of the PMCCNTR_EL0 are monotonically increasing,
even for the smallest delta of two subsequent reads.

Signed-off-by: Christopher Covington <cov@codeaurora.org>
Signed-off-by: Wei Huang <wei@redhat.com>
---
 arm/pmu.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 100 insertions(+)

Comments

Christopher Covington Nov. 3, 2016, 4:51 a.m. UTC | #1
Hi Wei,

Thanks for your work on this.

On 2016-11-02 16:22, Wei Huang wrote:
> Ensure that reads of the PMCCNTR_EL0 are monotonically increasing,
> even for the smallest delta of two subsequent reads.
> 
> Signed-off-by: Christopher Covington <cov@codeaurora.org>
> Signed-off-by: Wei Huang <wei@redhat.com>
> ---
>  arm/pmu.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 100 insertions(+)
> 
> diff --git a/arm/pmu.c b/arm/pmu.c
> index 42d0ee1..65b7df1 100644
> --- a/arm/pmu.c
> +++ b/arm/pmu.c
> @@ -14,6 +14,9 @@
>   */
>  #include "libcflat.h"
> 
> +#define NR_SAMPLES 10
> +#define ARMV8_PMU_CYCLE_IDX 31
> +
>  #if defined(__arm__)
>  static inline uint32_t get_pmcr(void)
>  {
> @@ -22,6 +25,43 @@ static inline uint32_t get_pmcr(void)
>  	asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (ret));
>  	return ret;
>  }
> +
> +static inline void set_pmcr(uint32_t pmcr)
> +{
> +	asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (pmcr));
> +}
> +
> +static inline void set_pmccfiltr(uint32_t filter)
> +{
> +	uint32_t cycle_idx = ARMV8_PMU_CYCLE_IDX;
> +
> +	asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (cycle_idx));
> +	asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (filter));
> +}

Down the road I'd like to add tests for the regular events. What if you added
separate PMSELR and PMXEVTYPER accessors now and used them (with
PMSELR.SEL = 31) for setting PMCCFILTR? Then we wouldn't need a specialized
set_pmccfiltr function for the cycle counter versus PMSELR and PMXEVTYPER for
the regular events.
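
For illustration, a minimal sketch of the split Christopher describes, reusing
the AArch32 encodings already present in the patch (the function names are
assumptions, following the set_<regname> style used elsewhere in the file):

static inline void set_pmselr(uint32_t sel)
{
	asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (sel));
}

static inline void set_pmxevtyper(uint32_t evtyper)
{
	asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (evtyper));
}

/* set_pmccfiltr() would then reduce to: */
static inline void set_pmccfiltr(uint32_t filter)
{
	set_pmselr(ARMV8_PMU_CYCLE_IDX);	/* PMSELR.SEL = 31 selects PMCCFILTR */
	set_pmxevtyper(filter);
}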

> +/*
> + * While PMCCNTR can be accessed as a 64 bit coprocessor register, returning 64
> + * bits doesn't seem worth the trouble when differential usage of the result is
> + * expected (with differences that can easily fit in 32 bits). So just return
> + * the lower 32 bits of the cycle count in AArch32.
> + */
> +static inline unsigned long get_pmccntr(void)
> +{
> +	unsigned long cycles;
> +
> +	asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (cycles));
> +	return cycles;
> +}
> +
> +static inline void enable_counter(uint32_t idx)
> +{
> +	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (1 << idx));
> +}

My personal preference, which I think would make this function look and
act like the other system register accessor functions, would be to
name the function "set_pmcntenset" and do a plain write of the input
parameter without a shift, letting the shift be done in the C code.
(As we scale up, the system register accessor functions should probably
be generated by macros from a concise table.)
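
A minimal sketch of that shape, assuming the same AArch32 encoding the patch
already uses for PMCNTENSET; the name set_pmcntenset and the caller-side shift
are the only changes:

static inline void set_pmcntenset(uint32_t mask)
{
	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (mask));
}

	/* the test supplies the already-shifted mask: */
	set_pmcntenset(1 << ARMV8_PMU_CYCLE_IDX);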

> +static inline void disable_counter(uint32_t idx)
> +{
> +	asm volatile("mrc p15, 0, %0, c9, c12, 1" : : "r" (1 << idx));
> +}

This function doesn't seem to be used yet. Consider whether it might
make sense to defer introducing it until there is a user.

>  #elif defined(__aarch64__)
>  static inline uint32_t get_pmcr(void)
>  {
> @@ -30,6 +70,34 @@ static inline uint32_t get_pmcr(void)
>  	asm volatile("mrs %0, pmcr_el0" : "=r" (ret));
>  	return ret;
>  }
> +
> +static inline void set_pmcr(uint32_t pmcr)
> +{
> +	asm volatile("msr pmcr_el0, %0" : : "r" (pmcr));
> +}
> 
> +static inline void set_pmccfiltr(uint32_t filter)
> +{
> +	asm volatile("msr pmccfiltr_el0, %0" : : "r" (filter));
> +}

As above, consider whether using PMSELR and PMXEVTYPER might be a more
reusable pair of accessors.

> +static inline unsigned long get_pmccntr(void)
> +{
> +	unsigned long cycles;
> +
> +	asm volatile("mrs %0, pmccntr_el0" : "=r" (cycles));
> +	return cycles;
> +}
> +
> +static inline void enable_counter(uint32_t idx)
> +{
> +	asm volatile("msr pmcntenset_el0, %0" : : "r" (1 << idx));
> +}

Same thought as above about uniformity and generatability.

> +static inline void disable_counter(uint32_t idx)
> +{
> +	asm volatile("msr pmcntensclr_el0, %0" : : "r" (1 << idx));
> +}

As above, this function doesn't seem to be used yet.

Thanks,
Cov
Andrew Jones Nov. 3, 2016, 10:35 a.m. UTC | #2
On Wed, Nov 02, 2016 at 05:22:16PM -0500, Wei Huang wrote:

Missing From: Christopher

> Ensure that reads of the PMCCNTR_EL0 are monotonically increasing,
> even for the smallest delta of two subsequent reads.
> 
> Signed-off-by: Christopher Covington <cov@codeaurora.org>
> Signed-off-by: Wei Huang <wei@redhat.com>
> ---
>  arm/pmu.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 100 insertions(+)
> 
> diff --git a/arm/pmu.c b/arm/pmu.c
> index 42d0ee1..65b7df1 100644
> --- a/arm/pmu.c
> +++ b/arm/pmu.c
> @@ -14,6 +14,9 @@
>   */
>  #include "libcflat.h"
>  
> +#define NR_SAMPLES 10
> +#define ARMV8_PMU_CYCLE_IDX 31

Now I'd really like to drop the bitfields and go to bit defines if we're
going to start mixing them anyway...
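
A rough sketch of the bit-define style Andrew means, as an alternative to the
struct pmu_data bitfields (field positions follow the architectural PMCR
layout; the macro names are illustrative, not from the patch):

#define PMU_PMCR_E		(1 << 0)	/* enable all counters */
#define PMU_PMCR_C		(1 << 2)	/* reset cycle counter */
#define PMU_PMCR_N_SHIFT	11		/* number of event counters */
#define PMU_PMCR_N_MASK		0x1f
#define PMU_PMCR_IMP_SHIFT	24		/* implementer code */
#define PMU_PMCR_IMP_MASK	0xff

	/* enabling the PMU then becomes: */
	set_pmcr(get_pmcr() | PMU_PMCR_E);

	/* and the implementer check in check_pmcr() becomes: */
	return ((get_pmcr() >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK) != 0;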

> +
>  #if defined(__arm__)
>  static inline uint32_t get_pmcr(void)
>  {
> @@ -22,6 +25,43 @@ static inline uint32_t get_pmcr(void)
>  	asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (ret));
>  	return ret;
>  }
> +
> +static inline void set_pmcr(uint32_t pmcr)
> +{
> +	asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (pmcr));
> +}
> +
> +static inline void set_pmccfiltr(uint32_t filter)
> +{
> +	uint32_t cycle_idx = ARMV8_PMU_CYCLE_IDX;
> +
> +	asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (cycle_idx));
> +	asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (filter));
> +}

I agree with Christopher that we should create per-register accessors. That
gives the test code the flexibility to do what it wants without requiring a
bunch of armv7/armv8 sysreg clutter.

> +
> +/*
> + * While PMCCNTR can be accessed as a 64 bit coprocessor register, returning 64
> + * bits doesn't seem worth the trouble when differential usage of the result is
> + * expected (with differences that can easily fit in 32 bits). So just return

Except for the fact this is a test and we should confirm those upper bits
are whatever we expect, even if we expect zero.
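
If the test did want to look at the upper half on AArch32, one possible sketch
(this assumes the ARMv8 64-bit MRRC encoding for PMCCNTR, coproc p15, opc1 0,
CRm c9, and is worth double-checking before relying on it):

static inline uint64_t get_pmccntr64(void)
{
	uint64_t cycles;

	/* 64-bit read of PMCCNTR via the register-pair accessor */
	asm volatile("mrrc p15, 0, %Q0, %R0, c9" : "=r" (cycles));
	return cycles;
}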

> + * the lower 32 bits of the cycle count in AArch32.
> + */
> +static inline unsigned long get_pmccntr(void)
> +{
> +	unsigned long cycles;
> +
> +	asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (cycles));
> +	return cycles;
> +}
> +
> +static inline void enable_counter(uint32_t idx)
> +{
> +	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (1 << idx));
> +}
> +
> +static inline void disable_counter(uint32_t idx)
> +{
> +	asm volatile("mrc p15, 0, %0, c9, c12, 1" : : "r" (1 << idx));
> +}

I'll echo Christopher here as well with the opinion that the test should
supply the shifted value and the accessor should just be a simple read or
write.

Please name the accessors <regname>_read/write
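
One rough illustration of that naming combined with Christopher's
macro-generation idea, for the AArch64 side (the DEFINE_SYSREG_RW macro is
hypothetical, not part of the patch or of libcflat):

#define DEFINE_SYSREG_RW(reg)						\
static inline uint64_t reg##_read(void)				\
{									\
	uint64_t val;							\
	asm volatile("mrs %0, " #reg : "=r" (val));			\
	return val;							\
}									\
static inline void reg##_write(uint64_t val)				\
{									\
	asm volatile("msr " #reg ", %0" : : "r" (val));			\
}

DEFINE_SYSREG_RW(pmcr_el0)
DEFINE_SYSREG_RW(pmcntenset_el0)

	/* usage, with the test supplying the shifted value: */
	pmcntenset_el0_write(1 << ARMV8_PMU_CYCLE_IDX);

The AArch32 side would need a parallel macro taking the coprocessor encoding
instead of the register name.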

>  #elif defined(__aarch64__)
>  static inline uint32_t get_pmcr(void)
>  {
> @@ -30,6 +70,34 @@ static inline uint32_t get_pmcr(void)
>  	asm volatile("mrs %0, pmcr_el0" : "=r" (ret));
>  	return ret;
>  }
> +
> +static inline void set_pmcr(uint32_t pmcr)
> +{
> +	asm volatile("msr pmcr_el0, %0" : : "r" (pmcr));
> +}
> +
> +static inline void set_pmccfiltr(uint32_t filter)
> +{
> +	asm volatile("msr pmccfiltr_el0, %0" : : "r" (filter));
> +}
> +
> +static inline unsigned long get_pmccntr(void)
> +{
> +	unsigned long cycles;
> +
> +	asm volatile("mrs %0, pmccntr_el0" : "=r" (cycles));
> +	return cycles;
> +}
> +
> +static inline void enable_counter(uint32_t idx)
> +{
> +	asm volatile("msr pmcntenset_el0, %0" : : "r" (1 << idx));
> +}
> +
> +static inline void disable_counter(uint32_t idx)
> +{
> +	asm volatile("msr pmcntensclr_el0, %0" : : "r" (1 << idx));
> +}
>  #endif
>  
>  struct pmu_data {
> @@ -72,11 +140,43 @@ static bool check_pmcr(void)
>  	return pmu.implementer != 0;
>  }
>  
> +/*
> + * Ensure that the cycle counter progresses between back-to-back reads.
> + */
> +static bool check_cycles_increase(void)
> +{
> +	struct pmu_data pmu = {{0}};
> +
> +	enable_counter(ARMV8_PMU_CYCLE_IDX);
> +	set_pmccfiltr(0); /* count cycles in EL0, EL1, but not EL2 */
> +
> +	pmu.enable = 1;
> +	set_pmcr(pmu.pmcr_el0);
> +
> +	for (int i = 0; i < NR_SAMPLES; i++) {
> +		unsigned long a, b;
> +
> +		a = get_pmccntr();
> +		b = get_pmccntr();
> +
> +		if (a >= b) {
> +			printf("Read %ld then %ld.\n", a, b);
> +			return false;
> +		}
> +	}
> +
> +	pmu.enable = 0;
> +	set_pmcr(pmu.pmcr_el0);
> +
> +	return true;
> +}
> +
>  int main(void)
>  {
>  	report_prefix_push("pmu");
>  
>  	report("Control register", check_pmcr());
> +	report("Monotonically increasing cycle count", check_cycles_increase());
>  
>  	return report_summary();
>  }

What happens when this test runs on TCG? Do we just fail? Does it explode? Is
there a register we can probe so that, when it indicates things won't work, we
can invoke report_skip?

Thanks,
drew
Christopher Covington Nov. 3, 2016, 2:25 p.m. UTC | #3
On 2016-11-03 04:35, Andrew Jones wrote:

>> +/*
>> + * Ensure that the cycle counter progresses between back-to-back reads.
>> + */
>> +static bool check_cycles_increase(void)
>> +{
>> +	struct pmu_data pmu = {{0}};
>> +
>> +	enable_counter(ARMV8_PMU_CYCLE_IDX);
>> +	set_pmccfiltr(0); /* count cycles in EL0, EL1, but not EL2 */
>> +
>> +	pmu.enable = 1;
>> +	set_pmcr(pmu.pmcr_el0);
>> +
>> +	for (int i = 0; i < NR_SAMPLES; i++) {
>> +		unsigned long a, b;
>> +
>> +		a = get_pmccntr();
>> +		b = get_pmccntr();
>> +
>> +		if (a >= b) {
>> +			printf("Read %ld then %ld.\n", a, b);
>> +			return false;
>> +		}
>> +	}
>> +
>> +	pmu.enable = 0;
>> +	set_pmcr(pmu.pmcr_el0);
>> +
>> +	return true;
>> +}
>> +
>>  int main(void)
>>  {
>>  	report_prefix_push("pmu");
>> 
>>  	report("Control register", check_pmcr());
>> +	report("Monotonically increasing cycle count", 
>> check_cycles_increase());
>> 
>>  	return report_summary();
>>  }
> 
> What happens with this test running on tcg? Do we just fail? Does it
> explode? Is there a register we can probe and when it indicates things
> won't work we can invoke a report_skip?

A monotonically increasing value in the cycle counter (but not any attempt at
approximating actual cycle values) is pretty much the only piece of the PMU
that TCG has had implemented for the past while. We'll have to check whether
TCG can handle the filter and enable-set registers, but if it doesn't yet,
that's something we can improve :).

Thanks,
Cov
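
For reference, the probe-and-skip drew asks about could take roughly this
shape in main() (report_skip() is the kvm-unit-tests reporting helper; using
the PMCR implementer field, via the existing check_pmcr(), as the probe is an
assumption, not something the thread settled on):

	if (!check_pmcr()) {
		/* no usable PMU exposed to the guest, skip instead of failing */
		report_skip("Monotonically increasing cycle count");
		return report_summary();
	}
	report("Monotonically increasing cycle count", check_cycles_increase());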

Patch

diff --git a/arm/pmu.c b/arm/pmu.c
index 42d0ee1..65b7df1 100644
--- a/arm/pmu.c
+++ b/arm/pmu.c
@@ -14,6 +14,9 @@ 
  */
 #include "libcflat.h"
 
+#define NR_SAMPLES 10
+#define ARMV8_PMU_CYCLE_IDX 31
+
 #if defined(__arm__)
 static inline uint32_t get_pmcr(void)
 {
@@ -22,6 +25,43 @@  static inline uint32_t get_pmcr(void)
 	asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (ret));
 	return ret;
 }
+
+static inline void set_pmcr(uint32_t pmcr)
+{
+	asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (pmcr));
+}
+
+static inline void set_pmccfiltr(uint32_t filter)
+{
+	uint32_t cycle_idx = ARMV8_PMU_CYCLE_IDX;
+
+	asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (cycle_idx));
+	asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (filter));
+}
+
+/*
+ * While PMCCNTR can be accessed as a 64 bit coprocessor register, returning 64
+ * bits doesn't seem worth the trouble when differential usage of the result is
+ * expected (with differences that can easily fit in 32 bits). So just return
+ * the lower 32 bits of the cycle count in AArch32.
+ */
+static inline unsigned long get_pmccntr(void)
+{
+	unsigned long cycles;
+
+	asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (cycles));
+	return cycles;
+}
+
+static inline void enable_counter(uint32_t idx)
+{
+	asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (1 << idx));
+}
+
+static inline void disable_counter(uint32_t idx)
+{
+	asm volatile("mrc p15, 0, %0, c9, c12, 1" : : "r" (1 << idx));
+}
 #elif defined(__aarch64__)
 static inline uint32_t get_pmcr(void)
 {
@@ -30,6 +70,34 @@  static inline uint32_t get_pmcr(void)
 	asm volatile("mrs %0, pmcr_el0" : "=r" (ret));
 	return ret;
 }
+
+static inline void set_pmcr(uint32_t pmcr)
+{
+	asm volatile("msr pmcr_el0, %0" : : "r" (pmcr));
+}
+
+static inline void set_pmccfiltr(uint32_t filter)
+{
+	asm volatile("msr pmccfiltr_el0, %0" : : "r" (filter));
+}
+
+static inline unsigned long get_pmccntr(void)
+{
+	unsigned long cycles;
+
+	asm volatile("mrs %0, pmccntr_el0" : "=r" (cycles));
+	return cycles;
+}
+
+static inline void enable_counter(uint32_t idx)
+{
+	asm volatile("msr pmcntenset_el0, %0" : : "r" (1 << idx));
+}
+
+static inline void disable_counter(uint32_t idx)
+{
+	asm volatile("msr pmcntensclr_el0, %0" : : "r" (1 << idx));
+}
 #endif
 
 struct pmu_data {
@@ -72,11 +140,43 @@  static bool check_pmcr(void)
 	return pmu.implementer != 0;
 }
 
+/*
+ * Ensure that the cycle counter progresses between back-to-back reads.
+ */
+static bool check_cycles_increase(void)
+{
+	struct pmu_data pmu = {{0}};
+
+	enable_counter(ARMV8_PMU_CYCLE_IDX);
+	set_pmccfiltr(0); /* count cycles in EL0, EL1, but not EL2 */
+
+	pmu.enable = 1;
+	set_pmcr(pmu.pmcr_el0);
+
+	for (int i = 0; i < NR_SAMPLES; i++) {
+		unsigned long a, b;
+
+		a = get_pmccntr();
+		b = get_pmccntr();
+
+		if (a >= b) {
+			printf("Read %ld then %ld.\n", a, b);
+			return false;
+		}
+	}
+
+	pmu.enable = 0;
+	set_pmcr(pmu.pmcr_el0);
+
+	return true;
+}
+
 int main(void)
 {
 	report_prefix_push("pmu");
 
 	report("Control register", check_pmcr());
+	report("Monotonically increasing cycle count", check_cycles_increase());
 
 	return report_summary();
 }