diff mbox series

[kvm-riscv/for-next,2/2] drivers/perf: riscv: Fix RV32 snapshot overflow use case

Message ID 20240425232933.4111680-3-atishp@rivosinc.com (mailing list archive)
State Superseded
Headers show
Series Fixes for kvm-riscv | expand

Checks

Context Check Description
conchuod/vmtest-fixes-PR fail merge-conflict

Commit Message

Atish Kumar Patra April 25, 2024, 11:29 p.m. UTC
The shadow copy alogirthm is implemented incorrectly. This patch fixes
the behavior by keeping a per cpu shadow copy of the counter values to
avoid clobbering for the cases where system more than XLEN counters and
the overflown counter index are beyond XLEN. This issue can only be
observed only in RV32 if an SBI implementation assigns logical counters
ids greater than XLEN or firmware counter overflow is supported in the
future.

Fixes : commit 22f5dac41004d ("drivers/perf: riscv: Implement SBI PMU snapshot function")

Signed-off-by: Atish Patra <atishp@rivosinc.com>
---
 drivers/perf/riscv_pmu_sbi.c   | 49 +++++++++++++++++++---------------
 include/linux/perf/riscv_pmu.h |  2 ++
 2 files changed, 30 insertions(+), 21 deletions(-)

Comments

Samuel Holland April 25, 2024, 8:18 p.m. UTC | #1
On 2024-04-25 6:29 PM, Atish Patra wrote:
> The shadow copy alogirthm is implemented incorrectly. This patch fixes
> the behavior by keeping a per cpu shadow copy of the counter values to
> avoid clobbering for the cases where system more than XLEN counters and
> the overflown counter index are beyond XLEN. This issue can only be
> observed only in RV32 if an SBI implementation assigns logical counters
> ids greater than XLEN or firmware counter overflow is supported in the
> future.
> 
> Fixes : commit 22f5dac41004d ("drivers/perf: riscv: Implement SBI PMU snapshot function")

Same comment as for patch 1. The logic looks correct as far as I can tell, so:

Reviewed-by: Samuel Holland <samuel.holland@sifive.com>

One minor comment below.

> Signed-off-by: Atish Patra <atishp@rivosinc.com>
> ---
>  drivers/perf/riscv_pmu_sbi.c   | 49 +++++++++++++++++++---------------
>  include/linux/perf/riscv_pmu.h |  2 ++
>  2 files changed, 30 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c
> index 2694110f1cff..98aaeb13e9db 100644
> --- a/drivers/perf/riscv_pmu_sbi.c
> +++ b/drivers/perf/riscv_pmu_sbi.c
> @@ -588,6 +588,7 @@ static int pmu_sbi_snapshot_setup(struct riscv_pmu *pmu, int cpu)
>  		return sbi_err_map_linux_errno(ret.error);
>  	}
>  
> +	memset(cpu_hw_evt->snapshot_cval_shcopy, 0, sizeof(u64) * RISCV_MAX_COUNTERS);
>  	cpu_hw_evt->snapshot_set_done = true;
>  
>  	return 0;
> @@ -605,7 +606,7 @@ static u64 pmu_sbi_ctr_read(struct perf_event *event)
>  	union sbi_pmu_ctr_info info = pmu_ctr_list[idx];
>  
>  	/* Read the value from the shared memory directly only if counter is stopped */
> -	if (sbi_pmu_snapshot_available() & (hwc->state & PERF_HES_STOPPED)) {
> +	if (sbi_pmu_snapshot_available() && (hwc->state & PERF_HES_STOPPED)) {
>  		val = sdata->ctr_values[idx];
>  		return val;
>  	}
> @@ -769,36 +770,38 @@ static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu)
>  	struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
>  	struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;
>  	unsigned long flag = 0;
> -	int i;
> +	int i, idx;
>  	struct sbiret ret;
> -	unsigned long temp_ctr_values[64] = {0};
> -	unsigned long ctr_val, temp_ctr_overflow_mask = 0;
> +	u64 temp_ctr_overflow_mask = 0;
>  
>  	if (sbi_pmu_snapshot_available())
>  		flag = SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
>  
> +	/* Reset the shadow copy to avoid save/restore any value from previous overflow */
> +	memset(cpu_hw_evt->snapshot_cval_shcopy, 0, sizeof(u64) * RISCV_MAX_COUNTERS);
> +
>  	for (i = 0; i < BITS_TO_LONGS(RISCV_MAX_COUNTERS); i++) {
>  		/* No need to check the error here as we can't do anything about the error */
>  		ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, i * BITS_PER_LONG,
>  				cpu_hw_evt->used_hw_ctrs[i], flag, 0, 0, 0);
>  		if (!ret.error && sbi_pmu_snapshot_available()) {
>  			/* Save the counter values to avoid clobbering */
> -			temp_ctr_values[i * BITS_PER_LONG + i] = sdata->ctr_values[i];
> -			/* Save the overflow mask to avoid clobbering */
> -			if (BIT(i) & sdata->ctr_overflow_mask)
> -				temp_ctr_overflow_mask |= BIT(i + i * BITS_PER_LONG);
> +			for_each_set_bit(idx, &cpu_hw_evt->used_hw_ctrs[i], BITS_PER_LONG) {
> +				cpu_hw_evt->snapshot_cval_shcopy[i * BITS_PER_LONG + idx] =
> +							sdata->ctr_values[idx];
> +				/* Save the overflow mask to avoid clobbering */
> +				if (BIT(idx) & sdata->ctr_overflow_mask)
> +					temp_ctr_overflow_mask |= BIT(idx + i * BITS_PER_LONG);

This is equivalent to doing

  temp_ctr_overflow_mask |= sdata->ctr_overflow_mask << (i * BITS_PER_LONG);

outside the for_each_set_bit() loop.

> +			}
>  		}
>  	}
>  
> -	/* Restore the counter values to the shared memory */
> +	/* Restore the counter values to the shared memory for used hw counters */
>  	if (sbi_pmu_snapshot_available()) {
> -		for (i = 0; i < 64; i++) {
> -			ctr_val = temp_ctr_values[i];
> -			if (ctr_val)
> -				sdata->ctr_values[i] = ctr_val;
> -			if (temp_ctr_overflow_mask)
> -				sdata->ctr_overflow_mask = temp_ctr_overflow_mask;
> -		}
> +		for_each_set_bit(idx, cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS)
> +			sdata->ctr_values[idx] = cpu_hw_evt->snapshot_cval_shcopy[idx];
> +		if (temp_ctr_overflow_mask)
> +			sdata->ctr_overflow_mask = temp_ctr_overflow_mask;
>  	}
>  }
>  
> @@ -850,7 +853,7 @@ static inline void pmu_sbi_start_ovf_ctrs_sbi(struct cpu_hw_events *cpu_hw_evt,
>  static inline void pmu_sbi_start_ovf_ctrs_snapshot(struct cpu_hw_events *cpu_hw_evt,
>  						   u64 ctr_ovf_mask)
>  {
> -	int idx = 0;
> +	int i, idx = 0;
>  	struct perf_event *event;
>  	unsigned long flag = SBI_PMU_START_FLAG_INIT_SNAPSHOT;
>  	u64 max_period, init_val = 0;
> @@ -863,7 +866,7 @@ static inline void pmu_sbi_start_ovf_ctrs_snapshot(struct cpu_hw_events *cpu_hw_
>  			hwc = &event->hw;
>  			max_period = riscv_pmu_ctr_get_width_mask(event);
>  			init_val = local64_read(&hwc->prev_count) & max_period;
> -			sdata->ctr_values[idx] = init_val;
> +			cpu_hw_evt->snapshot_cval_shcopy[idx] = init_val;
>  		}
>  		/*
>  		 * We do not need to update the non-overflow counters the previous
> @@ -871,10 +874,14 @@ static inline void pmu_sbi_start_ovf_ctrs_snapshot(struct cpu_hw_events *cpu_hw_
>  		 */
>  	}
>  
> -	for (idx = 0; idx < BITS_TO_LONGS(RISCV_MAX_COUNTERS); idx++) {
> +	for (i = 0; i < BITS_TO_LONGS(RISCV_MAX_COUNTERS); i++) {
> +		/* Restore the counter values to relative indices for used hw counters */
> +		for_each_set_bit(idx, &cpu_hw_evt->used_hw_ctrs[i], BITS_PER_LONG)
> +			sdata->ctr_values[idx] =
> +					cpu_hw_evt->snapshot_cval_shcopy[idx + i * BITS_PER_LONG];
>  		/* Start all the counters in a single shot */
>  		sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, idx * BITS_PER_LONG,
> -			  cpu_hw_evt->used_hw_ctrs[idx], flag, 0, 0, 0);
> +			  cpu_hw_evt->used_hw_ctrs[i], flag, 0, 0, 0);
>  	}
>  }
>  
> @@ -898,7 +905,7 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
>  	int lidx, hidx, fidx;
>  	struct riscv_pmu *pmu;
>  	struct perf_event *event;
> -	unsigned long overflow;
> +	u64 overflow;
>  	u64 overflowed_ctrs = 0;
>  	struct cpu_hw_events *cpu_hw_evt = dev;
>  	u64 start_clock = sched_clock();
> diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h
> index c3fa90970042..701974639ff2 100644
> --- a/include/linux/perf/riscv_pmu.h
> +++ b/include/linux/perf/riscv_pmu.h
> @@ -45,6 +45,8 @@ struct cpu_hw_events {
>  	phys_addr_t snapshot_addr_phys;
>  	/* Boolean flag to indicate setup is already done */
>  	bool snapshot_set_done;
> +	/* A shadow copy of the counter values to avoid clobbering during multiple SBI calls */
> +	u64 snapshot_cval_shcopy[RISCV_MAX_COUNTERS];
>  };
>  
>  struct riscv_pmu {
diff mbox series

Patch

diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c
index 2694110f1cff..98aaeb13e9db 100644
--- a/drivers/perf/riscv_pmu_sbi.c
+++ b/drivers/perf/riscv_pmu_sbi.c
@@ -588,6 +588,7 @@  static int pmu_sbi_snapshot_setup(struct riscv_pmu *pmu, int cpu)
 		return sbi_err_map_linux_errno(ret.error);
 	}
 
+	memset(cpu_hw_evt->snapshot_cval_shcopy, 0, sizeof(u64) * RISCV_MAX_COUNTERS);
 	cpu_hw_evt->snapshot_set_done = true;
 
 	return 0;
@@ -605,7 +606,7 @@  static u64 pmu_sbi_ctr_read(struct perf_event *event)
 	union sbi_pmu_ctr_info info = pmu_ctr_list[idx];
 
 	/* Read the value from the shared memory directly only if counter is stopped */
-	if (sbi_pmu_snapshot_available() & (hwc->state & PERF_HES_STOPPED)) {
+	if (sbi_pmu_snapshot_available() && (hwc->state & PERF_HES_STOPPED)) {
 		val = sdata->ctr_values[idx];
 		return val;
 	}
@@ -769,36 +770,38 @@  static inline void pmu_sbi_stop_hw_ctrs(struct riscv_pmu *pmu)
 	struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
 	struct riscv_pmu_snapshot_data *sdata = cpu_hw_evt->snapshot_addr;
 	unsigned long flag = 0;
-	int i;
+	int i, idx;
 	struct sbiret ret;
-	unsigned long temp_ctr_values[64] = {0};
-	unsigned long ctr_val, temp_ctr_overflow_mask = 0;
+	u64 temp_ctr_overflow_mask = 0;
 
 	if (sbi_pmu_snapshot_available())
 		flag = SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT;
 
+	/* Reset the shadow copy to avoid save/restore any value from previous overflow */
+	memset(cpu_hw_evt->snapshot_cval_shcopy, 0, sizeof(u64) * RISCV_MAX_COUNTERS);
+
 	for (i = 0; i < BITS_TO_LONGS(RISCV_MAX_COUNTERS); i++) {
 		/* No need to check the error here as we can't do anything about the error */
 		ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, i * BITS_PER_LONG,
 				cpu_hw_evt->used_hw_ctrs[i], flag, 0, 0, 0);
 		if (!ret.error && sbi_pmu_snapshot_available()) {
 			/* Save the counter values to avoid clobbering */
-			temp_ctr_values[i * BITS_PER_LONG + i] = sdata->ctr_values[i];
-			/* Save the overflow mask to avoid clobbering */
-			if (BIT(i) & sdata->ctr_overflow_mask)
-				temp_ctr_overflow_mask |= BIT(i + i * BITS_PER_LONG);
+			for_each_set_bit(idx, &cpu_hw_evt->used_hw_ctrs[i], BITS_PER_LONG) {
+				cpu_hw_evt->snapshot_cval_shcopy[i * BITS_PER_LONG + idx] =
+							sdata->ctr_values[idx];
+				/* Save the overflow mask to avoid clobbering */
+				if (BIT(idx) & sdata->ctr_overflow_mask)
+					temp_ctr_overflow_mask |= BIT(idx + i * BITS_PER_LONG);
+			}
 		}
 	}
 
-	/* Restore the counter values to the shared memory */
+	/* Restore the counter values to the shared memory for used hw counters */
 	if (sbi_pmu_snapshot_available()) {
-		for (i = 0; i < 64; i++) {
-			ctr_val = temp_ctr_values[i];
-			if (ctr_val)
-				sdata->ctr_values[i] = ctr_val;
-			if (temp_ctr_overflow_mask)
-				sdata->ctr_overflow_mask = temp_ctr_overflow_mask;
-		}
+		for_each_set_bit(idx, cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS)
+			sdata->ctr_values[idx] = cpu_hw_evt->snapshot_cval_shcopy[idx];
+		if (temp_ctr_overflow_mask)
+			sdata->ctr_overflow_mask = temp_ctr_overflow_mask;
 	}
 }
 
@@ -850,7 +853,7 @@  static inline void pmu_sbi_start_ovf_ctrs_sbi(struct cpu_hw_events *cpu_hw_evt,
 static inline void pmu_sbi_start_ovf_ctrs_snapshot(struct cpu_hw_events *cpu_hw_evt,
 						   u64 ctr_ovf_mask)
 {
-	int idx = 0;
+	int i, idx = 0;
 	struct perf_event *event;
 	unsigned long flag = SBI_PMU_START_FLAG_INIT_SNAPSHOT;
 	u64 max_period, init_val = 0;
@@ -863,7 +866,7 @@  static inline void pmu_sbi_start_ovf_ctrs_snapshot(struct cpu_hw_events *cpu_hw_
 			hwc = &event->hw;
 			max_period = riscv_pmu_ctr_get_width_mask(event);
 			init_val = local64_read(&hwc->prev_count) & max_period;
-			sdata->ctr_values[idx] = init_val;
+			cpu_hw_evt->snapshot_cval_shcopy[idx] = init_val;
 		}
 		/*
 		 * We do not need to update the non-overflow counters the previous
@@ -871,10 +874,14 @@  static inline void pmu_sbi_start_ovf_ctrs_snapshot(struct cpu_hw_events *cpu_hw_
 		 */
 	}
 
-	for (idx = 0; idx < BITS_TO_LONGS(RISCV_MAX_COUNTERS); idx++) {
+	for (i = 0; i < BITS_TO_LONGS(RISCV_MAX_COUNTERS); i++) {
+		/* Restore the counter values to relative indices for used hw counters */
+		for_each_set_bit(idx, &cpu_hw_evt->used_hw_ctrs[i], BITS_PER_LONG)
+			sdata->ctr_values[idx] =
+					cpu_hw_evt->snapshot_cval_shcopy[idx + i * BITS_PER_LONG];
 		/* Start all the counters in a single shot */
 		sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, idx * BITS_PER_LONG,
-			  cpu_hw_evt->used_hw_ctrs[idx], flag, 0, 0, 0);
+			  cpu_hw_evt->used_hw_ctrs[i], flag, 0, 0, 0);
 	}
 }
 
@@ -898,7 +905,7 @@  static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
 	int lidx, hidx, fidx;
 	struct riscv_pmu *pmu;
 	struct perf_event *event;
-	unsigned long overflow;
+	u64 overflow;
 	u64 overflowed_ctrs = 0;
 	struct cpu_hw_events *cpu_hw_evt = dev;
 	u64 start_clock = sched_clock();
diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h
index c3fa90970042..701974639ff2 100644
--- a/include/linux/perf/riscv_pmu.h
+++ b/include/linux/perf/riscv_pmu.h
@@ -45,6 +45,8 @@  struct cpu_hw_events {
 	phys_addr_t snapshot_addr_phys;
 	/* Boolean flag to indicate setup is already done */
 	bool snapshot_set_done;
+	/* A shadow copy of the counter values to avoid clobbering during multiple SBI calls */
+	u64 snapshot_cval_shcopy[RISCV_MAX_COUNTERS];
 };
 
 struct riscv_pmu {