Message ID | 20250127222031.3078945-5-coltonlewis@google.com
---|---
State | New |
Series | PMU partitioning driver support
On 27/01/2025 22:20, Colton Lewis wrote:
> If the PMU is partitioned, keep the driver out of the guest counter
> partition and only use the host counter partition. Partitioning is
> defined by the MDCR_EL2.HPMN register field and saved in
> cpu_pmu->hpmn. The range 0..HPMN-1 is accessible by EL1 and EL0 while
> HPMN..PMCR.N is reserved for EL2.
>
> Define some macros that take HPMN as an argument and construct
> mutually exclusive bitmaps for testing which partition a particular
> counter is in. Note that despite their different position in the
> bitmap, the cycle and instruction counters are always in the guest
> partition.
>
> Signed-off-by: Colton Lewis <coltonlewis@google.com>
> ---
>  drivers/perf/arm_pmuv3.c       | 72 +++++++++++++++++++++++++++++-----
>  include/linux/perf/arm_pmuv3.h |  8 ++++
>  2 files changed, 70 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
> index 55f9ae560715..c61845fad9d9 100644
> --- a/drivers/perf/arm_pmuv3.c
> +++ b/drivers/perf/arm_pmuv3.c
> @@ -754,15 +754,19 @@ static void armv8pmu_disable_event_irq(struct perf_event *event)
>  	armv8pmu_disable_intens(BIT(event->hw.idx));
>  }
>
> -static u64 armv8pmu_getreset_flags(void)
> +static u64 armv8pmu_getreset_flags(struct arm_pmu *cpu_pmu)
>  {
>  	u64 value;
>
>  	/* Read */
>  	value = read_pmovsclr();
>
> +	if (cpu_pmu->partitioned)
> +		value &= ARMV8_PMU_HOST_CNT_PART(cpu_pmu->hpmn);
> +	else
> +		value &= ARMV8_PMU_OVERFLOWED_MASK;
> +
>  	/* Write to clear flags */
> -	value &= ARMV8_PMU_OVERFLOWED_MASK;
>  	write_pmovsclr(value);
>
>  	return value;
> @@ -789,6 +793,18 @@ static void armv8pmu_disable_user_access(void)
>  	update_pmuserenr(0);
>  }
>
> +static bool armv8pmu_is_guest_part(struct arm_pmu *cpu_pmu, u8 idx)
> +{
> +	return cpu_pmu->partitioned &&
> +		(BIT(idx) & ARMV8_PMU_GUEST_CNT_PART(cpu_pmu->hpmn));
> +}
> +
> +static bool armv8pmu_is_host_part(struct arm_pmu *cpu_pmu, u8 idx)
> +{
> +	return !cpu_pmu->partitioned ||
> +		(BIT(idx) & ARMV8_PMU_HOST_CNT_PART(cpu_pmu->hpmn));
> +}
> +
>  static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu)
>  {
>  	int i;
> @@ -797,6 +813,8 @@ static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu)
>  	if (is_pmuv3p9(cpu_pmu->pmuver)) {
>  		u64 mask = 0;
>  		for_each_set_bit(i, cpuc->used_mask, ARMPMU_MAX_HWEVENTS) {
> +			if (armv8pmu_is_guest_part(cpu_pmu, i))
> +				continue;
>  			if (armv8pmu_event_has_user_read(cpuc->events[i]))
>  				mask |= BIT(i);
>  		}
> @@ -805,6 +823,8 @@ static void armv8pmu_enable_user_access(struct arm_pmu *cpu_pmu)
>  		/* Clear any unused counters to avoid leaking their contents */
>  		for_each_andnot_bit(i, cpu_pmu->cntr_mask, cpuc->used_mask,
>  				    ARMPMU_MAX_HWEVENTS) {
> +			if (armv8pmu_is_guest_part(cpu_pmu, i))
> +				continue;
>  			if (i == ARMV8_PMU_CYCLE_IDX)
>  				write_pmccntr(0);
>  			else if (i == ARMV8_PMU_INSTR_IDX)
> @@ -850,7 +870,10 @@ static void armv8pmu_start(struct arm_pmu *cpu_pmu)
>  		armv8pmu_disable_user_access();
>
>  	/* Enable all counters */
> -	armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E);
> +	if (cpu_pmu->partitioned)
> +		armv8pmu_mdcr_write(armv8pmu_mdcr_read() | ARMV8_PMU_MDCR_HPME);
> +	else
> +		armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E);
>
>  	kvm_vcpu_pmu_resync_el0();
>  }
> @@ -858,7 +881,10 @@ static void armv8pmu_start(struct arm_pmu *cpu_pmu)
>  static void armv8pmu_stop(struct arm_pmu *cpu_pmu)
>  {
>  	/* Disable all counters */
> -	armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E);
> +	if (cpu_pmu->partitioned)
> +		armv8pmu_mdcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_MDCR_HPME);

typo: s/armv8pmu_pmcr_read/armv8pmu_mdcr_read

Suzuki

> +	else
> +		armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E);
>  }
>
>  static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
> @@ -872,7 +898,7 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
>  	/*
>  	 * Get and reset the IRQ flags
>  	 */
> -	pmovsr = armv8pmu_getreset_flags();
> +	pmovsr = armv8pmu_getreset_flags(cpu_pmu);
>
>  	/*
>  	 * Did an overflow occur?
> @@ -930,6 +956,8 @@ static int armv8pmu_get_single_idx(struct pmu_hw_events *cpuc,
>  	int idx;
>
>  	for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMV8_PMU_MAX_GENERAL_COUNTERS) {
> +		if (armv8pmu_is_guest_part(cpu_pmu, idx))
> +			continue;
>  		if (!test_and_set_bit(idx, cpuc->used_mask))
>  			return idx;
>  	}
> @@ -946,6 +974,8 @@ static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc,
>  	 * the lower idx must be even.
>  	 */
>  	for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMV8_PMU_MAX_GENERAL_COUNTERS) {
> +		if (armv8pmu_is_guest_part(cpu_pmu, idx))
> +			continue;
>  		if (!(idx & 0x1))
>  			continue;
>  		if (!test_and_set_bit(idx, cpuc->used_mask)) {
> @@ -968,6 +998,7 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
>
>  	/* Always prefer to place a cycle counter into the cycle counter. */
>  	if ((evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) &&
> +	    !cpu_pmu->partitioned &&
>  	    !armv8pmu_event_get_threshold(&event->attr)) {
>  		if (!test_and_set_bit(ARMV8_PMU_CYCLE_IDX, cpuc->used_mask))
>  			return ARMV8_PMU_CYCLE_IDX;
> @@ -983,6 +1014,7 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
>  	 * may not know how to handle it.
>  	 */
>  	if ((evtype == ARMV8_PMUV3_PERFCTR_INST_RETIRED) &&
> +	    !cpu_pmu->partitioned &&
>  	    !armv8pmu_event_get_threshold(&event->attr) &&
>  	    test_bit(ARMV8_PMU_INSTR_IDX, cpu_pmu->cntr_mask) &&
>  	    !armv8pmu_event_want_user_access(event)) {
> @@ -994,7 +1026,7 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
>  	 * Otherwise use events counters
>  	 */
>  	if (armv8pmu_event_is_chained(event))
> -		return  armv8pmu_get_chain_idx(cpuc, cpu_pmu);
> +		return armv8pmu_get_chain_idx(cpuc, cpu_pmu);
>  	else
>  		return armv8pmu_get_single_idx(cpuc, cpu_pmu);
>  }
> @@ -1086,6 +1118,15 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
>  	return 0;
>  }
>
> +static void armv8pmu_reset_host_counters(struct arm_pmu *cpu_pmu)
> +{
> +	int idx;
> +
> +	for_each_set_bit(idx, cpu_pmu->cntr_mask, ARMV8_PMU_MAX_GENERAL_COUNTERS)
> +		if (armv8pmu_is_host_part(cpu_pmu, idx))
> +			armv8pmu_write_evcntr(idx, 0);
> +}
> +
>  static void armv8pmu_reset(void *info)
>  {
>  	struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
> @@ -1093,8 +1134,10 @@ static void armv8pmu_reset(void *info)
>
>  	bitmap_to_arr64(&mask, cpu_pmu->cntr_mask, ARMPMU_MAX_HWEVENTS);
>
> -	if (cpu_pmu->partitioned)
> +	if (cpu_pmu->partitioned) {
>  		armv8pmu_partition(cpu_pmu->hpmn);
> +		mask &= ARMV8_PMU_HOST_CNT_PART(cpu_pmu->hpmn);
> +	}
>
>  	/* The counter and interrupt enable registers are unknown at reset. */
>  	armv8pmu_disable_counter(mask);
> @@ -1103,11 +1146,20 @@ static void armv8pmu_reset(void *info)
>  	/* Clear the counters we flip at guest entry/exit */
>  	kvm_clr_pmu_events(mask);
>
> +
> +	pmcr = ARMV8_PMU_PMCR_LC;
> +
>  	/*
> -	 * Initialize & Reset PMNC. Request overflow interrupt for
> -	 * 64 bit cycle counter but cheat in armv8pmu_write_counter().
> +	 * Initialize & Reset PMNC. Request overflow interrupt for 64
> +	 * bit cycle counter but cheat in armv8pmu_write_counter().
> +	 *
> +	 * When partitioned, there is no single bit to reset only the
> +	 * host counters. so reset them individually.
>  	 */
> -	pmcr = ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_LC;
> +	if (cpu_pmu->partitioned)
> +		armv8pmu_reset_host_counters(cpu_pmu);
> +	else
> +		pmcr = ARMV8_PMU_PMCR_P | ARMV8_PMU_PMCR_C;
>
>  	/* Enable long event counter support where available */
>  	if (armv8pmu_has_long_event(cpu_pmu))
> diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h
> index 115ee39f693a..5f8b143794ce 100644
> --- a/include/linux/perf/arm_pmuv3.h
> +++ b/include/linux/perf/arm_pmuv3.h
> @@ -247,6 +247,14 @@
>  #define ARMV8_PMU_OVSR_F		ARMV8_PMU_CNT_MASK_F
>  /* Mask for writable bits is both P and C fields */
>  #define ARMV8_PMU_OVERFLOWED_MASK	ARMV8_PMU_CNT_MASK_ALL
> +
> +/* Masks for guest and host counter partitions */
> +#define ARMV8_PMU_HPMN_CNT_MASK(N)	GENMASK((N) - 1, 0)
> +#define ARMV8_PMU_GUEST_CNT_PART(N)	(ARMV8_PMU_HPMN_CNT_MASK(N) | \
> +					 ARMV8_PMU_CNT_MASK_C | \
> +					 ARMV8_PMU_CNT_MASK_F)
> +#define ARMV8_PMU_HOST_CNT_PART(N)	(ARMV8_PMU_CNT_MASK_ALL & \
> +					 ~ARMV8_PMU_GUEST_CNT_PART(N))
>  /*
>   * PMXEVTYPER: Event selection reg
>   */
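[Editor's note] The effect of the new partition macros is easiest to see with concrete numbers. The stand-alone sketch below (not part of the patch) mirrors the macro definitions and prints both partitions for an example HPMN of 4; the ARMV8_PMU_CNT_MASK_* values are copied from what the kernel header defines (P = bits 0-30, C = bit 31, F = bit 32) and should be treated as an assumption of this sketch, as should the GENMASK/BIT stand-ins.

```c
/*
 * Illustration of the partition masks added by this patch.
 * HPMN = 4 is an arbitrary example value: counters 0..3 go to
 * the guest, counters 4..30 stay with the host.
 */
#include <stdio.h>

#define GENMASK(h, l)	(((~0ULL) << (l)) & ((~0ULL) >> (63 - (h))))
#define BIT(n)		(1ULL << (n))

/* Assumed to mirror include/linux/perf/arm_pmuv3.h */
#define ARMV8_PMU_CNT_MASK_P	GENMASK(30, 0)	/* general-purpose counters */
#define ARMV8_PMU_CNT_MASK_C	BIT(31)		/* cycle counter */
#define ARMV8_PMU_CNT_MASK_F	BIT(32)		/* instruction counter */
#define ARMV8_PMU_CNT_MASK_ALL	(ARMV8_PMU_CNT_MASK_P | \
				 ARMV8_PMU_CNT_MASK_C | \
				 ARMV8_PMU_CNT_MASK_F)

/* The three macros introduced by the patch, verbatim */
#define ARMV8_PMU_HPMN_CNT_MASK(N)	GENMASK((N) - 1, 0)
#define ARMV8_PMU_GUEST_CNT_PART(N)	(ARMV8_PMU_HPMN_CNT_MASK(N) | \
					 ARMV8_PMU_CNT_MASK_C | \
					 ARMV8_PMU_CNT_MASK_F)
#define ARMV8_PMU_HOST_CNT_PART(N)	(ARMV8_PMU_CNT_MASK_ALL & \
					 ~ARMV8_PMU_GUEST_CNT_PART(N))

int main(void)
{
	unsigned int hpmn = 4;

	/* Guest: bits 0..HPMN-1 plus the cycle (C) and instruction (F) bits */
	printf("guest: %#018llx\n",
	       (unsigned long long)ARMV8_PMU_GUEST_CNT_PART(hpmn));
	/* Host: the remaining general-purpose counters, HPMN..30 */
	printf("host:  %#018llx\n",
	       (unsigned long long)ARMV8_PMU_HOST_CNT_PART(hpmn));
	return 0;
}
```

This prints guest = 0x000000018000000f and host = 0x000000007ffffff0: the two masks are disjoint by construction, and the C and F bits land in the guest partition regardless of HPMN, matching the commit message.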
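[Editor's note] For completeness, this is how armv8pmu_stop() presumably reads once the typo Suzuki flagged is folded in: the partitioned path must read MDCR back via armv8pmu_mdcr_read(), not armv8pmu_pmcr_read(), before clearing HPME. A sketch of the expected respin, not a tested hunk:

```c
static void armv8pmu_stop(struct arm_pmu *cpu_pmu)
{
	/* Disable all counters */
	if (cpu_pmu->partitioned)
		/* Read-modify-write MDCR_EL2, not PMCR_EL0, when partitioned */
		armv8pmu_mdcr_write(armv8pmu_mdcr_read() & ~ARMV8_PMU_MDCR_HPME);
	else
		armv8pmu_pmcr_write(armv8pmu_pmcr_read() & ~ARMV8_PMU_PMCR_E);
}
```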