Message ID | 20230703124647.215952-6-alexghiti@rivosinc.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | riscv: Allow userspace to directly access perf counters | expand |
On Mon, Jul 3, 2023 at 5:51 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote: > > Provide all the necessary bits in the generic riscv pmu driver to be > able to mmap perf events in userspace: the heavy lifting lies in the > driver backend, namely the legacy and sbi implementations. > > Note that arch_perf_update_userpage is almost a copy of arm64 code. > > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com> > Reviewed-by: Andrew Jones <ajones@ventanamicro.com> > --- > drivers/perf/riscv_pmu.c | 105 +++++++++++++++++++++++++++++++++ > include/linux/perf/riscv_pmu.h | 4 ++ > 2 files changed, 109 insertions(+) > > diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c > index ebca5eab9c9b..432ad2e80ce3 100644 > --- a/drivers/perf/riscv_pmu.c > +++ b/drivers/perf/riscv_pmu.c > @@ -14,9 +14,73 @@ > #include <linux/perf/riscv_pmu.h> > #include <linux/printk.h> > #include <linux/smp.h> > +#include <linux/sched_clock.h> > > #include <asm/sbi.h> > > +static bool riscv_perf_user_access(struct perf_event *event) > +{ > + return ((event->attr.type == PERF_TYPE_HARDWARE) || > + (event->attr.type == PERF_TYPE_HW_CACHE) || > + (event->attr.type == PERF_TYPE_RAW)) && > + !!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT); > +} > + > +void arch_perf_update_userpage(struct perf_event *event, > + struct perf_event_mmap_page *userpg, u64 now) > +{ > + struct clock_read_data *rd; > + unsigned int seq; > + u64 ns; > + > + userpg->cap_user_time = 0; > + userpg->cap_user_time_zero = 0; > + userpg->cap_user_time_short = 0; > + userpg->cap_user_rdpmc = riscv_perf_user_access(event); > + > + userpg->pmc_width = 64; > + > + do { > + rd = sched_clock_read_begin(&seq); > + > + userpg->time_mult = rd->mult; > + userpg->time_shift = rd->shift; > + userpg->time_zero = rd->epoch_ns; > + userpg->time_cycles = rd->epoch_cyc; > + userpg->time_mask = rd->sched_clock_mask; > + > + /* > + * Subtract the cycle base, such that software that > + * doesn't know about cap_user_time_short still 'works' > + * assuming no wraps. > + */ > + ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift); > + userpg->time_zero -= ns; > + > + } while (sched_clock_read_retry(seq)); > + > + userpg->time_offset = userpg->time_zero - now; > + > + /* > + * time_shift is not expected to be greater than 31 due to > + * the original published conversion algorithm shifting a > + * 32-bit value (now specifies a 64-bit value) - refer > + * perf_event_mmap_page documentation in perf_event.h. > + */ > + if (userpg->time_shift == 32) { > + userpg->time_shift = 31; > + userpg->time_mult >>= 1; > + } > + > + /* > + * Internal timekeeping for enabled/running/stopped times > + * is always computed with the sched_clock. > + */ > + userpg->cap_user_time = 1; > + userpg->cap_user_time_zero = 1; > + userpg->cap_user_time_short = 1; > +} > + > static unsigned long csr_read_num(int csr_num) > { > #define switchcase_csr_read(__csr_num, __val) {\ > @@ -171,6 +235,8 @@ int riscv_pmu_event_set_period(struct perf_event *event) > > local64_set(&hwc->prev_count, (u64)-left); > > + perf_event_update_userpage(event); > + > return overflow; > } > > @@ -267,6 +333,9 @@ static int riscv_pmu_event_init(struct perf_event *event) > hwc->idx = -1; > hwc->event_base = mapped_event; > > + if (rvpmu->event_init) > + rvpmu->event_init(event); > + > if (!is_sampling_event(event)) { > /* > * For non-sampling runs, limit the sample_period to half > @@ -283,6 +352,39 @@ static int riscv_pmu_event_init(struct perf_event *event) > return 0; > } > > +static int riscv_pmu_event_idx(struct perf_event *event) > +{ > + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); > + > + if (!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT)) > + return 0; > + > + if (rvpmu->csr_index) > + return rvpmu->csr_index(event) + 1; > + > + return 0; > +} > + > +static void riscv_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm) > +{ > + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); > + > + if (rvpmu->event_mapped) { > + rvpmu->event_mapped(event, mm); > + perf_event_update_userpage(event); > + } > +} > + > +static void riscv_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm) > +{ > + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); > + > + if (rvpmu->event_unmapped) { > + rvpmu->event_unmapped(event, mm); > + perf_event_update_userpage(event); > + } > +} > + > struct riscv_pmu *riscv_pmu_alloc(void) > { > struct riscv_pmu *pmu; > @@ -307,6 +409,9 @@ struct riscv_pmu *riscv_pmu_alloc(void) > } > pmu->pmu = (struct pmu) { > .event_init = riscv_pmu_event_init, > + .event_mapped = riscv_pmu_event_mapped, > + .event_unmapped = riscv_pmu_event_unmapped, > + .event_idx = riscv_pmu_event_idx, > .add = riscv_pmu_add, > .del = riscv_pmu_del, > .start = riscv_pmu_start, > diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h > index 5deeea0be7cb..43282e22ebe1 100644 > --- a/include/linux/perf/riscv_pmu.h > +++ b/include/linux/perf/riscv_pmu.h > @@ -55,6 +55,10 @@ struct riscv_pmu { > void (*ctr_start)(struct perf_event *event, u64 init_val); > void (*ctr_stop)(struct perf_event *event, unsigned long flag); > int (*event_map)(struct perf_event *event, u64 *config); > + void (*event_init)(struct perf_event *event); > + void (*event_mapped)(struct perf_event *event, struct mm_struct *mm); > + void (*event_unmapped)(struct perf_event *event, struct mm_struct *mm); > + uint8_t (*csr_index)(struct perf_event *event); > > struct cpu_hw_events __percpu *hw_events; > struct hlist_node node; > -- > 2.39.2 > Reviewed-by: Atish Patra <atishp@rivosinc.com>
diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c index ebca5eab9c9b..432ad2e80ce3 100644 --- a/drivers/perf/riscv_pmu.c +++ b/drivers/perf/riscv_pmu.c @@ -14,9 +14,73 @@ #include <linux/perf/riscv_pmu.h> #include <linux/printk.h> #include <linux/smp.h> +#include <linux/sched_clock.h> #include <asm/sbi.h> +static bool riscv_perf_user_access(struct perf_event *event) +{ + return ((event->attr.type == PERF_TYPE_HARDWARE) || + (event->attr.type == PERF_TYPE_HW_CACHE) || + (event->attr.type == PERF_TYPE_RAW)) && + !!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT); +} + +void arch_perf_update_userpage(struct perf_event *event, + struct perf_event_mmap_page *userpg, u64 now) +{ + struct clock_read_data *rd; + unsigned int seq; + u64 ns; + + userpg->cap_user_time = 0; + userpg->cap_user_time_zero = 0; + userpg->cap_user_time_short = 0; + userpg->cap_user_rdpmc = riscv_perf_user_access(event); + + userpg->pmc_width = 64; + + do { + rd = sched_clock_read_begin(&seq); + + userpg->time_mult = rd->mult; + userpg->time_shift = rd->shift; + userpg->time_zero = rd->epoch_ns; + userpg->time_cycles = rd->epoch_cyc; + userpg->time_mask = rd->sched_clock_mask; + + /* + * Subtract the cycle base, such that software that + * doesn't know about cap_user_time_short still 'works' + * assuming no wraps. + */ + ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift); + userpg->time_zero -= ns; + + } while (sched_clock_read_retry(seq)); + + userpg->time_offset = userpg->time_zero - now; + + /* + * time_shift is not expected to be greater than 31 due to + * the original published conversion algorithm shifting a + * 32-bit value (now specifies a 64-bit value) - refer + * perf_event_mmap_page documentation in perf_event.h. + */ + if (userpg->time_shift == 32) { + userpg->time_shift = 31; + userpg->time_mult >>= 1; + } + + /* + * Internal timekeeping for enabled/running/stopped times + * is always computed with the sched_clock. + */ + userpg->cap_user_time = 1; + userpg->cap_user_time_zero = 1; + userpg->cap_user_time_short = 1; +} + static unsigned long csr_read_num(int csr_num) { #define switchcase_csr_read(__csr_num, __val) {\ @@ -171,6 +235,8 @@ int riscv_pmu_event_set_period(struct perf_event *event) local64_set(&hwc->prev_count, (u64)-left); + perf_event_update_userpage(event); + return overflow; } @@ -267,6 +333,9 @@ static int riscv_pmu_event_init(struct perf_event *event) hwc->idx = -1; hwc->event_base = mapped_event; + if (rvpmu->event_init) + rvpmu->event_init(event); + if (!is_sampling_event(event)) { /* * For non-sampling runs, limit the sample_period to half @@ -283,6 +352,39 @@ static int riscv_pmu_event_init(struct perf_event *event) return 0; } +static int riscv_pmu_event_idx(struct perf_event *event) +{ + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); + + if (!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT)) + return 0; + + if (rvpmu->csr_index) + return rvpmu->csr_index(event) + 1; + + return 0; +} + +static void riscv_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm) +{ + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); + + if (rvpmu->event_mapped) { + rvpmu->event_mapped(event, mm); + perf_event_update_userpage(event); + } +} + +static void riscv_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm) +{ + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); + + if (rvpmu->event_unmapped) { + rvpmu->event_unmapped(event, mm); + perf_event_update_userpage(event); + } +} + struct riscv_pmu *riscv_pmu_alloc(void) { struct riscv_pmu *pmu; @@ -307,6 +409,9 @@ struct riscv_pmu *riscv_pmu_alloc(void) } pmu->pmu = (struct pmu) { .event_init = riscv_pmu_event_init, + .event_mapped = riscv_pmu_event_mapped, + .event_unmapped = riscv_pmu_event_unmapped, + .event_idx = riscv_pmu_event_idx, .add = riscv_pmu_add, .del = riscv_pmu_del, .start = riscv_pmu_start, diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h index 5deeea0be7cb..43282e22ebe1 100644 --- a/include/linux/perf/riscv_pmu.h +++ b/include/linux/perf/riscv_pmu.h @@ -55,6 +55,10 @@ struct riscv_pmu { void (*ctr_start)(struct perf_event *event, u64 init_val); void (*ctr_stop)(struct perf_event *event, unsigned long flag); int (*event_map)(struct perf_event *event, u64 *config); + void (*event_init)(struct perf_event *event); + void (*event_mapped)(struct perf_event *event, struct mm_struct *mm); + void (*event_unmapped)(struct perf_event *event, struct mm_struct *mm); + uint8_t (*csr_index)(struct perf_event *event); struct cpu_hw_events __percpu *hw_events; struct hlist_node node;