Message ID | 20211225054647.1750577-8-atishp@rivosinc.com
State      | New, archived
Series     | Improve RISC-V Perf support using SBI PMU and sscofpmf extension
Hi Atish,

> From: Atish Patra <atish.patra@wdc.com>
>
> The sscofpmf extension allows counter overflow and filtering for
> programmable counters. Enable the perf driver to handle the overflow
> interrupt. The overflow interrupt is a hart local interrupt.
> Thus, per cpu overflow interrupts are setup as a child under the root
> INTC irq domain.
>
> Signed-off-by: Atish Patra <atish.patra@wdc.com>
> Signed-off-by: Atish Patra <atishp@rivosinc.com>
> ---
>  arch/riscv/include/asm/csr.h   |   8 +-
>  arch/riscv/include/asm/hwcap.h |   1 +
>  arch/riscv/kernel/cpufeature.c |   1 +
>  drivers/perf/riscv_pmu_sbi.c   | 218 +++++++++++++++++++++++++++++++--
>  include/linux/perf/riscv_pmu.h |   2 +
>  5 files changed, 222 insertions(+), 8 deletions(-)
>
[...]
>
>  static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node)
>  {
>          struct riscv_pmu *pmu = hlist_entry_safe(node, struct riscv_pmu, node);
> +        struct cpu_hw_events __percpu *hw_events = pmu->hw_events;
>
>          /* Enable the access for TIME csr only from the user mode now */
>          csr_write(CSR_SCOUNTEREN, 0x2);
>
>          /* Stop all the counters so that they can be enabled from perf */
> -        sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP,
> -                  0, GENMASK_ULL(pmu->num_counters - 1, 0), 0, 0, 0, 0);
> +        pmu_sbi_stop_all(pmu);
> +
> +        if (riscv_isa_extension_available(NULL, SSCOFPMF)) {
> +                hw_events->irq = riscv_pmu_irq;

As I understand it, hw_events points to per-CPU data, so I think we should
use per_cpu(hw_events->irq, cpu) to store the per-CPU data.

Thanks.
Eric Lin

> +                csr_clear(CSR_IP, BIT(RV_IRQ_PMU));
> +                csr_set(CSR_IE, BIT(RV_IRQ_PMU));
> +                enable_percpu_irq(riscv_pmu_irq, IRQ_TYPE_NONE);
> +        }
>
>          return 0;
>  }
>
[...]
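For readers unfamiliar with the __percpu annotation, here is a minimal
sketch of the access pattern Eric is pointing at. pmu_store_irq() is a
hypothetical helper for illustration only, not code from this series:

        #include <linux/percpu.h>

        static void pmu_store_irq(struct cpu_hw_events __percpu *hw_events,
                                  unsigned int cpu, int irq)
        {
                /*
                 * Wrong: a __percpu pointer is a cookie into the per-CPU
                 * area, not a regular pointer, so a direct dereference
                 * does not address any particular CPU's copy:
                 *
                 *      hw_events->irq = irq;
                 */

                /* Address a specific CPU's copy explicitly... */
                per_cpu_ptr(hw_events, cpu)->irq = irq;

                /* ...or the local copy, when running on the target CPU. */
                this_cpu_ptr(hw_events)->irq = irq;
        }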
On Tue, Jan 4, 2022 at 11:44 PM Eric Lin <eric.lin@sifive.com> wrote:
>
> Hi Atish,
>
[...]
> > +        pmu_sbi_stop_all(pmu);
> > +
> > +        if (riscv_isa_extension_available(NULL, SSCOFPMF)) {
> > +                hw_events->irq = riscv_pmu_irq;
>
> As I understand it, hw_events points to per-CPU data, so I think we should
> use per_cpu(hw_events->irq, cpu) to store the per-CPU data.
>

Thanks for catching it. Fixed it.

> Thanks.
> Eric Lin
>
[...]
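The corrected hunk itself is not shown in this thread. Since CPU-hotplug
"starting" callbacks run on the CPU that is coming online, one plausible
shape for the fix (a sketch under that assumption, not the actual
follow-up patch) is:

        static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node)
        {
                struct riscv_pmu *pmu = hlist_entry_safe(node, struct riscv_pmu, node);
                /* Resolve this CPU's copy instead of writing through the
                 * __percpu cookie; per_cpu_ptr(pmu->hw_events, cpu) would
                 * work equally well here. */
                struct cpu_hw_events *hw_events = this_cpu_ptr(pmu->hw_events);

                /* Enable the access for TIME csr only from the user mode now */
                csr_write(CSR_SCOUNTEREN, 0x2);

                /* Stop all the counters so that they can be enabled from perf */
                pmu_sbi_stop_all(pmu);

                if (riscv_isa_extension_available(NULL, SSCOFPMF)) {
                        hw_events->irq = riscv_pmu_irq;
                        csr_clear(CSR_IP, BIT(RV_IRQ_PMU));
                        csr_set(CSR_IE, BIT(RV_IRQ_PMU));
                        enable_percpu_irq(riscv_pmu_irq, IRQ_TYPE_NONE);
                }

                return 0;
        }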
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index e4d369830af4..8518eb0014bc 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -63,6 +63,7 @@
 #define IRQ_M_TIMER             7
 #define IRQ_S_EXT               9
 #define IRQ_M_EXT               11
+#define IRQ_PMU_OVF             13
 
 /* Exception causes */
 #define EXC_INST_MISALIGNED     0
@@ -151,6 +152,8 @@
 #define CSR_HPMCOUNTER30H       0xc9e
 #define CSR_HPMCOUNTER31H       0xc9f
 
+#define CSR_SSCOUNTOVF          0xda0
+
 #define CSR_SSTATUS             0x100
 #define CSR_SIE                 0x104
 #define CSR_STVEC               0x105
@@ -212,7 +215,10 @@
 # define RV_IRQ_SOFT            IRQ_S_SOFT
 # define RV_IRQ_TIMER           IRQ_S_TIMER
 # define RV_IRQ_EXT             IRQ_S_EXT
-#endif /* CONFIG_RISCV_M_MODE */
+# define RV_IRQ_PMU             IRQ_PMU_OVF
+# define SIP_LCOFIP             (_AC(0x1, UL) << IRQ_PMU_OVF)
+
+#endif /* !CONFIG_RISCV_M_MODE */
 
 /* IE/IP (Supervisor/Machine Interrupt Enable/Pending) flags */
 #define IE_SIE          (_AC(0x1, UL) << RV_IRQ_SOFT)
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index 368ab0f330c8..417e0840647a 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -50,6 +50,7 @@ extern unsigned long elf_hwcap;
  * available logical extension id.
  */
 enum riscv_isa_ext_id {
+        RISCV_ISA_EXT_SSCOFPMF = RISCV_ISA_EXT_BASE,
         RISCV_ISA_EXT_ID_MAX = RISCV_ISA_EXT_MAX,
 };
 
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index c70eeec17f5b..3eedfb9ecd48 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -71,6 +71,7 @@ EXPORT_SYMBOL_GPL(__riscv_isa_extension_available);
 }
 
 static struct riscv_isa_ext_data isa_ext_arr[] = {
+        __RISCV_ISA_EXT_DATA(sscofpmf, sscofpmf, RISCV_ISA_EXT_SSCOFPMF),
         __RISCV_ISA_EXT_DATA("", "", RISCV_ISA_EXT_MAX),
 };
 
diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c
index f078d423a89a..ff8692ac43a5 100644
--- a/drivers/perf/riscv_pmu_sbi.c
+++ b/drivers/perf/riscv_pmu_sbi.c
@@ -11,8 +11,13 @@
 #include <linux/mod_devicetable.h>
 #include <linux/perf/riscv_pmu.h>
 #include <linux/platform_device.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/of_irq.h>
+#include <linux/of.h>
 
 #include <asm/sbi.h>
+#include <asm/hwcap.h>
 
 union sbi_pmu_ctr_info {
         unsigned long value;
@@ -33,6 +38,7 @@ union sbi_pmu_ctr_info {
  * per_cpu in case of harts with different pmu counters
  */
 static union sbi_pmu_ctr_info *pmu_ctr_list;
+static unsigned int riscv_pmu_irq;
 
 struct pmu_event_data {
         union {
@@ -450,33 +456,223 @@ static int pmu_sbi_get_ctrinfo(int nctr)
         return 0;
 }
 
+static inline void pmu_sbi_stop_all(struct riscv_pmu *pmu)
+{
+        int idx = 0;
+        struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
+        unsigned long cmask = 0;
+        union sbi_pmu_ctr_info *info;
+
+        /* We should only stop the used hardware counters */
+        for_each_set_bit(idx, cpu_hw_evt->used_event_ctrs, RISCV_MAX_COUNTERS) {
+                info = &pmu_ctr_list[idx];
+                if (info->type != SBI_PMU_CTR_TYPE_FW)
+                        cmask |= (1 << idx);
+        }
+        /* No need to check the error here as we can't do anything about the error */
+        sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, 0, cmask, 0, 0, 0, 0);
+}
+
+/**
+ * This function starts all the used counters in two step approach.
+ * Any counter that did not overflow can be start in a single step
+ * while the overflowed counters need to be started with updated initialization
+ * value.
+ */
+static inline void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu,
+                                               unsigned long ctr_ovf_mask)
+{
+        int idx = 0;
+        struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
+        struct perf_event *event;
+        unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE;
+        unsigned long ctr_start_mask = 0;
+        uint64_t max_period;
+        struct hw_perf_event *hwc;
+        u64 init_val = 0;
+
+        ctr_start_mask = cpu_hw_evt->used_event_ctrs[0] & ~ctr_ovf_mask;
+
+        /* Start all the counters that did not overflow in a single shot */
+        sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, 0, ctr_start_mask,
+                  0, 0, 0, 0);
+
+        /* Reinitialize and start all the counter that overflowed */
+        while (ctr_ovf_mask) {
+                if (ctr_ovf_mask & 0x01) {
+                        event = cpu_hw_evt->events[idx];
+                        hwc = &event->hw;
+                        max_period = riscv_pmu_ctr_get_width_mask(event);
+                        init_val = local64_read(&hwc->prev_count) & max_period;
+                        sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, idx, 1,
+                                  flag, init_val, 0, 0);
+                }
+                ctr_ovf_mask = ctr_ovf_mask >> 1;
+                idx++;
+        }
+}
+
+static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
+{
+        struct perf_sample_data data;
+        struct pt_regs *regs;
+        struct hw_perf_event *hw_evt;
+        union sbi_pmu_ctr_info *info;
+        int lidx, hidx, fidx;
+        struct riscv_pmu *pmu;
+        struct perf_event *event;
+        struct cpu_hw_events *cpu_hw_evt = dev;
+        unsigned long overflow;
+        unsigned long overflowed_ctrs = 0;
+
+        fidx = find_first_bit(cpu_hw_evt->used_event_ctrs, RISCV_MAX_COUNTERS);
+        event = cpu_hw_evt->events[fidx];
+        if (!event) {
+                csr_clear(CSR_SIP, SIP_LCOFIP);
+                return IRQ_NONE;
+        }
+
+        pmu = to_riscv_pmu(event->pmu);
+        pmu_sbi_stop_all(pmu);
+
+        /* Overflow status register should only be read after counter are stopped */
+        overflow = csr_read(CSR_SSCOUNTOVF);
+
+        /**
+         * Overflow interrupt pending bit should only be cleared after stopping
+         * all the counters to avoid any race condition.
+         */
+        csr_clear(CSR_SIP, SIP_LCOFIP);
+
+        /* No overflow bit is set */
+        if (!overflow)
+                return IRQ_NONE;
+
+        regs = get_irq_regs();
+
+        for_each_set_bit(lidx, cpu_hw_evt->used_event_ctrs, RISCV_MAX_COUNTERS) {
+                struct perf_event *event = cpu_hw_evt->events[lidx];
+
+                /* Skip if invalid event or user did not request a sampling */
+                if (!event || !is_sampling_event(event))
+                        continue;
+
+                info = &pmu_ctr_list[lidx];
+                /* Firmware counter don't support overflow yet */
+                if (!info || info->type == SBI_PMU_CTR_TYPE_FW)
+                        continue;
+
+                /* compute hardware counter index */
+                hidx = info->csr - CSR_CYCLE;
+                /* check if the corresponding bit is set in sscountovf */
+                if (!(overflow & (1 << hidx)))
+                        continue;
+
+                /*
+                 * Keep a track of overflowed counters so that they can be started
+                 * with updated initial value.
+                 */
+                overflowed_ctrs |= 1 << lidx;
+                hw_evt = &event->hw;
+                riscv_pmu_event_update(event);
+                perf_sample_data_init(&data, 0, hw_evt->last_period);
+                if (riscv_pmu_event_set_period(event)) {
+                        /*
+                         * Unlike other ISAs, RISC-V don't have to disable interrupts
+                         * to avoid throttling here. As per the specification, the
+                         * interrupt remains disabled until the OF bit is set.
+                         * Interrupts are enabled again only during the start.
+                         * TODO: We will need to stop the guest counters once
+                         * virtualization support is added.
+                         */
+                        perf_event_overflow(event, &data, regs);
+                }
+        }
+        pmu_sbi_start_overflow_mask(pmu, overflowed_ctrs);
+
+        return IRQ_HANDLED;
+}
+
 static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node)
 {
         struct riscv_pmu *pmu = hlist_entry_safe(node, struct riscv_pmu, node);
+        struct cpu_hw_events __percpu *hw_events = pmu->hw_events;
 
         /* Enable the access for TIME csr only from the user mode now */
         csr_write(CSR_SCOUNTEREN, 0x2);
 
         /* Stop all the counters so that they can be enabled from perf */
-        sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP,
-                  0, GENMASK_ULL(pmu->num_counters - 1, 0), 0, 0, 0, 0);
+        pmu_sbi_stop_all(pmu);
+
+        if (riscv_isa_extension_available(NULL, SSCOFPMF)) {
+                hw_events->irq = riscv_pmu_irq;
+                csr_clear(CSR_IP, BIT(RV_IRQ_PMU));
+                csr_set(CSR_IE, BIT(RV_IRQ_PMU));
+                enable_percpu_irq(riscv_pmu_irq, IRQ_TYPE_NONE);
+        }
 
         return 0;
 }
 
 static int pmu_sbi_dying_cpu(unsigned int cpu, struct hlist_node *node)
 {
+        if (riscv_isa_extension_available(NULL, SSCOFPMF)) {
+                disable_percpu_irq(riscv_pmu_irq);
+                csr_clear(CSR_IE, BIT(RV_IRQ_PMU));
+        }
+
         /* Disable all counters access for user mode now */
         csr_write(CSR_SCOUNTEREN, 0x0);
 
         return 0;
 }
 
+static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pdev)
+{
+        int ret;
+        struct cpu_hw_events __percpu *hw_events = pmu->hw_events;
+        struct device_node *cpu, *child;
+        struct irq_domain *domain = NULL;
+
+        if (!riscv_isa_extension_available(NULL, SSCOFPMF))
+                return -EOPNOTSUPP;
+
+        for_each_of_cpu_node(cpu) {
+                child = of_get_compatible_child(cpu, "riscv,cpu-intc");
+                if (!child) {
+                        pr_err("Failed to find INTC node\n");
+                        return -ENODEV;
+                }
+                domain = irq_find_host(child);
+                of_node_put(child);
+                if (domain)
+                        break;
+        }
+        if (!domain) {
+                pr_err("Failed to find INTC IRQ root domain\n");
+                return -ENODEV;
+        }
+
+        riscv_pmu_irq = irq_create_mapping(domain, RV_IRQ_PMU);
+        if (!riscv_pmu_irq) {
+                pr_err("Failed to map PMU interrupt for node\n");
+                return -ENODEV;
+        }
+
+        ret = request_percpu_irq(riscv_pmu_irq, pmu_sbi_ovf_handler, "riscv-pmu", hw_events);
+        if (ret) {
+                pr_err("registering percpu irq failed [%d]\n", ret);
+                return ret;
+        }
+
+        return 0;
+}
+
 static int pmu_sbi_device_probe(struct platform_device *pdev)
 {
         struct riscv_pmu *pmu = NULL;
         int num_counters;
-        int ret;
+        int ret = -ENODEV;
 
         pr_info("SBI PMU extension is available\n");
         /* Notify legacy implementation that SBI pmu is available*/
@@ -488,13 +684,19 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
         num_counters = pmu_sbi_find_num_ctrs();
         if (num_counters < 0) {
                 pr_err("SBI PMU extension doesn't provide any counters\n");
-                return -ENODEV;
+                goto out_free;
         }
 
         /* cache all the information about counters now */
         if (pmu_sbi_get_ctrinfo(num_counters))
-                return -ENODEV;
+                goto out_free;
 
+        ret = pmu_sbi_setup_irqs(pmu, pdev);
+        if (ret < 0) {
+                pr_info("Perf sampling/filtering is not supported as sscof extension is not available\n");
+                pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+                pmu->pmu.capabilities |= PERF_PMU_CAP_NO_EXCLUDE;
+        }
         pmu->num_counters = num_counters;
         pmu->ctr_start = pmu_sbi_ctr_start;
         pmu->ctr_stop = pmu_sbi_ctr_stop;
@@ -515,6 +717,10 @@ static int pmu_sbi_device_probe(struct platform_device *pdev)
         }
 
         return 0;
+
+out_free:
+        kfree(pmu);
+        return ret;
 }
 
 static struct platform_driver pmu_sbi_driver = {
@@ -544,8 +750,6 @@ static int __init pmu_sbi_devinit(void)
         }
 
         ret = platform_driver_register(&pmu_sbi_driver);
-        if (ret)
-                return ret;
 
         pdev = platform_device_register_simple(RISCV_PMU_PDEV_NAME, -1, NULL, 0);
         if (IS_ERR(pdev)) {
diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h
index 52672de540c2..c7e2d689063a 100644
--- a/include/linux/perf/riscv_pmu.h
+++ b/include/linux/perf/riscv_pmu.h
@@ -29,6 +29,8 @@
 struct cpu_hw_events {
         /* currently enabled events */
         int n_events;
+        /* Counter overflow interrupt */
+        int irq;
         /* currently enabled events */
         struct perf_event *events[RISCV_MAX_COUNTERS];
         /* currently enabled counters */
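One detail the diff relies on but never spells out: request_percpu_irq()
is given the __percpu cookie itself, and delivery is gated per CPU. A
sketch of that lifecycle, based on the generic kernel percpu-IRQ API
rather than on anything specific to this series:

        /* Probe path (once): register the handler with the __percpu
         * cookie. This does not enable delivery on any CPU by itself. */
        ret = request_percpu_irq(riscv_pmu_irq, pmu_sbi_ovf_handler,
                                 "riscv-pmu", pmu->hw_events);

        /* Hotplug callbacks (each CPU): gate delivery locally. */
        enable_percpu_irq(riscv_pmu_irq, IRQ_TYPE_NONE);   /* starting */
        disable_percpu_irq(riscv_pmu_irq);                 /* dying */

        /* On overflow, the percpu flow handler resolves the cookie for
         * the interrupted CPU, roughly handler(irq, raw_cpu_ptr(dev_id)),
         * which is why pmu_sbi_ovf_handler() can treat its 'dev' argument
         * as the local hart's cpu_hw_events without calling per_cpu_ptr(). */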