
[RFC,5/6] riscv: perf: Add driver for Control Transfer Records Ext.

Message ID 20240529185337.182722-6-rkanwal@rivosinc.com (mailing list archive)
State Changes Requested
Series riscv: perf: Add support for Control Transfer Records Ext.

Checks

Context                   Check  Description
conchuod/vmtest-fixes-PR  fail   merge-conflict

Commit Message

Rajnesh Kanwal May 29, 2024, 6:53 p.m. UTC
This adds support for the CTR Ext defined in [0]. The extension
allows recording a maximum of 256 last branch records.

The CTR extension depends on the Smcsrind/Sscsrind and Sscofpmf extensions.

Signed-off-by: Rajnesh Kanwal <rkanwal@rivosinc.com>
---
 MAINTAINERS                    |   1 +
 drivers/perf/Kconfig           |  11 +
 drivers/perf/Makefile          |   1 +
 drivers/perf/riscv_ctr.c       | 469 +++++++++++++++++++++++++++++++++
 include/linux/perf/riscv_pmu.h |  33 +++
 5 files changed, 515 insertions(+)
 create mode 100644 drivers/perf/riscv_ctr.c

Comments

Vincent Chen Aug. 27, 2024, 10:01 a.m. UTC | #1
> From: Rajnesh Kanwal <rkanwal@rivosinc.com>
> Date: Thu, May 30, 2024 at 2:56 AM
> Subject: [PATCH RFC 5/6] riscv: perf: Add driver for Control Transfer Records Ext.
> To: <linux-kernel@vger.kernel.org>
> Cc: <linux-perf-users@vger.kernel.org>, <linux-riscv@lists.infradead.org>, <adrian.hunter@intel.com>, <alexander.shishkin@linux.intel.com>, <ajones@ventanamicro.com>, <anup@brainfault.org>, <acme@kernel.org>, <atishp@rivosinc.com>, <beeman@rivosinc.com>, <brauner@kernel.org>, <conor@kernel.org>, <heiko@sntech.de>, <irogers@google.com>, <mingo@redhat.com>, <james.clark@arm.com>, <renyu.zj@linux.alibaba.com>, <jolsa@kernel.org>, <jisheng.teoh@starfivetech.com>, <palmer@dabbelt.com>, <tech-control-transfer-records@lists.riscv.org>, <will@kernel.org>, <kaiwenxue1@gmail.com>, Rajnesh Kanwal <rkanwal@rivosinc.com>
>
>
> This adds support for the CTR Ext defined in [0]. The extension
> allows recording a maximum of 256 last branch records.
>
> The CTR extension depends on the Smcsrind/Sscsrind and Sscofpmf extensions.
>
> Signed-off-by: Rajnesh Kanwal <rkanwal@rivosinc.com>
> ---
>  MAINTAINERS                    |   1 +
>  drivers/perf/Kconfig           |  11 +
>  drivers/perf/Makefile          |   1 +
>  drivers/perf/riscv_ctr.c       | 469 +++++++++++++++++++++++++++++++++
>  include/linux/perf/riscv_pmu.h |  33 +++
>  5 files changed, 515 insertions(+)
>  create mode 100644 drivers/perf/riscv_ctr.c
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index d6b42d5f62da..868e4b0808ab 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -19056,6 +19056,7 @@ M:      Atish Patra <atishp@atishpatra.org>
>  R:     Anup Patel <anup@brainfault.org>
>  L:     linux-riscv@lists.infradead.org
>  S:     Supported
> +F:     drivers/perf/riscv_ctr.c
>  F:     drivers/perf/riscv_pmu_common.c
>  F:     drivers/perf/riscv_pmu_dev.c
>  F:     drivers/perf/riscv_pmu_legacy.c
> diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
> index 3c37577b25f7..cca6598be739 100644
> --- a/drivers/perf/Kconfig
> +++ b/drivers/perf/Kconfig
> @@ -110,6 +110,17 @@ config ANDES_CUSTOM_PMU
>
>           If you don't know what to do here, say "Y".
>
> +config RISCV_CTR
> +       bool "Enable support for Control Transfer Records (CTR)"
> +       depends on PERF_EVENTS && RISCV_PMU
> +       default y
> +       help
> +         Enable support for Control Transfer Records (CTR), which
> +         allows recording the branches, jumps, calls, returns, etc. taken
> +         along an execution path. It also supports privilege-based
> +         filtering, and captures additional information such as the
> +         cycle count and branch misprediction.
> +
>  config ARM_PMU_ACPI
>         depends on ARM_PMU && ACPI
>         def_bool y
> diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
> index ba809cc069d5..364b1f66f410 100644
> --- a/drivers/perf/Makefile
> +++ b/drivers/perf/Makefile
> @@ -16,6 +16,7 @@ obj-$(CONFIG_RISCV_PMU_COMMON) += riscv_pmu_common.o
>  obj-$(CONFIG_RISCV_PMU_LEGACY) += riscv_pmu_legacy.o
>  obj-$(CONFIG_RISCV_PMU) += riscv_pmu_dev.o
>  obj-$(CONFIG_STARFIVE_STARLINK_PMU) += starfive_starlink_pmu.o
> +obj-$(CONFIG_RISCV_CTR) += riscv_ctr.o
>  obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o
>  obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
>  obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
> diff --git a/drivers/perf/riscv_ctr.c b/drivers/perf/riscv_ctr.c
> new file mode 100644
> index 000000000000..95fda1edda4f
> --- /dev/null
> +++ b/drivers/perf/riscv_ctr.c
> @@ -0,0 +1,469 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Control transfer records extension Helpers.
> + *
> + * Copyright (C) 2024 Rivos Inc.
> + *
> + * Author: Rajnesh Kanwal <rkanwal@rivosinc.com>
> + */
> +
> +#define pr_fmt(fmt) "CTR: " fmt
> +
> +#include <linux/bitfield.h>
> +#include <linux/printk.h>
> +#include <linux/types.h>
> +#include <linux/perf_event.h>
> +#include <linux/perf/riscv_pmu.h>
> +#include <linux/cpufeature.h>
> +#include <asm/hwcap.h>
> +#include <asm/csr_ind.h>
> +#include <asm/csr.h>
> +
> +#define CTR_BRANCH_FILTERS_INH  (CTRCTL_EXCINH       | \
> +                                CTRCTL_INTRINH      | \
> +                                CTRCTL_TRETINH      | \
> +                                CTRCTL_TKBRINH      | \
> +                                CTRCTL_INDCALL_INH  | \
> +                                CTRCTL_DIRCALL_INH  | \
> +                                CTRCTL_INDJUMP_INH  | \
> +                                CTRCTL_DIRJUMP_INH  | \
> +                                CTRCTL_CORSWAP_INH  | \
> +                                CTRCTL_RET_INH      | \
> +                                CTRCTL_INDOJUMP_INH | \
> +                                CTRCTL_DIROJUMP_INH)
> +
> +#define CTR_BRANCH_ENABLE_BITS (CTRCTL_KERNEL_ENABLE | CTRCTL_U_ENABLE)
> +
> +/* Branch filters not-supported by CTR extension. */
> +#define CTR_EXCLUDE_BRANCH_FILTERS (PERF_SAMPLE_BRANCH_ABORT_TX                | \
> +                                   PERF_SAMPLE_BRANCH_IN_TX            | \
> +                                   PERF_SAMPLE_BRANCH_PRIV_SAVE        | \
> +                                   PERF_SAMPLE_BRANCH_NO_TX            | \
> +                                   PERF_SAMPLE_BRANCH_COUNTERS)
> +
> +/* Branch filters supported by CTR extension. */
> +#define CTR_ALLOWED_BRANCH_FILTERS (PERF_SAMPLE_BRANCH_USER            | \
> +                                   PERF_SAMPLE_BRANCH_KERNEL           | \
> +                                   PERF_SAMPLE_BRANCH_HV               | \
> +                                   PERF_SAMPLE_BRANCH_ANY              | \
> +                                   PERF_SAMPLE_BRANCH_ANY_CALL         | \
> +                                   PERF_SAMPLE_BRANCH_ANY_RETURN       | \
> +                                   PERF_SAMPLE_BRANCH_IND_CALL         | \
> +                                   PERF_SAMPLE_BRANCH_COND             | \
> +                                   PERF_SAMPLE_BRANCH_IND_JUMP         | \
> +                                   PERF_SAMPLE_BRANCH_HW_INDEX         | \
> +                                   PERF_SAMPLE_BRANCH_NO_FLAGS         | \
> +                                   PERF_SAMPLE_BRANCH_NO_CYCLES        | \
> +                                   PERF_SAMPLE_BRANCH_CALL_STACK       | \
> +                                   PERF_SAMPLE_BRANCH_CALL             | \
> +                                   PERF_SAMPLE_BRANCH_TYPE_SAVE)
> +
> +#define CTR_PERF_BRANCH_FILTERS    (CTR_ALLOWED_BRANCH_FILTERS         | \
> +                                   CTR_EXCLUDE_BRANCH_FILTERS)
> +
> +static u64 allowed_filters __read_mostly;
> +
> +struct ctr_regset {
> +       unsigned long src;
> +       unsigned long target;
> +       unsigned long ctr_data;
> +};
> +
> +static inline u64 get_ctr_src_reg(unsigned int ctr_idx)
> +{
> +       return csr_ind_read(CSR_IREG, CTR_ENTRIES_FIRST, ctr_idx);
> +}
> +
> +static inline u64 get_ctr_tgt_reg(unsigned int ctr_idx)
> +{
> +       return csr_ind_read(CSR_IREG2, CTR_ENTRIES_FIRST, ctr_idx);
> +}
> +
> +static inline u64 get_ctr_data_reg(unsigned int ctr_idx)
> +{
> +       return csr_ind_read(CSR_IREG3, CTR_ENTRIES_FIRST, ctr_idx);
> +}
> +
> +static inline bool ctr_record_valid(u64 ctr_src)
> +{
> +       return !!FIELD_GET(CTRSOURCE_VALID, ctr_src);
> +}
> +
> +static inline int ctr_get_mispredict(u64 ctr_target)
> +{
> +       return FIELD_GET(CTRTARGET_MISP, ctr_target);
> +}
> +
> +static inline unsigned int ctr_get_cycles(u64 ctr_data)
> +{
> +       const unsigned int cce = FIELD_GET(CTRDATA_CCE_MASK, ctr_data);
> +       const unsigned int ccm = FIELD_GET(CTRDATA_CCM_MASK, ctr_data);
> +
> +       if (ctr_data & CTRDATA_CCV)
> +               return 0;
> +
> +       /* Formula to calculate cycles from spec: (2^12 + CCM) << CCE-1 */
> +       if (cce > 0)
> +               return (4096 + ccm) << (cce - 1);
> +
> +       return FIELD_GET(CTRDATA_CCM_MASK, ctr_data);
> +}
> +
> +static inline unsigned int ctr_get_type(u64 ctr_data)
> +{
> +       return FIELD_GET(CTRDATA_TYPE_MASK, ctr_data);
> +}
> +
> +static inline unsigned int ctr_get_depth(u64 ctr_depth)
> +{
> +       /* Depth table from CTR Spec: 2.4 sctrdepth.
> +        *
> +        * sctrdepth.depth       Depth
> +        * 000                  - 16
> +        * 001                  - 32
> +        * 010                  - 64
> +        * 011                  - 128
> +        * 100                  - 256
> +        *
> +        * Depth = 16 * 2 ^ (ctrdepth.depth)
> +        * or
> +        * Depth = 16 << ctrdepth.depth.
> +        */
> +       return 16 << FIELD_GET(SCTRDEPTH_MASK, ctr_depth);
> +}
> +
> +/* Reads CTR entry at idx and stores it in entry struct. */
> +static bool capture_ctr_regset(struct ctr_regset *entry, unsigned int idx)
> +{
> +       entry->src = get_ctr_src_reg(idx);
> +
> +       if (!ctr_record_valid(entry->src))
> +               return false;
> +
> +       entry->src = entry->src & (~CTRSOURCE_VALID);
> +       entry->target = get_ctr_tgt_reg(idx);
> +       entry->ctr_data = get_ctr_data_reg(idx);
> +
> +       return true;
> +}
> +
> +static u64 branch_type_to_ctr(int branch_type)
> +{
> +       u64 config = CTR_BRANCH_FILTERS_INH | CTRCTL_LCOFIFRZ;
> +
> +       if (branch_type & PERF_SAMPLE_BRANCH_USER)
> +               config |= CTRCTL_U_ENABLE;
> +
> +       if (branch_type & PERF_SAMPLE_BRANCH_KERNEL)
> +               config |= CTRCTL_KERNEL_ENABLE;
> +
> +       if (branch_type & PERF_SAMPLE_BRANCH_HV) {
> +               if (riscv_isa_extension_available(NULL, h))
> +                       config |= CTRCTL_KERNEL_ENABLE;
> +       }
> +
> +       if (branch_type & PERF_SAMPLE_BRANCH_ANY) {
> +               config &= ~CTR_BRANCH_FILTERS_INH;
> +               return config;
> +       }
> +
> +       if (branch_type & PERF_SAMPLE_BRANCH_ANY_CALL) {
> +               config &= ~CTRCTL_INDCALL_INH;
> +               config &= ~CTRCTL_DIRCALL_INH;
> +               config &= ~CTRCTL_EXCINH;
> +               config &= ~CTRCTL_INTRINH;
> +       }
> +
> +       if (branch_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
> +               config &= ~(CTRCTL_RET_INH | CTRCTL_TRETINH);
> +
> +       if (branch_type & PERF_SAMPLE_BRANCH_IND_CALL)
> +               config &= ~CTRCTL_INDCALL_INH;
> +
> +       if (branch_type & PERF_SAMPLE_BRANCH_COND)
> +               config &= ~CTRCTL_TKBRINH;
> +
> +       if (branch_type & PERF_SAMPLE_BRANCH_CALL_STACK) {
> +               config &= ~(CTRCTL_INDCALL_INH | CTRCTL_DIRCALL_INH |
> +                           CTRCTL_RET_INH);
> +               config |= CTRCTL_RASEMU;
> +       }
> +
> +       if (branch_type & PERF_SAMPLE_BRANCH_IND_JUMP) {
> +               config &= ~CTRCTL_INDJUMP_INH;
> +               config &= ~CTRCTL_INDOJUMP_INH;
> +       }
> +
> +       if (branch_type & PERF_SAMPLE_BRANCH_CALL)
> +               config &= ~CTRCTL_DIRCALL_INH;
> +
> +       return config;
> +}
> +
> +static const int ctr_perf_map[] = {
> +       [CTRDATA_TYPE_NONE]                     = PERF_BR_UNKNOWN,
> +       [CTRDATA_TYPE_EXCEPTION]                = PERF_BR_SYSCALL,
> +       [CTRDATA_TYPE_INTERRUPT]                = PERF_BR_IRQ,
> +       [CTRDATA_TYPE_TRAP_RET]                 = PERF_BR_ERET,
> +       [CTRDATA_TYPE_NONTAKEN_BRANCH]          = PERF_BR_COND,
> +       [CTRDATA_TYPE_TAKEN_BRANCH]             = PERF_BR_COND,
> +       [CTRDATA_TYPE_RESERVED_6]               = PERF_BR_UNKNOWN,
> +       [CTRDATA_TYPE_RESERVED_7]               = PERF_BR_UNKNOWN,
> +       [CTRDATA_TYPE_INDIRECT_CALL]            = PERF_BR_IND_CALL,
> +       [CTRDATA_TYPE_DIRECT_CALL]              = PERF_BR_CALL,
> +       [CTRDATA_TYPE_INDIRECT_JUMP]            = PERF_BR_UNCOND,
> +       [CTRDATA_TYPE_DIRECT_JUMP]              = PERF_BR_UNKNOWN,
> +       [CTRDATA_TYPE_CO_ROUTINE_SWAP]          = PERF_BR_UNKNOWN,
> +       [CTRDATA_TYPE_RETURN]                   = PERF_BR_RET,
> +       [CTRDATA_TYPE_OTHER_INDIRECT_JUMP]      = PERF_BR_IND,
> +       [CTRDATA_TYPE_OTHER_DIRECT_JUMP]        = PERF_BR_UNKNOWN,
> +};
> +
> +static void ctr_set_perf_entry_type(struct perf_branch_entry *entry,
> +                                   u64 ctr_data)
> +{
> +       int ctr_type = ctr_get_type(ctr_data);
> +
> +       entry->type = ctr_perf_map[ctr_type];
> +       if (entry->type == PERF_BR_UNKNOWN)
> +               pr_warn("%d - unknown branch type captured\n", ctr_type);
> +}
> +
> +static void capture_ctr_flags(struct perf_branch_entry *entry,
> +                             struct perf_event *event, u64 ctr_data,
> +                             u64 ctr_target)
> +{
> +       if (branch_sample_type(event))
> +               ctr_set_perf_entry_type(entry, ctr_data);
> +
> +       if (!branch_sample_no_cycles(event))
> +               entry->cycles = ctr_get_cycles(ctr_data);
> +
> +       if (!branch_sample_no_flags(event)) {
> +               entry->abort = 0;
> +               entry->mispred = ctr_get_mispredict(ctr_target);
> +               entry->predicted = !entry->mispred;
> +       }
> +
> +       if (branch_sample_priv(event))
> +               entry->priv = PERF_BR_PRIV_UNKNOWN;
> +}
> +
> +
> +static void ctr_regset_to_branch_entry(struct cpu_hw_events *cpuc,
> +                                      struct perf_event *event,
> +                                      struct ctr_regset *regset,
> +                                      unsigned int idx)
> +{
> +       struct perf_branch_entry *entry = &cpuc->branches->branch_entries[idx];
> +
> +       perf_clear_branch_entry_bitfields(entry);
> +       entry->from = regset->src;
> +       entry->to = regset->target & (~CTRTARGET_MISP);
> +       capture_ctr_flags(entry, event, regset->ctr_data, regset->target);
> +}
> +
> +static void ctr_read_entries(struct cpu_hw_events *cpuc,
> +                            struct perf_event *event,
> +                            unsigned int depth)
> +{
> +       struct ctr_regset entry = {};
> +       u64 ctr_ctl;
> +       int i;
> +
> +       ctr_ctl = csr_read_clear(CSR_CTRCTL, CTR_BRANCH_ENABLE_BITS);
> +
> +       for (i = 0; i < depth; i++) {
> +               if (!capture_ctr_regset(&entry, i))
> +                       break;
> +
> +               ctr_regset_to_branch_entry(cpuc, event, &entry, i);
> +       }
> +
> +       csr_set(CSR_CTRCTL, ctr_ctl & CTR_BRANCH_ENABLE_BITS);
> +
> +       cpuc->branches->branch_stack.nr = i;
> +       cpuc->branches->branch_stack.hw_idx = 0;
> +}
> +
> +bool riscv_pmu_ctr_valid(struct perf_event *event)
> +{
> +       u64 branch_type = event->attr.branch_sample_type;
> +
> +       if (branch_type & ~allowed_filters) {
> +               pr_debug_once("Requested branch filters not supported 0x%llx\n",
> +                               branch_type & ~allowed_filters);
> +               return false;
> +       }
> +
> +       return true;
> +}
> +
> +void riscv_pmu_ctr_consume(struct cpu_hw_events *cpuc, struct perf_event *event)
> +{
> +       unsigned int depth = to_riscv_pmu(event->pmu)->ctr_depth;
> +
> +       ctr_read_entries(cpuc, event, depth);
> +
> +       /* Clear frozen bit. */
> +       csr_clear(CSR_SCTRSTATUS, SCTRSTATUS_FROZEN);
> +}
> +
> +static void riscv_pmu_ctr_clear(void)
> +{
> +       /* FIXME: Replace with sctrclr instruction once support is merged
> +        * into toolchain.
> +        */
> +       asm volatile(".4byte 0x10400073\n" ::: "memory");
> +       csr_write(CSR_SCTRSTATUS, 0);
> +}
> +
> +/*
> + * On context switch in, we need to make sure no samples from previous user
> + * are left in the CTR.
> + *
> + * On ctxswin, sched_in = true, called after the PMU has started
> + * On ctxswout, sched_in = false, called before the PMU is stopped
> + */

Hi Rajnesh Kanwal,

Thank you for providing this patch set. I have a few questions and
findings about it and would appreciate your help in clarifying them.

> +void riscv_pmu_ctr_sched_task(struct perf_event_pmu_context *pmu_ctx,
> +                             bool sched_in)
> +{
> +       struct riscv_pmu *rvpmu = to_riscv_pmu(pmu_ctx->pmu);
> +       struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
> +
> +       if (cpuc->ctr_users && sched_in)
> +               riscv_pmu_ctr_clear();
> +}
> +

My first question is about context save and restore for the CTR log.
If I understand correctly, Intel's LBR performs a context save and
restore when PERF_SAMPLE_BRANCH_CALL_STACK is requested, but we don't
seem to have a similar implementation here. Does the CTR
implementation not need context save and restore for the RASEMU case?
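For illustration, a minimal sketch of what an LBR-style save/restore
could look like for CTR. Only get_ctr_*_reg(), ctr_record_valid() and
struct ctr_regset come from this patch; the ctr_task_ctx container and
the csr_ind_write() helper are assumptions (an indirect-CSR write
counterpart to csr_ind_read() is not part of this series as posted):

/* Hypothetical per-task snapshot of the CTR entries. */
struct ctr_task_ctx {
	struct ctr_regset saved[256];	/* up to sctrdepth entries */
	unsigned int nr;
};

static void ctr_save(struct ctr_task_ctx *ctx, unsigned int depth)
{
	unsigned int i;

	for (i = 0; i < depth; i++) {
		/* Keep the VALID bit so restore can write it back as-is. */
		ctx->saved[i].src = get_ctr_src_reg(i);
		if (!ctr_record_valid(ctx->saved[i].src))
			break;
		ctx->saved[i].target = get_ctr_tgt_reg(i);
		ctx->saved[i].ctr_data = get_ctr_data_reg(i);
	}
	ctx->nr = i;
}

static void ctr_restore(struct ctr_task_ctx *ctx)
{
	unsigned int i;

	for (i = 0; i < ctx->nr; i++) {
		/* Assumes a csr_ind_write() helper exists alongside csr_ind_read(). */
		csr_ind_write(CSR_IREG, CTR_ENTRIES_FIRST, i, ctx->saved[i].src);
		csr_ind_write(CSR_IREG2, CTR_ENTRIES_FIRST, i, ctx->saved[i].target);
		csr_ind_write(CSR_IREG3, CTR_ENTRIES_FIRST, i, ctx->saved[i].ctr_data);
	}
}

riscv_pmu_ctr_sched_task() could then save on sched-out and restore on
sched-in when a PERF_SAMPLE_BRANCH_CALL_STACK user exists, instead of
only clearing on sched-in.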

> +void riscv_pmu_ctr_enable(struct perf_event *event)
> +{
> +       struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
> +       struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
> +       u64 branch_type = event->attr.branch_sample_type;
> +       u64 ctr;
> +
> +       if (!cpuc->ctr_users++ && !event->total_time_running)
> +               riscv_pmu_ctr_clear();

I ran the full CTR environment on my side and noticed that
cpuc->ctr_users is often not 0 at the start of a new trace. I suspect
this is because cpuc->ctr_users is incremented in
riscv_pmu_ctr_enable() and decremented in riscv_pmu_ctr_disable(), and
those two functions are called from the pmu->start and pmu->stop
paths. However, the number of calls to pmu->start does not necessarily
match the number of calls to pmu->stop, so cpuc->ctr_users may not
return to 0 after a trace completes. In Intel's LBR implementation the
user count is incremented in the pmu->add path rather than in
pmu->start; perhaps we could follow their approach to address this.
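For example, a rough sketch of that shape, reusing the code already in
this patch (the riscv_pmu_ctr_add()/riscv_pmu_ctr_del() names are made
up here; they would be called from the PMU's add/del callbacks):

/* Called once per event from the pmu->add path. */
void riscv_pmu_ctr_add(struct perf_event *event)
{
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);

	if (!cpuc->ctr_users++ && !event->total_time_running)
		riscv_pmu_ctr_clear();

	perf_sched_cb_inc(event->pmu);
}

/* Called once per event from the pmu->del path. */
void riscv_pmu_ctr_del(struct perf_event *event)
{
	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);

	cpuc->ctr_users--;
	WARN_ON_ONCE(cpuc->ctr_users < 0);

	perf_sched_cb_dec(event->pmu);
}

riscv_pmu_ctr_enable()/riscv_pmu_ctr_disable() would then only program
and clear CTRCTL, so an unbalanced number of start/stop calls no
longer affects the reference count.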


> +
> +       ctr = branch_type_to_ctr(branch_type);
> +       csr_write(CSR_CTRCTL, ctr);
> +
> +       perf_sched_cb_inc(event->pmu);
> +}
> +
> +void riscv_pmu_ctr_disable(struct perf_event *event)
> +{
> +       struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
> +       struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
> +
> +       /* Clear CTRCTL to disable the recording. */
> +       csr_write(CSR_CTRCTL, 0);
> +
> +       cpuc->ctr_users--;
> +       WARN_ON_ONCE(cpuc->ctr_users < 0);
> +

When I tested this patch, I also hit a case where cpuc->ctr_users
dropped below 0. The issue seems to be that riscv_pmu_del() calls
ctr_stop twice with different flags, while rvpmu_ctr_stop() calls
riscv_pmu_ctr_disable() unconditionally, without looking at the flag.
That lets cpuc->ctr_users go negative.
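One possible way to keep the count balanced would be to key the
teardown off the stop flag. A sketch only: rvpmu_ctr_stop() is defined
elsewhere in this series, so this just shows the shape of the check,
using the generic has_branch_stack() helper and the existing
RISCV_PMU_STOP_FLAG_RESET flag from riscv_pmu.h:

static void rvpmu_ctr_stop(struct perf_event *event, unsigned long flag)
{
	/*
	 * riscv_pmu_del() reaches this callback twice, once through
	 * riscv_pmu_stop() and once directly with
	 * RISCV_PMU_STOP_FLAG_RESET, so only drop the CTR user count
	 * on one of the two calls.
	 */
	if (has_branch_stack(event) && flag != RISCV_PMU_STOP_FLAG_RESET)
		riscv_pmu_ctr_disable(event);

	/* ... existing counter-stop handling from this series ... */
}

Whether to key off the flag or off event->hw.state is a design choice;
the point is only that the disable must not run twice for the same
stop.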

Thanks,
Vincent
> +       perf_sched_cb_dec(event->pmu);
> +}
> +
> +/*
> + * Check which perf branch filters the hardware supports. To avoid missing
> + * any newly added perf filter, we do a BUILD_BUG_ON check, so make sure
> + * to update the CTR_ALLOWED_BRANCH_FILTERS or CTR_EXCLUDE_BRANCH_FILTERS
> + * defines when adding support for a new filter in the function below.
> + */
> +static void __init check_available_filters(void)
> +{
> +       u64 ctr_ctl;
> +
> +       /*
> +        * Ensure both perf branch filter allowed and exclude
> +        * masks are always in sync with the generic perf ABI.
> +        */
> +       BUILD_BUG_ON(CTR_PERF_BRANCH_FILTERS != (PERF_SAMPLE_BRANCH_MAX - 1));
> +
> +       allowed_filters = PERF_SAMPLE_BRANCH_USER      |
> +                         PERF_SAMPLE_BRANCH_KERNEL    |
> +                         PERF_SAMPLE_BRANCH_ANY       |
> +                         PERF_SAMPLE_BRANCH_HW_INDEX  |
> +                         PERF_SAMPLE_BRANCH_NO_FLAGS  |
> +                         PERF_SAMPLE_BRANCH_NO_CYCLES |
> +                         PERF_SAMPLE_BRANCH_TYPE_SAVE;
> +
> +       csr_write(CSR_CTRCTL, ~0);
> +       ctr_ctl = csr_read(CSR_CTRCTL);
> +
> +       if (riscv_isa_extension_available(NULL, h))
> +               allowed_filters |= PERF_SAMPLE_BRANCH_HV;
> +
> +       if (ctr_ctl & (CTRCTL_INDCALL_INH | CTRCTL_DIRCALL_INH))
> +               allowed_filters |= PERF_SAMPLE_BRANCH_ANY_CALL;
> +
> +       if (ctr_ctl & (CTRCTL_RET_INH | CTRCTL_TRETINH))
> +               allowed_filters |= PERF_SAMPLE_BRANCH_ANY_RETURN;
> +
> +       if (ctr_ctl & CTRCTL_INDCALL_INH)
> +               allowed_filters |= PERF_SAMPLE_BRANCH_IND_CALL;
> +
> +       if (ctr_ctl & CTRCTL_TKBRINH)
> +               allowed_filters |= PERF_SAMPLE_BRANCH_COND;
> +
> +       if (ctr_ctl & CTRCTL_RASEMU)
> +               allowed_filters |= PERF_SAMPLE_BRANCH_CALL_STACK;
> +
> +       if (ctr_ctl & (CTRCTL_INDOJUMP_INH | CTRCTL_INDJUMP_INH))
> +               allowed_filters |= PERF_SAMPLE_BRANCH_IND_JUMP;
> +
> +       if (ctr_ctl & CTRCTL_DIRCALL_INH)
> +               allowed_filters |= PERF_SAMPLE_BRANCH_CALL;
> +}
> +
> +void riscv_pmu_ctr_starting_cpu(void)
> +{
> +       if (!riscv_isa_extension_available(NULL, SxCTR) ||
> +           !riscv_isa_extension_available(NULL, SSCOFPMF) ||
> +           !riscv_isa_extension_available(NULL, SxCSRIND))
> +               return;
> +
> +       /* Set depth to maximum. */
> +       csr_write(CSR_SCTRDEPTH, SCTRDEPTH_MASK);
> +}
> +
> +void riscv_pmu_ctr_dying_cpu(void)
> +{
> +       if (!riscv_isa_extension_available(NULL, SxCTR) ||
> +           !riscv_isa_extension_available(NULL, SSCOFPMF) ||
> +           !riscv_isa_extension_available(NULL, SxCSRIND))
> +               return;
> +
> +       /* Clear and reset CTR CSRs. */
> +       csr_write(CSR_SCTRDEPTH, 0);
> +       csr_write(CSR_CTRCTL, 0);
> +       riscv_pmu_ctr_clear();
> +}
> +
> +void __init riscv_pmu_ctr_init(struct riscv_pmu *riscv_pmu)
> +{
> +       if (!riscv_isa_extension_available(NULL, SxCTR) ||
> +           !riscv_isa_extension_available(NULL, SSCOFPMF) ||
> +           !riscv_isa_extension_available(NULL, SxCSRIND))
> +               return;
> +
> +       check_available_filters();
> +
> +       /* Set depth to maximum. */
> +       csr_write(CSR_SCTRDEPTH, SCTRDEPTH_MASK);
> +       riscv_pmu->ctr_depth = ctr_get_depth(csr_read(CSR_SCTRDEPTH));
> +
> +       pr_info("Perf CTR available, with %d depth\n", riscv_pmu->ctr_depth);
> +}
> +
> +void __init riscv_pmu_ctr_finish(struct riscv_pmu *riscv_pmu)
> +{
> +       if (!riscv_pmu_ctr_supported(riscv_pmu))
> +               return;
> +
> +       csr_write(CSR_SCTRDEPTH, 0);
> +       csr_write(CSR_CTRCTL, 0);
> +       riscv_pmu_ctr_clear();
> +       riscv_pmu->ctr_depth = 0;
> +}
> diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h
> index 5a6b840018bd..455d2386936f 100644
> --- a/include/linux/perf/riscv_pmu.h
> +++ b/include/linux/perf/riscv_pmu.h
> @@ -104,6 +104,39 @@ struct riscv_pmu *riscv_pmu_alloc(void);
>  int riscv_pmu_get_hpm_info(u32 *hw_ctr_width, u32 *num_hw_ctr);
>  #endif
>
> +static inline bool riscv_pmu_ctr_supported(struct riscv_pmu *pmu)
> +{
> +       return !!pmu->ctr_depth;
> +}
> +
>  #endif /* CONFIG_RISCV_PMU_COMMON */
>
> +#ifdef CONFIG_RISCV_CTR
> +
> +bool riscv_pmu_ctr_valid(struct perf_event *event);
> +void riscv_pmu_ctr_consume(struct cpu_hw_events *cpuc, struct perf_event *event);
> +void riscv_pmu_ctr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);
> +void riscv_pmu_ctr_enable(struct perf_event *event);
> +void riscv_pmu_ctr_disable(struct perf_event *event);
> +void riscv_pmu_ctr_dying_cpu(void);
> +void riscv_pmu_ctr_starting_cpu(void);
> +void riscv_pmu_ctr_init(struct riscv_pmu *riscv_pmu);
> +void riscv_pmu_ctr_finish(struct riscv_pmu *riscv_pmu);
> +
> +#else
> +
> +static inline bool riscv_pmu_ctr_valid(struct perf_event *event) { return false; }
> +static inline void riscv_pmu_ctr_consume(struct cpu_hw_events *cpuc,
> +                                     struct perf_event *event) { }
> +static inline void riscv_pmu_ctr_sched_task(struct perf_event_pmu_context *,
> +                                           bool sched_in) { }
> +static inline void riscv_pmu_ctr_enable(struct perf_event *event) { }
> +static inline void riscv_pmu_ctr_disable(struct perf_event *event) { }
> +static inline void riscv_pmu_ctr_dying_cpu(void) { }
> +static inline void riscv_pmu_ctr_starting_cpu(void) { }
> +static inline void riscv_pmu_ctr_init(struct riscv_pmu *riscv_pmu) { }
> +static inline void riscv_pmu_ctr_finish(struct riscv_pmu *riscv_pmu) { }
> +
> +#endif /* CONFIG_RISCV_CTR */
> +
>  #endif /* _RISCV_PMU_H */
> --
> 2.34.1
>
>
Charlie Jenkins Sept. 5, 2024, 9:52 p.m. UTC | #2
On Wed, May 29, 2024 at 07:53:36PM +0100, Rajnesh Kanwal wrote:
> This adds support for the CTR Ext defined in [0]. The extension
> allows recording a maximum of 256 last branch records.
>
> The CTR extension depends on the Smcsrind/Sscsrind and Sscofpmf extensions.
> 
> Signed-off-by: Rajnesh Kanwal <rkanwal@rivosinc.com>
> ---
>  MAINTAINERS                    |   1 +
>  drivers/perf/Kconfig           |  11 +
>  drivers/perf/Makefile          |   1 +
>  drivers/perf/riscv_ctr.c       | 469 +++++++++++++++++++++++++++++++++
>  include/linux/perf/riscv_pmu.h |  33 +++
>  5 files changed, 515 insertions(+)
>  create mode 100644 drivers/perf/riscv_ctr.c
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index d6b42d5f62da..868e4b0808ab 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -19056,6 +19056,7 @@ M:	Atish Patra <atishp@atishpatra.org>
>  R:	Anup Patel <anup@brainfault.org>
>  L:	linux-riscv@lists.infradead.org
>  S:	Supported
> +F:	drivers/perf/riscv_ctr.c
>  F:	drivers/perf/riscv_pmu_common.c
>  F:	drivers/perf/riscv_pmu_dev.c
>  F:	drivers/perf/riscv_pmu_legacy.c
> diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
> index 3c37577b25f7..cca6598be739 100644
> --- a/drivers/perf/Kconfig
> +++ b/drivers/perf/Kconfig
> @@ -110,6 +110,17 @@ config ANDES_CUSTOM_PMU
>  
>  	  If you don't know what to do here, say "Y".
>  
> +config RISCV_CTR
> +       bool "Enable support for Control Transfer Records (CTR)"
> +       depends on PERF_EVENTS && RISCV_PMU
> +       default y
> +       help
> +         Enable support for Control Transfer Records (CTR), which
> +         allows recording the branches, jumps, calls, returns, etc. taken
> +         along an execution path. It also supports privilege-based
> +         filtering, and captures additional information such as the
> +         cycle count and branch misprediction.
> +
>  config ARM_PMU_ACPI
>  	depends on ARM_PMU && ACPI
>  	def_bool y
> diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
> index ba809cc069d5..364b1f66f410 100644
> --- a/drivers/perf/Makefile
> +++ b/drivers/perf/Makefile
> @@ -16,6 +16,7 @@ obj-$(CONFIG_RISCV_PMU_COMMON) += riscv_pmu_common.o
>  obj-$(CONFIG_RISCV_PMU_LEGACY) += riscv_pmu_legacy.o
>  obj-$(CONFIG_RISCV_PMU) += riscv_pmu_dev.o
>  obj-$(CONFIG_STARFIVE_STARLINK_PMU) += starfive_starlink_pmu.o
> +obj-$(CONFIG_RISCV_CTR) += riscv_ctr.o
>  obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o
>  obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
>  obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
> diff --git a/drivers/perf/riscv_ctr.c b/drivers/perf/riscv_ctr.c
> new file mode 100644
> index 000000000000..95fda1edda4f
> --- /dev/null
> +++ b/drivers/perf/riscv_ctr.c
> @@ -0,0 +1,469 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Control transfer records extension Helpers.
> + *
> + * Copyright (C) 2024 Rivos Inc.
> + *
> + * Author: Rajnesh Kanwal <rkanwal@rivosinc.com>
> + */
> +
> +#define pr_fmt(fmt) "CTR: " fmt
> +
> +#include <linux/bitfield.h>
> +#include <linux/printk.h>
> +#include <linux/types.h>
> +#include <linux/perf_event.h>
> +#include <linux/perf/riscv_pmu.h>
> +#include <linux/cpufeature.h>
> +#include <asm/hwcap.h>
> +#include <asm/csr_ind.h>
> +#include <asm/csr.h>
> +
> +#define CTR_BRANCH_FILTERS_INH  (CTRCTL_EXCINH       | \
> +				 CTRCTL_INTRINH      | \
> +				 CTRCTL_TRETINH      | \
> +				 CTRCTL_TKBRINH      | \
> +				 CTRCTL_INDCALL_INH  | \
> +				 CTRCTL_DIRCALL_INH  | \
> +				 CTRCTL_INDJUMP_INH  | \
> +				 CTRCTL_DIRJUMP_INH  | \
> +				 CTRCTL_CORSWAP_INH  | \
> +				 CTRCTL_RET_INH      | \
> +				 CTRCTL_INDOJUMP_INH | \
> +				 CTRCTL_DIROJUMP_INH)
> +
> +#define CTR_BRANCH_ENABLE_BITS (CTRCTL_KERNEL_ENABLE | CTRCTL_U_ENABLE)
> +
> +/* Branch filters not-supported by CTR extension. */
> +#define CTR_EXCLUDE_BRANCH_FILTERS (PERF_SAMPLE_BRANCH_ABORT_TX	        | \
> +				    PERF_SAMPLE_BRANCH_IN_TX		| \
> +				    PERF_SAMPLE_BRANCH_PRIV_SAVE        | \
> +				    PERF_SAMPLE_BRANCH_NO_TX            | \
> +				    PERF_SAMPLE_BRANCH_COUNTERS)
> +
> +/* Branch filters supported by CTR extension. */
> +#define CTR_ALLOWED_BRANCH_FILTERS (PERF_SAMPLE_BRANCH_USER		| \
> +				    PERF_SAMPLE_BRANCH_KERNEL		| \
> +				    PERF_SAMPLE_BRANCH_HV		| \
> +				    PERF_SAMPLE_BRANCH_ANY		| \
> +				    PERF_SAMPLE_BRANCH_ANY_CALL	        | \
> +				    PERF_SAMPLE_BRANCH_ANY_RETURN	| \
> +				    PERF_SAMPLE_BRANCH_IND_CALL	        | \
> +				    PERF_SAMPLE_BRANCH_COND		| \
> +				    PERF_SAMPLE_BRANCH_IND_JUMP	        | \
> +				    PERF_SAMPLE_BRANCH_HW_INDEX	        | \
> +				    PERF_SAMPLE_BRANCH_NO_FLAGS	        | \
> +				    PERF_SAMPLE_BRANCH_NO_CYCLES	| \
> +				    PERF_SAMPLE_BRANCH_CALL_STACK       | \
> +				    PERF_SAMPLE_BRANCH_CALL		| \
> +				    PERF_SAMPLE_BRANCH_TYPE_SAVE)
> +
> +#define CTR_PERF_BRANCH_FILTERS    (CTR_ALLOWED_BRANCH_FILTERS	        | \
> +				    CTR_EXCLUDE_BRANCH_FILTERS)
> +
> +static u64 allowed_filters __read_mostly;
> +
> +struct ctr_regset {
> +	unsigned long src;
> +	unsigned long target;
> +	unsigned long ctr_data;
> +};
> +
> +static inline u64 get_ctr_src_reg(unsigned int ctr_idx)
> +{
> +	return csr_ind_read(CSR_IREG, CTR_ENTRIES_FIRST, ctr_idx);
> +}
> +
> +static inline u64 get_ctr_tgt_reg(unsigned int ctr_idx)
> +{
> +	return csr_ind_read(CSR_IREG2, CTR_ENTRIES_FIRST, ctr_idx);
> +}
> +
> +static inline u64 get_ctr_data_reg(unsigned int ctr_idx)
> +{
> +	return csr_ind_read(CSR_IREG3, CTR_ENTRIES_FIRST, ctr_idx);
> +}
> +
> +static inline bool ctr_record_valid(u64 ctr_src)
> +{
> +	return !!FIELD_GET(CTRSOURCE_VALID, ctr_src);
> +}
> +
> +static inline int ctr_get_mispredict(u64 ctr_target)
> +{
> +	return FIELD_GET(CTRTARGET_MISP, ctr_target);
> +}
> +
> +static inline unsigned int ctr_get_cycles(u64 ctr_data)
> +{
> +	const unsigned int cce = FIELD_GET(CTRDATA_CCE_MASK, ctr_data);
> +	const unsigned int ccm = FIELD_GET(CTRDATA_CCM_MASK, ctr_data);
> +
> +	if (ctr_data & CTRDATA_CCV)
> +		return 0;
> +
> +	/* Formula to calculate cycles from spec: (2^12 + CCM) << CCE-1 */
> +	if (cce > 0)
> +		return (4096 + ccm) << (cce - 1);
> +
> +	return FIELD_GET(CTRDATA_CCM_MASK, ctr_data);
> +}
> +
> +static inline unsigned int ctr_get_type(u64 ctr_data)
> +{
> +	return FIELD_GET(CTRDATA_TYPE_MASK, ctr_data);
> +}
> +
> +static inline unsigned int ctr_get_depth(u64 ctr_depth)
> +{
> +	/* Depth table from CTR Spec: 2.4 sctrdepth.
> +	 *
> +	 * sctrdepth.depth       Depth
> +	 * 000			- 16
> +	 * 001			- 32
> +	 * 010			- 64
> +	 * 011			- 128
> +	 * 100			- 256
> +	 *
> +	 * Depth = 16 * 2 ^ (ctrdepth.depth)
> +	 * or
> +	 * Depth = 16 << ctrdepth.depth.
> +	 */
> +	return 16 << FIELD_GET(SCTRDEPTH_MASK, ctr_depth);
> +}
> +
> +/* Reads CTR entry at idx and stores it in entry struct. */
> +static bool capture_ctr_regset(struct ctr_regset *entry, unsigned int idx)
> +{
> +	entry->src = get_ctr_src_reg(idx);
> +
> +	if (!ctr_record_valid(entry->src))
> +		return false;
> +
> +	entry->src = entry->src & (~CTRSOURCE_VALID);
> +	entry->target = get_ctr_tgt_reg(idx);
> +	entry->ctr_data = get_ctr_data_reg(idx);
> +
> +	return true;
> +}
> +
> +static u64 branch_type_to_ctr(int branch_type)
> +{
> +	u64 config = CTR_BRANCH_FILTERS_INH | CTRCTL_LCOFIFRZ;
> +
> +	if (branch_type & PERF_SAMPLE_BRANCH_USER)
> +		config |= CTRCTL_U_ENABLE;
> +
> +	if (branch_type & PERF_SAMPLE_BRANCH_KERNEL)
> +		config |= CTRCTL_KERNEL_ENABLE;
> +
> +	if (branch_type & PERF_SAMPLE_BRANCH_HV) {
> +		if (riscv_isa_extension_available(NULL, h))
> +			config |= CTRCTL_KERNEL_ENABLE;
> +	}
> +
> +	if (branch_type & PERF_SAMPLE_BRANCH_ANY) {
> +		config &= ~CTR_BRANCH_FILTERS_INH;
> +		return config;
> +	}
> +
> +	if (branch_type & PERF_SAMPLE_BRANCH_ANY_CALL) {
> +		config &= ~CTRCTL_INDCALL_INH;
> +		config &= ~CTRCTL_DIRCALL_INH;
> +		config &= ~CTRCTL_EXCINH;
> +		config &= ~CTRCTL_INTRINH;
> +	}
> +
> +	if (branch_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
> +		config &= ~(CTRCTL_RET_INH | CTRCTL_TRETINH);
> +
> +	if (branch_type & PERF_SAMPLE_BRANCH_IND_CALL)
> +		config &= ~CTRCTL_INDCALL_INH;
> +
> +	if (branch_type & PERF_SAMPLE_BRANCH_COND)
> +		config &= ~CTRCTL_TKBRINH;
> +
> +	if (branch_type & PERF_SAMPLE_BRANCH_CALL_STACK) {
> +		config &= ~(CTRCTL_INDCALL_INH | CTRCTL_DIRCALL_INH |
> +			    CTRCTL_RET_INH);
> +		config |= CTRCTL_RASEMU;
> +	}
> +
> +	if (branch_type & PERF_SAMPLE_BRANCH_IND_JUMP) {
> +		config &= ~CTRCTL_INDJUMP_INH;
> +		config &= ~CTRCTL_INDOJUMP_INH;
> +	}
> +
> +	if (branch_type & PERF_SAMPLE_BRANCH_CALL)
> +		config &= ~CTRCTL_DIRCALL_INH;
> +
> +	return config;
> +}
> +
> +static const int ctr_perf_map[] = {
> +	[CTRDATA_TYPE_NONE]			= PERF_BR_UNKNOWN,
> +	[CTRDATA_TYPE_EXCEPTION]		= PERF_BR_SYSCALL,
> +	[CTRDATA_TYPE_INTERRUPT]		= PERF_BR_IRQ,
> +	[CTRDATA_TYPE_TRAP_RET]			= PERF_BR_ERET,
> +	[CTRDATA_TYPE_NONTAKEN_BRANCH]		= PERF_BR_COND,
> +	[CTRDATA_TYPE_TAKEN_BRANCH]		= PERF_BR_COND,
> +	[CTRDATA_TYPE_RESERVED_6]		= PERF_BR_UNKNOWN,
> +	[CTRDATA_TYPE_RESERVED_7]		= PERF_BR_UNKNOWN,
> +	[CTRDATA_TYPE_INDIRECT_CALL]		= PERF_BR_IND_CALL,
> +	[CTRDATA_TYPE_DIRECT_CALL]		= PERF_BR_CALL,
> +	[CTRDATA_TYPE_INDIRECT_JUMP]		= PERF_BR_UNCOND,
> +	[CTRDATA_TYPE_DIRECT_JUMP]		= PERF_BR_UNKNOWN,
> +	[CTRDATA_TYPE_CO_ROUTINE_SWAP]		= PERF_BR_UNKNOWN,
> +	[CTRDATA_TYPE_RETURN]			= PERF_BR_RET,
> +	[CTRDATA_TYPE_OTHER_INDIRECT_JUMP]	= PERF_BR_IND,
> +	[CTRDATA_TYPE_OTHER_DIRECT_JUMP]	= PERF_BR_UNKNOWN,
> +};
> +
> +static void ctr_set_perf_entry_type(struct perf_branch_entry *entry,
> +				    u64 ctr_data)
> +{
> +	int ctr_type = ctr_get_type(ctr_data);
> +
> +	entry->type = ctr_perf_map[ctr_type];
> +	if (entry->type == PERF_BR_UNKNOWN)
> +		pr_warn("%d - unknown branch type captured\n", ctr_type);
> +}
> +
> +static void capture_ctr_flags(struct perf_branch_entry *entry,
> +			      struct perf_event *event, u64 ctr_data,
> +			      u64 ctr_target)
> +{
> +	if (branch_sample_type(event))
> +		ctr_set_perf_entry_type(entry, ctr_data);
> +
> +	if (!branch_sample_no_cycles(event))
> +		entry->cycles = ctr_get_cycles(ctr_data);
> +
> +	if (!branch_sample_no_flags(event)) {
> +		entry->abort = 0;
> +		entry->mispred = ctr_get_mispredict(ctr_target);
> +		entry->predicted = !entry->mispred;
> +	}
> +
> +	if (branch_sample_priv(event))
> +		entry->priv = PERF_BR_PRIV_UNKNOWN;
> +}
> +
> +
> +static void ctr_regset_to_branch_entry(struct cpu_hw_events *cpuc,
> +				       struct perf_event *event,
> +				       struct ctr_regset *regset,
> +				       unsigned int idx)
> +{
> +	struct perf_branch_entry *entry = &cpuc->branches->branch_entries[idx];
> +
> +	perf_clear_branch_entry_bitfields(entry);
> +	entry->from = regset->src;
> +	entry->to = regset->target & (~CTRTARGET_MISP);
> +	capture_ctr_flags(entry, event, regset->ctr_data, regset->target);
> +}
> +
> +static void ctr_read_entries(struct cpu_hw_events *cpuc,
> +			     struct perf_event *event,
> +			     unsigned int depth)
> +{
> +	struct ctr_regset entry = {};
> +	u64 ctr_ctl;
> +	int i;
> +
> +	ctr_ctl = csr_read_clear(CSR_CTRCTL, CTR_BRANCH_ENABLE_BITS);
> +
> +	for (i = 0; i < depth; i++) {
> +		if (!capture_ctr_regset(&entry, i))
> +			break;
> +
> +		ctr_regset_to_branch_entry(cpuc, event, &entry, i);
> +	}
> +
> +	csr_set(CSR_CTRCTL, ctr_ctl & CTR_BRANCH_ENABLE_BITS);
> +
> +	cpuc->branches->branch_stack.nr = i;
> +	cpuc->branches->branch_stack.hw_idx = 0;
> +}
> +
> +bool riscv_pmu_ctr_valid(struct perf_event *event)
> +{
> +	u64 branch_type = event->attr.branch_sample_type;
> +
> +	if (branch_type & ~allowed_filters) {
> +		pr_debug_once("Requested branch filters not supported 0x%llx\n",
> +				branch_type & ~allowed_filters);
> +		return false;
> +	}
> +
> +	return true;
> +}
> +
> +void riscv_pmu_ctr_consume(struct cpu_hw_events *cpuc, struct perf_event *event)
> +{
> +	unsigned int depth = to_riscv_pmu(event->pmu)->ctr_depth;
> +
> +	ctr_read_entries(cpuc, event, depth);
> +
> +	/* Clear frozen bit. */
> +	csr_clear(CSR_SCTRSTATUS, SCTRSTATUS_FROZEN);
> +}
> +
> +static void riscv_pmu_ctr_clear(void)
> +{
> +	/* FIXME: Replace with sctrclr instruction once support is merged
> +	 * into toolchain.
> +	 */
> +	asm volatile(".4byte 0x10400073\n" ::: "memory");
> +	csr_write(CSR_SCTRSTATUS, 0);
> +}
> +
> +/*
> + * On context switch in, we need to make sure no samples from previous user
> + * are left in the CTR.
> + *
> + * On ctxswin, sched_in = true, called after the PMU has started
> + * On ctxswout, sched_in = false, called before the PMU is stopped
> + */
> +void riscv_pmu_ctr_sched_task(struct perf_event_pmu_context *pmu_ctx,
> +			      bool sched_in)
> +{
> +	struct riscv_pmu *rvpmu = to_riscv_pmu(pmu_ctx->pmu);
> +	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
> +
> +	if (cpuc->ctr_users && sched_in)
> +		riscv_pmu_ctr_clear();
> +}
> +
> +void riscv_pmu_ctr_enable(struct perf_event *event)
> +{
> +	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
> +	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
> +	u64 branch_type = event->attr.branch_sample_type;
> +	u64 ctr;
> +
> +	if (!cpuc->ctr_users++ && !event->total_time_running)
> +		riscv_pmu_ctr_clear();
> +
> +	ctr = branch_type_to_ctr(branch_type);
> +	csr_write(CSR_CTRCTL, ctr);
> +
> +	perf_sched_cb_inc(event->pmu);
> +}
> +
> +void riscv_pmu_ctr_disable(struct perf_event *event)
> +{
> +	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
> +	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
> +
> +	/* Clear CTRCTL to disable the recording. */
> +	csr_write(CSR_CTRCTL, 0);
> +
> +	cpuc->ctr_users--;
> +	WARN_ON_ONCE(cpuc->ctr_users < 0);
> +
> +	perf_sched_cb_dec(event->pmu);
> +}
> +
> +/*
> + * Check which perf branch filters the hardware supports. To avoid missing
> + * any newly added perf filter, we do a BUILD_BUG_ON check, so make sure
> + * to update the CTR_ALLOWED_BRANCH_FILTERS or CTR_EXCLUDE_BRANCH_FILTERS
> + * defines when adding support for a new filter in the function below.
> + */
> +static void __init check_available_filters(void)
> +{
> +	u64 ctr_ctl;
> +
> +	/*
> +	 * Ensure both perf branch filter allowed and exclude
> +	 * masks are always in sync with the generic perf ABI.
> +	 */
> +	BUILD_BUG_ON(CTR_PERF_BRANCH_FILTERS != (PERF_SAMPLE_BRANCH_MAX - 1));
> +
> +	allowed_filters = PERF_SAMPLE_BRANCH_USER      |
> +			  PERF_SAMPLE_BRANCH_KERNEL    |
> +			  PERF_SAMPLE_BRANCH_ANY       |
> +			  PERF_SAMPLE_BRANCH_HW_INDEX  |
> +			  PERF_SAMPLE_BRANCH_NO_FLAGS  |
> +			  PERF_SAMPLE_BRANCH_NO_CYCLES |
> +			  PERF_SAMPLE_BRANCH_TYPE_SAVE;
> +
> +	csr_write(CSR_CTRCTL, ~0);
> +	ctr_ctl = csr_read(CSR_CTRCTL);
> +
> +	if (riscv_isa_extension_available(NULL, h))
> +		allowed_filters |= PERF_SAMPLE_BRANCH_HV;
> +
> +	if (ctr_ctl & (CTRCTL_INDCALL_INH | CTRCTL_DIRCALL_INH))
> +		allowed_filters |= PERF_SAMPLE_BRANCH_ANY_CALL;
> +
> +	if (ctr_ctl & (CTRCTL_RET_INH | CTRCTL_TRETINH))
> +		allowed_filters |= PERF_SAMPLE_BRANCH_ANY_RETURN;
> +
> +	if (ctr_ctl & CTRCTL_INDCALL_INH)
> +		allowed_filters |= PERF_SAMPLE_BRANCH_IND_CALL;
> +
> +	if (ctr_ctl & CTRCTL_TKBRINH)
> +		allowed_filters |= PERF_SAMPLE_BRANCH_COND;
> +
> +	if (ctr_ctl & CTRCTL_RASEMU)
> +		allowed_filters |= PERF_SAMPLE_BRANCH_CALL_STACK;
> +
> +	if (ctr_ctl & (CTRCTL_INDOJUMP_INH | CTRCTL_INDJUMP_INH))
> +		allowed_filters |= PERF_SAMPLE_BRANCH_IND_JUMP;
> +
> +	if (ctr_ctl & CTRCTL_DIRCALL_INH)
> +		allowed_filters |= PERF_SAMPLE_BRANCH_CALL;
> +}
> +
> +void riscv_pmu_ctr_starting_cpu(void)
> +{
> +	if (!riscv_isa_extension_available(NULL, SxCTR) ||
> +	    !riscv_isa_extension_available(NULL, SSCOFPMF) ||
> +	    !riscv_isa_extension_available(NULL, SxCSRIND))
> +		return;
> +
> +	/* Set depth to maximum. */
> +	csr_write(CSR_SCTRDEPTH, SCTRDEPTH_MASK);
> +}
> +
> +void riscv_pmu_ctr_dying_cpu(void)
> +{
> +	if (!riscv_isa_extension_available(NULL, SxCTR) ||
> +	    !riscv_isa_extension_available(NULL, SSCOFPMF) ||
> +	    !riscv_isa_extension_available(NULL, SxCSRIND))
> +		return;
> +
> +	/* Clear and reset CTR CSRs. */
> +	csr_write(CSR_SCTRDEPTH, 0);
> +	csr_write(CSR_CTRCTL, 0);
> +	riscv_pmu_ctr_clear();
> +}
> +
> +void __init riscv_pmu_ctr_init(struct riscv_pmu *riscv_pmu)
> +{
> +	if (!riscv_isa_extension_available(NULL, SxCTR) ||
> +	    !riscv_isa_extension_available(NULL, SSCOFPMF) ||
> +	    !riscv_isa_extension_available(NULL, SxCSRIND))
> +		return;
> +
> +	check_available_filters();
> +
> +	/* Set depth to maximum. */
> +	csr_write(CSR_SCTRDEPTH, SCTRDEPTH_MASK);
> +	riscv_pmu->ctr_depth = ctr_get_depth(csr_read(CSR_SCTRDEPTH));
> +
> +	pr_info("Perf CTR available, with %d depth\n", riscv_pmu->ctr_depth);
> +}
> +
> +void __init riscv_pmu_ctr_finish(struct riscv_pmu *riscv_pmu)

I don't think it is valid for this function and riscv_pmu_ctr_init() to
be marked as __init since they are called from rvpmu_device_probe()
which is not marked as __init. modpost highlights this issue with the
warnings:

WARNING: modpost: vmlinux: section mismatch in reference: rvpmu_device_probe+0x18c (section: .text) -> riscv_pmu_ctr_init (section: .init.text)
WARNING: modpost: vmlinux: section mismatch in reference: rvpmu_device_probe+0x288 (section: .text) -> riscv_pmu_ctr_finish (section: .init.text)
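
Presumably the fix is just to drop the annotations (including from
check_available_filters(), which riscv_pmu_ctr_init() calls), e.g.:

-static void __init check_available_filters(void)
+static void check_available_filters(void)
...
-void __init riscv_pmu_ctr_init(struct riscv_pmu *riscv_pmu)
+void riscv_pmu_ctr_init(struct riscv_pmu *riscv_pmu)
...
-void __init riscv_pmu_ctr_finish(struct riscv_pmu *riscv_pmu)
+void riscv_pmu_ctr_finish(struct riscv_pmu *riscv_pmu)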

- Charlie

> +{
> +	if (!riscv_pmu_ctr_supported(riscv_pmu))
> +		return;
> +
> +	csr_write(CSR_SCTRDEPTH, 0);
> +	csr_write(CSR_CTRCTL, 0);
> +	riscv_pmu_ctr_clear();
> +	riscv_pmu->ctr_depth = 0;
> +}
> diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h
> index 5a6b840018bd..455d2386936f 100644
> --- a/include/linux/perf/riscv_pmu.h
> +++ b/include/linux/perf/riscv_pmu.h
> @@ -104,6 +104,39 @@ struct riscv_pmu *riscv_pmu_alloc(void);
>  int riscv_pmu_get_hpm_info(u32 *hw_ctr_width, u32 *num_hw_ctr);
>  #endif
>  
> +static inline bool riscv_pmu_ctr_supported(struct riscv_pmu *pmu)
> +{
> +	return !!pmu->ctr_depth;
> +}
> +
>  #endif /* CONFIG_RISCV_PMU_COMMON */
>  
> +#ifdef CONFIG_RISCV_CTR
> +
> +bool riscv_pmu_ctr_valid(struct perf_event *event);
> +void riscv_pmu_ctr_consume(struct cpu_hw_events *cpuc, struct perf_event *event);
> +void riscv_pmu_ctr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);
> +void riscv_pmu_ctr_enable(struct perf_event *event);
> +void riscv_pmu_ctr_disable(struct perf_event *event);
> +void riscv_pmu_ctr_dying_cpu(void);
> +void riscv_pmu_ctr_starting_cpu(void);
> +void riscv_pmu_ctr_init(struct riscv_pmu *riscv_pmu);
> +void riscv_pmu_ctr_finish(struct riscv_pmu *riscv_pmu);
> +
> +#else
> +
> +static inline bool riscv_pmu_ctr_valid(struct perf_event *event) { return false; }
> +static inline void riscv_pmu_ctr_consume(struct cpu_hw_events *cpuc,
> +				      struct perf_event *event) { }
> +static inline void riscv_pmu_ctr_sched_task(struct perf_event_pmu_context *,
> +					    bool sched_in) { }
> +static inline void riscv_pmu_ctr_enable(struct perf_event *event) { }
> +static inline void riscv_pmu_ctr_disable(struct perf_event *event) { }
> +static inline void riscv_pmu_ctr_dying_cpu(void) { }
> +static inline void riscv_pmu_ctr_starting_cpu(void) { }
> +static inline void riscv_pmu_ctr_init(struct riscv_pmu *riscv_pmu) { }
> +static inline void riscv_pmu_ctr_finish(struct riscv_pmu *riscv_pmu) { }
> +
> +#endif /* CONFIG_RISCV_CTR */
> +
>  #endif /* _RISCV_PMU_H */
> -- 
> 2.34.1
> 
> 

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index d6b42d5f62da..868e4b0808ab 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -19056,6 +19056,7 @@  M:	Atish Patra <atishp@atishpatra.org>
 R:	Anup Patel <anup@brainfault.org>
 L:	linux-riscv@lists.infradead.org
 S:	Supported
+F:	drivers/perf/riscv_ctr.c
 F:	drivers/perf/riscv_pmu_common.c
 F:	drivers/perf/riscv_pmu_dev.c
 F:	drivers/perf/riscv_pmu_legacy.c
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 3c37577b25f7..cca6598be739 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -110,6 +110,17 @@  config ANDES_CUSTOM_PMU
 
 	  If you don't know what to do here, say "Y".
 
+config RISCV_CTR
+       bool "Enable support for Control Transfer Records (CTR)"
+       depends on PERF_EVENTS && RISCV_PMU
+       default y
+       help
+         Enable support for Control Transfer Records (CTR), which
+         allows recording the branches, jumps, calls, returns, etc. taken
+         along an execution path. It also supports privilege-based
+         filtering, and captures additional information such as the
+         cycle count and branch misprediction.
+
 config ARM_PMU_ACPI
 	depends on ARM_PMU && ACPI
 	def_bool y
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index ba809cc069d5..364b1f66f410 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -16,6 +16,7 @@  obj-$(CONFIG_RISCV_PMU_COMMON) += riscv_pmu_common.o
 obj-$(CONFIG_RISCV_PMU_LEGACY) += riscv_pmu_legacy.o
 obj-$(CONFIG_RISCV_PMU) += riscv_pmu_dev.o
 obj-$(CONFIG_STARFIVE_STARLINK_PMU) += starfive_starlink_pmu.o
+obj-$(CONFIG_RISCV_CTR) += riscv_ctr.o
 obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o
 obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
 obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
diff --git a/drivers/perf/riscv_ctr.c b/drivers/perf/riscv_ctr.c
new file mode 100644
index 000000000000..95fda1edda4f
--- /dev/null
+++ b/drivers/perf/riscv_ctr.c
@@ -0,0 +1,469 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Control transfer records extension Helpers.
+ *
+ * Copyright (C) 2024 Rivos Inc.
+ *
+ * Author: Rajnesh Kanwal <rkanwal@rivosinc.com>
+ */
+
+#define pr_fmt(fmt) "CTR: " fmt
+
+#include <linux/bitfield.h>
+#include <linux/printk.h>
+#include <linux/types.h>
+#include <linux/perf_event.h>
+#include <linux/perf/riscv_pmu.h>
+#include <linux/cpufeature.h>
+#include <asm/hwcap.h>
+#include <asm/csr_ind.h>
+#include <asm/csr.h>
+
+#define CTR_BRANCH_FILTERS_INH  (CTRCTL_EXCINH       | \
+				 CTRCTL_INTRINH      | \
+				 CTRCTL_TRETINH      | \
+				 CTRCTL_TKBRINH      | \
+				 CTRCTL_INDCALL_INH  | \
+				 CTRCTL_DIRCALL_INH  | \
+				 CTRCTL_INDJUMP_INH  | \
+				 CTRCTL_DIRJUMP_INH  | \
+				 CTRCTL_CORSWAP_INH  | \
+				 CTRCTL_RET_INH      | \
+				 CTRCTL_INDOJUMP_INH | \
+				 CTRCTL_DIROJUMP_INH)
+
+#define CTR_BRANCH_ENABLE_BITS (CTRCTL_KERNEL_ENABLE | CTRCTL_U_ENABLE)
+
+/* Branch filters not-supported by CTR extension. */
+#define CTR_EXCLUDE_BRANCH_FILTERS (PERF_SAMPLE_BRANCH_ABORT_TX	        | \
+				    PERF_SAMPLE_BRANCH_IN_TX		| \
+				    PERF_SAMPLE_BRANCH_PRIV_SAVE        | \
+				    PERF_SAMPLE_BRANCH_NO_TX            | \
+				    PERF_SAMPLE_BRANCH_COUNTERS)
+
+/* Branch filters supported by CTR extension. */
+#define CTR_ALLOWED_BRANCH_FILTERS (PERF_SAMPLE_BRANCH_USER		| \
+				    PERF_SAMPLE_BRANCH_KERNEL		| \
+				    PERF_SAMPLE_BRANCH_HV		| \
+				    PERF_SAMPLE_BRANCH_ANY		| \
+				    PERF_SAMPLE_BRANCH_ANY_CALL	        | \
+				    PERF_SAMPLE_BRANCH_ANY_RETURN	| \
+				    PERF_SAMPLE_BRANCH_IND_CALL	        | \
+				    PERF_SAMPLE_BRANCH_COND		| \
+				    PERF_SAMPLE_BRANCH_IND_JUMP	        | \
+				    PERF_SAMPLE_BRANCH_HW_INDEX	        | \
+				    PERF_SAMPLE_BRANCH_NO_FLAGS	        | \
+				    PERF_SAMPLE_BRANCH_NO_CYCLES	| \
+				    PERF_SAMPLE_BRANCH_CALL_STACK       | \
+				    PERF_SAMPLE_BRANCH_CALL		| \
+				    PERF_SAMPLE_BRANCH_TYPE_SAVE)
+
+#define CTR_PERF_BRANCH_FILTERS    (CTR_ALLOWED_BRANCH_FILTERS	        | \
+				    CTR_EXCLUDE_BRANCH_FILTERS)
+
+static u64 allowed_filters __read_mostly;
+
+struct ctr_regset {
+	unsigned long src;
+	unsigned long target;
+	unsigned long ctr_data;
+};
+
+static inline u64 get_ctr_src_reg(unsigned int ctr_idx)
+{
+	return csr_ind_read(CSR_IREG, CTR_ENTRIES_FIRST, ctr_idx);
+}
+
+static inline u64 get_ctr_tgt_reg(unsigned int ctr_idx)
+{
+	return csr_ind_read(CSR_IREG2, CTR_ENTRIES_FIRST, ctr_idx);
+}
+
+static inline u64 get_ctr_data_reg(unsigned int ctr_idx)
+{
+	return csr_ind_read(CSR_IREG3, CTR_ENTRIES_FIRST, ctr_idx);
+}
+
+static inline bool ctr_record_valid(u64 ctr_src)
+{
+	return !!FIELD_GET(CTRSOURCE_VALID, ctr_src);
+}
+
+static inline int ctr_get_mispredict(u64 ctr_target)
+{
+	return FIELD_GET(CTRTARGET_MISP, ctr_target);
+}
+
+static inline unsigned int ctr_get_cycles(u64 ctr_data)
+{
+	const unsigned int cce = FIELD_GET(CTRDATA_CCE_MASK, ctr_data);
+	const unsigned int ccm = FIELD_GET(CTRDATA_CCM_MASK, ctr_data);
+
+	if (ctr_data & CTRDATA_CCV)
+		return 0;
+
+	/* Formula to calculate cycles from spec: (2^12 + CCM) << CCE-1 */
+	if (cce > 0)
+		return (4096 + ccm) << (cce - 1);
+
+	return FIELD_GET(CTRDATA_CCM_MASK, ctr_data);
+}
+
+static inline unsigned int ctr_get_type(u64 ctr_data)
+{
+	return FIELD_GET(CTRDATA_TYPE_MASK, ctr_data);
+}
+
+static inline unsigned int ctr_get_depth(u64 ctr_depth)
+{
+	/* Depth table from CTR Spec: 2.4 sctrdepth.
+	 *
+	 * sctrdepth.depth       Depth
+	 * 000			- 16
+	 * 001			- 32
+	 * 010			- 64
+	 * 011			- 128
+	 * 100			- 256
+	 *
+	 * Depth = 16 * 2 ^ (ctrdepth.depth)
+	 * or
+	 * Depth = 16 << ctrdepth.depth.
+	 */
+	return 16 << FIELD_GET(SCTRDEPTH_MASK, ctr_depth);
+}
+
+/* Reads CTR entry at idx and stores it in entry struct. */
+static bool capture_ctr_regset(struct ctr_regset *entry, unsigned int idx)
+{
+	entry->src = get_ctr_src_reg(idx);
+
+	if (!ctr_record_valid(entry->src))
+		return false;
+
+	entry->src = entry->src & (~CTRSOURCE_VALID);
+	entry->target = get_ctr_tgt_reg(idx);
+	entry->ctr_data = get_ctr_data_reg(idx);
+
+	return true;
+}
+
+static u64 branch_type_to_ctr(int branch_type)
+{
+	u64 config = CTR_BRANCH_FILTERS_INH | CTRCTL_LCOFIFRZ;
+
+	if (branch_type & PERF_SAMPLE_BRANCH_USER)
+		config |= CTRCTL_U_ENABLE;
+
+	if (branch_type & PERF_SAMPLE_BRANCH_KERNEL)
+		config |= CTRCTL_KERNEL_ENABLE;
+
+	if (branch_type & PERF_SAMPLE_BRANCH_HV) {
+		if (riscv_isa_extension_available(NULL, h))
+			config |= CTRCTL_KERNEL_ENABLE;
+	}
+
+	if (branch_type & PERF_SAMPLE_BRANCH_ANY) {
+		config &= ~CTR_BRANCH_FILTERS_INH;
+		return config;
+	}
+
+	if (branch_type & PERF_SAMPLE_BRANCH_ANY_CALL) {
+		config &= ~CTRCTL_INDCALL_INH;
+		config &= ~CTRCTL_DIRCALL_INH;
+		config &= ~CTRCTL_EXCINH;
+		config &= ~CTRCTL_INTRINH;
+	}
+
+	if (branch_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
+		config &= ~(CTRCTL_RET_INH | CTRCTL_TRETINH);
+
+	if (branch_type & PERF_SAMPLE_BRANCH_IND_CALL)
+		config &= ~CTRCTL_INDCALL_INH;
+
+	if (branch_type & PERF_SAMPLE_BRANCH_COND)
+		config &= ~CTRCTL_TKBRINH;
+
+	if (branch_type & PERF_SAMPLE_BRANCH_CALL_STACK) {
+		config &= ~(CTRCTL_INDCALL_INH | CTRCTL_DIRCALL_INH |
+			    CTRCTL_RET_INH);
+		config |= CTRCTL_RASEMU;
+	}
+
+	if (branch_type & PERF_SAMPLE_BRANCH_IND_JUMP) {
+		config &= ~CTRCTL_INDJUMP_INH;
+		config &= ~CTRCTL_INDOJUMP_INH;
+	}
+
+	if (branch_type & PERF_SAMPLE_BRANCH_CALL)
+		config &= ~CTRCTL_DIRCALL_INH;
+
+	return config;
+}
+
+static const int ctr_perf_map[] = {
+	[CTRDATA_TYPE_NONE]			= PERF_BR_UNKNOWN,
+	[CTRDATA_TYPE_EXCEPTION]		= PERF_BR_SYSCALL,
+	[CTRDATA_TYPE_INTERRUPT]		= PERF_BR_IRQ,
+	[CTRDATA_TYPE_TRAP_RET]			= PERF_BR_ERET,
+	[CTRDATA_TYPE_NONTAKEN_BRANCH]		= PERF_BR_COND,
+	[CTRDATA_TYPE_TAKEN_BRANCH]		= PERF_BR_COND,
+	[CTRDATA_TYPE_RESERVED_6]		= PERF_BR_UNKNOWN,
+	[CTRDATA_TYPE_RESERVED_7]		= PERF_BR_UNKNOWN,
+	[CTRDATA_TYPE_INDIRECT_CALL]		= PERF_BR_IND_CALL,
+	[CTRDATA_TYPE_DIRECT_CALL]		= PERF_BR_CALL,
+	[CTRDATA_TYPE_INDIRECT_JUMP]		= PERF_BR_UNCOND,
+	[CTRDATA_TYPE_DIRECT_JUMP]		= PERF_BR_UNKNOWN,
+	[CTRDATA_TYPE_CO_ROUTINE_SWAP]		= PERF_BR_UNKNOWN,
+	[CTRDATA_TYPE_RETURN]			= PERF_BR_RET,
+	[CTRDATA_TYPE_OTHER_INDIRECT_JUMP]	= PERF_BR_IND,
+	[CTRDATA_TYPE_OTHER_DIRECT_JUMP]	= PERF_BR_UNKNOWN,
+};
+
+static void ctr_set_perf_entry_type(struct perf_branch_entry *entry,
+				    u64 ctr_data)
+{
+	int ctr_type = ctr_get_type(ctr_data);
+
+	entry->type = ctr_perf_map[ctr_type];
+	if (entry->type == PERF_BR_UNKNOWN)
+		pr_warn("%d - unknown branch type captured\n", ctr_type);
+}
+
+static void capture_ctr_flags(struct perf_branch_entry *entry,
+			      struct perf_event *event, u64 ctr_data,
+			      u64 ctr_target)
+{
+	if (branch_sample_type(event))
+		ctr_set_perf_entry_type(entry, ctr_data);
+
+	if (!branch_sample_no_cycles(event))
+		entry->cycles = ctr_get_cycles(ctr_data);
+
+	if (!branch_sample_no_flags(event)) {
+		entry->abort = 0;
+		entry->mispred = ctr_get_mispredict(ctr_target);
+		entry->predicted = !entry->mispred;
+	}
+
+	if (branch_sample_priv(event))
+		entry->priv = PERF_BR_PRIV_UNKNOWN;
+}
+
+
+static void ctr_regset_to_branch_entry(struct cpu_hw_events *cpuc,
+				       struct perf_event *event,
+				       struct ctr_regset *regset,
+				       unsigned int idx)
+{
+	struct perf_branch_entry *entry = &cpuc->branches->branch_entries[idx];
+
+	perf_clear_branch_entry_bitfields(entry);
+	entry->from = regset->src;
+	entry->to = regset->target & (~CTRTARGET_MISP);
+	capture_ctr_flags(entry, event, regset->ctr_data, regset->target);
+}
+
+static void ctr_read_entries(struct cpu_hw_events *cpuc,
+			     struct perf_event *event,
+			     unsigned int depth)
+{
+	struct ctr_regset entry = {};
+	u64 ctr_ctl;
+	int i;
+
+	ctr_ctl = csr_read_clear(CSR_CTRCTL, CTR_BRANCH_ENABLE_BITS);
+
+	for (i = 0; i < depth; i++) {
+		if (!capture_ctr_regset(&entry, i))
+			break;
+
+		ctr_regset_to_branch_entry(cpuc, event, &entry, i);
+	}
+
+	csr_set(CSR_CTRCTL, ctr_ctl & CTR_BRANCH_ENABLE_BITS);
+
+	cpuc->branches->branch_stack.nr = i;
+	cpuc->branches->branch_stack.hw_idx = 0;
+}
+
+bool riscv_pmu_ctr_valid(struct perf_event *event)
+{
+	u64 branch_type = event->attr.branch_sample_type;
+
+	if (branch_type & ~allowed_filters) {
+		pr_debug_once("Requested branch filters not supported 0x%llx\n",
+				branch_type & ~allowed_filters);
+		return false;
+	}
+
+	return true;
+}
+
+void riscv_pmu_ctr_consume(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+	unsigned int depth = to_riscv_pmu(event->pmu)->ctr_depth;
+
+	ctr_read_entries(cpuc, event, depth);
+
+	/* Clear frozen bit. */
+	csr_clear(CSR_SCTRSTATUS, SCTRSTATUS_FROZEN);
+}
+
+static void riscv_pmu_ctr_clear(void)
+{
+	/* FIXME: Replace with sctrclr instruction once support is merged
+	 * into toolchain.
+	 */
+	asm volatile(".4byte 0x10400073\n" ::: "memory");
+	csr_write(CSR_SCTRSTATUS, 0);
+}
+
+/*
+ * On context switch in, we need to make sure no samples from previous user
+ * are left in the CTR.
+ *
+ * On ctxswin, sched_in = true, called after the PMU has started
+ * On ctxswout, sched_in = false, called before the PMU is stopped
+ */
+void riscv_pmu_ctr_sched_task(struct perf_event_pmu_context *pmu_ctx,
+			      bool sched_in)
+{
+	struct riscv_pmu *rvpmu = to_riscv_pmu(pmu_ctx->pmu);
+	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
+
+	if (cpuc->ctr_users && sched_in)
+		riscv_pmu_ctr_clear();
+}
+
+void riscv_pmu_ctr_enable(struct perf_event *event)
+{
+	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
+	u64 branch_type = event->attr.branch_sample_type;
+	u64 ctr;
+
+	if (!cpuc->ctr_users++ && !event->total_time_running)
+		riscv_pmu_ctr_clear();
+
+	ctr = branch_type_to_ctr(branch_type);
+	csr_write(CSR_CTRCTL, ctr);
+
+	perf_sched_cb_inc(event->pmu);
+}
+
+void riscv_pmu_ctr_disable(struct perf_event *event)
+{
+	struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+	struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
+
+	/* Clear CTRCTL to disable the recording. */
+	csr_write(CSR_CTRCTL, 0);
+
+	cpuc->ctr_users--;
+	WARN_ON_ONCE(cpuc->ctr_users < 0);
+
+	perf_sched_cb_dec(event->pmu);
+}
+
+/*
+ * Check which perf branch filters the hardware supports. To avoid missing
+ * any newly added perf filter, we do a BUILD_BUG_ON check, so make sure
+ * to update the CTR_ALLOWED_BRANCH_FILTERS or CTR_EXCLUDE_BRANCH_FILTERS
+ * defines when adding support for a new filter in the function below.
+ */
+static void __init check_available_filters(void)
+{
+	u64 ctr_ctl;
+
+	/*
+	 * Ensure both perf branch filter allowed and exclude
+	 * masks are always in sync with the generic perf ABI.
+	 */
+	BUILD_BUG_ON(CTR_PERF_BRANCH_FILTERS != (PERF_SAMPLE_BRANCH_MAX - 1));
+
+	allowed_filters = PERF_SAMPLE_BRANCH_USER      |
+			  PERF_SAMPLE_BRANCH_KERNEL    |
+			  PERF_SAMPLE_BRANCH_ANY       |
+			  PERF_SAMPLE_BRANCH_HW_INDEX  |
+			  PERF_SAMPLE_BRANCH_NO_FLAGS  |
+			  PERF_SAMPLE_BRANCH_NO_CYCLES |
+			  PERF_SAMPLE_BRANCH_TYPE_SAVE;
+
+	csr_write(CSR_CTRCTL, ~0);
+	ctr_ctl = csr_read(CSR_CTRCTL);
+
+	if (riscv_isa_extension_available(NULL, h))
+		allowed_filters |= PERF_SAMPLE_BRANCH_HV;
+
+	if (ctr_ctl & (CTRCTL_INDCALL_INH | CTRCTL_DIRCALL_INH))
+		allowed_filters |= PERF_SAMPLE_BRANCH_ANY_CALL;
+
+	if (ctr_ctl & (CTRCTL_RET_INH | CTRCTL_TRETINH))
+		allowed_filters |= PERF_SAMPLE_BRANCH_ANY_RETURN;
+
+	if (ctr_ctl & CTRCTL_INDCALL_INH)
+		allowed_filters |= PERF_SAMPLE_BRANCH_IND_CALL;
+
+	if (ctr_ctl & CTRCTL_TKBRINH)
+		allowed_filters |= PERF_SAMPLE_BRANCH_COND;
+
+	if (ctr_ctl & CTRCTL_RASEMU)
+		allowed_filters |= PERF_SAMPLE_BRANCH_CALL_STACK;
+
+	if (ctr_ctl & (CTRCTL_INDOJUMP_INH | CTRCTL_INDJUMP_INH))
+		allowed_filters |= PERF_SAMPLE_BRANCH_IND_JUMP;
+
+	if (ctr_ctl & CTRCTL_DIRCALL_INH)
+		allowed_filters |= PERF_SAMPLE_BRANCH_CALL;
+}
+
+void riscv_pmu_ctr_starting_cpu(void)
+{
+	if (!riscv_isa_extension_available(NULL, SxCTR) ||
+	    !riscv_isa_extension_available(NULL, SSCOFPMF) ||
+	    !riscv_isa_extension_available(NULL, SxCSRIND))
+		return;
+
+	/* Set depth to maximum. */
+	csr_write(CSR_SCTRDEPTH, SCTRDEPTH_MASK);
+}
+
+void riscv_pmu_ctr_dying_cpu(void)
+{
+	if (!riscv_isa_extension_available(NULL, SxCTR) ||
+	    !riscv_isa_extension_available(NULL, SSCOFPMF) ||
+	    !riscv_isa_extension_available(NULL, SxCSRIND))
+		return;
+
+	/* Clear and reset CTR CSRs. */
+	csr_write(CSR_SCTRDEPTH, 0);
+	csr_write(CSR_CTRCTL, 0);
+	riscv_pmu_ctr_clear();
+}
+
+void __init riscv_pmu_ctr_init(struct riscv_pmu *riscv_pmu)
+{
+	if (!riscv_isa_extension_available(NULL, SxCTR) ||
+	    !riscv_isa_extension_available(NULL, SSCOFPMF) ||
+	    !riscv_isa_extension_available(NULL, SxCSRIND))
+		return;
+
+	check_available_filters();
+
+	/* Set depth to maximum. */
+	csr_write(CSR_SCTRDEPTH, SCTRDEPTH_MASK);
+	riscv_pmu->ctr_depth = ctr_get_depth(csr_read(CSR_SCTRDEPTH));
+
+	pr_info("Perf CTR available, with %d depth\n", riscv_pmu->ctr_depth);
+}
+
+void __init riscv_pmu_ctr_finish(struct riscv_pmu *riscv_pmu)
+{
+	if (!riscv_pmu_ctr_supported(riscv_pmu))
+		return;
+
+	csr_write(CSR_SCTRDEPTH, 0);
+	csr_write(CSR_CTRCTL, 0);
+	riscv_pmu_ctr_clear();
+	riscv_pmu->ctr_depth = 0;
+}
diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h
index 5a6b840018bd..455d2386936f 100644
--- a/include/linux/perf/riscv_pmu.h
+++ b/include/linux/perf/riscv_pmu.h
@@ -104,6 +104,39 @@  struct riscv_pmu *riscv_pmu_alloc(void);
 int riscv_pmu_get_hpm_info(u32 *hw_ctr_width, u32 *num_hw_ctr);
 #endif
 
+static inline bool riscv_pmu_ctr_supported(struct riscv_pmu *pmu)
+{
+	return !!pmu->ctr_depth;
+}
+
 #endif /* CONFIG_RISCV_PMU_COMMON */
 
+#ifdef CONFIG_RISCV_CTR
+
+bool riscv_pmu_ctr_valid(struct perf_event *event);
+void riscv_pmu_ctr_consume(struct cpu_hw_events *cpuc, struct perf_event *event);
+void riscv_pmu_ctr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);
+void riscv_pmu_ctr_enable(struct perf_event *event);
+void riscv_pmu_ctr_disable(struct perf_event *event);
+void riscv_pmu_ctr_dying_cpu(void);
+void riscv_pmu_ctr_starting_cpu(void);
+void riscv_pmu_ctr_init(struct riscv_pmu *riscv_pmu);
+void riscv_pmu_ctr_finish(struct riscv_pmu *riscv_pmu);
+
+#else
+
+static inline bool riscv_pmu_ctr_valid(struct perf_event *event) { return false; }
+static inline void riscv_pmu_ctr_consume(struct cpu_hw_events *cpuc,
+				      struct perf_event *event) { }
+static inline void riscv_pmu_ctr_sched_task(struct perf_event_pmu_context *,
+					    bool sched_in) { }
+static inline void riscv_pmu_ctr_enable(struct perf_event *event) { }
+static inline void riscv_pmu_ctr_disable(struct perf_event *event) { }
+static inline void riscv_pmu_ctr_dying_cpu(void) { }
+static inline void riscv_pmu_ctr_starting_cpu(void) { }
+static inline void riscv_pmu_ctr_init(struct riscv_pmu *riscv_pmu) { }
+static inline void riscv_pmu_ctr_finish(struct riscv_pmu *riscv_pmu) { }
+
+#endif /* CONFIG_RISCV_CTR */
+
 #endif /* _RISCV_PMU_H */