diff mbox series

perf: ampere: Add support for Ampere SoC PMUs

Message ID 20230427215325.1067752-1-ilkka@os.amperecomputing.com (mailing list archive)
State New, archived
Headers show
Series perf: ampere: Add support for Ampere SoC PMUs | expand

Commit Message

Ilkka Koskinen April 27, 2023, 9:53 p.m. UTC
Add support for Ampere SoC PMUs. This driver supports the MCU PMU
available in the AmpereOne SoC.

Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
---
 .../admin-guide/perf/ampere-soc-pmu.rst       |  36 +
 Documentation/admin-guide/perf/index.rst      |   1 +
 drivers/perf/Kconfig                          |  10 +
 drivers/perf/Makefile                         |   1 +
 drivers/perf/ampere_soc_pmu.c                 | 724 ++++++++++++++++++
 5 files changed, 772 insertions(+)
 create mode 100644 Documentation/admin-guide/perf/ampere-soc-pmu.rst
 create mode 100644 drivers/perf/ampere_soc_pmu.c

Comments

Robin Murphy April 28, 2023, 12:24 a.m. UTC | #1
On 2023-04-27 22:53, Ilkka Koskinen wrote:
> Add support for Ampere SoC PMUs. This driver supports MCU PMU
> available in the AmpereOne SoC.
> 
> Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
> ---
>   .../admin-guide/perf/ampere-soc-pmu.rst       |  36 +
>   Documentation/admin-guide/perf/index.rst      |   1 +
>   drivers/perf/Kconfig                          |  10 +
>   drivers/perf/Makefile                         |   1 +
>   drivers/perf/ampere_soc_pmu.c                 | 724 ++++++++++++++++++
>   5 files changed, 772 insertions(+)
>   create mode 100644 Documentation/admin-guide/perf/ampere-soc-pmu.rst
>   create mode 100644 drivers/perf/ampere_soc_pmu.c
> 
> diff --git a/Documentation/admin-guide/perf/ampere-soc-pmu.rst b/Documentation/admin-guide/perf/ampere-soc-pmu.rst
> new file mode 100644
> index 000000000000..5161fbd1c548
> --- /dev/null
> +++ b/Documentation/admin-guide/perf/ampere-soc-pmu.rst
> @@ -0,0 +1,36 @@
> +.. SPDX-License-Identifier: GPL-2.0
> +
> +============================================
> +Ampere SoC Performance Monitoring Unit (PMU)
> +============================================
> +
> +Ampere SoC PMU is a generic PMU IP. At the first phase it's used for counting
> +MCU events on AmpereOne.
> +
> +MCU PMU events
> +--------------
> +
> +The PMU driver registers a PMU device for every supported PMU instance on each
> +SoC. See /sys/devices/AMPC0100:<nn>/.
> +
> +The PMU driver supports setting filters for "rank", "bank", and "threshold". The
> +filter settings are device specific and shared between all the relevant events.
> +The default value for all the filters is zero. The filters can be modified by
> +setting them with the last event of the particular device. All the previous
> +settings are overwritten.

Yeah, that doesn't really work... what is the "last event" relative to? 
Order of arguments to arbitrary tools? Order of perf_event_open 
syscalls? Order in which events are actually scheduled on the PMU? 
(which users can't even control - think event rotation)

To be practical I think you'll have to handle this the same way as 
arm_smmuv3_pmu's global filtering, and only allow events with matching 
filter configs to be scheduled together.

[...]
> +#define SOC_PMEVCNTR0_LO	0x000
> +#define SOC_PMEVCNTR0_HI	0x004
> +#define SOC_PMCCNTR_LO		0x0F8
> +#define SOC_PMCCNTR_HI		0x0FC
> +
> +#define SOC_PMEVTYPER0		0x400
> +
> +#define SOC_PMELCSR		0xA10
> +
> +#define SOC_PMCNTENSET		0xC00
> +#define SOC_PMCNTENCLR		0xC20
> +#define SOC_PMINTENSET		0xC40
> +#define SOC_PMINTENCLR		0xC60
> +#define SOC_PMOVSCLR		0xC80
> +#define SOC_PMOVSSET		0xCC0
> +
> +#define SOC_PMAUXR0		0xD80
> +#define SOC_PMAUXR1		0xD84
> +#define SOC_PMAUXR2		0xD88
> +#define SOC_PMAUXR3		0xD8C
> +
> +#define SOC_PMCFGR		0xE00
> +#define SOC_PMCR		0xE04
> +#define PMU_PMCR_E		BIT(0)
> +#define PMU_PMCR_P		BIT(1)
> +
> +#define SOC_PMAUTHSTATUS	0xFB8
> +#define SOC_PMDEVARCH		0xFBC
> +#define SOC_PMDEVTYPE		0xFCC
> +#define SOC_PMPIDR4		0xFD0
> +#define SOC_PMPIDR0		0xFE0
> +#define SOC_PMPIDR1		0xFE4
> +#define SOC_PMPIDR2		0xFE8
> +#define SOC_PMPIDR3		0xFEC
> +#define SOC_PMCIDR0		0xFF0
> +#define SOC_PMCIDR1		0xFF4
> +#define SOC_PMCIDR2		0xFF8
> +#define SOC_PMCIDR3		0xFFC

This register map quite clearly follows the Arm CoreSight PMU 
architecture. Nice to see it being used, but would you mind having a go 
at hooking up your imp-def bits to the existing arm_cspmu driver?

Thanks,
Robin.
Ilkka Koskinen May 1, 2023, 5:43 a.m. UTC | #2
Hi Robin,

On Fri, 28 Apr 2023, Robin Murphy wrote:
> On 2023-04-27 22:53, Ilkka Koskinen wrote:
>> Add support for Ampere SoC PMUs. This driver supports MCU PMU
>> available in the AmpereOne SoC.
>> 
>> Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
>> ---
>>   .../admin-guide/perf/ampere-soc-pmu.rst       |  36 +
>>   Documentation/admin-guide/perf/index.rst      |   1 +
>>   drivers/perf/Kconfig                          |  10 +
>>   drivers/perf/Makefile                         |   1 +
>>   drivers/perf/ampere_soc_pmu.c                 | 724 ++++++++++++++++++
>>   5 files changed, 772 insertions(+)
>>   create mode 100644 Documentation/admin-guide/perf/ampere-soc-pmu.rst
>>   create mode 100644 drivers/perf/ampere_soc_pmu.c
>> 
>> diff --git a/Documentation/admin-guide/perf/ampere-soc-pmu.rst 
>> b/Documentation/admin-guide/perf/ampere-soc-pmu.rst
>> new file mode 100644
>> index 000000000000..5161fbd1c548
>> --- /dev/null
>> +++ b/Documentation/admin-guide/perf/ampere-soc-pmu.rst
>> @@ -0,0 +1,36 @@
>> +.. SPDX-License-Identifier: GPL-2.0
>> +
>> +============================================
>> +Ampere SoC Performance Monitoring Unit (PMU)
>> +============================================
>> +
>> +Ampere SoC PMU is a generic PMU IP. At the first phase it's used for 
>> counting
>> +MCU events on AmpereOne.
>> +
>> +MCU PMU events
>> +--------------
>> +
>> +The PMU driver registers a PMU device for every supported PMU instance on 
>> each
>> +SoC. See /sys/devices/AMPC0100:<nn>/.
>> +
>> +The PMU driver supports setting filters for "rank", "bank", and 
>> "threshold". The
>> +filter settings are device specific and shared between all the relevant 
>> events.
>> +The default value for all the filters is zero. The filters can be modified 
>> by
>> +setting them with the last event of the particular device. All the 
>> previous
>> +settings are overwritten.
>
> Yeah, that doesn't really work... what is the "last event" relative to? Order 
> of arguments to arbitrary tools? Order of perf_event_open syscalls? Order in 
> which events are actually scheduled on the PMU? (which users can't even 
> control - think event rotation)
>
> To be practical I think you'll have to handle this the same way as 
> arm_smmuv3_pmu's global filtering, and only allow events with matching filter 
> configs to be scheduled together.

Thanks for pointing that out to me! I suspected my implementation had an 
issue with the shared filters, but when I checked whether any of 
the supported PMUs handled them the same way, I somehow missed 
arm_smmuv3_pmu. I'll fix the driver.

>
> [...]
>> +#define SOC_PMEVCNTR0_LO	0x000
>> +#define SOC_PMEVCNTR0_HI	0x004
>> +#define SOC_PMCCNTR_LO		0x0F8
>> +#define SOC_PMCCNTR_HI		0x0FC
>> +
>> +#define SOC_PMEVTYPER0		0x400
>> +
>> +#define SOC_PMELCSR		0xA10
>> +
>> +#define SOC_PMCNTENSET		0xC00
>> +#define SOC_PMCNTENCLR		0xC20
>> +#define SOC_PMINTENSET		0xC40
>> +#define SOC_PMINTENCLR		0xC60
>> +#define SOC_PMOVSCLR		0xC80
>> +#define SOC_PMOVSSET		0xCC0
>> +
>> +#define SOC_PMAUXR0		0xD80
>> +#define SOC_PMAUXR1		0xD84
>> +#define SOC_PMAUXR2		0xD88
>> +#define SOC_PMAUXR3		0xD8C
>> +
>> +#define SOC_PMCFGR		0xE00
>> +#define SOC_PMCR		0xE04
>> +#define PMU_PMCR_E		BIT(0)
>> +#define PMU_PMCR_P		BIT(1)
>> +
>> +#define SOC_PMAUTHSTATUS	0xFB8
>> +#define SOC_PMDEVARCH		0xFBC
>> +#define SOC_PMDEVTYPE		0xFCC
>> +#define SOC_PMPIDR4		0xFD0
>> +#define SOC_PMPIDR0		0xFE0
>> +#define SOC_PMPIDR1		0xFE4
>> +#define SOC_PMPIDR2		0xFE8
>> +#define SOC_PMPIDR3		0xFEC
>> +#define SOC_PMCIDR0		0xFF0
>> +#define SOC_PMCIDR1		0xFF4
>> +#define SOC_PMCIDR2		0xFF8
>> +#define SOC_PMCIDR3		0xFFC
>
> This register map quite clearly follows the Arm CoreSight PMU architecture. 
> Nice to see it being used, but would you mind having a go at hooking up your 
> imp-def bits to the existing arm_cspmu driver?

I didn't think about that. I'll take a look at the coresight pmu driver 
and see how I could do it properly.

Thanks, Ilkka
diff mbox series

Patch

diff --git a/Documentation/admin-guide/perf/ampere-soc-pmu.rst b/Documentation/admin-guide/perf/ampere-soc-pmu.rst
new file mode 100644
index 000000000000..5161fbd1c548
--- /dev/null
+++ b/Documentation/admin-guide/perf/ampere-soc-pmu.rst
@@ -0,0 +1,36 @@ 
+.. SPDX-License-Identifier: GPL-2.0
+
+============================================
+Ampere SoC Performance Monitoring Unit (PMU)
+============================================
+
+The Ampere SoC PMU is a generic PMU IP. In this first phase it is used for
+counting MCU events on AmpereOne.
+
+MCU PMU events
+--------------
+
+The PMU driver registers a PMU device for every supported PMU instance on each
+SoC. See /sys/devices/AMPC0100:<nn>/.
+
+The PMU driver supports setting filters for "rank", "bank", and "threshold". The
+filter settings are device specific and shared between all the relevant events.
+The default value for all the filters is zero. The filters are programmed every
+time an event is added to the particular device, so the values given with the
+last event added to that device take effect. All the previous settings are
+overwritten.
+
+
+Example for perf tool use::
+
+  / # perf list mcu
+    mcu0/cycle_count/                                 [Kernel PMU event]
+  <...>
+    mcu16/cycle_count/                                [Kernel PMU event]
+  <...>
+
+  / # perf stat -a -e mcu0/cycle_count,bank=5/,mcu0/dfi_cmd,threshold=16,rank=8/,mcu1/dfi_cmd/ \
+        sleep 1
+
+Note, as none of the filters was set for mcu1, the default value is assigned to
+them. For mcu0, bank setting will be overwritten with the default value since
+it wasn't set with the last mcu0 event.
diff --git a/Documentation/admin-guide/perf/index.rst b/Documentation/admin-guide/perf/index.rst
index 9de64a40adab..8de9d253ae81 100644
--- a/Documentation/admin-guide/perf/index.rst
+++ b/Documentation/admin-guide/perf/index.rst
@@ -21,3 +21,4 @@  Performance monitor support
    alibaba_pmu
    nvidia-pmu
    meson-ddr-pmu
+   ampere-soc-pmu
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 711f82400086..6551005aaa04 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -209,6 +209,16 @@  config MARVELL_CN10K_DDR_PMU
 	  Enable perf support for Marvell DDR Performance monitoring
 	  event on CN10K platform.
 
+config AMPERE_SOC_PMU
+	tristate "Ampere SoC PMU"
+	depends on ARM64 && ACPI
+	help
+	  Enable support for Ampere SoC performance counters used on
+	  Ampere SoCs.
+
+	  The SoC PMU can support multiple different PMU types. In this first
+	  phase it supports the MCU PMU available on AmpereOne.
+
 source "drivers/perf/arm_cspmu/Kconfig"
 
 source "drivers/perf/amlogic/Kconfig"
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index dabc859540ce..17be9b0f75f8 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -24,3 +24,4 @@  obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o
 obj-$(CONFIG_ALIBABA_UNCORE_DRW_PMU) += alibaba_uncore_drw_pmu.o
 obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu/
 obj-$(CONFIG_MESON_DDR_PMU) += amlogic/
+obj-$(CONFIG_AMPERE_SOC_PMU) += ampere_soc_pmu.o
diff --git a/drivers/perf/ampere_soc_pmu.c b/drivers/perf/ampere_soc_pmu.c
new file mode 100644
index 000000000000..0591e0e4fbd2
--- /dev/null
+++ b/drivers/perf/ampere_soc_pmu.c
@@ -0,0 +1,724 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Ampere SoC PMU (Performance Monitor Unit)
+ *
+ * Copyright (c) 2023, Ampere Computing LLC
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/cpuhotplug.h>
+#include <linux/cpumask.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+/*
+ * Register offsets. The driver adds these to ampere_pmu::base when calling
+ * readl()/writel().
+ */
+
+/* Counter values, accessed as two 32-bit halves (LO/HI) */
+#define SOC_PMEVCNTR0_LO	0x000
+#define SOC_PMEVCNTR0_HI	0x004
+/* Used for the counter at index SOC_CYCLE_COUNTER_IDX */
+#define SOC_PMCCNTR_LO		0x0F8
+#define SOC_PMCCNTR_HI		0x0FC
+
+/* Event id selected for generic counter 0 */
+#define SOC_PMEVTYPER0		0x400
+
+#define SOC_PMELCSR		0xA10
+
+/* Counter enable, interrupt enable and overflow status, as SET/CLR pairs */
+#define SOC_PMCNTENSET		0xC00
+#define SOC_PMCNTENCLR		0xC20
+#define SOC_PMINTENSET		0xC40
+#define SOC_PMINTENCLR		0xC60
+#define SOC_PMOVSCLR		0xC80
+#define SOC_PMOVSSET		0xCC0
+
+/*
+ * ampere_pmu_event_config() writes the threshold, rank and bank filters to
+ * PMAUXR0, PMAUXR1 and PMAUXR2 respectively.
+ */
+#define SOC_PMAUXR0		0xD80
+#define SOC_PMAUXR1		0xD84
+#define SOC_PMAUXR2		0xD88
+#define SOC_PMAUXR3		0xD8C
+
+/* PMCFGR is read at probe time; PMCR.E is toggled by pmu_enable/pmu_disable */
+#define SOC_PMCFGR		0xE00
+#define SOC_PMCR		0xE04
+#define PMU_PMCR_E		BIT(0)
+#define PMU_PMCR_P		BIT(1)
+
+/* Identification registers; only PMDEVARCH is read by this driver */
+#define SOC_PMAUTHSTATUS	0xFB8
+#define SOC_PMDEVARCH		0xFBC
+#define SOC_PMDEVTYPE		0xFCC
+#define SOC_PMPIDR4		0xFD0
+#define SOC_PMPIDR0		0xFE0
+#define SOC_PMPIDR1		0xFE4
+#define SOC_PMPIDR2		0xFE8
+#define SOC_PMPIDR3		0xFEC
+#define SOC_PMCIDR0		0xFF0
+#define SOC_PMCIDR1		0xFF4
+#define SOC_PMCIDR2		0xFF8
+#define SOC_PMCIDR3		0xFFC
+
+/* Fields extracted from PMDEVARCH by ampere_pmu_get_version() */
+#define SOC_ID_MASK		0xFFF
+#define SOC_VERSION_MASK	0xFF000
+/* Fields extracted from PMCFGR by ampere_pmu_get_version() */
+#define SOC_CYCLE_COUNTER	BIT(14)
+#define SOC_NR_COUNTERS_MASK	0xF
+
+/* Per-counter register offsets: counters are 8 bytes apart, types 4 bytes */
+#define SOC_PMEVCNTR_LOn(n)	(SOC_PMEVCNTR0_LO + (n) * 8)
+#define SOC_PMEVCNTR_HIn(n)	(SOC_PMEVCNTR0_HI + (n) * 8)
+#define SOC_PMEVTYPERn(n)	(SOC_PMEVTYPER0 + (n) * 4)
+
+/* Bit used for the cycle counter in the enable/interrupt/overflow registers */
+#define SOC_CYCLE_COUNTER_BIT	BIT(31)
+/* Largest number of generic counters the driver accepts at probe time */
+#define SOC_MAX_COUNTERS	4
+/* Slot of the cycle counter in ampere_pmu::counters[] and ::counter_mask */
+#define SOC_CYCLE_COUNTER_IDX	SOC_MAX_COUNTERS
+/* Event id that selects the cycle counter instead of a generic counter */
+#define SOC_CYCLE_COUNTER_EVENT	0x100
+
+/* Mask applied to counter deltas in ampere_perf_event_update() */
+#define SOC_MAX_PERIOD		0xFFFFFFFFFFFFFFFFULL
+
+/* Per-device state for one Ampere SoC PMU instance. */
+struct ampere_pmu {
+	int id;		/* PMDEVARCH & SOC_ID_MASK */
+	int version;	/* PMDEVARCH & SOC_VERSION_MASK (not shifted) */
+
+	void __iomem *base;	/* mapped register block (resource 0) */
+	int irq;		/* overflow interrupt (platform IRQ 0) */
+
+	int nr_counters;	/* PMCFGR & SOC_NR_COUNTERS_MASK */
+
+	/* Number of generic counters + optional cycle counter */
+	DECLARE_BITMAP(counter_mask, SOC_MAX_COUNTERS + 1);
+	/* Event currently occupying each counter slot, or NULL */
+	struct perf_event *counters[SOC_MAX_COUNTERS + 1];
+	bool cycle_exists;	/* PMCFGR & SOC_CYCLE_COUNTER */
+
+	int cpu;		/* CPU that events and the IRQ are bound to */
+	struct hlist_node node;	/* CPU hotplug multi-instance linkage */
+	struct pmu pmu;
+	struct device *dev;	/* the platform device's struct device */
+};
+
+/* Hotplug state id returned by cpuhp_setup_state_multi() at module init */
+static int cpuhp_state_num;
+
+/* Map an embedded struct pmu pointer back to its struct ampere_pmu */
+#define to_ampere_pmu(_pmu) container_of(_pmu, struct ampere_pmu, pmu)
+
+/*
+ * Bit fields of the perf attr words; they match the strings exported in
+ * ampere_pmu_format_attrs[]: EVENTID in config, THRESHOLD and RANK in
+ * config1, BANK in config2.
+ */
+#define SOC_CONFIG_EVENTID		GENMASK(8, 0)
+#define SOC_CONFIG_THRESHOLD		GENMASK(7, 0)
+#define SOC_CONFIG_RANK			GENMASK(23, 8)
+#define SOC_CONFIG_BANK			GENMASK(31, 0)
+
+/* Accessors that pull the fields above out of a struct perf_event */
+#define SOC_EVENT_EVENTID(event)	FIELD_GET(SOC_CONFIG_EVENTID, (event)->attr.config)
+#define SOC_EVENT_THRESHOLD(event)	FIELD_GET(SOC_CONFIG_THRESHOLD, (event)->attr.config1)
+#define SOC_EVENT_RANK(event)		FIELD_GET(SOC_CONFIG_RANK, (event)->attr.config1)
+#define SOC_EVENT_BANK(event)		FIELD_GET(SOC_CONFIG_BANK, (event)->attr.config2)
+
+/* Declare one entry of the "events" sysfs group with the given event id */
+#define AMPERE_PMU_EVENT_ATTR(_name, _event)			\
+	PMU_EVENT_ATTR_ID(_name, ampere_pmu_event_show, _event)
+
+/* Same, for the pseudo event that selects the cycle counter */
+#define AMPERE_PMU_EVENT_CYCLE(_name)				\
+	AMPERE_PMU_EVENT_ATTR(_name, SOC_CYCLE_COUNTER_EVENT)
+
+/*
+ * Declare one entry of the "format" sysfs group. A compound-literal
+ * dev_ext_attribute carries the format string in ->var; the macro evaluates
+ * to a pointer to its embedded struct attribute.
+ */
+#define AMPERE_PMU_FORMAT_ATTR(_name, _config)					\
+	(&((struct dev_ext_attribute[]) {{					\
+		.attr = __ATTR(_name, 0444, ampere_pmu_format_show, NULL),	\
+		.var = (void *) _config						\
+	}})[0].attr.attr)
+
+/*
+ * sysfs show() callback for the "format" attributes: emits the string stored
+ * in the enclosing dev_ext_attribute's ->var, followed by a newline.
+ */
+static ssize_t ampere_pmu_format_show(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	char *fmt;
+
+	fmt = (char *)container_of(attr, struct dev_ext_attribute, attr)->var;
+
+	return sysfs_emit(buf, "%s\n", fmt);
+}
+
+/*
+ * "format" sysfs group: tells userspace which perf attr bits carry the event
+ * id and the threshold/rank/bank filter values. The ranges mirror the
+ * SOC_CONFIG_* masks.
+ */
+static struct attribute *ampere_pmu_format_attrs[] = {
+	AMPERE_PMU_FORMAT_ATTR(event, "config:0-8"),
+	AMPERE_PMU_FORMAT_ATTR(threshold, "config1:0-7"),
+	AMPERE_PMU_FORMAT_ATTR(rank, "config1:8-23"),
+	AMPERE_PMU_FORMAT_ATTR(bank, "config2:0-31"),
+	NULL,
+};
+
+static const struct attribute_group ampere_pmu_format_attr_group = {
+	.name = "format",
+	.attrs = ampere_pmu_format_attrs,
+};
+
+/*
+ * sysfs show() callback for the "events" attributes: emits the event id of
+ * the enclosing perf_pmu_events_attr as "event=0x<hex>".
+ */
+static ssize_t ampere_pmu_event_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	struct perf_pmu_events_attr *ev;
+
+	ev = container_of(attr, struct perf_pmu_events_attr, attr);
+
+	return sysfs_emit(buf, "event=0x%llx\n", ev->id);
+}
+
+/*
+ * "events" sysfs group for the MCU PMU: one named attribute per event id.
+ * Ids 0x2e-0x31 have no entry in this table.
+ */
+static struct attribute *ampere_mcu_pmu_events_attrs[] = {
+	AMPERE_PMU_EVENT_ATTR(cycle_count,		0x00),
+	AMPERE_PMU_EVENT_ATTR(act_sent,			0x01),
+	AMPERE_PMU_EVENT_ATTR(pre_sent,			0x02),
+	AMPERE_PMU_EVENT_ATTR(rd_sent,			0x03),
+	AMPERE_PMU_EVENT_ATTR(rda_sent,			0x04),
+	AMPERE_PMU_EVENT_ATTR(wr_sent,			0x05),
+	AMPERE_PMU_EVENT_ATTR(wra_sent,			0x06),
+	AMPERE_PMU_EVENT_ATTR(pd_entry_vld,		0x07),
+	AMPERE_PMU_EVENT_ATTR(sref_entry_vld,		0x08),
+	AMPERE_PMU_EVENT_ATTR(prea_sent,		0x09),
+	AMPERE_PMU_EVENT_ATTR(pre_sb_sent,		0x0a),
+	AMPERE_PMU_EVENT_ATTR(ref_sent,			0x0b),
+	AMPERE_PMU_EVENT_ATTR(rfm_sent,			0x0c),
+	AMPERE_PMU_EVENT_ATTR(ref_sb_sent,		0x0d),
+	AMPERE_PMU_EVENT_ATTR(rfm_sb_sent,		0x0e),
+	AMPERE_PMU_EVENT_ATTR(rd_rda_sent,		0x0f),
+	AMPERE_PMU_EVENT_ATTR(wr_wra_sent,		0x10),
+	AMPERE_PMU_EVENT_ATTR(raw_hazard,		0x11),
+	AMPERE_PMU_EVENT_ATTR(war_hazard,		0x12),
+	AMPERE_PMU_EVENT_ATTR(waw_hazard,		0x13),
+	AMPERE_PMU_EVENT_ATTR(rar_hazard,		0x14),
+	AMPERE_PMU_EVENT_ATTR(raw_war_waw_hazard,	0x15),
+	AMPERE_PMU_EVENT_ATTR(hprd_lprd_wr_req_vld,	0x16),
+	AMPERE_PMU_EVENT_ATTR(lprd_req_vld,		0x17),
+	AMPERE_PMU_EVENT_ATTR(hprd_req_vld,		0x18),
+	AMPERE_PMU_EVENT_ATTR(hprd_lprd_req_vld,	0x19),
+	AMPERE_PMU_EVENT_ATTR(prefetch_tgt,		0x1a),
+	AMPERE_PMU_EVENT_ATTR(wr_req_vld,		0x1b),
+	AMPERE_PMU_EVENT_ATTR(partial_wr_req_vld,	0x1c),
+	AMPERE_PMU_EVENT_ATTR(rd_retry,			0x1d),
+	AMPERE_PMU_EVENT_ATTR(wr_retry,			0x1e),
+	AMPERE_PMU_EVENT_ATTR(retry_gnt,		0x1f),
+	AMPERE_PMU_EVENT_ATTR(rank_change,		0x20),
+	AMPERE_PMU_EVENT_ATTR(dir_change,		0x21),
+	AMPERE_PMU_EVENT_ATTR(rank_dir_change,		0x22),
+	AMPERE_PMU_EVENT_ATTR(rank_active,		0x23),
+	AMPERE_PMU_EVENT_ATTR(rank_idle,		0x24),
+	AMPERE_PMU_EVENT_ATTR(rank_pd,			0x25),
+	AMPERE_PMU_EVENT_ATTR(rank_sref,		0x26),
+	AMPERE_PMU_EVENT_ATTR(queue_fill_gt_thresh,	0x27),
+	AMPERE_PMU_EVENT_ATTR(queue_rds_gt_thresh,	0x28),
+	AMPERE_PMU_EVENT_ATTR(queue_wrs_gt_thresh,	0x29),
+	AMPERE_PMU_EVENT_ATTR(phy_updt_complt,		0x2a),
+	AMPERE_PMU_EVENT_ATTR(tz_fail,			0x2b),
+	AMPERE_PMU_EVENT_ATTR(dram_errc,		0x2c),
+	AMPERE_PMU_EVENT_ATTR(dram_errd,		0x2d),
+	AMPERE_PMU_EVENT_ATTR(read_data_return,		0x32),
+	AMPERE_PMU_EVENT_ATTR(chi_wr_data_delta,	0x33),
+	AMPERE_PMU_EVENT_ATTR(zq_start,			0x34),
+	AMPERE_PMU_EVENT_ATTR(zq_latch,			0x35),
+	AMPERE_PMU_EVENT_ATTR(wr_fifo_full,		0x36),
+	AMPERE_PMU_EVENT_ATTR(info_fifo_full,		0x37),
+	AMPERE_PMU_EVENT_ATTR(cmd_fifo_full,		0x38),
+	AMPERE_PMU_EVENT_ATTR(dfi_nop,			0x39),
+	AMPERE_PMU_EVENT_ATTR(dfi_cmd,			0x3a),
+	AMPERE_PMU_EVENT_ATTR(rd_run_len,		0x3b),
+	AMPERE_PMU_EVENT_ATTR(wr_run_len,		0x3c),
+
+	/* Pseudo event (SOC_CYCLE_COUNTER_EVENT) routed to the cycle counter */
+	AMPERE_PMU_EVENT_CYCLE(cycle),
+	NULL,
+};
+
+/*
+ * is_visible() callback for the "events" group. Hides the cycle counter
+ * pseudo event when the PMU reported no cycle counter; every other event
+ * keeps its declared mode.
+ */
+static umode_t ampere_pmu_event_is_visible(struct kobject *kobj,
+					   struct attribute *attr, int unused)
+{
+	struct ampere_pmu *apmu =
+		to_ampere_pmu(dev_get_drvdata(kobj_to_dev(kobj)));
+	struct perf_pmu_events_attr *ev =
+		container_of(attr, struct perf_pmu_events_attr, attr.attr);
+	bool is_cycle = ev->id == SOC_CYCLE_COUNTER_EVENT;
+
+	return (is_cycle && !apmu->cycle_exists) ? 0 : attr->mode;
+}
+
+/* "events" sysfs group; visibility is filtered per attribute at runtime */
+static const struct attribute_group ampere_pmu_events_attrs_group = {
+	.name = "events",
+	.attrs = ampere_mcu_pmu_events_attrs,
+	.is_visible = ampere_pmu_event_is_visible,
+};
+
+/* sysfs show() callback for "cpumask": prints the CPU this PMU is bound to. */
+static ssize_t ampere_cpumask_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct pmu *pmu = dev_get_drvdata(dev);
+	int bound_cpu = to_ampere_pmu(pmu)->cpu;
+
+	return cpumap_print_to_pagebuf(true, buf, cpumask_of(bound_cpu));
+}
+
+/* Read-only "cpumask" attribute, placed directly in the PMU's sysfs dir */
+static struct device_attribute ampere_pmu_cpumask_attr =
+	__ATTR(cpumask, 0444, ampere_cpumask_show, NULL);
+
+static struct attribute *ampere_pmu_cpumask_attrs[] = {
+	&ampere_pmu_cpumask_attr.attr,
+	NULL,
+};
+
+/* Unnamed group: its attributes are not put in a subdirectory */
+static const struct attribute_group ampere_pmu_cpumask_attr_group = {
+	.attrs = ampere_pmu_cpumask_attrs,
+};
+
+/* All sysfs groups handed to perf through pmu::attr_groups */
+static const struct attribute_group *ampere_pmu_attr_groups[] = {
+	&ampere_pmu_events_attrs_group,
+	&ampere_pmu_format_attr_group,
+	&ampere_pmu_cpumask_attr_group,
+	NULL,
+};
+
+/*
+ * Program the filter registers from @event's attributes: threshold goes to
+ * PMAUXR0, rank to PMAUXR1 and bank to PMAUXR2. Each call overwrites
+ * whatever values were programmed before.
+ */
+static void ampere_pmu_event_config(struct perf_event *event)
+{
+	void __iomem *regs = to_ampere_pmu(event->pmu)->base;
+
+	writel(SOC_EVENT_THRESHOLD(event), regs + SOC_PMAUXR0);
+	writel(SOC_EVENT_RANK(event), regs + SOC_PMAUXR1);
+	writel(SOC_EVENT_BANK(event), regs + SOC_PMAUXR2);
+}
+
+/*
+ * Read a 64-bit counter through its two 32-bit halves.
+ *
+ * @idx selects a generic counter, or the cycle counter when it equals
+ * SOC_CYCLE_COUNTER_IDX. The high half is sampled before and after the low
+ * half, and the read is retried until both high samples agree, so a carry
+ * between the halves cannot produce a torn value.
+ */
+static inline u64 ampere_pmu_read_counter(struct ampere_pmu *apmu, int idx)
+{
+	u32 off_lo, off_hi, upper, lower;
+
+	if (idx != SOC_CYCLE_COUNTER_IDX) {
+		off_lo = SOC_PMEVCNTR_LOn(idx);
+		off_hi = SOC_PMEVCNTR_HIn(idx);
+	} else {
+		off_lo = SOC_PMCCNTR_LO;
+		off_hi = SOC_PMCCNTR_HI;
+	}
+
+	for (;;) {
+		upper = readl(apmu->base + off_hi);
+		lower = readl(apmu->base + off_lo);
+		if (readl(apmu->base + off_hi) == upper)
+			break;
+	}
+
+	return ((u64)upper << 32) | lower;
+}
+
+/*
+ * Write @val to a 64-bit counter, low half first and then high half.
+ * @idx selects a generic counter, or the cycle counter when it equals
+ * SOC_CYCLE_COUNTER_IDX.
+ */
+static inline void ampere_pmu_write_counter(struct ampere_pmu *apmu, int idx, u64 val)
+{
+	u32 off_lo, off_hi;
+
+	if (idx != SOC_CYCLE_COUNTER_IDX) {
+		off_lo = SOC_PMEVCNTR_LOn(idx);
+		off_hi = SOC_PMEVCNTR_HIn(idx);
+	} else {
+		off_lo = SOC_PMCCNTR_LO;
+		off_hi = SOC_PMCCNTR_HI;
+	}
+
+	writel(lower_32_bits(val), apmu->base + off_lo);
+	writel(upper_32_bits(val), apmu->base + off_hi);
+}
+
+/*
+ * Reserve a counter slot for @event in apmu->counter_mask.
+ *
+ * The cycle pseudo event can only use slot SOC_CYCLE_COUNTER_IDX; any other
+ * event takes the first free generic slot.
+ *
+ * Returns the reserved slot index, or -ENOSPC when nothing suitable is free.
+ */
+static int ampere_get_counter_idx(struct perf_event *event)
+{
+	struct ampere_pmu *apmu = to_ampere_pmu(event->pmu);
+	int slot;
+
+	if (SOC_EVENT_EVENTID(event) != SOC_CYCLE_COUNTER_EVENT) {
+		for (slot = 0; slot < apmu->nr_counters; slot++) {
+			if (!test_and_set_bit(slot, apmu->counter_mask))
+				return slot;
+		}
+
+		return -ENOSPC;
+	}
+
+	if (test_and_set_bit(SOC_CYCLE_COUNTER_IDX, apmu->counter_mask))
+		return -ENOSPC;
+
+	return SOC_CYCLE_COUNTER_IDX;
+}
+
+/* pmu::pmu_enable callback: set PMCR.E, leaving the other PMCR bits as read. */
+static void ampere_pmu_enable(struct pmu *pmu)
+{
+	void __iomem *pmcr = to_ampere_pmu(pmu)->base + SOC_PMCR;
+	u32 ctrl = readl(pmcr);
+
+	ctrl |= PMU_PMCR_E;
+	writel(ctrl, pmcr);
+}
+
+/* pmu::pmu_disable callback: clear PMCR.E, leaving the other PMCR bits as read. */
+static void ampere_pmu_disable(struct pmu *pmu)
+{
+	void __iomem *pmcr = to_ampere_pmu(pmu)->base + SOC_PMCR;
+	u32 ctrl = readl(pmcr);
+
+	ctrl &= ~PMU_PMCR_E;
+	writel(ctrl, pmcr);
+}
+
+/*
+ * Start counting for @event: for a generic counter, program the event id
+ * into its PMEVTYPER; then set the counter's bit in PMCNTENSET and
+ * PMINTENSET. The cycle pseudo event uses SOC_CYCLE_COUNTER_BIT and has no
+ * event type to program.
+ */
+static void ampere_pmu_enable_event(struct perf_event *event)
+{
+	void __iomem *regs = to_ampere_pmu(event->pmu)->base;
+	int eventid = SOC_EVENT_EVENTID(event);
+	u32 mask, cur;
+
+	if (eventid != SOC_CYCLE_COUNTER_EVENT) {
+		int slot = event->hw.idx;
+
+		mask = BIT(slot);
+		writel(eventid, regs + SOC_PMEVTYPERn(slot));
+	} else {
+		mask = SOC_CYCLE_COUNTER_BIT;
+	}
+
+	cur = readl(regs + SOC_PMCNTENSET);
+	writel(cur | mask, regs + SOC_PMCNTENSET);
+
+	cur = readl(regs + SOC_PMINTENSET);
+	writel(cur | mask, regs + SOC_PMINTENSET);
+}
+
+/*
+ * Stop counting for @event: clear the counter's enable bit and its overflow
+ * interrupt enable bit.
+ *
+ * Only the bit belonging to this event is written to the CLR registers.
+ * NOTE(review): this assumes PMCNTENCLR/PMINTENCLR are write-one-to-clear
+ * and read back the current enable mask, as their SET/CLR naming and the
+ * CoreSight PMU style register layout suggest. Under that assumption a
+ * read-modify-write would clear every enabled counter, not just this one.
+ * The interrupt is disabled through PMINTENCLR; writing the bit to
+ * PMINTENSET would leave it armed.
+ */
+static void ampere_pmu_disable_event(struct perf_event *event)
+{
+	struct ampere_pmu *apmu = to_ampere_pmu(event->pmu);
+	struct hw_perf_event *hw = &event->hw;
+	u32 cnt_bit;
+	int eventid = SOC_EVENT_EVENTID(event);
+
+	if (eventid == SOC_CYCLE_COUNTER_EVENT)
+		cnt_bit = SOC_CYCLE_COUNTER_BIT;
+	else
+		cnt_bit = BIT(hw->idx);
+
+	writel(cnt_bit, apmu->base + SOC_PMCNTENCLR);
+	writel(cnt_bit, apmu->base + SOC_PMINTENCLR);
+}
+
+/*
+ * Reset @event's counter to its start value and record that value as the
+ * previous count used for delta computation.
+ */
+static void ampere_perf_event_set_period(struct perf_event *event)
+{
+	struct hw_perf_event *hw = &event->hw;
+	/*
+	 * The counter is 64 bits wide, so it is not expected to wrap all the
+	 * way around to this start value.
+	 */
+	u64 start = 1ULL << 31;
+
+	local64_set(&hw->prev_count, start);
+	ampere_pmu_write_counter(to_ampere_pmu(event->pmu), hw->idx, start);
+}
+
+/*
+ * Fold the hardware counter's progress since the last update into
+ * event->count. The read is retried until prev_count could be swapped
+ * without another updater having changed it in between.
+ */
+static void ampere_perf_event_update(struct perf_event *event)
+{
+	struct ampere_pmu *apmu = to_ampere_pmu(event->pmu);
+	struct hw_perf_event *hw = &event->hw;
+	u64 before, now, delta;
+
+	for (;;) {
+		before = local64_read(&hw->prev_count);
+		now = ampere_pmu_read_counter(apmu, hw->idx);
+		if (local64_xchg(&hw->prev_count, now) == before)
+			break;
+	}
+
+	delta = (now - before) & SOC_MAX_PERIOD;
+	local64_add(delta, &event->count);
+}
+
+/* pmu::read callback: bring event->count up to date with the hardware. */
+static void ampere_perf_read(struct perf_event *event)
+{
+	ampere_perf_event_update(event);
+}
+
+/*
+ * pmu::start callback: reset the counter to its start value and enable it.
+ * Warns once and does nothing if the event is not currently stopped.
+ */
+static void ampere_perf_start(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hw = &event->hw;
+
+	if (WARN_ON_ONCE(!(hw->state & PERF_HES_STOPPED)))
+		return;
+
+	/* A reload is only expected once the count is already up to date */
+	WARN_ON_ONCE((flags & PERF_EF_RELOAD) &&
+		     !(hw->state & PERF_HES_UPTODATE));
+
+	hw->state = 0;
+
+	ampere_perf_event_set_period(event);
+	ampere_pmu_enable_event(event);
+
+	perf_event_update_userpage(event);
+}
+
+/*
+ * pmu::stop callback: disable the counter, then read its final value.
+ * @flags is not consulted; the count is always updated. Warns once if the
+ * event was already marked stopped, but still proceeds.
+ */
+static void ampere_perf_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hw = &event->hw;
+
+	ampere_pmu_disable_event(event);
+
+	WARN_ON_ONCE(hw->state & PERF_HES_STOPPED);
+	hw->state |= PERF_HES_STOPPED;
+
+	ampere_perf_event_update(event);
+	hw->state |= PERF_HES_UPTODATE;
+}
+
+/* Add @event to the running tally of generic and cycle counter users. */
+static void ampere_perf_count_event(struct perf_event *event,
+				    int *generic, int *cycle)
+{
+	if (SOC_EVENT_EVENTID(event) == SOC_CYCLE_COUNTER_EVENT)
+		(*cycle)++;
+	else
+		(*generic)++;
+}
+
+/*
+ * Check that @event's group could be scheduled on this PMU at once.
+ *
+ * The group may only mix events of this PMU with software events. Every
+ * hardware member is counted: the event being created, the leader and all
+ * existing siblings. Cycle pseudo events are tallied separately because
+ * ampere_get_counter_idx() can only place them in the single cycle counter
+ * slot, while every other event needs one of the nr_counters generic slots.
+ *
+ * Returns 0 if the group fits, -EINVAL otherwise.
+ */
+static int ampere_perf_validate_group(struct perf_event *event)
+{
+	struct perf_event *sibling, *leader = event->group_leader;
+	struct ampere_pmu *apmu = to_ampere_pmu(event->pmu);
+	int generic = 0, cycle = 0;
+
+	/* A lone event needs no group check */
+	if (leader == event)
+		return 0;
+
+	if (event->pmu != leader->pmu && !is_software_event(leader))
+		return -EINVAL;
+
+	/* The new event is not on the sibling list yet: count it explicitly */
+	ampere_perf_count_event(event, &generic, &cycle);
+
+	/* A non-software leader is on this PMU (checked above) and uses a slot */
+	if (!is_software_event(leader))
+		ampere_perf_count_event(leader, &generic, &cycle);
+
+	for_each_sibling_event(sibling, leader) {
+		if (is_software_event(sibling))
+			continue;
+		if (sibling->pmu != event->pmu)
+			return -EINVAL;
+		ampere_perf_count_event(sibling, &generic, &cycle);
+	}
+
+	if (generic > apmu->nr_counters || cycle > 1)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * pmu::event_init callback.
+ *
+ * Returns -ENOENT for events of another PMU type, and -EINVAL for sampling
+ * events, per-task events and events without a CPU. Accepted events are
+ * rebound to the CPU this PMU is associated with, then group-validated.
+ */
+static int ampere_perf_event_init(struct perf_event *event)
+{
+	struct ampere_pmu *apmu = to_ampere_pmu(event->pmu);
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (is_sampling_event(event))
+		return -EINVAL;
+
+	if (event->attach_state & PERF_ATTACH_TASK)
+		return -EINVAL;
+
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	event->cpu = apmu->cpu;
+
+	return ampere_perf_validate_group(event);
+}
+
+
+/*
+ * pmu::add callback: reserve a counter for @event, program the filter
+ * registers from its attributes, and start it when PERF_EF_START is set.
+ *
+ * Returns 0 on success or the negative error from ampere_get_counter_idx().
+ */
+static int ampere_perf_add(struct perf_event *event, int flags)
+{
+	struct ampere_pmu *apmu = to_ampere_pmu(event->pmu);
+	struct hw_perf_event *hw = &event->hw;
+	int slot = ampere_get_counter_idx(event);
+
+	if (slot < 0)
+		return slot;
+
+	apmu->counters[slot] = event;
+	hw->idx = slot;
+
+	ampere_pmu_event_config(event);
+
+	hw->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	if (flags & PERF_EF_START)
+		ampere_perf_start(event, PERF_EF_RELOAD);
+
+	/* Make the new state visible through the userspace mapping */
+	perf_event_update_userpage(event);
+
+	return 0;
+}
+
+/*
+ * pmu::del callback: stop @event (collecting its final count) and release
+ * its counter slot.
+ */
+static void ampere_perf_del(struct perf_event *event, int flags)
+{
+	struct ampere_pmu *apmu = to_ampere_pmu(event->pmu);
+	int slot = event->hw.idx;
+
+	ampere_perf_stop(event, PERF_EF_UPDATE);
+
+	apmu->counters[slot] = NULL;
+	clear_bit(slot, apmu->counter_mask);
+
+	perf_event_update_userpage(event);
+}
+
+/*
+ * Read the PMU's identification and configuration registers.
+ *
+ * Fills in apmu->id and apmu->version from PMDEVARCH, and
+ * apmu->cycle_exists and apmu->nr_counters from PMCFGR.
+ *
+ * Returns 0 on success, or -EOPNOTSUPP when the hardware reports more
+ * generic counters than SOC_MAX_COUNTERS.
+ */
+static int ampere_pmu_get_version(struct ampere_pmu *apmu)
+{
+	u32 reg;
+
+	reg = readl(apmu->base + SOC_PMDEVARCH);
+	apmu->id = reg & SOC_ID_MASK;
+	/* Kept in place, i.e. not shifted down to bit 0 */
+	apmu->version = reg & SOC_VERSION_MASK;
+
+	reg = readl(apmu->base + SOC_PMCFGR);
+	apmu->cycle_exists = reg & SOC_CYCLE_COUNTER;
+	apmu->nr_counters = reg & SOC_NR_COUNTERS_MASK;
+
+	if (apmu->nr_counters > SOC_MAX_COUNTERS) {
+		dev_err(apmu->dev, "Unsupported type (%x, %x, counters %d)\n",
+			apmu->id, apmu->version, apmu->nr_counters);
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+/*
+ * Overflow interrupt handler.
+ *
+ * Reads the pending overflow bits from PMOVSSET and acknowledges them via
+ * PMOVSCLR. For every overflowed counter that has an event attached, the
+ * count is folded in and the counter is reset to its start value.
+ *
+ * PMCR.E is set on every exit path, including when no overflow was pending.
+ *
+ * Returns IRQ_HANDLED if at least one event was serviced, else IRQ_NONE.
+ */
+static irqreturn_t ampere_pmu_handle_irq(int irq, void *dev_id)
+{
+	struct ampere_pmu *apmu = dev_id;
+	struct perf_event *event;
+	u32 pending;
+	int slot, ret = IRQ_NONE;
+
+	pending = readl(apmu->base + SOC_PMOVSSET);
+	if (pending) {
+		writel(pending, apmu->base + SOC_PMOVSCLR);
+
+		for (slot = 0; slot < apmu->nr_counters; slot++) {
+			event = apmu->counters[slot];
+
+			/* Skip slots without an event or without overflow */
+			if (!event || !(pending & BIT(slot)))
+				continue;
+
+			ampere_perf_event_update(event);
+			ampere_perf_event_set_period(event);
+			ret = IRQ_HANDLED;
+		}
+
+		event = apmu->counters[SOC_CYCLE_COUNTER_IDX];
+		if (event && (pending & SOC_CYCLE_COUNTER_BIT)) {
+			ampere_perf_event_update(event);
+			ampere_perf_event_set_period(event);
+			ret = IRQ_HANDLED;
+		}
+	}
+
+	ampere_pmu_enable(&apmu->pmu);
+	return ret;
+}
+
+/*
+ * Platform probe: map the registers, pick the CPU the PMU is bound to, hook
+ * up the overflow interrupt, read the hardware configuration and register
+ * the PMU with perf under the name "mcu<N>".
+ *
+ * Memory, the register mapping and the IRQ are device-managed. The only
+ * manual rollback is the hotplug instance, which is removed if
+ * perf_pmu_register() fails.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int ampere_pmu_probe(struct platform_device *pdev)
+{
+	struct ampere_pmu *apmu;
+	static atomic_t id;
+	char *name;
+	int ret = 0;
+
+	apmu = devm_kzalloc(&pdev->dev, sizeof(*apmu), GFP_KERNEL);
+	if (!apmu)
+		return -ENOMEM;
+
+	apmu->dev = &pdev->dev;
+	platform_set_drvdata(pdev, apmu);
+
+	apmu->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(apmu->base))
+		return PTR_ERR(apmu->base);
+
+	/* Bind to a CPU chosen relative to the device's NUMA node */
+	apmu->cpu = cpumask_local_spread(0, dev_to_node(apmu->dev));
+
+	apmu->irq = platform_get_irq(pdev, 0);
+	if (apmu->irq < 0)
+		return apmu->irq;
+
+	ret = devm_request_irq(&pdev->dev, apmu->irq, ampere_pmu_handle_irq,
+			       IRQF_NOBALANCING | IRQF_SHARED,
+			       pdev->name, apmu);
+	if (ret)
+		return ret;
+
+	/* Deliver overflow interrupts on the CPU that owns the events */
+	ret = irq_set_affinity(apmu->irq, cpumask_of(apmu->cpu));
+	if (ret)
+		return ret;
+
+	ret = ampere_pmu_get_version(apmu);
+	if (ret)
+		return ret;
+
+	apmu->pmu = (struct pmu) {
+		.module		= THIS_MODULE,
+		.attr_groups	= ampere_pmu_attr_groups,
+		.task_ctx_nr	= perf_invalid_context,
+		.pmu_enable	= ampere_pmu_enable,
+		.pmu_disable	= ampere_pmu_disable,
+		.event_init	= ampere_perf_event_init,
+		.add		= ampere_perf_add,
+		.del		= ampere_perf_del,
+		.start		= ampere_perf_start,
+		.stop		= ampere_perf_stop,
+		.read		= ampere_perf_read,
+		.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
+	};
+
+	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "mcu%d",
+			      atomic_fetch_inc(&id));
+	if (!name)
+		return -ENOMEM;
+
+	ret = cpuhp_state_add_instance(cpuhp_state_num, &apmu->node);
+	if (ret) {
+		dev_err(&pdev->dev, "Error %d registering hotplug\n", ret);
+		return ret;
+	}
+
+	ret = perf_pmu_register(&apmu->pmu, name, -1);
+	if (ret) {
+		/*
+		 * The PMU never became visible to perf, so drop the hotplug
+		 * instance without running the offline callback, which would
+		 * try to migrate a perf context for it.
+		 */
+		cpuhp_state_remove_instance_nocalls(cpuhp_state_num,
+						    &apmu->node);
+	}
+
+	return ret;
+}
+
+/*
+ * Platform remove: undo probe in reverse order.
+ *
+ * The PMU is unregistered from perf first, so the hotplug instance stays in
+ * place for as long as events can exist. The instance is then dropped with
+ * the _nocalls variant, because running the offline callback here would
+ * only migrate the context and IRQ of a PMU that is going away.
+ *
+ * Always returns 0.
+ */
+static int ampere_pmu_remove(struct platform_device *pdev)
+{
+	struct ampere_pmu *apmu = platform_get_drvdata(pdev);
+
+	perf_pmu_unregister(&apmu->pmu);
+	cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &apmu->node);
+	return 0;
+}
+
+/* ACPI hardware ids this driver binds to */
+static const struct acpi_device_id ampere_pmu_acpi_match[] = {
+	{"AMPC0100", 0},
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, ampere_pmu_acpi_match);
+
+/* Platform driver glue; devices are matched through the ACPI table above */
+static struct platform_driver ampere_soc_pmu_driver = {
+	.driver	= {
+		.name = "ampere-soc-pmu",
+		.acpi_match_table = ACPI_PTR(ampere_pmu_acpi_match),
+	},
+	.probe	= ampere_pmu_probe,
+	.remove	= ampere_pmu_remove,
+};
+
+/*
+ * CPU hotplug offline callback, invoked per PMU instance.
+ *
+ * If @cpu is the CPU this PMU is bound to, move the perf context to any
+ * other online CPU, record that CPU as the new owner and steer the overflow
+ * interrupt to it. Nothing is done when another CPU goes down or when no
+ * other CPU is online.
+ *
+ * Always returns 0.
+ */
+static int ampere_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct ampere_pmu *apmu = hlist_entry_safe(node, struct ampere_pmu, node);
+	unsigned int target;
+
+	if (cpu != apmu->cpu)
+		return 0;
+
+	target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target >= nr_cpu_ids)
+		return 0;
+
+	perf_pmu_migrate_context(&apmu->pmu, cpu, target);
+	apmu->cpu = target;
+
+	/* Follow the events: route the IRQ to the new CPU, not the dying one */
+	if (apmu->irq)
+		irq_set_affinity(apmu->irq, cpumask_of(target));
+
+	return 0;
+}
+
+/*
+ * Module init: allocate a dynamic multi-instance CPU hotplug state with
+ * ampere_pmu_offline_cpu() as its offline callback, then register the
+ * platform driver. The hotplug state is released again if driver
+ * registration fails.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int __init ampere_soc_pmu_init(void)
+{
+	int ret;
+
+	cpuhp_state_num = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+						  "ampere-soc-pmu", NULL,
+						  ampere_pmu_offline_cpu);
+	if (cpuhp_state_num < 0)
+		return cpuhp_state_num;
+
+	ret = platform_driver_register(&ampere_soc_pmu_driver);
+	if (ret)
+		cpuhp_remove_multi_state(cpuhp_state_num);
+
+	return ret;
+}
+
+/*
+ * Module exit: unregister the platform driver first, then release the
+ * hotplug state allocated at init.
+ */
+static void __exit ampere_soc_pmu_exit(void)
+{
+	platform_driver_unregister(&ampere_soc_pmu_driver);
+	cpuhp_remove_multi_state(cpuhp_state_num);
+}
+
+module_init(ampere_soc_pmu_init);
+module_exit(ampere_soc_pmu_exit);
+
+MODULE_LICENSE("GPL");