diff mbox series

[RFC,2/2] perf: Add xilinx APM support

Message ID 20220921080623.22077-3-shubhrajyoti.datta@amd.com (mailing list archive)
State New, archived
Headers show
Series Add: Add Xilinx APM support | expand

Commit Message

Datta, Shubhrajyoti Sept. 21, 2022, 8:06 a.m. UTC
The programmable AXI performance monitors (APM) collect real-time
transaction metrics at multiple points on the AXI interconnect to
help system software profile real-time activity. In our platform
we have it in PL and also some of the hardened instances in PS.
Add Xilinx APM driver support.

Signed-off-by: Shubhrajyoti Datta <shubhrajyoti.datta@amd.com>
---
 drivers/perf/Kconfig      |  11 +
 drivers/perf/Makefile     |   1 +
 drivers/perf/xilinx_apm.c | 516 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 528 insertions(+)
 create mode 100644 drivers/perf/xilinx_apm.c
diff mbox series

Patch

diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 1e2d69453771..9be2c5d1f37e 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -183,6 +183,17 @@  config APPLE_M1_CPU_PMU
 	  Provides support for the non-architectural CPU PMUs present on
 	  the Apple M1 SoCs and derivatives.
 
+config XILINX_APM_PMU
+	tristate "Enable PMU support for the Xilinx APM controller"
+	help
+	  Enables perf support for the Xilinx Axi Performance Monitor
+	  controller.The IP AXI Performance Monitor core measures
+	  major performance metrics for the AMBA AXI system. The Performance
+	  Monitor measures bus latency of a specific master/slave (AXI4/AXI3/AXI4-Stream/AXI4-Lite)
+	  in a system and the amount of memory traffic for specific durations.
+	  This core can also be used for real-time profiling for software applications.
+
+
 source "drivers/perf/hisilicon/Kconfig"
 
 config MARVELL_CN10K_DDR_PMU
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index 57a279c61df5..44f4505920b1 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -20,3 +20,4 @@  obj-$(CONFIG_ARM_DMC620_PMU) += arm_dmc620_pmu.o
 obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o
 obj-$(CONFIG_MARVELL_CN10K_DDR_PMU) += marvell_cn10k_ddr_pmu.o
 obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o
+obj-$(CONFIG_XILINX_APM_PMU) += xilinx_apm.o
diff --git a/drivers/perf/xilinx_apm.c b/drivers/perf/xilinx_apm.c
new file mode 100644
index 000000000000..4106805a2a48
--- /dev/null
+++ b/drivers/perf/xilinx_apm.c
@@ -0,0 +1,516 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for Xilinx APM Performance Reporting
+ *
+ * Copyright 2022 AMD, Inc.
+ *
+ */
+
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+
+#define XAPM_MODE_ADVANCED	1
+#define XAPM_MODE_PROFILE	2
+#define XAPM_MODE_TRACE		3
+
+#define XAPM_MSR_OFFSET			0x44
+#define XAPM_MSR_COUNTER		8
+#define XAPM_SLOTID_SHIFT		0x5
+#define XAPM_CTRL_OFFSET		0x300
+#define XAPM_MCR_OFFSET			0x100
+#define XAPM_MCR_COUNTER		16
+#define XAPM_RR				0x108
+
+#define XAPM_METRICS_CNT_EN		BIT(0)
+#define XAPM_GLOBAL_CLK_CNT_EN		BIT(16)
+#define XAPM_COUNTER_MASK		0xF
+#define XAPM_EVENT_MASK			GENMASK_ULL(11, 0)
+#define XAPM_EVENT_SHIFT		0
+#define XAPM_EVTYPE_MASK		GENMASK_ULL(15, 12)
+#define XAPM_EVTYPE_SHIFT		12
+#define XAPM_EVTYPE_APM			1
+
+#define get_event(_config)	FIELD_GET(XAPM_EVENT_MASK, _config)
+#define get_evtype(_config)	FIELD_GET(XAPM_EVTYPE_MASK, _config)
+
+#define to_xapm_perf_priv(_pmu)	container_of(_pmu, struct xapm_perf_priv, pmu)
+
+#define XAPM_EVENT_CONFIG(_event, _type)					\
+	((void *)((((_event) << XAPM_EVENT_SHIFT) & XAPM_EVENT_MASK) |	\
+		(((_type) << XAPM_EVTYPE_SHIFT) & XAPM_EVTYPE_MASK)))
+
+PMU_FORMAT_ATTR(event,		"config:0-11");
+
+/*
+ * Performance Counter Registers for APM.
+ *
+ */
+enum metric {
+	WRITE_TRANSACTION	= 0,
+	READ_TRANSACTION	= 1,
+	WRITE_BYTE		= 2,
+	READ_BYTE		= 3,
+	WRITE_BEAT		= 4,
+	READ_LATENCY		= 5,
+	WRITE_LATENCY		= 6,
+	SLV_WR_IDLE		= 7,
+	MST_RD_IDLE		= 8,
+	BVALID			= 9,
+	WLASTS			= 9,
+	RLASTS			= 10,
+	MIN_WR_LATENCY		= 11,
+	MAX_WR_LATENCY		= 12,
+	MIN_RD_LATENCY		= 13,
+	MAX_RD_LATENCY		= 14,
+	MAX_METRIC		= 15
+};
+
+/**
+ * struct xapm_perf_priv - priv data structure for xapm perf driver
+ *
+ * @dev: parent device.
+ * @ioaddr: mapped base address of DDR region.
+ * @pmu: pmu data structure for xapm perf counters.
+ * @cpu: active CPU to which the PMU is bound for accesses.
+ * @mode: Mode of APM eg advanced/profile/trace .
+ * @counter: current counter.
+ * @slot: current slot to be read.
+ * @range: Range to be allowed.
+ * @node: handle to the xapm node.
+ * @maxslots: Maximum number of slots.
+ * @cpuhp_state: state for CPU hotplug notification.
+ * @clk: clock handle.
+ */
+struct xapm_perf_priv {
+	struct device *dev;
+	void __iomem *ioaddr;
+	struct pmu pmu;
+	unsigned int cpu;
+	u32 mode;
+	u32 counter;
+	u32 slot;
+	u32 maxslots;
+	u32 range;
+	struct hlist_node node;
+	enum cpuhp_state cpuhp_state;
+	struct clk *clk;
+};
+
+static ssize_t cpumask_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	struct pmu *pmu = dev_get_drvdata(dev);
+	struct xapm_perf_priv *priv;
+
+	priv = to_xapm_perf_priv(pmu);
+
+	return cpumap_print_to_pagebuf(true, buf, cpumask_of(priv->cpu));
+}
+static DEVICE_ATTR_RO(cpumask);
+
+static ssize_t slot_store(struct device *dev,
+			  struct device_attribute *attr,
+			  const char *buf, size_t size)
+{
+	int ret;
+	struct pmu *pmu = dev_get_drvdata(dev);
+	struct xapm_perf_priv *priv;
+
+	priv = to_xapm_perf_priv(pmu);
+	ret = kstrtou32(buf, 0, &priv->slot);
+	if (ret < 0)
+		return ret;
+
+	if (priv->slot > priv->maxslots)
+		return -EINVAL;
+
+	return size;
+}
+static DEVICE_ATTR_WO(slot);
+
+static ssize_t range_store(struct device *dev,
+			   struct device_attribute *attr,
+			   const char *buf, size_t size)
+{
+	int ret;
+	struct pmu *pmu = dev_get_drvdata(dev);
+	struct xapm_perf_priv *priv;
+
+	priv = to_xapm_perf_priv(pmu);
+	ret = kstrtou32(buf, 0, &priv->range);
+	if (ret < 0)
+		return ret;
+
+	writel(priv->range, priv->ioaddr + XAPM_RR);
+
+	return size;
+}
+static DEVICE_ATTR_WO(range);
+
+static struct attribute *xapm_perf_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,
+	&dev_attr_slot.attr,
+	&dev_attr_range.attr,
+	NULL,
+};
+
+static struct attribute_group xapm_perf_cpumask_group = {
+	.attrs = xapm_perf_cpumask_attrs,
+};
+
+static struct attribute *xapm_perf_format_attrs[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static struct attribute_group xapm_perf_format_group = {
+	.name = "format",
+	.attrs = xapm_perf_format_attrs,
+};
+
+static ssize_t xapm_perf_event_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct dev_ext_attribute *eattr;
+	unsigned long config;
+	char *ptr = buf;
+
+	eattr = container_of(attr, struct dev_ext_attribute, attr);
+	config = (unsigned long)eattr->var;
+
+	ptr += sprintf(ptr, "event=0x%02x\n", (unsigned int)get_event(config));
+
+	return (ssize_t)(ptr - buf);
+}
+
+#define XAPM_EVENT_ATTR(_name) \
+	__ATTR(_name, 0444, xapm_perf_event_show, NULL)
+
+#define XAPM_EVENT(_name, _event)					\
+static struct dev_ext_attribute xapm_perf_event_##_name = {		\
+	.attr = XAPM_EVENT_ATTR(xapm_##_name),				\
+	.var = XAPM_EVENT_CONFIG(_event, XAPM_EVTYPE_APM),		\
+}
+
+XAPM_EVENT(write_cnt_0,   WRITE_TRANSACTION | (0 << 4));
+XAPM_EVENT(read_cnt_0,  READ_TRANSACTION | (0 << 4));
+XAPM_EVENT(write_cnt_1,   WRITE_TRANSACTION | (1 << 4));
+XAPM_EVENT(read_cnt_1,  READ_TRANSACTION | (1 << 4));
+XAPM_EVENT(write_cnt_2,   WRITE_TRANSACTION | (2 << 4));
+XAPM_EVENT(read_cnt_2,  READ_TRANSACTION | (2 << 4));
+XAPM_EVENT(write_cnt_3,   WRITE_TRANSACTION | (3 << 4));
+XAPM_EVENT(read_cnt_3,  READ_TRANSACTION | (3 << 4));
+XAPM_EVENT(write_cnt_4,   WRITE_TRANSACTION | (4 << 4));
+XAPM_EVENT(read_cnt_4,  READ_TRANSACTION | (4 << 4));
+XAPM_EVENT(write_cnt_5,   WRITE_TRANSACTION | (5 << 4));
+XAPM_EVENT(read_cnt_5,  READ_TRANSACTION | (5 << 4));
+XAPM_EVENT(write_cnt_6,   WRITE_TRANSACTION | (6 << 4));
+XAPM_EVENT(read_cnt_6,  READ_TRANSACTION | (6 << 4));
+XAPM_EVENT(write_cnt_7,   WRITE_TRANSACTION | (7 << 4));
+XAPM_EVENT(read_cnt_7,  READ_TRANSACTION | (7 << 4));
+
+static struct attribute *xapm_perf_events_attrs_all[17] = {
+	&xapm_perf_event_read_cnt_0.attr.attr,
+	&xapm_perf_event_write_cnt_0.attr.attr,
+	&xapm_perf_event_read_cnt_1.attr.attr,
+	&xapm_perf_event_write_cnt_1.attr.attr,
+	&xapm_perf_event_read_cnt_2.attr.attr,
+	&xapm_perf_event_write_cnt_2.attr.attr,
+	&xapm_perf_event_read_cnt_3.attr.attr,
+	&xapm_perf_event_write_cnt_3.attr.attr,
+	&xapm_perf_event_read_cnt_4.attr.attr,
+	&xapm_perf_event_write_cnt_4.attr.attr,
+	&xapm_perf_event_read_cnt_5.attr.attr,
+	&xapm_perf_event_write_cnt_5.attr.attr,
+	&xapm_perf_event_read_cnt_6.attr.attr,
+	&xapm_perf_event_write_cnt_6.attr.attr,
+	&xapm_perf_event_read_cnt_7.attr.attr,
+	&xapm_perf_event_write_cnt_7.attr.attr,
+	NULL,
+};
+
+static u64 xapm_read_event_counter(struct xapm_perf_priv *priv, u32 event)
+{
+	void __iomem *base = priv->ioaddr;
+	u64 counter;
+	u64 reg;
+
+	priv->counter = event >> 4;
+
+	reg = readl(base + XAPM_MSR_OFFSET);
+	reg =	reg & ~(XAPM_COUNTER_MASK  << priv->counter * XAPM_MSR_COUNTER);
+	reg =	reg | (event) << priv->counter * 8;
+	reg =	reg | (priv->slot) << (priv->counter * 8 + XAPM_SLOTID_SHIFT);
+	writel(reg, base + XAPM_MSR_OFFSET);
+
+	counter = readl(base + XAPM_MCR_OFFSET + (priv->counter * XAPM_MCR_COUNTER));
+
+	return counter;
+}
+
+static int xapm_perf_event_init(struct perf_event *event)
+{
+	struct xapm_perf_priv *priv = to_xapm_perf_priv(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	void __iomem *base = priv->ioaddr;
+	u32 reg;
+
+	hwc->event_base = get_evtype(event->attr.config);
+	hwc->idx = (int)get_event(event->attr.config);
+
+	reg = readl(base + XAPM_CTRL_OFFSET);
+	reg |= XAPM_GLOBAL_CLK_CNT_EN;
+	reg |= XAPM_METRICS_CNT_EN;
+	writel(reg, base + XAPM_CTRL_OFFSET);
+
+	return 0;
+}
+
+static void xapm_perf_event_update(struct perf_event *event)
+{
+	struct xapm_perf_priv *priv = to_xapm_perf_priv(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 now, prev, delta;
+
+	now = xapm_read_event_counter(priv, (u32)hwc->idx);
+	prev = local64_read(&hwc->prev_count);
+	delta = now - prev;
+
+	local64_add(delta, &event->count);
+}
+
+static void xapm_perf_event_start(struct perf_event *event, int flags)
+{
+	struct xapm_perf_priv *priv = to_xapm_perf_priv(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 count;
+
+	count = xapm_read_event_counter(priv, (u32)hwc->idx);
+	local64_set(&hwc->prev_count, count);
+}
+
+static void xapm_perf_event_stop(struct perf_event *event, int flags)
+{
+	xapm_perf_event_update(event);
+}
+
+static int xapm_perf_event_add(struct perf_event *event, int flags)
+{
+	if (flags & PERF_EF_START)
+		xapm_perf_event_start(event, flags);
+
+	return 0;
+}
+
+static void xapm_perf_event_del(struct perf_event *event, int flags)
+{
+	xapm_perf_event_stop(event, PERF_EF_UPDATE);
+}
+
+static void xapm_perf_event_read(struct perf_event *event)
+{
+	xapm_perf_event_update(event);
+}
+
+static int xapm_perf_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct xapm_perf_priv *priv;
+	int target;
+
+	priv = hlist_entry_safe(node, struct xapm_perf_priv, node);
+
+	if (cpu != priv->cpu)
+		return 0;
+
+	target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target >= nr_cpu_ids)
+		return 0;
+
+	priv->cpu = target;
+	return 0;
+}
+
+static int xapm_perf_probe(struct platform_device *pdev)
+{
+	static struct attribute_group xapm_perf_events_group;
+	static struct attribute *xapm_perf_events_attrs[17];
+	struct xapm_perf_priv *priv;
+	void __iomem *baseaddr;
+	struct resource *res;
+	u32 numcounters;
+	struct pmu *pmu;
+	u32 mode = 0;
+	char *name;
+	int ret, i;
+	static const struct attribute_group *xapm_perf_groups[] = {
+		&xapm_perf_format_group,
+		&xapm_perf_cpumask_group,
+		&xapm_perf_events_group,
+		NULL,
+	};
+
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, priv);
+	baseaddr = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+	if (IS_ERR(baseaddr))
+		return PTR_ERR(baseaddr);
+
+	priv->dev = &pdev->dev;
+	priv->ioaddr = baseaddr;
+
+	ret = of_property_read_u32(pdev->dev.of_node, "xlnx,num-of-counters",
+				   &numcounters);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "no property xlnx,num-of-counters");
+		return ret;
+	}
+
+	priv->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(priv->clk)) {
+		if (PTR_ERR(priv->clk) != -EPROBE_DEFER)
+			dev_err(&pdev->dev, "axi clock error\n");
+		return PTR_ERR(priv->clk);
+	}
+
+	priv->cpu = raw_smp_processor_id();
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+				      "perf/xapm/xapm:online",
+				      NULL, xapm_perf_offline_cpu);
+	if (ret < 0)
+		return ret;
+
+	priv->cpuhp_state = ret;
+	priv->mode = XAPM_MODE_ADVANCED;
+
+	ret = of_property_read_u32(pdev->dev.of_node, "xlnx,enable-profile", &mode);
+	if (ret < 0)
+		dev_info(&pdev->dev, "no property xlnx,enable-profile\n");
+	else if (mode)
+		priv->mode = XAPM_MODE_PROFILE;
+
+	ret = of_property_read_u32(pdev->dev.of_node, "xlnx,enable-trace", &mode);
+	if (ret < 0)
+		dev_info(&pdev->dev, "no property xlnx,enable-trace\n");
+	else if (mode)
+		priv->mode = XAPM_MODE_TRACE;
+
+	ret = clk_prepare_enable(priv->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "Unable to enable clock.\n");
+		goto cpuhp_instance_err;
+	}
+	/* Register the pmu instance for cpu hotplug */
+	ret = cpuhp_state_add_instance_nocalls(priv->cpuhp_state, &priv->node);
+	if (ret)
+		goto cpuhp_instance_err;
+
+	for (i = 0; i < numcounters * 2; i++)
+		xapm_perf_events_attrs[i] = xapm_perf_events_attrs_all[i];
+	xapm_perf_events_attrs[i] = NULL;
+
+	xapm_perf_events_group.name = "events";
+	xapm_perf_events_group.attrs = xapm_perf_events_attrs;
+
+	pmu = &priv->pmu;
+
+	pmu->task_ctx_nr =	perf_invalid_context;
+	pmu->attr_groups =	xapm_perf_groups;
+	pmu->event_init =	xapm_perf_event_init;
+	pmu->add =		xapm_perf_event_add;
+	pmu->del =		xapm_perf_event_del;
+	pmu->start =		xapm_perf_event_start;
+	pmu->stop =		xapm_perf_event_stop;
+	pmu->read =		xapm_perf_event_read;
+	pmu->capabilities =	PERF_PMU_CAP_NO_INTERRUPT |
+				PERF_PMU_CAP_NO_EXCLUDE;
+
+	ret = of_property_read_u32(pdev->dev.of_node, "xlnx,num-monitor-slots",
+				   &priv->maxslots);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "no property xlnx,num-monitor-slots");
+		return ret;
+	}
+
+	name = devm_kasprintf(priv->dev, GFP_KERNEL, "xapm%llx_counter",
+			      res->start);
+	ret = perf_pmu_register(pmu, name, -1);
+	if (ret)
+		goto pmu_register_err;
+
+	pm_runtime_get_noresume(&pdev->dev);
+	pm_runtime_set_active(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
+
+	return 0;
+pmu_register_err:
+	cpuhp_state_remove_instance_nocalls(priv->cpuhp_state, &priv->node);
+cpuhp_instance_err:
+	cpuhp_remove_multi_state(priv->cpuhp_state);
+	return ret;
+}
+
+static int xapm_perf_remove(struct platform_device *pdev)
+{
+	struct xapm_perf_priv *priv = platform_get_drvdata(pdev);
+
+	perf_pmu_unregister(&priv->pmu);
+	cpuhp_state_remove_instance_nocalls(priv->cpuhp_state, &priv->node);
+	clk_disable_unprepare(priv->clk);
+	pm_runtime_disable(&pdev->dev);
+	pm_runtime_set_suspended(&pdev->dev);
+	return 0;
+}
+
+static int __maybe_unused xapm_runtime_suspend(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct xapm_perf_priv *priv = platform_get_drvdata(pdev);
+
+	clk_disable_unprepare(priv->clk);
+	return 0;
+};
+
+static int __maybe_unused xapm_runtime_resume(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct xapm_perf_priv *priv = platform_get_drvdata(pdev);
+	int ret;
+
+	ret = clk_prepare_enable(priv->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "Unable to enable clock.\n");
+		return ret;
+	}
+	return 0;
+};
+
+static const struct dev_pm_ops xapm_dev_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(xapm_runtime_suspend, xapm_runtime_resume)
+	SET_RUNTIME_PM_OPS(xapm_runtime_suspend,
+			   xapm_runtime_resume, NULL)
+};
+
+static const struct of_device_id xapm_perf_match[] = {
+	{ .compatible = "xlnx,axi-perf-monitor", },
+	{},
+};
+
+static struct platform_driver xlnx_apm_driver = {
+	.driver = {
+		.name = "xlnx_apm_perf",
+		.of_match_table = xapm_perf_match,
+		.suppress_bind_attrs = true,
+		.pm = &xapm_dev_pm_ops,
+	},
+	.probe = xapm_perf_probe,
+	.remove = xapm_perf_remove,
+};
+builtin_platform_driver(xlnx_apm_driver);