@@ -183,6 +183,17 @@ config APPLE_M1_CPU_PMU
Provides support for the non-architectural CPU PMUs present on
the Apple M1 SoCs and derivatives.
+config XILINX_APM_PMU
+	tristate "Enable PMU support for the Xilinx APM controller"
+	help
+	  Enables perf support for the Xilinx AXI Performance Monitor
+	  (APM) controller. The APM core measures major performance
+	  metrics for the AMBA AXI system: the bus latency of a specific
+	  master/slave (AXI4/AXI3/AXI4-Stream/AXI4-Lite) and the amount
+	  of memory traffic over a given duration. The core can also be
+	  used for real-time profiling of software applications.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called xilinx_apm.
+
source "drivers/perf/hisilicon/Kconfig"
config MARVELL_CN10K_DDR_PMU
@@ -20,3 +20,4 @@ obj-$(CONFIG_ARM_DMC620_PMU) += arm_dmc620_pmu.o
obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o
obj-$(CONFIG_MARVELL_CN10K_DDR_PMU) += marvell_cn10k_ddr_pmu.o
obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o
+obj-$(CONFIG_XILINX_APM_PMU) += xilinx_apm.o
new file mode 100644
@@ -0,0 +1,516 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for Xilinx APM Performance Reporting
+ *
+ * Copyright (C) 2022 Advanced Micro Devices, Inc.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+
+#define XAPM_MODE_ADVANCED 1
+#define XAPM_MODE_PROFILE 2
+#define XAPM_MODE_TRACE 3
+
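+/*
+ * Register layout (per our reading of the AXI Performance Monitor
+ * register map): XAPM_MSR_OFFSET is the first metric selector register,
+ * holding one XAPM_MSR_COUNTER-bit selector field per counter with the
+ * slot ID placed at XAPM_SLOTID_SHIFT within each field. XAPM_MCR_OFFSET
+ * is the first metric counter register, with counters spaced
+ * XAPM_MCR_COUNTER bytes apart. XAPM_RR is the range register.
+ */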
+#define XAPM_MSR_OFFSET 0x44
+#define XAPM_MSR_COUNTER 8
+#define XAPM_SLOTID_SHIFT 0x5
+#define XAPM_CTRL_OFFSET 0x300
+#define XAPM_MCR_OFFSET 0x100
+#define XAPM_MCR_COUNTER 16
+#define XAPM_RR 0x108
+
+#define XAPM_METRICS_CNT_EN BIT(0)
+#define XAPM_GLOBAL_CLK_CNT_EN BIT(16)
+#define XAPM_COUNTER_MASK 0xF
+#define XAPM_EVENT_MASK GENMASK_ULL(11, 0)
+#define XAPM_EVENT_SHIFT 0
+#define XAPM_EVTYPE_MASK GENMASK_ULL(15, 12)
+#define XAPM_EVTYPE_SHIFT 12
+#define XAPM_EVTYPE_APM 1
+
+#define get_event(_config) FIELD_GET(XAPM_EVENT_MASK, _config)
+#define get_evtype(_config) FIELD_GET(XAPM_EVTYPE_MASK, _config)
+
+#define to_xapm_perf_priv(_pmu) container_of(_pmu, struct xapm_perf_priv, pmu)
+
+#define XAPM_EVENT_CONFIG(_event, _type) \
+ ((void *)((((_event) << XAPM_EVENT_SHIFT) & XAPM_EVENT_MASK) | \
+ (((_type) << XAPM_EVTYPE_SHIFT) & XAPM_EVTYPE_MASK)))
+
+PMU_FORMAT_ATTR(event, "config:0-11");
+
+/* Metric selections supported by the APM counters. */
+enum metric {
+	WRITE_TRANSACTION = 0,
+	READ_TRANSACTION = 1,
+	WRITE_BYTE = 2,
+	READ_BYTE = 3,
+	WRITE_BEAT = 4,
+	READ_LATENCY = 5,
+	WRITE_LATENCY = 6,
+	SLV_WR_IDLE = 7,
+	MST_RD_IDLE = 8,
+	BVALID = 9,
+	WLASTS = 10,
+	RLASTS = 11,
+	MIN_WR_LATENCY = 12,
+	MAX_WR_LATENCY = 13,
+	MIN_RD_LATENCY = 14,
+	MAX_RD_LATENCY = 15,
+	MAX_METRIC = 16
+};
+
+/**
+ * struct xapm_perf_priv - private data for the xapm perf driver
+ *
+ * @dev: parent device.
+ * @ioaddr: mapped base address of the APM register region.
+ * @pmu: pmu data structure for xapm perf counters.
+ * @cpu: active CPU to which the PMU is bound for accesses.
+ * @mode: mode of the APM (advanced, profile or trace).
+ * @counter: counter currently being accessed.
+ * @slot: monitor slot to be read.
+ * @maxslots: maximum number of monitor slots.
+ * @range: value programmed into the range register.
+ * @node: hlist node for CPU hotplug notification.
+ * @cpuhp_state: state for CPU hotplug notification.
+ * @clk: AXI clock handle.
+ */
+struct xapm_perf_priv {
+ struct device *dev;
+ void __iomem *ioaddr;
+ struct pmu pmu;
+ unsigned int cpu;
+ u32 mode;
+ u32 counter;
+ u32 slot;
+ u32 maxslots;
+ u32 range;
+ struct hlist_node node;
+ enum cpuhp_state cpuhp_state;
+ struct clk *clk;
+};
+
+static ssize_t cpumask_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct pmu *pmu = dev_get_drvdata(dev);
+ struct xapm_perf_priv *priv;
+
+ priv = to_xapm_perf_priv(pmu);
+
+ return cpumap_print_to_pagebuf(true, buf, cpumask_of(priv->cpu));
+}
+static DEVICE_ATTR_RO(cpumask);
+
+static ssize_t slot_store(struct device *dev,
+			  struct device_attribute *attr,
+			  const char *buf, size_t size)
+{
+	struct pmu *pmu = dev_get_drvdata(dev);
+	struct xapm_perf_priv *priv;
+	u32 slot;
+	int ret;
+
+	priv = to_xapm_perf_priv(pmu);
+	ret = kstrtou32(buf, 0, &slot);
+	if (ret < 0)
+		return ret;
+
+	/* Validate before updating so a bad write leaves the slot intact */
+	if (slot > priv->maxslots)
+		return -EINVAL;
+
+	priv->slot = slot;
+
+	return size;
+}
+static DEVICE_ATTR_WO(slot);
+
+static ssize_t range_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t size)
+{
+ int ret;
+ struct pmu *pmu = dev_get_drvdata(dev);
+ struct xapm_perf_priv *priv;
+
+ priv = to_xapm_perf_priv(pmu);
+ ret = kstrtou32(buf, 0, &priv->range);
+ if (ret < 0)
+ return ret;
+
+ writel(priv->range, priv->ioaddr + XAPM_RR);
+
+ return size;
+}
+static DEVICE_ATTR_WO(range);
+
+static struct attribute *xapm_perf_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ &dev_attr_slot.attr,
+ &dev_attr_range.attr,
+ NULL,
+};
+
+static struct attribute_group xapm_perf_cpumask_group = {
+ .attrs = xapm_perf_cpumask_attrs,
+};
+
+static struct attribute *xapm_perf_format_attrs[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group xapm_perf_format_group = {
+ .name = "format",
+ .attrs = xapm_perf_format_attrs,
+};
+
+static ssize_t xapm_perf_event_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct dev_ext_attribute *eattr;
+	unsigned long config;
+
+	eattr = container_of(attr, struct dev_ext_attribute, attr);
+	config = (unsigned long)eattr->var;
+
+	return sysfs_emit(buf, "event=0x%02x\n",
+			  (unsigned int)get_event(config));
+}
+
+#define XAPM_EVENT_ATTR(_name) \
+ __ATTR(_name, 0444, xapm_perf_event_show, NULL)
+
+#define XAPM_EVENT(_name, _event) \
+static struct dev_ext_attribute xapm_perf_event_##_name = { \
+ .attr = XAPM_EVENT_ATTR(xapm_##_name), \
+ .var = XAPM_EVENT_CONFIG(_event, XAPM_EVTYPE_APM), \
+}
+
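+/*
+ * Event encoding: the metric select sits in bits [3:0] and the counter
+ * index in bits [7:4] of the perf config; e.g. read_cnt_3 encodes
+ * READ_TRANSACTION | (3 << 4).
+ */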
+XAPM_EVENT(write_cnt_0, WRITE_TRANSACTION | (0 << 4));
+XAPM_EVENT(read_cnt_0, READ_TRANSACTION | (0 << 4));
+XAPM_EVENT(write_cnt_1, WRITE_TRANSACTION | (1 << 4));
+XAPM_EVENT(read_cnt_1, READ_TRANSACTION | (1 << 4));
+XAPM_EVENT(write_cnt_2, WRITE_TRANSACTION | (2 << 4));
+XAPM_EVENT(read_cnt_2, READ_TRANSACTION | (2 << 4));
+XAPM_EVENT(write_cnt_3, WRITE_TRANSACTION | (3 << 4));
+XAPM_EVENT(read_cnt_3, READ_TRANSACTION | (3 << 4));
+XAPM_EVENT(write_cnt_4, WRITE_TRANSACTION | (4 << 4));
+XAPM_EVENT(read_cnt_4, READ_TRANSACTION | (4 << 4));
+XAPM_EVENT(write_cnt_5, WRITE_TRANSACTION | (5 << 4));
+XAPM_EVENT(read_cnt_5, READ_TRANSACTION | (5 << 4));
+XAPM_EVENT(write_cnt_6, WRITE_TRANSACTION | (6 << 4));
+XAPM_EVENT(read_cnt_6, READ_TRANSACTION | (6 << 4));
+XAPM_EVENT(write_cnt_7, WRITE_TRANSACTION | (7 << 4));
+XAPM_EVENT(read_cnt_7, READ_TRANSACTION | (7 << 4));
+
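+/*
+ * Full event list for up to eight counters; probe exposes only the
+ * first 2 * "xlnx,num-of-counters" entries (one read and one write
+ * event per implemented counter).
+ */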
+static struct attribute *xapm_perf_events_attrs_all[17] = {
+ &xapm_perf_event_read_cnt_0.attr.attr,
+ &xapm_perf_event_write_cnt_0.attr.attr,
+ &xapm_perf_event_read_cnt_1.attr.attr,
+ &xapm_perf_event_write_cnt_1.attr.attr,
+ &xapm_perf_event_read_cnt_2.attr.attr,
+ &xapm_perf_event_write_cnt_2.attr.attr,
+ &xapm_perf_event_read_cnt_3.attr.attr,
+ &xapm_perf_event_write_cnt_3.attr.attr,
+ &xapm_perf_event_read_cnt_4.attr.attr,
+ &xapm_perf_event_write_cnt_4.attr.attr,
+ &xapm_perf_event_read_cnt_5.attr.attr,
+ &xapm_perf_event_write_cnt_5.attr.attr,
+ &xapm_perf_event_read_cnt_6.attr.attr,
+ &xapm_perf_event_write_cnt_6.attr.attr,
+ &xapm_perf_event_read_cnt_7.attr.attr,
+ &xapm_perf_event_write_cnt_7.attr.attr,
+ NULL,
+};
+
+static u64 xapm_read_event_counter(struct xapm_perf_priv *priv, u32 event)
+{
+	void __iomem *base = priv->ioaddr;
+	u32 counter = event >> 4;
+	u32 metric = event & XAPM_COUNTER_MASK;
+	void __iomem *msr;
+	u32 shift, reg;
+
+	priv->counter = counter;
+
+	/*
+	 * Each 32-bit metric selector register holds four 8-bit fields:
+	 * the metric select in the low bits and the slot ID from
+	 * XAPM_SLOTID_SHIFT upwards. Counters 0-3 are configured in the
+	 * first selector register, counters 4-7 in the next one.
+	 */
+	msr = base + XAPM_MSR_OFFSET + (counter / 4) * 4;
+	shift = (counter % 4) * XAPM_MSR_COUNTER;
+
+	reg = readl(msr);
+	reg &= ~(0xffU << shift);
+	reg |= metric << shift;
+	reg |= priv->slot << (shift + XAPM_SLOTID_SHIFT);
+	writel(reg, msr);
+
+	return readl(base + XAPM_MCR_OFFSET + counter * XAPM_MCR_COUNTER);
+}
+
+static int xapm_perf_event_init(struct perf_event *event)
+{
+	struct xapm_perf_priv *priv = to_xapm_perf_priv(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	void __iomem *base = priv->ioaddr;
+	u32 reg;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/* The APM counters are free running and cannot sample */
+	if (is_sampling_event(event))
+		return -EOPNOTSUPP;
+
+	/* Bind the event to the CPU the PMU is associated with */
+	event->cpu = priv->cpu;
+
+	hwc->event_base = get_evtype(event->attr.config);
+	hwc->idx = (int)get_event(event->attr.config);
+
+	reg = readl(base + XAPM_CTRL_OFFSET);
+	reg |= XAPM_GLOBAL_CLK_CNT_EN;
+	reg |= XAPM_METRICS_CNT_EN;
+	writel(reg, base + XAPM_CTRL_OFFSET);
+
+	return 0;
+}
+
+static void xapm_perf_event_update(struct perf_event *event)
+{
+	struct xapm_perf_priv *priv = to_xapm_perf_priv(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 now, prev, delta;
+
+	now = xapm_read_event_counter(priv, (u32)hwc->idx);
+	prev = local64_read(&hwc->prev_count);
+	local64_set(&hwc->prev_count, now);
+
+	/* Handle wraparound of the 32-bit hardware counter */
+	delta = (now - prev) & GENMASK_ULL(31, 0);
+
+	local64_add(delta, &event->count);
+}
+
+static void xapm_perf_event_start(struct perf_event *event, int flags)
+{
+ struct xapm_perf_priv *priv = to_xapm_perf_priv(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ u64 count;
+
+ count = xapm_read_event_counter(priv, (u32)hwc->idx);
+ local64_set(&hwc->prev_count, count);
+}
+
+static void xapm_perf_event_stop(struct perf_event *event, int flags)
+{
+ xapm_perf_event_update(event);
+}
+
+static int xapm_perf_event_add(struct perf_event *event, int flags)
+{
+ if (flags & PERF_EF_START)
+ xapm_perf_event_start(event, flags);
+
+ return 0;
+}
+
+static void xapm_perf_event_del(struct perf_event *event, int flags)
+{
+ xapm_perf_event_stop(event, PERF_EF_UPDATE);
+}
+
+static void xapm_perf_event_read(struct perf_event *event)
+{
+ xapm_perf_event_update(event);
+}
+
+static int xapm_perf_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct xapm_perf_priv *priv;
+	int target;
+
+	priv = hlist_entry_safe(node, struct xapm_perf_priv, node);
+
+	if (cpu != priv->cpu)
+		return 0;
+
+	target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target >= nr_cpu_ids)
+		return 0;
+
+	perf_pmu_migrate_context(&priv->pmu, cpu, target);
+	priv->cpu = target;
+
+	return 0;
+}
+
+static int xapm_perf_probe(struct platform_device *pdev)
+{
+ static struct attribute_group xapm_perf_events_group;
+ static struct attribute *xapm_perf_events_attrs[17];
+ struct xapm_perf_priv *priv;
+ void __iomem *baseaddr;
+ struct resource *res;
+ u32 numcounters;
+ struct pmu *pmu;
+ u32 mode = 0;
+ char *name;
+ int ret, i;
+ static const struct attribute_group *xapm_perf_groups[] = {
+ &xapm_perf_format_group,
+ &xapm_perf_cpumask_group,
+ &xapm_perf_events_group,
+ NULL,
+ };
+
+ priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ platform_set_drvdata(pdev, priv);
+ baseaddr = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+ if (IS_ERR(baseaddr))
+ return PTR_ERR(baseaddr);
+
+ priv->dev = &pdev->dev;
+ priv->ioaddr = baseaddr;
+
+ ret = of_property_read_u32(pdev->dev.of_node, "xlnx,num-of-counters",
+ &numcounters);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "no property xlnx,num-of-counters\n");
+		return ret;
+	}
+
+	priv->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(priv->clk))
+		return dev_err_probe(&pdev->dev, PTR_ERR(priv->clk),
+				     "axi clock error\n");
+
+ priv->cpu = raw_smp_processor_id();
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+ "perf/xapm/xapm:online",
+ NULL, xapm_perf_offline_cpu);
+ if (ret < 0)
+ return ret;
+
+ priv->cpuhp_state = ret;
+ priv->mode = XAPM_MODE_ADVANCED;
+
+ ret = of_property_read_u32(pdev->dev.of_node, "xlnx,enable-profile", &mode);
+ if (ret < 0)
+ dev_info(&pdev->dev, "no property xlnx,enable-profile\n");
+ else if (mode)
+ priv->mode = XAPM_MODE_PROFILE;
+
+ ret = of_property_read_u32(pdev->dev.of_node, "xlnx,enable-trace", &mode);
+ if (ret < 0)
+ dev_info(&pdev->dev, "no property xlnx,enable-trace\n");
+ else if (mode)
+ priv->mode = XAPM_MODE_TRACE;
+
+	ret = clk_prepare_enable(priv->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "Unable to enable clock.\n");
+		goto cpuhp_state_err;
+	}
+
+	/* Register the pmu instance for cpu hotplug */
+	ret = cpuhp_state_add_instance_nocalls(priv->cpuhp_state, &priv->node);
+	if (ret)
+		goto clk_err;
+
+ for (i = 0; i < numcounters * 2; i++)
+ xapm_perf_events_attrs[i] = xapm_perf_events_attrs_all[i];
+ xapm_perf_events_attrs[i] = NULL;
+
+ xapm_perf_events_group.name = "events";
+ xapm_perf_events_group.attrs = xapm_perf_events_attrs;
+
+ pmu = &priv->pmu;
+
+ pmu->task_ctx_nr = perf_invalid_context;
+ pmu->attr_groups = xapm_perf_groups;
+ pmu->event_init = xapm_perf_event_init;
+ pmu->add = xapm_perf_event_add;
+ pmu->del = xapm_perf_event_del;
+ pmu->start = xapm_perf_event_start;
+ pmu->stop = xapm_perf_event_stop;
+ pmu->read = xapm_perf_event_read;
+ pmu->capabilities = PERF_PMU_CAP_NO_INTERRUPT |
+ PERF_PMU_CAP_NO_EXCLUDE;
+
+ ret = of_property_read_u32(pdev->dev.of_node, "xlnx,num-monitor-slots",
+ &priv->maxslots);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "no property xlnx,num-monitor-slots");
+ return ret;
+ }
+
+ name = devm_kasprintf(priv->dev, GFP_KERNEL, "xapm%llx_counter",
+ res->start);
+ ret = perf_pmu_register(pmu, name, -1);
+ if (ret)
+ goto pmu_register_err;
+
+ pm_runtime_get_noresume(&pdev->dev);
+ pm_runtime_set_active(&pdev->dev);
+ pm_runtime_enable(&pdev->dev);
+
+ return 0;
+cpuhp_instance_err:
+	cpuhp_state_remove_instance_nocalls(priv->cpuhp_state, &priv->node);
+clk_err:
+	clk_disable_unprepare(priv->clk);
+cpuhp_state_err:
+	cpuhp_remove_multi_state(priv->cpuhp_state);
+	return ret;
+}
+
+static int xapm_perf_remove(struct platform_device *pdev)
+{
+	struct xapm_perf_priv *priv = platform_get_drvdata(pdev);
+
+	perf_pmu_unregister(&priv->pmu);
+	cpuhp_state_remove_instance_nocalls(priv->cpuhp_state, &priv->node);
+	cpuhp_remove_multi_state(priv->cpuhp_state);
+	clk_disable_unprepare(priv->clk);
+	pm_runtime_disable(&pdev->dev);
+	pm_runtime_set_suspended(&pdev->dev);
+	return 0;
+}
+
+static int __maybe_unused xapm_runtime_suspend(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct xapm_perf_priv *priv = platform_get_drvdata(pdev);
+
+ clk_disable_unprepare(priv->clk);
+ return 0;
+}
+
+static int __maybe_unused xapm_runtime_resume(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct xapm_perf_priv *priv = platform_get_drvdata(pdev);
+ int ret;
+
+ ret = clk_prepare_enable(priv->clk);
+ if (ret) {
+ dev_err(&pdev->dev, "Unable to enable clock.\n");
+ return ret;
+ }
+ return 0;
+}
+
+static const struct dev_pm_ops xapm_dev_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(xapm_runtime_suspend, xapm_runtime_resume)
+ SET_RUNTIME_PM_OPS(xapm_runtime_suspend,
+ xapm_runtime_resume, NULL)
+};
+
+static const struct of_device_id xapm_perf_match[] = {
+	{ .compatible = "xlnx,axi-perf-monitor", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, xapm_perf_match);
+
+static struct platform_driver xlnx_apm_driver = {
+	.driver = {
+		.name = "xlnx_apm_perf",
+		.of_match_table = xapm_perf_match,
+		.suppress_bind_attrs = true,
+		.pm = &xapm_dev_pm_ops,
+	},
+	.probe = xapm_perf_probe,
+	.remove = xapm_perf_remove,
+};
+module_platform_driver(xlnx_apm_driver);
+
+MODULE_AUTHOR("Shubhrajyoti Datta <shubhrajyoti.datta@amd.com>");
+MODULE_DESCRIPTION("Xilinx AXI Performance Monitor perf driver");
+MODULE_LICENSE("GPL");
The programmable AXI performance monitors (APM) collect real-time
transaction metrics at multiple points on the AXI interconnect to help
system software profile real-time activity. Our platforms instantiate
APMs in the programmable logic (PL) as well as hardened instances in
the processing system (PS).

Add Xilinx APM driver support.

Signed-off-by: Shubhrajyoti Datta <shubhrajyoti.datta@amd.com>
---
 drivers/perf/Kconfig      |  11 +
 drivers/perf/Makefile     |   1 +
 drivers/perf/xilinx_apm.c | 516 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 528 insertions(+)
 create mode 100644 drivers/perf/xilinx_apm.c
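
For reference, an illustrative device node and usage (the address,
clock phandle and property values below are made-up examples, not from
a real board):

	apm@44a00000 {
		compatible = "xlnx,axi-perf-monitor";
		reg = <0x44a00000 0x10000>;
		clocks = <&axi_clk>;
		xlnx,num-of-counters = <8>;
		xlnx,num-monitor-slots = <2>;
	};

Once the PMU is registered, a monitor slot can be selected and events
counted system-wide, e.g.:

	echo 1 > /sys/bus/event_source/devices/xapm44a00000_counter/slot
	perf stat -a -e xapm44a00000_counter/xapm_read_cnt_0/ sleep 1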