diff mbox

[v5,3/6] perf: hisi: Add support for HiSilicon SoC L3C PMU driver

Message ID 1503389277-134131-4-git-send-email-zhangshaokun@hisilicon.com (mailing list archive)
State New, archived
Headers show

Commit Message

Shaokun Zhang Aug. 22, 2017, 8:07 a.m. UTC
This patch adds support for the L3C PMU driver in HiSilicon SoC chips. Each
L3C has its own control, counter and interrupt registers and is a separate
PMU. Each L3C PMU has 8 programmable counters, and each counter
is free-running. An interrupt is supported to handle counter (48-bit)
overflow.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
Signed-off-by: Anurup M <anurup.m@huawei.com>
---
 drivers/perf/hisilicon/Makefile              |   2 +-
 drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 482 +++++++++++++++++++++++++++
 include/linux/cpuhotplug.h                   |   1 +
 3 files changed, 484 insertions(+), 1 deletion(-)
 create mode 100644 drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c

Comments

Mark Rutland Oct. 17, 2017, 3:16 p.m. UTC | #1
On Tue, Aug 22, 2017 at 04:07:54PM +0800, Shaokun Zhang wrote:
> +static int hisi_l3c_pmu_init_irq(struct hisi_pmu *l3c_pmu,
> +				 struct platform_device *pdev)
> +{
> +	int irq, ret;
> +
> +	/* Read and init IRQ */
> +	irq = platform_get_irq(pdev, 0);
> +	if (irq < 0) {
> +		dev_err(&pdev->dev, "L3C PMU get irq fail; irq:%d\n", irq);
> +		return irq;
> +	}
> +
> +	ret = devm_request_irq(&pdev->dev, irq, hisi_l3c_pmu_isr,
> +			       IRQF_NOBALANCING | IRQF_NO_THREAD,
> +			       dev_name(&pdev->dev), l3c_pmu);
> +	if (ret < 0) {
> +		dev_err(&pdev->dev,
> +			"Fail to request IRQ:%d ret:%d\n", irq, ret);
> +		return ret;
> +	}
> +
> +	l3c_pmu->irq = irq;
> +
> +	return 0;
> +}
> +
> +/*
> + * Check whether the CPU is associated with this L3C PMU by SCCL_ID
> + * and CCL_ID, if true, set the associated cpumask of the L3C PMU.
> + */
> +static void hisi_l3c_pmu_set_cpumask_by_ccl(void *arg)
> +{
> +	struct hisi_pmu *l3c_pmu = (struct hisi_pmu *)arg;
> +	u32 ccl_id, sccl_id;
> +
> +	hisi_read_sccl_and_ccl_id(&sccl_id, &ccl_id);
> +	if (sccl_id == l3c_pmu->sccl_id && ccl_id == l3c_pmu->ccl_id)
> +		cpumask_set_cpu(smp_processor_id(), &l3c_pmu->associated_cpus);
> +}

The shared code has hisi_uncore_pmu_set_cpumask_by_sccl(), and it would
be nice to place this in the same place.

Otherwise, the same comments apply here.

> +
> +static const struct acpi_device_id hisi_l3c_pmu_acpi_match[] = {
> +	{ "HISI0213", },
> +	{},
> +};
> +MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match);
> +
> +static int hisi_l3c_pmu_init_data(struct platform_device *pdev,
> +				  struct hisi_pmu *l3c_pmu)
> +{
> +	unsigned long long id;
> +	struct resource *res;
> +	acpi_status status;
> +	int cpu;
> +
> +	status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev),
> +				       "_UID", NULL, &id);
> +	if (ACPI_FAILURE(status))
> +		return -EINVAL;
> +
> +	l3c_pmu->id = id;
> +
> +	/*
> +	 * Use the SCCL_ID and CCL_ID to identify the L3C PMU, while
> +	 * SCCL_ID is in MPIDR[aff2] and CCL_ID is in MPIDR[aff1].
> +	 */
> +	if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
> +				     &l3c_pmu->sccl_id)) {
> +		dev_err(&pdev->dev, "Can not read l3c sccl-id!\n");
> +		return -EINVAL;
> +	}
> +
> +	if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id",
> +				     &l3c_pmu->ccl_id)) {
> +		dev_err(&pdev->dev, "Can not read l3c ccl-id!\n");
> +		return -EINVAL;
> +	}
> +
> +	/* Initialise the associated cpumask of the PMU */
> +	for_each_present_cpu(cpu)
> +		smp_call_function_single(cpu, hisi_l3c_pmu_set_cpumask_by_ccl,
> +					 (void *)l3c_pmu, 1);

Ah, so that's why hisi_uncore_pmu_set_cpumask_by_sccl took a void
pointer.

Please drop a comment above hisi_uncore_pmu_set_cpumask_by_sccl to cover
that.

I think you can drop the void cast here; I don't believe it is
necessary.

Rather than a probe-time smp_call_function_single(), can you follow the
qcom l2's approach of associating CPUs with a PMU instance in the
notifier? That will work even if CPUs are brought online very late.

> +
> +	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +	l3c_pmu->base = devm_ioremap_resource(&pdev->dev, res);
> +	if (IS_ERR(l3c_pmu->base)) {
> +		dev_err(&pdev->dev, "ioremap failed for l3c_pmu resource\n");
> +		return PTR_ERR(l3c_pmu->base);
> +	}
> +
> +	return 0;
> +}

Thanks,
Mark.
Shaokun Zhang Oct. 18, 2017, 1:33 p.m. UTC | #2
Hi Mark,

On 2017/10/17 23:16, Mark Rutland wrote:
> On Tue, Aug 22, 2017 at 04:07:54PM +0800, Shaokun Zhang wrote:
>> +static int hisi_l3c_pmu_init_irq(struct hisi_pmu *l3c_pmu,
>> +				 struct platform_device *pdev)
>> +{
>> +	int irq, ret;
>> +
>> +	/* Read and init IRQ */
>> +	irq = platform_get_irq(pdev, 0);
>> +	if (irq < 0) {
>> +		dev_err(&pdev->dev, "L3C PMU get irq fail; irq:%d\n", irq);
>> +		return irq;
>> +	}
>> +
>> +	ret = devm_request_irq(&pdev->dev, irq, hisi_l3c_pmu_isr,
>> +			       IRQF_NOBALANCING | IRQF_NO_THREAD,
>> +			       dev_name(&pdev->dev), l3c_pmu);
>> +	if (ret < 0) {
>> +		dev_err(&pdev->dev,
>> +			"Fail to request IRQ:%d ret:%d\n", irq, ret);
>> +		return ret;
>> +	}
>> +
>> +	l3c_pmu->irq = irq;
>> +
>> +	return 0;
>> +}
>> +
>> +/*
>> + * Check whether the CPU is associated with this L3C PMU by SCCL_ID
>> + * and CCL_ID, if true, set the associated cpumask of the L3C PMU.
>> + */
>> +static void hisi_l3c_pmu_set_cpumask_by_ccl(void *arg)
>> +{
>> +	struct hisi_pmu *l3c_pmu = (struct hisi_pmu *)arg;
>> +	u32 ccl_id, sccl_id;
>> +
>> +	hisi_read_sccl_and_ccl_id(&sccl_id, &ccl_id);
>> +	if (sccl_id == l3c_pmu->sccl_id && ccl_id == l3c_pmu->ccl_id)
>> +		cpumask_set_cpu(smp_processor_id(), &l3c_pmu->associated_cpus);
>> +}
> 
> The shared code has hisi_uncore_pmu_set_cpumask_by_sccl(), and it would
> be nice to place this in the same place.
> 
> Otherwise, the same comments apply here.
> 

Ok, shall fix the same issues.

>> +
>> +static const struct acpi_device_id hisi_l3c_pmu_acpi_match[] = {
>> +	{ "HISI0213", },
>> +	{},
>> +};
>> +MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match);
>> +
>> +static int hisi_l3c_pmu_init_data(struct platform_device *pdev,
>> +				  struct hisi_pmu *l3c_pmu)
>> +{
>> +	unsigned long long id;
>> +	struct resource *res;
>> +	acpi_status status;
>> +	int cpu;
>> +
>> +	status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev),
>> +				       "_UID", NULL, &id);
>> +	if (ACPI_FAILURE(status))
>> +		return -EINVAL;
>> +
>> +	l3c_pmu->id = id;
>> +
>> +	/*
>> +	 * Use the SCCL_ID and CCL_ID to identify the L3C PMU, while
>> +	 * SCCL_ID is in MPIDR[aff2] and CCL_ID is in MPIDR[aff1].
>> +	 */
>> +	if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
>> +				     &l3c_pmu->sccl_id)) {
>> +		dev_err(&pdev->dev, "Can not read l3c sccl-id!\n");
>> +		return -EINVAL;
>> +	}
>> +
>> +	if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id",
>> +				     &l3c_pmu->ccl_id)) {
>> +		dev_err(&pdev->dev, "Can not read l3c ccl-id!\n");
>> +		return -EINVAL;
>> +	}
>> +
>> +	/* Initialise the associated cpumask of the PMU */
>> +	for_each_present_cpu(cpu)
>> +		smp_call_function_single(cpu, hisi_l3c_pmu_set_cpumask_by_ccl,
>> +					 (void *)l3c_pmu, 1);
> 
> Ah, so that's why hisi_uncore_pmu_set_cpumask_by_sccl took a void
> pointer.
> 
> Please drop a comment above hisi_uncore_pmu_set_cpumask_by_sccl to cover
> that.
> 
> I think you can drop the void cast here; I don't believe it is
> necessary.
> 

Ok.

> Rather than a probe-time smp_call_function_single(), can you follow the
> qcom l2's approach of associating CPUs with a PMU instance in the
> notifier? That will work even if CPUs are brought online very late.
> 

Good guidance, but the HHA and DDRC PMUs are different from the L3C PMU: the
former share the same SCCL, while the latter shares the same SCCL and CCL.
I will try to deal with this difference in the online notifier.

Thanks,
Shaokun

>> +
>> +	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
>> +	l3c_pmu->base = devm_ioremap_resource(&pdev->dev, res);
>> +	if (IS_ERR(l3c_pmu->base)) {
>> +		dev_err(&pdev->dev, "ioremap failed for l3c_pmu resource\n");
>> +		return PTR_ERR(l3c_pmu->base);
>> +	}
>> +
>> +	return 0;
>> +}
> 
> Thanks,
> Mark.
> 
> .
>
Mark Rutland Oct. 18, 2017, 1:55 p.m. UTC | #3
On Wed, Oct 18, 2017 at 09:33:30PM +0800, Zhangshaokun wrote:
> On 2017/10/17 23:16, Mark Rutland wrote:
> > On Tue, Aug 22, 2017 at 04:07:54PM +0800, Shaokun Zhang wrote:
> >> +static int hisi_l3c_pmu_init_data(struct platform_device *pdev,
> >> +				  struct hisi_pmu *l3c_pmu)
> >> +{
> >> +	unsigned long long id;
> >> +	struct resource *res;
> >> +	acpi_status status;
> >> +	int cpu;
> >> +
> >> +	status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev),
> >> +				       "_UID", NULL, &id);
> >> +	if (ACPI_FAILURE(status))
> >> +		return -EINVAL;
> >> +
> >> +	l3c_pmu->id = id;
> >> +
> >> +	/*
> >> +	 * Use the SCCL_ID and CCL_ID to identify the L3C PMU, while
> >> +	 * SCCL_ID is in MPIDR[aff2] and CCL_ID is in MPIDR[aff1].
> >> +	 */
> >> +	if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
> >> +				     &l3c_pmu->sccl_id)) {
> >> +		dev_err(&pdev->dev, "Can not read l3c sccl-id!\n");
> >> +		return -EINVAL;
> >> +	}
> >> +
> >> +	if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id",
> >> +				     &l3c_pmu->ccl_id)) {
> >> +		dev_err(&pdev->dev, "Can not read l3c ccl-id!\n");
> >> +		return -EINVAL;
> >> +	}
> >> +
> >> +	/* Initialise the associated cpumask of the PMU */
> >> +	for_each_present_cpu(cpu)
> >> +		smp_call_function_single(cpu, hisi_l3c_pmu_set_cpumask_by_ccl,
> >> +					 (void *)l3c_pmu, 1);

> > Rather than a probe-time smp_call_function_single(), can you follow the
> > qcom l2's approach of associating CPUs with a PMU instance in the
> > notifier? That will work even if CPUs are brought online very late.
> 
> A good guidance, but HHA and DDRC PMUs are different from L3C PMU, the former
> share the same SCCL and the latter share the same SCCL and CCL. I will
> try to deal with this difference in online notifier.

FWIW, I think it makes sense for each PMU to have its own notifier
(perhaps with some shared code that each calls to do the migration).

I just want to avoid the smp_call_function_single() at probe time, as
that doesn't work in some cases.

Thanks,
Mark.
Shaokun Zhang Oct. 18, 2017, 2:08 p.m. UTC | #4
Hi Mark,

Thanks for your further explanation.

On 2017/10/18 21:55, Mark Rutland wrote:
> On Wed, Oct 18, 2017 at 09:33:30PM +0800, Zhangshaokun wrote:
>> On 2017/10/17 23:16, Mark Rutland wrote:
>>> On Tue, Aug 22, 2017 at 04:07:54PM +0800, Shaokun Zhang wrote:
>>>> +static int hisi_l3c_pmu_init_data(struct platform_device *pdev,
>>>> +				  struct hisi_pmu *l3c_pmu)
>>>> +{
>>>> +	unsigned long long id;
>>>> +	struct resource *res;
>>>> +	acpi_status status;
>>>> +	int cpu;
>>>> +
>>>> +	status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev),
>>>> +				       "_UID", NULL, &id);
>>>> +	if (ACPI_FAILURE(status))
>>>> +		return -EINVAL;
>>>> +
>>>> +	l3c_pmu->id = id;
>>>> +
>>>> +	/*
>>>> +	 * Use the SCCL_ID and CCL_ID to identify the L3C PMU, while
>>>> +	 * SCCL_ID is in MPIDR[aff2] and CCL_ID is in MPIDR[aff1].
>>>> +	 */
>>>> +	if (device_property_read_u32(&pdev->dev, "hisilicon,scl-id",
>>>> +				     &l3c_pmu->sccl_id)) {
>>>> +		dev_err(&pdev->dev, "Can not read l3c sccl-id!\n");
>>>> +		return -EINVAL;
>>>> +	}
>>>> +
>>>> +	if (device_property_read_u32(&pdev->dev, "hisilicon,ccl-id",
>>>> +				     &l3c_pmu->ccl_id)) {
>>>> +		dev_err(&pdev->dev, "Can not read l3c ccl-id!\n");
>>>> +		return -EINVAL;
>>>> +	}
>>>> +
>>>> +	/* Initialise the associated cpumask of the PMU */
>>>> +	for_each_present_cpu(cpu)
>>>> +		smp_call_function_single(cpu, hisi_l3c_pmu_set_cpumask_by_ccl,
>>>> +					 (void *)l3c_pmu, 1);
> 
>>> Rather than a probe-time smp_call_function_single(), can you follow the
>>> qcom l2's approach of associating CPUs with a PMU instance in the
>>> notifier? That will work even if CPUs are brought online very late.
>>
>> A good guidance, but HHA and DDRC PMUs are different from L3C PMU, the former
>> share the same SCCL and the latter share the same SCCL and CCL. I will
>> try to deal with this difference in online notifier.
> 
> FWIW, I think it makes sense for each PMU to have its own notifier
> (perhaps with some shared code that each calls to do the migration).
> 
> I just want to avoid the smp_call_function_single() at probe time, as
> that doesn't work in some cases.
> 

Got it, I shall update hisi_pmu::associated_cpus only in the online
and offline notifiers.

Thanks,
Shaokun

> Thanks,
> Mark.
> 
> .
>
diff mbox

Patch

diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile
index 2783bb3..4a3d3e6 100644
--- a/drivers/perf/hisilicon/Makefile
+++ b/drivers/perf/hisilicon/Makefile
@@ -1 +1 @@ 
-obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o
+obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o
diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
new file mode 100644
index 0000000..db96e7c
--- /dev/null
+++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
@@ -0,0 +1,482 @@ 
+/*
+ * HiSilicon SoC L3C uncore Hardware event counters support
+ *
+ * Copyright (C) 2017 Hisilicon Limited
+ * Author: Anurup M <anurup.m@huawei.com>
+ *         Shaokun Zhang <zhangshaokun@hisilicon.com>
+ *
+ * This code is based on the uncore PMUs like arm-cci and arm-ccn.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/acpi.h>
+#include <linux/bug.h>
+#include <linux/cpuhotplug.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/list.h>
+#include <linux/platform_device.h>
+#include <linux/smp.h>
+
+#include "hisi_uncore_pmu.h"
+
+/* L3C register offsets, added to the mapped base of each L3C PMU */
+#define L3C_PERF_CTRL		0x0408
+#define L3C_INT_MASK		0x0800
+#define L3C_INT_STATUS		0x0808
+#define L3C_INT_CLEAR		0x080c
+#define L3C_EVENT_CTRL	        0x1c00
+#define L3C_EVENT_TYPE0		0x1d00
+/*
+ * Each counter is 48 bits wide; the upper 16 bits of its 64-bit
+ * register are reserved (Read-As-Zero and Writes-Ignored).
+ */
+#define L3C_CNTR0_LOWER		0x1e00
+
+/* Number of hardware counters in each L3C PMU */
+#define L3C_NR_COUNTERS		0x8
+
+#define L3C_PERF_CTRL_EN	0x20000	/* perf_enable bit of L3C_PERF_CTRL */
+#define L3C_EVTYPE_NONE		0xff	/* all-ones 8-bit event code field */
+
+/*
+ * Map a counter index to its register offset; counters are 8 bytes apart.
+ */
+static u32 hisi_l3c_pmu_get_counter_offset(int cntr_idx)
+{
+	return cntr_idx * 8 + L3C_CNTR0_LOWER;
+}
+
+static u64 hisi_l3c_pmu_read_counter(struct hisi_pmu *l3c_pmu,
+				     struct hw_perf_event *hwc)
+{
+	u32 cntr = hwc->idx;
+
+	/* A single 64-bit read suffices; the upper 16 bits are RAZ */
+	if (hisi_uncore_pmu_counter_valid(l3c_pmu, cntr))
+		return readq(l3c_pmu->base +
+			     hisi_l3c_pmu_get_counter_offset(cntr));
+
+	dev_err(l3c_pmu->dev, "Unsupported event index:%d!\n", cntr);
+	return 0;
+}
+
+static void hisi_l3c_pmu_write_counter(struct hisi_pmu *l3c_pmu,
+				       struct hw_perf_event *hwc, u64 val)
+{
+	u32 cntr = hwc->idx;
+
+	/* A single 64-bit write suffices; the upper 16 bits are WI */
+	if (hisi_uncore_pmu_counter_valid(l3c_pmu, cntr)) {
+		writeq(val, l3c_pmu->base +
+		       hisi_l3c_pmu_get_counter_offset(cntr));
+		return;
+	}
+	dev_err(l3c_pmu->dev, "Unsupported event index:%d!\n", cntr);
+}
+
+/*
+ * Program the event code @type for hardware counter @idx.
+ */
+static void hisi_l3c_pmu_write_evtype(struct hisi_pmu *l3c_pmu, int idx,
+				      u32 type)
+{
+	/*
+	 * The 8 hardware counters are served by 2 event select registers:
+	 * L3C_EVENT_TYPE0 covers the former 4 counters and L3C_EVENT_TYPE1
+	 * the latter 4. Each register packs four 8-bit event codes, so
+	 * pick the register first and then the byte position inside it.
+	 */
+	u32 evt_reg = L3C_EVENT_TYPE0 + (idx / 4) * 4;
+	u32 lane_shift = (idx % 4) * 8;
+	u32 evt_val;
+
+	/* Read-modify-write so the other counters' event codes are kept */
+	evt_val = readl(l3c_pmu->base + evt_reg);
+	evt_val &= ~(L3C_EVTYPE_NONE << lane_shift);
+	evt_val |= type << lane_shift;
+	writel(evt_val, l3c_pmu->base + evt_reg);
+}
+
+/*
+ * Start counting on all enabled counters by setting the perf_enable
+ * bit in the L3C_PERF_CTRL register.
+ */
+static void hisi_l3c_pmu_start_counters(struct hisi_pmu *l3c_pmu)
+{
+	u32 ctrl = readl(l3c_pmu->base + L3C_PERF_CTRL);
+
+	ctrl |= L3C_PERF_CTRL_EN;
+
+	writel(ctrl, l3c_pmu->base + L3C_PERF_CTRL);
+}
+
+/*
+ * Stop counting on all enabled counters by clearing the perf_enable
+ * bit in the L3C_PERF_CTRL register.
+ */
+static void hisi_l3c_pmu_stop_counters(struct hisi_pmu *l3c_pmu)
+{
+	u32 ctrl = readl(l3c_pmu->base + L3C_PERF_CTRL);
+
+	ctrl &= ~L3C_PERF_CTRL_EN;
+
+	writel(ctrl, l3c_pmu->base + L3C_PERF_CTRL);
+}
+
+/* Set this counter's enable bit in the L3C_EVENT_CTRL register */
+static void hisi_l3c_pmu_enable_counter(struct hisi_pmu *l3c_pmu,
+					struct hw_perf_event *hwc)
+{
+	u32 cntr_bit = 1 << hwc->idx;
+	u32 ctrl = readl(l3c_pmu->base + L3C_EVENT_CTRL);
+
+	ctrl |= cntr_bit;
+	writel(ctrl, l3c_pmu->base + L3C_EVENT_CTRL);
+}
+
+/* Clear this counter's enable bit in the L3C_EVENT_CTRL register */
+static void hisi_l3c_pmu_disable_counter(struct hisi_pmu *l3c_pmu,
+					 struct hw_perf_event *hwc)
+{
+	u32 cntr_bit = 1 << hwc->idx;
+	u32 ctrl = readl(l3c_pmu->base + L3C_EVENT_CTRL);
+
+	ctrl &= ~cntr_bit;
+	writel(ctrl, l3c_pmu->base + L3C_EVENT_CTRL);
+}
+
+/* Enable this counter's interrupt; a 0 bit in L3C_INT_MASK means enabled */
+static void hisi_l3c_pmu_enable_counter_int(struct hisi_pmu *l3c_pmu,
+					    struct hw_perf_event *hwc)
+{
+	u32 cntr_bit = 1 << hwc->idx;
+	u32 mask = readl(l3c_pmu->base + L3C_INT_MASK);
+
+	mask &= ~cntr_bit;
+	writel(mask, l3c_pmu->base + L3C_INT_MASK);
+}
+
+/* Mask this counter's interrupt; a 1 bit in L3C_INT_MASK means masked */
+static void hisi_l3c_pmu_disable_counter_int(struct hisi_pmu *l3c_pmu,
+					     struct hw_perf_event *hwc)
+{
+	u32 cntr_bit = 1 << hwc->idx;
+	u32 mask = readl(l3c_pmu->base + L3C_INT_MASK);
+
+	mask |= cntr_bit;
+	writel(mask, l3c_pmu->base + L3C_INT_MASK);
+}
+
+/*
+ * Overflow interrupt handler; @dev_id is the hisi_pmu passed at request time.
+ */
+static irqreturn_t hisi_l3c_pmu_isr(int irq, void *dev_id)
+{
+	struct hisi_pmu *l3c_pmu = dev_id;
+	unsigned long status = readl(l3c_pmu->base + L3C_INT_STATUS);
+	struct perf_event *ev;
+	int cntr;
+
+	/* Nothing pending in L3C_INT_STATUS: the interrupt is not handled */
+	if (!status)
+		return IRQ_NONE;
+
+	/*
+	 * Service every counter whose bit is set in the status word.
+	 */
+	for_each_set_bit(cntr, &status, L3C_NR_COUNTERS) {
+		/* The status flag is cleared by writing 1 to its bit */
+		writel((1 << cntr), l3c_pmu->base + L3C_INT_CLEAR);
+
+		/* A counter with no event attached is only acknowledged */
+		ev = l3c_pmu->pmu_events.hw_events[cntr];
+		if (ev) {
+			hisi_uncore_pmu_event_update(ev);
+			hisi_uncore_pmu_set_event_period(ev);
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
+/* Look up IRQ 0 of @pdev and install the L3C PMU interrupt handler on it */
+static int hisi_l3c_pmu_init_irq(struct hisi_pmu *l3c_pmu,
+				 struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	int irq, ret;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(dev, "L3C PMU get irq fail; irq:%d\n", irq);
+		return irq;
+	}
+
+	ret = devm_request_irq(dev, irq, hisi_l3c_pmu_isr,
+			       IRQF_NOBALANCING | IRQF_NO_THREAD,
+			       dev_name(dev), l3c_pmu);
+	if (ret < 0) {
+		dev_err(dev, "Fail to request IRQ:%d ret:%d\n", irq, ret);
+		return ret;
+	}
+
+	l3c_pmu->irq = irq;
+
+	return 0;
+}
+
+/*
+ * Add the executing CPU to this L3C PMU's associated cpumask, provided
+ * the CPU's SCCL_ID and CCL_ID both match those of the PMU.
+ */
+static void hisi_l3c_pmu_set_cpumask_by_ccl(void *arg)
+{
+	struct hisi_pmu *l3c_pmu = arg;
+	u32 sccl, ccl;
+
+	hisi_read_sccl_and_ccl_id(&sccl, &ccl);
+	if (l3c_pmu->sccl_id == sccl && l3c_pmu->ccl_id == ccl)
+		cpumask_set_cpu(smp_processor_id(), &l3c_pmu->associated_cpus);
+}
+
+static const struct acpi_device_id hisi_l3c_pmu_acpi_match[] = {
+	{ "HISI0213", },	/* ACPI ID this driver binds to */
+	{},			/* empty terminating entry */
+};
+MODULE_DEVICE_TABLE(acpi, hisi_l3c_pmu_acpi_match);
+
+static int hisi_l3c_pmu_init_data(struct platform_device *pdev,
+				  struct hisi_pmu *l3c_pmu)
+{
+	struct device *dev = &pdev->dev;
+	unsigned long long uid;
+	struct resource *mem;
+	acpi_status status;
+	int cpu;
+
+	/* The ACPI _UID of the device becomes the id of this PMU */
+	status = acpi_evaluate_integer(ACPI_HANDLE(dev), "_UID", NULL, &uid);
+	if (ACPI_FAILURE(status))
+		return -EINVAL;
+	l3c_pmu->id = uid;
+
+	/*
+	 * An L3C PMU is identified by its SCCL_ID and CCL_ID pair;
+	 * SCCL_ID is in MPIDR[aff2] and CCL_ID is in MPIDR[aff1].
+	 */
+	if (device_property_read_u32(dev, "hisilicon,scl-id",
+				     &l3c_pmu->sccl_id)) {
+		dev_err(dev, "Can not read l3c sccl-id!\n");
+		return -EINVAL;
+	}
+
+	if (device_property_read_u32(dev, "hisilicon,ccl-id",
+				     &l3c_pmu->ccl_id)) {
+		dev_err(dev, "Can not read l3c ccl-id!\n");
+		return -EINVAL;
+	}
+
+	/* Run the match check on every present CPU to build the cpumask */
+	for_each_present_cpu(cpu)
+		smp_call_function_single(cpu, hisi_l3c_pmu_set_cpumask_by_ccl,
+					 l3c_pmu, 1);
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	l3c_pmu->base = devm_ioremap_resource(dev, mem);
+	if (IS_ERR(l3c_pmu->base)) {
+		dev_err(dev, "ioremap failed for l3c_pmu resource\n");
+		return PTR_ERR(l3c_pmu->base);
+	}
+
+	return 0;
+}
+
+static struct attribute *hisi_l3c_pmu_format_attr[] = {
+	HISI_PMU_FORMAT_ATTR(event, "config:0-7"),	/* 8-bit event code */
+	NULL,
+};
+
+static const struct attribute_group hisi_l3c_pmu_format_group = {
+	.name = "format",
+	.attrs = hisi_l3c_pmu_format_attr,
+};
+
+static struct attribute *hisi_l3c_pmu_events_attr[] = {
+	HISI_PMU_EVENT_ATTR(rd_cpipe,		0x00),	/* (name, event code) */
+	HISI_PMU_EVENT_ATTR(wr_cpipe,		0x01),
+	HISI_PMU_EVENT_ATTR(rd_hit_cpipe,	0x02),
+	HISI_PMU_EVENT_ATTR(wr_hit_cpipe,	0x03),
+	HISI_PMU_EVENT_ATTR(victim_num,		0x04),
+	HISI_PMU_EVENT_ATTR(rd_spipe,		0x20),
+	HISI_PMU_EVENT_ATTR(wr_spipe,		0x21),
+	HISI_PMU_EVENT_ATTR(rd_hit_spipe,	0x22),
+	HISI_PMU_EVENT_ATTR(wr_hit_spipe,	0x23),
+	HISI_PMU_EVENT_ATTR(back_invalid,	0x29),
+	HISI_PMU_EVENT_ATTR(retry_cpu,		0x40),
+	HISI_PMU_EVENT_ATTR(retry_ring,		0x41),
+	HISI_PMU_EVENT_ATTR(prefetch_drop,	0x42),
+	NULL,
+};
+
+static const struct attribute_group hisi_l3c_pmu_events_group = {
+	.name = "events",
+	.attrs = hisi_l3c_pmu_events_attr,
+};
+
+static DEVICE_ATTR(cpumask, 0444, hisi_cpumask_sysfs_show, NULL);
+
+static struct attribute *hisi_l3c_pmu_cpumask_attrs[] = {
+	&dev_attr_cpumask.attr,		/* mode 0444, no store callback */
+	NULL,
+};
+
+static const struct attribute_group hisi_l3c_pmu_cpumask_attr_group = {
+	.attrs = hisi_l3c_pmu_cpumask_attrs,
+};
+
+static const struct attribute_group *hisi_l3c_pmu_attr_groups[] = {
+	&hisi_l3c_pmu_format_group,
+	&hisi_l3c_pmu_events_group,
+	&hisi_l3c_pmu_cpumask_attr_group,
+	NULL,				/* end of the group list */
+};
+
+static const struct hisi_uncore_ops hisi_uncore_l3c_ops = {
+	.write_evtype		= hisi_l3c_pmu_write_evtype,
+	.get_event_idx		= hisi_uncore_pmu_get_event_idx, /* not L3C code */
+	.start_counters		= hisi_l3c_pmu_start_counters,
+	.stop_counters		= hisi_l3c_pmu_stop_counters,
+	.enable_counter		= hisi_l3c_pmu_enable_counter,
+	.disable_counter	= hisi_l3c_pmu_disable_counter,
+	.enable_counter_int	= hisi_l3c_pmu_enable_counter_int,
+	.disable_counter_int	= hisi_l3c_pmu_disable_counter_int,
+	.write_counter		= hisi_l3c_pmu_write_counter,
+	.read_counter		= hisi_l3c_pmu_read_counter,
+};
+
+static int hisi_l3c_pmu_dev_probe(struct platform_device *pdev,
+				  struct hisi_pmu *l3c_pmu)
+{
+	int err = hisi_l3c_pmu_init_data(pdev, l3c_pmu);
+
+	if (!err)
+		err = hisi_l3c_pmu_init_irq(l3c_pmu, pdev);
+	if (err)
+		return err;
+
+	/*
+	 * Fixed L3C parameters: L3C_NR_COUNTERS counters, 48 bits each.
+	 */
+	l3c_pmu->num_counters = L3C_NR_COUNTERS;
+	l3c_pmu->counter_bits = 48;
+	l3c_pmu->ops = &hisi_uncore_l3c_ops;
+	l3c_pmu->dev = &pdev->dev;
+	l3c_pmu->on_cpu = -1;
+
+	return 0;
+}
+
+/* Probe one L3C PMU device and register it with the perf core */
+static int hisi_l3c_pmu_probe(struct platform_device *pdev)
+{
+	struct hisi_pmu *l3c_pmu;
+	char *name;
+	int ret;
+
+	l3c_pmu = devm_kzalloc(&pdev->dev, sizeof(*l3c_pmu), GFP_KERNEL);
+	if (!l3c_pmu)
+		return -ENOMEM;
+	platform_set_drvdata(pdev, l3c_pmu);
+	ret = hisi_l3c_pmu_dev_probe(pdev, l3c_pmu);
+	if (ret)
+		return ret;
+	/* Build the name before hotplug setup: failure needs no unwinding */
+	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "hisi_sccl%u_l3c%u",
+			      l3c_pmu->sccl_id, l3c_pmu->id);
+	if (!name)
+		return -ENOMEM;
+
+	ret = cpuhp_state_add_instance(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE,
+				       &l3c_pmu->node);
+	if (ret) {
+		dev_err(&pdev->dev, "Error %d registering hotplug\n", ret);
+		return ret;
+	}
+
+	l3c_pmu->pmu = (struct pmu) {
+		.name		= name,
+		.task_ctx_nr	= perf_invalid_context,
+		.event_init	= hisi_uncore_pmu_event_init,
+		.pmu_enable	= hisi_uncore_pmu_enable,
+		.pmu_disable	= hisi_uncore_pmu_disable,
+		.add		= hisi_uncore_pmu_add,
+		.del		= hisi_uncore_pmu_del,
+		.start		= hisi_uncore_pmu_start,
+		.stop		= hisi_uncore_pmu_stop,
+		.read		= hisi_uncore_pmu_read,
+		.attr_groups	= hisi_l3c_pmu_attr_groups,
+	};
+	ret = perf_pmu_register(&l3c_pmu->pmu, name, -1);
+	if (ret) {
+		dev_err(l3c_pmu->dev, "L3C PMU register failed!\n");
+		cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE,
+					    &l3c_pmu->node);
+	}
+	return ret;
+}
+
+/* Undo probe: unregister from perf, then drop the CPU hotplug instance */
+static int hisi_l3c_pmu_remove(struct platform_device *pdev)
+{
+	struct hisi_pmu *pmu_data = platform_get_drvdata(pdev);
+
+	perf_pmu_unregister(&pmu_data->pmu);
+	cpuhp_state_remove_instance(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE,
+				    &pmu_data->node);
+	return 0;
+}
+
+static struct platform_driver hisi_l3c_pmu_driver = {
+	.driver = {
+		.name = "hisi_l3c_pmu",
+		.acpi_match_table = ACPI_PTR(hisi_l3c_pmu_acpi_match),
+	},
+	.probe = hisi_l3c_pmu_probe,	/* sets up and registers one L3C PMU */
+	.remove = hisi_l3c_pmu_remove,	/* unregisters that PMU again */
+};
+
+static int __init hisi_l3c_pmu_module_init(void)
+{
+	int err;
+
+	err = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE,
+				      "AP_PERF_ARM_HISI_L3_ONLINE",
+				      hisi_uncore_pmu_online_cpu,
+				      hisi_uncore_pmu_offline_cpu);
+	if (err) {
+		pr_err("L3C PMU: Error setup hotplug, ret = %d\n", err);
+		return err;
+	}
+	/* Drop the hotplug state again if the driver fails to register */
+	err = platform_driver_register(&hisi_l3c_pmu_driver);
+	if (!err)
+		return 0;
+	cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE);
+	return err;
+}
+module_init(hisi_l3c_pmu_module_init);
+
+static void __exit hisi_l3c_pmu_module_exit(void)
+{
+	platform_driver_unregister(&hisi_l3c_pmu_driver); /* reverse of init */
+	cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_HISI_L3_ONLINE);
+}
+module_exit(hisi_l3c_pmu_module_exit);
+
+MODULE_DESCRIPTION("HiSilicon SoC L3C uncore PMU driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Anurup M <anurup.m@huawei.com>");
+MODULE_AUTHOR("Shaokun Zhang <zhangshaokun@hisilicon.com>");
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 82b30e6..6858942 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -134,6 +134,7 @@  enum cpuhp_state {
 	CPUHP_AP_PERF_S390_SF_ONLINE,
 	CPUHP_AP_PERF_ARM_CCI_ONLINE,
 	CPUHP_AP_PERF_ARM_CCN_ONLINE,
+	CPUHP_AP_PERF_ARM_HISI_L3_ONLINE,
 	CPUHP_AP_PERF_ARM_L2X0_ONLINE,
 	CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
 	CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE,