diff mbox

[v2,2/3] arm: exynos: Add MCPM call-back functions

Message ID 1398147435-3122-1-git-send-email-a.kesavan@samsung.com (mailing list archive)
State New, archived
Headers show

Commit Message

Abhilash Kesavan April 22, 2014, 6:17 a.m. UTC
Add machine-dependent MCPM call-backs for Exynos5420. These are used
to power up/down the secondary CPUs during boot, shutdown, s2r and
switching.

Signed-off-by: Thomas Abraham <thomas.ab@samsung.com>
Signed-off-by: Inderpal Singh <inderpal.s@samsung.com>
Signed-off-by: Andrew Bresticker <abrestic@chromium.org>
Signed-off-by: Abhilash Kesavan <a.kesavan@samsung.com>
---
 arch/arm/mach-exynos/Kconfig       |    8 +
 arch/arm/mach-exynos/Makefile      |    2 +
 arch/arm/mach-exynos/mcpm-exynos.c |  345 ++++++++++++++++++++++++++++++++++++
 arch/arm/mach-exynos/regs-pmu.h    |    2 +
 4 files changed, 357 insertions(+)
 create mode 100644 arch/arm/mach-exynos/mcpm-exynos.c

Comments

Daniel Lezcano April 22, 2014, 11:21 a.m. UTC | #1
On 04/22/2014 08:17 AM, Abhilash Kesavan wrote:
> Add machine-dependent MCPM call-backs for Exynos5420. These are used
> to power up/down the secondary CPUs during boot, shutdown, s2r and
> switching.
>
> Signed-off-by: Thomas Abraham <thomas.ab@samsung.com>
> Signed-off-by: Inderpal Singh <inderpal.s@samsung.com>
> Signed-off-by: Andrew Bresticker <abrestic@chromium.org>
> Signed-off-by: Abhilash Kesavan <a.kesavan@samsung.com>
> ---
>   arch/arm/mach-exynos/Kconfig       |    8 +
>   arch/arm/mach-exynos/Makefile      |    2 +
>   arch/arm/mach-exynos/mcpm-exynos.c |  345 ++++++++++++++++++++++++++++++++++++
>   arch/arm/mach-exynos/regs-pmu.h    |    2 +
>   4 files changed, 357 insertions(+)
>   create mode 100644 arch/arm/mach-exynos/mcpm-exynos.c
>
> diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig
> index fc8bf18..1602abc 100644
> --- a/arch/arm/mach-exynos/Kconfig
> +++ b/arch/arm/mach-exynos/Kconfig
> @@ -110,4 +110,12 @@ config SOC_EXYNOS5440
>
>   endmenu
>
> +config EXYNOS5420_MCPM
> +	bool "Exynos5420 Multi-Cluster PM support"
> +	depends on MCPM && SOC_EXYNOS5420
> +	select ARM_CCI
> +	help
> +	  This is needed to provide CPU and cluster power management
> +	  on Exynos5420 implementing big.LITTLE.
> +
>   endif
> diff --git a/arch/arm/mach-exynos/Makefile b/arch/arm/mach-exynos/Makefile
> index a656dbe..01bc9b9 100644
> --- a/arch/arm/mach-exynos/Makefile
> +++ b/arch/arm/mach-exynos/Makefile
> @@ -29,3 +29,5 @@ obj-$(CONFIG_ARCH_EXYNOS)	+= firmware.o
>
>   plus_sec := $(call as-instr,.arch_extension sec,+sec)
>   AFLAGS_exynos-smc.o		:=-Wa,-march=armv7-a$(plus_sec)
> +
> +obj-$(CONFIG_EXYNOS5420_MCPM)	+= mcpm-exynos.o
> diff --git a/arch/arm/mach-exynos/mcpm-exynos.c b/arch/arm/mach-exynos/mcpm-exynos.c
> new file mode 100644
> index 0000000..49b9031
> --- /dev/null
> +++ b/arch/arm/mach-exynos/mcpm-exynos.c
> @@ -0,0 +1,345 @@
> +/*
> + * Copyright (c) 2014 Samsung Electronics Co., Ltd.
> + *		http://www.samsung.com
> + *
> + * arch/arm/mach-exynos/mcpm-exynos.c
> + *
> + * Based on arch/arm/mach-vexpress/dcscb.c
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/io.h>
> +#include <linux/spinlock.h>
> +#include <linux/uaccess.h>
> +#include <linux/miscdevice.h>
> +#include <linux/fs.h>
> +#include <linux/delay.h>
> +#include <linux/arm-cci.h>
> +
> +#include <asm/mcpm.h>
> +#include <asm/cputype.h>
> +#include <asm/cp15.h>
> +
> +#include <plat/cpu.h>
> +#include "regs-pmu.h"
> +
> +#define EXYNOS5420_CPUS_PER_CLUSTER	4
> +#define EXYNOS5420_NR_CLUSTERS		2
> +
> +/* Secondary CPU entry point */
> +#define REG_ENTRY_ADDR		(S5P_VA_SYSRAM_NS + 0x1C)
> +
> +#define EXYNOS_CORE_LOCAL_PWR_EN		0x3
> +#define EXYNOS_CORE_LOCAL_PWR_DIS		0x0
>
> +#define EXYNOS_ARM_COMMON_CONFIGURATION		S5P_PMUREG(0x2500)
> +#define EXYNOS_ARM_L2_CONFIGURATION		S5P_PMUREG(0x2600)
> +
> +#define EXYNOS_ARM_CORE_CONFIGURATION(_nr)	\
> +			(S5P_ARM_CORE0_CONFIGURATION + ((_nr) * 0x80))
> +#define EXYNOS_ARM_CORE_STATUS(_nr)		\
> +			(S5P_ARM_CORE0_STATUS + ((_nr) * 0x80))
> +
> +#define EXYNOS_COMMON_CONFIGURATION(_nr)	\
> +			(EXYNOS_ARM_COMMON_CONFIGURATION + ((_nr) * 0x80))
> +#define EXYNOS_COMMON_STATUS(_nr)		\
> +			(EXYNOS_COMMON_CONFIGURATION(_nr) + 0x4)
> +
> +#define EXYNOS_L2_CONFIGURATION(_nr)		\
> +			(EXYNOS_ARM_L2_CONFIGURATION + ((_nr) * 0x80))
> +#define EXYNOS_L2_STATUS(_nr)			\
> +			(EXYNOS_L2_CONFIGURATION(_nr) + 0x4)
> +

Is it possible to share the definition of those macros with the rest of 
the code via functions, so they can be re-used for the other sub-systems 
? eg: https://patches.linaro.org/27798/

> +/*
> + * The common v7_exit_coherency_flush API could not be used because of the
> + * Erratum 799270 workaround. This macro is the same as the common one (in
> + * arch/arm/include/asm/cacheflush.h) except for the erratum handling.
> + */
> +#define exynos_v7_exit_coherency_flush(level) \
> +	asm volatile( \
> +	"stmfd	sp!, {fp, ip}\n\t"\
> +	"mrc	p15, 0, r0, c1, c0, 0	@ get SCTLR\n\t" \
> +	"bic	r0, r0, #"__stringify(CR_C)"\n\t" \
> +	"mcr	p15, 0, r0, c1, c0, 0	@ set SCTLR\n\t" \
> +	"isb\n\t"\
> +	"bl	v7_flush_dcache_"__stringify(level)"\n\t" \
> +	"clrex\n\t"\
> +	"mrc	p15, 0, r0, c1, c0, 1	@ get ACTLR\n\t" \
> +	"bic	r0, r0, #(1 << 6)	@ disable local coherency\n\t" \
> +	/* Dummy Load of a device register to avoid Erratum 799270 */ \

Wouldn't make sense to add the erratum in the Kconfig and re-use it in 
the generic v7_exit_coherency_flush macro instead of redefining it ?

> +	"ldr	r4, [%0]\n\t" \
> +	"and	r4, r4, #0\n\t" \
> +	"orr	r0, r0, r4\n\t" \
> +	"mcr	p15, 0, r0, c1, c0, 1	@ set ACTLR\n\t" \
> +	"isb\n\t" \
> +	"dsb\n\t" \
> +	"ldmfd	sp!, {fp, ip}" \
> +	: \
> +	: "Ir" (S5P_INFORM0) \
> +	: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
> +	  "r9", "r10", "lr", "memory")



> +/*
> + * We can't use regular spinlocks. In the switcher case, it is possible
> + * for an outbound CPU to call power_down() after its inbound counterpart
> + * is already live using the same logical CPU number which trips lockdep
> + * debugging.
> + */
> +static arch_spinlock_t exynos_mcpm_lock = __ARCH_SPIN_LOCK_UNLOCKED;
> +static int
> +cpu_use_count[EXYNOS5420_CPUS_PER_CLUSTER][EXYNOS5420_NR_CLUSTERS];
> +
> +static bool exynos_core_power_state(unsigned int cpu, unsigned int cluster)
> +{
> +	unsigned int val;
> +	unsigned int cpunr = cpu + (cluster * EXYNOS5420_CPUS_PER_CLUSTER);
> +
> +	val = __raw_readl(EXYNOS_ARM_CORE_STATUS(cpunr)) &
> +				EXYNOS_CORE_LOCAL_PWR_EN;
> +	return !!val;
> +}
> +
> +static void exynos_core_power_control(unsigned int cpu, unsigned int cluster,
> +						int enable)
> +{
> +	unsigned int val = EXYNOS_CORE_LOCAL_PWR_DIS;
> +	unsigned int cpunr = cpu + (cluster * EXYNOS5420_CPUS_PER_CLUSTER);
> +
> +	if (exynos_core_power_state(cpu, cluster) == enable)
> +		return;
> +
> +	if (enable)
> +		val = EXYNOS_CORE_LOCAL_PWR_EN;
> +	__raw_writel(val, EXYNOS_ARM_CORE_CONFIGURATION(cpunr));
> +}

Same comment as above about sharing these functions.

Shouldn't these functions to be reused by hotplug ?

> +static void exynos_cluster_power_control(unsigned int cluster, int enable)
> +{
> +	unsigned long timeout = jiffies + msecs_to_jiffies(20);
> +	unsigned int status, val = EXYNOS_CORE_LOCAL_PWR_DIS;
> +
> +	if (enable)
> +		val = EXYNOS_CORE_LOCAL_PWR_EN;
> +
> +	status = __raw_readl(EXYNOS_COMMON_STATUS(cluster));
> +	if ((status & EXYNOS_CORE_LOCAL_PWR_EN) == val)
> +		return;
> +
> +	__raw_writel(val, EXYNOS_COMMON_CONFIGURATION(cluster));
> +	/* Wait until cluster power control is applied */
> +	while (time_before(jiffies, timeout)) {
> +		status = __raw_readl(EXYNOS_COMMON_STATUS(cluster));
> +
> +		if ((status & EXYNOS_CORE_LOCAL_PWR_EN) == val)
> +			return;
> +
> +		cpu_relax();
> +	}
> +	pr_warn("timed out waiting for cluster %u to power %s\n", cluster,
> +		enable ? "on" : "off");
> +}
> +
> +static int exynos_power_up(unsigned int cpu, unsigned int cluster)
> +{
> +	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
> +	if (cpu >= EXYNOS5420_CPUS_PER_CLUSTER ||
> +		cluster >= EXYNOS5420_NR_CLUSTERS)
> +		return -EINVAL;
> +
> +	/*
> +	 * Since this is called with IRQs enabled, and no arch_spin_lock_irq
> +	 * variant exists, we need to disable IRQs manually here.
> +	 */
> +	local_irq_disable();
> +	arch_spin_lock(&exynos_mcpm_lock);
> +
> +	cpu_use_count[cpu][cluster]++;
> +	if (cpu_use_count[cpu][cluster] == 1) {
> +		bool was_cluster_down =
> +			__mcpm_cluster_state(cluster) == CLUSTER_DOWN;
> +
> +		/*
> +		 * Turn on the cluster (L2/COMMON) and then power on the cores.
> +		 * TODO: Turn off the clusters when all cores in the cluster
> +		 * are down to achieve significant power savings.
> +		 */
> +		if (was_cluster_down)
> +			exynos_cluster_power_control(cluster, 1);
> +		exynos_core_power_control(cpu, cluster, 1);
> +
> +		/* CPU should be powered up there */
> +		/* Also setup Cluster Power Sequence here */
> +	} else if (cpu_use_count[cpu][cluster] != 2) {
> +		/*
> +		 * The only possible values are:
> +		 * 0 = CPU down
> +		 * 1 = CPU (still) up
> +		 * 2 = CPU requested to be up before it had a chance
> +		 *     to actually make itself down.
> +		 * Any other value is a bug.
> +		 */
> +		BUG();
> +	}
> +
> +	arch_spin_unlock(&exynos_mcpm_lock);
> +	local_irq_enable();
> +
> +	return 0;
> +}
> +
> +static void exynos_power_down(void)
> +{
> +	unsigned int mpidr, cpu, cluster, cpumask;
> +	bool last_man = false, skip_wfi = false;
> +
> +	mpidr = read_cpuid_mpidr();
> +	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
> +	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
> +	cpumask = (1 << cpu);

This variable is not used.

> +
> +	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
> +	BUG_ON(cpu >= EXYNOS5420_CPUS_PER_CLUSTER ||
> +			cluster >= EXYNOS5420_NR_CLUSTERS);
> +
> +	__mcpm_cpu_going_down(cpu, cluster);
> +
> +	arch_spin_lock(&exynos_mcpm_lock);
> +	BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
> +	cpu_use_count[cpu][cluster]--;
> +	if (cpu_use_count[cpu][cluster] == 0) {
> +		int cnt = 0, i;
> +
> +		exynos_core_power_control(cpu, cluster, 0);
> +		for (i = 0; i < EXYNOS5420_CPUS_PER_CLUSTER; i++)
> +			cnt += cpu_use_count[i][cluster];
> +		if (cnt == 0)
> +			last_man = true;
> +	} else if (cpu_use_count[cpu][cluster] == 1) {
> +		/*
> +		 * A power_up request went ahead of us.
> +		 * Even if we do not want to shut this CPU down,
> +		 * the caller expects a certain state as if the WFI
> +		 * was aborted.  So let's continue with cache cleaning.
> +		 */
> +		skip_wfi = true;
> +	} else {
> +		BUG();
> +	}
> +
> +	if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
> +		arch_spin_unlock(&exynos_mcpm_lock);
> +
> +		/* Flush all cache levels for this cluster. */
> +		exynos_v7_exit_coherency_flush(all);
> +
> +		/*
> +		 * Disable cluster-level coherency by masking
> +		 * incoming snoops and DVM messages:
> +		 */
> +		cci_disable_port_by_cpu(mpidr);
> +
> +		__mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
> +	} else {
> +		arch_spin_unlock(&exynos_mcpm_lock);
> +
> +		/* Disable and flush the local CPU cache. */
> +		exynos_v7_exit_coherency_flush(louis);
> +	}
> +
> +	__mcpm_cpu_down(cpu, cluster);
> +
> +	/* Now we are prepared for power-down, do it: */
> +	if (!skip_wfi)
> +		wfi();
> +
> +	/* Not dead at this point?  Let our caller cope. */
> +}
> +
> +static int exynos_power_down_finish(unsigned int cpu, unsigned int cluster)
> +{
> +	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
> +	BUG_ON(cpu >= EXYNOS5420_CPUS_PER_CLUSTER ||
> +			cluster >= EXYNOS5420_NR_CLUSTERS);
> +
> +	/* Wait for the core state to be OFF */
> +	while (exynos_core_power_state(cpu, cluster) != 0x0)
> +		;

As this is a potential infinite loop, a timeout may be desirable, no ?

> +
> +	return 0; /* success: the CPU is halted */
> +}
> +
> +static const struct mcpm_platform_ops exynos_power_ops = {
> +	.power_up		= exynos_power_up,
> +	.power_down		= exynos_power_down,
> +	.power_down_finish	= exynos_power_down_finish,
> +};
> +
> +static void __init exynos_mcpm_usage_count_init(void)
> +{
> +	unsigned int mpidr, cpu, cluster;
> +
> +	mpidr = read_cpuid_mpidr();
> +	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
> +	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
> +
> +	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
> +	BUG_ON(cpu >= EXYNOS5420_CPUS_PER_CLUSTER  ||
> +			cluster >= EXYNOS5420_NR_CLUSTERS);
> +
> +	cpu_use_count[cpu][cluster] = 1;
> +}
> +
> +/*
> + * Enable cluster-level coherency, in preparation for turning on the MMU.
> + */
> +static void __naked exynos_pm_power_up_setup(unsigned int affinity_level)
> +{
> +	asm volatile ("\n"
> +	"cmp	r0, #1\n"
> +	"bxne	lr\n"
> +	"b	cci_enable_port_for_self");
> +}
> +
> +static int __init exynos_mcpm_init(void)
> +{
> +	int ret = 0;
> +
> +	if (!soc_is_exynos5420())
> +		return -ENODEV;
> +
> +	if (!cci_probed())
> +		return -ENODEV;
> +
> +	/*
> +	 * To increase the stability of KFC reset we need to program
> +	 * the PMU SPARE3 register
> +	 */
> +	__raw_writel(EXYNOS5420_SWRESET_KFC_SEL, S5P_PMU_SPARE3);
> +
> +	exynos_mcpm_usage_count_init();
> +
> +	ret = mcpm_platform_register(&exynos_power_ops);
> +	if (!ret)
> +		ret = mcpm_sync_init(exynos_pm_power_up_setup);
> +	if (ret)
> +		return ret;
> +
> +	mcpm_smp_set_ops();
> +
> +	pr_info("Exynos MCPM support installed\n");
> +
> +	/*
> +	 * Future entries into the kernel can now go
> +	 * through the cluster entry vectors.
> +	 */
> +	__raw_writel(virt_to_phys(mcpm_entry_point),
> +		REG_ENTRY_ADDR);
> +
> +	return ret;
> +}
> +
> +early_initcall(exynos_mcpm_init);
> diff --git a/arch/arm/mach-exynos/regs-pmu.h b/arch/arm/mach-exynos/regs-pmu.h
> index cfbfc575..43fe7a0 100644
> --- a/arch/arm/mach-exynos/regs-pmu.h
> +++ b/arch/arm/mach-exynos/regs-pmu.h
> @@ -38,6 +38,7 @@
>   #define S5P_INFORM5				S5P_PMUREG(0x0814)
>   #define S5P_INFORM6				S5P_PMUREG(0x0818)
>   #define S5P_INFORM7				S5P_PMUREG(0x081C)
> +#define S5P_PMU_SPARE3				S5P_PMUREG(0x090C)
>
>   #define EXYNOS_IROM_DATA2			S5P_PMUREG(0x0988)
>
> @@ -540,5 +541,6 @@
>   #define EXYNOS5420_KFC_USE_STANDBY_WFE3				(1 << 23)
>
>   #define DUR_WAIT_RESET				0xF
> +#define EXYNOS5420_SWRESET_KFC_SEL		0x3
>
>   #endif /* __ASM_ARCH_REGS_PMU_H */
>
Nicolas Pitre April 22, 2014, 3:40 p.m. UTC | #2
On Tue, 22 Apr 2014, Daniel Lezcano wrote:

> On 04/22/2014 08:17 AM, Abhilash Kesavan wrote:
> > Add machine-dependent MCPM call-backs for Exynos5420. These are used
> > to power up/down the secondary CPUs during boot, shutdown, s2r and
> > switching.
> >
> > Signed-off-by: Thomas Abraham <thomas.ab@samsung.com>
> > Signed-off-by: Inderpal Singh <inderpal.s@samsung.com>
> > Signed-off-by: Andrew Bresticker <abrestic@chromium.org>
> > Signed-off-by: Abhilash Kesavan <a.kesavan@samsung.com>
> > ---
> >   arch/arm/mach-exynos/Kconfig       |    8 +
> >   arch/arm/mach-exynos/Makefile      |    2 +
> >   arch/arm/mach-exynos/mcpm-exynos.c |  345
> >   ++++++++++++++++++++++++++++++++++++
> >   arch/arm/mach-exynos/regs-pmu.h    |    2 +
> >   4 files changed, 357 insertions(+)
> >   create mode 100644 arch/arm/mach-exynos/mcpm-exynos.c
> >
> > diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig
> > index fc8bf18..1602abc 100644
> > --- a/arch/arm/mach-exynos/Kconfig
> > +++ b/arch/arm/mach-exynos/Kconfig
> > @@ -110,4 +110,12 @@ config SOC_EXYNOS5440
> >
> >   endmenu
> >
> > +config EXYNOS5420_MCPM
> > +	bool "Exynos5420 Multi-Cluster PM support"
> > +	depends on MCPM && SOC_EXYNOS5420
> > +	select ARM_CCI
> > +	help
> > +	  This is needed to provide CPU and cluster power management
> > +	  on Exynos5420 implementing big.LITTLE.
> > +
> >   endif
> > diff --git a/arch/arm/mach-exynos/Makefile b/arch/arm/mach-exynos/Makefile
> > index a656dbe..01bc9b9 100644
> > --- a/arch/arm/mach-exynos/Makefile
> > +++ b/arch/arm/mach-exynos/Makefile
> > @@ -29,3 +29,5 @@ obj-$(CONFIG_ARCH_EXYNOS)	+= firmware.o
> >
> >   plus_sec := $(call as-instr,.arch_extension sec,+sec)
> >   AFLAGS_exynos-smc.o		:=-Wa,-march=armv7-a$(plus_sec)
> > +
> > +obj-$(CONFIG_EXYNOS5420_MCPM)	+= mcpm-exynos.o
> > diff --git a/arch/arm/mach-exynos/mcpm-exynos.c
> > b/arch/arm/mach-exynos/mcpm-exynos.c
> > new file mode 100644
> > index 0000000..49b9031
> > --- /dev/null
> > +++ b/arch/arm/mach-exynos/mcpm-exynos.c
> > @@ -0,0 +1,345 @@
> > +/*
> > + * Copyright (c) 2014 Samsung Electronics Co., Ltd.
> > + *		http://www.samsung.com
> > + *
> > + * arch/arm/mach-exynos/mcpm-exynos.c
> > + *
> > + * Based on arch/arm/mach-vexpress/dcscb.c
> > + *
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License version 2 as
> > + * published by the Free Software Foundation.
> > + */
> > +
> > +#include <linux/kernel.h>
> > +#include <linux/io.h>
> > +#include <linux/spinlock.h>
> > +#include <linux/uaccess.h>
> > +#include <linux/miscdevice.h>
> > +#include <linux/fs.h>
> > +#include <linux/delay.h>
> > +#include <linux/arm-cci.h>
> > +
> > +#include <asm/mcpm.h>
> > +#include <asm/cputype.h>
> > +#include <asm/cp15.h>
> > +
> > +#include <plat/cpu.h>
> > +#include "regs-pmu.h"
> > +
> > +#define EXYNOS5420_CPUS_PER_CLUSTER	4
> > +#define EXYNOS5420_NR_CLUSTERS		2
> > +
> > +/* Secondary CPU entry point */
> > +#define REG_ENTRY_ADDR		(S5P_VA_SYSRAM_NS + 0x1C)
> > +
> > +#define EXYNOS_CORE_LOCAL_PWR_EN		0x3
> > +#define EXYNOS_CORE_LOCAL_PWR_DIS		0x0
> >
> > +#define EXYNOS_ARM_COMMON_CONFIGURATION		S5P_PMUREG(0x2500)
> > +#define EXYNOS_ARM_L2_CONFIGURATION		S5P_PMUREG(0x2600)
> > +
> > +#define EXYNOS_ARM_CORE_CONFIGURATION(_nr)	\
> > +			(S5P_ARM_CORE0_CONFIGURATION + ((_nr) * 0x80))
> > +#define EXYNOS_ARM_CORE_STATUS(_nr)		\
> > +			(S5P_ARM_CORE0_STATUS + ((_nr) * 0x80))
> > +
> > +#define EXYNOS_COMMON_CONFIGURATION(_nr)	\
> > +			(EXYNOS_ARM_COMMON_CONFIGURATION + ((_nr) * 0x80))
> > +#define EXYNOS_COMMON_STATUS(_nr)		\
> > +			(EXYNOS_COMMON_CONFIGURATION(_nr) + 0x4)
> > +
> > +#define EXYNOS_L2_CONFIGURATION(_nr)		\
> > +			(EXYNOS_ARM_L2_CONFIGURATION + ((_nr) * 0x80))
> > +#define EXYNOS_L2_STATUS(_nr)			\
> > +			(EXYNOS_L2_CONFIGURATION(_nr) + 0x4)
> > +
> 
> Is it possible to share the definition of those macros with the rest of the
> code via functions, so they can be re-used for the other sub-systems ? eg:
> https://patches.linaro.org/27798/

This patch is incompatible with MCPM.  A proper idle driver on top of 
MCPM is required instead.  That's the role of MCPM i.e. arbitrate power 
requests between different requestors, including hotplug, cpuidle, and 
the b.L switcher in some cases.

> > +/*
> > + * The common v7_exit_coherency_flush API could not be used because of the
> > + * Erratum 799270 workaround. This macro is the same as the common one (in
> > + * arch/arm/include/asm/cacheflush.h) except for the erratum handling.
> > + */
> > +#define exynos_v7_exit_coherency_flush(level) \
> > +	asm volatile( \
> > +	"stmfd	sp!, {fp, ip}\n\t"\
> > +	"mrc	p15, 0, r0, c1, c0, 0	@ get SCTLR\n\t" \
> > +	"bic	r0, r0, #"__stringify(CR_C)"\n\t" \
> > +	"mcr	p15, 0, r0, c1, c0, 0	@ set SCTLR\n\t" \
> > +	"isb\n\t"\
> > +	"bl	v7_flush_dcache_"__stringify(level)"\n\t" \
> > +	"clrex\n\t"\
> > +	"mrc	p15, 0, r0, c1, c0, 1	@ get ACTLR\n\t" \
> > +	"bic	r0, r0, #(1 << 6)	@ disable local coherency\n\t" \
> > +	/* Dummy Load of a device register to avoid Erratum 799270 */ \
> 
> Wouldn't make sense to add the erratum in the Kconfig and re-use it in the
> generic v7_exit_coherency_flush macro instead of redefining it ?

The implementation of the erratum (the dummy device register load) is 
platform specific I'm afraid.

Is TC2 also concerned by this erratum, or is this Samsung specific?

> > +	"ldr	r4, [%0]\n\t" \
> > +	"and	r4, r4, #0\n\t" \
> > +	"orr	r0, r0, r4\n\t" \
> > +	"mcr	p15, 0, r0, c1, c0, 1	@ set ACTLR\n\t" \
> > +	"isb\n\t" \
> > +	"dsb\n\t" \
> > +	"ldmfd	sp!, {fp, ip}" \
> > +	: \
> > +	: "Ir" (S5P_INFORM0) \
> > +	: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
> > +	  "r9", "r10", "lr", "memory")
> 
> 
> 
> > +/*
> > + * We can't use regular spinlocks. In the switcher case, it is possible
> > + * for an outbound CPU to call power_down() after its inbound counterpart
> > + * is already live using the same logical CPU number which trips lockdep
> > + * debugging.
> > + */
> > +static arch_spinlock_t exynos_mcpm_lock = __ARCH_SPIN_LOCK_UNLOCKED;
> > +static int
> > +cpu_use_count[EXYNOS5420_CPUS_PER_CLUSTER][EXYNOS5420_NR_CLUSTERS];
> > +
> > +static bool exynos_core_power_state(unsigned int cpu, unsigned int cluster)
> > +{
> > +	unsigned int val;
> > +	unsigned int cpunr = cpu + (cluster * EXYNOS5420_CPUS_PER_CLUSTER);
> > +
> > +	val = __raw_readl(EXYNOS_ARM_CORE_STATUS(cpunr)) &
> > +				EXYNOS_CORE_LOCAL_PWR_EN;
> > +	return !!val;
> > +}
> > +
> > +static void exynos_core_power_control(unsigned int cpu, unsigned int
> > cluster,
> > +						int enable)
> > +{
> > +	unsigned int val = EXYNOS_CORE_LOCAL_PWR_DIS;
> > +	unsigned int cpunr = cpu + (cluster * EXYNOS5420_CPUS_PER_CLUSTER);
> > +
> > +	if (exynos_core_power_state(cpu, cluster) == enable)
> > +		return;
> > +
> > +	if (enable)
> > +		val = EXYNOS_CORE_LOCAL_PWR_EN;
> > +	__raw_writel(val, EXYNOS_ARM_CORE_CONFIGURATION(cpunr));
> > +}
> 
> Same comment as above about sharing these functions.
> 
> Shouldn't these functions to be reused by hotplug ?

This _is_ hotplug.  Once this MCPM backend is registered, CPU hotplug is 
automatically available.  See arch/arm/common/mcpm_platsmp.c.


Nicolas
Nicolas Pitre April 22, 2014, 7:18 p.m. UTC | #3
On Tue, 22 Apr 2014, Abhilash Kesavan wrote:

> Add machine-dependent MCPM call-backs for Exynos5420. These are used
> to power up/down the secondary CPUs during boot, shutdown, s2r and
> switching.
> 
> Signed-off-by: Thomas Abraham <thomas.ab@samsung.com>
> Signed-off-by: Inderpal Singh <inderpal.s@samsung.com>
> Signed-off-by: Andrew Bresticker <abrestic@chromium.org>
> Signed-off-by: Abhilash Kesavan <a.kesavan@samsung.com>

Getting there!  See comments below.

[...]
> diff --git a/arch/arm/mach-exynos/mcpm-exynos.c b/arch/arm/mach-exynos/mcpm-exynos.c
> new file mode 100644
> index 0000000..49b9031
> --- /dev/null
> +++ b/arch/arm/mach-exynos/mcpm-exynos.c
> @@ -0,0 +1,345 @@
> +/*
> + * Copyright (c) 2014 Samsung Electronics Co., Ltd.
> + *		http://www.samsung.com
> + *
> + * arch/arm/mach-exynos/mcpm-exynos.c
> + *
> + * Based on arch/arm/mach-vexpress/dcscb.c
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/io.h>
> +#include <linux/spinlock.h>
> +#include <linux/uaccess.h>
> +#include <linux/miscdevice.h>
> +#include <linux/fs.h>
> +#include <linux/delay.h>
> +#include <linux/arm-cci.h>
> +
> +#include <asm/mcpm.h>
> +#include <asm/cputype.h>
> +#include <asm/cp15.h>
> +
> +#include <plat/cpu.h>
> +#include "regs-pmu.h"
> +
> +#define EXYNOS5420_CPUS_PER_CLUSTER	4
> +#define EXYNOS5420_NR_CLUSTERS		2
> +
> +/* Secondary CPU entry point */
> +#define REG_ENTRY_ADDR		(S5P_VA_SYSRAM_NS + 0x1C)
> +
> +#define EXYNOS_CORE_LOCAL_PWR_EN		0x3
> +#define EXYNOS_CORE_LOCAL_PWR_DIS		0x0
> +
> +#define EXYNOS_ARM_COMMON_CONFIGURATION		S5P_PMUREG(0x2500)
> +#define EXYNOS_ARM_L2_CONFIGURATION		S5P_PMUREG(0x2600)
> +
> +#define EXYNOS_ARM_CORE_CONFIGURATION(_nr)	\
> +			(S5P_ARM_CORE0_CONFIGURATION + ((_nr) * 0x80))
> +#define EXYNOS_ARM_CORE_STATUS(_nr)		\
> +			(S5P_ARM_CORE0_STATUS + ((_nr) * 0x80))
> +
> +#define EXYNOS_COMMON_CONFIGURATION(_nr)	\
> +			(EXYNOS_ARM_COMMON_CONFIGURATION + ((_nr) * 0x80))
> +#define EXYNOS_COMMON_STATUS(_nr)		\
> +			(EXYNOS_COMMON_CONFIGURATION(_nr) + 0x4)
> +
> +#define EXYNOS_L2_CONFIGURATION(_nr)		\
> +			(EXYNOS_ARM_L2_CONFIGURATION + ((_nr) * 0x80))
> +#define EXYNOS_L2_STATUS(_nr)			\
> +			(EXYNOS_L2_CONFIGURATION(_nr) + 0x4)
> +
> +/*
> + * The common v7_exit_coherency_flush API could not be used because of the
> + * Erratum 799270 workaround. This macro is the same as the common one (in
> + * arch/arm/include/asm/cacheflush.h) except for the erratum handling.
> + */
> +#define exynos_v7_exit_coherency_flush(level) \
> +	asm volatile( \
> +	"stmfd	sp!, {fp, ip}\n\t"\
> +	"mrc	p15, 0, r0, c1, c0, 0	@ get SCTLR\n\t" \
> +	"bic	r0, r0, #"__stringify(CR_C)"\n\t" \
> +	"mcr	p15, 0, r0, c1, c0, 0	@ set SCTLR\n\t" \
> +	"isb\n\t"\
> +	"bl	v7_flush_dcache_"__stringify(level)"\n\t" \
> +	"clrex\n\t"\
> +	"mrc	p15, 0, r0, c1, c0, 1	@ get ACTLR\n\t" \
> +	"bic	r0, r0, #(1 << 6)	@ disable local coherency\n\t" \
> +	/* Dummy Load of a device register to avoid Erratum 799270 */ \
> +	"ldr	r4, [%0]\n\t" \
> +	"and	r4, r4, #0\n\t" \
> +	"orr	r0, r0, r4\n\t" \
> +	"mcr	p15, 0, r0, c1, c0, 1	@ set ACTLR\n\t" \
> +	"isb\n\t" \
> +	"dsb\n\t" \
> +	"ldmfd	sp!, {fp, ip}" \
> +	: \
> +	: "Ir" (S5P_INFORM0) \
> +	: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
> +	  "r9", "r10", "lr", "memory")
> +
> +/*
> + * We can't use regular spinlocks. In the switcher case, it is possible
> + * for an outbound CPU to call power_down() after its inbound counterpart
> + * is already live using the same logical CPU number which trips lockdep
> + * debugging.
> + */
> +static arch_spinlock_t exynos_mcpm_lock = __ARCH_SPIN_LOCK_UNLOCKED;
> +static int
> +cpu_use_count[EXYNOS5420_CPUS_PER_CLUSTER][EXYNOS5420_NR_CLUSTERS];
> +
> +static bool exynos_core_power_state(unsigned int cpu, unsigned int cluster)
> +{
> +	unsigned int val;
> +	unsigned int cpunr = cpu + (cluster * EXYNOS5420_CPUS_PER_CLUSTER);
> +
> +	val = __raw_readl(EXYNOS_ARM_CORE_STATUS(cpunr)) &
> +				EXYNOS_CORE_LOCAL_PWR_EN;
> +	return !!val;
> +}
> +
> +static void exynos_core_power_control(unsigned int cpu, unsigned int cluster,
> +						int enable)
> +{
> +	unsigned int val = EXYNOS_CORE_LOCAL_PWR_DIS;
> +	unsigned int cpunr = cpu + (cluster * EXYNOS5420_CPUS_PER_CLUSTER);
> +
> +	if (exynos_core_power_state(cpu, cluster) == enable)
> +		return;

I think this isn't right.

Normally, the power _status_ and the power _control_ are not always in 
sync.  It is possible for the power control to be set to OFF but the
status won't reflect that until the CPU has executed WFI for example.

Here we don't care about the actual status.  What matters is the 
control.  In the exynos_power_up case, if the control bit says "power 
off" but the status bit says "it isn't off yet" then the code won't turn 
the control bit back on.

For example, it should be possible for CPU1 to call exynos_power_up() 
while CPU0 is still executing code in exynos_power_down() right after 
releasing exynos_mcpm_lock but before WFI is executed.  There is a cache 
flush during that window which might take some time to execute, making 
this scenario more likely that you might think.

What we want here is simply to set the power control bit back on.  No 
need to test its previous status as the value of cpu_use_count already 
reflects it.  And if the affected CPU didn't reach WFI yet, then the 
execution of WFI will simply return and the CPU will be soft rebooted.

And in the exynos_power_down() case the CPU turning the control bit off 
is always doing so for itself, therefore its status bit must obviously 
show that it is running.  Testing it is therefore redundant.

Therefore you should be able to simplify this greatly.

> +	if (enable)
> +		val = EXYNOS_CORE_LOCAL_PWR_EN;
> +	__raw_writel(val, EXYNOS_ARM_CORE_CONFIGURATION(cpunr));
> +}
> +
> +static void exynos_cluster_power_control(unsigned int cluster, int enable)
> +{
> +	unsigned long timeout = jiffies + msecs_to_jiffies(20);
> +	unsigned int status, val = EXYNOS_CORE_LOCAL_PWR_DIS;
> +
> +	if (enable)
> +		val = EXYNOS_CORE_LOCAL_PWR_EN;
> +
> +	status = __raw_readl(EXYNOS_COMMON_STATUS(cluster));
> +	if ((status & EXYNOS_CORE_LOCAL_PWR_EN) == val)
> +		return;

Same comment as above.  You shouldn't need to check current status 
before adjusting the wanted control.  If you get here that's because the 
control is never what you want it to be.

> +	__raw_writel(val, EXYNOS_COMMON_CONFIGURATION(cluster));
> +	/* Wait until cluster power control is applied */
> +	while (time_before(jiffies, timeout)) {
> +		status = __raw_readl(EXYNOS_COMMON_STATUS(cluster));
> +
> +		if ((status & EXYNOS_CORE_LOCAL_PWR_EN) == val)
> +			return;
> +
> +		cpu_relax();
> +	}

As mentioned in a previous email, it is possible for the CPU executing 
this code to be the only CPU currently alive.  And in this code path 
IRQs are disabled.  That means nothing will ever increase jiffies in 
that case and the timeout will never expire.

If in practice there is only a few loops to perform here, I'd suggest 
simply looping, say, 100 times and bail out if that fails.

Oh and if the poll loop fails then you must turn the power control back 
off and return an error via exynos_power_up().

> +	pr_warn("timed out waiting for cluster %u to power %s\n", cluster,
> +		enable ? "on" : "off");
> +}
> +
> +static int exynos_power_up(unsigned int cpu, unsigned int cluster)
> +{
> +	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
> +	if (cpu >= EXYNOS5420_CPUS_PER_CLUSTER ||
> +		cluster >= EXYNOS5420_NR_CLUSTERS)
> +		return -EINVAL;
> +
> +	/*
> +	 * Since this is called with IRQs enabled, and no arch_spin_lock_irq
> +	 * variant exists, we need to disable IRQs manually here.
> +	 */
> +	local_irq_disable();
> +	arch_spin_lock(&exynos_mcpm_lock);
> +
> +	cpu_use_count[cpu][cluster]++;
> +	if (cpu_use_count[cpu][cluster] == 1) {
> +		bool was_cluster_down =
> +			__mcpm_cluster_state(cluster) == CLUSTER_DOWN;
> +
> +		/*
> +		 * Turn on the cluster (L2/COMMON) and then power on the cores.
> +		 * TODO: Turn off the clusters when all cores in the cluster
> +		 * are down to achieve significant power savings.
> +		 */
> +		if (was_cluster_down)
> +			exynos_cluster_power_control(cluster, 1);
> +		exynos_core_power_control(cpu, cluster, 1);
> +
> +		/* CPU should be powered up there */
> +		/* Also setup Cluster Power Sequence here */
> +	} else if (cpu_use_count[cpu][cluster] != 2) {
> +		/*
> +		 * The only possible values are:
> +		 * 0 = CPU down
> +		 * 1 = CPU (still) up
> +		 * 2 = CPU requested to be up before it had a chance
> +		 *     to actually make itself down.
> +		 * Any other value is a bug.
> +		 */
> +		BUG();
> +	}
> +
> +	arch_spin_unlock(&exynos_mcpm_lock);
> +	local_irq_enable();
> +
> +	return 0;
> +}
> +
> +static void exynos_power_down(void)
> +{
> +	unsigned int mpidr, cpu, cluster, cpumask;
> +	bool last_man = false, skip_wfi = false;
> +
> +	mpidr = read_cpuid_mpidr();
> +	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
> +	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
> +	cpumask = (1 << cpu);
> +
> +	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
> +	BUG_ON(cpu >= EXYNOS5420_CPUS_PER_CLUSTER ||
> +			cluster >= EXYNOS5420_NR_CLUSTERS);
> +
> +	__mcpm_cpu_going_down(cpu, cluster);
> +
> +	arch_spin_lock(&exynos_mcpm_lock);
> +	BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
> +	cpu_use_count[cpu][cluster]--;
> +	if (cpu_use_count[cpu][cluster] == 0) {
> +		int cnt = 0, i;
> +
> +		exynos_core_power_control(cpu, cluster, 0);
> +		for (i = 0; i < EXYNOS5420_CPUS_PER_CLUSTER; i++)
> +			cnt += cpu_use_count[i][cluster];
> +		if (cnt == 0)
> +			last_man = true;
> +	} else if (cpu_use_count[cpu][cluster] == 1) {
> +		/*
> +		 * A power_up request went ahead of us.
> +		 * Even if we do not want to shut this CPU down,
> +		 * the caller expects a certain state as if the WFI
> +		 * was aborted.  So let's continue with cache cleaning.
> +		 */
> +		skip_wfi = true;
> +	} else {
> +		BUG();
> +	}
> +
> +	if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
> +		arch_spin_unlock(&exynos_mcpm_lock);
> +
> +		/* Flush all cache levels for this cluster. */
> +		exynos_v7_exit_coherency_flush(all);
> +
> +		/*
> +		 * Disable cluster-level coherency by masking
> +		 * incoming snoops and DVM messages:
> +		 */
> +		cci_disable_port_by_cpu(mpidr);
> +
> +		__mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
> +	} else {
> +		arch_spin_unlock(&exynos_mcpm_lock);
> +
> +		/* Disable and flush the local CPU cache. */
> +		exynos_v7_exit_coherency_flush(louis);
> +	}
> +
> +	__mcpm_cpu_down(cpu, cluster);
> +
> +	/* Now we are prepared for power-down, do it: */
> +	if (!skip_wfi)
> +		wfi();
> +
> +	/* Not dead at this point?  Let our caller cope. */
> +}
> +
> +static int exynos_power_down_finish(unsigned int cpu, unsigned int cluster)
> +{
> +	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
> +	BUG_ON(cpu >= EXYNOS5420_CPUS_PER_CLUSTER ||
> +			cluster >= EXYNOS5420_NR_CLUSTERS);
> +
> +	/* Wait for the core state to be OFF */
> +	while (exynos_core_power_state(cpu, cluster) != 0x0)
> +		;
> +

*Here* is the place to actually establish a timeout.  Please see 
tc2_pm_power_down_finish() for example.

Also beware that this method is going to be renamed once RMK applies the 
following patch:

http://www.arm.linux.org.uk/developer/patches/viewpatch.php?id=8029/1





> +	return 0; /* success: the CPU is halted */
> +}
> +
> +static const struct mcpm_platform_ops exynos_power_ops = {
> +	.power_up		= exynos_power_up,
> +	.power_down		= exynos_power_down,
> +	.power_down_finish	= exynos_power_down_finish,
> +};
> +
> +static void __init exynos_mcpm_usage_count_init(void)
> +{
> +	unsigned int mpidr, cpu, cluster;
> +
> +	mpidr = read_cpuid_mpidr();
> +	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
> +	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
> +
> +	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
> +	BUG_ON(cpu >= EXYNOS5420_CPUS_PER_CLUSTER  ||
> +			cluster >= EXYNOS5420_NR_CLUSTERS);
> +
> +	cpu_use_count[cpu][cluster] = 1;
> +}
> +
> +/*
> + * Enable cluster-level coherency, in preparation for turning on the MMU.
> + */
> +static void __naked exynos_pm_power_up_setup(unsigned int affinity_level)
> +{
> +	asm volatile ("\n"
> +	"cmp	r0, #1\n"
> +	"bxne	lr\n"
> +	"b	cci_enable_port_for_self");
> +}
> +
> +static int __init exynos_mcpm_init(void)
> +{
> +	int ret = 0;
> +
> +	if (!soc_is_exynos5420())
> +		return -ENODEV;
> +
> +	if (!cci_probed())
> +		return -ENODEV;
> +
> +	/*
> +	 * To increase the stability of KFC reset we need to program
> +	 * the PMU SPARE3 register
> +	 */
> +	__raw_writel(EXYNOS5420_SWRESET_KFC_SEL, S5P_PMU_SPARE3);
> +
> +	exynos_mcpm_usage_count_init();
> +
> +	ret = mcpm_platform_register(&exynos_power_ops);
> +	if (!ret)
> +		ret = mcpm_sync_init(exynos_pm_power_up_setup);
> +	if (ret)
> +		return ret;
> +
> +	mcpm_smp_set_ops();
> +
> +	pr_info("Exynos MCPM support installed\n");
> +
> +	/*
> +	 * Future entries into the kernel can now go
> +	 * through the cluster entry vectors.
> +	 */
> +	__raw_writel(virt_to_phys(mcpm_entry_point),
> +		REG_ENTRY_ADDR);
> +
> +	return ret;
> +}
> +
> +early_initcall(exynos_mcpm_init);
> diff --git a/arch/arm/mach-exynos/regs-pmu.h b/arch/arm/mach-exynos/regs-pmu.h
> index cfbfc575..43fe7a0 100644
> --- a/arch/arm/mach-exynos/regs-pmu.h
> +++ b/arch/arm/mach-exynos/regs-pmu.h
> @@ -38,6 +38,7 @@
>  #define S5P_INFORM5				S5P_PMUREG(0x0814)
>  #define S5P_INFORM6				S5P_PMUREG(0x0818)
>  #define S5P_INFORM7				S5P_PMUREG(0x081C)
> +#define S5P_PMU_SPARE3				S5P_PMUREG(0x090C)
>  
>  #define EXYNOS_IROM_DATA2			S5P_PMUREG(0x0988)
>  
> @@ -540,5 +541,6 @@
>  #define EXYNOS5420_KFC_USE_STANDBY_WFE3				(1 << 23)
>  
>  #define DUR_WAIT_RESET				0xF
> +#define EXYNOS5420_SWRESET_KFC_SEL		0x3
>  
>  #endif /* __ASM_ARCH_REGS_PMU_H */
> -- 
> 1.7.9.5
>
Daniel Lezcano April 22, 2014, 7:21 p.m. UTC | #4
On 04/22/2014 05:40 PM, Nicolas Pitre wrote:
> On Tue, 22 Apr 2014, Daniel Lezcano wrote:
>
>> On 04/22/2014 08:17 AM, Abhilash Kesavan wrote:
>>> Add machine-dependent MCPM call-backs for Exynos5420. These are used
>>> to power up/down the secondary CPUs during boot, shutdown, s2r and
>>> switching.
>>>
>>> Signed-off-by: Thomas Abraham <thomas.ab@samsung.com>
>>> Signed-off-by: Inderpal Singh <inderpal.s@samsung.com>
>>> Signed-off-by: Andrew Bresticker <abrestic@chromium.org>
>>> Signed-off-by: Abhilash Kesavan <a.kesavan@samsung.com>
>>> ---
>>>    arch/arm/mach-exynos/Kconfig       |    8 +
>>>    arch/arm/mach-exynos/Makefile      |    2 +
>>>    arch/arm/mach-exynos/mcpm-exynos.c |  345
>>>    ++++++++++++++++++++++++++++++++++++
>>>    arch/arm/mach-exynos/regs-pmu.h    |    2 +
>>>    4 files changed, 357 insertions(+)
>>>    create mode 100644 arch/arm/mach-exynos/mcpm-exynos.c
>>>
>>> diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig
>>> index fc8bf18..1602abc 100644
>>> --- a/arch/arm/mach-exynos/Kconfig
>>> +++ b/arch/arm/mach-exynos/Kconfig
>>> @@ -110,4 +110,12 @@ config SOC_EXYNOS5440
>>>
>>>    endmenu
>>>
>>> +config EXYNOS5420_MCPM
>>> +	bool "Exynos5420 Multi-Cluster PM support"
>>> +	depends on MCPM && SOC_EXYNOS5420
>>> +	select ARM_CCI
>>> +	help
>>> +	  This is needed to provide CPU and cluster power management
>>> +	  on Exynos5420 implementing big.LITTLE.
>>> +
>>>    endif
>>> diff --git a/arch/arm/mach-exynos/Makefile b/arch/arm/mach-exynos/Makefile
>>> index a656dbe..01bc9b9 100644
>>> --- a/arch/arm/mach-exynos/Makefile
>>> +++ b/arch/arm/mach-exynos/Makefile
>>> @@ -29,3 +29,5 @@ obj-$(CONFIG_ARCH_EXYNOS)	+= firmware.o
>>>
>>>    plus_sec := $(call as-instr,.arch_extension sec,+sec)
>>>    AFLAGS_exynos-smc.o		:=-Wa,-march=armv7-a$(plus_sec)
>>> +
>>> +obj-$(CONFIG_EXYNOS5420_MCPM)	+= mcpm-exynos.o
>>> diff --git a/arch/arm/mach-exynos/mcpm-exynos.c
>>> b/arch/arm/mach-exynos/mcpm-exynos.c
>>> new file mode 100644
>>> index 0000000..49b9031
>>> --- /dev/null
>>> +++ b/arch/arm/mach-exynos/mcpm-exynos.c
>>> @@ -0,0 +1,345 @@
>>> +/*
>>> + * Copyright (c) 2014 Samsung Electronics Co., Ltd.
>>> + *		http://www.samsung.com
>>> + *
>>> + * arch/arm/mach-exynos/mcpm-exynos.c
>>> + *
>>> + * Based on arch/arm/mach-vexpress/dcscb.c
>>> + *
>>> + * This program is free software; you can redistribute it and/or modify
>>> + * it under the terms of the GNU General Public License version 2 as
>>> + * published by the Free Software Foundation.
>>> + */
>>> +
>>> +#include <linux/kernel.h>
>>> +#include <linux/io.h>
>>> +#include <linux/spinlock.h>
>>> +#include <linux/uaccess.h>
>>> +#include <linux/miscdevice.h>
>>> +#include <linux/fs.h>
>>> +#include <linux/delay.h>
>>> +#include <linux/arm-cci.h>
>>> +
>>> +#include <asm/mcpm.h>
>>> +#include <asm/cputype.h>
>>> +#include <asm/cp15.h>
>>> +
>>> +#include <plat/cpu.h>
>>> +#include "regs-pmu.h"
>>> +
>>> +#define EXYNOS5420_CPUS_PER_CLUSTER	4
>>> +#define EXYNOS5420_NR_CLUSTERS		2
>>> +
>>> +/* Secondary CPU entry point */
>>> +#define REG_ENTRY_ADDR		(S5P_VA_SYSRAM_NS + 0x1C)
>>> +
>>> +#define EXYNOS_CORE_LOCAL_PWR_EN		0x3
>>> +#define EXYNOS_CORE_LOCAL_PWR_DIS		0x0
>>>
>>> +#define EXYNOS_ARM_COMMON_CONFIGURATION		S5P_PMUREG(0x2500)
>>> +#define EXYNOS_ARM_L2_CONFIGURATION		S5P_PMUREG(0x2600)
>>> +
>>> +#define EXYNOS_ARM_CORE_CONFIGURATION(_nr)	\
>>> +			(S5P_ARM_CORE0_CONFIGURATION + ((_nr) * 0x80))
>>> +#define EXYNOS_ARM_CORE_STATUS(_nr)		\
>>> +			(S5P_ARM_CORE0_STATUS + ((_nr) * 0x80))
>>> +
>>> +#define EXYNOS_COMMON_CONFIGURATION(_nr)	\
>>> +			(EXYNOS_ARM_COMMON_CONFIGURATION + ((_nr) * 0x80))
>>> +#define EXYNOS_COMMON_STATUS(_nr)		\
>>> +			(EXYNOS_COMMON_CONFIGURATION(_nr) + 0x4)
>>> +
>>> +#define EXYNOS_L2_CONFIGURATION(_nr)		\
>>> +			(EXYNOS_ARM_L2_CONFIGURATION + ((_nr) * 0x80))
>>> +#define EXYNOS_L2_STATUS(_nr)			\
>>> +			(EXYNOS_L2_CONFIGURATION(_nr) + 0x4)
>>> +
>>
>> Is it possible to share the definition of those macros with the rest of the
>> code via functions, so they can be re-used for the other sub-systems ? eg:
>> https://patches.linaro.org/27798/
>
> This patch is incompatible with MCPM.  A proper idle driver on top of
> MCPM is required instead.  That's the role of MCPM i.e. arbitrate power
> requests between different requestors, including hotplug, cpuidle, and
> the b.L switcher in some cases.

The driver is resulting from some empirical testing, so I am not sure 
yet MCPM will work. I will give it a try and I will be happy to convert 
to MCPM if possible.

>>> +/*
>>> + * The common v7_exit_coherency_flush API could not be used because of the
>>> + * Erratum 799270 workaround. This macro is the same as the common one (in
>>> + * arch/arm/include/asm/cacheflush.h) except for the erratum handling.
>>> + */
>>> +#define exynos_v7_exit_coherency_flush(level) \
>>> +	asm volatile( \
>>> +	"stmfd	sp!, {fp, ip}\n\t"\
>>> +	"mrc	p15, 0, r0, c1, c0, 0	@ get SCTLR\n\t" \
>>> +	"bic	r0, r0, #"__stringify(CR_C)"\n\t" \
>>> +	"mcr	p15, 0, r0, c1, c0, 0	@ set SCTLR\n\t" \
>>> +	"isb\n\t"\
>>> +	"bl	v7_flush_dcache_"__stringify(level)"\n\t" \
>>> +	"clrex\n\t"\
>>> +	"mrc	p15, 0, r0, c1, c0, 1	@ get ACTLR\n\t" \
>>> +	"bic	r0, r0, #(1 << 6)	@ disable local coherency\n\t" \
>>> +	/* Dummy Load of a device register to avoid Erratum 799270 */ \
>>
>> Wouldn't make sense to add the erratum in the Kconfig and re-use it in the
>> generic v7_exit_coherency_flush macro instead of redefining it ?
>
> The implementation of the erratum (the dummy device register load) is
> platform specific I'm afraid.
>
> Is TC2 also concerned by this erratum, or is this Samsung specific?

Sounds like it is ARM Cortex-A15MP specific:

http://infocenter.arm.com/help/topic/com.arm.doc.epm028090/cortex_a15_mpcore_software_developers_errata_notice_r2_v12.pdf

Page 31.

>>> +	"ldr	r4, [%0]\n\t" \
>>> +	"and	r4, r4, #0\n\t" \
>>> +	"orr	r0, r0, r4\n\t" \
>>> +	"mcr	p15, 0, r0, c1, c0, 1	@ set ACTLR\n\t" \
>>> +	"isb\n\t" \
>>> +	"dsb\n\t" \
>>> +	"ldmfd	sp!, {fp, ip}" \
>>> +	: \
>>> +	: "Ir" (S5P_INFORM0) \
>>> +	: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
>>> +	  "r9", "r10", "lr", "memory")
>>
>>
>>
>>> +/*
>>> + * We can't use regular spinlocks. In the switcher case, it is possible
>>> + * for an outbound CPU to call power_down() after its inbound counterpart
>>> + * is already live using the same logical CPU number which trips lockdep
>>> + * debugging.
>>> + */
>>> +static arch_spinlock_t exynos_mcpm_lock = __ARCH_SPIN_LOCK_UNLOCKED;
>>> +static int
>>> +cpu_use_count[EXYNOS5420_CPUS_PER_CLUSTER][EXYNOS5420_NR_CLUSTERS];
>>> +
>>> +static bool exynos_core_power_state(unsigned int cpu, unsigned int cluster)
>>> +{
>>> +	unsigned int val;
>>> +	unsigned int cpunr = cpu + (cluster * EXYNOS5420_CPUS_PER_CLUSTER);
>>> +
>>> +	val = __raw_readl(EXYNOS_ARM_CORE_STATUS(cpunr)) &
>>> +				EXYNOS_CORE_LOCAL_PWR_EN;
>>> +	return !!val;
>>> +}
>>> +
>>> +static void exynos_core_power_control(unsigned int cpu, unsigned int
>>> cluster,
>>> +						int enable)
>>> +{
>>> +	unsigned int val = EXYNOS_CORE_LOCAL_PWR_DIS;
>>> +	unsigned int cpunr = cpu + (cluster * EXYNOS5420_CPUS_PER_CLUSTER);
>>> +
>>> +	if (exynos_core_power_state(cpu, cluster) == enable)
>>> +		return;
>>> +
>>> +	if (enable)
>>> +		val = EXYNOS_CORE_LOCAL_PWR_EN;
>>> +	__raw_writel(val, EXYNOS_ARM_CORE_CONFIGURATION(cpunr));
>>> +}
>>
>> Same comment as above about sharing these functions.
>>
>> Shouldn't these functions to be reused by hotplug ?
>
> This _is_ hotplug.  Once this MCPM backend is registered, CPU hotplug is
> automatically available.  See arch/arm/common/mcpm_platsmp.c.

Sorry, I think I was not clear. What I meant is : couldn't these macros 
be converted to wrapper functions (powerdown, powerup, powerstate, etc 
...) and be generic for the entire EXYNOS arch, so the different sub 
systems can reuse it (eg. hotplug.c) instead of defining macros + 
read_raw + writel_raw all over the place (eg. hotplug.c + smp.c + mcpm [ 
+ exynos4 dual cpus for cpuidle ]).

Thanks

   -- Daniel
Nicolas Pitre April 22, 2014, 7:56 p.m. UTC | #5
[ Moved Lorenzo up in the addressee list to get his attention ]

On Tue, 22 Apr 2014, Daniel Lezcano wrote:

> On 04/22/2014 05:40 PM, Nicolas Pitre wrote:
> > On Tue, 22 Apr 2014, Daniel Lezcano wrote:
> >
> > > On 04/22/2014 08:17 AM, Abhilash Kesavan wrote:
> > > > +/*
> > > > + * The common v7_exit_coherency_flush API could not be used because of the
> > > > + * Erratum 799270 workaround. This macro is the same as the common one (in
> > > > + * arch/arm/include/asm/cacheflush.h) except for the erratum handling.
> > > > + */
> > > > +#define exynos_v7_exit_coherency_flush(level) \
> > > > +	asm volatile( \
> > > > +	"stmfd	sp!, {fp, ip}\n\t"\
> > > > +	"mrc	p15, 0, r0, c1, c0, 0	@ get SCTLR\n\t" \
> > > > +	"bic	r0, r0, #"__stringify(CR_C)"\n\t" \
> > > > +	"mcr	p15, 0, r0, c1, c0, 0	@ set SCTLR\n\t" \
> > > > +	"isb\n\t"\
> > > > +	"bl	v7_flush_dcache_"__stringify(level)"\n\t" \
> > > > +	"clrex\n\t"\
> > > > +	"mrc	p15, 0, r0, c1, c0, 1	@ get ACTLR\n\t" \
> > > > +	"bic	r0, r0, #(1 << 6)	@ disable local coherency\n\t" \
> > > > +	/* Dummy Load of a device register to avoid Erratum 799270 */ \
> > >
> > > Wouldn't make sense to add the erratum in the Kconfig and re-use it in the
> > > generic v7_exit_coherency_flush macro instead of redefining it ?
> >
> > The implementation of the erratum (the dummy device register load) is
> > platform specific I'm afraid.
> >
> > Is TC2 also concerned by this erratum, or is this Samsung specific?
> 
> Sounds like it is ARM Cortex-A15MP specific:
> 
> http://infocenter.arm.com/help/topic/com.arm.doc.epm028090/cortex_a15_mpcore_software_developers_errata_notice_r2_v12.pdf
> 
> Page 31.

The condition for hitting the erratum is: "The L2 cache block has been 
idle for 256 or more cycles with no memory requests from any core, no 
external snoops, and no ACP requests".

Given that the operation that almost immediately precedes the 
problematic mcr is a call to v7_flush_dcache_louis or 
v7_flush_dcache_all, is this possible that the condition for the erratum 
could ever be met?

If no then we should document that v7_exit_coherency_flush(() is safe 
against erratum 799270 and use it here.

> > > > +	"ldr	r4, [%0]\n\t" \
> > > > +	"and	r4, r4, #0\n\t" \
> > > > +	"orr	r0, r0, r4\n\t" \
> > > > +	"mcr	p15, 0, r0, c1, c0, 1	@ set ACTLR\n\t" \
> > > > +	"isb\n\t" \
> > > > +	"dsb\n\t" \
> > > > +	"ldmfd	sp!, {fp, ip}" \
> > > > +	: \
> > > > +	: "Ir" (S5P_INFORM0) \
> > > > +	: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
> > > > +	  "r9", "r10", "lr", "memory")
> > >
Abhilash Kesavan April 23, 2014, 3:31 p.m. UTC | #6
Hi Daniel,
On Tue, Apr 22, 2014 at 4:51 PM, Daniel Lezcano
<daniel.lezcano@linaro.org> wrote:
> On 04/22/2014 08:17 AM, Abhilash Kesavan wrote:
>>
>> Add machine-dependent MCPM call-backs for Exynos5420. These are used
>> to power up/down the secondary CPUs during boot, shutdown, s2r and
>> switching.
>>
>> Signed-off-by: Thomas Abraham <thomas.ab@samsung.com>
>> Signed-off-by: Inderpal Singh <inderpal.s@samsung.com>
>> Signed-off-by: Andrew Bresticker <abrestic@chromium.org>
>> Signed-off-by: Abhilash Kesavan <a.kesavan@samsung.com>
>> ---
>>   arch/arm/mach-exynos/Kconfig       |    8 +
>>   arch/arm/mach-exynos/Makefile      |    2 +
>>   arch/arm/mach-exynos/mcpm-exynos.c |  345
>> ++++++++++++++++++++++++++++++++++++
>>   arch/arm/mach-exynos/regs-pmu.h    |    2 +
>>   4 files changed, 357 insertions(+)
>>   create mode 100644 arch/arm/mach-exynos/mcpm-exynos.c
>>
>> diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig
>> index fc8bf18..1602abc 100644
>> --- a/arch/arm/mach-exynos/Kconfig
>> +++ b/arch/arm/mach-exynos/Kconfig
>> @@ -110,4 +110,12 @@ config SOC_EXYNOS5440
>>
>>   endmenu
>>
>> +config EXYNOS5420_MCPM
>> +       bool "Exynos5420 Multi-Cluster PM support"
>> +       depends on MCPM && SOC_EXYNOS5420
>> +       select ARM_CCI
>> +       help
>> +         This is needed to provide CPU and cluster power management
>> +         on Exynos5420 implementing big.LITTLE.
>> +
>>   endif
>> diff --git a/arch/arm/mach-exynos/Makefile b/arch/arm/mach-exynos/Makefile
>> index a656dbe..01bc9b9 100644
>> --- a/arch/arm/mach-exynos/Makefile
>> +++ b/arch/arm/mach-exynos/Makefile
>> @@ -29,3 +29,5 @@ obj-$(CONFIG_ARCH_EXYNOS)     += firmware.o
>>
>>   plus_sec := $(call as-instr,.arch_extension sec,+sec)
>>   AFLAGS_exynos-smc.o           :=-Wa,-march=armv7-a$(plus_sec)
>> +
>> +obj-$(CONFIG_EXYNOS5420_MCPM)  += mcpm-exynos.o
>> diff --git a/arch/arm/mach-exynos/mcpm-exynos.c
>> b/arch/arm/mach-exynos/mcpm-exynos.c
>> new file mode 100644
>> index 0000000..49b9031
>> --- /dev/null
>> +++ b/arch/arm/mach-exynos/mcpm-exynos.c
>> @@ -0,0 +1,345 @@
>> +/*
>> + * Copyright (c) 2014 Samsung Electronics Co., Ltd.
>> + *             http://www.samsung.com
>> + *
>> + * arch/arm/mach-exynos/mcpm-exynos.c
>> + *
>> + * Based on arch/arm/mach-vexpress/dcscb.c
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 as
>> + * published by the Free Software Foundation.
>> + */
>> +
>> +#include <linux/kernel.h>
>> +#include <linux/io.h>
>> +#include <linux/spinlock.h>
>> +#include <linux/uaccess.h>
>> +#include <linux/miscdevice.h>
>> +#include <linux/fs.h>
>> +#include <linux/delay.h>
>> +#include <linux/arm-cci.h>
>> +
>> +#include <asm/mcpm.h>
>> +#include <asm/cputype.h>
>> +#include <asm/cp15.h>
>> +
>> +#include <plat/cpu.h>
>> +#include "regs-pmu.h"
>> +
>> +#define EXYNOS5420_CPUS_PER_CLUSTER    4
>> +#define EXYNOS5420_NR_CLUSTERS         2
>> +
>> +/* Secondary CPU entry point */
>> +#define REG_ENTRY_ADDR         (S5P_VA_SYSRAM_NS + 0x1C)
>> +
>> +#define EXYNOS_CORE_LOCAL_PWR_EN               0x3
>> +#define EXYNOS_CORE_LOCAL_PWR_DIS              0x0
>>
>> +#define EXYNOS_ARM_COMMON_CONFIGURATION                S5P_PMUREG(0x2500)
>> +#define EXYNOS_ARM_L2_CONFIGURATION            S5P_PMUREG(0x2600)
>> +
>> +#define EXYNOS_ARM_CORE_CONFIGURATION(_nr)     \
>> +                       (S5P_ARM_CORE0_CONFIGURATION + ((_nr) * 0x80))
>> +#define EXYNOS_ARM_CORE_STATUS(_nr)            \
>> +                       (S5P_ARM_CORE0_STATUS + ((_nr) * 0x80))
>> +
>> +#define EXYNOS_COMMON_CONFIGURATION(_nr)       \
>> +                       (EXYNOS_ARM_COMMON_CONFIGURATION + ((_nr) * 0x80))
>> +#define EXYNOS_COMMON_STATUS(_nr)              \
>> +                       (EXYNOS_COMMON_CONFIGURATION(_nr) + 0x4)
>> +
>> +#define EXYNOS_L2_CONFIGURATION(_nr)           \
>> +                       (EXYNOS_ARM_L2_CONFIGURATION + ((_nr) * 0x80))
>> +#define EXYNOS_L2_STATUS(_nr)                  \
>> +                       (EXYNOS_L2_CONFIGURATION(_nr) + 0x4)
>> +
>
>
> Is it possible to share the definition of those macros with the rest of the
> code via functions, so they can be re-used for the other sub-systems ? eg:
> https://patches.linaro.org/27798/
OK..I will work on making wrapper functions for these. Would these new
functions be better placed in the mcpm code or the pm code ?
>
>
>> +/*
>> + * The common v7_exit_coherency_flush API could not be used because of
>> the
>> + * Erratum 799270 workaround. This macro is the same as the common one
>> (in
>> + * arch/arm/include/asm/cacheflush.h) except for the erratum handling.
>> + */
>> +#define exynos_v7_exit_coherency_flush(level) \
>> +       asm volatile( \
>> +       "stmfd  sp!, {fp, ip}\n\t"\
>> +       "mrc    p15, 0, r0, c1, c0, 0   @ get SCTLR\n\t" \
>> +       "bic    r0, r0, #"__stringify(CR_C)"\n\t" \
>> +       "mcr    p15, 0, r0, c1, c0, 0   @ set SCTLR\n\t" \
>> +       "isb\n\t"\
>> +       "bl     v7_flush_dcache_"__stringify(level)"\n\t" \
>> +       "clrex\n\t"\
>> +       "mrc    p15, 0, r0, c1, c0, 1   @ get ACTLR\n\t" \
>> +       "bic    r0, r0, #(1 << 6)       @ disable local coherency\n\t" \
>> +       /* Dummy Load of a device register to avoid Erratum 799270 */ \
>
>
> Wouldn't make sense to add the erratum in the Kconfig and re-use it in the
> generic v7_exit_coherency_flush macro instead of redefining it ?
I am re-looking at the necessity of this after Nicolas' comments.
>
>
>> +       "ldr    r4, [%0]\n\t" \
>> +       "and    r4, r4, #0\n\t" \
>> +       "orr    r0, r0, r4\n\t" \
>> +       "mcr    p15, 0, r0, c1, c0, 1   @ set ACTLR\n\t" \
>> +       "isb\n\t" \
>> +       "dsb\n\t" \
>> +       "ldmfd  sp!, {fp, ip}" \
>> +       : \
>> +       : "Ir" (S5P_INFORM0) \
>> +       : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
>> +         "r9", "r10", "lr", "memory")
>
>
>
>
>> +/*
>> + * We can't use regular spinlocks. In the switcher case, it is possible
>> + * for an outbound CPU to call power_down() after its inbound counterpart
>> + * is already live using the same logical CPU number which trips lockdep
>> + * debugging.
>> + */
>> +static arch_spinlock_t exynos_mcpm_lock = __ARCH_SPIN_LOCK_UNLOCKED;
>> +static int
>> +cpu_use_count[EXYNOS5420_CPUS_PER_CLUSTER][EXYNOS5420_NR_CLUSTERS];
>> +
>> +static bool exynos_core_power_state(unsigned int cpu, unsigned int
>> cluster)
>> +{
>> +       unsigned int val;
>> +       unsigned int cpunr = cpu + (cluster *
>> EXYNOS5420_CPUS_PER_CLUSTER);
>> +
>> +       val = __raw_readl(EXYNOS_ARM_CORE_STATUS(cpunr)) &
>> +                               EXYNOS_CORE_LOCAL_PWR_EN;
>> +       return !!val;
>> +}
>> +
>> +static void exynos_core_power_control(unsigned int cpu, unsigned int
>> cluster,
>> +                                               int enable)
>> +{
>> +       unsigned int val = EXYNOS_CORE_LOCAL_PWR_DIS;
>> +       unsigned int cpunr = cpu + (cluster *
>> EXYNOS5420_CPUS_PER_CLUSTER);
>> +
>> +       if (exynos_core_power_state(cpu, cluster) == enable)
>> +               return;
>> +
>> +       if (enable)
>> +               val = EXYNOS_CORE_LOCAL_PWR_EN;
>> +       __raw_writel(val, EXYNOS_ARM_CORE_CONFIGURATION(cpunr));
>> +}
>
>
> Same comment as above about sharing these functions.
>
> Shouldn't these functions to be reused by hotplug ?
>
>
>> +static void exynos_cluster_power_control(unsigned int cluster, int
>> enable)
>> +{
>> +       unsigned long timeout = jiffies + msecs_to_jiffies(20);
>> +       unsigned int status, val = EXYNOS_CORE_LOCAL_PWR_DIS;
>> +
>> +       if (enable)
>> +               val = EXYNOS_CORE_LOCAL_PWR_EN;
>> +
>> +       status = __raw_readl(EXYNOS_COMMON_STATUS(cluster));
>> +       if ((status & EXYNOS_CORE_LOCAL_PWR_EN) == val)
>> +               return;
>> +
>> +       __raw_writel(val, EXYNOS_COMMON_CONFIGURATION(cluster));
>> +       /* Wait until cluster power control is applied */
>> +       while (time_before(jiffies, timeout)) {
>> +               status = __raw_readl(EXYNOS_COMMON_STATUS(cluster));
>> +
>> +               if ((status & EXYNOS_CORE_LOCAL_PWR_EN) == val)
>> +                       return;
>> +
>> +               cpu_relax();
>> +       }
>> +       pr_warn("timed out waiting for cluster %u to power %s\n", cluster,
>> +               enable ? "on" : "off");
>> +}
>> +
>> +static int exynos_power_up(unsigned int cpu, unsigned int cluster)
>> +{
>> +       pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
>> +       if (cpu >= EXYNOS5420_CPUS_PER_CLUSTER ||
>> +               cluster >= EXYNOS5420_NR_CLUSTERS)
>> +               return -EINVAL;
>> +
>> +       /*
>> +        * Since this is called with IRQs enabled, and no
>> arch_spin_lock_irq
>> +        * variant exists, we need to disable IRQs manually here.
>> +        */
>> +       local_irq_disable();
>> +       arch_spin_lock(&exynos_mcpm_lock);
>> +
>> +       cpu_use_count[cpu][cluster]++;
>> +       if (cpu_use_count[cpu][cluster] == 1) {
>> +               bool was_cluster_down =
>> +                       __mcpm_cluster_state(cluster) == CLUSTER_DOWN;
>> +
>> +               /*
>> +                * Turn on the cluster (L2/COMMON) and then power on the
>> cores.
>> +                * TODO: Turn off the clusters when all cores in the
>> cluster
>> +                * are down to achieve significant power savings.
>> +                */
>> +               if (was_cluster_down)
>> +                       exynos_cluster_power_control(cluster, 1);
>> +               exynos_core_power_control(cpu, cluster, 1);
>> +
>> +               /* CPU should be powered up there */
>> +               /* Also setup Cluster Power Sequence here */
>> +       } else if (cpu_use_count[cpu][cluster] != 2) {
>> +               /*
>> +                * The only possible values are:
>> +                * 0 = CPU down
>> +                * 1 = CPU (still) up
>> +                * 2 = CPU requested to be up before it had a chance
>> +                *     to actually make itself down.
>> +                * Any other value is a bug.
>> +                */
>> +               BUG();
>> +       }
>> +
>> +       arch_spin_unlock(&exynos_mcpm_lock);
>> +       local_irq_enable();
>> +
>> +       return 0;
>> +}
>> +
>> +static void exynos_power_down(void)
>> +{
>> +       unsigned int mpidr, cpu, cluster, cpumask;
>> +       bool last_man = false, skip_wfi = false;
>> +
>> +       mpidr = read_cpuid_mpidr();
>> +       cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
>> +       cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
>> +       cpumask = (1 << cpu);
>
>
> This variable is not used.
Will remove.
>
>
>> +
>> +       pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
>> +       BUG_ON(cpu >= EXYNOS5420_CPUS_PER_CLUSTER ||
>> +                       cluster >= EXYNOS5420_NR_CLUSTERS);
>> +
>> +       __mcpm_cpu_going_down(cpu, cluster);
>> +
>> +       arch_spin_lock(&exynos_mcpm_lock);
>> +       BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
>> +       cpu_use_count[cpu][cluster]--;
>> +       if (cpu_use_count[cpu][cluster] == 0) {
>> +               int cnt = 0, i;
>> +
>> +               exynos_core_power_control(cpu, cluster, 0);
>> +               for (i = 0; i < EXYNOS5420_CPUS_PER_CLUSTER; i++)
>> +                       cnt += cpu_use_count[i][cluster];
>> +               if (cnt == 0)
>> +                       last_man = true;
>> +       } else if (cpu_use_count[cpu][cluster] == 1) {
>> +               /*
>> +                * A power_up request went ahead of us.
>> +                * Even if we do not want to shut this CPU down,
>> +                * the caller expects a certain state as if the WFI
>> +                * was aborted.  So let's continue with cache cleaning.
>> +                */
>> +               skip_wfi = true;
>> +       } else {
>> +               BUG();
>> +       }
>> +
>> +       if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
>> +               arch_spin_unlock(&exynos_mcpm_lock);
>> +
>> +               /* Flush all cache levels for this cluster. */
>> +               exynos_v7_exit_coherency_flush(all);
>> +
>> +               /*
>> +                * Disable cluster-level coherency by masking
>> +                * incoming snoops and DVM messages:
>> +                */
>> +               cci_disable_port_by_cpu(mpidr);
>> +
>> +               __mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
>> +       } else {
>> +               arch_spin_unlock(&exynos_mcpm_lock);
>> +
>> +               /* Disable and flush the local CPU cache. */
>> +               exynos_v7_exit_coherency_flush(louis);
>> +       }
>> +
>> +       __mcpm_cpu_down(cpu, cluster);
>> +
>> +       /* Now we are prepared for power-down, do it: */
>> +       if (!skip_wfi)
>> +               wfi();
>> +
>> +       /* Not dead at this point?  Let our caller cope. */
>> +}
>> +
>> +static int exynos_power_down_finish(unsigned int cpu, unsigned int
>> cluster)
>> +{
>> +       pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
>> +       BUG_ON(cpu >= EXYNOS5420_CPUS_PER_CLUSTER ||
>> +                       cluster >= EXYNOS5420_NR_CLUSTERS);
>> +
>> +       /* Wait for the core state to be OFF */
>> +       while (exynos_core_power_state(cpu, cluster) != 0x0)
>> +               ;
>
>
> As this is a potential infinite loop, a timeout may be desirable, no ?
Will be re-working this function.
>
>
>> +
>> +       return 0; /* success: the CPU is halted */
>> +}
>> +
>> +static const struct mcpm_platform_ops exynos_power_ops = {
>> +       .power_up               = exynos_power_up,
>> +       .power_down             = exynos_power_down,
>> +       .power_down_finish      = exynos_power_down_finish,
>> +};
>> +
>> +static void __init exynos_mcpm_usage_count_init(void)
>> +{
>> +       unsigned int mpidr, cpu, cluster;
>> +
>> +       mpidr = read_cpuid_mpidr();
>> +       cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
>> +       cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
>> +
>> +       pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
>> +       BUG_ON(cpu >= EXYNOS5420_CPUS_PER_CLUSTER  ||
>> +                       cluster >= EXYNOS5420_NR_CLUSTERS);
>> +
>> +       cpu_use_count[cpu][cluster] = 1;
>> +}
>> +
>> +/*
>> + * Enable cluster-level coherency, in preparation for turning on the MMU.
>> + */
>> +static void __naked exynos_pm_power_up_setup(unsigned int affinity_level)
>> +{
>> +       asm volatile ("\n"
>> +       "cmp    r0, #1\n"
>> +       "bxne   lr\n"
>> +       "b      cci_enable_port_for_self");
>> +}
>> +
>> +static int __init exynos_mcpm_init(void)
>> +{
>> +       int ret = 0;
>> +
>> +       if (!soc_is_exynos5420())
>> +               return -ENODEV;
>> +
>> +       if (!cci_probed())
>> +               return -ENODEV;
>> +
>> +       /*
>> +        * To increase the stability of KFC reset we need to program
>> +        * the PMU SPARE3 register
>> +        */
>> +       __raw_writel(EXYNOS5420_SWRESET_KFC_SEL, S5P_PMU_SPARE3);
>> +
>> +       exynos_mcpm_usage_count_init();
>> +
>> +       ret = mcpm_platform_register(&exynos_power_ops);
>> +       if (!ret)
>> +               ret = mcpm_sync_init(exynos_pm_power_up_setup);
>> +       if (ret)
>> +               return ret;
>> +
>> +       mcpm_smp_set_ops();
>> +
>> +       pr_info("Exynos MCPM support installed\n");
>> +
>> +       /*
>> +        * Future entries into the kernel can now go
>> +        * through the cluster entry vectors.
>> +        */
>> +       __raw_writel(virt_to_phys(mcpm_entry_point),
>> +               REG_ENTRY_ADDR);
>> +
>> +       return ret;
>> +}
>> +
>> +early_initcall(exynos_mcpm_init);
>> diff --git a/arch/arm/mach-exynos/regs-pmu.h
>> b/arch/arm/mach-exynos/regs-pmu.h
>> index cfbfc575..43fe7a0 100644
>> --- a/arch/arm/mach-exynos/regs-pmu.h
>> +++ b/arch/arm/mach-exynos/regs-pmu.h
>> @@ -38,6 +38,7 @@
>>   #define S5P_INFORM5                           S5P_PMUREG(0x0814)
>>   #define S5P_INFORM6                           S5P_PMUREG(0x0818)
>>   #define S5P_INFORM7                           S5P_PMUREG(0x081C)
>> +#define S5P_PMU_SPARE3                         S5P_PMUREG(0x090C)
>>
>>   #define EXYNOS_IROM_DATA2                     S5P_PMUREG(0x0988)
>>
>> @@ -540,5 +541,6 @@
>>   #define EXYNOS5420_KFC_USE_STANDBY_WFE3                               (1
>> << 23)
>>
>>   #define DUR_WAIT_RESET                                0xF
>> +#define EXYNOS5420_SWRESET_KFC_SEL             0x3
>>
>>   #endif /* __ASM_ARCH_REGS_PMU_H */
>>
>
>
> --
>  <http://www.linaro.org/> Linaro.org ? Open source software for ARM SoCs
>
> Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
> <http://twitter.com/#!/linaroorg> Twitter |
> <http://www.linaro.org/linaro-blog/> Blog
>
>
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Abhilash Kesavan April 23, 2014, 3:31 p.m. UTC | #7
Hi Nicolas,

On Wed, Apr 23, 2014 at 12:48 AM, Nicolas Pitre
<nicolas.pitre@linaro.org> wrote:
> On Tue, 22 Apr 2014, Abhilash Kesavan wrote:
>
>> Add machine-dependent MCPM call-backs for Exynos5420. These are used
>> to power up/down the secondary CPUs during boot, shutdown, s2r and
>> switching.
>>
>> Signed-off-by: Thomas Abraham <thomas.ab@samsung.com>
>> Signed-off-by: Inderpal Singh <inderpal.s@samsung.com>
>> Signed-off-by: Andrew Bresticker <abrestic@chromium.org>
>> Signed-off-by: Abhilash Kesavan <a.kesavan@samsung.com>
>
> Getting there!  See comments below.
>
> [...]
>> diff --git a/arch/arm/mach-exynos/mcpm-exynos.c b/arch/arm/mach-exynos/mcpm-exynos.c
>> new file mode 100644
>> index 0000000..49b9031
>> --- /dev/null
>> +++ b/arch/arm/mach-exynos/mcpm-exynos.c
>> @@ -0,0 +1,345 @@
>> +/*
>> + * Copyright (c) 2014 Samsung Electronics Co., Ltd.
>> + *           http://www.samsung.com
>> + *
>> + * arch/arm/mach-exynos/mcpm-exynos.c
>> + *
>> + * Based on arch/arm/mach-vexpress/dcscb.c
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 as
>> + * published by the Free Software Foundation.
>> + */
>> +
>> +#include <linux/kernel.h>
>> +#include <linux/io.h>
>> +#include <linux/spinlock.h>
>> +#include <linux/uaccess.h>
>> +#include <linux/miscdevice.h>
>> +#include <linux/fs.h>
>> +#include <linux/delay.h>
>> +#include <linux/arm-cci.h>
>> +
>> +#include <asm/mcpm.h>
>> +#include <asm/cputype.h>
>> +#include <asm/cp15.h>
>> +
>> +#include <plat/cpu.h>
>> +#include "regs-pmu.h"
>> +
>> +#define EXYNOS5420_CPUS_PER_CLUSTER  4
>> +#define EXYNOS5420_NR_CLUSTERS               2
>> +
>> +/* Secondary CPU entry point */
>> +#define REG_ENTRY_ADDR               (S5P_VA_SYSRAM_NS + 0x1C)
>> +
>> +#define EXYNOS_CORE_LOCAL_PWR_EN             0x3
>> +#define EXYNOS_CORE_LOCAL_PWR_DIS            0x0
>> +
>> +#define EXYNOS_ARM_COMMON_CONFIGURATION              S5P_PMUREG(0x2500)
>> +#define EXYNOS_ARM_L2_CONFIGURATION          S5P_PMUREG(0x2600)
>> +
>> +#define EXYNOS_ARM_CORE_CONFIGURATION(_nr)   \
>> +                     (S5P_ARM_CORE0_CONFIGURATION + ((_nr) * 0x80))
>> +#define EXYNOS_ARM_CORE_STATUS(_nr)          \
>> +                     (S5P_ARM_CORE0_STATUS + ((_nr) * 0x80))
>> +
>> +#define EXYNOS_COMMON_CONFIGURATION(_nr)     \
>> +                     (EXYNOS_ARM_COMMON_CONFIGURATION + ((_nr) * 0x80))
>> +#define EXYNOS_COMMON_STATUS(_nr)            \
>> +                     (EXYNOS_COMMON_CONFIGURATION(_nr) + 0x4)
>> +
>> +#define EXYNOS_L2_CONFIGURATION(_nr)         \
>> +                     (EXYNOS_ARM_L2_CONFIGURATION + ((_nr) * 0x80))
>> +#define EXYNOS_L2_STATUS(_nr)                        \
>> +                     (EXYNOS_L2_CONFIGURATION(_nr) + 0x4)
>> +
>> +/*
>> + * The common v7_exit_coherency_flush API could not be used because of the
>> + * Erratum 799270 workaround. This macro is the same as the common one (in
>> + * arch/arm/include/asm/cacheflush.h) except for the erratum handling.
>> + */
>> +#define exynos_v7_exit_coherency_flush(level) \
>> +     asm volatile( \
>> +     "stmfd  sp!, {fp, ip}\n\t"\
>> +     "mrc    p15, 0, r0, c1, c0, 0   @ get SCTLR\n\t" \
>> +     "bic    r0, r0, #"__stringify(CR_C)"\n\t" \
>> +     "mcr    p15, 0, r0, c1, c0, 0   @ set SCTLR\n\t" \
>> +     "isb\n\t"\
>> +     "bl     v7_flush_dcache_"__stringify(level)"\n\t" \
>> +     "clrex\n\t"\
>> +     "mrc    p15, 0, r0, c1, c0, 1   @ get ACTLR\n\t" \
>> +     "bic    r0, r0, #(1 << 6)       @ disable local coherency\n\t" \
>> +     /* Dummy Load of a device register to avoid Erratum 799270 */ \
>> +     "ldr    r4, [%0]\n\t" \
>> +     "and    r4, r4, #0\n\t" \
>> +     "orr    r0, r0, r4\n\t" \
>> +     "mcr    p15, 0, r0, c1, c0, 1   @ set ACTLR\n\t" \
>> +     "isb\n\t" \
>> +     "dsb\n\t" \
>> +     "ldmfd  sp!, {fp, ip}" \
>> +     : \
>> +     : "Ir" (S5P_INFORM0) \
>> +     : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
>> +       "r9", "r10", "lr", "memory")
>> +
>> +/*
>> + * We can't use regular spinlocks. In the switcher case, it is possible
>> + * for an outbound CPU to call power_down() after its inbound counterpart
>> + * is already live using the same logical CPU number which trips lockdep
>> + * debugging.
>> + */
>> +static arch_spinlock_t exynos_mcpm_lock = __ARCH_SPIN_LOCK_UNLOCKED;
>> +static int
>> +cpu_use_count[EXYNOS5420_CPUS_PER_CLUSTER][EXYNOS5420_NR_CLUSTERS];
>> +
>> +static bool exynos_core_power_state(unsigned int cpu, unsigned int cluster)
>> +{
>> +     unsigned int val;
>> +     unsigned int cpunr = cpu + (cluster * EXYNOS5420_CPUS_PER_CLUSTER);
>> +
>> +     val = __raw_readl(EXYNOS_ARM_CORE_STATUS(cpunr)) &
>> +                             EXYNOS_CORE_LOCAL_PWR_EN;
>> +     return !!val;
>> +}
>> +
>> +static void exynos_core_power_control(unsigned int cpu, unsigned int cluster,
>> +                                             int enable)
>> +{
>> +     unsigned int val = EXYNOS_CORE_LOCAL_PWR_DIS;
>> +     unsigned int cpunr = cpu + (cluster * EXYNOS5420_CPUS_PER_CLUSTER);
>> +
>> +     if (exynos_core_power_state(cpu, cluster) == enable)
>> +             return;
>
> I think this isn't right.
>
> Normally, the power _status_ and the power _control_ are not always in
> sync.  It is possible for the power control to be set to OFF but the
> status won't reflect that until the CPU has executed WFI for example.
>
> Here we don't care about the actual status.  What matters is the
> control.  In the exynos_power_up case, if the control bit says "power
> off" but the status bit says "it isn't off yet" then the code won't turn
> the control bit back on.
>
> For example, it should be possible for CPU1 to call exynos_power_up()
> while CPU0 is still executing code in exynos_power_down() right after
> releasing exynos_mcpm_lock but before WFI is executed.  There is a cache
> flush during that window which might take some time to execute, making
> this scenario more likely that you might think.
>
> What we want here is simply to set the power control bit back on.  No
> need to test its previous status as the value of cpu_use_count already
> reflects it.  And if the affected CPU didn't reach WFI yet, then the
> execution of WFI will simply return and the CPU will be soft rebooted.
>
> And in the exynos_power_down() case the CPU turning the control bit off
> is always doing so for itself, therefore its status bit must obviously
> show that it is running.  Testing it is therefore redundant.
>
> Therefore you should be able to simplify this greatly.
Thanks for the detailed explanation. I will remove the status checks
and only activate the control bits.
>
>> +     if (enable)
>> +             val = EXYNOS_CORE_LOCAL_PWR_EN;
>> +     __raw_writel(val, EXYNOS_ARM_CORE_CONFIGURATION(cpunr));
>> +}
>> +
>> +static void exynos_cluster_power_control(unsigned int cluster, int enable)
>> +{
>> +     unsigned long timeout = jiffies + msecs_to_jiffies(20);
>> +     unsigned int status, val = EXYNOS_CORE_LOCAL_PWR_DIS;
>> +
>> +     if (enable)
>> +             val = EXYNOS_CORE_LOCAL_PWR_EN;
>> +
>> +     status = __raw_readl(EXYNOS_COMMON_STATUS(cluster));
>> +     if ((status & EXYNOS_CORE_LOCAL_PWR_EN) == val)
>> +             return;
>
> Same comment as above.  You shouldn't need to check current status
> before adjusting the wanted control.  If you get here that's because the
> control is never what you want it to be.
OK.
>
>> +     __raw_writel(val, EXYNOS_COMMON_CONFIGURATION(cluster));
>> +     /* Wait until cluster power control is applied */
>> +     while (time_before(jiffies, timeout)) {
>> +             status = __raw_readl(EXYNOS_COMMON_STATUS(cluster));
>> +
>> +             if ((status & EXYNOS_CORE_LOCAL_PWR_EN) == val)
>> +                     return;
>> +
>> +             cpu_relax();
>> +     }
>
> As mentioned in a previous email, it is possible for the CPU executing
> this code to be the only CPU currently alive.  And in this code path
> IRQs are disabled.  That means nothing will ever increase jiffies in
> that case and the timeout will never expire.
>
> If in practice there is only a few loops to perform here, I'd suggest
> simply looping, say, 100 times and bail out if that fails.
>
> Oh and if the poll loop fails then you must turn the power control back
> off and return an error via exynos_power_up().
Will change the polling loop and add error handling in case of failure..
>
>> +     pr_warn("timed out waiting for cluster %u to power %s\n", cluster,
>> +             enable ? "on" : "off");
>> +}
>> +
>> +static int exynos_power_up(unsigned int cpu, unsigned int cluster)
>> +{
>> +     pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
>> +     if (cpu >= EXYNOS5420_CPUS_PER_CLUSTER ||
>> +             cluster >= EXYNOS5420_NR_CLUSTERS)
>> +             return -EINVAL;
>> +
>> +     /*
>> +      * Since this is called with IRQs enabled, and no arch_spin_lock_irq
>> +      * variant exists, we need to disable IRQs manually here.
>> +      */
>> +     local_irq_disable();
>> +     arch_spin_lock(&exynos_mcpm_lock);
>> +
>> +     cpu_use_count[cpu][cluster]++;
>> +     if (cpu_use_count[cpu][cluster] == 1) {
>> +             bool was_cluster_down =
>> +                     __mcpm_cluster_state(cluster) == CLUSTER_DOWN;
>> +
>> +             /*
>> +              * Turn on the cluster (L2/COMMON) and then power on the cores.
>> +              * TODO: Turn off the clusters when all cores in the cluster
>> +              * are down to achieve significant power savings.
>> +              */
>> +             if (was_cluster_down)
>> +                     exynos_cluster_power_control(cluster, 1);
>> +             exynos_core_power_control(cpu, cluster, 1);
>> +
>> +             /* CPU should be powered up there */
>> +             /* Also setup Cluster Power Sequence here */
>> +     } else if (cpu_use_count[cpu][cluster] != 2) {
>> +             /*
>> +              * The only possible values are:
>> +              * 0 = CPU down
>> +              * 1 = CPU (still) up
>> +              * 2 = CPU requested to be up before it had a chance
>> +              *     to actually make itself down.
>> +              * Any other value is a bug.
>> +              */
>> +             BUG();
>> +     }
>> +
>> +     arch_spin_unlock(&exynos_mcpm_lock);
>> +     local_irq_enable();
>> +
>> +     return 0;
>> +}
>> +
>> +static void exynos_power_down(void)
>> +{
>> +     unsigned int mpidr, cpu, cluster, cpumask;
>> +     bool last_man = false, skip_wfi = false;
>> +
>> +     mpidr = read_cpuid_mpidr();
>> +     cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
>> +     cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
>> +     cpumask = (1 << cpu);
>> +
>> +     pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
>> +     BUG_ON(cpu >= EXYNOS5420_CPUS_PER_CLUSTER ||
>> +                     cluster >= EXYNOS5420_NR_CLUSTERS);
>> +
>> +     __mcpm_cpu_going_down(cpu, cluster);
>> +
>> +     arch_spin_lock(&exynos_mcpm_lock);
>> +     BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
>> +     cpu_use_count[cpu][cluster]--;
>> +     if (cpu_use_count[cpu][cluster] == 0) {
>> +             int cnt = 0, i;
>> +
>> +             exynos_core_power_control(cpu, cluster, 0);
>> +             for (i = 0; i < EXYNOS5420_CPUS_PER_CLUSTER; i++)
>> +                     cnt += cpu_use_count[i][cluster];
>> +             if (cnt == 0)
>> +                     last_man = true;
>> +     } else if (cpu_use_count[cpu][cluster] == 1) {
>> +             /*
>> +              * A power_up request went ahead of us.
>> +              * Even if we do not want to shut this CPU down,
>> +              * the caller expects a certain state as if the WFI
>> +              * was aborted.  So let's continue with cache cleaning.
>> +              */
>> +             skip_wfi = true;
>> +     } else {
>> +             BUG();
>> +     }
>> +
>> +     if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
>> +             arch_spin_unlock(&exynos_mcpm_lock);
>> +
>> +             /* Flush all cache levels for this cluster. */
>> +             exynos_v7_exit_coherency_flush(all);
>> +
>> +             /*
>> +              * Disable cluster-level coherency by masking
>> +              * incoming snoops and DVM messages:
>> +              */
>> +             cci_disable_port_by_cpu(mpidr);
>> +
>> +             __mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
>> +     } else {
>> +             arch_spin_unlock(&exynos_mcpm_lock);
>> +
>> +             /* Disable and flush the local CPU cache. */
>> +             exynos_v7_exit_coherency_flush(louis);
>> +     }
>> +
>> +     __mcpm_cpu_down(cpu, cluster);
>> +
>> +     /* Now we are prepared for power-down, do it: */
>> +     if (!skip_wfi)
>> +             wfi();
>> +
>> +     /* Not dead at this point?  Let our caller cope. */
>> +}
>> +
>> +static int exynos_power_down_finish(unsigned int cpu, unsigned int cluster)
>> +{
>> +     pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
>> +     BUG_ON(cpu >= EXYNOS5420_CPUS_PER_CLUSTER ||
>> +                     cluster >= EXYNOS5420_NR_CLUSTERS);
>> +
>> +     /* Wait for the core state to be OFF */
>> +     while (exynos_core_power_state(cpu, cluster) != 0x0)
>> +             ;
>> +
>
> *Here* is the place to actually establish a timeout.  Please see
> tc2_pm_power_down_finish() for example.
>
> Also beware that this method is going to be renamed once RMK applies the
> following patch:
>
> http://www.arm.linux.org.uk/developer/patches/viewpatch.php?id=8029/1
>
I will re-work this function referencing the tc2 code. v3 will be posted soon.

Regards,
Abhilash
>
>
>
>
>> +     return 0; /* success: the CPU is halted */
>> +}
>> +
>> +static const struct mcpm_platform_ops exynos_power_ops = {
>> +     .power_up               = exynos_power_up,
>> +     .power_down             = exynos_power_down,
>> +     .power_down_finish      = exynos_power_down_finish,
>> +};
>> +
>> +static void __init exynos_mcpm_usage_count_init(void)
>> +{
>> +     unsigned int mpidr, cpu, cluster;
>> +
>> +     mpidr = read_cpuid_mpidr();
>> +     cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
>> +     cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
>> +
>> +     pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
>> +     BUG_ON(cpu >= EXYNOS5420_CPUS_PER_CLUSTER  ||
>> +                     cluster >= EXYNOS5420_NR_CLUSTERS);
>> +
>> +     cpu_use_count[cpu][cluster] = 1;
>> +}
>> +
>> +/*
>> + * Enable cluster-level coherency, in preparation for turning on the MMU.
>> + */
>> +static void __naked exynos_pm_power_up_setup(unsigned int affinity_level)
>> +{
>> +     asm volatile ("\n"
>> +     "cmp    r0, #1\n"
>> +     "bxne   lr\n"
>> +     "b      cci_enable_port_for_self");
>> +}
>> +
>> +static int __init exynos_mcpm_init(void)
>> +{
>> +     int ret = 0;
>> +
>> +     if (!soc_is_exynos5420())
>> +             return -ENODEV;
>> +
>> +     if (!cci_probed())
>> +             return -ENODEV;
>> +
>> +     /*
>> +      * To increase the stability of KFC reset we need to program
>> +      * the PMU SPARE3 register
>> +      */
>> +     __raw_writel(EXYNOS5420_SWRESET_KFC_SEL, S5P_PMU_SPARE3);
>> +
>> +     exynos_mcpm_usage_count_init();
>> +
>> +     ret = mcpm_platform_register(&exynos_power_ops);
>> +     if (!ret)
>> +             ret = mcpm_sync_init(exynos_pm_power_up_setup);
>> +     if (ret)
>> +             return ret;
>> +
>> +     mcpm_smp_set_ops();
>> +
>> +     pr_info("Exynos MCPM support installed\n");
>> +
>> +     /*
>> +      * Future entries into the kernel can now go
>> +      * through the cluster entry vectors.
>> +      */
>> +     __raw_writel(virt_to_phys(mcpm_entry_point),
>> +             REG_ENTRY_ADDR);
>> +
>> +     return ret;
>> +}
>> +
>> +early_initcall(exynos_mcpm_init);
>> diff --git a/arch/arm/mach-exynos/regs-pmu.h b/arch/arm/mach-exynos/regs-pmu.h
>> index cfbfc575..43fe7a0 100644
>> --- a/arch/arm/mach-exynos/regs-pmu.h
>> +++ b/arch/arm/mach-exynos/regs-pmu.h
>> @@ -38,6 +38,7 @@
>>  #define S5P_INFORM5                          S5P_PMUREG(0x0814)
>>  #define S5P_INFORM6                          S5P_PMUREG(0x0818)
>>  #define S5P_INFORM7                          S5P_PMUREG(0x081C)
>> +#define S5P_PMU_SPARE3                               S5P_PMUREG(0x090C)
>>
>>  #define EXYNOS_IROM_DATA2                    S5P_PMUREG(0x0988)
>>
>> @@ -540,5 +541,6 @@
>>  #define EXYNOS5420_KFC_USE_STANDBY_WFE3                              (1 << 23)
>>
>>  #define DUR_WAIT_RESET                               0xF
>> +#define EXYNOS5420_SWRESET_KFC_SEL           0x3
>>
>>  #endif /* __ASM_ARCH_REGS_PMU_H */
>> --
>> 1.7.9.5
>>
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Abhilash Kesavan April 23, 2014, 3:31 p.m. UTC | #8
Hi Nicolas,

On Wed, Apr 23, 2014 at 1:26 AM, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
>
> [ Moved Lorenzo up in the addressee list to get his attention ]
>
> On Tue, 22 Apr 2014, Daniel Lezcano wrote:
>
>> On 04/22/2014 05:40 PM, Nicolas Pitre wrote:
>> > On Tue, 22 Apr 2014, Daniel Lezcano wrote:
>> >
>> > > On 04/22/2014 08:17 AM, Abhilash Kesavan wrote:
>> > > > +/*
>> > > > + * The common v7_exit_coherency_flush API could not be used because of the
>> > > > + * Erratum 799270 workaround. This macro is the same as the common one (in
>> > > > + * arch/arm/include/asm/cacheflush.h) except for the erratum handling.
>> > > > + */
>> > > > +#define exynos_v7_exit_coherency_flush(level) \
>> > > > +       asm volatile( \
>> > > > +       "stmfd  sp!, {fp, ip}\n\t"\
>> > > > +       "mrc    p15, 0, r0, c1, c0, 0   @ get SCTLR\n\t" \
>> > > > +       "bic    r0, r0, #"__stringify(CR_C)"\n\t" \
>> > > > +       "mcr    p15, 0, r0, c1, c0, 0   @ set SCTLR\n\t" \
>> > > > +       "isb\n\t"\
>> > > > +       "bl     v7_flush_dcache_"__stringify(level)"\n\t" \
>> > > > +       "clrex\n\t"\
>> > > > +       "mrc    p15, 0, r0, c1, c0, 1   @ get ACTLR\n\t" \
>> > > > +       "bic    r0, r0, #(1 << 6)       @ disable local coherency\n\t" \
>> > > > +       /* Dummy Load of a device register to avoid Erratum 799270 */ \
>> > >
>> > > Wouldn't make sense to add the erratum in the Kconfig and re-use it in the
>> > > generic v7_exit_coherency_flush macro instead of redefining it ?
>> >
>> > The implementation of the erratum (the dummy device register load) is
>> > platform specific I'm afraid.
>> >
>> > Is TC2 also concerned by this erratum, or is this Samsung specific?
>>
>> Sounds like it is ARM Cortex-A15MP specific:
>>
>> http://infocenter.arm.com/help/topic/com.arm.doc.epm028090/cortex_a15_mpcore_software_developers_errata_notice_r2_v12.pdf
>>
>> Page 31.
>
> The condition for hitting the erratum is: "The L2 cache block has been
> idle for 256 or more cycles with no memory requests from any core, no
> external snoops, and no ACP requests".
>
> Given that the operation that almost immediately precedes the
> problematic mcr is a call to v7_flush_dcache_louis or
> v7_flush_dcache_all, is this possible that the condition for the erratum
> could ever be met?
>
> If no then we should document that v7_exit_coherency_flush(() is safe
> against erratum 799270 and use it here.
We have L2 dynamic clock gating enabled  (Force L2 logic clock enable
bit in L2ACTLR disabled) for our SoC. During initial development we
added the workaround to be on the safe side.
However, as you pointed out it takes 256 idle cycles for the L2 clock
gating to kick in and so we might never hit the erratum with
v7_exit_coherency_flush(). I will do some ageing tests without the
dummy load and confirm.

Regards,
Abhilash
>
>> > > > +       "ldr    r4, [%0]\n\t" \
>> > > > +       "and    r4, r4, #0\n\t" \
>> > > > +       "orr    r0, r0, r4\n\t" \
>> > > > +       "mcr    p15, 0, r0, c1, c0, 1   @ set ACTLR\n\t" \
>> > > > +       "isb\n\t" \
>> > > > +       "dsb\n\t" \
>> > > > +       "ldmfd  sp!, {fp, ip}" \
>> > > > +       : \
>> > > > +       : "Ir" (S5P_INFORM0) \
>> > > > +       : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
>> > > > +         "r9", "r10", "lr", "memory")
>> > >
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Lorenzo Pieralisi April 23, 2014, 4:13 p.m. UTC | #9
On Tue, Apr 22, 2014 at 08:56:23PM +0100, Nicolas Pitre wrote:
> 
> [ Moved Lorenzo up in the addressee list to get his attention ]

Sorry for the delay in replying.

> On Tue, 22 Apr 2014, Daniel Lezcano wrote:
> 
> > On 04/22/2014 05:40 PM, Nicolas Pitre wrote:
> > > On Tue, 22 Apr 2014, Daniel Lezcano wrote:
> > >
> > > > On 04/22/2014 08:17 AM, Abhilash Kesavan wrote:
> > > > > +/*
> > > > > + * The common v7_exit_coherency_flush API could not be used because of the
> > > > > + * Erratum 799270 workaround. This macro is the same as the common one (in
> > > > > + * arch/arm/include/asm/cacheflush.h) except for the erratum handling.
> > > > > + */
> > > > > +#define exynos_v7_exit_coherency_flush(level) \
> > > > > +	asm volatile( \
> > > > > +	"stmfd	sp!, {fp, ip}\n\t"\
> > > > > +	"mrc	p15, 0, r0, c1, c0, 0	@ get SCTLR\n\t" \
> > > > > +	"bic	r0, r0, #"__stringify(CR_C)"\n\t" \
> > > > > +	"mcr	p15, 0, r0, c1, c0, 0	@ set SCTLR\n\t" \
> > > > > +	"isb\n\t"\
> > > > > +	"bl	v7_flush_dcache_"__stringify(level)"\n\t" \
> > > > > +	"clrex\n\t"\
> > > > > +	"mrc	p15, 0, r0, c1, c0, 1	@ get ACTLR\n\t" \
> > > > > +	"bic	r0, r0, #(1 << 6)	@ disable local coherency\n\t" \
> > > > > +	/* Dummy Load of a device register to avoid Erratum 799270 */ \
> > > >
> > > > Wouldn't make sense to add the erratum in the Kconfig and re-use it in the
> > > > generic v7_exit_coherency_flush macro instead of redefining it ?
> > >
> > > The implementation of the erratum (the dummy device register load) is
> > > platform specific I'm afraid.
> > >
> > > Is TC2 also concerned by this erratum, or is this Samsung specific?
> > 
> > Sounds like it is ARM Cortex-A15MP specific:
> > 
> > http://infocenter.arm.com/help/topic/com.arm.doc.epm028090/cortex_a15_mpcore_software_developers_errata_notice_r2_v12.pdf
> > 
> > Page 31.
> 
> The condition for hitting the erratum is: "The L2 cache block has been 
> idle for 256 or more cycles with no memory requests from any core, no 
> external snoops, and no ACP requests".
> 
> Given that the operation that almost immediately precedes the 
> problematic mcr is a call to v7_flush_dcache_louis or 
> v7_flush_dcache_all, is this possible that the condition for the erratum 
> could ever be met?

Yes, if we execute out of the I-cache (no-data accesses) I think this might be
triggered.

On a side note, the errata affects also the setting of SMP bit
(__v7_ca15mp_setup and cpu_v7_do_resume) but we do nothing there (it
is a different situation though, MMU there is off and D-cache off...but
I-cache is allowed to be on, so I am actually checking if we should be doing
something there too).

> If no then we should document that v7_exit_coherency_flush(() is safe 
> against erratum 799270 and use it here.

Eh, I wish we could use it safely, I am currently investigating and will get
back to you asap.

Lorenzo

> > > > > +	"ldr	r4, [%0]\n\t" \
> > > > > +	"and	r4, r4, #0\n\t" \
> > > > > +	"orr	r0, r0, r4\n\t" \
> > > > > +	"mcr	p15, 0, r0, c1, c0, 1	@ set ACTLR\n\t" \
> > > > > +	"isb\n\t" \
> > > > > +	"dsb\n\t" \
> > > > > +	"ldmfd	sp!, {fp, ip}" \
> > > > > +	: \
> > > > > +	: "Ir" (S5P_INFORM0) \
> > > > > +	: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
> > > > > +	  "r9", "r10", "lr", "memory")
> > > >
>
Daniel Lezcano April 23, 2014, 8:56 p.m. UTC | #10
On 04/23/2014 05:31 PM, Abhilash Kesavan wrote:
> Hi Daniel,
> On Tue, Apr 22, 2014 at 4:51 PM, Daniel Lezcano
> <daniel.lezcano@linaro.org> wrote:
>> On 04/22/2014 08:17 AM, Abhilash Kesavan wrote:
>>>
>>> Add machine-dependent MCPM call-backs for Exynos5420. These are used
>>> to power up/down the secondary CPUs during boot, shutdown, s2r and
>>> switching.
>>>
>>> Signed-off-by: Thomas Abraham <thomas.ab@samsung.com>
>>> Signed-off-by: Inderpal Singh <inderpal.s@samsung.com>
>>> Signed-off-by: Andrew Bresticker <abrestic@chromium.org>
>>> Signed-off-by: Abhilash Kesavan <a.kesavan@samsung.com>
>>> ---

[ ... ]

>>> +#include <plat/cpu.h>
>>> +#include "regs-pmu.h"
>>> +
>>> +#define EXYNOS5420_CPUS_PER_CLUSTER    4
>>> +#define EXYNOS5420_NR_CLUSTERS         2
>>> +
>>> +/* Secondary CPU entry point */
>>> +#define REG_ENTRY_ADDR         (S5P_VA_SYSRAM_NS + 0x1C)
>>> +
>>> +#define EXYNOS_CORE_LOCAL_PWR_EN               0x3
>>> +#define EXYNOS_CORE_LOCAL_PWR_DIS              0x0
>>>
>>> +#define EXYNOS_ARM_COMMON_CONFIGURATION                S5P_PMUREG(0x2500)
>>> +#define EXYNOS_ARM_L2_CONFIGURATION            S5P_PMUREG(0x2600)
>>> +
>>> +#define EXYNOS_ARM_CORE_CONFIGURATION(_nr)     \
>>> +                       (S5P_ARM_CORE0_CONFIGURATION + ((_nr) * 0x80))
>>> +#define EXYNOS_ARM_CORE_STATUS(_nr)            \
>>> +                       (S5P_ARM_CORE0_STATUS + ((_nr) * 0x80))
>>> +
>>> +#define EXYNOS_COMMON_CONFIGURATION(_nr)       \
>>> +                       (EXYNOS_ARM_COMMON_CONFIGURATION + ((_nr) * 0x80))
>>> +#define EXYNOS_COMMON_STATUS(_nr)              \
>>> +                       (EXYNOS_COMMON_CONFIGURATION(_nr) + 0x4)
>>> +
>>> +#define EXYNOS_L2_CONFIGURATION(_nr)           \
>>> +                       (EXYNOS_ARM_L2_CONFIGURATION + ((_nr) * 0x80))
>>> +#define EXYNOS_L2_STATUS(_nr)                  \
>>> +                       (EXYNOS_L2_CONFIGURATION(_nr) + 0x4)
>>> +
>>
>>
>> Is it possible to share the definition of those macros with the rest of the
>> code via functions, so they can be re-used for the other sub-systems ? eg:
>> https://patches.linaro.org/27798/
> OK..I will work on making wrapper functions for these. Would these new
> functions be better placed in the mcpm code or the pm code ?

Hi Abhilash,

yes, in the pm code. That would make more sense.

Thanks
   -- Daniel
Abhilash Kesavan June 17, 2014, 2:38 a.m. UTC | #11
Hi Lorenzo,

On Wed, Apr 23, 2014 at 9:43 PM, Lorenzo Pieralisi
<lorenzo.pieralisi@arm.com> wrote:
> On Tue, Apr 22, 2014 at 08:56:23PM +0100, Nicolas Pitre wrote:
>>
>> [ Moved Lorenzo up in the addressee list to get his attention ]
>
> Sorry for the delay in replying.
>
>> On Tue, 22 Apr 2014, Daniel Lezcano wrote:
>>
>> > On 04/22/2014 05:40 PM, Nicolas Pitre wrote:
>> > > On Tue, 22 Apr 2014, Daniel Lezcano wrote:
>> > >
>> > > > On 04/22/2014 08:17 AM, Abhilash Kesavan wrote:
>> > > > > +/*
>> > > > > + * The common v7_exit_coherency_flush API could not be used because of the
>> > > > > + * Erratum 799270 workaround. This macro is the same as the common one (in
>> > > > > + * arch/arm/include/asm/cacheflush.h) except for the erratum handling.
>> > > > > + */
>> > > > > +#define exynos_v7_exit_coherency_flush(level) \
>> > > > > +     asm volatile( \
>> > > > > +     "stmfd  sp!, {fp, ip}\n\t"\
>> > > > > +     "mrc    p15, 0, r0, c1, c0, 0   @ get SCTLR\n\t" \
>> > > > > +     "bic    r0, r0, #"__stringify(CR_C)"\n\t" \
>> > > > > +     "mcr    p15, 0, r0, c1, c0, 0   @ set SCTLR\n\t" \
>> > > > > +     "isb\n\t"\
>> > > > > +     "bl     v7_flush_dcache_"__stringify(level)"\n\t" \
>> > > > > +     "clrex\n\t"\
>> > > > > +     "mrc    p15, 0, r0, c1, c0, 1   @ get ACTLR\n\t" \
>> > > > > +     "bic    r0, r0, #(1 << 6)       @ disable local coherency\n\t" \
>> > > > > +     /* Dummy Load of a device register to avoid Erratum 799270 */ \
>> > > >
>> > > > Wouldn't make sense to add the erratum in the Kconfig and re-use it in the
>> > > > generic v7_exit_coherency_flush macro instead of redefining it ?
>> > >
>> > > The implementation of the erratum (the dummy device register load) is
>> > > platform specific I'm afraid.
>> > >
>> > > Is TC2 also concerned by this erratum, or is this Samsung specific?
>> >
>> > Sounds like it is ARM Cortex-A15MP specific:
>> >
>> > http://infocenter.arm.com/help/topic/com.arm.doc.epm028090/cortex_a15_mpcore_software_developers_errata_notice_r2_v12.pdf
>> >
>> > Page 31.
>>
>> The condition for hitting the erratum is: "The L2 cache block has been
>> idle for 256 or more cycles with no memory requests from any core, no
>> external snoops, and no ACP requests".
>>
>> Given that the operation that almost immediately precedes the
>> problematic mcr is a call to v7_flush_dcache_louis or
>> v7_flush_dcache_all, is this possible that the condition for the erratum
>> could ever be met?
>
> Yes, if we execute out of the I-cache (no-data accesses) I think this might be
> triggered.
>
> On a side note, the errata affects also the setting of SMP bit
> (__v7_ca15mp_setup and cpu_v7_do_resume) but we do nothing there (it
> is a different situation though, MMU there is off and D-cache off...but
> I-cache is allowed to be on, so I am actually checking if we should be doing
> something there too).
>
>> If no then we should document that v7_exit_coherency_flush(() is safe
>> against erratum 799270 and use it here.
>
> Eh, I wish we could use it safely, I am currently investigating and will get
> back to you asap.

Have you been able to come to a conclusion regarding the necessity of
the erratum workaround ? We are currently carrying the workaround in
the merged exynos back-end.

Regards,
Abhilash
>
> Lorenzo
>
>> > > > > +     "ldr    r4, [%0]\n\t" \
>> > > > > +     "and    r4, r4, #0\n\t" \
>> > > > > +     "orr    r0, r0, r4\n\t" \
>> > > > > +     "mcr    p15, 0, r0, c1, c0, 1   @ set ACTLR\n\t" \
>> > > > > +     "isb\n\t" \
>> > > > > +     "dsb\n\t" \
>> > > > > +     "ldmfd  sp!, {fp, ip}" \
>> > > > > +     : \
>> > > > > +     : "Ir" (S5P_INFORM0) \
>> > > > > +     : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
>> > > > > +       "r9", "r10", "lr", "memory")
>> > > >
>>
>
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Lorenzo Pieralisi June 19, 2014, 8:56 a.m. UTC | #12
On Tue, Jun 17, 2014 at 03:38:29AM +0100, Abhilash Kesavan wrote:
> Hi Lorenzo,
> 
> On Wed, Apr 23, 2014 at 9:43 PM, Lorenzo Pieralisi
> <lorenzo.pieralisi@arm.com> wrote:
> > On Tue, Apr 22, 2014 at 08:56:23PM +0100, Nicolas Pitre wrote:
> >>
> >> [ Moved Lorenzo up in the addressee list to get his attention ]
> >
> > Sorry for the delay in replying.
> >
> >> On Tue, 22 Apr 2014, Daniel Lezcano wrote:
> >>
> >> > On 04/22/2014 05:40 PM, Nicolas Pitre wrote:
> >> > > On Tue, 22 Apr 2014, Daniel Lezcano wrote:
> >> > >
> >> > > > On 04/22/2014 08:17 AM, Abhilash Kesavan wrote:
> >> > > > > +/*
> >> > > > > + * The common v7_exit_coherency_flush API could not be used because of the
> >> > > > > + * Erratum 799270 workaround. This macro is the same as the common one (in
> >> > > > > + * arch/arm/include/asm/cacheflush.h) except for the erratum handling.
> >> > > > > + */
> >> > > > > +#define exynos_v7_exit_coherency_flush(level) \
> >> > > > > +     asm volatile( \
> >> > > > > +     "stmfd  sp!, {fp, ip}\n\t"\
> >> > > > > +     "mrc    p15, 0, r0, c1, c0, 0   @ get SCTLR\n\t" \
> >> > > > > +     "bic    r0, r0, #"__stringify(CR_C)"\n\t" \
> >> > > > > +     "mcr    p15, 0, r0, c1, c0, 0   @ set SCTLR\n\t" \
> >> > > > > +     "isb\n\t"\
> >> > > > > +     "bl     v7_flush_dcache_"__stringify(level)"\n\t" \
> >> > > > > +     "clrex\n\t"\
> >> > > > > +     "mrc    p15, 0, r0, c1, c0, 1   @ get ACTLR\n\t" \
> >> > > > > +     "bic    r0, r0, #(1 << 6)       @ disable local coherency\n\t" \
> >> > > > > +     /* Dummy Load of a device register to avoid Erratum 799270 */ \
> >> > > >
> >> > > > Wouldn't make sense to add the erratum in the Kconfig and re-use it in the
> >> > > > generic v7_exit_coherency_flush macro instead of redefining it ?
> >> > >
> >> > > The implementation of the erratum (the dummy device register load) is
> >> > > platform specific I'm afraid.
> >> > >
> >> > > Is TC2 also concerned by this erratum, or is this Samsung specific?
> >> >
> >> > Sounds like it is ARM Cortex-A15MP specific:
> >> >
> >> > http://infocenter.arm.com/help/topic/com.arm.doc.epm028090/cortex_a15_mpcore_software_developers_errata_notice_r2_v12.pdf
> >> >
> >> > Page 31.
> >>
> >> The condition for hitting the erratum is: "The L2 cache block has been
> >> idle for 256 or more cycles with no memory requests from any core, no
> >> external snoops, and no ACP requests".
> >>
> >> Given that the operation that almost immediately precedes the
> >> problematic mcr is a call to v7_flush_dcache_louis or
> >> v7_flush_dcache_all, is this possible that the condition for the erratum
> >> could ever be met?
> >
> > Yes, if we execute out of the I-cache (no-data accesses) I think this might be
> > triggered.
> >
> > On a side note, the errata affects also the setting of SMP bit
> > (__v7_ca15mp_setup and cpu_v7_do_resume) but we do nothing there (it
> > is a different situation though, MMU there is off and D-cache off...but
> > I-cache is allowed to be on, so I am actually checking if we should be doing
> > something there too).
> >
> >> If no then we should document that v7_exit_coherency_flush(() is safe
> >> against erratum 799270 and use it here.
> >
> > Eh, I wish we could use it safely, I am currently investigating and will get
> > back to you asap.
> 
> Have you been able to come to a conclusion regarding the necessity of
> the erratum workaround ? We are currently carrying the workaround in
> the merged exynos back-end.

Yes. There is a way to avoid it, but we would end up deviating from the
standard power down procedure, so I am keener on duplicating the macro and
adding errata code there (current code) rather than rewriting the whole
thing, which has to be retested and we will still have a procedure that
differs from the standard one to work around that issue. At least as it is
it is explicit.

Thanks,
Lorenzo
Abhilash Kesavan June 19, 2014, 12:23 p.m. UTC | #13
Hi Lorenzo,

On Thu, Jun 19, 2014 at 2:26 PM, Lorenzo Pieralisi
<lorenzo.pieralisi@arm.com> wrote:
> On Tue, Jun 17, 2014 at 03:38:29AM +0100, Abhilash Kesavan wrote:
>> Hi Lorenzo,
>>
>> On Wed, Apr 23, 2014 at 9:43 PM, Lorenzo Pieralisi
>> <lorenzo.pieralisi@arm.com> wrote:
>> > On Tue, Apr 22, 2014 at 08:56:23PM +0100, Nicolas Pitre wrote:
>> >>
>> >> [ Moved Lorenzo up in the addressee list to get his attention ]
>> >
>> > Sorry for the delay in replying.
>> >
>> >> On Tue, 22 Apr 2014, Daniel Lezcano wrote:
>> >>
>> >> > On 04/22/2014 05:40 PM, Nicolas Pitre wrote:
>> >> > > On Tue, 22 Apr 2014, Daniel Lezcano wrote:
>> >> > >
>> >> > > > On 04/22/2014 08:17 AM, Abhilash Kesavan wrote:
>> >> > > > > +/*
>> >> > > > > + * The common v7_exit_coherency_flush API could not be used because of the
>> >> > > > > + * Erratum 799270 workaround. This macro is the same as the common one (in
>> >> > > > > + * arch/arm/include/asm/cacheflush.h) except for the erratum handling.
>> >> > > > > + */
>> >> > > > > +#define exynos_v7_exit_coherency_flush(level) \
>> >> > > > > +     asm volatile( \
>> >> > > > > +     "stmfd  sp!, {fp, ip}\n\t"\
>> >> > > > > +     "mrc    p15, 0, r0, c1, c0, 0   @ get SCTLR\n\t" \
>> >> > > > > +     "bic    r0, r0, #"__stringify(CR_C)"\n\t" \
>> >> > > > > +     "mcr    p15, 0, r0, c1, c0, 0   @ set SCTLR\n\t" \
>> >> > > > > +     "isb\n\t"\
>> >> > > > > +     "bl     v7_flush_dcache_"__stringify(level)"\n\t" \
>> >> > > > > +     "clrex\n\t"\
>> >> > > > > +     "mrc    p15, 0, r0, c1, c0, 1   @ get ACTLR\n\t" \
>> >> > > > > +     "bic    r0, r0, #(1 << 6)       @ disable local coherency\n\t" \
>> >> > > > > +     /* Dummy Load of a device register to avoid Erratum 799270 */ \
>> >> > > >
>> >> > > > Wouldn't make sense to add the erratum in the Kconfig and re-use it in the
>> >> > > > generic v7_exit_coherency_flush macro instead of redefining it ?
>> >> > >
>> >> > > The implementation of the erratum (the dummy device register load) is
>> >> > > platform specific I'm afraid.
>> >> > >
>> >> > > Is TC2 also concerned by this erratum, or is this Samsung specific?
>> >> >
>> >> > Sounds like it is ARM Cortex-A15MP specific:
>> >> >
>> >> > http://infocenter.arm.com/help/topic/com.arm.doc.epm028090/cortex_a15_mpcore_software_developers_errata_notice_r2_v12.pdf
>> >> >
>> >> > Page 31.
>> >>
>> >> The condition for hitting the erratum is: "The L2 cache block has been
>> >> idle for 256 or more cycles with no memory requests from any core, no
>> >> external snoops, and no ACP requests".
>> >>
>> >> Given that the operation that almost immediately precedes the
>> >> problematic mcr is a call to v7_flush_dcache_louis or
>> >> v7_flush_dcache_all, is this possible that the condition for the erratum
>> >> could ever be met?
>> >
>> > Yes, if we execute out of the I-cache (no-data accesses) I think this might be
>> > triggered.
>> >
>> > On a side note, the errata affects also the setting of SMP bit
>> > (__v7_ca15mp_setup and cpu_v7_do_resume) but we do nothing there (it
>> > is a different situation though, MMU there is off and D-cache off...but
>> > I-cache is allowed to be on, so I am actually checking if we should be doing
>> > something there too).
>> >
>> >> If no then we should document that v7_exit_coherency_flush(() is safe
>> >> against erratum 799270 and use it here.
>> >
>> > Eh, I wish we could use it safely, I am currently investigating and will get
>> > back to you asap.
>>
>> Have you been able to come to a conclusion regarding the necessity of
>> the erratum workaround ? We are currently carrying the workaround in
>> the merged exynos back-end.
>
> Yes. There is a way to avoid it, but we would end up deviating from the
> standard power down procedure, so I am keener on duplicating the macro and
> adding errata code there (current code) rather than rewriting the whole
> thing, which has to be retested and we will still have a procedure that
> differs from the standard one to work around that issue. At least as it is
> it is explicit.
Thanks for checking. I will leave the code as it is then.

Regards,
Abhilash
>
> Thanks,
> Lorenzo
>
diff mbox

Patch

diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig
index fc8bf18..1602abc 100644
--- a/arch/arm/mach-exynos/Kconfig
+++ b/arch/arm/mach-exynos/Kconfig
@@ -110,4 +110,12 @@  config SOC_EXYNOS5440
 
 endmenu
 
+config EXYNOS5420_MCPM
+	bool "Exynos5420 Multi-Cluster PM support"
+	depends on MCPM && SOC_EXYNOS5420
+	select ARM_CCI
+	help
+	  This is needed to provide CPU and cluster power management
+	  on Exynos5420 implementing big.LITTLE.
+
 endif
diff --git a/arch/arm/mach-exynos/Makefile b/arch/arm/mach-exynos/Makefile
index a656dbe..01bc9b9 100644
--- a/arch/arm/mach-exynos/Makefile
+++ b/arch/arm/mach-exynos/Makefile
@@ -29,3 +29,5 @@  obj-$(CONFIG_ARCH_EXYNOS)	+= firmware.o
 
 plus_sec := $(call as-instr,.arch_extension sec,+sec)
 AFLAGS_exynos-smc.o		:=-Wa,-march=armv7-a$(plus_sec)
+
+obj-$(CONFIG_EXYNOS5420_MCPM)	+= mcpm-exynos.o
diff --git a/arch/arm/mach-exynos/mcpm-exynos.c b/arch/arm/mach-exynos/mcpm-exynos.c
new file mode 100644
index 0000000..49b9031
--- /dev/null
+++ b/arch/arm/mach-exynos/mcpm-exynos.c
@@ -0,0 +1,345 @@ 
+/*
+ * Copyright (c) 2014 Samsung Electronics Co., Ltd.
+ *		http://www.samsung.com
+ *
+ * arch/arm/mach-exynos/mcpm-exynos.c
+ *
+ * Based on arch/arm/mach-vexpress/dcscb.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+#include <linux/miscdevice.h>
+#include <linux/fs.h>
+#include <linux/delay.h>
+#include <linux/arm-cci.h>
+
+#include <asm/mcpm.h>
+#include <asm/cputype.h>
+#include <asm/cp15.h>
+
+#include <plat/cpu.h>
+#include "regs-pmu.h"
+
+#define EXYNOS5420_CPUS_PER_CLUSTER	4
+#define EXYNOS5420_NR_CLUSTERS		2
+
+/* Secondary CPU entry point */
+#define REG_ENTRY_ADDR		(S5P_VA_SYSRAM_NS + 0x1C)
+
+#define EXYNOS_CORE_LOCAL_PWR_EN		0x3
+#define EXYNOS_CORE_LOCAL_PWR_DIS		0x0
+
+#define EXYNOS_ARM_COMMON_CONFIGURATION		S5P_PMUREG(0x2500)
+#define EXYNOS_ARM_L2_CONFIGURATION		S5P_PMUREG(0x2600)
+
+#define EXYNOS_ARM_CORE_CONFIGURATION(_nr)	\
+			(S5P_ARM_CORE0_CONFIGURATION + ((_nr) * 0x80))
+#define EXYNOS_ARM_CORE_STATUS(_nr)		\
+			(S5P_ARM_CORE0_STATUS + ((_nr) * 0x80))
+
+#define EXYNOS_COMMON_CONFIGURATION(_nr)	\
+			(EXYNOS_ARM_COMMON_CONFIGURATION + ((_nr) * 0x80))
+#define EXYNOS_COMMON_STATUS(_nr)		\
+			(EXYNOS_COMMON_CONFIGURATION(_nr) + 0x4)
+
+#define EXYNOS_L2_CONFIGURATION(_nr)		\
+			(EXYNOS_ARM_L2_CONFIGURATION + ((_nr) * 0x80))
+#define EXYNOS_L2_STATUS(_nr)			\
+			(EXYNOS_L2_CONFIGURATION(_nr) + 0x4)
+
+/*
+ * The common v7_exit_coherency_flush API could not be used because of the
+ * Erratum 799270 workaround. This macro is the same as the common one (in
+ * arch/arm/include/asm/cacheflush.h) except for the erratum handling.
+ */
+#define exynos_v7_exit_coherency_flush(level) \
+	asm volatile( \
+	"stmfd	sp!, {fp, ip}\n\t"\
+	"mrc	p15, 0, r0, c1, c0, 0	@ get SCTLR\n\t" \
+	"bic	r0, r0, #"__stringify(CR_C)"\n\t" \
+	"mcr	p15, 0, r0, c1, c0, 0	@ set SCTLR\n\t" \
+	"isb\n\t"\
+	"bl	v7_flush_dcache_"__stringify(level)"\n\t" \
+	"clrex\n\t"\
+	"mrc	p15, 0, r0, c1, c0, 1	@ get ACTLR\n\t" \
+	"bic	r0, r0, #(1 << 6)	@ disable local coherency\n\t" \
+	/* Dummy Load of a device register to avoid Erratum 799270 */ \
+	"ldr	r4, [%0]\n\t" \
+	"and	r4, r4, #0\n\t" \
+	"orr	r0, r0, r4\n\t" \
+	"mcr	p15, 0, r0, c1, c0, 1	@ set ACTLR\n\t" \
+	"isb\n\t" \
+	"dsb\n\t" \
+	"ldmfd	sp!, {fp, ip}" \
+	: \
+	: "Ir" (S5P_INFORM0) \
+	: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
+	  "r9", "r10", "lr", "memory")
+
+/*
+ * We can't use regular spinlocks. In the switcher case, it is possible
+ * for an outbound CPU to call power_down() after its inbound counterpart
+ * is already live using the same logical CPU number which trips lockdep
+ * debugging.
+ */
+static arch_spinlock_t exynos_mcpm_lock = __ARCH_SPIN_LOCK_UNLOCKED;
+static int
+cpu_use_count[EXYNOS5420_CPUS_PER_CLUSTER][EXYNOS5420_NR_CLUSTERS];
+
+static bool exynos_core_power_state(unsigned int cpu, unsigned int cluster)
+{
+	unsigned int val;
+	unsigned int cpunr = cpu + (cluster * EXYNOS5420_CPUS_PER_CLUSTER);
+
+	val = __raw_readl(EXYNOS_ARM_CORE_STATUS(cpunr)) &
+				EXYNOS_CORE_LOCAL_PWR_EN;
+	return !!val;
+}
+
+static void exynos_core_power_control(unsigned int cpu, unsigned int cluster,
+						int enable)
+{
+	unsigned int val = EXYNOS_CORE_LOCAL_PWR_DIS;
+	unsigned int cpunr = cpu + (cluster * EXYNOS5420_CPUS_PER_CLUSTER);
+
+	if (exynos_core_power_state(cpu, cluster) == enable)
+		return;
+
+	if (enable)
+		val = EXYNOS_CORE_LOCAL_PWR_EN;
+	__raw_writel(val, EXYNOS_ARM_CORE_CONFIGURATION(cpunr));
+}
+
+static void exynos_cluster_power_control(unsigned int cluster, int enable)
+{
+	unsigned long timeout = jiffies + msecs_to_jiffies(20);
+	unsigned int status, val = EXYNOS_CORE_LOCAL_PWR_DIS;
+
+	if (enable)
+		val = EXYNOS_CORE_LOCAL_PWR_EN;
+
+	status = __raw_readl(EXYNOS_COMMON_STATUS(cluster));
+	if ((status & EXYNOS_CORE_LOCAL_PWR_EN) == val)
+		return;
+
+	__raw_writel(val, EXYNOS_COMMON_CONFIGURATION(cluster));
+	/* Wait until cluster power control is applied */
+	while (time_before(jiffies, timeout)) {
+		status = __raw_readl(EXYNOS_COMMON_STATUS(cluster));
+
+		if ((status & EXYNOS_CORE_LOCAL_PWR_EN) == val)
+			return;
+
+		cpu_relax();
+	}
+	pr_warn("timed out waiting for cluster %u to power %s\n", cluster,
+		enable ? "on" : "off");
+}
+
+static int exynos_power_up(unsigned int cpu, unsigned int cluster)
+{
+	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+	if (cpu >= EXYNOS5420_CPUS_PER_CLUSTER ||
+		cluster >= EXYNOS5420_NR_CLUSTERS)
+		return -EINVAL;
+
+	/*
+	 * Since this is called with IRQs enabled, and no arch_spin_lock_irq
+	 * variant exists, we need to disable IRQs manually here.
+	 */
+	local_irq_disable();
+	arch_spin_lock(&exynos_mcpm_lock);
+
+	cpu_use_count[cpu][cluster]++;
+	if (cpu_use_count[cpu][cluster] == 1) {
+		bool was_cluster_down =
+			__mcpm_cluster_state(cluster) == CLUSTER_DOWN;
+
+		/*
+		 * Turn on the cluster (L2/COMMON) and then power on the cores.
+		 * TODO: Turn off the clusters when all cores in the cluster
+		 * are down to achieve significant power savings.
+		 */
+		if (was_cluster_down)
+			exynos_cluster_power_control(cluster, 1);
+		exynos_core_power_control(cpu, cluster, 1);
+
+		/* CPU should be powered up there */
+		/* Also setup Cluster Power Sequence here */
+	} else if (cpu_use_count[cpu][cluster] != 2) {
+		/*
+		 * The only possible values are:
+		 * 0 = CPU down
+		 * 1 = CPU (still) up
+		 * 2 = CPU requested to be up before it had a chance
+		 *     to actually make itself down.
+		 * Any other value is a bug.
+		 */
+		BUG();
+	}
+
+	arch_spin_unlock(&exynos_mcpm_lock);
+	local_irq_enable();
+
+	return 0;
+}
+
+static void exynos_power_down(void)
+{
+	unsigned int mpidr, cpu, cluster, cpumask;
+	bool last_man = false, skip_wfi = false;
+
+	mpidr = read_cpuid_mpidr();
+	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+	cpumask = (1 << cpu);
+
+	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+	BUG_ON(cpu >= EXYNOS5420_CPUS_PER_CLUSTER ||
+			cluster >= EXYNOS5420_NR_CLUSTERS);
+
+	__mcpm_cpu_going_down(cpu, cluster);
+
+	arch_spin_lock(&exynos_mcpm_lock);
+	BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
+	cpu_use_count[cpu][cluster]--;
+	if (cpu_use_count[cpu][cluster] == 0) {
+		int cnt = 0, i;
+
+		exynos_core_power_control(cpu, cluster, 0);
+		for (i = 0; i < EXYNOS5420_CPUS_PER_CLUSTER; i++)
+			cnt += cpu_use_count[i][cluster];
+		if (cnt == 0)
+			last_man = true;
+	} else if (cpu_use_count[cpu][cluster] == 1) {
+		/*
+		 * A power_up request went ahead of us.
+		 * Even if we do not want to shut this CPU down,
+		 * the caller expects a certain state as if the WFI
+		 * was aborted.  So let's continue with cache cleaning.
+		 */
+		skip_wfi = true;
+	} else {
+		BUG();
+	}
+
+	if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
+		arch_spin_unlock(&exynos_mcpm_lock);
+
+		/* Flush all cache levels for this cluster. */
+		exynos_v7_exit_coherency_flush(all);
+
+		/*
+		 * Disable cluster-level coherency by masking
+		 * incoming snoops and DVM messages:
+		 */
+		cci_disable_port_by_cpu(mpidr);
+
+		__mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
+	} else {
+		arch_spin_unlock(&exynos_mcpm_lock);
+
+		/* Disable and flush the local CPU cache. */
+		exynos_v7_exit_coherency_flush(louis);
+	}
+
+	__mcpm_cpu_down(cpu, cluster);
+
+	/* Now we are prepared for power-down, do it: */
+	if (!skip_wfi)
+		wfi();
+
+	/* Not dead at this point?  Let our caller cope. */
+}
+
+static int exynos_power_down_finish(unsigned int cpu, unsigned int cluster)
+{
+	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+	BUG_ON(cpu >= EXYNOS5420_CPUS_PER_CLUSTER ||
+			cluster >= EXYNOS5420_NR_CLUSTERS);
+
+	/* Wait for the core state to be OFF */
+	while (exynos_core_power_state(cpu, cluster) != 0x0)
+		;
+
+	return 0; /* success: the CPU is halted */
+}
+
+static const struct mcpm_platform_ops exynos_power_ops = {
+	.power_up		= exynos_power_up,
+	.power_down		= exynos_power_down,
+	.power_down_finish	= exynos_power_down_finish,
+};
+
+static void __init exynos_mcpm_usage_count_init(void)
+{
+	unsigned int mpidr, cpu, cluster;
+
+	mpidr = read_cpuid_mpidr();
+	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
+	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+	BUG_ON(cpu >= EXYNOS5420_CPUS_PER_CLUSTER  ||
+			cluster >= EXYNOS5420_NR_CLUSTERS);
+
+	cpu_use_count[cpu][cluster] = 1;
+}
+
+/*
+ * Enable cluster-level coherency, in preparation for turning on the MMU.
+ */
+static void __naked exynos_pm_power_up_setup(unsigned int affinity_level)
+{
+	asm volatile ("\n"
+	"cmp	r0, #1\n"
+	"bxne	lr\n"
+	"b	cci_enable_port_for_self");
+}
+
+static int __init exynos_mcpm_init(void)
+{
+	int ret = 0;
+
+	if (!soc_is_exynos5420())
+		return -ENODEV;
+
+	if (!cci_probed())
+		return -ENODEV;
+
+	/*
+	 * To increase the stability of KFC reset we need to program
+	 * the PMU SPARE3 register
+	 */
+	__raw_writel(EXYNOS5420_SWRESET_KFC_SEL, S5P_PMU_SPARE3);
+
+	exynos_mcpm_usage_count_init();
+
+	ret = mcpm_platform_register(&exynos_power_ops);
+	if (!ret)
+		ret = mcpm_sync_init(exynos_pm_power_up_setup);
+	if (ret)
+		return ret;
+
+	mcpm_smp_set_ops();
+
+	pr_info("Exynos MCPM support installed\n");
+
+	/*
+	 * Future entries into the kernel can now go
+	 * through the cluster entry vectors.
+	 */
+	__raw_writel(virt_to_phys(mcpm_entry_point),
+		REG_ENTRY_ADDR);
+
+	return ret;
+}
+
+early_initcall(exynos_mcpm_init);
diff --git a/arch/arm/mach-exynos/regs-pmu.h b/arch/arm/mach-exynos/regs-pmu.h
index cfbfc575..43fe7a0 100644
--- a/arch/arm/mach-exynos/regs-pmu.h
+++ b/arch/arm/mach-exynos/regs-pmu.h
@@ -38,6 +38,7 @@ 
 #define S5P_INFORM5				S5P_PMUREG(0x0814)
 #define S5P_INFORM6				S5P_PMUREG(0x0818)
 #define S5P_INFORM7				S5P_PMUREG(0x081C)
+#define S5P_PMU_SPARE3				S5P_PMUREG(0x090C)
 
 #define EXYNOS_IROM_DATA2			S5P_PMUREG(0x0988)
 
@@ -540,5 +541,6 @@ 
 #define EXYNOS5420_KFC_USE_STANDBY_WFE3				(1 << 23)
 
 #define DUR_WAIT_RESET				0xF
+#define EXYNOS5420_SWRESET_KFC_SEL		0x3
 
 #endif /* __ASM_ARCH_REGS_PMU_H */