diff mbox

[v4,5/5] arm: exynos: Add MCPM call-back functions

Message ID 1399044359-15784-1-git-send-email-a.kesavan@samsung.com (mailing list archive)
State New, archived
Headers show

Commit Message

Abhilash Kesavan May 2, 2014, 3:25 p.m. UTC
Add machine-dependent MCPM call-backs for Exynos5420. These are used
to power up/down the secondary CPUs during boot, shutdown, s2r and
switching.

Signed-off-by: Thomas Abraham <thomas.ab@samsung.com>
Signed-off-by: Inderpal Singh <inderpal.s@samsung.com>
Signed-off-by: Andrew Bresticker <abrestic@chromium.org>
Signed-off-by: Abhilash Kesavan <a.kesavan@samsung.com>
---
 arch/arm/mach-exynos/Kconfig       |    8 +
 arch/arm/mach-exynos/Makefile      |    2 +
 arch/arm/mach-exynos/mcpm-exynos.c |  345 ++++++++++++++++++++++++++++++++++++
 arch/arm/mach-exynos/regs-pmu.h    |    3 +
 4 files changed, 358 insertions(+)
 create mode 100644 arch/arm/mach-exynos/mcpm-exynos.c

Comments

Nicolas Pitre May 2, 2014, 6:16 p.m. UTC | #1
On Fri, 2 May 2014, Abhilash Kesavan wrote:

> Add machine-dependent MCPM call-backs for Exynos5420. These are used
> to power up/down the secondary CPUs during boot, shutdown, s2r and
> switching.
> 
> Signed-off-by: Thomas Abraham <thomas.ab@samsung.com>
> Signed-off-by: Inderpal Singh <inderpal.s@samsung.com>
> Signed-off-by: Andrew Bresticker <abrestic@chromium.org>
> Signed-off-by: Abhilash Kesavan <a.kesavan@samsung.com>

OK.... There is still a detail wrong.  At least we are converging.

> ---
>  arch/arm/mach-exynos/Kconfig       |    8 +
>  arch/arm/mach-exynos/Makefile      |    2 +
>  arch/arm/mach-exynos/mcpm-exynos.c |  345 ++++++++++++++++++++++++++++++++++++
>  arch/arm/mach-exynos/regs-pmu.h    |    3 +
>  4 files changed, 358 insertions(+)
>  create mode 100644 arch/arm/mach-exynos/mcpm-exynos.c
> 
> diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig
> index 5c34dc2..138070e 100644
> --- a/arch/arm/mach-exynos/Kconfig
> +++ b/arch/arm/mach-exynos/Kconfig
> @@ -73,4 +73,12 @@ config SOC_EXYNOS5440
>  
>  endmenu
>  
> +config EXYNOS5420_MCPM
> +	bool "Exynos5420 Multi-Cluster PM support"
> +	depends on MCPM && SOC_EXYNOS5420
> +	select ARM_CCI
> +	help
> +	  This is needed to provide CPU and cluster power management
> +	  on Exynos5420 implementing big.LITTLE.
> +
>  endif
> diff --git a/arch/arm/mach-exynos/Makefile b/arch/arm/mach-exynos/Makefile
> index a656dbe..01bc9b9 100644
> --- a/arch/arm/mach-exynos/Makefile
> +++ b/arch/arm/mach-exynos/Makefile
> @@ -29,3 +29,5 @@ obj-$(CONFIG_ARCH_EXYNOS)	+= firmware.o
>  
>  plus_sec := $(call as-instr,.arch_extension sec,+sec)
>  AFLAGS_exynos-smc.o		:=-Wa,-march=armv7-a$(plus_sec)
> +
> +obj-$(CONFIG_EXYNOS5420_MCPM)	+= mcpm-exynos.o
> diff --git a/arch/arm/mach-exynos/mcpm-exynos.c b/arch/arm/mach-exynos/mcpm-exynos.c
> new file mode 100644
> index 0000000..d0f7461
> --- /dev/null
> +++ b/arch/arm/mach-exynos/mcpm-exynos.c
> @@ -0,0 +1,345 @@
> +/*
> + * Copyright (c) 2014 Samsung Electronics Co., Ltd.
> + *		http://www.samsung.com
> + *
> + * arch/arm/mach-exynos/mcpm-exynos.c
> + *
> + * Based on arch/arm/mach-vexpress/dcscb.c
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/arm-cci.h>
> +#include <linux/delay.h>
> +#include <linux/io.h>
> +#include <linux/of_address.h>
> +
> +#include <asm/cputype.h>
> +#include <asm/cp15.h>
> +#include <asm/mcpm.h>
> +
> +#include "regs-pmu.h"
> +#include "common.h"
> +
> +#define EXYNOS5420_CPUS_PER_CLUSTER	4
> +#define EXYNOS5420_NR_CLUSTERS		2
> +
> +/* Non-secure iRAM base address */
> +static void __iomem *ns_sram_base_addr;
> +
> +/*
> + * The common v7_exit_coherency_flush API could not be used because of the
> + * Erratum 799270 workaround. This macro is the same as the common one (in
> + * arch/arm/include/asm/cacheflush.h) except for the erratum handling.
> + */
> +#define exynos_v7_exit_coherency_flush(level) \
> +	asm volatile( \
> +	"stmfd	sp!, {fp, ip}\n\t"\
> +	"mrc	p15, 0, r0, c1, c0, 0	@ get SCTLR\n\t" \
> +	"bic	r0, r0, #"__stringify(CR_C)"\n\t" \
> +	"mcr	p15, 0, r0, c1, c0, 0	@ set SCTLR\n\t" \
> +	"isb\n\t"\
> +	"bl	v7_flush_dcache_"__stringify(level)"\n\t" \
> +	"clrex\n\t"\
> +	"mrc	p15, 0, r0, c1, c0, 1	@ get ACTLR\n\t" \
> +	"bic	r0, r0, #(1 << 6)	@ disable local coherency\n\t" \
> +	/* Dummy Load of a device register to avoid Erratum 799270 */ \
> +	"ldr	r4, [%0]\n\t" \
> +	"and	r4, r4, #0\n\t" \
> +	"orr	r0, r0, r4\n\t" \
> +	"mcr	p15, 0, r0, c1, c0, 1	@ set ACTLR\n\t" \
> +	"isb\n\t" \
> +	"dsb\n\t" \
> +	"ldmfd	sp!, {fp, ip}" \
> +	: \
> +	: "Ir" (S5P_INFORM0) \
> +	: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
> +	  "r9", "r10", "lr", "memory")
> +
> +/*
> + * We can't use regular spinlocks. In the switcher case, it is possible
> + * for an outbound CPU to call power_down() after its inbound counterpart
> + * is already live using the same logical CPU number which trips lockdep
> + * debugging.
> + */
> +static arch_spinlock_t exynos_mcpm_lock = __ARCH_SPIN_LOCK_UNLOCKED;
> +static int
> +cpu_use_count[EXYNOS5420_CPUS_PER_CLUSTER][EXYNOS5420_NR_CLUSTERS];
> +
> +#define exynos_cluster_unused(cluster) \
> +	(!cpu_use_count[0][cluster] && \
> +	 !cpu_use_count[1][cluster] && \
> +	 !cpu_use_count[2][cluster] && \
> +	 !cpu_use_count[3][cluster])
> +
> +static int exynos_cluster_power_control(unsigned int cluster, int enable)
> +{
> +	unsigned int tries = 100;
> +	unsigned int val;
> +
> +	if (enable) {
> +		exynos_cluster_powerup(cluster);
> +		val = S5P_CORE_LOCAL_PWR_EN;
> +	} else {
> +		exynos_cluster_powerdown(cluster);
> +		val = 0;
> +	}
> +
> +	/* Wait until cluster power control is applied */
> +	while (tries--) {
> +		if (exynos_cluster_power_state(cluster) == val)
> +			return 0;
> +
> +		cpu_relax();
> +	}
> +	pr_warn("timed out waiting for cluster %u to power %s\n", cluster,
> +		enable ? "on" : "off");
> +
> +	return -ETIMEDOUT;
> +}
> +
> +static int exynos_power_up(unsigned int cpu, unsigned int cluster)
> +{
> +	unsigned int cpunr = cpu + (cluster * EXYNOS5420_CPUS_PER_CLUSTER);
> +	int err = 0;
> +
> +	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
> +	if (cpu >= EXYNOS5420_CPUS_PER_CLUSTER ||
> +		cluster >= EXYNOS5420_NR_CLUSTERS)
> +		return -EINVAL;
> +
> +	/*
> +	 * Since this is called with IRQs enabled, and no arch_spin_lock_irq
> +	 * variant exists, we need to disable IRQs manually here.
> +	 */
> +	local_irq_disable();
> +	arch_spin_lock(&exynos_mcpm_lock);
> +
> +	cpu_use_count[cpu][cluster]++;
> +	if (cpu_use_count[cpu][cluster] == 1) {
> +		bool was_cluster_down =
> +			__mcpm_cluster_state(cluster) == CLUSTER_DOWN;

This is racy.  I probably made this comment already.  The MCPM cluster 
state may change in mcpm-head.S where concurrency protection is achieved 
with a different mechanism.

What you should do instead is to redefine exynos_cluster_unused() into 
exynos_cluster_usecnt() and simply add all counts together.  You could 
even have:

#define exynos_cluster_unused(cluster) !exynos_cluster_usecnt(cluster)

Yet, here you should use:

	bool was_cluster_down = (exynos_cluster_usecnt(cluster) == 1);

> +
> +		/*
> +		 * Turn on the cluster (L2/COMMON) and then power on the
> +		 * cores.
> +		 */
> +		if (was_cluster_down)
> +			err = exynos_cluster_power_control(cluster, 1);
> +
> +		if (!err)
> +			exynos_cpu_powerup(cpunr);
> +		else
> +			exynos_cluster_power_control(cluster, 0);
> +	} else if (cpu_use_count[cpu][cluster] != 2) {
> +		/*
> +		 * The only possible values are:
> +		 * 0 = CPU down
> +		 * 1 = CPU (still) up
> +		 * 2 = CPU requested to be up before it had a chance
> +		 *     to actually make itself down.
> +		 * Any other value is a bug.
> +		 */
> +		BUG();
> +	}
> +
> +	arch_spin_unlock(&exynos_mcpm_lock);
> +	local_irq_enable();
> +
> +	return err;
> +}
> +
> +static void exynos_power_down(void)
> +{
> +	unsigned int mpidr, cpu, cluster;
> +	bool last_man = false, skip_wfi = false;
> +	unsigned int cpunr;
> +
> +	mpidr = read_cpuid_mpidr();
> +	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
> +	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
> +	cpunr =  cpu + (cluster * EXYNOS5420_CPUS_PER_CLUSTER);
> +
> +	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
> +	BUG_ON(cpu >= EXYNOS5420_CPUS_PER_CLUSTER ||
> +			cluster >= EXYNOS5420_NR_CLUSTERS);
> +
> +	__mcpm_cpu_going_down(cpu, cluster);
> +
> +	arch_spin_lock(&exynos_mcpm_lock);
> +	BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
> +	cpu_use_count[cpu][cluster]--;
> +	if (cpu_use_count[cpu][cluster] == 0) {
> +		exynos_cpu_powerdown(cpunr);
> +
> +		if (exynos_cluster_unused(cluster))
> +			last_man = true;
> +	} else if (cpu_use_count[cpu][cluster] == 1) {
> +		/*
> +		 * A power_up request went ahead of us.
> +		 * Even if we do not want to shut this CPU down,
> +		 * the caller expects a certain state as if the WFI
> +		 * was aborted.  So let's continue with cache cleaning.
> +		 */
> +		skip_wfi = true;
> +	} else {
> +		BUG();
> +	}
> +
> +	/*
> +	 * TODO: Turn off the clusters when all cores in the cluster
> +	 * are down to achieve significant power savings.
> +	 */

This comment should actually be located right after the 
"if (exynos_cluster_unused(cluster))" above.  That is where the cluster 
control should be applied, assuming it'll be effective only when WFI is 
executed.


> +	if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
> +		arch_spin_unlock(&exynos_mcpm_lock);
> +
> +		/* Flush all cache levels for this cluster. */
> +		exynos_v7_exit_coherency_flush(all);
> +
> +		/*
> +		 * Disable cluster-level coherency by masking
> +		 * incoming snoops and DVM messages:
> +		 */
> +		cci_disable_port_by_cpu(mpidr);
> +
> +		__mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
> +	} else {
> +		arch_spin_unlock(&exynos_mcpm_lock);
> +
> +		if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A15) {
> +			/*
> +			 * On the Cortex-A15 we need to disable
> +			 * L2 prefetching before flushing the cache.
> +			 */
> +			asm volatile(
> +			"mcr	p15, 1, %0, c15, c0, 3\n\t"
> +			"isb\n\t"
> +			"dsb"
> +			: : "r" (0x400));
> +		}

This doesn't belong here.  That is for the last_man only to do, right 
before the "Flush all cache levels for this cluster" comment.

The rest looks fine to me.


Nicolas
Andrew Bresticker May 2, 2014, 6:23 p.m. UTC | #2
>> +     /*
>> +      * TODO: Turn off the clusters when all cores in the cluster
>> +      * are down to achieve significant power savings.
>> +      */
>
> This comment should actually be located right after the
> "if (exynos_cluster_unused(cluster))" above.  That is where the cluster
> control should be applied, assuming it'll be effective only when WFI is
> executed.

Correct me if I'm wrong Samsung folks, but I thought it was not
possible to apply cluster power control from a CPU within the cluster
being powered down, i.e. a CPU in the other cluster must be the one to
apply the cluster power control to power down the outbound cluster.

-Andrew
Nicolas Pitre May 2, 2014, 6:37 p.m. UTC | #3
On Fri, 2 May 2014, Andrew Bresticker wrote:

> >> +     /*
> >> +      * TODO: Turn off the clusters when all cores in the cluster
> >> +      * are down to achieve significant power savings.
> >> +      */
> >
> > This comment should actually be located right after the
> > "if (exynos_cluster_unused(cluster))" above.  That is where the cluster
> > control should be applied, assuming it'll be effective only when WFI is
> > executed.
> 
> Correct me if I'm wrong Samsung folks, but I thought it was not
> possible to apply cluster power control from a CPU within the cluster
> being powered down, i.e. a CPU in the other cluster must be the one to
> apply the cluster power control to power down the outbound cluster.

Is this true even for deep idle C-states?


Nicolas
Abhilash Kesavan May 5, 2014, 4:25 p.m. UTC | #4
Hi Nicolas,

On Fri, May 2, 2014 at 11:46 PM, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> On Fri, 2 May 2014, Abhilash Kesavan wrote:
>
>> Add machine-dependent MCPM call-backs for Exynos5420. These are used
>> to power up/down the secondary CPUs during boot, shutdown, s2r and
>> switching.
>>
>> Signed-off-by: Thomas Abraham <thomas.ab@samsung.com>
>> Signed-off-by: Inderpal Singh <inderpal.s@samsung.com>
>> Signed-off-by: Andrew Bresticker <abrestic@chromium.org>
>> Signed-off-by: Abhilash Kesavan <a.kesavan@samsung.com>
>
> OK.... There is still a detail wrong.  At least we are converging.
>
>> ---
>>  arch/arm/mach-exynos/Kconfig       |    8 +
>>  arch/arm/mach-exynos/Makefile      |    2 +
>>  arch/arm/mach-exynos/mcpm-exynos.c |  345 ++++++++++++++++++++++++++++++++++++
>>  arch/arm/mach-exynos/regs-pmu.h    |    3 +
>>  4 files changed, 358 insertions(+)
>>  create mode 100644 arch/arm/mach-exynos/mcpm-exynos.c
>>
>> diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig
>> index 5c34dc2..138070e 100644
>> --- a/arch/arm/mach-exynos/Kconfig
>> +++ b/arch/arm/mach-exynos/Kconfig
>> @@ -73,4 +73,12 @@ config SOC_EXYNOS5440
>>
>>  endmenu
>>
>> +config EXYNOS5420_MCPM
>> +     bool "Exynos5420 Multi-Cluster PM support"
>> +     depends on MCPM && SOC_EXYNOS5420
>> +     select ARM_CCI
>> +     help
>> +       This is needed to provide CPU and cluster power management
>> +       on Exynos5420 implementing big.LITTLE.
>> +
>>  endif
>> diff --git a/arch/arm/mach-exynos/Makefile b/arch/arm/mach-exynos/Makefile
>> index a656dbe..01bc9b9 100644
>> --- a/arch/arm/mach-exynos/Makefile
>> +++ b/arch/arm/mach-exynos/Makefile
>> @@ -29,3 +29,5 @@ obj-$(CONFIG_ARCH_EXYNOS)   += firmware.o
>>
>>  plus_sec := $(call as-instr,.arch_extension sec,+sec)
>>  AFLAGS_exynos-smc.o          :=-Wa,-march=armv7-a$(plus_sec)
>> +
>> +obj-$(CONFIG_EXYNOS5420_MCPM)        += mcpm-exynos.o
>> diff --git a/arch/arm/mach-exynos/mcpm-exynos.c b/arch/arm/mach-exynos/mcpm-exynos.c
>> new file mode 100644
>> index 0000000..d0f7461
>> --- /dev/null
>> +++ b/arch/arm/mach-exynos/mcpm-exynos.c
>> @@ -0,0 +1,345 @@
>> +/*
>> + * Copyright (c) 2014 Samsung Electronics Co., Ltd.
>> + *           http://www.samsung.com
>> + *
>> + * arch/arm/mach-exynos/mcpm-exynos.c
>> + *
>> + * Based on arch/arm/mach-vexpress/dcscb.c
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 as
>> + * published by the Free Software Foundation.
>> + */
>> +
>> +#include <linux/arm-cci.h>
>> +#include <linux/delay.h>
>> +#include <linux/io.h>
>> +#include <linux/of_address.h>
>> +
>> +#include <asm/cputype.h>
>> +#include <asm/cp15.h>
>> +#include <asm/mcpm.h>
>> +
>> +#include "regs-pmu.h"
>> +#include "common.h"
>> +
>> +#define EXYNOS5420_CPUS_PER_CLUSTER  4
>> +#define EXYNOS5420_NR_CLUSTERS               2
>> +
>> +/* Non-secure iRAM base address */
>> +static void __iomem *ns_sram_base_addr;
>> +
>> +/*
>> + * The common v7_exit_coherency_flush API could not be used because of the
>> + * Erratum 799270 workaround. This macro is the same as the common one (in
>> + * arch/arm/include/asm/cacheflush.h) except for the erratum handling.
>> + */
>> +#define exynos_v7_exit_coherency_flush(level) \
>> +     asm volatile( \
>> +     "stmfd  sp!, {fp, ip}\n\t"\
>> +     "mrc    p15, 0, r0, c1, c0, 0   @ get SCTLR\n\t" \
>> +     "bic    r0, r0, #"__stringify(CR_C)"\n\t" \
>> +     "mcr    p15, 0, r0, c1, c0, 0   @ set SCTLR\n\t" \
>> +     "isb\n\t"\
>> +     "bl     v7_flush_dcache_"__stringify(level)"\n\t" \
>> +     "clrex\n\t"\
>> +     "mrc    p15, 0, r0, c1, c0, 1   @ get ACTLR\n\t" \
>> +     "bic    r0, r0, #(1 << 6)       @ disable local coherency\n\t" \
>> +     /* Dummy Load of a device register to avoid Erratum 799270 */ \
>> +     "ldr    r4, [%0]\n\t" \
>> +     "and    r4, r4, #0\n\t" \
>> +     "orr    r0, r0, r4\n\t" \
>> +     "mcr    p15, 0, r0, c1, c0, 1   @ set ACTLR\n\t" \
>> +     "isb\n\t" \
>> +     "dsb\n\t" \
>> +     "ldmfd  sp!, {fp, ip}" \
>> +     : \
>> +     : "Ir" (S5P_INFORM0) \
>> +     : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
>> +       "r9", "r10", "lr", "memory")
>> +
>> +/*
>> + * We can't use regular spinlocks. In the switcher case, it is possible
>> + * for an outbound CPU to call power_down() after its inbound counterpart
>> + * is already live using the same logical CPU number which trips lockdep
>> + * debugging.
>> + */
>> +static arch_spinlock_t exynos_mcpm_lock = __ARCH_SPIN_LOCK_UNLOCKED;
>> +static int
>> +cpu_use_count[EXYNOS5420_CPUS_PER_CLUSTER][EXYNOS5420_NR_CLUSTERS];
>> +
>> +#define exynos_cluster_unused(cluster) \
>> +     (!cpu_use_count[0][cluster] && \
>> +      !cpu_use_count[1][cluster] && \
>> +      !cpu_use_count[2][cluster] && \
>> +      !cpu_use_count[3][cluster])
>> +
>> +static int exynos_cluster_power_control(unsigned int cluster, int enable)
>> +{
>> +     unsigned int tries = 100;
>> +     unsigned int val;
>> +
>> +     if (enable) {
>> +             exynos_cluster_powerup(cluster);
>> +             val = S5P_CORE_LOCAL_PWR_EN;
>> +     } else {
>> +             exynos_cluster_powerdown(cluster);
>> +             val = 0;
>> +     }
>> +
>> +     /* Wait until cluster power control is applied */
>> +     while (tries--) {
>> +             if (exynos_cluster_power_state(cluster) == val)
>> +                     return 0;
>> +
>> +             cpu_relax();
>> +     }
>> +     pr_warn("timed out waiting for cluster %u to power %s\n", cluster,
>> +             enable ? "on" : "off");
>> +
>> +     return -ETIMEDOUT;
>> +}
>> +
>> +static int exynos_power_up(unsigned int cpu, unsigned int cluster)
>> +{
>> +     unsigned int cpunr = cpu + (cluster * EXYNOS5420_CPUS_PER_CLUSTER);
>> +     int err = 0;
>> +
>> +     pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
>> +     if (cpu >= EXYNOS5420_CPUS_PER_CLUSTER ||
>> +             cluster >= EXYNOS5420_NR_CLUSTERS)
>> +             return -EINVAL;
>> +
>> +     /*
>> +      * Since this is called with IRQs enabled, and no arch_spin_lock_irq
>> +      * variant exists, we need to disable IRQs manually here.
>> +      */
>> +     local_irq_disable();
>> +     arch_spin_lock(&exynos_mcpm_lock);
>> +
>> +     cpu_use_count[cpu][cluster]++;
>> +     if (cpu_use_count[cpu][cluster] == 1) {
>> +             bool was_cluster_down =
>> +                     __mcpm_cluster_state(cluster) == CLUSTER_DOWN;
>
> This is racy.  I probably made this comment already.  The MCPM cluster
> state may change in mcpm-head.S where concurrency protection is achieved
> with a different mechanism.
>
> What you should do instead is to redefine exynos_cluster_unused() into
> exynos_cluster_usecnt() and simply add all counts together.  You could
> even have:
>
> #define exynos_cluster_unused(cluster) !exynos_cluster_usecnt(cluster)
>
> Yet, here you should use:
>
>         bool was_cluster_down = (exynos_cluster_usecnt(cluster) == 1);
Fixed as per suggestion.
>
>> +
>> +             /*
>> +              * Turn on the cluster (L2/COMMON) and then power on the
>> +              * cores.
>> +              */
>> +             if (was_cluster_down)
>> +                     err = exynos_cluster_power_control(cluster, 1);
>> +
>> +             if (!err)
>> +                     exynos_cpu_powerup(cpunr);
>> +             else
>> +                     exynos_cluster_power_control(cluster, 0);
>> +     } else if (cpu_use_count[cpu][cluster] != 2) {
>> +             /*
>> +              * The only possible values are:
>> +              * 0 = CPU down
>> +              * 1 = CPU (still) up
>> +              * 2 = CPU requested to be up before it had a chance
>> +              *     to actually make itself down.
>> +              * Any other value is a bug.
>> +              */
>> +             BUG();
>> +     }
>> +
>> +     arch_spin_unlock(&exynos_mcpm_lock);
>> +     local_irq_enable();
>> +
>> +     return err;
>> +}
>> +
>> +static void exynos_power_down(void)
>> +{
>> +     unsigned int mpidr, cpu, cluster;
>> +     bool last_man = false, skip_wfi = false;
>> +     unsigned int cpunr;
>> +
>> +     mpidr = read_cpuid_mpidr();
>> +     cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
>> +     cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
>> +     cpunr =  cpu + (cluster * EXYNOS5420_CPUS_PER_CLUSTER);
>> +
>> +     pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
>> +     BUG_ON(cpu >= EXYNOS5420_CPUS_PER_CLUSTER ||
>> +                     cluster >= EXYNOS5420_NR_CLUSTERS);
>> +
>> +     __mcpm_cpu_going_down(cpu, cluster);
>> +
>> +     arch_spin_lock(&exynos_mcpm_lock);
>> +     BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
>> +     cpu_use_count[cpu][cluster]--;
>> +     if (cpu_use_count[cpu][cluster] == 0) {
>> +             exynos_cpu_powerdown(cpunr);
>> +
>> +             if (exynos_cluster_unused(cluster))
>> +                     last_man = true;
>> +     } else if (cpu_use_count[cpu][cluster] == 1) {
>> +             /*
>> +              * A power_up request went ahead of us.
>> +              * Even if we do not want to shut this CPU down,
>> +              * the caller expects a certain state as if the WFI
>> +              * was aborted.  So let's continue with cache cleaning.
>> +              */
>> +             skip_wfi = true;
>> +     } else {
>> +             BUG();
>> +     }
>> +
>> +     /*
>> +      * TODO: Turn off the clusters when all cores in the cluster
>> +      * are down to achieve significant power savings.
>> +      */
>
> This comment should actually be located right after the
> "if (exynos_cluster_unused(cluster))" above.  That is where the cluster
> control should be applied, assuming it'll be effective only when WFI is
> executed.
OK.
>
>
>> +     if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
>> +             arch_spin_unlock(&exynos_mcpm_lock);
>> +
>> +             /* Flush all cache levels for this cluster. */
>> +             exynos_v7_exit_coherency_flush(all);
>> +
>> +             /*
>> +              * Disable cluster-level coherency by masking
>> +              * incoming snoops and DVM messages:
>> +              */
>> +             cci_disable_port_by_cpu(mpidr);
>> +
>> +             __mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
>> +     } else {
>> +             arch_spin_unlock(&exynos_mcpm_lock);
>> +
>> +             if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A15) {
>> +                     /*
>> +                      * On the Cortex-A15 we need to disable
>> +                      * L2 prefetching before flushing the cache.
>> +                      */
>> +                     asm volatile(
>> +                     "mcr    p15, 1, %0, c15, c0, 3\n\t"
>> +                     "isb\n\t"
>> +                     "dsb"
>> +                     : : "r" (0x400));
>> +             }
>
> This doesn't belong here.  That is for the last_man only to do, right
> before the "Flush all cache levels for this cluster" comment.
This was a bad miss on my part. Will fix.
>
> The rest looks fine to me.
Will post v5 soon.

Regards,
Abhilash
>
>
> Nicolas
Abhilash Kesavan May 5, 2014, 4:26 p.m. UTC | #5
Hi Andrew,

On Fri, May 2, 2014 at 11:53 PM, Andrew Bresticker
<abrestic@chromium.org> wrote:
>>> +     /*
>>> +      * TODO: Turn off the clusters when all cores in the cluster
>>> +      * are down to achieve significant power savings.
>>> +      */
>>
>> This comment should actually be located right after the
>> "if (exynos_cluster_unused(cluster))" above.  That is where the cluster
>> control should be applied, assuming it'll be effective only when WFI is
>> executed.
>
> Correct me if I'm wrong Samsung folks, but I thought it was not
> possible to apply cluster power control from a CPU within the cluster
> being powered down, i.e. a CPU in the other cluster must be the one to
> apply the cluster power control to power down the outbound cluster.
I was under the same impression until quite recently. However, based
on inputs from the hardware team, there are bits available (in the
*COMMON_OPTION register) that ensure "ARM_CORE0~3 are turned off
earlier and then ARM_COMMON_L2 is turned off finally". This allows us
to turn off the cluster from a cpu of the same cluster. We have used
these bits in our cluster power down cpuidle state implementation as
well.


Regards,
Abhilash
>
> -Andrew
diff mbox

Patch

diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig
index 5c34dc2..138070e 100644
--- a/arch/arm/mach-exynos/Kconfig
+++ b/arch/arm/mach-exynos/Kconfig
@@ -73,4 +73,12 @@  config SOC_EXYNOS5440
 
 endmenu
 
+config EXYNOS5420_MCPM
+	bool "Exynos5420 Multi-Cluster PM support"
+	depends on MCPM && SOC_EXYNOS5420
+	select ARM_CCI
+	help
+	  This is needed to provide CPU and cluster power management
+	  on Exynos5420 implementing big.LITTLE.
+
 endif
diff --git a/arch/arm/mach-exynos/Makefile b/arch/arm/mach-exynos/Makefile
index a656dbe..01bc9b9 100644
--- a/arch/arm/mach-exynos/Makefile
+++ b/arch/arm/mach-exynos/Makefile
@@ -29,3 +29,5 @@  obj-$(CONFIG_ARCH_EXYNOS)	+= firmware.o
 
 plus_sec := $(call as-instr,.arch_extension sec,+sec)
 AFLAGS_exynos-smc.o		:=-Wa,-march=armv7-a$(plus_sec)
+
+obj-$(CONFIG_EXYNOS5420_MCPM)	+= mcpm-exynos.o
diff --git a/arch/arm/mach-exynos/mcpm-exynos.c b/arch/arm/mach-exynos/mcpm-exynos.c
new file mode 100644
index 0000000..d0f7461
--- /dev/null
+++ b/arch/arm/mach-exynos/mcpm-exynos.c
@@ -0,0 +1,345 @@ 
+/*
+ * Copyright (c) 2014 Samsung Electronics Co., Ltd.
+ *		http://www.samsung.com
+ *
+ * arch/arm/mach-exynos/mcpm-exynos.c
+ *
+ * Based on arch/arm/mach-vexpress/dcscb.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/arm-cci.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/of_address.h>
+
+#include <asm/cputype.h>
+#include <asm/cp15.h>
+#include <asm/mcpm.h>
+
+#include "regs-pmu.h"
+#include "common.h"
+
+#define EXYNOS5420_CPUS_PER_CLUSTER	4
+#define EXYNOS5420_NR_CLUSTERS		2
+
+/* Non-secure iRAM base address */
+static void __iomem *ns_sram_base_addr;
+
+/*
+ * The common v7_exit_coherency_flush API could not be used because of the
+ * Erratum 799270 workaround. This macro is the same as the common one (in
+ * arch/arm/include/asm/cacheflush.h) except for the erratum handling.
+ *
+ * @level is pasted into the name of the flush routine that gets called
+ * (v7_flush_dcache_<level>); this file uses "all" and "louis".
+ *
+ * Steps performed by the asm below, in order:
+ *  - save fp and ip on the stack (they are restored at the end),
+ *  - clear the CR_C bit in SCTLR and issue an isb,
+ *  - branch-and-link to v7_flush_dcache_<level>, then clrex,
+ *  - clear bit 6 of ACTLR ("disable local coherency"), then isb and dsb.
+ *
+ * Erratum handling: before ACTLR is written back, S5P_INFORM0 is loaded
+ * into r4, r4 is ANDed with 0 and ORed into r0. The ORed-in value is always
+ * zero, so the ACTLR value is unchanged; the load only has to happen before
+ * the write.
+ */
+#define exynos_v7_exit_coherency_flush(level) \
+	asm volatile( \
+	"stmfd	sp!, {fp, ip}\n\t"\
+	"mrc	p15, 0, r0, c1, c0, 0	@ get SCTLR\n\t" \
+	"bic	r0, r0, #"__stringify(CR_C)"\n\t" \
+	"mcr	p15, 0, r0, c1, c0, 0	@ set SCTLR\n\t" \
+	"isb\n\t"\
+	"bl	v7_flush_dcache_"__stringify(level)"\n\t" \
+	"clrex\n\t"\
+	"mrc	p15, 0, r0, c1, c0, 1	@ get ACTLR\n\t" \
+	"bic	r0, r0, #(1 << 6)	@ disable local coherency\n\t" \
+	/* Dummy Load of a device register to avoid Erratum 799270 */ \
+	"ldr	r4, [%0]\n\t" \
+	"and	r4, r4, #0\n\t" \
+	"orr	r0, r0, r4\n\t" \
+	"mcr	p15, 0, r0, c1, c0, 1	@ set ACTLR\n\t" \
+	"isb\n\t" \
+	"dsb\n\t" \
+	"ldmfd	sp!, {fp, ip}" \
+	: \
+	: "Ir" (S5P_INFORM0) \
+	: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
+	  "r9", "r10", "lr", "memory")
+
+/*
+ * A raw arch spinlock is used instead of a regular spinlock: in the
+ * switcher case an outbound CPU may call power_down() after its inbound
+ * counterpart is already live under the same logical CPU number, and that
+ * trips lockdep debugging.
+ */
+static arch_spinlock_t exynos_mcpm_lock = __ARCH_SPIN_LOCK_UNLOCKED;
+
+/* Per-CPU use counts, indexed as [cpu][cluster]. */
+static int cpu_use_count[EXYNOS5420_CPUS_PER_CLUSTER][EXYNOS5420_NR_CLUSTERS];
+
+/*
+ * Evaluates to true when all four use counts of @cluster are zero.
+ * OR-ing the counts yields zero only if every one of them is zero.
+ */
+#define exynos_cluster_unused(cluster) \
+	(!(cpu_use_count[0][cluster] | cpu_use_count[1][cluster] | \
+	   cpu_use_count[2][cluster] | cpu_use_count[3][cluster]))
+
+/*
+ * Request a power state change for @cluster and poll until it is applied.
+ *
+ * @cluster: cluster number passed to the exynos_cluster_* helpers
+ * @enable:  non-zero to power the cluster up, zero to power it down
+ *
+ * Returns 0 once exynos_cluster_power_state() reports the requested state,
+ * or -ETIMEDOUT (after logging a warning) if it does not within 100 polls.
+ */
+static int exynos_cluster_power_control(unsigned int cluster, int enable)
+{
+	/* State the PMU is expected to report once the request took effect. */
+	const unsigned int expected = enable ? S5P_CORE_LOCAL_PWR_EN : 0;
+	unsigned int attempt;
+
+	if (enable)
+		exynos_cluster_powerup(cluster);
+	else
+		exynos_cluster_powerdown(cluster);
+
+	/* Bounded busy-wait until the cluster power control is applied. */
+	for (attempt = 0; attempt < 100; attempt++) {
+		if (exynos_cluster_power_state(cluster) == expected)
+			return 0;
+
+		cpu_relax();
+	}
+
+	pr_warn("timed out waiting for cluster %u to power %s\n", cluster,
+		enable ? "on" : "off");
+
+	return -ETIMEDOUT;
+}
+
+/*
+ * MCPM power_up callback: take a use-count reference on (@cpu, @cluster)
+ * and, on the 0 -> 1 transition, power the core up (powering the cluster up
+ * first if this is the only CPU in it holding a reference).
+ *
+ * @cpu:     CPU index within the cluster (0..EXYNOS5420_CPUS_PER_CLUSTER-1)
+ * @cluster: cluster index (0..EXYNOS5420_NR_CLUSTERS-1)
+ *
+ * Returns 0 on success, -EINVAL for out-of-range arguments, or the error
+ * from exynos_cluster_power_control() if the cluster failed to power up.
+ */
+static int exynos_power_up(unsigned int cpu, unsigned int cluster)
+{
+	unsigned int cpunr = cpu + (cluster * EXYNOS5420_CPUS_PER_CLUSTER);
+	int err = 0;
+
+	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+	if (cpu >= EXYNOS5420_CPUS_PER_CLUSTER ||
+		cluster >= EXYNOS5420_NR_CLUSTERS)
+		return -EINVAL;
+
+	/*
+	 * Since this is called with IRQs enabled, and no arch_spin_lock_irq
+	 * variant exists, we need to disable IRQs manually here.
+	 */
+	local_irq_disable();
+	arch_spin_lock(&exynos_mcpm_lock);
+
+	cpu_use_count[cpu][cluster]++;
+	if (cpu_use_count[cpu][cluster] == 1) {
+		unsigned int i;
+		int cluster_usecnt = 0;
+		bool was_cluster_down;
+
+		/*
+		 * Decide whether the cluster was down from the use counts,
+		 * which are protected by exynos_mcpm_lock.  Reading
+		 * __mcpm_cluster_state() here would be racy: that state may
+		 * change in mcpm-head.S, where concurrency protection is
+		 * achieved with a different mechanism.  If the total for
+		 * this cluster is exactly 1, the reference just taken is
+		 * the only one, i.e. the cluster was unused before.
+		 */
+		for (i = 0; i < EXYNOS5420_CPUS_PER_CLUSTER; i++)
+			cluster_usecnt += cpu_use_count[i][cluster];
+		was_cluster_down = (cluster_usecnt == 1);
+
+		/*
+		 * Turn on the cluster (L2/COMMON) and then power on the
+		 * cores.
+		 */
+		if (was_cluster_down)
+			err = exynos_cluster_power_control(cluster, 1);
+
+		/*
+		 * NOTE(review): on failure the use count taken above is not
+		 * dropped again; confirm whether callers rely on that.
+		 */
+		if (!err)
+			exynos_cpu_powerup(cpunr);
+		else
+			exynos_cluster_power_control(cluster, 0);
+	} else if (cpu_use_count[cpu][cluster] != 2) {
+		/*
+		 * The only possible values are:
+		 * 0 = CPU down
+		 * 1 = CPU (still) up
+		 * 2 = CPU requested to be up before it had a chance
+		 *     to actually make itself down.
+		 * Any other value is a bug.
+		 */
+		BUG();
+	}
+
+	arch_spin_unlock(&exynos_mcpm_lock);
+	local_irq_enable();
+
+	return err;
+}
+
+/*
+ * MCPM power_down callback, executed by the CPU that is going down.
+ *
+ * Drops this CPU's use count under exynos_mcpm_lock and then:
+ *  - count reached 0: requests core power-down; if no CPU in the cluster
+ *    holds a reference any more, this CPU is the "last man" and, provided
+ *    __mcpm_outbound_enter_critical() succeeds, flushes all cache levels
+ *    and disables the cluster's CCI port;
+ *  - count is still 1: a power_up request raced ahead, so the caches are
+ *    cleaned as usual but the final WFI is skipped.
+ * Any other count is a bug.
+ *
+ * Does not return a value; if the CPU is not dead after the WFI, control
+ * simply returns to the caller.
+ */
+static void exynos_power_down(void)
+{
+	unsigned int mpidr, cpu, cluster;
+	bool last_man = false, skip_wfi = false;
+	unsigned int cpunr;
+
+	/* Identify the calling CPU from its MPIDR affinity fields. */
+	mpidr = read_cpuid_mpidr();
+	cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+	cpunr = cpu + (cluster * EXYNOS5420_CPUS_PER_CLUSTER);
+
+	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+	BUG_ON(cpu >= EXYNOS5420_CPUS_PER_CLUSTER ||
+			cluster >= EXYNOS5420_NR_CLUSTERS);
+
+	__mcpm_cpu_going_down(cpu, cluster);
+
+	arch_spin_lock(&exynos_mcpm_lock);
+	BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
+	cpu_use_count[cpu][cluster]--;
+	if (cpu_use_count[cpu][cluster] == 0) {
+		exynos_cpu_powerdown(cpunr);
+
+		if (exynos_cluster_unused(cluster)) {
+			/*
+			 * TODO: Turn off the cluster here when all cores in
+			 * the cluster are down to achieve significant power
+			 * savings.
+			 */
+			last_man = true;
+		}
+	} else if (cpu_use_count[cpu][cluster] == 1) {
+		/*
+		 * A power_up request went ahead of us.
+		 * Even if we do not want to shut this CPU down,
+		 * the caller expects a certain state as if the WFI
+		 * was aborted.  So let's continue with cache cleaning.
+		 */
+		skip_wfi = true;
+	} else {
+		BUG();
+	}
+
+	if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
+		arch_spin_unlock(&exynos_mcpm_lock);
+
+		if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A15) {
+			/*
+			 * On the Cortex-A15 we need to disable
+			 * L2 prefetching before flushing the cache.
+			 * Only the last man flushes all cache levels,
+			 * so this is done on this path only.
+			 */
+			asm volatile(
+			"mcr	p15, 1, %0, c15, c0, 3\n\t"
+			"isb\n\t"
+			"dsb"
+			: : "r" (0x400));
+		}
+
+		/* Flush all cache levels for this cluster. */
+		exynos_v7_exit_coherency_flush(all);
+
+		/*
+		 * Disable cluster-level coherency by masking
+		 * incoming snoops and DVM messages:
+		 */
+		cci_disable_port_by_cpu(mpidr);
+
+		__mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
+	} else {
+		arch_spin_unlock(&exynos_mcpm_lock);
+
+		/* Disable and flush the local CPU cache. */
+		exynos_v7_exit_coherency_flush(louis);
+	}
+
+	__mcpm_cpu_down(cpu, cluster);
+
+	/* Now we are prepared for power-down, do it: */
+	if (!skip_wfi)
+		wfi();
+
+	/* Not dead at this point?  Let our caller cope. */
+}
+
+/*
+ * MCPM power_down_finish callback: wait for (@cpu, @cluster) to be off.
+ *
+ * Polls up to 100 times, sleeping 1 ms between polls, until the CPU's use
+ * count is zero and exynos_cpu_power_state() reports 0 for it.
+ *
+ * Returns 0 once the CPU is halted, -ETIMEDOUT otherwise.
+ */
+static int exynos_power_down_finish(unsigned int cpu, unsigned int cluster)
+{
+	unsigned int cpunr = cpu + (cluster * EXYNOS5420_CPUS_PER_CLUSTER);
+	unsigned int attempt;
+
+	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+	BUG_ON(cpu >= EXYNOS5420_CPUS_PER_CLUSTER ||
+			cluster >= EXYNOS5420_NR_CLUSTERS);
+
+	for (attempt = 0; attempt < 100; attempt++) {
+		/*
+		 * The power state is only consulted once the use count has
+		 * dropped to zero.
+		 */
+		if (ACCESS_ONCE(cpu_use_count[cpu][cluster]) == 0 &&
+		    exynos_cpu_power_state(cpunr) == 0)
+			return 0; /* success: the CPU is halted */
+
+		/* Not off yet: wait and retry. */
+		msleep(1);
+	}
+
+	return -ETIMEDOUT; /* timeout */
+}
+
+/*
+ * MCPM platform operations for Exynos5420; handed to
+ * mcpm_platform_register() by exynos_mcpm_init().
+ */
+static const struct mcpm_platform_ops exynos_power_ops = {
+	.power_up		= exynos_power_up,
+	.power_down		= exynos_power_down,
+	.power_down_finish	= exynos_power_down_finish,
+};
+
+/*
+ * Mark the CPU executing this function as in use: its entry in
+ * cpu_use_count is set to 1, located via the MPIDR affinity fields.
+ */
+static void __init exynos_mcpm_usage_count_init(void)
+{
+	const unsigned int mpidr = read_cpuid_mpidr();
+	const unsigned int cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	const unsigned int cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
+	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+
+	/* Both indices must fit the cpu_use_count array. */
+	BUG_ON(cluster >= EXYNOS5420_NR_CLUSTERS ||
+			cpu >= EXYNOS5420_CPUS_PER_CLUSTER);
+
+	cpu_use_count[cpu][cluster] = 1;
+}
+
+/*
+ * Enable cluster-level coherency, in preparation for turning on the MMU.
+ *
+ * Written as a naked function consisting only of assembly: r0 is compared
+ * with 1 (r0 is presumably where @affinity_level arrives - confirm against
+ * the ARM calling convention).  If it is not 1 the function returns straight
+ * away via lr; otherwise it branches to cci_enable_port_for_self without
+ * linking, so that routine returns directly to this function's caller.
+ */
+static void __naked exynos_pm_power_up_setup(unsigned int affinity_level)
+{
+	asm volatile ("\n"
+	"cmp	r0, #1\n"
+	"bxne	lr\n"
+	"b	cci_enable_port_for_self");
+}
+
+/*
+ * Early-init setup of MCPM support for Exynos5420.
+ *
+ * Maps the non-secure iRAM, programs PMU SPARE3, records the calling CPU as
+ * in use, registers the platform ops and the power-up setup routine with
+ * MCPM, installs the MCPM SMP ops and finally writes the physical address
+ * of mcpm_entry_point into the non-secure iRAM.
+ *
+ * Returns 0 on success; -ENODEV if no "samsung,exynos5420" node or no
+ * "samsung,exynos4210-sram-ns" node is found or the CCI has not been
+ * probed; -ENOMEM if the iRAM cannot be mapped; or the error returned by
+ * mcpm_platform_register() / mcpm_sync_init().
+ */
+static int __init exynos_mcpm_init(void)
+{
+	struct device_node *np;
+	int ret;
+
+	/* Bail out unless a "samsung,exynos5420" compatible node exists. */
+	np = of_find_compatible_node(NULL, NULL, "samsung,exynos5420");
+	if (np == NULL)
+		return -ENODEV;
+	of_node_put(np);
+
+	if (!cci_probed())
+		return -ENODEV;
+
+	np = of_find_compatible_node(NULL, NULL,
+			"samsung,exynos4210-sram-ns");
+	if (np == NULL)
+		return -ENODEV;
+
+	/* The node reference is no longer needed once the mapping exists. */
+	ns_sram_base_addr = of_iomap(np, 0);
+	of_node_put(np);
+	if (ns_sram_base_addr == NULL) {
+		pr_err("failed to map non-secure iRAM base address\n");
+		return -ENOMEM;
+	}
+
+	/*
+	 * To increase the stability of KFC reset we need to program
+	 * the PMU SPARE3 register
+	 */
+	__raw_writel(EXYNOS5420_SWRESET_KFC_SEL, S5P_PMU_SPARE3);
+
+	exynos_mcpm_usage_count_init();
+
+	ret = mcpm_platform_register(&exynos_power_ops);
+	if (ret)
+		goto err_unmap;
+
+	ret = mcpm_sync_init(exynos_pm_power_up_setup);
+	if (ret)
+		goto err_unmap;
+
+	mcpm_smp_set_ops();
+
+	pr_info("Exynos MCPM support installed\n");
+
+	/*
+	 * Future entries into the kernel can now go
+	 * through the cluster entry vectors.
+	 */
+	__raw_writel(virt_to_phys(mcpm_entry_point), ns_sram_base_addr + 0x1c);
+
+	return 0;
+
+err_unmap:
+	/* Registration failed: drop the iRAM mapping again. */
+	iounmap(ns_sram_base_addr);
+	return ret;
+}
+
+early_initcall(exynos_mcpm_init);
diff --git a/arch/arm/mach-exynos/regs-pmu.h b/arch/arm/mach-exynos/regs-pmu.h
index 6685ebf..f44d318 100644
--- a/arch/arm/mach-exynos/regs-pmu.h
+++ b/arch/arm/mach-exynos/regs-pmu.h
@@ -38,6 +38,7 @@ 
 #define S5P_INFORM5				S5P_PMUREG(0x0814)
 #define S5P_INFORM6				S5P_PMUREG(0x0818)
 #define S5P_INFORM7				S5P_PMUREG(0x081C)
+#define S5P_PMU_SPARE3				S5P_PMUREG(0x090C)
 
 #define S5P_ARM_CORE0_LOWPWR			S5P_PMUREG(0x1000)
 #define S5P_DIS_IRQ_CORE0			S5P_PMUREG(0x1004)
@@ -325,4 +326,6 @@ 
 
 #define EXYNOS5_OPTION_USE_RETENTION				(1 << 4)
 
+#define EXYNOS5420_SWRESET_KFC_SEL				0x3
+
 #endif /* __ASM_ARCH_REGS_PMU_H */