diff mbox

[1/3] ARM: EXYNOS: remove non-working AFTR mode support

Message ID 1372241627-22695-2-git-send-email-b.zolnierkie@samsung.com (mailing list archive)
State New, archived
Headers show

Commit Message

Bartlomiej Zolnierkiewicz June 26, 2013, 10:13 a.m. UTC
AFTR mode support was introduced by commit 67173ca ("ARM: EXYNOS: Add
support AFTR mode on EXYNOS4210") in v3.4 kernel.  Unfortunately even
in v3.4 kernel it hasn't worked as supposed and this is still the case
with v3.10-rc6 (it probably wasn't noticed because CONFIG_CPU_IDLE is
not turned on by default):

- on revision 0 of Exynos4210 (Universal C210 board) it causes lockup
  (on this revision only one core is usable so entry to AFTR mode is
  always attempted because the code tries to go into AFTR mode when all
  other CPUs except CPU0 are offlined)

- on revision 1.1 of Exynos4210 (Trats board) it causes a lockup when
  CPU1 is offlined (i.e. echo 0 > /sys/devices/system/cpu/cpu1/online)

- on later Exynos4/5 SoCs wrong registers may be accessed when all CPUs
  except CPU0 are offlined resulting in panic/lockup (REG_DIRECTGO_ADDR
  and REG_DIRECTGO_FLAG register selections was implemented only for
  Exynos4210)

Just remove AFTR mode support for now.

Cc: Jaecheol Lee <jc.lee@samsung.com>
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: Amit Daniel Kachhap <amit.kachhap@linaro.org>
Cc: Tomasz Figa <t.figa@samsung.com>
Cc: Kukjin Kim <kgene.kim@samsung.com>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 arch/arm/mach-exynos/cpuidle.c | 131 +----------------------------------------
 1 file changed, 1 insertion(+), 130 deletions(-)

Comments

Daniel Lezcano June 26, 2013, 10:36 a.m. UTC | #1
On 06/26/2013 12:13 PM, Bartlomiej Zolnierkiewicz wrote:
> AFTR mode support was introduced by commit 67173ca ("ARM: EXYNOS: Add
> support AFTR mode on EXYNOS4210") in v3.4 kernel.  Unfortunately even
> in v3.4 kernel it hasn't worked as supposed and this is still the case
> with v3.10-rc6 (it probably wasn't noticed because CONFIG_CPU_IDLE is
> not turned on by default):
> 
> - on revision 0 of Exynos4210 (Universal C210 board) it causes lockup
>   (on this revision only one core is usable so entry to AFTR mode is
>   always attempted because the code tries to go into AFTR mode when all
>   other CPUs except CPU0 are offlined)
> 
> - on revision 1.1 of Exynos4210 (Trats board) it causes a lockup when
>   CPU1 is offlined (i.e. echo 0 > /sys/devices/system/cpu/cpu1/online)
> 
> - on later Exynos4/5 SoCs wrong registers may be accessed when all CPUs
>   except CPU0 are offlined resulting in panic/lockup (REG_DIRECTGO_ADDR
>   and REG_DIRECTGO_FLAG register selections was implemented only for
>   Exynos4210)
> 
> Just remove AFTR mode support for now.

Ok, I will jump on the opportunity to talk about this state.

1. I tried different ways to make the AFTR state to be entered with
*both* cpu online. It goes successfully to this state. The CPU0 is
correctly woken up but the CPU1 is never woken up, why is it happening ?

https://bugs.launchpad.net/linaro-landing-team-samsung/+bug/1171518

2. The CPU1 hotplug bug should been fixed and if I am not wrong there is
a patch somewhere fixing this.

https://bugs.launchpad.net/linaro-power-kernel/+bug/1171382

3. What is the fix for Exynos5 ?

https://bugs.launchpad.net/linaro-power-kernel/+bug/1171253

It sounds like depending on Hypervisor mode is enabled or not, the AFTR
does not work correctly.

In other words, instead of removing the AFTR state I suggest to fix it:
both core being online, split driver for exynos4 and 5.

Thanks
  -- Daniel

> Cc: Jaecheol Lee <jc.lee@samsung.com>
> Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
> Cc: Amit Daniel Kachhap <amit.kachhap@linaro.org>
> Cc: Tomasz Figa <t.figa@samsung.com>
> Cc: Kukjin Kim <kgene.kim@samsung.com>
> Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
> Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
> Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
> Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
> ---
>  arch/arm/mach-exynos/cpuidle.c | 131 +----------------------------------------
>  1 file changed, 1 insertion(+), 130 deletions(-)
> 
> diff --git a/arch/arm/mach-exynos/cpuidle.c b/arch/arm/mach-exynos/cpuidle.c
> index 17a18ff..0a657ac 100644
> --- a/arch/arm/mach-exynos/cpuidle.c
> +++ b/arch/arm/mach-exynos/cpuidle.c
> @@ -17,30 +17,11 @@
>  #include <linux/time.h>
>  
>  #include <asm/proc-fns.h>
> -#include <asm/smp_scu.h>
> -#include <asm/suspend.h>
> -#include <asm/unified.h>
>  #include <asm/cpuidle.h>
>  #include <mach/regs-clock.h>
> -#include <mach/regs-pmu.h>
> -
> -#include <plat/cpu.h>
>  
>  #include "common.h"
>  
> -#define REG_DIRECTGO_ADDR	(samsung_rev() == EXYNOS4210_REV_1_1 ? \
> -			S5P_INFORM7 : (samsung_rev() == EXYNOS4210_REV_1_0 ? \
> -			(S5P_VA_SYSRAM + 0x24) : S5P_INFORM0))
> -#define REG_DIRECTGO_FLAG	(samsung_rev() == EXYNOS4210_REV_1_1 ? \
> -			S5P_INFORM6 : (samsung_rev() == EXYNOS4210_REV_1_0 ? \
> -			(S5P_VA_SYSRAM + 0x20) : S5P_INFORM1))
> -
> -#define S5P_CHECK_AFTR		0xFCBA0D10
> -
> -static int exynos4_enter_lowpower(struct cpuidle_device *dev,
> -				struct cpuidle_driver *drv,
> -				int index);
> -
>  static DEFINE_PER_CPU(struct cpuidle_device, exynos4_cpuidle_device);
>  
>  static struct cpuidle_driver exynos4_idle_driver = {
> @@ -48,117 +29,11 @@ static struct cpuidle_driver exynos4_idle_driver = {
>  	.owner			= THIS_MODULE,
>  	.states = {
>  		[0] = ARM_CPUIDLE_WFI_STATE,
> -		[1] = {
> -			.enter			= exynos4_enter_lowpower,
> -			.exit_latency		= 300,
> -			.target_residency	= 100000,
> -			.flags			= CPUIDLE_FLAG_TIME_VALID,
> -			.name			= "C1",
> -			.desc			= "ARM power down",
> -		},
>  	},
> -	.state_count = 2,
> +	.state_count = 1,
>  	.safe_state_index = 0,
>  };
>  
> -/* Ext-GIC nIRQ/nFIQ is the only wakeup source in AFTR */
> -static void exynos4_set_wakeupmask(void)
> -{
> -	__raw_writel(0x0000ff3e, S5P_WAKEUP_MASK);
> -}
> -
> -static unsigned int g_pwr_ctrl, g_diag_reg;
> -
> -static void save_cpu_arch_register(void)
> -{
> -	/*read power control register*/
> -	asm("mrc p15, 0, %0, c15, c0, 0" : "=r"(g_pwr_ctrl) : : "cc");
> -	/*read diagnostic register*/
> -	asm("mrc p15, 0, %0, c15, c0, 1" : "=r"(g_diag_reg) : : "cc");
> -	return;
> -}
> -
> -static void restore_cpu_arch_register(void)
> -{
> -	/*write power control register*/
> -	asm("mcr p15, 0, %0, c15, c0, 0" : : "r"(g_pwr_ctrl) : "cc");
> -	/*write diagnostic register*/
> -	asm("mcr p15, 0, %0, c15, c0, 1" : : "r"(g_diag_reg) : "cc");
> -	return;
> -}
> -
> -static int idle_finisher(unsigned long flags)
> -{
> -	cpu_do_idle();
> -	return 1;
> -}
> -
> -static int exynos4_enter_core0_aftr(struct cpuidle_device *dev,
> -				struct cpuidle_driver *drv,
> -				int index)
> -{
> -	unsigned long tmp;
> -
> -	exynos4_set_wakeupmask();
> -
> -	/* Set value of power down register for aftr mode */
> -	exynos_sys_powerdown_conf(SYS_AFTR);
> -
> -	__raw_writel(virt_to_phys(s3c_cpu_resume), REG_DIRECTGO_ADDR);
> -	__raw_writel(S5P_CHECK_AFTR, REG_DIRECTGO_FLAG);
> -
> -	save_cpu_arch_register();
> -
> -	/* Setting Central Sequence Register for power down mode */
> -	tmp = __raw_readl(S5P_CENTRAL_SEQ_CONFIGURATION);
> -	tmp &= ~S5P_CENTRAL_LOWPWR_CFG;
> -	__raw_writel(tmp, S5P_CENTRAL_SEQ_CONFIGURATION);
> -
> -	cpu_pm_enter();
> -	cpu_suspend(0, idle_finisher);
> -
> -#ifdef CONFIG_SMP
> -	if (!soc_is_exynos5250())
> -		scu_enable(S5P_VA_SCU);
> -#endif
> -	cpu_pm_exit();
> -
> -	restore_cpu_arch_register();
> -
> -	/*
> -	 * If PMU failed while entering sleep mode, WFI will be
> -	 * ignored by PMU and then exiting cpu_do_idle().
> -	 * S5P_CENTRAL_LOWPWR_CFG bit will not be set automatically
> -	 * in this situation.
> -	 */
> -	tmp = __raw_readl(S5P_CENTRAL_SEQ_CONFIGURATION);
> -	if (!(tmp & S5P_CENTRAL_LOWPWR_CFG)) {
> -		tmp |= S5P_CENTRAL_LOWPWR_CFG;
> -		__raw_writel(tmp, S5P_CENTRAL_SEQ_CONFIGURATION);
> -	}
> -
> -	/* Clear wakeup state register */
> -	__raw_writel(0x0, S5P_WAKEUP_STAT);
> -
> -	return index;
> -}
> -
> -static int exynos4_enter_lowpower(struct cpuidle_device *dev,
> -				struct cpuidle_driver *drv,
> -				int index)
> -{
> -	int new_index = index;
> -
> -	/* This mode only can be entered when other core's are offline */
> -	if (num_online_cpus() > 1)
> -		new_index = drv->safe_state_index;
> -
> -	if (new_index == 0)
> -		return arm_cpuidle_simple_enter(dev, drv, new_index);
> -	else
> -		return exynos4_enter_core0_aftr(dev, drv, new_index);
> -}
> -
>  static void __init exynos5_core_down_clk(void)
>  {
>  	unsigned int tmp;
> @@ -209,10 +84,6 @@ static int __init exynos4_init_cpuidle(void)
>  		device = &per_cpu(exynos4_cpuidle_device, cpu_id);
>  		device->cpu = cpu_id;
>  
> -		/* Support IDLE only */
> -		if (cpu_id != 0)
> -			device->state_count = 1;
> -
>  		ret = cpuidle_register_device(device);
>  		if (ret) {
>  			printk(KERN_ERR "CPUidle register device failed\n");
>
Bartlomiej Zolnierkiewicz June 27, 2013, 6:10 p.m. UTC | #2
On Wednesday, June 26, 2013 12:36:12 PM Daniel Lezcano wrote:
> On 06/26/2013 12:13 PM, Bartlomiej Zolnierkiewicz wrote:
> > AFTR mode support was introduced by commit 67173ca ("ARM: EXYNOS: Add
> > support AFTR mode on EXYNOS4210") in v3.4 kernel.  Unfortunately even
> > in v3.4 kernel it hasn't worked as supposed and this is still the case
> > with v3.10-rc6 (it probably wasn't noticed because CONFIG_CPU_IDLE is
> > not turned on by default):
> > 
> > - on revision 0 of Exynos4210 (Universal C210 board) it causes lockup
> >   (on this revision only one core is usable so entry to AFTR mode is
> >   always attempted because the code tries to go into AFTR mode when all
> >   other CPUs except CPU0 are offlined)
> > 
> > - on revision 1.1 of Exynos4210 (Trats board) it causes a lockup when
> >   CPU1 is offlined (i.e. echo 0 > /sys/devices/system/cpu/cpu1/online)
> > 
> > - on later Exynos4/5 SoCs wrong registers may be accessed when all CPUs
> >   except CPU0 are offlined resulting in panic/lockup (REG_DIRECTGO_ADDR
> >   and REG_DIRECTGO_FLAG register selections was implemented only for
> >   Exynos4210)
> > 
> > Just remove AFTR mode support for now.
> 
> Ok, I will jump on the opportunity to talk about this state.
> 
> 1. I tried different ways to make the AFTR state to be entered with
> *both* cpu online. It goes successfully to this state. The CPU0 is
> correctly woken up but the CPU1 is never woken up, why is it happening ?
> 
> https://bugs.launchpad.net/linaro-landing-team-samsung/+bug/1171518

No idea here, AFTR doesn't work for me with upstream kernels even if
only one CPU is online.

Also the documentation says that before entering system-level power-down
mode (such as AFTR) when multiple CPUs cores are used all other CPU cores
should stop interrupt service so I'm not sure how the way attempted by
you should work.

> 2. The CPU1 hotplug bug should been fixed and if I am not wrong there is
> a patch somewhere fixing this.
> 
> https://bugs.launchpad.net/linaro-power-kernel/+bug/1171382

Unfortunately none of the patches there helps with my issues.

> 3. What is the fix for Exynos5 ?
> 
> https://bugs.launchpad.net/linaro-power-kernel/+bug/1171253
> 
> It sounds like depending on Hypervisor mode is enabled or not, the AFTR
> does not work correctly.

Sorry no idea here either.  On any SoCs later than EXYNOS4210 the
registers used for s3c_cpu_resume address and 0xFCBA0D10 magic number
may be different than EXYNOS4210 defaults (at least on EXYNOS4412 they
indeed are different, unfortunately I lack any info needed for EXYNOS5
support). You are lucky that it even works in some cases on EXYNOS5250.

> In other words, instead of removing the AFTR state I suggest to fix it:
> both core being online, split driver for exynos4 and 5.

My main problem is that with the upstream kernel even on EXYNOS4210
rev0 (only one core useable due to hardware issues) the kernel goes
into AFTR state for the first few times during boot and then it just
lockups (after going into cpu_do_idle() which is really cpu_v7_do_idle()
and which does wfi call) and doesn't wake up CPU0. I have currently
no idea how to fix or debug it further.

The issue happens with every upstream kernel version tried (from v3.4
to v3.10-rc6).  Lockups also happen on EXYNOS4210 rev1.1 when CPU1 is
offlined by hand and then cpuidle driver tries to go into AFTR mode
(because by default it doesn't go into AFTR mode on any SoC except
EXYNOS4210 rev0).

I don't have EXYNOS4210 rev1.0 but it seems that in the upstream AFTR
mode has never worked (even on hardware that it was originally developed
on) since its introduction in v3.4 (which was released on 20th May 2012).

IOW for over the year nobody cared to make it work and I have currently
no fix at hand so the corrent upstream resolution is to just remove the
known non-working code and re-introduce it later when/if needed (I can
just disable it with a small fix but we don't keep such long-term broken
code as placeholder in the upstream kernel).  If left as it is people
can hit the known issues and waste time debugging them, just like this
happenend for me [1].

If you have AFTR mode working (especially on EXYNOS4210) in Linaro
kernels please get fixes upstream ASAP. However I still wonder whether
the maintanance nightmare (bugs for different cases in your launchpad)
is worth gains over standard idle mode as the rumor around here is that
they are not that great (unfortunately no numbers were provided during
original feature addition).

Best regards,
--
Bartlomiej Zolnierkiewicz
Samsung R&D Institute Poland
Samsung Electronics

[1] Somebody enabled CONFIG_CPU_IDLE in the default config in one of our
internal kernel trees on a target on which this feature "works" (since
CPUs are not hot-unplugged by default on all targets except EXYNOS4210
rev0 the code for AFTR is not used) and resulted in lockups on boot on
my default testing target. It took my long time to find out that problem
is actually caused by just enabling exynos cpuidle driver.

> Thanks
>   -- Daniel
> 
> > Cc: Jaecheol Lee <jc.lee@samsung.com>
> > Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
> > Cc: Amit Daniel Kachhap <amit.kachhap@linaro.org>
> > Cc: Tomasz Figa <t.figa@samsung.com>
> > Cc: Kukjin Kim <kgene.kim@samsung.com>
> > Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
> > Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
> > Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
> > Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
> > ---
> >  arch/arm/mach-exynos/cpuidle.c | 131 +----------------------------------------
> >  1 file changed, 1 insertion(+), 130 deletions(-)
> > 
> > diff --git a/arch/arm/mach-exynos/cpuidle.c b/arch/arm/mach-exynos/cpuidle.c
> > index 17a18ff..0a657ac 100644
> > --- a/arch/arm/mach-exynos/cpuidle.c
> > +++ b/arch/arm/mach-exynos/cpuidle.c
> > @@ -17,30 +17,11 @@
> >  #include <linux/time.h>
> >  
> >  #include <asm/proc-fns.h>
> > -#include <asm/smp_scu.h>
> > -#include <asm/suspend.h>
> > -#include <asm/unified.h>
> >  #include <asm/cpuidle.h>
> >  #include <mach/regs-clock.h>
> > -#include <mach/regs-pmu.h>
> > -
> > -#include <plat/cpu.h>
> >  
> >  #include "common.h"
> >  
> > -#define REG_DIRECTGO_ADDR	(samsung_rev() == EXYNOS4210_REV_1_1 ? \
> > -			S5P_INFORM7 : (samsung_rev() == EXYNOS4210_REV_1_0 ? \
> > -			(S5P_VA_SYSRAM + 0x24) : S5P_INFORM0))
> > -#define REG_DIRECTGO_FLAG	(samsung_rev() == EXYNOS4210_REV_1_1 ? \
> > -			S5P_INFORM6 : (samsung_rev() == EXYNOS4210_REV_1_0 ? \
> > -			(S5P_VA_SYSRAM + 0x20) : S5P_INFORM1))
> > -
> > -#define S5P_CHECK_AFTR		0xFCBA0D10
> > -
> > -static int exynos4_enter_lowpower(struct cpuidle_device *dev,
> > -				struct cpuidle_driver *drv,
> > -				int index);
> > -
> >  static DEFINE_PER_CPU(struct cpuidle_device, exynos4_cpuidle_device);
> >  
> >  static struct cpuidle_driver exynos4_idle_driver = {
> > @@ -48,117 +29,11 @@ static struct cpuidle_driver exynos4_idle_driver = {
> >  	.owner			= THIS_MODULE,
> >  	.states = {
> >  		[0] = ARM_CPUIDLE_WFI_STATE,
> > -		[1] = {
> > -			.enter			= exynos4_enter_lowpower,
> > -			.exit_latency		= 300,
> > -			.target_residency	= 100000,
> > -			.flags			= CPUIDLE_FLAG_TIME_VALID,
> > -			.name			= "C1",
> > -			.desc			= "ARM power down",
> > -		},
> >  	},
> > -	.state_count = 2,
> > +	.state_count = 1,
> >  	.safe_state_index = 0,
> >  };
> >  
> > -/* Ext-GIC nIRQ/nFIQ is the only wakeup source in AFTR */
> > -static void exynos4_set_wakeupmask(void)
> > -{
> > -	__raw_writel(0x0000ff3e, S5P_WAKEUP_MASK);
> > -}
> > -
> > -static unsigned int g_pwr_ctrl, g_diag_reg;
> > -
> > -static void save_cpu_arch_register(void)
> > -{
> > -	/*read power control register*/
> > -	asm("mrc p15, 0, %0, c15, c0, 0" : "=r"(g_pwr_ctrl) : : "cc");
> > -	/*read diagnostic register*/
> > -	asm("mrc p15, 0, %0, c15, c0, 1" : "=r"(g_diag_reg) : : "cc");
> > -	return;
> > -}
> > -
> > -static void restore_cpu_arch_register(void)
> > -{
> > -	/*write power control register*/
> > -	asm("mcr p15, 0, %0, c15, c0, 0" : : "r"(g_pwr_ctrl) : "cc");
> > -	/*write diagnostic register*/
> > -	asm("mcr p15, 0, %0, c15, c0, 1" : : "r"(g_diag_reg) : "cc");
> > -	return;
> > -}
> > -
> > -static int idle_finisher(unsigned long flags)
> > -{
> > -	cpu_do_idle();
> > -	return 1;
> > -}
> > -
> > -static int exynos4_enter_core0_aftr(struct cpuidle_device *dev,
> > -				struct cpuidle_driver *drv,
> > -				int index)
> > -{
> > -	unsigned long tmp;
> > -
> > -	exynos4_set_wakeupmask();
> > -
> > -	/* Set value of power down register for aftr mode */
> > -	exynos_sys_powerdown_conf(SYS_AFTR);
> > -
> > -	__raw_writel(virt_to_phys(s3c_cpu_resume), REG_DIRECTGO_ADDR);
> > -	__raw_writel(S5P_CHECK_AFTR, REG_DIRECTGO_FLAG);
> > -
> > -	save_cpu_arch_register();
> > -
> > -	/* Setting Central Sequence Register for power down mode */
> > -	tmp = __raw_readl(S5P_CENTRAL_SEQ_CONFIGURATION);
> > -	tmp &= ~S5P_CENTRAL_LOWPWR_CFG;
> > -	__raw_writel(tmp, S5P_CENTRAL_SEQ_CONFIGURATION);
> > -
> > -	cpu_pm_enter();
> > -	cpu_suspend(0, idle_finisher);
> > -
> > -#ifdef CONFIG_SMP
> > -	if (!soc_is_exynos5250())
> > -		scu_enable(S5P_VA_SCU);
> > -#endif
> > -	cpu_pm_exit();
> > -
> > -	restore_cpu_arch_register();
> > -
> > -	/*
> > -	 * If PMU failed while entering sleep mode, WFI will be
> > -	 * ignored by PMU and then exiting cpu_do_idle().
> > -	 * S5P_CENTRAL_LOWPWR_CFG bit will not be set automatically
> > -	 * in this situation.
> > -	 */
> > -	tmp = __raw_readl(S5P_CENTRAL_SEQ_CONFIGURATION);
> > -	if (!(tmp & S5P_CENTRAL_LOWPWR_CFG)) {
> > -		tmp |= S5P_CENTRAL_LOWPWR_CFG;
> > -		__raw_writel(tmp, S5P_CENTRAL_SEQ_CONFIGURATION);
> > -	}
> > -
> > -	/* Clear wakeup state register */
> > -	__raw_writel(0x0, S5P_WAKEUP_STAT);
> > -
> > -	return index;
> > -}
> > -
> > -static int exynos4_enter_lowpower(struct cpuidle_device *dev,
> > -				struct cpuidle_driver *drv,
> > -				int index)
> > -{
> > -	int new_index = index;
> > -
> > -	/* This mode only can be entered when other core's are offline */
> > -	if (num_online_cpus() > 1)
> > -		new_index = drv->safe_state_index;
> > -
> > -	if (new_index == 0)
> > -		return arm_cpuidle_simple_enter(dev, drv, new_index);
> > -	else
> > -		return exynos4_enter_core0_aftr(dev, drv, new_index);
> > -}
> > -
> >  static void __init exynos5_core_down_clk(void)
> >  {
> >  	unsigned int tmp;
> > @@ -209,10 +84,6 @@ static int __init exynos4_init_cpuidle(void)
> >  		device = &per_cpu(exynos4_cpuidle_device, cpu_id);
> >  		device->cpu = cpu_id;
> >  
> > -		/* Support IDLE only */
> > -		if (cpu_id != 0)
> > -			device->state_count = 1;
> > -
> >  		ret = cpuidle_register_device(device);
> >  		if (ret) {
> >  			printk(KERN_ERR "CPUidle register device failed\n");
Daniel Lezcano June 28, 2013, 9:57 a.m. UTC | #3
On 06/27/2013 08:10 PM, Bartlomiej Zolnierkiewicz wrote:
> On Wednesday, June 26, 2013 12:36:12 PM Daniel Lezcano wrote:
>> On 06/26/2013 12:13 PM, Bartlomiej Zolnierkiewicz wrote:
>>> AFTR mode support was introduced by commit 67173ca ("ARM: EXYNOS: Add
>>> support AFTR mode on EXYNOS4210") in v3.4 kernel.  Unfortunately even
>>> in v3.4 kernel it hasn't worked as supposed and this is still the case
>>> with v3.10-rc6 (it probably wasn't noticed because CONFIG_CPU_IDLE is
>>> not turned on by default):
>>>
>>> - on revision 0 of Exynos4210 (Universal C210 board) it causes lockup
>>>   (on this revision only one core is usable so entry to AFTR mode is
>>>   always attempted because the code tries to go into AFTR mode when all
>>>   other CPUs except CPU0 are offlined)
>>>
>>> - on revision 1.1 of Exynos4210 (Trats board) it causes a lockup when
>>>   CPU1 is offlined (i.e. echo 0 > /sys/devices/system/cpu/cpu1/online)
>>>
>>> - on later Exynos4/5 SoCs wrong registers may be accessed when all CPUs
>>>   except CPU0 are offlined resulting in panic/lockup (REG_DIRECTGO_ADDR
>>>   and REG_DIRECTGO_FLAG register selections was implemented only for
>>>   Exynos4210)
>>>
>>> Just remove AFTR mode support for now.
>>
>> Ok, I will jump on the opportunity to talk about this state.
>>
>> 1. I tried different ways to make the AFTR state to be entered with
>> *both* cpu online. It goes successfully to this state. The CPU0 is
>> correctly woken up but the CPU1 is never woken up, why is it happening ?
>>
>> https://bugs.launchpad.net/linaro-landing-team-samsung/+bug/1171518
> 
> No idea here, AFTR doesn't work for me with upstream kernels even if
> only one CPU is online.

What do you mean by "AFTR doesn't work" ? Is the kernel hanging ? The
state is never reached ?

> Also the documentation says that before entering system-level power-down
> mode (such as AFTR) when multiple CPUs cores are used all other CPU cores
> should stop interrupt service so I'm not sure how the way attempted by
> you should work.

The cpu enters the idle mode with the interrupts disabled.

>> 2. The CPU1 hotplug bug should been fixed and if I am not wrong there is
>> a patch somewhere fixing this.
>>
>> https://bugs.launchpad.net/linaro-power-kernel/+bug/1171382
> 
> Unfortunately none of the patches there helps with my issues.
> 
>> 3. What is the fix for Exynos5 ?
>>
>> https://bugs.launchpad.net/linaro-power-kernel/+bug/1171253
>>
>> It sounds like depending on Hypervisor mode is enabled or not, the AFTR
>> does not work correctly.
> 
> Sorry no idea here either.  On any SoCs later than EXYNOS4210 the
> registers used for s3c_cpu_resume address and 0xFCBA0D10 magic number
> may be different than EXYNOS4210 defaults (at least on EXYNOS4412 they
> indeed are different, unfortunately I lack any info needed for EXYNOS5
> support). You are lucky that it even works in some cases on EXYNOS5250.
> 
>> In other words, instead of removing the AFTR state I suggest to fix it:
>> both core being online, split driver for exynos4 and 5.
> 
> My main problem is that with the upstream kernel even on EXYNOS4210
> rev0 (only one core useable due to hardware issues) the kernel goes
> into AFTR state for the first few times during boot and then it just
> lockups (after going into cpu_do_idle() which is really cpu_v7_do_idle()
> and which does wfi call) and doesn't wake up CPU0. I have currently
> no idea how to fix or debug it further.

I have an Origen 4210 board Ver A. and it works without problem with the
AFTR mode (cpu1 unplugged).

> The issue happens with every upstream kernel version tried (from v3.4
> to v3.10-rc6).  Lockups also happen on EXYNOS4210 rev1.1 when CPU1 is
> offlined by hand and then cpuidle driver tries to go into AFTR mode
> (because by default it doesn't go into AFTR mode on any SoC except
> EXYNOS4210 rev0).
> 
> I don't have EXYNOS4210 rev1.0 but it seems that in the upstream AFTR
> mode has never worked (even on hardware that it was originally developed
> on) since its introduction in v3.4 (which was released on 20th May 2012).
> 
> IOW for over the year nobody cared to make it work and I have currently
> no fix at hand so the corrent upstream resolution is to just remove the
> known non-working code and re-introduce it later when/if needed (I can
> just disable it with a small fix but we don't keep such long-term broken
> code as placeholder in the upstream kernel).  If left as it is people
> can hit the known issues and waste time debugging them, just like this
> happenend for me [1].
> 
> If you have AFTR mode working (especially on EXYNOS4210) in Linaro
> kernels please get fixes upstream ASAP. However I still wonder whether
> the maintanance nightmare (bugs for different cases in your launchpad)
> is worth gains over standard idle mode as the rumor around here is that
> they are not that great (unfortunately no numbers were provided during
> original feature addition).

It works forme with a vanilla kernel 3.10.0-rc7.

Removing a feature because it seems not working is not a good solution.
The right way is to investigate what is happening and why.

Thanks
  -- Daniel

> --
> Bartlomiej Zolnierkiewicz
> Samsung R&D Institute Poland
> Samsung Electronics
> 
> [1] Somebody enabled CONFIG_CPU_IDLE in the default config in one of our
> internal kernel trees on a target on which this feature "works" (since
> CPUs are not hot-unplugged by default on all targets except EXYNOS4210
> rev0 the code for AFTR is not used) and resulted in lockups on boot on
> my default testing target. It took my long time to find out that problem
> is actually caused by just enabling exynos cpuidle driver.
> 
>> Thanks
>>   -- Daniel
>>
>>> Cc: Jaecheol Lee <jc.lee@samsung.com>
>>> Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
>>> Cc: Amit Daniel Kachhap <amit.kachhap@linaro.org>
>>> Cc: Tomasz Figa <t.figa@samsung.com>
>>> Cc: Kukjin Kim <kgene.kim@samsung.com>
>>> Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
>>> Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
>>> Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
>>> Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
>>> ---
>>>  arch/arm/mach-exynos/cpuidle.c | 131 +----------------------------------------
>>>  1 file changed, 1 insertion(+), 130 deletions(-)
>>>
>>> diff --git a/arch/arm/mach-exynos/cpuidle.c b/arch/arm/mach-exynos/cpuidle.c
>>> index 17a18ff..0a657ac 100644
>>> --- a/arch/arm/mach-exynos/cpuidle.c
>>> +++ b/arch/arm/mach-exynos/cpuidle.c
>>> @@ -17,30 +17,11 @@
>>>  #include <linux/time.h>
>>>  
>>>  #include <asm/proc-fns.h>
>>> -#include <asm/smp_scu.h>
>>> -#include <asm/suspend.h>
>>> -#include <asm/unified.h>
>>>  #include <asm/cpuidle.h>
>>>  #include <mach/regs-clock.h>
>>> -#include <mach/regs-pmu.h>
>>> -
>>> -#include <plat/cpu.h>
>>>  
>>>  #include "common.h"
>>>  
>>> -#define REG_DIRECTGO_ADDR	(samsung_rev() == EXYNOS4210_REV_1_1 ? \
>>> -			S5P_INFORM7 : (samsung_rev() == EXYNOS4210_REV_1_0 ? \
>>> -			(S5P_VA_SYSRAM + 0x24) : S5P_INFORM0))
>>> -#define REG_DIRECTGO_FLAG	(samsung_rev() == EXYNOS4210_REV_1_1 ? \
>>> -			S5P_INFORM6 : (samsung_rev() == EXYNOS4210_REV_1_0 ? \
>>> -			(S5P_VA_SYSRAM + 0x20) : S5P_INFORM1))
>>> -
>>> -#define S5P_CHECK_AFTR		0xFCBA0D10
>>> -
>>> -static int exynos4_enter_lowpower(struct cpuidle_device *dev,
>>> -				struct cpuidle_driver *drv,
>>> -				int index);
>>> -
>>>  static DEFINE_PER_CPU(struct cpuidle_device, exynos4_cpuidle_device);
>>>  
>>>  static struct cpuidle_driver exynos4_idle_driver = {
>>> @@ -48,117 +29,11 @@ static struct cpuidle_driver exynos4_idle_driver = {
>>>  	.owner			= THIS_MODULE,
>>>  	.states = {
>>>  		[0] = ARM_CPUIDLE_WFI_STATE,
>>> -		[1] = {
>>> -			.enter			= exynos4_enter_lowpower,
>>> -			.exit_latency		= 300,
>>> -			.target_residency	= 100000,
>>> -			.flags			= CPUIDLE_FLAG_TIME_VALID,
>>> -			.name			= "C1",
>>> -			.desc			= "ARM power down",
>>> -		},
>>>  	},
>>> -	.state_count = 2,
>>> +	.state_count = 1,
>>>  	.safe_state_index = 0,
>>>  };
>>>  
>>> -/* Ext-GIC nIRQ/nFIQ is the only wakeup source in AFTR */
>>> -static void exynos4_set_wakeupmask(void)
>>> -{
>>> -	__raw_writel(0x0000ff3e, S5P_WAKEUP_MASK);
>>> -}
>>> -
>>> -static unsigned int g_pwr_ctrl, g_diag_reg;
>>> -
>>> -static void save_cpu_arch_register(void)
>>> -{
>>> -	/*read power control register*/
>>> -	asm("mrc p15, 0, %0, c15, c0, 0" : "=r"(g_pwr_ctrl) : : "cc");
>>> -	/*read diagnostic register*/
>>> -	asm("mrc p15, 0, %0, c15, c0, 1" : "=r"(g_diag_reg) : : "cc");
>>> -	return;
>>> -}
>>> -
>>> -static void restore_cpu_arch_register(void)
>>> -{
>>> -	/*write power control register*/
>>> -	asm("mcr p15, 0, %0, c15, c0, 0" : : "r"(g_pwr_ctrl) : "cc");
>>> -	/*write diagnostic register*/
>>> -	asm("mcr p15, 0, %0, c15, c0, 1" : : "r"(g_diag_reg) : "cc");
>>> -	return;
>>> -}
>>> -
>>> -static int idle_finisher(unsigned long flags)
>>> -{
>>> -	cpu_do_idle();
>>> -	return 1;
>>> -}
>>> -
>>> -static int exynos4_enter_core0_aftr(struct cpuidle_device *dev,
>>> -				struct cpuidle_driver *drv,
>>> -				int index)
>>> -{
>>> -	unsigned long tmp;
>>> -
>>> -	exynos4_set_wakeupmask();
>>> -
>>> -	/* Set value of power down register for aftr mode */
>>> -	exynos_sys_powerdown_conf(SYS_AFTR);
>>> -
>>> -	__raw_writel(virt_to_phys(s3c_cpu_resume), REG_DIRECTGO_ADDR);
>>> -	__raw_writel(S5P_CHECK_AFTR, REG_DIRECTGO_FLAG);
>>> -
>>> -	save_cpu_arch_register();
>>> -
>>> -	/* Setting Central Sequence Register for power down mode */
>>> -	tmp = __raw_readl(S5P_CENTRAL_SEQ_CONFIGURATION);
>>> -	tmp &= ~S5P_CENTRAL_LOWPWR_CFG;
>>> -	__raw_writel(tmp, S5P_CENTRAL_SEQ_CONFIGURATION);
>>> -
>>> -	cpu_pm_enter();
>>> -	cpu_suspend(0, idle_finisher);
>>> -
>>> -#ifdef CONFIG_SMP
>>> -	if (!soc_is_exynos5250())
>>> -		scu_enable(S5P_VA_SCU);
>>> -#endif
>>> -	cpu_pm_exit();
>>> -
>>> -	restore_cpu_arch_register();
>>> -
>>> -	/*
>>> -	 * If PMU failed while entering sleep mode, WFI will be
>>> -	 * ignored by PMU and then exiting cpu_do_idle().
>>> -	 * S5P_CENTRAL_LOWPWR_CFG bit will not be set automatically
>>> -	 * in this situation.
>>> -	 */
>>> -	tmp = __raw_readl(S5P_CENTRAL_SEQ_CONFIGURATION);
>>> -	if (!(tmp & S5P_CENTRAL_LOWPWR_CFG)) {
>>> -		tmp |= S5P_CENTRAL_LOWPWR_CFG;
>>> -		__raw_writel(tmp, S5P_CENTRAL_SEQ_CONFIGURATION);
>>> -	}
>>> -
>>> -	/* Clear wakeup state register */
>>> -	__raw_writel(0x0, S5P_WAKEUP_STAT);
>>> -
>>> -	return index;
>>> -}
>>> -
>>> -static int exynos4_enter_lowpower(struct cpuidle_device *dev,
>>> -				struct cpuidle_driver *drv,
>>> -				int index)
>>> -{
>>> -	int new_index = index;
>>> -
>>> -	/* This mode only can be entered when other core's are offline */
>>> -	if (num_online_cpus() > 1)
>>> -		new_index = drv->safe_state_index;
>>> -
>>> -	if (new_index == 0)
>>> -		return arm_cpuidle_simple_enter(dev, drv, new_index);
>>> -	else
>>> -		return exynos4_enter_core0_aftr(dev, drv, new_index);
>>> -}
>>> -
>>>  static void __init exynos5_core_down_clk(void)
>>>  {
>>>  	unsigned int tmp;
>>> @@ -209,10 +84,6 @@ static int __init exynos4_init_cpuidle(void)
>>>  		device = &per_cpu(exynos4_cpuidle_device, cpu_id);
>>>  		device->cpu = cpu_id;
>>>  
>>> -		/* Support IDLE only */
>>> -		if (cpu_id != 0)
>>> -			device->state_count = 1;
>>> -
>>>  		ret = cpuidle_register_device(device);
>>>  		if (ret) {
>>>  			printk(KERN_ERR "CPUidle register device failed\n");
>
Tomasz Figa June 28, 2013, 10:11 a.m. UTC | #4
Hi Daniel,

I've been fighting with this whole AFTR state as well, before Bartlomiej. 
Let me share my thoughts on this.

On Friday 28 of June 2013 11:57:25 Daniel Lezcano wrote:
> On 06/27/2013 08:10 PM, Bartlomiej Zolnierkiewicz wrote:
> > On Wednesday, June 26, 2013 12:36:12 PM Daniel Lezcano wrote:
> >> On 06/26/2013 12:13 PM, Bartlomiej Zolnierkiewicz wrote:
> >>> AFTR mode support was introduced by commit 67173ca ("ARM: EXYNOS: Add
> >>> support AFTR mode on EXYNOS4210") in v3.4 kernel.  Unfortunately even
> >>> in v3.4 kernel it hasn't worked as supposed and this is still the
> >>> case
> >>> with v3.10-rc6 (it probably wasn't noticed because CONFIG_CPU_IDLE is
> >>> not turned on by default):
> >>> 
> >>> - on revision 0 of Exynos4210 (Universal C210 board) it causes lockup
> >>> 
> >>>   (on this revision only one core is usable so entry to AFTR mode is
> >>>   always attempted because the code tries to go into AFTR mode when
> >>>   all
> >>>   other CPUs except CPU0 are offlined)
> >>> 
> >>> - on revision 1.1 of Exynos4210 (Trats board) it causes a lockup when
> >>> 
> >>>   CPU1 is offlined (i.e. echo 0 >
> >>>   /sys/devices/system/cpu/cpu1/online)
> >>> 
> >>> - on later Exynos4/5 SoCs wrong registers may be accessed when all
> >>> CPUs
> >>> 
> >>>   except CPU0 are offlined resulting in panic/lockup
> >>>   (REG_DIRECTGO_ADDR
> >>>   and REG_DIRECTGO_FLAG register selections was implemented only for
> >>>   Exynos4210)
> >>> 
> >>> Just remove AFTR mode support for now.
> >> 
> >> Ok, I will jump on the opportunity to talk about this state.
> >> 
> >> 1. I tried different ways to make the AFTR state to be entered with
> >> *both* cpu online. It goes successfully to this state. The CPU0 is
> >> correctly woken up but the CPU1 is never woken up, why is it happening
> >> ?
> >> 
> >> https://bugs.launchpad.net/linaro-landing-team-samsung/+bug/1171518
> > 
> > No idea here, AFTR doesn't work for me with upstream kernels even if
> > only one CPU is online.
> 
> What do you mean by "AFTR doesn't work" ? Is the kernel hanging ? The
> state is never reached ?

If you don't unplug all the CPUs >0 the state is obviously never reached. 
Otherwise the whole system hangs after it tries to enter this mode without 
any reaction for external events, other than reset.

> > Also the documentation says that before entering system-level
> > power-down
> > mode (such as AFTR) when multiple CPUs cores are used all other CPU
> > cores should stop interrupt service so I'm not sure how the way
> > attempted by you should work.
> 
> The cpu enters the idle mode with the interrupts disabled.

Hmm? What is supposed to wake it up then? AFAIK the whole idea of any idle 
or sleep is to sit in such low power mode until some interrupt fires (and so 
the name of the WFI, wait for interrupt, instruction).

> >> 2. The CPU1 hotplug bug should been fixed and if I am not wrong there
> >> is
> >> a patch somewhere fixing this.
> >> 
> >> https://bugs.launchpad.net/linaro-power-kernel/+bug/1171382
> > 
> > Unfortunately none of the patches there helps with my issues.
> > 
> >> 3. What is the fix for Exynos5 ?
> >> 
> >> https://bugs.launchpad.net/linaro-power-kernel/+bug/1171253
> >> 
> >> It sounds like depending on Hypervisor mode is enabled or not, the
> >> AFTR
> >> does not work correctly.
> > 
> > Sorry no idea here either.  On any SoCs later than EXYNOS4210 the
> > registers used for s3c_cpu_resume address and 0xFCBA0D10 magic number
> > may be different than EXYNOS4210 defaults (at least on EXYNOS4412 they
> > indeed are different, unfortunately I lack any info needed for EXYNOS5
> > support). You are lucky that it even works in some cases on EXYNOS5250.
> > 
> >> In other words, instead of removing the AFTR state I suggest to fix
> >> it:
> >> both core being online, split driver for exynos4 and 5.
> > 
> > My main problem is that with the upstream kernel even on EXYNOS4210
> > rev0 (only one core useable due to hardware issues) the kernel goes
> > into AFTR state for the first few times during boot and then it just
> > lockups (after going into cpu_do_idle() which is really
> > cpu_v7_do_idle()
> > and which does wfi call) and doesn't wake up CPU0. I have currently
> > no idea how to fix or debug it further.
> 
> I have an Origen 4210 board Ver A. and it works without problem with the
> AFTR mode (cpu1 unplugged).

Great!

Since benefits of this feature are rather questionable, especially when you 
consider all the maintenance burden caused by it, could you do some 
measurements to check if power saving thanks to this mode is of any 
significance?

> > The issue happens with every upstream kernel version tried (from v3.4
> > to v3.10-rc6).  Lockups also happen on EXYNOS4210 rev1.1 when CPU1 is
> > offlined by hand and then cpuidle driver tries to go into AFTR mode
> > (because by default it doesn't go into AFTR mode on any SoC except
> > EXYNOS4210 rev0).
> > 
> > I don't have EXYNOS4210 rev1.0 but it seems that in the upstream AFTR
> > mode has never worked (even on hardware that it was originally
> > developed
> > on) since its introduction in v3.4 (which was released on 20th May
> > 2012).
> > 
> > IOW for over the year nobody cared to make it work and I have currently
> > no fix at hand so the corrent upstream resolution is to just remove the
> > known non-working code and re-introduce it later when/if needed (I can
> > just disable it with a small fix but we don't keep such long-term
> > broken
> > code as placeholder in the upstream kernel).  If left as it is people
> > can hit the known issues and waste time debugging them, just like this
> > happenend for me [1].
> > 
> > If you have AFTR mode working (especially on EXYNOS4210) in Linaro
> > kernels please get fixes upstream ASAP. However I still wonder whether
> > the maintanance nightmare (bugs for different cases in your launchpad)
> > is worth gains over standard idle mode as the rumor around here is that
> > they are not that great (unfortunately no numbers were provided during
> > original feature addition).
> 
> It works forme with a vanilla kernel 3.10.0-rc7.

As Bartek already said, I haven't worked on any of our Exynos 4210 based 
boards since it got introduced in Linux 3.4, with exactly the same effect we 
described.

> Removing a feature because it seems not working is not a good solution.
> The right way is to investigate what is happening and why.

I can agree only partially. Keeping a feature that is broken and without 
any significant benefits does not make sense for me. Neither does putting 
efforts into fixing it, only to find that it is of no use.

However this is purely a speculation. Could you test on your Origen, on 
which it is supposed to work, if this feature is of any use?

Best regards,
Tomasz

> Thanks
>   -- Daniel
> 
> > --
> > Bartlomiej Zolnierkiewicz
> > Samsung R&D Institute Poland
> > Samsung Electronics
> > 
> > [1] Somebody enabled CONFIG_CPU_IDLE in the default config in one of
> > our
> > internal kernel trees on a target on which this feature "works" (since
> > CPUs are not hot-unplugged by default on all targets except EXYNOS4210
> > rev0 the code for AFTR is not used) and resulted in lockups on boot on
> > my default testing target. It took my long time to find out that
> > problem
> > is actually caused by just enabling exynos cpuidle driver.
> > 
> >> Thanks
> >> 
> >>   -- Daniel
> >>> 
> >>> Cc: Jaecheol Lee <jc.lee@samsung.com>
> >>> Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
> >>> Cc: Amit Daniel Kachhap <amit.kachhap@linaro.org>
> >>> Cc: Tomasz Figa <t.figa@samsung.com>
> >>> Cc: Kukjin Kim <kgene.kim@samsung.com>
> >>> Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
> >>> Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
> >>> Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
> >>> Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
> >>> ---
> >>> 
> >>>  arch/arm/mach-exynos/cpuidle.c | 131
> >>>  +---------------------------------------- 1 file changed, 1
> >>>  insertion(+), 130 deletions(-)
> >>> 
> >>> diff --git a/arch/arm/mach-exynos/cpuidle.c
> >>> b/arch/arm/mach-exynos/cpuidle.c index 17a18ff..0a657ac 100644
> >>> --- a/arch/arm/mach-exynos/cpuidle.c
> >>> +++ b/arch/arm/mach-exynos/cpuidle.c
> >>> @@ -17,30 +17,11 @@
> >>> 
> >>>  #include <linux/time.h>
> >>>  
> >>>  #include <asm/proc-fns.h>
> >>> 
> >>> -#include <asm/smp_scu.h>
> >>> -#include <asm/suspend.h>
> >>> -#include <asm/unified.h>
> >>> 
> >>>  #include <asm/cpuidle.h>
> >>>  #include <mach/regs-clock.h>
> >>> 
> >>> -#include <mach/regs-pmu.h>
> >>> -
> >>> -#include <plat/cpu.h>
> >>> 
> >>>  #include "common.h"
> >>> 
> >>> -#define REG_DIRECTGO_ADDR	(samsung_rev() == EXYNOS4210_REV_1_1 ? \
> >>> -			S5P_INFORM7 : (samsung_rev() == EXYNOS4210_REV_1_0 ? \
> >>> -			(S5P_VA_SYSRAM + 0x24) : S5P_INFORM0))
> >>> -#define REG_DIRECTGO_FLAG	(samsung_rev() == EXYNOS4210_REV_1_1 ? \
> >>> -			S5P_INFORM6 : (samsung_rev() == EXYNOS4210_REV_1_0 ? \
> >>> -			(S5P_VA_SYSRAM + 0x20) : S5P_INFORM1))
> >>> -
> >>> -#define S5P_CHECK_AFTR		0xFCBA0D10
> >>> -
> >>> -static int exynos4_enter_lowpower(struct cpuidle_device *dev,
> >>> -				struct cpuidle_driver *drv,
> >>> -				int index);
> >>> -
> >>> 
> >>>  static DEFINE_PER_CPU(struct cpuidle_device,
> >>>  exynos4_cpuidle_device);
> >>>  
> >>>  static struct cpuidle_driver exynos4_idle_driver = {
> >>> 
> >>> @@ -48,117 +29,11 @@ static struct cpuidle_driver exynos4_idle_driver
> >>> = {>>> 
> >>>  	.owner			= THIS_MODULE,
> >>>  	.states = {
> >>>  	
> >>>  		[0] = ARM_CPUIDLE_WFI_STATE,
> >>> 
> >>> -		[1] = {
> >>> -			.enter			= exynos4_enter_lowpower,
> >>> -			.exit_latency		= 300,
> >>> -			.target_residency	= 100000,
> >>> -			.flags			= CPUIDLE_FLAG_TIME_VALID,
> >>> -			.name			= "C1",
> >>> -			.desc			= "ARM power down",
> >>> -		},
> >>> 
> >>>  	},
> >>> 
> >>> -	.state_count = 2,
> >>> +	.state_count = 1,
> >>> 
> >>>  	.safe_state_index = 0,
> >>>  
> >>>  };
> >>> 
> >>> -/* Ext-GIC nIRQ/nFIQ is the only wakeup source in AFTR */
> >>> -static void exynos4_set_wakeupmask(void)
> >>> -{
> >>> -	__raw_writel(0x0000ff3e, S5P_WAKEUP_MASK);
> >>> -}
> >>> -
> >>> -static unsigned int g_pwr_ctrl, g_diag_reg;
> >>> -
> >>> -static void save_cpu_arch_register(void)
> >>> -{
> >>> -	/*read power control register*/
> >>> -	asm("mrc p15, 0, %0, c15, c0, 0" : "=r"(g_pwr_ctrl) : : "cc");
> >>> -	/*read diagnostic register*/
> >>> -	asm("mrc p15, 0, %0, c15, c0, 1" : "=r"(g_diag_reg) : : "cc");
> >>> -	return;
> >>> -}
> >>> -
> >>> -static void restore_cpu_arch_register(void)
> >>> -{
> >>> -	/*write power control register*/
> >>> -	asm("mcr p15, 0, %0, c15, c0, 0" : : "r"(g_pwr_ctrl) : "cc");
> >>> -	/*write diagnostic register*/
> >>> -	asm("mcr p15, 0, %0, c15, c0, 1" : : "r"(g_diag_reg) : "cc");
> >>> -	return;
> >>> -}
> >>> -
> >>> -static int idle_finisher(unsigned long flags)
> >>> -{
> >>> -	cpu_do_idle();
> >>> -	return 1;
> >>> -}
> >>> -
> >>> -static int exynos4_enter_core0_aftr(struct cpuidle_device *dev,
> >>> -				struct cpuidle_driver *drv,
> >>> -				int index)
> >>> -{
> >>> -	unsigned long tmp;
> >>> -
> >>> -	exynos4_set_wakeupmask();
> >>> -
> >>> -	/* Set value of power down register for aftr mode */
> >>> -	exynos_sys_powerdown_conf(SYS_AFTR);
> >>> -
> >>> -	__raw_writel(virt_to_phys(s3c_cpu_resume), REG_DIRECTGO_ADDR);
> >>> -	__raw_writel(S5P_CHECK_AFTR, REG_DIRECTGO_FLAG);
> >>> -
> >>> -	save_cpu_arch_register();
> >>> -
> >>> -	/* Setting Central Sequence Register for power down mode */
> >>> -	tmp = __raw_readl(S5P_CENTRAL_SEQ_CONFIGURATION);
> >>> -	tmp &= ~S5P_CENTRAL_LOWPWR_CFG;
> >>> -	__raw_writel(tmp, S5P_CENTRAL_SEQ_CONFIGURATION);
> >>> -
> >>> -	cpu_pm_enter();
> >>> -	cpu_suspend(0, idle_finisher);
> >>> -
> >>> -#ifdef CONFIG_SMP
> >>> -	if (!soc_is_exynos5250())
> >>> -		scu_enable(S5P_VA_SCU);
> >>> -#endif
> >>> -	cpu_pm_exit();
> >>> -
> >>> -	restore_cpu_arch_register();
> >>> -
> >>> -	/*
> >>> -	 * If PMU failed while entering sleep mode, WFI will be
> >>> -	 * ignored by PMU and then exiting cpu_do_idle().
> >>> -	 * S5P_CENTRAL_LOWPWR_CFG bit will not be set automatically
> >>> -	 * in this situation.
> >>> -	 */
> >>> -	tmp = __raw_readl(S5P_CENTRAL_SEQ_CONFIGURATION);
> >>> -	if (!(tmp & S5P_CENTRAL_LOWPWR_CFG)) {
> >>> -		tmp |= S5P_CENTRAL_LOWPWR_CFG;
> >>> -		__raw_writel(tmp, S5P_CENTRAL_SEQ_CONFIGURATION);
> >>> -	}
> >>> -
> >>> -	/* Clear wakeup state register */
> >>> -	__raw_writel(0x0, S5P_WAKEUP_STAT);
> >>> -
> >>> -	return index;
> >>> -}
> >>> -
> >>> -static int exynos4_enter_lowpower(struct cpuidle_device *dev,
> >>> -				struct cpuidle_driver *drv,
> >>> -				int index)
> >>> -{
> >>> -	int new_index = index;
> >>> -
> >>> -	/* This mode only can be entered when other core's are offline */
> >>> -	if (num_online_cpus() > 1)
> >>> -		new_index = drv->safe_state_index;
> >>> -
> >>> -	if (new_index == 0)
> >>> -		return arm_cpuidle_simple_enter(dev, drv, new_index);
> >>> -	else
> >>> -		return exynos4_enter_core0_aftr(dev, drv, new_index);
> >>> -}
> >>> -
> >>> 
> >>>  static void __init exynos5_core_down_clk(void)
> >>>  {
> >>>  
> >>>  	unsigned int tmp;
> >>> 
> >>> @@ -209,10 +84,6 @@ static int __init exynos4_init_cpuidle(void)
> >>> 
> >>>  		device = &per_cpu(exynos4_cpuidle_device, cpu_id);
> >>>  		device->cpu = cpu_id;
> >>> 
> >>> -		/* Support IDLE only */
> >>> -		if (cpu_id != 0)
> >>> -			device->state_count = 1;
> >>> -
> >>> 
> >>>  		ret = cpuidle_register_device(device);
> >>>  		if (ret) {
> >>>  		
> >>>  			printk(KERN_ERR "CPUidle register device failed\n");
Lorenzo Pieralisi June 28, 2013, 11:13 a.m. UTC | #5
On Fri, Jun 28, 2013 at 11:11:24AM +0100, Tomasz Figa wrote:
> Hi Daniel,
> 
> I've been fighting with this whole AFTR state as well, before Bartlomiej.
> Let me share my thoughts on this.
> 
> On Friday 28 of June 2013 11:57:25 Daniel Lezcano wrote:
> > On 06/27/2013 08:10 PM, Bartlomiej Zolnierkiewicz wrote:
> > > On Wednesday, June 26, 2013 12:36:12 PM Daniel Lezcano wrote:
> > >> On 06/26/2013 12:13 PM, Bartlomiej Zolnierkiewicz wrote:
> > >>> AFTR mode support was introduced by commit 67173ca ("ARM: EXYNOS: Add
> > >>> support AFTR mode on EXYNOS4210") in v3.4 kernel.  Unfortunately even
> > >>> in v3.4 kernel it hasn't worked as supposed and this is still the
> > >>> case
> > >>> with v3.10-rc6 (it probably wasn't noticed because CONFIG_CPU_IDLE is
> > >>> not turned on by default):
> > >>>
> > >>> - on revision 0 of Exynos4210 (Universal C210 board) it causes lockup
> > >>>
> > >>>   (on this revision only one core is usable so entry to AFTR mode is
> > >>>   always attempted because the code tries to go into AFTR mode when
> > >>>   all
> > >>>   other CPUs except CPU0 are offlined)
> > >>>
> > >>> - on revision 1.1 of Exynos4210 (Trats board) it causes a lockup when
> > >>>
> > >>>   CPU1 is offlined (i.e. echo 0 >
> > >>>   /sys/devices/system/cpu/cpu1/online)
> > >>>
> > >>> - on later Exynos4/5 SoCs wrong registers may be accessed when all
> > >>> CPUs
> > >>>
> > >>>   except CPU0 are offlined resulting in panic/lockup
> > >>>   (REG_DIRECTGO_ADDR
> > >>>   and REG_DIRECTGO_FLAG register selections was implemented only for
> > >>>   Exynos4210)
> > >>>
> > >>> Just remove AFTR mode support for now.
> > >>
> > >> Ok, I will jump on the opportunity to talk about this state.
> > >>
> > >> 1. I tried different ways to make the AFTR state to be entered with
> > >> *both* cpu online. It goes successfully to this state. The CPU0 is
> > >> correctly woken up but the CPU1 is never woken up, why is it happening
> > >> ?
> > >>
> > >> https://bugs.launchpad.net/linaro-landing-team-samsung/+bug/1171518
> > >
> > > No idea here, AFTR doesn't work for me with upstream kernels even if
> > > only one CPU is online.
> >
> > What do you mean by "AFTR doesn't work" ? Is the kernel hanging ? The
> > state is never reached ?
> 
> If you don't unplug all the CPUs >0 the state is obviously never reached.
> Otherwise the whole system hangs after it tries to enter this mode without
> any reaction for external events, other than reset.
> 
> > > Also the documentation says that before entering system-level
> > > power-down
> > > mode (such as AFTR) when multiple CPUs cores are used all other CPU
> > > cores should stop interrupt service so I'm not sure how the way
> > > attempted by you should work.
> >
> > The cpu enters the idle mode with the interrupts disabled.
> 
> Hmm? What is supposed to wake it up then? AFAIK the whole idea of any idle
> or sleep is to sit in such low power mode until some interrupt fires (and so
> the name of the WFI, wait for interrupt, instruction).

WFI completes upon IRQ even if irqs are disabled for a CPU, unless the
GIC CPU interface is disabled for that CPU as well. How the power
controller wakes up the CPUs from shutdown is strictly related to the
power controller behaviour. The problem here I think is completely different
and it is related to how the power controller behaves on Exynos.
I think the issues are a combination of power controller/boot firmware that
prevents CPU1 to go into idle independently of CPU0, but I might be mistaken.

If you want to get to the bottom of this you need to get detailed information
on how the firmware and power controller behave, trial and error won't work.

> > >> 2. The CPU1 hotplug bug should been fixed and if I am not wrong there
> > >> is
> > >> a patch somewhere fixing this.
> > >>
> > >> https://bugs.launchpad.net/linaro-power-kernel/+bug/1171382
> > >
> > > Unfortunately none of the patches there helps with my issues.
> > >
> > >> 3. What is the fix for Exynos5 ?
> > >>
> > >> https://bugs.launchpad.net/linaro-power-kernel/+bug/1171253
> > >>
> > >> It sounds like depending on Hypervisor mode is enabled or not, the
> > >> AFTR
> > >> does not work correctly.
> > >
> > > Sorry no idea here either.  On any SoCs later than EXYNOS4210 the
> > > registers used for s3c_cpu_resume address and 0xFCBA0D10 magic number
> > > may be different than EXYNOS4210 defaults (at least on EXYNOS4412 they
> > > indeed are different, unfortunately I lack any info needed for EXYNOS5
> > > support). You are lucky that it even works in some cases on EXYNOS5250.
> > >
> > >> In other words, instead of removing the AFTR state I suggest to fix
> > >> it:
> > >> both core being online, split driver for exynos4 and 5.
> > >
> > > My main problem is that with the upstream kernel even on EXYNOS4210
> > > rev0 (only one core useable due to hardware issues) the kernel goes
> > > into AFTR state for the first few times during boot and then it just
> > > lockups (after going into cpu_do_idle() which is really
> > > cpu_v7_do_idle()
> > > and which does wfi call) and doesn't wake up CPU0. I have currently
> > > no idea how to fix or debug it further.
> >
> > I have an Origen 4210 board Ver A. and it works without problem with the
> > AFTR mode (cpu1 unplugged).
> 
> Great!
> 
> Since benefits of this feature are rather questionable, especially when you
> consider all the maintenance burden caused by it, could you do some
> measurements to check if power saving thanks to this mode is of any
> significance?

Questionable ? Questionable is the way this code has been put on the
backburner without upstreaming it properly.

> > > The issue happens with every upstream kernel version tried (from v3.4
> > > to v3.10-rc6).  Lockups also happen on EXYNOS4210 rev1.1 when CPU1 is
> > > offlined by hand and then cpuidle driver tries to go into AFTR mode
> > > (because by default it doesn't go into AFTR mode on any SoC except
> > > EXYNOS4210 rev0).
> > >
> > > I don't have EXYNOS4210 rev1.0 but it seems that in the upstream AFTR
> > > mode has never worked (even on hardware that it was originally
> > > developed
> > > on) since its introduction in v3.4 (which was released on 20th May
> > > 2012).
> > >
> > > IOW for over the year nobody cared to make it work and I have currently
> > > no fix at hand so the corrent upstream resolution is to just remove the
> > > known non-working code and re-introduce it later when/if needed (I can
> > > just disable it with a small fix but we don't keep such long-term
> > > broken
> > > code as placeholder in the upstream kernel).  If left as it is people
> > > can hit the known issues and waste time debugging them, just like this
> > > happenend for me [1].
> > >
> > > If you have AFTR mode working (especially on EXYNOS4210) in Linaro
> > > kernels please get fixes upstream ASAP. However I still wonder whether
> > > the maintanance nightmare (bugs for different cases in your launchpad)
> > > is worth gains over standard idle mode as the rumor around here is that
> > > they are not that great (unfortunately no numbers were provided during
> > > original feature addition).
> >
> > It works forme with a vanilla kernel 3.10.0-rc7.
> 
> As Bartek already said, I haven't worked on any of our Exynos 4210 based
> boards since it got introduced in Linux 3.4, with exactly the same effect we
> described.
> 
> > Removing a feature because it seems not working is not a good solution.
> > The right way is to investigate what is happening and why.
> 
> I can agree only partially. Keeping a feature that is broken and without
> any significant benefits does not make sense for me. Neither does putting
> efforts into fixing it, only to find that it is of no use.

Well, either power management HW works on these boards, so there are
significant benefits in fixing the code, or it does not and you are right
then.

If power management HW works there are benefits and they are significant for
manifold reasons (first and foremost, since these are dev boards, to show
how power management backends must be written, but honestly this driver
fails in that respect). It is absolutely worth putting effort in fixing the
behaviour, the problem is finding people who can answer questions on
how the power controller and firmware are *supposed* to be working,
otherwise we will never get to the bottom of this.

I understand that these bits of information are hard to come by, and for
that reason we might be forced to drop this code, but this does not mean
that these features are not useful.

> However this is purely a speculation. Could you test on your Origen, on
> which it is supposed to work, if this feature is of any use ?

If power management HW works properly on these boards, and I hope it
does (but the only way to check it is by measuring power drawn), this feature
is useful, now the question is to understand how and who is going to fix it,
if we can manage to do that.

Thanks,
Lorenzo
Daniel Lezcano June 28, 2013, 11:20 a.m. UTC | #6
On 06/28/2013 12:11 PM, Tomasz Figa wrote:
> Hi Daniel,
> 
> I've been fighting with this whole AFTR state as well, before Bartlomiej. 
> Let me share my thoughts on this.
> 
> On Friday 28 of June 2013 11:57:25 Daniel Lezcano wrote:
>> On 06/27/2013 08:10 PM, Bartlomiej Zolnierkiewicz wrote:
>>> On Wednesday, June 26, 2013 12:36:12 PM Daniel Lezcano wrote:
>>>> On 06/26/2013 12:13 PM, Bartlomiej Zolnierkiewicz wrote:
>>>>> AFTR mode support was introduced by commit 67173ca ("ARM: EXYNOS: Add
>>>>> support AFTR mode on EXYNOS4210") in v3.4 kernel.  Unfortunately even
>>>>> in v3.4 kernel it hasn't worked as supposed and this is still the
>>>>> case
>>>>> with v3.10-rc6 (it probably wasn't noticed because CONFIG_CPU_IDLE is
>>>>> not turned on by default):
>>>>>
>>>>> - on revision 0 of Exynos4210 (Universal C210 board) it causes lockup
>>>>>
>>>>>   (on this revision only one core is usable so entry to AFTR mode is
>>>>>   always attempted because the code tries to go into AFTR mode when
>>>>>   all
>>>>>   other CPUs except CPU0 are offlined)
>>>>>
>>>>> - on revision 1.1 of Exynos4210 (Trats board) it causes a lockup when
>>>>>
>>>>>   CPU1 is offlined (i.e. echo 0 >
>>>>>   /sys/devices/system/cpu/cpu1/online)
>>>>>
>>>>> - on later Exynos4/5 SoCs wrong registers may be accessed when all
>>>>> CPUs
>>>>>
>>>>>   except CPU0 are offlined resulting in panic/lockup
>>>>>   (REG_DIRECTGO_ADDR
>>>>>   and REG_DIRECTGO_FLAG register selections was implemented only for
>>>>>   Exynos4210)
>>>>>
>>>>> Just remove AFTR mode support for now.
>>>>
>>>> Ok, I will jump on the opportunity to talk about this state.
>>>>
>>>> 1. I tried different ways to make the AFTR state to be entered with
>>>> *both* cpu online. It goes successfully to this state. The CPU0 is
>>>> correctly woken up but the CPU1 is never woken up, why is it happening
>>>> ?
>>>>
>>>> https://bugs.launchpad.net/linaro-landing-team-samsung/+bug/1171518
>>>
>>> No idea here, AFTR doesn't work for me with upstream kernels even if
>>> only one CPU is online.
>>
>> What do you mean by "AFTR doesn't work" ? Is the kernel hanging ? The
>> state is never reached ?
> 
> If you don't unplug all the CPUs >0 the state is obviously never reached. 
> Otherwise the whole system hangs after it tries to enter this mode without 
> any reaction for external events, other than reset.

Need investigation.

What is the exynos board version where that occurs ?

>>> Also the documentation says that before entering system-level
>>> power-down
>>> mode (such as AFTR) when multiple CPUs cores are used all other CPU
>>> cores should stop interrupt service so I'm not sure how the way
>>> attempted by you should work.
>>
>> The cpu enters the idle mode with the interrupts disabled.
> 
> Hmm? What is supposed to wake it up then? AFAIK the whole idea of any idle 
> or sleep is to sit in such low power mode until some interrupt fires (and so 
> the name of the WFI, wait for interrupt, instruction).

It is handled by the hardware, for the exynos it should be the PMU. The
CPU stays clock/power gated and when an interrupt occurs the PMU wakes
up the CPU. This one continue its instructions after cpu_do_idle and
right after enables the local interrupts leading to the interrupt handling.

>>>> 2. The CPU1 hotplug bug should been fixed and if I am not wrong there
>>>> is
>>>> a patch somewhere fixing this.
>>>>
>>>> https://bugs.launchpad.net/linaro-power-kernel/+bug/1171382
>>>
>>> Unfortunately none of the patches there helps with my issues.
>>>
>>>> 3. What is the fix for Exynos5 ?
>>>>
>>>> https://bugs.launchpad.net/linaro-power-kernel/+bug/1171253
>>>>
>>>> It sounds like depending on Hypervisor mode is enabled or not, the
>>>> AFTR
>>>> does not work correctly.
>>>
>>> Sorry no idea here either.  On any SoCs later than EXYNOS4210 the
>>> registers used for s3c_cpu_resume address and 0xFCBA0D10 magic number
>>> may be different than EXYNOS4210 defaults (at least on EXYNOS4412 they
>>> indeed are different, unfortunately I lack any info needed for EXYNOS5
>>> support). You are lucky that it even works in some cases on EXYNOS5250.
>>>
>>>> In other words, instead of removing the AFTR state I suggest to fix
>>>> it:
>>>> both core being online, split driver for exynos4 and 5.
>>>
>>> My main problem is that with the upstream kernel even on EXYNOS4210
>>> rev0 (only one core useable due to hardware issues) the kernel goes
>>> into AFTR state for the first few times during boot and then it just
>>> lockups (after going into cpu_do_idle() which is really
>>> cpu_v7_do_idle()
>>> and which does wfi call) and doesn't wake up CPU0. I have currently
>>> no idea how to fix or debug it further.
>>
>> I have an Origen 4210 board Ver A. and it works without problem with the
>> AFTR mode (cpu1 unplugged).
> 
> Great!
> 
> Since benefits of this feature are rather questionable, especially when you 
> consider all the maintenance burden caused by it, could you do some 
> measurements to check if power saving thanks to this mode is of any 
> significance?

No I can't, no spare time for that and furthermore this work has already
be done by Amit Daniel when he submitted the driver.

Amit Daniel is no longer a Linaro assignee but it is still part of the
Samsung company (changed the email address to reach him).

>>> The issue happens with every upstream kernel version tried (from v3.4
>>> to v3.10-rc6).  Lockups also happen on EXYNOS4210 rev1.1 when CPU1 is
>>> offlined by hand and then cpuidle driver tries to go into AFTR mode
>>> (because by default it doesn't go into AFTR mode on any SoC except
>>> EXYNOS4210 rev0).
>>>
>>> I don't have EXYNOS4210 rev1.0 but it seems that in the upstream AFTR
>>> mode has never worked (even on hardware that it was originally
>>> developed
>>> on) since its introduction in v3.4 (which was released on 20th May
>>> 2012).
>>>
>>> IOW for over the year nobody cared to make it work and I have currently
>>> no fix at hand so the corrent upstream resolution is to just remove the
>>> known non-working code and re-introduce it later when/if needed (I can
>>> just disable it with a small fix but we don't keep such long-term
>>> broken
>>> code as placeholder in the upstream kernel).  If left as it is people
>>> can hit the known issues and waste time debugging them, just like this
>>> happenend for me [1].
>>>
>>> If you have AFTR mode working (especially on EXYNOS4210) in Linaro
>>> kernels please get fixes upstream ASAP. However I still wonder whether
>>> the maintanance nightmare (bugs for different cases in your launchpad)
>>> is worth gains over standard idle mode as the rumor around here is that
>>> they are not that great (unfortunately no numbers were provided during
>>> original feature addition).
>>
>> It works forme with a vanilla kernel 3.10.0-rc7.
> 
> As Bartek already said, I haven't worked on any of our Exynos 4210 based 
> boards since it got introduced in Linux 3.4, with exactly the same effect we 
> described.
> 
>> Removing a feature because it seems not working is not a good solution.
>> The right way is to investigate what is happening and why.
> 
> I can agree only partially. Keeping a feature that is broken and without 
> any significant benefits does not make sense for me. Neither does putting 
> efforts into fixing it, only to find that it is of no use.
> 
> However this is purely a speculation. Could you test on your Origen, on 
> which it is supposed to work, if this feature is of any use?

It is useless to do that. This work is already done.

The kernel is not a playground where you can upstream code and then
remove it because a feature seems broken and you don't have an idea of why.

I asked several times the reasons of why the AFTR state couldn't work
with multiple CPUs and I had no answer.

Frankly speaking I have a couple of hypothesis:

1. something is not correctly setup and the PMU does not wake up the CPU1
2. there is a silicon bug and no one wants to tell it is the case

In any case, this must be investigated and identified. And then we can
take a decision about this state.

>>> --
>>> Bartlomiej Zolnierkiewicz
>>> Samsung R&D Institute Poland
>>> Samsung Electronics
>>>
>>> [1] Somebody enabled CONFIG_CPU_IDLE in the default config in one of
>>> our
>>> internal kernel trees on a target on which this feature "works" (since
>>> CPUs are not hot-unplugged by default on all targets except EXYNOS4210
>>> rev0 the code for AFTR is not used) and resulted in lockups on boot on
>>> my default testing target. It took my long time to find out that
>>> problem
>>> is actually caused by just enabling exynos cpuidle driver.
>>>
>>>> Thanks
>>>>
>>>>   -- Daniel
>>>>>
>>>>> Cc: Jaecheol Lee <jc.lee@samsung.com>
>>>>> Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
>>>>> Cc: Amit Daniel Kachhap <amit.kachhap@linaro.org>
>>>>> Cc: Tomasz Figa <t.figa@samsung.com>
>>>>> Cc: Kukjin Kim <kgene.kim@samsung.com>
>>>>> Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
>>>>> Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
>>>>> Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
>>>>> Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
>>>>> ---
>>>>>
>>>>>  arch/arm/mach-exynos/cpuidle.c | 131
>>>>>  +---------------------------------------- 1 file changed, 1
>>>>>  insertion(+), 130 deletions(-)
>>>>>
>>>>> diff --git a/arch/arm/mach-exynos/cpuidle.c
>>>>> b/arch/arm/mach-exynos/cpuidle.c index 17a18ff..0a657ac 100644
>>>>> --- a/arch/arm/mach-exynos/cpuidle.c
>>>>> +++ b/arch/arm/mach-exynos/cpuidle.c
>>>>> @@ -17,30 +17,11 @@
>>>>>
>>>>>  #include <linux/time.h>
>>>>>  
>>>>>  #include <asm/proc-fns.h>
>>>>>
>>>>> -#include <asm/smp_scu.h>
>>>>> -#include <asm/suspend.h>
>>>>> -#include <asm/unified.h>
>>>>>
>>>>>  #include <asm/cpuidle.h>
>>>>>  #include <mach/regs-clock.h>
>>>>>
>>>>> -#include <mach/regs-pmu.h>
>>>>> -
>>>>> -#include <plat/cpu.h>
>>>>>
>>>>>  #include "common.h"
>>>>>
>>>>> -#define REG_DIRECTGO_ADDR	(samsung_rev() == EXYNOS4210_REV_1_1 ? \
>>>>> -			S5P_INFORM7 : (samsung_rev() == EXYNOS4210_REV_1_0 ? \
>>>>> -			(S5P_VA_SYSRAM + 0x24) : S5P_INFORM0))
>>>>> -#define REG_DIRECTGO_FLAG	(samsung_rev() == EXYNOS4210_REV_1_1 ? \
>>>>> -			S5P_INFORM6 : (samsung_rev() == EXYNOS4210_REV_1_0 ? \
>>>>> -			(S5P_VA_SYSRAM + 0x20) : S5P_INFORM1))
>>>>> -
>>>>> -#define S5P_CHECK_AFTR		0xFCBA0D10
>>>>> -
>>>>> -static int exynos4_enter_lowpower(struct cpuidle_device *dev,
>>>>> -				struct cpuidle_driver *drv,
>>>>> -				int index);
>>>>> -
>>>>>
>>>>>  static DEFINE_PER_CPU(struct cpuidle_device,
>>>>>  exynos4_cpuidle_device);
>>>>>  
>>>>>  static struct cpuidle_driver exynos4_idle_driver = {
>>>>>
>>>>> @@ -48,117 +29,11 @@ static struct cpuidle_driver exynos4_idle_driver
>>>>> = {>>> 
>>>>>  	.owner			= THIS_MODULE,
>>>>>  	.states = {
>>>>>  	
>>>>>  		[0] = ARM_CPUIDLE_WFI_STATE,
>>>>>
>>>>> -		[1] = {
>>>>> -			.enter			= exynos4_enter_lowpower,
>>>>> -			.exit_latency		= 300,
>>>>> -			.target_residency	= 100000,
>>>>> -			.flags			= CPUIDLE_FLAG_TIME_VALID,
>>>>> -			.name			= "C1",
>>>>> -			.desc			= "ARM power down",
>>>>> -		},
>>>>>
>>>>>  	},
>>>>>
>>>>> -	.state_count = 2,
>>>>> +	.state_count = 1,
>>>>>
>>>>>  	.safe_state_index = 0,
>>>>>  
>>>>>  };
>>>>>
>>>>> -/* Ext-GIC nIRQ/nFIQ is the only wakeup source in AFTR */
>>>>> -static void exynos4_set_wakeupmask(void)
>>>>> -{
>>>>> -	__raw_writel(0x0000ff3e, S5P_WAKEUP_MASK);
>>>>> -}
>>>>> -
>>>>> -static unsigned int g_pwr_ctrl, g_diag_reg;
>>>>> -
>>>>> -static void save_cpu_arch_register(void)
>>>>> -{
>>>>> -	/*read power control register*/
>>>>> -	asm("mrc p15, 0, %0, c15, c0, 0" : "=r"(g_pwr_ctrl) : : "cc");
>>>>> -	/*read diagnostic register*/
>>>>> -	asm("mrc p15, 0, %0, c15, c0, 1" : "=r"(g_diag_reg) : : "cc");
>>>>> -	return;
>>>>> -}
>>>>> -
>>>>> -static void restore_cpu_arch_register(void)
>>>>> -{
>>>>> -	/*write power control register*/
>>>>> -	asm("mcr p15, 0, %0, c15, c0, 0" : : "r"(g_pwr_ctrl) : "cc");
>>>>> -	/*write diagnostic register*/
>>>>> -	asm("mcr p15, 0, %0, c15, c0, 1" : : "r"(g_diag_reg) : "cc");
>>>>> -	return;
>>>>> -}
>>>>> -
>>>>> -static int idle_finisher(unsigned long flags)
>>>>> -{
>>>>> -	cpu_do_idle();
>>>>> -	return 1;
>>>>> -}
>>>>> -
>>>>> -static int exynos4_enter_core0_aftr(struct cpuidle_device *dev,
>>>>> -				struct cpuidle_driver *drv,
>>>>> -				int index)
>>>>> -{
>>>>> -	unsigned long tmp;
>>>>> -
>>>>> -	exynos4_set_wakeupmask();
>>>>> -
>>>>> -	/* Set value of power down register for aftr mode */
>>>>> -	exynos_sys_powerdown_conf(SYS_AFTR);
>>>>> -
>>>>> -	__raw_writel(virt_to_phys(s3c_cpu_resume), REG_DIRECTGO_ADDR);
>>>>> -	__raw_writel(S5P_CHECK_AFTR, REG_DIRECTGO_FLAG);
>>>>> -
>>>>> -	save_cpu_arch_register();
>>>>> -
>>>>> -	/* Setting Central Sequence Register for power down mode */
>>>>> -	tmp = __raw_readl(S5P_CENTRAL_SEQ_CONFIGURATION);
>>>>> -	tmp &= ~S5P_CENTRAL_LOWPWR_CFG;
>>>>> -	__raw_writel(tmp, S5P_CENTRAL_SEQ_CONFIGURATION);
>>>>> -
>>>>> -	cpu_pm_enter();
>>>>> -	cpu_suspend(0, idle_finisher);
>>>>> -
>>>>> -#ifdef CONFIG_SMP
>>>>> -	if (!soc_is_exynos5250())
>>>>> -		scu_enable(S5P_VA_SCU);
>>>>> -#endif
>>>>> -	cpu_pm_exit();
>>>>> -
>>>>> -	restore_cpu_arch_register();
>>>>> -
>>>>> -	/*
>>>>> -	 * If PMU failed while entering sleep mode, WFI will be
>>>>> -	 * ignored by PMU and then exiting cpu_do_idle().
>>>>> -	 * S5P_CENTRAL_LOWPWR_CFG bit will not be set automatically
>>>>> -	 * in this situation.
>>>>> -	 */
>>>>> -	tmp = __raw_readl(S5P_CENTRAL_SEQ_CONFIGURATION);
>>>>> -	if (!(tmp & S5P_CENTRAL_LOWPWR_CFG)) {
>>>>> -		tmp |= S5P_CENTRAL_LOWPWR_CFG;
>>>>> -		__raw_writel(tmp, S5P_CENTRAL_SEQ_CONFIGURATION);
>>>>> -	}
>>>>> -
>>>>> -	/* Clear wakeup state register */
>>>>> -	__raw_writel(0x0, S5P_WAKEUP_STAT);
>>>>> -
>>>>> -	return index;
>>>>> -}
>>>>> -
>>>>> -static int exynos4_enter_lowpower(struct cpuidle_device *dev,
>>>>> -				struct cpuidle_driver *drv,
>>>>> -				int index)
>>>>> -{
>>>>> -	int new_index = index;
>>>>> -
>>>>> -	/* This mode only can be entered when other core's are offline */
>>>>> -	if (num_online_cpus() > 1)
>>>>> -		new_index = drv->safe_state_index;
>>>>> -
>>>>> -	if (new_index == 0)
>>>>> -		return arm_cpuidle_simple_enter(dev, drv, new_index);
>>>>> -	else
>>>>> -		return exynos4_enter_core0_aftr(dev, drv, new_index);
>>>>> -}
>>>>> -
>>>>>
>>>>>  static void __init exynos5_core_down_clk(void)
>>>>>  {
>>>>>  
>>>>>  	unsigned int tmp;
>>>>>
>>>>> @@ -209,10 +84,6 @@ static int __init exynos4_init_cpuidle(void)
>>>>>
>>>>>  		device = &per_cpu(exynos4_cpuidle_device, cpu_id);
>>>>>  		device->cpu = cpu_id;
>>>>>
>>>>> -		/* Support IDLE only */
>>>>> -		if (cpu_id != 0)
>>>>> -			device->state_count = 1;
>>>>> -
>>>>>
>>>>>  		ret = cpuidle_register_device(device);
>>>>>  		if (ret) {
>>>>>  		
>>>>>  			printk(KERN_ERR "CPUidle register device failed\n");
Tomasz Figa June 28, 2013, 4:21 p.m. UTC | #7
Hi Lorenzo,

On Friday 28 of June 2013 12:13:33 Lorenzo Pieralisi wrote:
> On Fri, Jun 28, 2013 at 11:11:24AM +0100, Tomasz Figa wrote:
> > Hi Daniel,
> > 
> > I've been fighting with this whole AFTR state as well, before
> > Bartlomiej. Let me share my thoughts on this.
> > 
> > On Friday 28 of June 2013 11:57:25 Daniel Lezcano wrote:
> > > On 06/27/2013 08:10 PM, Bartlomiej Zolnierkiewicz wrote:
> > > > On Wednesday, June 26, 2013 12:36:12 PM Daniel Lezcano wrote:
> > > >> On 06/26/2013 12:13 PM, Bartlomiej Zolnierkiewicz wrote:
> > > >>> AFTR mode support was introduced by commit 67173ca ("ARM: EXYNOS:
> > > >>> Add
> > > >>> support AFTR mode on EXYNOS4210") in v3.4 kernel.  Unfortunately
> > > >>> even
> > > >>> in v3.4 kernel it hasn't worked as supposed and this is still the
> > > >>> case
> > > >>> with v3.10-rc6 (it probably wasn't noticed because
> > > >>> CONFIG_CPU_IDLE is
> > > >>> not turned on by default):
> > > >>> 
> > > >>> - on revision 0 of Exynos4210 (Universal C210 board) it causes
> > > >>> lockup
> > > >>> 
> > > >>>   (on this revision only one core is usable so entry to AFTR mode
> > > >>>   is
> > > >>>   always attempted because the code tries to go into AFTR mode
> > > >>>   when
> > > >>>   all
> > > >>>   other CPUs except CPU0 are offlined)
> > > >>> 
> > > >>> - on revision 1.1 of Exynos4210 (Trats board) it causes a lockup
> > > >>> when
> > > >>> 
> > > >>>   CPU1 is offlined (i.e. echo 0 >
> > > >>>   /sys/devices/system/cpu/cpu1/online)
> > > >>> 
> > > >>> - on later Exynos4/5 SoCs wrong registers may be accessed when
> > > >>> all
> > > >>> CPUs
> > > >>> 
> > > >>>   except CPU0 are offlined resulting in panic/lockup
> > > >>>   (REG_DIRECTGO_ADDR
> > > >>>   and REG_DIRECTGO_FLAG register selections was implemented only
> > > >>>   for
> > > >>>   Exynos4210)
> > > >>> 
> > > >>> Just remove AFTR mode support for now.
> > > >> 
> > > >> Ok, I will jump on the opportunity to talk about this state.
> > > >> 
> > > >> 1. I tried different ways to make the AFTR state to be entered
> > > >> with
> > > >> *both* cpu online. It goes successfully to this state. The CPU0 is
> > > >> correctly woken up but the CPU1 is never woken up, why is it
> > > >> happening
> > > >> ?
> > > >> 
> > > >> https://bugs.launchpad.net/linaro-landing-team-samsung/+bug/117151
> > > >> 8
> > > > 
> > > > No idea here, AFTR doesn't work for me with upstream kernels even
> > > > if
> > > > only one CPU is online.
> > > 
> > > What do you mean by "AFTR doesn't work" ? Is the kernel hanging ? The
> > > state is never reached ?
> > 
> > If you don't unplug all the CPUs >0 the state is obviously never
> > reached. Otherwise the whole system hangs after it tries to enter this
> > mode without any reaction for external events, other than reset.
> > 
> > > > Also the documentation says that before entering system-level
> > > > power-down
> > > > mode (such as AFTR) when multiple CPUs cores are used all other CPU
> > > > cores should stop interrupt service so I'm not sure how the way
> > > > attempted by you should work.
> > > 
> > > The cpu enters the idle mode with the interrupts disabled.
> > 
> > Hmm? What is supposed to wake it up then? AFAIK the whole idea of any
> > idle or sleep is to sit in such low power mode until some interrupt
> > fires (and so the name of the WFI, wait for interrupt, instruction).
> 
> WFI completes upon IRQ even if irqs are disabled for a CPU, unless the
> GIC CPU interface is disabled for that CPU as well. How the power
> controller wakes up the CPUs from shutdown is strictly related to the
> power controller behaviour. The problem here I think is completely
> different and it is related to how the power controller behaves on
> Exynos. I think the issues are a combination of power controller/boot
> firmware that prevents CPU1 to go into idle independently of CPU0, but I
> might be mistaken.

We are not talking here about the normal WFI idle mode. This one works 
correctly and gives huge power savings.

AFTR (which stands for ARM off, TOP running, TOP being a power domain) is a 
mode that is supposed to be deeper than WFI, with all ARM cores powered off, 
but some of the blocks around them keep their state, so the whole system 
can be quickly woke up.

Based on our measurements on Exynos 4210 and 4x12, powering off ARM cores 
don't give any significant amount of power savings (few milliamperes per 
core), with most of the savings already achieved by WFI idle. Since AFTR is 
basically just powering off all ARM cores, including CPU0, I doubt it is 
going to give much higher savings than that.

> If you want to get to the bottom of this you need to get detailed
> information on how the firmware and power controller behave, trial and
> error won't work.

I don't think we want to put unjustified effort into this. If we are to fix 
it, we should first make sure that it is something that is worth this effort.

The commit that introduced this feature didn't have any rationale to begin 
with, not even saying about any evidence of actual power saving such as 
measurements.

Just for reference, here's commit message of that commit:

	ARM: EXYNOS: Add support AFTR mode on EXYNOS4210

	This patch adds support AFTR(ARM OFF TOP RUNNING) mode in
	cpuidle driver. L2 cache keeps their data in this mode.
	This patch ports the code to the latest interfaces to
	save/restore CPU state inclusive of CPU PM notifiers, l2
	resume and cpu_suspend/resume.

Nothing saying _why_ this is added, just _what_ is added.

> > > >> 2. The CPU1 hotplug bug should been fixed and if I am not wrong
> > > >> there
> > > >> is
> > > >> a patch somewhere fixing this.
> > > >> 
> > > >> https://bugs.launchpad.net/linaro-power-kernel/+bug/1171382
> > > > 
> > > > Unfortunately none of the patches there helps with my issues.
> > > > 
> > > >> 3. What is the fix for Exynos5 ?
> > > >> 
> > > >> https://bugs.launchpad.net/linaro-power-kernel/+bug/1171253
> > > >> 
> > > >> It sounds like depending on Hypervisor mode is enabled or not, the
> > > >> AFTR
> > > >> does not work correctly.
> > > > 
> > > > Sorry no idea here either.  On any SoCs later than EXYNOS4210 the
> > > > registers used for s3c_cpu_resume address and 0xFCBA0D10 magic
> > > > number
> > > > may be different than EXYNOS4210 defaults (at least on EXYNOS4412
> > > > they
> > > > indeed are different, unfortunately I lack any info needed for
> > > > EXYNOS5
> > > > support). You are lucky that it even works in some cases on
> > > > EXYNOS5250.
> > > > 
> > > >> In other words, instead of removing the AFTR state I suggest to
> > > >> fix
> > > >> it:
> > > >> both core being online, split driver for exynos4 and 5.
> > > > 
> > > > My main problem is that with the upstream kernel even on EXYNOS4210
> > > > rev0 (only one core useable due to hardware issues) the kernel goes
> > > > into AFTR state for the first few times during boot and then it
> > > > just
> > > > lockups (after going into cpu_do_idle() which is really
> > > > cpu_v7_do_idle()
> > > > and which does wfi call) and doesn't wake up CPU0. I have currently
> > > > no idea how to fix or debug it further.
> > > 
> > > I have an Origen 4210 board Ver A. and it works without problem with
> > > the
> > > AFTR mode (cpu1 unplugged).
> > 
> > Great!
> > 
> > Since benefits of this feature are rather questionable, especially when
> > you consider all the maintenance burden caused by it, could you do
> > some measurements to check if power saving thanks to this mode is of
> > any significance?
> 
> Questionable ? Questionable is the way this code has been put on the
> backburner without upstreaming it properly.

I agree, it has not been upstreamed properly, becausE:
 1) it doesn't work,
 2) there was no rationale behind it or at least none was provided in patch 
descriptions.

> > > > The issue happens with every upstream kernel version tried (from
> > > > v3.4
> > > > to v3.10-rc6).  Lockups also happen on EXYNOS4210 rev1.1 when CPU1
> > > > is
> > > > offlined by hand and then cpuidle driver tries to go into AFTR mode
> > > > (because by default it doesn't go into AFTR mode on any SoC except
> > > > EXYNOS4210 rev0).
> > > > 
> > > > I don't have EXYNOS4210 rev1.0 but it seems that in the upstream
> > > > AFTR
> > > > mode has never worked (even on hardware that it was originally
> > > > developed
> > > > on) since its introduction in v3.4 (which was released on 20th May
> > > > 2012).
> > > > 
> > > > IOW for over the year nobody cared to make it work and I have
> > > > currently
> > > > no fix at hand so the corrent upstream resolution is to just remove
> > > > the
> > > > known non-working code and re-introduce it later when/if needed (I
> > > > can
> > > > just disable it with a small fix but we don't keep such long-term
> > > > broken
> > > > code as placeholder in the upstream kernel).  If left as it is
> > > > people
> > > > can hit the known issues and waste time debugging them, just like
> > > > this
> > > > happenend for me [1].
> > > > 
> > > > If you have AFTR mode working (especially on EXYNOS4210) in Linaro
> > > > kernels please get fixes upstream ASAP. However I still wonder
> > > > whether
> > > > the maintanance nightmare (bugs for different cases in your
> > > > launchpad)
> > > > is worth gains over standard idle mode as the rumor around here is
> > > > that
> > > > they are not that great (unfortunately no numbers were provided
> > > > during
> > > > original feature addition).
> > > 
> > > It works forme with a vanilla kernel 3.10.0-rc7.
> > 
> > As Bartek already said, I haven't worked on any of our Exynos 4210
> > based
> > boards since it got introduced in Linux 3.4, with exactly the same
> > effect we described.
> > 
> > > Removing a feature because it seems not working is not a good
> > > solution.
> > > The right way is to investigate what is happening and why.
> > 
> > I can agree only partially. Keeping a feature that is broken and
> > without
> > any significant benefits does not make sense for me. Neither does
> > putting efforts into fixing it, only to find that it is of no use.
> 
> Well, either power management HW works on these boards, so there are
> significant benefits in fixing the code, or it does not and you are right
> then.

This mode doesn't have anything to do with power management HW other than 
power management unit inside of the SoC, which is not board specific.

> If power management HW works there are benefits and they are significant
> for manifold reasons (first and foremost, since these are dev boards, to
> show how power management backends must be written, but honestly this
> driver fails in that respect). It is absolutely worth putting effort in
> fixing the behaviour, the problem is finding people who can answer
> questions on how the power controller and firmware are *supposed* to be
> working, otherwise we will never get to the bottom of this.

I don't see a point to have code that works, but doesn't do anything useful 
(other than some example/skeleton drivers which aren't supposed to be used 
in other way than as aid for developers). This is why I'd like to see some 
evidence that this feature is good to have, before we start putting effort 
into fixing it on other boards (because this is possibly firmware/bootloader 
specific issue).

> I understand that these bits of information are hard to come by, and for
> that reason we might be forced to drop this code, but this does not mean
> that these features are not useful.

I agree. This is not what I meant. What I wanted to say is that no 
rationale has ever been provided for this feature and so we don't know if 
it is worth putting effort into it.

Our internal measurements of similar features tell us that it is unlikely, 
but this is not a result that can be relied on, so I asked for measurements 
on boards for which it works.

> > However this is purely a speculation. Could you test on your Origen, on
> > which it is supposed to work, if this feature is of any use ?
> 
> If power management HW works properly on these boards, and I hope it
> does (but the only way to check it is by measuring power drawn), this
> feature is useful, now the question is to understand how and who is
> going to fix it, if we can manage to do that.

Again, this feature is only related to SoC internals and lowest level 
firmware (first stage bootloader, which resumes the CPU), so power saving is 
not board specific here, but SoC-specific.

Best regards,
Tomasz
Bartlomiej Zolnierkiewicz June 28, 2013, 4:27 p.m. UTC | #8
On Friday, June 28, 2013 01:20:09 PM Daniel Lezcano wrote:
> On 06/28/2013 12:11 PM, Tomasz Figa wrote:
> > Hi Daniel,
> > 
> > I've been fighting with this whole AFTR state as well, before Bartlomiej. 
> > Let me share my thoughts on this.
> > 
> > On Friday 28 of June 2013 11:57:25 Daniel Lezcano wrote:
> >> On 06/27/2013 08:10 PM, Bartlomiej Zolnierkiewicz wrote:
> >>> On Wednesday, June 26, 2013 12:36:12 PM Daniel Lezcano wrote:
> >>>> On 06/26/2013 12:13 PM, Bartlomiej Zolnierkiewicz wrote:
> >>>>> AFTR mode support was introduced by commit 67173ca ("ARM: EXYNOS: Add
> >>>>> support AFTR mode on EXYNOS4210") in v3.4 kernel.  Unfortunately even
> >>>>> in v3.4 kernel it hasn't worked as supposed and this is still the
> >>>>> case
> >>>>> with v3.10-rc6 (it probably wasn't noticed because CONFIG_CPU_IDLE is
> >>>>> not turned on by default):
> >>>>>
> >>>>> - on revision 0 of Exynos4210 (Universal C210 board) it causes lockup
> >>>>>
> >>>>>   (on this revision only one core is usable so entry to AFTR mode is
> >>>>>   always attempted because the code tries to go into AFTR mode when
> >>>>>   all
> >>>>>   other CPUs except CPU0 are offlined)
> >>>>>
> >>>>> - on revision 1.1 of Exynos4210 (Trats board) it causes a lockup when
> >>>>>
> >>>>>   CPU1 is offlined (i.e. echo 0 >
> >>>>>   /sys/devices/system/cpu/cpu1/online)
> >>>>>
> >>>>> - on later Exynos4/5 SoCs wrong registers may be accessed when all
> >>>>> CPUs
> >>>>>
> >>>>>   except CPU0 are offlined resulting in panic/lockup
> >>>>>   (REG_DIRECTGO_ADDR
> >>>>>   and REG_DIRECTGO_FLAG register selections was implemented only for
> >>>>>   Exynos4210)
> >>>>>
> >>>>> Just remove AFTR mode support for now.
> >>>>
> >>>> Ok, I will jump on the opportunity to talk about this state.
> >>>>
> >>>> 1. I tried different ways to make the AFTR state to be entered with
> >>>> *both* cpu online. It goes successfully to this state. The CPU0 is
> >>>> correctly woken up but the CPU1 is never woken up, why is it happening
> >>>> ?
> >>>>
> >>>> https://bugs.launchpad.net/linaro-landing-team-samsung/+bug/1171518
> >>>
> >>> No idea here, AFTR doesn't work for me with upstream kernels even if
> >>> only one CPU is online.
> >>
> >> What do you mean by "AFTR doesn't work" ? Is the kernel hanging ? The
> >> state is never reached ?
> > 
> > If you don't unplug all the CPUs >0 the state is obviously never reached. 
> > Otherwise the whole system hangs after it tries to enter this mode without 
> > any reaction for external events, other than reset.
> 
> Need investigation.
> 
> What is the exynos board version where that occurs ?

Could you please tell me what exactly do you mean by that?

I already wrote that we can reproduce the problem on EXYNOS4210 rev0
and rev1.1 (we don't have rev1.0). Tomek has also reproduced the problem
on some later SoCs (I hope that he can give you exact revisions).

In our testing we didn't encounter the board on which the problem
doesn't occur. Our current working theory is that the problem may be
u-boot (or first stage bootloader) related.

> >>> Also the documentation says that before entering system-level
> >>> power-down
> >>> mode (such as AFTR) when multiple CPUs cores are used all other CPU
> >>> cores should stop interrupt service so I'm not sure how the way
> >>> attempted by you should work.
> >>
> >> The cpu enters the idle mode with the interrupts disabled.
> > 
> > Hmm? What is supposed to wake it up then? AFAIK the whole idea of any idle 
> > or sleep is to sit in such low power mode until some interrupt fires (and so 
> > the name of the WFI, wait for interrupt, instruction).
> 
> It is handled by the hardware, for the exynos it should be the PMU. The
> CPU stays clock/power gated and when an interrupt occurs the PMU wakes
> up the CPU. This one continue its instructions after cpu_do_idle and
> right after enables the local interrupts leading to the interrupt handling.
> 
> >>>> 2. The CPU1 hotplug bug should been fixed and if I am not wrong there
> >>>> is
> >>>> a patch somewhere fixing this.
> >>>>
> >>>> https://bugs.launchpad.net/linaro-power-kernel/+bug/1171382
> >>>
> >>> Unfortunately none of the patches there helps with my issues.
> >>>
> >>>> 3. What is the fix for Exynos5 ?
> >>>>
> >>>> https://bugs.launchpad.net/linaro-power-kernel/+bug/1171253
> >>>>
> >>>> It sounds like depending on Hypervisor mode is enabled or not, the
> >>>> AFTR
> >>>> does not work correctly.
> >>>
> >>> Sorry no idea here either.  On any SoCs later than EXYNOS4210 the
> >>> registers used for s3c_cpu_resume address and 0xFCBA0D10 magic number
> >>> may be different than EXYNOS4210 defaults (at least on EXYNOS4412 they
> >>> indeed are different, unfortunately I lack any info needed for EXYNOS5
> >>> support). You are lucky that it even works in some cases on EXYNOS5250.
> >>>
> >>>> In other words, instead of removing the AFTR state I suggest to fix
> >>>> it:
> >>>> both core being online, split driver for exynos4 and 5.
> >>>
> >>> My main problem is that with the upstream kernel even on EXYNOS4210
> >>> rev0 (only one core useable due to hardware issues) the kernel goes
> >>> into AFTR state for the first few times during boot and then it just
> >>> lockups (after going into cpu_do_idle() which is really
> >>> cpu_v7_do_idle()
> >>> and which does wfi call) and doesn't wake up CPU0. I have currently
> >>> no idea how to fix or debug it further.
> >>
> >> I have an Origen 4210 board Ver A. and it works without problem with the
> >> AFTR mode (cpu1 unplugged).
> > 
> > Great!
> > 
> > Since benefits of this feature are rather questionable, especially when you 
> > consider all the maintenance burden caused by it, could you do some 
> > measurements to check if power saving thanks to this mode is of any 
> > significance?
> 
> No I can't, no spare time for that and furthermore this work has already
> be done by Amit Daniel when he submitted the driver.

Unfortunately the results were never made public...

> Amit Daniel is no longer a Linaro assignee but it is still part of the
> Samsung company (changed the email address to reach him).
> 
> >>> The issue happens with every upstream kernel version tried (from v3.4
> >>> to v3.10-rc6).  Lockups also happen on EXYNOS4210 rev1.1 when CPU1 is
> >>> offlined by hand and then cpuidle driver tries to go into AFTR mode
> >>> (because by default it doesn't go into AFTR mode on any SoC except
> >>> EXYNOS4210 rev0).
> >>>
> >>> I don't have EXYNOS4210 rev1.0 but it seems that in the upstream AFTR
> >>> mode has never worked (even on hardware that it was originally
> >>> developed
> >>> on) since its introduction in v3.4 (which was released on 20th May
> >>> 2012).
> >>>
> >>> IOW for over the year nobody cared to make it work and I have currently
> >>> no fix at hand so the corrent upstream resolution is to just remove the
> >>> known non-working code and re-introduce it later when/if needed (I can
> >>> just disable it with a small fix but we don't keep such long-term
> >>> broken
> >>> code as placeholder in the upstream kernel).  If left as it is people
> >>> can hit the known issues and waste time debugging them, just like this
> >>> happenend for me [1].
> >>>
> >>> If you have AFTR mode working (especially on EXYNOS4210) in Linaro
> >>> kernels please get fixes upstream ASAP. However I still wonder whether
> >>> the maintanance nightmare (bugs for different cases in your launchpad)
> >>> is worth gains over standard idle mode as the rumor around here is that
> >>> they are not that great (unfortunately no numbers were provided during
> >>> original feature addition).
> >>
> >> It works forme with a vanilla kernel 3.10.0-rc7.
> > 
> > As Bartek already said, I haven't worked on any of our Exynos 4210 based 
> > boards since it got introduced in Linux 3.4, with exactly the same effect we 
> > described.
> > 
> >> Removing a feature because it seems not working is not a good solution.
> >> The right way is to investigate what is happening and why.
> > 
> > I can agree only partially. Keeping a feature that is broken and without 
> > any significant benefits does not make sense for me. Neither does putting 
> > efforts into fixing it, only to find that it is of no use.
> > 
> > However this is purely a speculation. Could you test on your Origen, on 
> > which it is supposed to work, if this feature is of any use?
> 
> It is useless to do that. This work is already done.
> 
> The kernel is not a playground where you can upstream code and then
> remove it because a feature seems broken and you don't have an idea of why.

Neither me or Tomek did upstream this code and we couldn't react in
time because we haven't noticed that it is completely unusable for us
as EXYNOS cpuidle driver is not even used by default on EXYNOS (it is
not enabled either in defconfig or Kconfig).

Moreover the feature we are talking about (AFTR mode) is also not used
by default (except EXYNOS4210 rev0 on which it lockups system for us)
even with EXYNOS cpuidle driver being enabled (because this specific
feature depends on CPU hot-unplug which is not done automatically right
now).

Such things like unused/broken code removal is not something very
unusual in the upstream kernel (I'm speaking from the experience here
having maintained large subsystem for a couple of years). In this
particular case we are talking about ~130 lines of code which can
be trivially brought back later when/if needed.

Anyway if the code removal is controversial for you we can just disable
AFTR mode by default and enable it only when special command line option
is given (i.e. "aftr"). This would fix all the broken configuration
while still allowing the feature to be enabled on systems that had it
working previously (since you claim that it works on some chipset/u-boot
configurations).

> I asked several times the reasons of why the AFTR state couldn't work
> with multiple CPUs and I had no answer.

Unfortunately I don't know the answer for your question.

The AFTR mode doesn't work for us *at*all* (even with *one* CPU).

> Frankly speaking I have a couple of hypothesis:
> 
> 1. something is not correctly setup and the PMU does not wake up the CPU1
> 2. there is a silicon bug and no one wants to tell it is the case
> 
> In any case, this must be investigated and identified. And then we can
> take a decision about this state.

I don't have good idea currently how to investigate it further.

I also don't have any prove that the actual work is worth it
(and this work can easily take some weeks).

One of main responsibilities of the maintainer it to make sure that
the code does indeed work and that regressions (like these caused by
AFTR mode feature) are fixed in the timely manner, not let the code
sit in the limbo state for large periods of time.  It is already very
bad situation that the regression we are hitting was present since
v3.4 and we are in v3.10 now, I would like to have it fixed ASAP so
we may actually consider enabling cpuidle in our exynos_defconfig.

Best regards,
--
Bartlomiej Zolnierkiewicz
Samsung R&D Institute Poland
Samsung Electronics

> >>> --
> >>> Bartlomiej Zolnierkiewicz
> >>> Samsung R&D Institute Poland
> >>> Samsung Electronics
> >>>
> >>> [1] Somebody enabled CONFIG_CPU_IDLE in the default config in one of
> >>> our
> >>> internal kernel trees on a target on which this feature "works" (since
> >>> CPUs are not hot-unplugged by default on all targets except EXYNOS4210
> >>> rev0 the code for AFTR is not used) and resulted in lockups on boot on
> >>> my default testing target. It took my long time to find out that
> >>> problem
> >>> is actually caused by just enabling exynos cpuidle driver.
> >>>
> >>>> Thanks
> >>>>
> >>>>   -- Daniel
> >>>>>
> >>>>> Cc: Jaecheol Lee <jc.lee@samsung.com>
> >>>>> Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
> >>>>> Cc: Amit Daniel Kachhap <amit.kachhap@linaro.org>
> >>>>> Cc: Tomasz Figa <t.figa@samsung.com>
> >>>>> Cc: Kukjin Kim <kgene.kim@samsung.com>
> >>>>> Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
> >>>>> Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
> >>>>> Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
> >>>>> Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
> >>>>> ---
> >>>>>
> >>>>>  arch/arm/mach-exynos/cpuidle.c | 131
> >>>>>  +---------------------------------------- 1 file changed, 1
> >>>>>  insertion(+), 130 deletions(-)
> >>>>>
> >>>>> diff --git a/arch/arm/mach-exynos/cpuidle.c
> >>>>> b/arch/arm/mach-exynos/cpuidle.c index 17a18ff..0a657ac 100644
> >>>>> --- a/arch/arm/mach-exynos/cpuidle.c
> >>>>> +++ b/arch/arm/mach-exynos/cpuidle.c
> >>>>> @@ -17,30 +17,11 @@
> >>>>>
> >>>>>  #include <linux/time.h>
> >>>>>  
> >>>>>  #include <asm/proc-fns.h>
> >>>>>
> >>>>> -#include <asm/smp_scu.h>
> >>>>> -#include <asm/suspend.h>
> >>>>> -#include <asm/unified.h>
> >>>>>
> >>>>>  #include <asm/cpuidle.h>
> >>>>>  #include <mach/regs-clock.h>
> >>>>>
> >>>>> -#include <mach/regs-pmu.h>
> >>>>> -
> >>>>> -#include <plat/cpu.h>
> >>>>>
> >>>>>  #include "common.h"
> >>>>>
> >>>>> -#define REG_DIRECTGO_ADDR	(samsung_rev() == EXYNOS4210_REV_1_1 ? \
> >>>>> -			S5P_INFORM7 : (samsung_rev() == EXYNOS4210_REV_1_0 ? \
> >>>>> -			(S5P_VA_SYSRAM + 0x24) : S5P_INFORM0))
> >>>>> -#define REG_DIRECTGO_FLAG	(samsung_rev() == EXYNOS4210_REV_1_1 ? \
> >>>>> -			S5P_INFORM6 : (samsung_rev() == EXYNOS4210_REV_1_0 ? \
> >>>>> -			(S5P_VA_SYSRAM + 0x20) : S5P_INFORM1))
> >>>>> -
> >>>>> -#define S5P_CHECK_AFTR		0xFCBA0D10
> >>>>> -
> >>>>> -static int exynos4_enter_lowpower(struct cpuidle_device *dev,
> >>>>> -				struct cpuidle_driver *drv,
> >>>>> -				int index);
> >>>>> -
> >>>>>
> >>>>>  static DEFINE_PER_CPU(struct cpuidle_device,
> >>>>>  exynos4_cpuidle_device);
> >>>>>  
> >>>>>  static struct cpuidle_driver exynos4_idle_driver = {
> >>>>>
> >>>>> @@ -48,117 +29,11 @@ static struct cpuidle_driver exynos4_idle_driver
> >>>>> = {>>> 
> >>>>>  	.owner			= THIS_MODULE,
> >>>>>  	.states = {
> >>>>>  	
> >>>>>  		[0] = ARM_CPUIDLE_WFI_STATE,
> >>>>>
> >>>>> -		[1] = {
> >>>>> -			.enter			= exynos4_enter_lowpower,
> >>>>> -			.exit_latency		= 300,
> >>>>> -			.target_residency	= 100000,
> >>>>> -			.flags			= CPUIDLE_FLAG_TIME_VALID,
> >>>>> -			.name			= "C1",
> >>>>> -			.desc			= "ARM power down",
> >>>>> -		},
> >>>>>
> >>>>>  	},
> >>>>>
> >>>>> -	.state_count = 2,
> >>>>> +	.state_count = 1,
> >>>>>
> >>>>>  	.safe_state_index = 0,
> >>>>>  
> >>>>>  };
> >>>>>
> >>>>> -/* Ext-GIC nIRQ/nFIQ is the only wakeup source in AFTR */
> >>>>> -static void exynos4_set_wakeupmask(void)
> >>>>> -{
> >>>>> -	__raw_writel(0x0000ff3e, S5P_WAKEUP_MASK);
> >>>>> -}
> >>>>> -
> >>>>> -static unsigned int g_pwr_ctrl, g_diag_reg;
> >>>>> -
> >>>>> -static void save_cpu_arch_register(void)
> >>>>> -{
> >>>>> -	/*read power control register*/
> >>>>> -	asm("mrc p15, 0, %0, c15, c0, 0" : "=r"(g_pwr_ctrl) : : "cc");
> >>>>> -	/*read diagnostic register*/
> >>>>> -	asm("mrc p15, 0, %0, c15, c0, 1" : "=r"(g_diag_reg) : : "cc");
> >>>>> -	return;
> >>>>> -}
> >>>>> -
> >>>>> -static void restore_cpu_arch_register(void)
> >>>>> -{
> >>>>> -	/*write power control register*/
> >>>>> -	asm("mcr p15, 0, %0, c15, c0, 0" : : "r"(g_pwr_ctrl) : "cc");
> >>>>> -	/*write diagnostic register*/
> >>>>> -	asm("mcr p15, 0, %0, c15, c0, 1" : : "r"(g_diag_reg) : "cc");
> >>>>> -	return;
> >>>>> -}
> >>>>> -
> >>>>> -static int idle_finisher(unsigned long flags)
> >>>>> -{
> >>>>> -	cpu_do_idle();
> >>>>> -	return 1;
> >>>>> -}
> >>>>> -
> >>>>> -static int exynos4_enter_core0_aftr(struct cpuidle_device *dev,
> >>>>> -				struct cpuidle_driver *drv,
> >>>>> -				int index)
> >>>>> -{
> >>>>> -	unsigned long tmp;
> >>>>> -
> >>>>> -	exynos4_set_wakeupmask();
> >>>>> -
> >>>>> -	/* Set value of power down register for aftr mode */
> >>>>> -	exynos_sys_powerdown_conf(SYS_AFTR);
> >>>>> -
> >>>>> -	__raw_writel(virt_to_phys(s3c_cpu_resume), REG_DIRECTGO_ADDR);
> >>>>> -	__raw_writel(S5P_CHECK_AFTR, REG_DIRECTGO_FLAG);
> >>>>> -
> >>>>> -	save_cpu_arch_register();
> >>>>> -
> >>>>> -	/* Setting Central Sequence Register for power down mode */
> >>>>> -	tmp = __raw_readl(S5P_CENTRAL_SEQ_CONFIGURATION);
> >>>>> -	tmp &= ~S5P_CENTRAL_LOWPWR_CFG;
> >>>>> -	__raw_writel(tmp, S5P_CENTRAL_SEQ_CONFIGURATION);
> >>>>> -
> >>>>> -	cpu_pm_enter();
> >>>>> -	cpu_suspend(0, idle_finisher);
> >>>>> -
> >>>>> -#ifdef CONFIG_SMP
> >>>>> -	if (!soc_is_exynos5250())
> >>>>> -		scu_enable(S5P_VA_SCU);
> >>>>> -#endif
> >>>>> -	cpu_pm_exit();
> >>>>> -
> >>>>> -	restore_cpu_arch_register();
> >>>>> -
> >>>>> -	/*
> >>>>> -	 * If PMU failed while entering sleep mode, WFI will be
> >>>>> -	 * ignored by PMU and then exiting cpu_do_idle().
> >>>>> -	 * S5P_CENTRAL_LOWPWR_CFG bit will not be set automatically
> >>>>> -	 * in this situation.
> >>>>> -	 */
> >>>>> -	tmp = __raw_readl(S5P_CENTRAL_SEQ_CONFIGURATION);
> >>>>> -	if (!(tmp & S5P_CENTRAL_LOWPWR_CFG)) {
> >>>>> -		tmp |= S5P_CENTRAL_LOWPWR_CFG;
> >>>>> -		__raw_writel(tmp, S5P_CENTRAL_SEQ_CONFIGURATION);
> >>>>> -	}
> >>>>> -
> >>>>> -	/* Clear wakeup state register */
> >>>>> -	__raw_writel(0x0, S5P_WAKEUP_STAT);
> >>>>> -
> >>>>> -	return index;
> >>>>> -}
> >>>>> -
> >>>>> -static int exynos4_enter_lowpower(struct cpuidle_device *dev,
> >>>>> -				struct cpuidle_driver *drv,
> >>>>> -				int index)
> >>>>> -{
> >>>>> -	int new_index = index;
> >>>>> -
> >>>>> -	/* This mode only can be entered when other core's are offline */
> >>>>> -	if (num_online_cpus() > 1)
> >>>>> -		new_index = drv->safe_state_index;
> >>>>> -
> >>>>> -	if (new_index == 0)
> >>>>> -		return arm_cpuidle_simple_enter(dev, drv, new_index);
> >>>>> -	else
> >>>>> -		return exynos4_enter_core0_aftr(dev, drv, new_index);
> >>>>> -}
> >>>>> -
> >>>>>
> >>>>>  static void __init exynos5_core_down_clk(void)
> >>>>>  {
> >>>>>  
> >>>>>  	unsigned int tmp;
> >>>>>
> >>>>> @@ -209,10 +84,6 @@ static int __init exynos4_init_cpuidle(void)
> >>>>>
> >>>>>  		device = &per_cpu(exynos4_cpuidle_device, cpu_id);
> >>>>>  		device->cpu = cpu_id;
> >>>>>
> >>>>> -		/* Support IDLE only */
> >>>>> -		if (cpu_id != 0)
> >>>>> -			device->state_count = 1;
> >>>>> -
> >>>>>
> >>>>>  		ret = cpuidle_register_device(device);
> >>>>>  		if (ret) {
> >>>>>  		
> >>>>>  			printk(KERN_ERR "CPUidle register device failed\n");
Tomasz Figa June 28, 2013, 5:31 p.m. UTC | #9
On Friday 28 of June 2013 13:20:09 Daniel Lezcano wrote:
> On 06/28/2013 12:11 PM, Tomasz Figa wrote:
> > Hi Daniel,
> > 
> > I've been fighting with this whole AFTR state as well, before
> > Bartlomiej. Let me share my thoughts on this.
> > 
> > On Friday 28 of June 2013 11:57:25 Daniel Lezcano wrote:
> >> On 06/27/2013 08:10 PM, Bartlomiej Zolnierkiewicz wrote:
> >>> On Wednesday, June 26, 2013 12:36:12 PM Daniel Lezcano wrote:
> >>>> On 06/26/2013 12:13 PM, Bartlomiej Zolnierkiewicz wrote:
> >>>>> AFTR mode support was introduced by commit 67173ca ("ARM: EXYNOS:
> >>>>> Add
> >>>>> support AFTR mode on EXYNOS4210") in v3.4 kernel.  Unfortunately
> >>>>> even
> >>>>> in v3.4 kernel it hasn't worked as supposed and this is still the
> >>>>> case
> >>>>> with v3.10-rc6 (it probably wasn't noticed because CONFIG_CPU_IDLE
> >>>>> is
> >>>>> not turned on by default):
> >>>>> 
> >>>>> - on revision 0 of Exynos4210 (Universal C210 board) it causes
> >>>>> lockup
> >>>>> 
> >>>>>   (on this revision only one core is usable so entry to AFTR mode
> >>>>>   is
> >>>>>   always attempted because the code tries to go into AFTR mode when
> >>>>>   all
> >>>>>   other CPUs except CPU0 are offlined)
> >>>>> 
> >>>>> - on revision 1.1 of Exynos4210 (Trats board) it causes a lockup
> >>>>> when
> >>>>> 
> >>>>>   CPU1 is offlined (i.e. echo 0 >
> >>>>>   /sys/devices/system/cpu/cpu1/online)
> >>>>> 
> >>>>> - on later Exynos4/5 SoCs wrong registers may be accessed when all
> >>>>> CPUs
> >>>>> 
> >>>>>   except CPU0 are offlined resulting in panic/lockup
> >>>>>   (REG_DIRECTGO_ADDR
> >>>>>   and REG_DIRECTGO_FLAG register selections was implemented only
> >>>>>   for
> >>>>>   Exynos4210)
> >>>>> 
> >>>>> Just remove AFTR mode support for now.
> >>>> 
> >>>> Ok, I will jump on the opportunity to talk about this state.
> >>>> 
> >>>> 1. I tried different ways to make the AFTR state to be entered with
> >>>> *both* cpu online. It goes successfully to this state. The CPU0 is
> >>>> correctly woken up but the CPU1 is never woken up, why is it
> >>>> happening
> >>>> ?
> >>>> 
> >>>> https://bugs.launchpad.net/linaro-landing-team-samsung/+bug/1171518
> >>> 
> >>> No idea here, AFTR doesn't work for me with upstream kernels even if
> >>> only one CPU is online.
> >> 
> >> What do you mean by "AFTR doesn't work" ? Is the kernel hanging ? The
> >> state is never reached ?
> > 
> > If you don't unplug all the CPUs >0 the state is obviously never
> > reached. Otherwise the whole system hangs after it tries to enter this
> > mode without any reaction for external events, other than reset.
> 
> Need investigation.

Not necessarily. If this feature is worth it, then sure, but otherwise it 
would be just wasted effort.

> What is the exynos board version where that occurs ?

If this is what you are asking about:
 - Universal C210 with Exynos 4210 rev 0.0,
 - Trats with Exynos 4210 rev. 1.1.

> >>> Also the documentation says that before entering system-level
> >>> power-down
> >>> mode (such as AFTR) when multiple CPUs cores are used all other CPU
> >>> cores should stop interrupt service so I'm not sure how the way
> >>> attempted by you should work.
> >> 
> >> The cpu enters the idle mode with the interrupts disabled.
> > 
> > Hmm? What is supposed to wake it up then? AFAIK the whole idea of any
> > idle or sleep is to sit in such low power mode until some interrupt
> > fires (and so the name of the WFI, wait for interrupt, instruction).
> 
> It is handled by the hardware, for the exynos it should be the PMU. The
> CPU stays clock/power gated and when an interrupt occurs the PMU wakes
> up the CPU. This one continue its instructions after cpu_do_idle and
> right after enables the local interrupts leading to the interrupt
> handling.

OK. I misunderstood you previously then, taking interrupts disabled as 
interrupts signals masked in the controller.

> >>>> 2. The CPU1 hotplug bug should been fixed and if I am not wrong
> >>>> there
> >>>> is
> >>>> a patch somewhere fixing this.
> >>>> 
> >>>> https://bugs.launchpad.net/linaro-power-kernel/+bug/1171382
> >>> 
> >>> Unfortunately none of the patches there helps with my issues.
> >>> 
> >>>> 3. What is the fix for Exynos5 ?
> >>>> 
> >>>> https://bugs.launchpad.net/linaro-power-kernel/+bug/1171253
> >>>> 
> >>>> It sounds like depending on Hypervisor mode is enabled or not, the
> >>>> AFTR
> >>>> does not work correctly.
> >>> 
> >>> Sorry no idea here either.  On any SoCs later than EXYNOS4210 the
> >>> registers used for s3c_cpu_resume address and 0xFCBA0D10 magic number
> >>> may be different than EXYNOS4210 defaults (at least on EXYNOS4412
> >>> they
> >>> indeed are different, unfortunately I lack any info needed for
> >>> EXYNOS5
> >>> support). You are lucky that it even works in some cases on
> >>> EXYNOS5250.
> >>> 
> >>>> In other words, instead of removing the AFTR state I suggest to fix
> >>>> it:
> >>>> both core being online, split driver for exynos4 and 5.
> >>> 
> >>> My main problem is that with the upstream kernel even on EXYNOS4210
> >>> rev0 (only one core useable due to hardware issues) the kernel goes
> >>> into AFTR state for the first few times during boot and then it just
> >>> lockups (after going into cpu_do_idle() which is really
> >>> cpu_v7_do_idle()
> >>> and which does wfi call) and doesn't wake up CPU0. I have currently
> >>> no idea how to fix or debug it further.
> >> 
> >> I have an Origen 4210 board Ver A. and it works without problem with
> >> the
> >> AFTR mode (cpu1 unplugged).
> > 
> > Great!
> > 
> > Since benefits of this feature are rather questionable, especially when
> > you consider all the maintenance burden caused by it, could you do
> > some measurements to check if power saving thanks to this mode is of
> > any significance?
> 
> No I can't, no spare time for that and furthermore this work has already
> be done by Amit Daniel when he submitted the driver.

I was unable to find any measurement results or even any other rationale. 
Would you mind pointing me to them? Thanks in advance.

> Amit Daniel is no longer a Linaro assignee but it is still part of the
> Samsung company (changed the email address to reach him).

OK, thanks.

> >>> The issue happens with every upstream kernel version tried (from v3.4
> >>> to v3.10-rc6).  Lockups also happen on EXYNOS4210 rev1.1 when CPU1 is
> >>> offlined by hand and then cpuidle driver tries to go into AFTR mode
> >>> (because by default it doesn't go into AFTR mode on any SoC except
> >>> EXYNOS4210 rev0).
> >>> 
> >>> I don't have EXYNOS4210 rev1.0 but it seems that in the upstream AFTR
> >>> mode has never worked (even on hardware that it was originally
> >>> developed
> >>> on) since its introduction in v3.4 (which was released on 20th May
> >>> 2012).
> >>> 
> >>> IOW for over the year nobody cared to make it work and I have
> >>> currently
> >>> no fix at hand so the corrent upstream resolution is to just remove
> >>> the
> >>> known non-working code and re-introduce it later when/if needed (I
> >>> can
> >>> just disable it with a small fix but we don't keep such long-term
> >>> broken
> >>> code as placeholder in the upstream kernel).  If left as it is people
> >>> can hit the known issues and waste time debugging them, just like
> >>> this
> >>> happenend for me [1].
> >>> 
> >>> If you have AFTR mode working (especially on EXYNOS4210) in Linaro
> >>> kernels please get fixes upstream ASAP. However I still wonder
> >>> whether
> >>> the maintanance nightmare (bugs for different cases in your
> >>> launchpad)
> >>> is worth gains over standard idle mode as the rumor around here is
> >>> that
> >>> they are not that great (unfortunately no numbers were provided
> >>> during
> >>> original feature addition).
> >> 
> >> It works forme with a vanilla kernel 3.10.0-rc7.
> > 
> > As Bartek already said, I haven't worked on any of our Exynos 4210
> > based
> > boards since it got introduced in Linux 3.4, with exactly the same
> > effect we described.
> > 
> >> Removing a feature because it seems not working is not a good
> >> solution.
> >> The right way is to investigate what is happening and why.
> > 
> > I can agree only partially. Keeping a feature that is broken and
> > without
> > any significant benefits does not make sense for me. Neither does
> > putting efforts into fixing it, only to find that it is of no use.
> > 
> > However this is purely a speculation. Could you test on your Origen, on
> > which it is supposed to work, if this feature is of any use?
> 
> It is useless to do that. This work is already done.

Hmm? As I said, I couldn't find any results of that work.

> The kernel is not a playground where you can upstream code and then
> remove it because a feature seems broken and you don't have an idea of
> why.

Well, first of all, it has not been upstreamed correctly: a) without any 
given rationale (or at least without any I could find) and b) without enough 
testing.

> I asked several times the reasons of why the AFTR state couldn't work
> with multiple CPUs and I had no answer.
> 
> Frankly speaking I have a couple of hypothesis:
> 
> 1. something is not correctly setup and the PMU does not wake up the CPU1
> 2. there is a silicon bug and no one wants to tell it is the case
> 
> In any case, this must be investigated and identified. And then we can
> take a decision about this state.

Well, everything you're saying is correct, assuming that this feature is 
useful, which needs confirmation. I'd still want any evidence of this 
feature being of any use first, to not waste time on something that is 
useless.

Best regards,
Tomasz
Daniel Lezcano June 28, 2013, 9:47 p.m. UTC | #10
On 06/28/2013 06:27 PM, Bartlomiej Zolnierkiewicz wrote:
> On Friday, June 28, 2013 01:20:09 PM Daniel Lezcano wrote:
>> On 06/28/2013 12:11 PM, Tomasz Figa wrote:
>>> Hi Daniel,
>>>
>>> I've been fighting with this whole AFTR state as well, before Bartlomiej. 
>>> Let me share my thoughts on this.
>>>

[ ... ]

>>>
>>> If you don't unplug all the CPUs >0 the state is obviously never reached. 
>>> Otherwise the whole system hangs after it tries to enter this mode without 
>>> any reaction for external events, other than reset.
>>
>> Need investigation.
>>
>> What is the exynos board version where that occurs ?
> 
> Could you please tell me what exactly do you mean by that?
> 
> I already wrote that we can reproduce the problem on EXYNOS4210 rev0
> and rev1.1 (we don't have rev1.0). Tomek has also reproduced the problem
> on some later SoCs (I hope that he can give you exact revisions).
> 
> In our testing we didn't encounter the board on which the problem
> doesn't occur. Our current working theory is that the problem may be
> u-boot (or first stage bootloader) related.

Ok, the status for what I know:

Origen Exynos4210, board ver A: works for me
Arndale Exynos5250: works for me but only if u-boot does not enable the
hypervisor mode.
Chromebook Exynos5250: works for me

I found the following drivers:

https://github.com/AndreiLux/Perseus-UNIVERSAL5410/blob/samsung/arch/arm/mach-exynos/cpuidle.c

https://github.com/CyanogenMod/hardkernel-kernel-4412/blob/cm-10.1/arch/arm/mach-exynos/cpuidle-exynos4.c

Sounds like the num cpus > 1 is still there.

[ ... ]

>> The kernel is not a playground where you can upstream code and then
>> remove it because a feature seems broken and you don't have an idea of why.
> 
> Neither me or Tomek did upstream this code and we couldn't react in
> time because we haven't noticed that it is completely unusable for us
> as EXYNOS cpuidle driver is not even used by default on EXYNOS (it is
> not enabled either in defconfig or Kconfig).
> 
> Moreover the feature we are talking about (AFTR mode) is also not used
> by default (except EXYNOS4210 rev0 on which it lockups system for us)
> even with EXYNOS cpuidle driver being enabled (because this specific
> feature depends on CPU hot-unplug which is not done automatically right
> now).
> 
> Such things like unused/broken code removal is not something very
> unusual in the upstream kernel (I'm speaking from the experience here
> having maintained large subsystem for a couple of years). In this
> particular case we are talking about ~130 lines of code which can
> be trivially brought back later when/if needed.
> 
> Anyway if the code removal is controversial for you we can just disable
> AFTR mode by default and enable it only when special command line option
> is given (i.e. "aftr"). This would fix all the broken configuration
> while still allowing the feature to be enabled on systems that had it
> working previously (since you claim that it works on some chipset/u-boot
> configurations).

Actually, there are several reasons I am not in favor for the moment to
remove this code:

1) code can't be pushed upstream and then removed so easily
2) I asked several times what was this cpu1 hack, I had no answer
3) I tried to make both cpus entering the AFTR state, but the cpu1 never
wakes up, I asked but no answer.

I would like to have some answers :)

Before taking the decision to remove this state (btw you can remove the
driver directly, no ? the default idle function is WFI), IMO it is worth
to investigate and to spend some time to clarify what is happening. Then
we can take a decision.

I am willing to help.

>> I asked several times the reasons of why the AFTR state couldn't work
>> with multiple CPUs and I had no answer.
> 
> Unfortunately I don't know the answer for your question.
> 
> The AFTR mode doesn't work for us *at*all* (even with *one* CPU).
> 
>> Frankly speaking I have a couple of hypothesis:
>>
>> 1. something is not correctly setup and the PMU does not wake up the CPU1
>> 2. there is a silicon bug and no one wants to tell it is the case
>>
>> In any case, this must be investigated and identified. And then we can
>> take a decision about this state.
> 
> I don't have good idea currently how to investigate it further.
> 
> I also don't have any prove that the actual work is worth it
> (and this work can easily take some weeks).
> 
> One of main responsibilities of the maintainer it to make sure that
> the code does indeed work and that regressions (like these caused by
> AFTR mode feature) are fixed in the timely manner, not let the code
> sit in the limbo state for large periods of time.  It is already very
> bad situation that the regression we are hitting was present since
> v3.4 and we are in v3.10 now, I would like to have it fixed ASAP so
> we may actually consider enabling cpuidle in our exynos_defconfig.

I agree.

Thanks
  -- Daniel
Daniel Lezcano June 28, 2013, 10:27 p.m. UTC | #11
On 06/28/2013 07:31 PM, Tomasz Figa wrote:
> On Friday 28 of June 2013 13:20:09 Daniel Lezcano wrote:

[ ... ]

>> The kernel is not a playground where you can upstream code and then
>> remove it because a feature seems broken and you don't have an idea of
>> why.
> 
> Well, first of all, it has not been upstreamed correctly: a) without any 
> given rationale (or at least without any I could find) and b) without enough 
> testing.

+1

>> I asked several times the reasons of why the AFTR state couldn't work
>> with multiple CPUs and I had no answer.
>>
>> Frankly speaking I have a couple of hypothesis:
>>
>> 1. something is not correctly setup and the PMU does not wake up the CPU1
>> 2. there is a silicon bug and no one wants to tell it is the case
>>
>> In any case, this must be investigated and identified. And then we can
>> take a decision about this state.
> 
> Well, everything you're saying is correct, assuming that this feature is 
> useful, which needs confirmation. I'd still want any evidence of this 
> feature being of any use first, to not waste time on something that is 
> useless.

IMHO, it is useful. That's sure a highly integrated hardware with a lot
of peripherals, the power saving is lost in the noise but with a small
embedded device, the power saving is significant.

What I am worried about is the different SoCs being introduced in this
driver without investigating this cpu1 restriction and it sounds like
the AFTR seems to work (I have an odroid-u2 with a 4412 I will test to
check if the AFTR works on it) and nobody seems to be hurt by this and,
as you stated, there is no rational about it. That means the Exynos will
become really more and more power aggressive with 4/A15 or 8/A15 based
boards if the AFTR state is not correctly supported.

AFAICT, there is also the LPA state and the DEEPSLEEP, right ? If the
AFTR does not work, I don't see these states working too.

The cpuidle driver is critical for the new b.L architecture. If we
aren't able to put a cluster of big cpus in a power down state, we lose
the benefit of all the work made up-front at the scheduler level (eg.
small task packing, workqueue/timer migration) and more generally we
lose the benefit of the b.L architecture (except if we do some hacks
which are orthogonal to the generic approach).

I should have added this point to 4) to the mail I sent previously to
Bartlomiej...

  -- Daniel
Tomasz Figa June 28, 2013, 10:47 p.m. UTC | #12
On Friday 28 of June 2013 23:47:49 Daniel Lezcano wrote:
> On 06/28/2013 06:27 PM, Bartlomiej Zolnierkiewicz wrote:
> > On Friday, June 28, 2013 01:20:09 PM Daniel Lezcano wrote:
> >> On 06/28/2013 12:11 PM, Tomasz Figa wrote:
> >>> Hi Daniel,
> >>> 
> >>> I've been fighting with this whole AFTR state as well, before
> >>> Bartlomiej. Let me share my thoughts on this.
> 
> [ ... ]
> 
> >>> If you don't unplug all the CPUs >0 the state is obviously never
> >>> reached. Otherwise the whole system hangs after it tries to enter
> >>> this mode without any reaction for external events, other than
> >>> reset.
> >> 
> >> Need investigation.
> >> 
> >> What is the exynos board version where that occurs ?
> > 
> > Could you please tell me what exactly do you mean by that?
> > 
> > I already wrote that we can reproduce the problem on EXYNOS4210 rev0
> > and rev1.1 (we don't have rev1.0). Tomek has also reproduced the
> > problem on some later SoCs (I hope that he can give you exact
> > revisions).
> > 
> > In our testing we didn't encounter the board on which the problem
> > doesn't occur. Our current working theory is that the problem may be
> > u-boot (or first stage bootloader) related.
> 
> Ok, the status for what I know:
> 
> Origen Exynos4210, board ver A: works for me
> Arndale Exynos5250: works for me but only if u-boot does not enable the
> hypervisor mode.
> Chromebook Exynos5250: works for me
> 
> I found the following drivers:
> 
> https://github.com/AndreiLux/Perseus-UNIVERSAL5410/blob/samsung/arch/arm
> /mach-exynos/cpuidle.c
> 
> https://github.com/CyanogenMod/hardkernel-kernel-4412/blob/cm-10.1/arch/
> arm/mach-exynos/cpuidle-exynos4.c
> 
> Sounds like the num cpus > 1 is still there.

As far as I know (and I can read from the documentation), AFTR is a state 
in which program execution context (e.g. PC, CPU registers, etc.) is lost. 
This is like sleep mode, but with much more state saved (all the 
peripherals and L2 cache), which lets you resume much faster.

Based on that, the execution flow is as following:
 - kernel stops all secondary cores,
 - kernel configures PMU to enter AFTR mode,
 - WFI is executed on boot CPU, which lets PMU enter the requested low 
power mode,
 - the system stays with all CPU cores powered off until a wake-up event 
happens,
 - a wake-up event causes a wake-up reset of the boot CPU, which resumes 
operation from reset vector (usually IROM),
 - here exactly the same procedure is used as on wake-up from sleep mode, 
with the exception that some assumptions can be made about kept state, 
like BL1 being still in IRAM/SYSRAM, without the need to reload it from 
boot device,
 - after going through all the boot code, it finally jumps to the resume 
handler saved by kernel in one of those INFORM registers or somewhere in 
SYSRAM,
 - resume handler reinitializes boot CPU, restores things like MMU, 
coprocessors, etc. and finally boots secondary cores again,
 - the system is operating normally again.

This is simplified a lot, with all things happened internally in PMU and 
boot code skipped, but should give the general picture. See all the 
analogies to normal sleep mode, which (IMHO) makes it understandable that 
this mode can't be entered with more than one core active. 

However, with something like coupled cpuidle, when possible, this mode can 
be entered by powering down remaining cores automatically, so this is not 
a problem with this mode itself, but rather with current approach of 
entering it.

Best regards,
Tomasz

> 
> [ ... ]
> 
> >> The kernel is not a playground where you can upstream code and then
> >> remove it because a feature seems broken and you don't have an idea
> >> of why.> 
> > Neither me or Tomek did upstream this code and we couldn't react in
> > time because we haven't noticed that it is completely unusable for us
> > as EXYNOS cpuidle driver is not even used by default on EXYNOS (it is
> > not enabled either in defconfig or Kconfig).
> > 
> > Moreover the feature we are talking about (AFTR mode) is also not used
> > by default (except EXYNOS4210 rev0 on which it lockups system for us)
> > even with EXYNOS cpuidle driver being enabled (because this specific
> > feature depends on CPU hot-unplug which is not done automatically
> > right
> > now).
> > 
> > Such things like unused/broken code removal is not something very
> > unusual in the upstream kernel (I'm speaking from the experience here
> > having maintained large subsystem for a couple of years). In this
> > particular case we are talking about ~130 lines of code which can
> > be trivially brought back later when/if needed.
> > 
> > Anyway if the code removal is controversial for you we can just
> > disable
> > AFTR mode by default and enable it only when special command line
> > option is given (i.e. "aftr"). This would fix all the broken
> > configuration while still allowing the feature to be enabled on
> > systems that had it working previously (since you claim that it works
> > on some chipset/u-boot configurations).
> 
> Actually, there are several reasons I am not in favor for the moment to
> remove this code:
> 
> 1) code can't be pushed upstream and then removed so easily
> 2) I asked several times what was this cpu1 hack, I had no answer
> 3) I tried to make both cpus entering the AFTR state, but the cpu1 never
> wakes up, I asked but no answer.
> 
> I would like to have some answers :)
> 
> Before taking the decision to remove this state (btw you can remove the
> driver directly, no ? the default idle function is WFI), IMO it is worth
> to investigate and to spend some time to clarify what is happening.
> Then we can take a decision.
> 
> I am willing to help.
> 
> >> I asked several times the reasons of why the AFTR state couldn't work
> >> with multiple CPUs and I had no answer.
> > 
> > Unfortunately I don't know the answer for your question.
> > 
> > The AFTR mode doesn't work for us *at*all* (even with *one* CPU).
> > 
> >> Frankly speaking I have a couple of hypothesis:
> >> 
> >> 1. something is not correctly setup and the PMU does not wake up the
> >> CPU1 2. there is a silicon bug and no one wants to tell it is the
> >> case
> >> 
> >> In any case, this must be investigated and identified. And then we
> >> can
> >> take a decision about this state.
> > 
> > I don't have good idea currently how to investigate it further.
> > 
> > I also don't have any prove that the actual work is worth it
> > (and this work can easily take some weeks).
> > 
> > One of main responsibilities of the maintainer it to make sure that
> > the code does indeed work and that regressions (like these caused by
> > AFTR mode feature) are fixed in the timely manner, not let the code
> > sit in the limbo state for large periods of time.  It is already very
> > bad situation that the regression we are hitting was present since
> > v3.4 and we are in v3.10 now, I would like to have it fixed ASAP so
> > we may actually consider enabling cpuidle in our exynos_defconfig.
> 
> I agree.
> 
> Thanks
>   -- Daniel
Tomasz Figa June 28, 2013, 11:07 p.m. UTC | #13
On Saturday 29 of June 2013 00:27:04 Daniel Lezcano wrote:
> On 06/28/2013 07:31 PM, Tomasz Figa wrote:
> > On Friday 28 of June 2013 13:20:09 Daniel Lezcano wrote:
> [ ... ]
> 
> >> The kernel is not a playground where you can upstream code and then
> >> remove it because a feature seems broken and you don't have an idea
> >> of
> >> why.
> > 
> > Well, first of all, it has not been upstreamed correctly: a) without
> > any given rationale (or at least without any I could find) and b)
> > without enough testing.
> 
> +1
> 
> >> I asked several times the reasons of why the AFTR state couldn't work
> >> with multiple CPUs and I had no answer.
> >> 
> >> Frankly speaking I have a couple of hypothesis:
> >> 
> >> 1. something is not correctly setup and the PMU does not wake up the
> >> CPU1 2. there is a silicon bug and no one wants to tell it is the
> >> case
> >> 
> >> In any case, this must be investigated and identified. And then we
> >> can
> >> take a decision about this state.
> > 
> > Well, everything you're saying is correct, assuming that this feature
> > is useful, which needs confirmation. I'd still want any evidence of
> > this feature being of any use first, to not waste time on something
> > that is useless.
> 
> IMHO, it is useful. That's sure a highly integrated hardware with a lot
> of peripherals, the power saving is lost in the noise but with a small
> embedded device, the power saving is significant.

Be aware that we might be talking here about savings of single 
milliamperes, because the highest, dynamic, power consumption is already 
bypassed in WFI idle mode, with stopped core clock.

Our measurements have shown that static leakage is pretty low on SoCs we 
checked, that is Exynos 4210 and Exynos 4412. I don't remember exactly 
ATM, because it was some time ago, but it was something around 2 mA per 
core.

Now it depends on overall power consumption of given system, whether such 
saving is significant or ignorable - in our case it is the latter, because 
consumption caused by rest of the system (think of display, wireless 
radios, etc.) is much higher.

> What I am worried about is the different SoCs being introduced in this
> driver without investigating this cpu1 restriction and it sounds like
> the AFTR seems to work (I have an odroid-u2 with a 4412 I will test to
> check if the AFTR works on it) and nobody seems to be hurt by this and,
> as you stated, there is no rational about it. That means the Exynos will
> become really more and more power aggressive with 4/A15 or 8/A15 based
> boards if the AFTR state is not correctly supported.

See my other reply, for a bit more information about this state and CPU1 
restriction.

> AFAICT, there is also the LPA state and the DEEPSLEEP, right ? If the
> AFTR does not work, I don't see these states working too.

Yes, there are other states as well. With similar execution flow, but 
different set of preserved and lost context, requiring more or less save 
and restore on kernel side.

> The cpuidle driver is critical for the new b.L architecture. If we
> aren't able to put a cluster of big cpus in a power down state, we lose
> the benefit of all the work made up-front at the scheduler level (eg.
> small task packing, workqueue/timer migration) and more generally we
> lose the benefit of the b.L architecture (except if we do some hacks
> which are orthogonal to the generic approach).

In this case just simple power down of CPU cores is enough, which doesn't 
require entering system-wide low power state, such as AFTR and friends. 
Such low power mode is needed only when you want to power off all the 
cores, which makes you lose program execution state.

When at least one of the cores is still active, the whole system is in 
control of kernel and no special care must be taken to bring up cores that 
are down and back, just standard SMP/hotplug ops.

Best regards,
Tomasz
Daniel Lezcano July 1, 2013, 9:09 a.m. UTC | #14
On 06/29/2013 12:47 AM, Tomasz Figa wrote:
> On Friday 28 of June 2013 23:47:49 Daniel Lezcano wrote:
>> On 06/28/2013 06:27 PM, Bartlomiej Zolnierkiewicz wrote:
>>> On Friday, June 28, 2013 01:20:09 PM Daniel Lezcano wrote:
>>>> On 06/28/2013 12:11 PM, Tomasz Figa wrote:
>>>>> Hi Daniel,
>>>>>
>>>>> I've been fighting with this whole AFTR state as well, before
>>>>> Bartlomiej. Let me share my thoughts on this.
>>
>> [ ... ]
>>
>>>>> If you don't unplug all the CPUs >0 the state is obviously never
>>>>> reached. Otherwise the whole system hangs after it tries to enter
>>>>> this mode without any reaction for external events, other than
>>>>> reset.
>>>>
>>>> Need investigation.
>>>>
>>>> What is the exynos board version where that occurs ?
>>>
>>> Could you please tell me what exactly do you mean by that?
>>>
>>> I already wrote that we can reproduce the problem on EXYNOS4210 rev0
>>> and rev1.1 (we don't have rev1.0). Tomek has also reproduced the
>>> problem on some later SoCs (I hope that he can give you exact
>>> revisions).
>>>
>>> In our testing we didn't encounter the board on which the problem
>>> doesn't occur. Our current working theory is that the problem may be
>>> u-boot (or first stage bootloader) related.
>>
>> Ok, the status for what I know:
>>
>> Origen Exynos4210, board ver A: works for me
>> Arndale Exynos5250: works for me but only if u-boot does not enable the
>> hypervisor mode.
>> Chromebook Exynos5250: works for me
>>
>> I found the following drivers:
>>
>> https://github.com/AndreiLux/Perseus-UNIVERSAL5410/blob/samsung/arch/arm
>> /mach-exynos/cpuidle.c
>>
>> https://github.com/CyanogenMod/hardkernel-kernel-4412/blob/cm-10.1/arch/
>> arm/mach-exynos/cpuidle-exynos4.c
>>
>> Sounds like the num cpus > 1 is still there.
> 
> As far as I know (and I can read from the documentation), AFTR is a state 
> in which program execution context (e.g. PC, CPU registers, etc.) is lost. 
> This is like sleep mode, but with much more state saved (all the 
> peripherals and L2 cache), which lets you resume much faster.
> 
> Based on that, the execution flow is as following:
>  - kernel stops all secondary cores,
>  - kernel configures PMU to enter AFTR mode,
>  - WFI is executed on boot CPU, which lets PMU enter the requested low 
> power mode,
>  - the system stays with all CPU cores powered off until a wake-up event 
> happens,
>  - a wake-up event causes a wake-up reset of the boot CPU, which resumes 
> operation from reset vector (usually IROM),
>  - here exactly the same procedure is used as on wake-up from sleep mode, 
> with the exception that some assumptions can be made about kept state, 
> like BL1 being still in IRAM/SYSRAM, without the need to reload it from 
> boot device,
>  - after going through all the boot code, it finally jumps to the resume 
> handler saved by kernel in one of those INFORM registers or somewhere in 
> SYSRAM,
>  - resume handler reinitializes boot CPU, restores things like MMU, 
> coprocessors, etc. and finally boots secondary cores again,
>  - the system is operating normally again.
> 
> This is simplified a lot, with all things happened internally in PMU and 
> boot code skipped, but should give the general picture. See all the 
> analogies to normal sleep mode, which (IMHO) makes it understandable that 
> this mode can't be entered with more than one core active. 
> 
> However, with something like coupled cpuidle, when possible, this mode can 
> be entered by powering down remaining cores automatically, so this is not 
> a problem with this mode itself, but rather with current approach of 
> entering it.

Are you saying only CPU0 can program the PMU to enter the AFTR mode ?
And only CPU0 can be woken up ?

Is it possible to have CPU0 to enter the AFTR idle state (without
switching to this mode), save its context and go to WFI. And then when
CPU1 goes to this state also, it saves its context and set the PMU to
enter the AFTR mode ?
Daniel Lezcano July 1, 2013, 9:23 a.m. UTC | #15
On 06/29/2013 01:07 AM, Tomasz Figa wrote:
> On Saturday 29 of June 2013 00:27:04 Daniel Lezcano wrote:
>> On 06/28/2013 07:31 PM, Tomasz Figa wrote:
>>> On Friday 28 of June 2013 13:20:09 Daniel Lezcano wrote:
>> [ ... ]
>>
>>>> The kernel is not a playground where you can upstream code and then
>>>> remove it because a feature seems broken and you don't have an idea
>>>> of
>>>> why.
>>>
>>> Well, first of all, it has not been upstreamed correctly: a) without
>>> any given rationale (or at least without any I could find) and b)
>>> without enough testing.
>>
>> +1
>>
>>>> I asked several times the reasons of why the AFTR state couldn't work
>>>> with multiple CPUs and I had no answer.
>>>>
>>>> Frankly speaking I have a couple of hypothesis:
>>>>
>>>> 1. something is not correctly setup and the PMU does not wake up the
>>>> CPU1 2. there is a silicon bug and no one wants to tell it is the
>>>> case
>>>>
>>>> In any case, this must be investigated and identified. And then we
>>>> can
>>>> take a decision about this state.
>>>
>>> Well, everything you're saying is correct, assuming that this feature
>>> is useful, which needs confirmation. I'd still want any evidence of
>>> this feature being of any use first, to not waste time on something
>>> that is useless.
>>
>> IMHO, it is useful. That's sure a highly integrated hardware with a lot
>> of peripherals, the power saving is lost in the noise but with a small
>> embedded device, the power saving is significant.
> 
> Be aware that we might be talking here about savings of single 
> milliamperes, because the highest, dynamic, power consumption is already 
> bypassed in WFI idle mode, with stopped core clock.
> 
> Our measurements have shown that static leakage is pretty low on SoCs we 
> checked, that is Exynos 4210 and Exynos 4412. I don't remember exactly 
> ATM, because it was some time ago, but it was something around 2 mA per 
> core.

Ok I don't know exactly the values but I am a bit surprised by this
small value. When I measured the power consumption with a core offline
it saved roughly 100mA (AFAIR). Of course, there is no dynamic with the
hotplugging but there is really a big difference here.

With another board with similar hardware (but a different SoC), just
putting the cores in retention mode in the cpuidle drivers saved 13mA.

Is it possible for the exynos board, the target residency is too large
for AFTR mode ? Is this value really resulting from a measurement or
just put there arbitrarily ? And, may be, it does not enter enough in
this state to have a significant power saving.

> Now it depends on overall power consumption of given system, whether such 
> saving is significant or ignorable - in our case it is the latter, because 
> consumption caused by rest of the system (think of display, wireless 
> radios, etc.) is much higher.
> 
>> What I am worried about is the different SoCs being introduced in this
>> driver without investigating this cpu1 restriction and it sounds like
>> the AFTR seems to work (I have an odroid-u2 with a 4412 I will test to
>> check if the AFTR works on it) and nobody seems to be hurt by this and,
>> as you stated, there is no rational about it. That means the Exynos will
>> become really more and more power aggressive with 4/A15 or 8/A15 based
>> boards if the AFTR state is not correctly supported.
> 
> See my other reply, for a bit more information about this state and CPU1 
> restriction.
> 
>> AFAICT, there is also the LPA state and the DEEPSLEEP, right ? If the
>> AFTR does not work, I don't see these states working too.
> 
> Yes, there are other states as well. With similar execution flow, but 
> different set of preserved and lost context, requiring more or less save 
> and restore on kernel side.
> 
>> The cpuidle driver is critical for the new b.L architecture. If we
>> aren't able to put a cluster of big cpus in a power down state, we lose
>> the benefit of all the work made up-front at the scheduler level (eg.
>> small task packing, workqueue/timer migration) and more generally we
>> lose the benefit of the b.L architecture (except if we do some hacks
>> which are orthogonal to the generic approach).
> 
> In this case just simple power down of CPU cores is enough, which doesn't 
> require entering system-wide low power state, such as AFTR and friends. 
> Such low power mode is needed only when you want to power off all the 
> cores, which makes you lose program execution state.
> 
> When at least one of the cores is still active, the whole system is in 
> control of kernel and no special care must be taken to bring up cores that 
> are down and back, just standard SMP/hotplug ops.

Yes but unfortunately this approach has been nacked by the community
last year with the OMAP4.
Bartlomiej Zolnierkiewicz July 11, 2013, 1:14 p.m. UTC | #16
Hi,

On Friday, June 28, 2013 11:47:49 PM Daniel Lezcano wrote:
> On 06/28/2013 06:27 PM, Bartlomiej Zolnierkiewicz wrote:
> > On Friday, June 28, 2013 01:20:09 PM Daniel Lezcano wrote:
> >> On 06/28/2013 12:11 PM, Tomasz Figa wrote:
> >>> Hi Daniel,
> >>>
> >>> I've been fighting with this whole AFTR state as well, before Bartlomiej. 
> >>> Let me share my thoughts on this.
> >>>
> 
> [ ... ]
> 
> >>>
> >>> If you don't unplug all the CPUs >0 the state is obviously never reached. 
> >>> Otherwise the whole system hangs after it tries to enter this mode without 
> >>> any reaction for external events, other than reset.
> >>
> >> Need investigation.
> >>
> >> What is the exynos board version where that occurs ?
> > 
> > Could you please tell me what exactly do you mean by that?
> > 
> > I already wrote that we can reproduce the problem on EXYNOS4210 rev0
> > and rev1.1 (we don't have rev1.0). Tomek has also reproduced the problem
> > on some later SoCs (I hope that he can give you exact revisions).
> > 
> > In our testing we didn't encounter the board on which the problem
> > doesn't occur. Our current working theory is that the problem may be
> > u-boot (or first stage bootloader) related.
> 
> Ok, the status for what I know:
> 
> Origen Exynos4210, board ver A: works for me
> Arndale Exynos5250: works for me but only if u-boot does not enable the
> hypervisor mode.
> Chromebook Exynos5250: works for me

I've also done some more testing. First I tested on some Exynos4412 devices
(M0 and SLP_PQ) and AFTR was not working on them. Then I got my hands on
Origen Exynos4210 (thanks to Tomek Figa for providing it) and AFTR is working
just fine on it. Finally I tried Trats board again but with the upstream
u-boot instead of our custom modified version (thanks to help from Lukasz
Majewski) and I found out that after this change AFTR works fine on it! It
also gives quite nice power savings (~80mA less current drawn in AFTR mode
compared to just WFI one).

With the above findings it now seems that the issue is on our side and is
outside the kernel. Thanks for help with narrowing down the problem and
sorry for wasting your time.

Best regards,
--
Bartlomiej Zolnierkiewicz
Samsung R&D Institute Poland
Samsung Electronics

> I found the following drivers:
> 
> https://github.com/AndreiLux/Perseus-UNIVERSAL5410/blob/samsung/arch/arm/mach-exynos/cpuidle.c
> 
> https://github.com/CyanogenMod/hardkernel-kernel-4412/blob/cm-10.1/arch/arm/mach-exynos/cpuidle-exynos4.c
> 
> Sounds like the num cpus > 1 is still there.
> 
> [ ... ]
> 
> >> The kernel is not a playground where you can upstream code and then
> >> remove it because a feature seems broken and you don't have an idea of why.
> > 
> > Neither me or Tomek did upstream this code and we couldn't react in
> > time because we haven't noticed that it is completely unusable for us
> > as EXYNOS cpuidle driver is not even used by default on EXYNOS (it is
> > not enabled either in defconfig or Kconfig).
> > 
> > Moreover the feature we are talking about (AFTR mode) is also not used
> > by default (except EXYNOS4210 rev0 on which it lockups system for us)
> > even with EXYNOS cpuidle driver being enabled (because this specific
> > feature depends on CPU hot-unplug which is not done automatically right
> > now).
> > 
> > Such things like unused/broken code removal is not something very
> > unusual in the upstream kernel (I'm speaking from the experience here
> > having maintained large subsystem for a couple of years). In this
> > particular case we are talking about ~130 lines of code which can
> > be trivially brought back later when/if needed.
> > 
> > Anyway if the code removal is controversial for you we can just disable
> > AFTR mode by default and enable it only when special command line option
> > is given (i.e. "aftr"). This would fix all the broken configuration
> > while still allowing the feature to be enabled on systems that had it
> > working previously (since you claim that it works on some chipset/u-boot
> > configurations).
> 
> Actually, there are several reasons I am not in favor for the moment to
> remove this code:
> 
> 1) code can't be pushed upstream and then removed so easily
> 2) I asked several times what was this cpu1 hack, I had no answer
> 3) I tried to make both cpus entering the AFTR state, but the cpu1 never
> wakes up, I asked but no answer.
> 
> I would like to have some answers :)
> 
> Before taking the decision to remove this state (btw you can remove the
> driver directly, no ? the default idle function is WFI), IMO it is worth
> to investigate and to spend some time to clarify what is happening. Then
> we can take a decision.
> 
> I am willing to help.
> 
> >> I asked several times the reasons of why the AFTR state couldn't work
> >> with multiple CPUs and I had no answer.
> > 
> > Unfortunately I don't know the answer for your question.
> > 
> > The AFTR mode doesn't work for us *at*all* (even with *one* CPU).
> > 
> >> Frankly speaking I have a couple of hypothesis:
> >>
> >> 1. something is not correctly setup and the PMU does not wake up the CPU1
> >> 2. there is a silicon bug and no one wants to tell it is the case
> >>
> >> In any case, this must be investigated and identified. And then we can
> >> take a decision about this state.
> > 
> > I don't have good idea currently how to investigate it further.
> > 
> > I also don't have any prove that the actual work is worth it
> > (and this work can easily take some weeks).
> > 
> > One of main responsibilities of the maintainer it to make sure that
> > the code does indeed work and that regressions (like these caused by
> > AFTR mode feature) are fixed in the timely manner, not let the code
> > sit in the limbo state for large periods of time.  It is already very
> > bad situation that the regression we are hitting was present since
> > v3.4 and we are in v3.10 now, I would like to have it fixed ASAP so
> > we may actually consider enabling cpuidle in our exynos_defconfig.
> 
> I agree.
> 
> Thanks
>   -- Daniel
Daniel Lezcano July 17, 2013, 12:57 p.m. UTC | #17
On 07/11/2013 03:14 PM, Bartlomiej Zolnierkiewicz wrote:
> 
> Hi,
> 
> On Friday, June 28, 2013 11:47:49 PM Daniel Lezcano wrote:
>> On 06/28/2013 06:27 PM, Bartlomiej Zolnierkiewicz wrote:
>>> On Friday, June 28, 2013 01:20:09 PM Daniel Lezcano wrote:
>>>> On 06/28/2013 12:11 PM, Tomasz Figa wrote:
>>>>> Hi Daniel,
>>>>>
>>>>> I've been fighting with this whole AFTR state as well, before Bartlomiej. 
>>>>> Let me share my thoughts on this.
>>>>>
>>
>> [ ... ]
>>
>>>>>
>>>>> If you don't unplug all the CPUs >0 the state is obviously never reached. 
>>>>> Otherwise the whole system hangs after it tries to enter this mode without 
>>>>> any reaction for external events, other than reset.
>>>>
>>>> Need investigation.
>>>>
>>>> What is the exynos board version where that occurs ?
>>>
>>> Could you please tell me what exactly do you mean by that?
>>>
>>> I already wrote that we can reproduce the problem on EXYNOS4210 rev0
>>> and rev1.1 (we don't have rev1.0). Tomek has also reproduced the problem
>>> on some later SoCs (I hope that he can give you exact revisions).
>>>
>>> In our testing we didn't encounter the board on which the problem
>>> doesn't occur. Our current working theory is that the problem may be
>>> u-boot (or first stage bootloader) related.
>>
>> Ok, the status for what I know:
>>
>> Origen Exynos4210, board ver A: works for me
>> Arndale Exynos5250: works for me but only if u-boot does not enable the
>> hypervisor mode.
>> Chromebook Exynos5250: works for me
> 
> I've also done some more testing. First I tested on some Exynos4412 devices
> (M0 and SLP_PQ) and AFTR was not working on them. Then I got my hands on
> Origen Exynos4210 (thanks to Tomek Figa for providing it) and AFTR is working
> just fine on it. Finally I tried Trats board again but with the upstream
> u-boot instead of our custom modified version (thanks to help from Lukasz
> Majewski) and I found out that after this change AFTR works fine on it! It
> also gives quite nice power savings (~80mA less current drawn in AFTR mode
> compared to just WFI one).

Is the 80mA power saving comparing with:

 * (cpu0/cpu1 online) vs (cpu0 AFTR and cpu1 offline)
or
 * (cpu0 AFTR and cpu1 offline) vs (cpu0 WFI and cpu1 offline)

?

> With the above findings it now seems that the issue is on our side and is
> outside the kernel. Thanks for help with narrowing down the problem and
> sorry for wasting your time.

May be we were not working on the same tree.

I am on the linux-pm-next tree.

Now the merge window occurred, the AFTR is no longer working on my board.

After git bisecting:

commit 87107d89052bcec1fe91b309631de4ed294a5171
Author: Arnd Bergmann <arnd@arndb.de>
Date:   Wed Jun 19 01:36:52 2013 +0900

    ARM: EXYNOS: Remove legacy L2X0 initialization

    Since Exynos is now supporting only DT-based boot, the old L2X0
    initialization code is not needed anymore, so exynos4_l2x0_cache_init()
    can be greatly simplified.

    Signed-off-by: Arnd Bergmann <arnd@arndb.de>
    Signed-off-by: Tomasz Figa <t.figa@samsung.com>
    Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
    Signed-off-by: Kukjin Kim <kgene.kim@samsung.com>

:040000 040000 79e1adfd6386256ba71b3f609592d5acf9c08222
9cb0026563b3b8657d906767493d26c501963269 M	arch

Reverting the patch solves the problem.

Any ideas ?

Thanks
  -- Daniel


> 
> Best regards,
> --
> Bartlomiej Zolnierkiewicz
> Samsung R&D Institute Poland
> Samsung Electronics
> 
>> I found the following drivers:
>>
>> https://github.com/AndreiLux/Perseus-UNIVERSAL5410/blob/samsung/arch/arm/mach-exynos/cpuidle.c
>>
>> https://github.com/CyanogenMod/hardkernel-kernel-4412/blob/cm-10.1/arch/arm/mach-exynos/cpuidle-exynos4.c
>>
>> Sounds like the num cpus > 1 is still there.
>>
>> [ ... ]
>>
>>>> The kernel is not a playground where you can upstream code and then
>>>> remove it because a feature seems broken and you don't have an idea of why.
>>>
>>> Neither me or Tomek did upstream this code and we couldn't react in
>>> time because we haven't noticed that it is completely unusable for us
>>> as EXYNOS cpuidle driver is not even used by default on EXYNOS (it is
>>> not enabled either in defconfig or Kconfig).
>>>
>>> Moreover the feature we are talking about (AFTR mode) is also not used
>>> by default (except EXYNOS4210 rev0 on which it lockups system for us)
>>> even with EXYNOS cpuidle driver being enabled (because this specific
>>> feature depends on CPU hot-unplug which is not done automatically right
>>> now).
>>>
>>> Such things like unused/broken code removal is not something very
>>> unusual in the upstream kernel (I'm speaking from the experience here
>>> having maintained large subsystem for a couple of years). In this
>>> particular case we are talking about ~130 lines of code which can
>>> be trivially brought back later when/if needed.
>>>
>>> Anyway if the code removal is controversial for you we can just disable
>>> AFTR mode by default and enable it only when special command line option
>>> is given (i.e. "aftr"). This would fix all the broken configuration
>>> while still allowing the feature to be enabled on systems that had it
>>> working previously (since you claim that it works on some chipset/u-boot
>>> configurations).
>>
>> Actually, there are several reasons I am not in favor for the moment to
>> remove this code:
>>
>> 1) code can't be pushed upstream and then removed so easily
>> 2) I asked several times what was this cpu1 hack, I had no answer
>> 3) I tried to make both cpus entering the AFTR state, but the cpu1 never
>> wakes up, I asked but no answer.
>>
>> I would like to have some answers :)
>>
>> Before taking the decision to remove this state (btw you can remove the
>> driver directly, no ? the default idle function is WFI), IMO it is worth
>> to investigate and to spend some time to clarify what is happening. Then
>> we can take a decision.
>>
>> I am willing to help.
>>
>>>> I asked several times the reasons of why the AFTR state couldn't work
>>>> with multiple CPUs and I had no answer.
>>>
>>> Unfortunately I don't know the answer for your question.
>>>
>>> The AFTR mode doesn't work for us *at*all* (even with *one* CPU).
>>>
>>>> Frankly speaking I have a couple of hypothesis:
>>>>
>>>> 1. something is not correctly setup and the PMU does not wake up the CPU1
>>>> 2. there is a silicon bug and no one wants to tell it is the case
>>>>
>>>> In any case, this must be investigated and identified. And then we can
>>>> take a decision about this state.
>>>
>>> I don't have good idea currently how to investigate it further.
>>>
>>> I also don't have any prove that the actual work is worth it
>>> (and this work can easily take some weeks).
>>>
>>> One of main responsibilities of the maintainer it to make sure that
>>> the code does indeed work and that regressions (like these caused by
>>> AFTR mode feature) are fixed in the timely manner, not let the code
>>> sit in the limbo state for large periods of time.  It is already very
>>> bad situation that the regression we are hitting was present since
>>> v3.4 and we are in v3.10 now, I would like to have it fixed ASAP so
>>> we may actually consider enabling cpuidle in our exynos_defconfig.
>>
>> I agree.
>>
>> Thanks
>>   -- Daniel
>
Lorenzo Pieralisi July 17, 2013, 1:31 p.m. UTC | #18
On Wed, Jul 17, 2013 at 01:57:30PM +0100, Daniel Lezcano wrote:
> On 07/11/2013 03:14 PM, Bartlomiej Zolnierkiewicz wrote:
> > 
> > Hi,
> > 
> > On Friday, June 28, 2013 11:47:49 PM Daniel Lezcano wrote:
> >> On 06/28/2013 06:27 PM, Bartlomiej Zolnierkiewicz wrote:
> >>> On Friday, June 28, 2013 01:20:09 PM Daniel Lezcano wrote:
> >>>> On 06/28/2013 12:11 PM, Tomasz Figa wrote:
> >>>>> Hi Daniel,
> >>>>>
> >>>>> I've been fighting with this whole AFTR state as well, before Bartlomiej. 
> >>>>> Let me share my thoughts on this.
> >>>>>
> >>
> >> [ ... ]
> >>
> >>>>>
> >>>>> If you don't unplug all the CPUs >0 the state is obviously never reached. 
> >>>>> Otherwise the whole system hangs after it tries to enter this mode without 
> >>>>> any reaction for external events, other than reset.
> >>>>
> >>>> Need investigation.
> >>>>
> >>>> What is the exynos board version where that occurs ?
> >>>
> >>> Could you please tell me what exactly do you mean by that?
> >>>
> >>> I already wrote that we can reproduce the problem on EXYNOS4210 rev0
> >>> and rev1.1 (we don't have rev1.0). Tomek has also reproduced the problem
> >>> on some later SoCs (I hope that he can give you exact revisions).
> >>>
> >>> In our testing we didn't encounter the board on which the problem
> >>> doesn't occur. Our current working theory is that the problem may be
> >>> u-boot (or first stage bootloader) related.
> >>
> >> Ok, the status for what I know:
> >>
> >> Origen Exynos4210, board ver A: works for me
> >> Arndale Exynos5250: works for me but only if u-boot does not enable the
> >> hypervisor mode.
> >> Chromebook Exynos5250: works for me
> > 
> > I've also done some more testing. First I tested on some Exynos4412 devices
> > (M0 and SLP_PQ) and AFTR was not working on them. Then I got my hands on
> > Origen Exynos4210 (thanks to Tomek Figa for providing it) and AFTR is working
> > just fine on it. Finally I tried Trats board again but with the upstream
> > u-boot instead of our custom modified version (thanks to help from Lukasz
> > Majewski) and I found out that after this change AFTR works fine on it! It
> > also gives quite nice power savings (~80mA less current drawn in AFTR mode
> > compared to just WFI one).
> 
> Is the 80mA power saving comparing with:
> 
>  * (cpu0/cpu1 online) vs (cpu0 AFTR and cpu1 offline)
> or
>  * (cpu0 AFTR and cpu1 offline) vs (cpu0 WFI and cpu1 offline)
> 
> ?
> 
> > With the above findings it now seems that the issue is on our side and is
> > outside the kernel. Thanks for help with narrowing down the problem and
> > sorry for wasting your time.
> 
> May be we were not working on the same tree.
> 
> I am on the linux-pm-next tree.
> 
> Now the merge window occurred, the AFTR is no longer working on my board.
> 
> After git bisecting:
> 
> commit 87107d89052bcec1fe91b309631de4ed294a5171
> Author: Arnd Bergmann <arnd@arndb.de>
> Date:   Wed Jun 19 01:36:52 2013 +0900
> 
>     ARM: EXYNOS: Remove legacy L2X0 initialization
> 
>     Since Exynos is now supporting only DT-based boot, the old L2X0
>     initialization code is not needed anymore, so exynos4_l2x0_cache_init()
>     can be greatly simplified.
> 
>     Signed-off-by: Arnd Bergmann <arnd@arndb.de>
>     Signed-off-by: Tomasz Figa <t.figa@samsung.com>
>     Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
>     Signed-off-by: Kukjin Kim <kgene.kim@samsung.com>
> 
> :040000 040000 79e1adfd6386256ba71b3f609592d5acf9c08222
> 9cb0026563b3b8657d906767493d26c501963269 M	arch
> 
> Reverting the patch solves the problem.
> 
> Any ideas ?

It is certainly a problem related to restoring L2 control registers in
plat-samsung/s5p-sleep.S, probably DT init misses some registers
(prefetch and power control) that are restored to zero values most
likely, or the cluster cannot be shut down owing to those values not
being programmed properly. Chander posted a patch to fix that but I could
not follow that thread, I have no idea where he got to and if that fixes
the issue, I think so.

Lorenzo
Tomasz Figa July 17, 2013, 2:07 p.m. UTC | #19
Hi,

On Wednesday 17 of July 2013 14:31:23 Lorenzo Pieralisi wrote:
> On Wed, Jul 17, 2013 at 01:57:30PM +0100, Daniel Lezcano wrote:
> > On 07/11/2013 03:14 PM, Bartlomiej Zolnierkiewicz wrote:
> > > Hi,
> > > 
> > > On Friday, June 28, 2013 11:47:49 PM Daniel Lezcano wrote:
> > >> On 06/28/2013 06:27 PM, Bartlomiej Zolnierkiewicz wrote:
> > >>> On Friday, June 28, 2013 01:20:09 PM Daniel Lezcano wrote:
> > >>>> On 06/28/2013 12:11 PM, Tomasz Figa wrote:
> > >>>>> Hi Daniel,
> > >>>>> 
> > >>>>> I've been fighting with this whole AFTR state as well, before
> > >>>>> Bartlomiej. Let me share my thoughts on this.
> > >> 
> > >> [ ... ]
> > >> 
> > >>>>> If you don't unplug all the CPUs >0 the state is obviously never
> > >>>>> reached. Otherwise the whole system hangs after it tries to
> > >>>>> enter this mode without any reaction for external events, other
> > >>>>> than reset.
> > >>>> 
> > >>>> Need investigation.
> > >>>> 
> > >>>> What is the exynos board version where that occurs ?
> > >>> 
> > >>> Could you please tell me what exactly do you mean by that?
> > >>> 
> > >>> I already wrote that we can reproduce the problem on EXYNOS4210
> > >>> rev0
> > >>> and rev1.1 (we don't have rev1.0). Tomek has also reproduced the
> > >>> problem
> > >>> on some later SoCs (I hope that he can give you exact revisions).
> > >>> 
> > >>> In our testing we didn't encounter the board on which the problem
> > >>> doesn't occur. Our current working theory is that the problem may
> > >>> be
> > >>> u-boot (or first stage bootloader) related.
> > >> 
> > >> Ok, the status for what I know:
> > >> 
> > >> Origen Exynos4210, board ver A: works for me
> > >> Arndale Exynos5250: works for me but only if u-boot does not enable
> > >> the
> > >> hypervisor mode.
> > >> Chromebook Exynos5250: works for me
> > > 
> > > I've also done some more testing. First I tested on some Exynos4412
> > > devices (M0 and SLP_PQ) and AFTR was not working on them. Then I got
> > > my hands on Origen Exynos4210 (thanks to Tomek Figa for providing
> > > it) and AFTR is working just fine on it. Finally I tried Trats board
> > > again but with the upstream u-boot instead of our custom modified
> > > version (thanks to help from Lukasz Majewski) and I found out that
> > > after this change AFTR works fine on it! It also gives quite nice
> > > power savings (~80mA less current drawn in AFTR mode compared to
> > > just WFI one).
> > 
> > Is the 80mA power saving comparing with:
> >  * (cpu0/cpu1 online) vs (cpu0 AFTR and cpu1 offline)
> > 
> > or
> > 
> >  * (cpu0 AFTR and cpu1 offline) vs (cpu0 WFI and cpu1 offline)
> > 
> > ?
> > 
> > > With the above findings it now seems that the issue is on our side
> > > and is outside the kernel. Thanks for help with narrowing down the
> > > problem and sorry for wasting your time.
> > 
> > May be we were not working on the same tree.
> > 
> > I am on the linux-pm-next tree.
> > 
> > Now the merge window occurred, the AFTR is no longer working on my
> > board.
> > 
> > After git bisecting:
> > 
> > commit 87107d89052bcec1fe91b309631de4ed294a5171
> > Author: Arnd Bergmann <arnd@arndb.de>
> > Date:   Wed Jun 19 01:36:52 2013 +0900
> > 
> >     ARM: EXYNOS: Remove legacy L2X0 initialization
> >     
> >     Since Exynos is now supporting only DT-based boot, the old L2X0
> >     initialization code is not needed anymore, so
> >     exynos4_l2x0_cache_init()
> >     can be greatly simplified.
> >     
> >     Signed-off-by: Arnd Bergmann <arnd@arndb.de>
> >     Signed-off-by: Tomasz Figa <t.figa@samsung.com>
> >     Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
> >     Signed-off-by: Kukjin Kim <kgene.kim@samsung.com>
> > :
> > :040000 040000 79e1adfd6386256ba71b3f609592d5acf9c08222
> > 
> > 9cb0026563b3b8657d906767493d26c501963269 M	arch
> > 
> > Reverting the patch solves the problem.
> > 
> > Any ideas ?
> 
> It is certainly a problem related to restoring L2 control registers in
> plat-samsung/s5p-sleep.S, probably DT init misses some registers
> (prefetch and power control) that are restored to zero values most
> likely, or the cluster cannot be shut down owing to those values not
> being programmed properly.

Looks like it.

> Chander posted a patch to fix that but I could
> not follow that thread, I have no idea where he got to and if that fixes
> the issue, I think so.

I will be doing a lot of cleanup of PM code for Samsung platforms starting 
from next week. Mostly low level suspend/resume handling (Samsung PM core), 
but since AFTR mode has much in common with sleep mode (e.g. both go 
through s5p-sleep.S after waking up), so I can look at this issue too, on 
our internal boards and Origen.

As for L2 suspend/resume on Samsung platforms, it has to be reworked 
completely, because on newer boards which have secure firmware it can't be 
resumed normally, because this requires calling several firmware operations. 
I already have patches for this, but need to do more testing, including 
checking how this interferes with things like AFTR mode.

Best regards,
Tomasz
Bartlomiej Zolnierkiewicz July 17, 2013, 2:36 p.m. UTC | #20
On Wednesday, July 17, 2013 02:57:30 PM Daniel Lezcano wrote:
> On 07/11/2013 03:14 PM, Bartlomiej Zolnierkiewicz wrote:
> > 
> > Hi,
> > 
> > On Friday, June 28, 2013 11:47:49 PM Daniel Lezcano wrote:
> >> On 06/28/2013 06:27 PM, Bartlomiej Zolnierkiewicz wrote:
> >>> On Friday, June 28, 2013 01:20:09 PM Daniel Lezcano wrote:
> >>>> On 06/28/2013 12:11 PM, Tomasz Figa wrote:
> >>>>> Hi Daniel,
> >>>>>
> >>>>> I've been fighting with this whole AFTR state as well, before Bartlomiej. 
> >>>>> Let me share my thoughts on this.
> >>>>>
> >>
> >> [ ... ]
> >>
> >>>>>
> >>>>> If you don't unplug all the CPUs >0 the state is obviously never reached. 
> >>>>> Otherwise the whole system hangs after it tries to enter this mode without 
> >>>>> any reaction for external events, other than reset.
> >>>>
> >>>> Need investigation.
> >>>>
> >>>> What is the exynos board version where that occurs ?
> >>>
> >>> Could you please tell me what exactly do you mean by that?
> >>>
> >>> I already wrote that we can reproduce the problem on EXYNOS4210 rev0
> >>> and rev1.1 (we don't have rev1.0). Tomek has also reproduced the problem
> >>> on some later SoCs (I hope that he can give you exact revisions).
> >>>
> >>> In our testing we didn't encounter the board on which the problem
> >>> doesn't occur. Our current working theory is that the problem may be
> >>> u-boot (or first stage bootloader) related.
> >>
> >> Ok, the status for what I know:
> >>
> >> Origen Exynos4210, board ver A: works for me
> >> Arndale Exynos5250: works for me but only if u-boot does not enable the
> >> hypervisor mode.
> >> Chromebook Exynos5250: works for me
> > 
> > I've also done some more testing. First I tested on some Exynos4412 devices
> > (M0 and SLP_PQ) and AFTR was not working on them. Then I got my hands on
> > Origen Exynos4210 (thanks to Tomek Figa for providing it) and AFTR is working
> > just fine on it. Finally I tried Trats board again but with the upstream
> > u-boot instead of our custom modified version (thanks to help from Lukasz
> > Majewski) and I found out that after this change AFTR works fine on it! It
> > also gives quite nice power savings (~80mA less current drawn in AFTR mode
> > compared to just WFI one).
> 
> Is the 80mA power saving comparing with:
> 
>  * (cpu0/cpu1 online) vs (cpu0 AFTR and cpu1 offline)
> or
>  * (cpu0 AFTR and cpu1 offline) vs (cpu0 WFI and cpu1 offline)
> ?

The latter one and the following scenario:

 * (cpu0/cpu1 online) vs (cpu0 WFI and cpu1 offline)

gives only ~10mA saving. Maybe something is not working as it should be in
the hotplug support and the code is not doing as much as it could but these
are the actual results (using vanilla v3.10).

Best regards,
--
Bartlomiej Zolnierkiewicz
Samsung R&D Institute Poland
Samsung Electronics

> > With the above findings it now seems that the issue is on our side and is
> > outside the kernel. Thanks for help with narrowing down the problem and
> > sorry for wasting your time.
> 
> May be we were not working on the same tree.
> 
> I am on the linux-pm-next tree.
> 
> Now the merge window occurred, the AFTR is no longer working on my board.
> 
> After git bisecting:
> 
> commit 87107d89052bcec1fe91b309631de4ed294a5171
> Author: Arnd Bergmann <arnd@arndb.de>
> Date:   Wed Jun 19 01:36:52 2013 +0900
> 
>     ARM: EXYNOS: Remove legacy L2X0 initialization
> 
>     Since Exynos is now supporting only DT-based boot, the old L2X0
>     initialization code is not needed anymore, so exynos4_l2x0_cache_init()
>     can be greatly simplified.
> 
>     Signed-off-by: Arnd Bergmann <arnd@arndb.de>
>     Signed-off-by: Tomasz Figa <t.figa@samsung.com>
>     Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
>     Signed-off-by: Kukjin Kim <kgene.kim@samsung.com>
> 
> :040000 040000 79e1adfd6386256ba71b3f609592d5acf9c08222
> 9cb0026563b3b8657d906767493d26c501963269 M	arch
> 
> Reverting the patch solves the problem.
> 
> Any ideas ?
> 
> Thanks
>   -- Daniel
> 
> 
> > 
> > Best regards,
> > --
> > Bartlomiej Zolnierkiewicz
> > Samsung R&D Institute Poland
> > Samsung Electronics
> > 
> >> I found the following drivers:
> >>
> >> https://github.com/AndreiLux/Perseus-UNIVERSAL5410/blob/samsung/arch/arm/mach-exynos/cpuidle.c
> >>
> >> https://github.com/CyanogenMod/hardkernel-kernel-4412/blob/cm-10.1/arch/arm/mach-exynos/cpuidle-exynos4.c
> >>
> >> Sounds like the num cpus > 1 is still there.
> >>
> >> [ ... ]
> >>
> >>>> The kernel is not a playground where you can upstream code and then
> >>>> remove it because a feature seems broken and you don't have an idea of why.
> >>>
> >>> Neither me or Tomek did upstream this code and we couldn't react in
> >>> time because we haven't noticed that it is completely unusable for us
> >>> as EXYNOS cpuidle driver is not even used by default on EXYNOS (it is
> >>> not enabled either in defconfig or Kconfig).
> >>>
> >>> Moreover the feature we are talking about (AFTR mode) is also not used
> >>> by default (except EXYNOS4210 rev0 on which it lockups system for us)
> >>> even with EXYNOS cpuidle driver being enabled (because this specific
> >>> feature depends on CPU hot-unplug which is not done automatically right
> >>> now).
> >>>
> >>> Such things like unused/broken code removal is not something very
> >>> unusual in the upstream kernel (I'm speaking from the experience here
> >>> having maintained large subsystem for a couple of years). In this
> >>> particular case we are talking about ~130 lines of code which can
> >>> be trivially brought back later when/if needed.
> >>>
> >>> Anyway if the code removal is controversial for you we can just disable
> >>> AFTR mode by default and enable it only when special command line option
> >>> is given (i.e. "aftr"). This would fix all the broken configuration
> >>> while still allowing the feature to be enabled on systems that had it
> >>> working previously (since you claim that it works on some chipset/u-boot
> >>> configurations).
> >>
> >> Actually, there are several reasons I am not in favor for the moment to
> >> remove this code:
> >>
> >> 1) code can't be pushed upstream and then removed so easily
> >> 2) I asked several times what was this cpu1 hack, I had no answer
> >> 3) I tried to make both cpus entering the AFTR state, but the cpu1 never
> >> wakes up, I asked but no answer.
> >>
> >> I would like to have some answers :)
> >>
> >> Before taking the decision to remove this state (btw you can remove the
> >> driver directly, no ? the default idle function is WFI), IMO it is worth
> >> to investigate and to spend some time to clarify what is happening. Then
> >> we can take a decision.
> >>
> >> I am willing to help.
> >>
> >>>> I asked several times the reasons of why the AFTR state couldn't work
> >>>> with multiple CPUs and I had no answer.
> >>>
> >>> Unfortunately I don't know the answer for your question.
> >>>
> >>> The AFTR mode doesn't work for us *at*all* (even with *one* CPU).
> >>>
> >>>> Frankly speaking I have a couple of hypothesis:
> >>>>
> >>>> 1. something is not correctly setup and the PMU does not wake up the CPU1
> >>>> 2. there is a silicon bug and no one wants to tell it is the case
> >>>>
> >>>> In any case, this must be investigated and identified. And then we can
> >>>> take a decision about this state.
> >>>
> >>> I don't have good idea currently how to investigate it further.
> >>>
> >>> I also don't have any prove that the actual work is worth it
> >>> (and this work can easily take some weeks).
> >>>
> >>> One of main responsibilities of the maintainer it to make sure that
> >>> the code does indeed work and that regressions (like these caused by
> >>> AFTR mode feature) are fixed in the timely manner, not let the code
> >>> sit in the limbo state for large periods of time.  It is already very
> >>> bad situation that the regression we are hitting was present since
> >>> v3.4 and we are in v3.10 now, I would like to have it fixed ASAP so
> >>> we may actually consider enabling cpuidle in our exynos_defconfig.
> >>
> >> I agree.
> >>
> >> Thanks
> >>   -- Daniel
diff mbox

Patch

diff --git a/arch/arm/mach-exynos/cpuidle.c b/arch/arm/mach-exynos/cpuidle.c
index 17a18ff..0a657ac 100644
--- a/arch/arm/mach-exynos/cpuidle.c
+++ b/arch/arm/mach-exynos/cpuidle.c
@@ -17,30 +17,11 @@ 
 #include <linux/time.h>
 
 #include <asm/proc-fns.h>
-#include <asm/smp_scu.h>
-#include <asm/suspend.h>
-#include <asm/unified.h>
 #include <asm/cpuidle.h>
 #include <mach/regs-clock.h>
-#include <mach/regs-pmu.h>
-
-#include <plat/cpu.h>
 
 #include "common.h"
 
-#define REG_DIRECTGO_ADDR	(samsung_rev() == EXYNOS4210_REV_1_1 ? \
-			S5P_INFORM7 : (samsung_rev() == EXYNOS4210_REV_1_0 ? \
-			(S5P_VA_SYSRAM + 0x24) : S5P_INFORM0))
-#define REG_DIRECTGO_FLAG	(samsung_rev() == EXYNOS4210_REV_1_1 ? \
-			S5P_INFORM6 : (samsung_rev() == EXYNOS4210_REV_1_0 ? \
-			(S5P_VA_SYSRAM + 0x20) : S5P_INFORM1))
-
-#define S5P_CHECK_AFTR		0xFCBA0D10
-
-static int exynos4_enter_lowpower(struct cpuidle_device *dev,
-				struct cpuidle_driver *drv,
-				int index);
-
 static DEFINE_PER_CPU(struct cpuidle_device, exynos4_cpuidle_device);
 
 static struct cpuidle_driver exynos4_idle_driver = {
@@ -48,117 +29,11 @@  static struct cpuidle_driver exynos4_idle_driver = {
 	.owner			= THIS_MODULE,
 	.states = {
 		[0] = ARM_CPUIDLE_WFI_STATE,
-		[1] = {
-			.enter			= exynos4_enter_lowpower,
-			.exit_latency		= 300,
-			.target_residency	= 100000,
-			.flags			= CPUIDLE_FLAG_TIME_VALID,
-			.name			= "C1",
-			.desc			= "ARM power down",
-		},
 	},
-	.state_count = 2,
+	.state_count = 1,
 	.safe_state_index = 0,
 };
 
-/* Ext-GIC nIRQ/nFIQ is the only wakeup source in AFTR */
-static void exynos4_set_wakeupmask(void)
-{
-	__raw_writel(0x0000ff3e, S5P_WAKEUP_MASK);
-}
-
-static unsigned int g_pwr_ctrl, g_diag_reg;
-
-static void save_cpu_arch_register(void)
-{
-	/*read power control register*/
-	asm("mrc p15, 0, %0, c15, c0, 0" : "=r"(g_pwr_ctrl) : : "cc");
-	/*read diagnostic register*/
-	asm("mrc p15, 0, %0, c15, c0, 1" : "=r"(g_diag_reg) : : "cc");
-	return;
-}
-
-static void restore_cpu_arch_register(void)
-{
-	/*write power control register*/
-	asm("mcr p15, 0, %0, c15, c0, 0" : : "r"(g_pwr_ctrl) : "cc");
-	/*write diagnostic register*/
-	asm("mcr p15, 0, %0, c15, c0, 1" : : "r"(g_diag_reg) : "cc");
-	return;
-}
-
-static int idle_finisher(unsigned long flags)
-{
-	cpu_do_idle();
-	return 1;
-}
-
-static int exynos4_enter_core0_aftr(struct cpuidle_device *dev,
-				struct cpuidle_driver *drv,
-				int index)
-{
-	unsigned long tmp;
-
-	exynos4_set_wakeupmask();
-
-	/* Set value of power down register for aftr mode */
-	exynos_sys_powerdown_conf(SYS_AFTR);
-
-	__raw_writel(virt_to_phys(s3c_cpu_resume), REG_DIRECTGO_ADDR);
-	__raw_writel(S5P_CHECK_AFTR, REG_DIRECTGO_FLAG);
-
-	save_cpu_arch_register();
-
-	/* Setting Central Sequence Register for power down mode */
-	tmp = __raw_readl(S5P_CENTRAL_SEQ_CONFIGURATION);
-	tmp &= ~S5P_CENTRAL_LOWPWR_CFG;
-	__raw_writel(tmp, S5P_CENTRAL_SEQ_CONFIGURATION);
-
-	cpu_pm_enter();
-	cpu_suspend(0, idle_finisher);
-
-#ifdef CONFIG_SMP
-	if (!soc_is_exynos5250())
-		scu_enable(S5P_VA_SCU);
-#endif
-	cpu_pm_exit();
-
-	restore_cpu_arch_register();
-
-	/*
-	 * If PMU failed while entering sleep mode, WFI will be
-	 * ignored by PMU and then exiting cpu_do_idle().
-	 * S5P_CENTRAL_LOWPWR_CFG bit will not be set automatically
-	 * in this situation.
-	 */
-	tmp = __raw_readl(S5P_CENTRAL_SEQ_CONFIGURATION);
-	if (!(tmp & S5P_CENTRAL_LOWPWR_CFG)) {
-		tmp |= S5P_CENTRAL_LOWPWR_CFG;
-		__raw_writel(tmp, S5P_CENTRAL_SEQ_CONFIGURATION);
-	}
-
-	/* Clear wakeup state register */
-	__raw_writel(0x0, S5P_WAKEUP_STAT);
-
-	return index;
-}
-
-static int exynos4_enter_lowpower(struct cpuidle_device *dev,
-				struct cpuidle_driver *drv,
-				int index)
-{
-	int new_index = index;
-
-	/* This mode only can be entered when other core's are offline */
-	if (num_online_cpus() > 1)
-		new_index = drv->safe_state_index;
-
-	if (new_index == 0)
-		return arm_cpuidle_simple_enter(dev, drv, new_index);
-	else
-		return exynos4_enter_core0_aftr(dev, drv, new_index);
-}
-
 static void __init exynos5_core_down_clk(void)
 {
 	unsigned int tmp;
@@ -209,10 +84,6 @@  static int __init exynos4_init_cpuidle(void)
 		device = &per_cpu(exynos4_cpuidle_device, cpu_id);
 		device->cpu = cpu_id;
 
-		/* Support IDLE only */
-		if (cpu_id != 0)
-			device->state_count = 1;
-
 		ret = cpuidle_register_device(device);
 		if (ret) {
 			printk(KERN_ERR "CPUidle register device failed\n");