diff mbox

[V2,2/6] ARM: tegra20: cpuidle: add powered-down state for secondary CPU

Message ID 1354701715-24150-3-git-send-email-josephl@nvidia.com (mailing list archive)
State New, archived
Headers show

Commit Message

Joseph Lo Dec. 5, 2012, 10:01 a.m. UTC
The powered-down state of Tegra20 requires power gating both CPU cores.
When the secondary CPU requests to enter the powered-down state, it saves
its own context and then enters WFI. Tegra20 has a limitation that
requires both CPU cores to be powered down together. The secondary CPU
must wait for CPU0 to be in the powered-down state too. If the secondary
CPU is woken up before CPU0 enters the powered-down state, then it needs
to restore its CPU state and wait for the next chance.

Be aware that you may see the legacy power state name "LP2" in the code;
it has exactly the same meaning as "CPU power down".

Based on the work by:
Colin Cross <ccross@android.com>
Gary King <gking@nvidia.com>

Signed-off-by: Joseph Lo <josephl@nvidia.com>
---
V2:
* no change
---
 arch/arm/mach-tegra/cpuidle-tegra20.c |   84 +++++++++++++++++++
 arch/arm/mach-tegra/pm.c              |    2 +
 arch/arm/mach-tegra/sleep-tegra20.S   |  145 +++++++++++++++++++++++++++++++++
 arch/arm/mach-tegra/sleep.h           |   23 +++++
 4 files changed, 254 insertions(+), 0 deletions(-)

Comments

Lorenzo Pieralisi Dec. 5, 2012, 10:50 a.m. UTC | #1
On Wed, Dec 05, 2012 at 10:01:49AM +0000, Joseph Lo wrote:
> The powered-down state of Tegra20 requires power gating both CPU cores.
> When the secondary CPU requests to enter powered-down state, it saves
> its own contexts and then enters WFI. The Tegra20 had a limition to
> power down both CPU cores. The secondary CPU must waits for CPU0 in
> powered-down state too. If the secondary CPU be woken up before CPU0
> entering powered-down state, then it needs to restore its CPU states
> and waits for next chance.
> 
> Be aware of that, you may see the legacy power state "LP2" in the code
> which is exactly the same meaning of "CPU power down".
> 
> Based on the work by:
> Colin Cross <ccross@android.com>
> Gary King <gking@nvidia.com>
> 
> Signed-off-by: Joseph Lo <josephl@nvidia.com>
> ---
> V2:
> * no change
> ---
>  arch/arm/mach-tegra/cpuidle-tegra20.c |   84 +++++++++++++++++++
>  arch/arm/mach-tegra/pm.c              |    2 +
>  arch/arm/mach-tegra/sleep-tegra20.S   |  145 +++++++++++++++++++++++++++++++++
>  arch/arm/mach-tegra/sleep.h           |   23 +++++
>  4 files changed, 254 insertions(+), 0 deletions(-)
> 
> diff --git a/arch/arm/mach-tegra/cpuidle-tegra20.c b/arch/arm/mach-tegra/cpuidle-tegra20.c
> index d32e8b0..9d59a46 100644
> --- a/arch/arm/mach-tegra/cpuidle-tegra20.c
> +++ b/arch/arm/mach-tegra/cpuidle-tegra20.c
> @@ -22,21 +22,105 @@
>  #include <linux/kernel.h>
>  #include <linux/module.h>
>  #include <linux/cpuidle.h>
> +#include <linux/cpu_pm.h>
> +#include <linux/clockchips.h>
> 
>  #include <asm/cpuidle.h>
> +#include <asm/proc-fns.h>
> +#include <asm/suspend.h>
> +#include <asm/smp_plat.h>
> +
> +#include "pm.h"
> +#include "sleep.h"
> +
> +#ifdef CONFIG_PM_SLEEP
> +static int tegra20_idle_lp2(struct cpuidle_device *dev,
> +                           struct cpuidle_driver *drv,
> +                           int index);
> +#endif
> 
>  static struct cpuidle_driver tegra_idle_driver = {
>         .name = "tegra_idle",
>         .owner = THIS_MODULE,
>         .en_core_tk_irqen = 1,
> +#ifdef CONFIG_PM_SLEEP
> +       .state_count = 2,
> +#else
>         .state_count = 1,
> +#endif

These hardcoded values are not nice.

>         .states = {
>                 [0] = ARM_CPUIDLE_WFI_STATE_PWR(600),
> +#ifdef CONFIG_PM_SLEEP
> +               [1] = {
> +                       .enter                  = tegra20_idle_lp2,
> +                       .exit_latency           = 5000,
> +                       .target_residency       = 10000,
> +                       .power_usage            = 0,
> +                       .flags                  = CPUIDLE_FLAG_TIME_VALID,
> +                       .name                   = "powered-down",
> +                       .desc                   = "CPU power gated",
> +               },
> +#endif
>         },
>  };
> 
>  static DEFINE_PER_CPU(struct cpuidle_device, tegra_idle_device);
> 
> +#ifdef CONFIG_PM_SLEEP
> +#ifdef CONFIG_SMP
> +static bool tegra20_idle_enter_lp2_cpu_1(struct cpuidle_device *dev,
> +                                        struct cpuidle_driver *drv,
> +                                        int index)
> +{
> +       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
> +
> +       smp_wmb();

Can you explain to me the need for this barrier ?

> +
> +       cpu_suspend(0, tegra20_sleep_cpu_secondary_finish);
> +
> +       tegra20_cpu_clear_resettable();
> +
> +       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
> +
> +       return true;
> +}
> +#else
> +static inline bool tegra20_idle_enter_lp2_cpu_1(struct cpuidle_device *dev,
> +                                               struct cpuidle_driver *drv,
> +                                               int index)
> +{
> +       return true;
> +}
> +#endif
> +
> +static int __cpuinit tegra20_idle_lp2(struct cpuidle_device *dev,
> +                                     struct cpuidle_driver *drv,
> +                                     int index)
> +{
> +       u32 cpu = is_smp() ? cpu_logical_map(dev->cpu) : dev->cpu;
> +       bool entered_lp2 = false;
> +
> +       local_fiq_disable();
> +
> +       tegra_set_cpu_in_lp2(cpu);
> +       cpu_pm_enter();
> +
> +       if (cpu == 0)
> +               cpu_do_idle();
> +       else
> +               entered_lp2 = tegra20_idle_enter_lp2_cpu_1(dev, drv, index);
> +
> +       cpu_pm_exit();
> +       tegra_clear_cpu_in_lp2(cpu);
> +
> +       local_fiq_enable();
> +
> +       smp_rmb();
> +
> +       return (entered_lp2) ? index : 0;
> +}
> +#endif
> +
>  int __init tegra20_cpuidle_init(void)
>  {
>         int ret;
> diff --git a/arch/arm/mach-tegra/pm.c b/arch/arm/mach-tegra/pm.c
> index 1b11707..db72ea9 100644
> --- a/arch/arm/mach-tegra/pm.c
> +++ b/arch/arm/mach-tegra/pm.c
> @@ -173,6 +173,8 @@ bool __cpuinit tegra_set_cpu_in_lp2(int phy_cpu_id)
> 
>         if ((phy_cpu_id == 0) && cpumask_equal(cpu_lp2_mask, cpu_online_mask))
>                 last_cpu = true;
> +       else if (phy_cpu_id == 1)
> +               tegra20_cpu_set_resettable_soon();
> 
>         spin_unlock(&tegra_lp2_lock);
>         return last_cpu;
> diff --git a/arch/arm/mach-tegra/sleep-tegra20.S b/arch/arm/mach-tegra/sleep-tegra20.S
> index 72ce709..dfb2be5 100644
> --- a/arch/arm/mach-tegra/sleep-tegra20.S
> +++ b/arch/arm/mach-tegra/sleep-tegra20.S
> @@ -21,6 +21,8 @@
>  #include <linux/linkage.h>
> 
>  #include <asm/assembler.h>
> +#include <asm/proc-fns.h>
> +#include <asm/cp15.h>
> 
>  #include "sleep.h"
>  #include "flowctrl.h"
> @@ -78,3 +80,146 @@ ENTRY(tegra20_cpu_shutdown)
>         mov     pc, lr
>  ENDPROC(tegra20_cpu_shutdown)
>  #endif
> +
> +#ifdef CONFIG_PM_SLEEP
> +/*
> + * tegra_pen_lock
> + *
> + * spinlock implementation with no atomic test-and-set and no coherence
> + * using Peterson's algorithm on strongly-ordered registers
> + * used to synchronize a cpu waking up from wfi with entering lp2 on idle
> + *
> + * SCRATCH37 = r1 = !turn (inverted from Peterson's algorithm)
> + * on cpu 0:
> + * SCRATCH38 = r2 = flag[0]
> + * SCRATCH39 = r3 = flag[1]
> + * on cpu1:
> + * SCRATCH39 = r2 = flag[1]
> + * SCRATCH38 = r3 = flag[0]
> + *
> + * must be called with MMU on
> + * corrupts r0-r3, r12
> + */
> +ENTRY(tegra_pen_lock)
> +       mov32   r3, TEGRA_PMC_VIRT
> +       cpu_id  r0
> +       add     r1, r3, #PMC_SCRATCH37
> +       cmp     r0, #0
> +       addeq   r2, r3, #PMC_SCRATCH38
> +       addeq   r3, r3, #PMC_SCRATCH39
> +       addne   r2, r3, #PMC_SCRATCH39
> +       addne   r3, r3, #PMC_SCRATCH38
> +
> +       mov     r12, #1
> +       str     r12, [r2]               @ flag[cpu] = 1
> +       dsb
> +       str     r12, [r1]               @ !turn = cpu
> +1:     dsb
> +       ldr     r12, [r3]
> +       cmp     r12, #1                 @ flag[!cpu] == 1?
> +       ldreq   r12, [r1]
> +       cmpeq   r12, r0                 @ !turn == cpu?
> +       beq     1b                      @ while !turn == cpu && flag[!cpu] == 1
> +
> +       mov     pc, lr                  @ locked
> +ENDPROC(tegra_pen_lock)
> +
> +ENTRY(tegra_pen_unlock)
> +       dsb
> +       mov32   r3, TEGRA_PMC_VIRT
> +       cpu_id  r0
> +       cmp     r0, #0
> +       addeq   r2, r3, #PMC_SCRATCH38
> +       addne   r2, r3, #PMC_SCRATCH39
> +       mov     r12, #0
> +       str     r12, [r2]
> +       mov     pc, lr
> +ENDPROC(tegra_pen_unlock)

There is an ongoing work to make this locking scheme for MMU/coherency off
paths ARM generic, and we do not want to merge yet another platform specific
locking mechanism. I will point you to the patchset when it hits LAK.

> +
> +/*
> + * tegra20_cpu_clear_resettable(void)
> + *
> + * Called to clear the "resettable soon" flag in PMC_SCRATCH41 when
> + * it is expected that the secondary CPU will be idle soon.
> + */
> +ENTRY(tegra20_cpu_clear_resettable)
> +       mov32   r1, TEGRA_PMC_VIRT + PMC_SCRATCH41
> +       mov     r12, #CPU_NOT_RESETTABLE
> +       str     r12, [r1]
> +       mov     pc, lr
> +ENDPROC(tegra20_cpu_clear_resettable)
> +
> +/*
> + * tegra20_cpu_set_resettable_soon(void)
> + *
> + * Called to set the "resettable soon" flag in PMC_SCRATCH41 when
> + * it is expected that the secondary CPU will be idle soon.
> + */
> +ENTRY(tegra20_cpu_set_resettable_soon)
> +       mov32   r1, TEGRA_PMC_VIRT + PMC_SCRATCH41
> +       mov     r12, #CPU_RESETTABLE_SOON
> +       str     r12, [r1]
> +       mov     pc, lr
> +ENDPROC(tegra20_cpu_set_resettable_soon)
> +
> +/*
> + * tegra20_sleep_cpu_secondary_finish(unsigned long v2p)
> + *
> + * Enters WFI on secondary CPU by exiting coherency.
> + */
> +ENTRY(tegra20_sleep_cpu_secondary_finish)
> +       mrc     p15, 0, r11, c1, c0, 1  @ save actlr before exiting coherency
> +
> +       /* Flush and disable the L1 data cache */
> +       bl      tegra_disable_clean_inv_dcache
> +
> +       mov32   r0, TEGRA_PMC_VIRT + PMC_SCRATCH41
> +       mov     r3, #CPU_RESETTABLE
> +       str     r3, [r0]
> +
> +       bl      cpu_do_idle
> +
> +       /*
> +        * cpu may be reset while in wfi, which will return through
> +        * tegra_resume to cpu_resume
> +        * or interrupt may wake wfi, which will return here
> +        * cpu state is unchanged - MMU is on, cache is on, coherency
> +        * is off, and the data cache is off
> +        *
> +        * r11 contains the original actlr
> +        */
> +
> +       bl      tegra_pen_lock
> +
> +       mov32   r3, TEGRA_PMC_VIRT
> +       add     r0, r3, #PMC_SCRATCH41
> +       mov     r3, #CPU_NOT_RESETTABLE
> +       str     r3, [r0]
> +
> +       bl      tegra_pen_unlock
> +
> +       /* Re-enable the data cache */
> +       mrc     p15, 0, r10, c1, c0, 0
> +       orr     r10, r10, #CR_C
> +       mcr     p15, 0, r10, c1, c0, 0
> +       isb
> +
> +       mcr     p15, 0, r11, c1, c0, 1  @ reenable coherency
> +
> +       /* Invalidate the TLBs & BTAC */
> +       mov     r1, #0
> +       mcr     p15, 0, r1, c8, c3, 0   @ invalidate shared TLBs
> +       mcr     p15, 0, r1, c7, c1, 6   @ invalidate shared BTAC
> +       dsb
> +       isb
> +
> +       /* the cpu was running with coherency disabled,
> +        * caches may be out of date */
> +       bl      v7_flush_kern_cache_louis
> +
> +       ldmia   sp!, {r1 - r3}          @ pop phys pgd, virt SP, phys resume fn
> +       mov     r0, #0                  @ return success
> +       mov     sp, r2                  @ sp is stored in r2 by __cpu_suspend

This is not true. sp is retrieved from the r2 value popped above.
Anyway, all this code is a copy'n'paste from __cpu_suspend. Now if I got
what you want to do right, all you want to do is return to cpu_suspend
with r0 == 0.

(1) You should not rely on the register layout of __cpu_suspend since if
    it changes we have to patch this code as well. Do not rely on that.
(2) Why do you want to return with r0 == 0 ? To me that's a mistery.
    Is that because you want cpu_suspend to restore the page table
    pointer ? Even in that case I am afraid I am against this code.
    Usage of cpu_suspend must be consistent and you *must* not use it as a
    plaster or rely on any specific register layout, it has to be used
    for the purpose it has been designed for.

Lorenzo
Stephen Warren Dec. 5, 2012, 10:18 p.m. UTC | #2
On 12/05/2012 03:01 AM, Joseph Lo wrote:
> The powered-down state of Tegra20 requires power gating both CPU cores.
> When the secondary CPU requests to enter powered-down state, it saves
> its own contexts and then enters WFI. The Tegra20 had a limition to
> power down both CPU cores. The secondary CPU must waits for CPU0 in
> powered-down state too. If the secondary CPU be woken up before CPU0
> entering powered-down state, then it needs to restore its CPU states
> and waits for next chance.
> 
> Be aware of that, you may see the legacy power state "LP2" in the code
> which is exactly the same meaning of "CPU power down".

> diff --git a/arch/arm/mach-tegra/cpuidle-tegra20.c b/arch/arm/mach-tegra/cpuidle-tegra20.c

> +static int __cpuinit tegra20_idle_lp2(struct cpuidle_device *dev,

> +	return (entered_lp2) ? index : 0;

No need for the brackets there.

BTW, could you Cc Colin Cross on any future revisions of these patches;
it'd be good to get his take on them.

> diff --git a/arch/arm/mach-tegra/sleep-tegra20.S b/arch/arm/mach-tegra/sleep-tegra20.S

> +/*
> + * tegra_pen_lock

> + * on cpu 0:
> + * SCRATCH38 = r2 = flag[0]
> + * SCRATCH39 = r3 = flag[1]

It would be slightly clearer if that was written:

* r2 = flag[0] (in SCRATCH38)
* r3 = flag[1] (in SCRATCH39)

since the meaning of r2/r3 is what's being selected.

> +	mov	r12, #1
> +	str	r12, [r2]		@ flag[cpu] = 1
> +	dsb
> +	str	r12, [r1]		@ !turn = cpu

Should that be "str r0, [r1]"? Otherwise, you're always writing 1 there,
not writing the CPU ID as expected.
Joseph Lo Dec. 6, 2012, 7:22 a.m. UTC | #3
On Thu, 2012-12-06 at 06:18 +0800, Stephen Warren wrote:
> On 12/05/2012 03:01 AM, Joseph Lo wrote:
> > The powered-down state of Tegra20 requires power gating both CPU cores.
> > When the secondary CPU requests to enter powered-down state, it saves
> > its own contexts and then enters WFI. The Tegra20 had a limition to
> > power down both CPU cores. The secondary CPU must waits for CPU0 in
> > powered-down state too. If the secondary CPU be woken up before CPU0
> > entering powered-down state, then it needs to restore its CPU states
> > and waits for next chance.
> > 
> > Be aware of that, you may see the legacy power state "LP2" in the code
> > which is exactly the same meaning of "CPU power down".
> 
> > diff --git a/arch/arm/mach-tegra/cpuidle-tegra20.c b/arch/arm/mach-tegra/cpuidle-tegra20.c
> 
> > +static int __cpuinit tegra20_idle_lp2(struct cpuidle_device *dev,
> 
> > +	return (entered_lp2) ? index : 0;
> 
> No need for the brackets there.
> 
OK. Will fix.

> BTW, could you Cc Colin Cross on any future revisions of these patches;
> it'd be good to get his take on them.
> 
OK. The next version will directly support coupled cpuidle. I will add
Colin into Cc.

> > diff --git a/arch/arm/mach-tegra/sleep-tegra20.S b/arch/arm/mach-tegra/sleep-tegra20.S
> 
> > +/*
> > + * tegra_pen_lock
> 
> > + * on cpu 0:
> > + * SCRATCH38 = r2 = flag[0]
> > + * SCRATCH39 = r3 = flag[1]
> 
> It would be slightly clearer if that was written:
> 
> * r2 = flag[0] (in SCRATCH38)
> * r3 = flag[1] (in SCRATCH39)
> 
> since the meaning of r2/r3 is what's being selected.
> 
OK. Will fix.

> > +	mov	r12, #1
> > +	str	r12, [r2]		@ flag[cpu] = 1
> > +	dsb
> > +	str	r12, [r1]		@ !turn = cpu
> 
> Should that be "str r0, [r1]"? Otherwise, you're always writing 1 there,
> not writing the CPU ID as expected.

The CPU_ID is not used to lock or unlock the flag. Writing 1 locks it
and writing 0 releases it.

Thanks,
Joseph
Joseph Lo Dec. 6, 2012, 8:25 a.m. UTC | #4
On Wed, 2012-12-05 at 18:50 +0800, Lorenzo Pieralisi wrote:
> On Wed, Dec 05, 2012 at 10:01:49AM +0000, Joseph Lo wrote:
> >  static struct cpuidle_driver tegra_idle_driver = {
> >         .name = "tegra_idle",
> >         .owner = THIS_MODULE,
> >         .en_core_tk_irqen = 1,
> > +#ifdef CONFIG_PM_SLEEP
> > +       .state_count = 2,
> > +#else
> >         .state_count = 1,
> > +#endif
> 
> These hardcoded values are not nice.
> 
OK. I will change it to runtime detection when idle driver registration.

> >         .states = {
> >                 [0] = ARM_CPUIDLE_WFI_STATE_PWR(600),
> > +#ifdef CONFIG_PM_SLEEP
> > +               [1] = {
> > +                       .enter                  = tegra20_idle_lp2,
> > +                       .exit_latency           = 5000,
> > +                       .target_residency       = 10000,
> > +                       .power_usage            = 0,
> > +                       .flags                  = CPUIDLE_FLAG_TIME_VALID,
> > +                       .name                   = "powered-down",
> > +                       .desc                   = "CPU power gated",
> > +               },
> > +#endif
> >         },
> >  };
> >
> >  static DEFINE_PER_CPU(struct cpuidle_device, tegra_idle_device);
> >
> > +#ifdef CONFIG_PM_SLEEP
> > +#ifdef CONFIG_SMP
> > +static bool tegra20_idle_enter_lp2_cpu_1(struct cpuidle_device *dev,
> > +                                        struct cpuidle_driver *drv,
> > +                                        int index)
> > +{
> > +       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
> > +
> > +       smp_wmb();
> 
> Can you explain to me the need for this barrier ?
> 
Ah. The barrier is not needed here. This function was originally designed
for MPCore usage. After some migrations, it is now used for CPU1 only.
Will remove it. Good catch here, thanks.

> > +
> > +       cpu_suspend(0, tegra20_sleep_cpu_secondary_finish);
> > +
> > +       tegra20_cpu_clear_resettable();
> > +
> > +       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
> > +
> > +       return true;
> > +}
> > +#ifdef CONFIG_PM_SLEEP
> > +/*
> > + * tegra_pen_lock
> > + *
> > + * spinlock implementation with no atomic test-and-set and no coherence
> > + * using Peterson's algorithm on strongly-ordered registers
> > + * used to synchronize a cpu waking up from wfi with entering lp2 on idle
> > + *
> > + * SCRATCH37 = r1 = !turn (inverted from Peterson's algorithm)
> > + * on cpu 0:
> > + * SCRATCH38 = r2 = flag[0]
> > + * SCRATCH39 = r3 = flag[1]
> > + * on cpu1:
> > + * SCRATCH39 = r2 = flag[1]
> > + * SCRATCH38 = r3 = flag[0]
> > + *
> > + * must be called with MMU on
> > + * corrupts r0-r3, r12
> > + */
> > +ENTRY(tegra_pen_lock)
> > +       mov32   r3, TEGRA_PMC_VIRT
> > +       cpu_id  r0
> > +       add     r1, r3, #PMC_SCRATCH37
> > +       cmp     r0, #0
> > +       addeq   r2, r3, #PMC_SCRATCH38
> > +       addeq   r3, r3, #PMC_SCRATCH39
> > +       addne   r2, r3, #PMC_SCRATCH39
> > +       addne   r3, r3, #PMC_SCRATCH38
> > +
> > +       mov     r12, #1
> > +       str     r12, [r2]               @ flag[cpu] = 1
> > +       dsb
> > +       str     r12, [r1]               @ !turn = cpu
> > +1:     dsb
> > +       ldr     r12, [r3]
> > +       cmp     r12, #1                 @ flag[!cpu] == 1?
> > +       ldreq   r12, [r1]
> > +       cmpeq   r12, r0                 @ !turn == cpu?
> > +       beq     1b                      @ while !turn == cpu && flag[!cpu] == 1
> > +
> > +       mov     pc, lr                  @ locked
> > +ENDPROC(tegra_pen_lock)
> > +
> > +ENTRY(tegra_pen_unlock)
> > +       dsb
> > +       mov32   r3, TEGRA_PMC_VIRT
> > +       cpu_id  r0
> > +       cmp     r0, #0
> > +       addeq   r2, r3, #PMC_SCRATCH38
> > +       addne   r2, r3, #PMC_SCRATCH39
> > +       mov     r12, #0
> > +       str     r12, [r2]
> > +       mov     pc, lr
> > +ENDPROC(tegra_pen_unlock)
> 
> There is an ongoing work to make this locking scheme for MMU/coherency off
> paths ARM generic, and we do not want to merge yet another platform specific
> locking mechanism. I will point you to the patchset when it hits LAK.
> 
OK. So I need to wait for the common locking scheme for this patchset.
Could it possibly be available before K3.9? Please remind me when it
shows up here. Thanks.

> > +/*
> > + * tegra20_sleep_cpu_secondary_finish(unsigned long v2p)
> > + *
> > + * Enters WFI on secondary CPU by exiting coherency.
> > + */
> > +ENTRY(tegra20_sleep_cpu_secondary_finish)
> > +       mrc     p15, 0, r11, c1, c0, 1  @ save actlr before exiting coherency
> > +
> > +       /* Flush and disable the L1 data cache */
> > +       bl      tegra_disable_clean_inv_dcache
> > +
> > +       mov32   r0, TEGRA_PMC_VIRT + PMC_SCRATCH41
> > +       mov     r3, #CPU_RESETTABLE
> > +       str     r3, [r0]
> > +
> > +       bl      cpu_do_idle
> > +
> > +       /*
> > +        * cpu may be reset while in wfi, which will return through
> > +        * tegra_resume to cpu_resume
> > +        * or interrupt may wake wfi, which will return here
> > +        * cpu state is unchanged - MMU is on, cache is on, coherency
> > +        * is off, and the data cache is off
> > +        *
> > +        * r11 contains the original actlr
> > +        */
> > +
> > +       bl      tegra_pen_lock
> > +
> > +       mov32   r3, TEGRA_PMC_VIRT
> > +       add     r0, r3, #PMC_SCRATCH41
> > +       mov     r3, #CPU_NOT_RESETTABLE
> > +       str     r3, [r0]
> > +
> > +       bl      tegra_pen_unlock
> > +
> > +       /* Re-enable the data cache */
> > +       mrc     p15, 0, r10, c1, c0, 0
> > +       orr     r10, r10, #CR_C
> > +       mcr     p15, 0, r10, c1, c0, 0
> > +       isb
> > +
> > +       mcr     p15, 0, r11, c1, c0, 1  @ reenable coherency
> > +
> > +       /* Invalidate the TLBs & BTAC */
> > +       mov     r1, #0
> > +       mcr     p15, 0, r1, c8, c3, 0   @ invalidate shared TLBs
> > +       mcr     p15, 0, r1, c7, c1, 6   @ invalidate shared BTAC
> > +       dsb
> > +       isb
> > +
> > +       /* the cpu was running with coherency disabled,
> > +        * caches may be out of date */
> > +       bl      v7_flush_kern_cache_louis
> > +
> > +       ldmia   sp!, {r1 - r3}          @ pop phys pgd, virt SP, phys resume fn
> > +       mov     r0, #0                  @ return success
> > +       mov     sp, r2                  @ sp is stored in r2 by __cpu_suspend
> 
> This is not true. sp is retrieved from the r2 value popped above.
> Anyway, all this code is a copy'n'paste from __cpu_suspend. Now if I got
> what you want to do right, all you want to do is return to cpu_suspend
> with r0 == 0.
> 
> (1) You should not rely on the register layout of __cpu_suspend since if
>     it changes we have to patch this code as well. Do not rely on that.
> (2) Why do you want to return with r0 == 0 ? To me that's a mistery.
>     Is that because you want cpu_suspend to restore the page table
>     pointer ? Even in that case I am afraid I am against this code.
>     Usage of cpu_suspend must be consistent and you *must* not use it as a
>     plaster or rely on any specific register layout, it has to be used
>     for the purpose it has been designed for.
> 
Thank you for reviewing this so carefully.
I re-traced the sequence today. I think I only need to do something
below. And rely on the "cpu_suspend_abort" in "__cpu_suspend" to abort
"cpu_suspend".

ENTRY(tegra20_sleep_cpu_secondary_finish)
	stmfd sp!, {r4-r11, lr}

	...

	ldmfd sp!, {r4-r11, pc}
ENDPROC(tegra20_sleep_cpu_secondary_finish)

Thanks,
Joseph
Stephen Warren Dec. 6, 2012, 6:56 p.m. UTC | #5
On 12/06/2012 01:25 AM, Joseph Lo wrote:
> On Wed, 2012-12-05 at 18:50 +0800, Lorenzo Pieralisi wrote:
>> On Wed, Dec 05, 2012 at 10:01:49AM +0000, Joseph Lo wrote:
>>>  static struct cpuidle_driver tegra_idle_driver = {
>>>         .name = "tegra_idle",
>>>         .owner = THIS_MODULE,
>>>         .en_core_tk_irqen = 1,
>>> +#ifdef CONFIG_PM_SLEEP
>>> +       .state_count = 2,
>>> +#else
>>>         .state_count = 1,
>>> +#endif
>>
>> These hardcoded values are not nice.
>>
> OK. I will change it to runtime detection when idle driver registration.

Personally, I find doing this at compile-time much better; the
conditional definition of state_count is right next to the conditional
setup of the array whose size it defines. Assigning state_count at
run-time ends up moving the two pieces of logic apart, which seems
likely to cause more issues...

>>>         .states = {
>>>                 [0] = ARM_CPUIDLE_WFI_STATE_PWR(600),
>>> +#ifdef CONFIG_PM_SLEEP
>>> +               [1] = {
>>> +                       .enter                  = tegra20_idle_lp2,
>>> +                       .exit_latency           = 5000,
>>> +                       .target_residency       = 10000,
>>> +                       .power_usage            = 0,
>>> +                       .flags                  = CPUIDLE_FLAG_TIME_VALID,
>>> +                       .name                   = "powered-down",
>>> +                       .desc                   = "CPU power gated",
>>> +               },
>>> +#endif
>>>         },
>>>  };
Stephen Warren Dec. 6, 2012, 6:59 p.m. UTC | #6
On 12/06/2012 12:22 AM, Joseph Lo wrote:
> On Thu, 2012-12-06 at 06:18 +0800, Stephen Warren wrote:
>> On 12/05/2012 03:01 AM, Joseph Lo wrote:
>>> The powered-down state of Tegra20 requires power gating both CPU cores.
>>> When the secondary CPU requests to enter powered-down state, it saves
>>> its own contexts and then enters WFI. The Tegra20 had a limition to
>>> power down both CPU cores. The secondary CPU must waits for CPU0 in
>>> powered-down state too. If the secondary CPU be woken up before CPU0
>>> entering powered-down state, then it needs to restore its CPU states
>>> and waits for next chance.
>>>
>>> Be aware of that, you may see the legacy power state "LP2" in the code
>>> which is exactly the same meaning of "CPU power down".

>>> diff --git a/arch/arm/mach-tegra/sleep-tegra20.S b/arch/arm/mach-tegra/sleep-tegra20.S
>>
>>> +/*
>>> + * tegra_pen_lock
>>
>>> + * on cpu 0:
>>> + * SCRATCH38 = r2 = flag[0]
>>> + * SCRATCH39 = r3 = flag[1]
>>
>> It would be slightly clearer if that was written:
>>
>> * r2 = flag[0] (in SCRATCH38)
>> * r3 = flag[1] (in SCRATCH39)
>>
>> since the meaning of r2/r3 is what's being selected.
>>
> OK. Will fix.
> 
>>> +	mov	r12, #1
>>> +	str	r12, [r2]		@ flag[cpu] = 1

So here I can see we should hard-code the value that's written

>>> +	dsb
>>> +	str	r12, [r1]		@ !turn = cpu

But here we're also writing "1" always, whereas the comment says it's
writing "cpu", and given the algorithm description, I would expect to
write "cpu" here not "1".

>> Should that be "str r0, [r1]"? Otherwise, you're always writing 1 there,
>> not writing the CPU ID as expected.
> 
> The CPU_ID was not used to lock or un-lock the flag. Wriging 1 to lock
> and 0 to release.
Joseph Lo Jan. 11, 2013, 7:20 a.m. UTC | #7
Hi Lorenzo,

On Wed, 2012-12-05 at 18:50 +0800, Lorenzo Pieralisi wrote:
> On Wed, Dec 05, 2012 at 10:01:49AM +0000, Joseph Lo wrote:
> > The powered-down state of Tegra20 requires power gating both CPU cores.
> > When the secondary CPU requests to enter powered-down state, it saves
> > its own contexts and then enters WFI. The Tegra20 had a limition to
> > power down both CPU cores. The secondary CPU must waits for CPU0 in
> > powered-down state too. If the secondary CPU be woken up before CPU0
> > entering powered-down state, then it needs to restore its CPU states
> > and waits for next chance.
> >
> > Be aware of that, you may see the legacy power state "LP2" in the code
> > which is exactly the same meaning of "CPU power down".
> >
> > Based on the work by:
> > Colin Cross <ccross@android.com>
> > Gary King <gking@nvidia.com>
> >
> > Signed-off-by: Joseph Lo <josephl@nvidia.com>
> > +
> > +#ifdef CONFIG_PM_SLEEP
> > +/*
> > + * tegra_pen_lock
> > + *
> > + * spinlock implementation with no atomic test-and-set and no coherence
> > + * using Peterson's algorithm on strongly-ordered registers
> > + * used to synchronize a cpu waking up from wfi with entering lp2 on idle
> > + *
> > + * SCRATCH37 = r1 = !turn (inverted from Peterson's algorithm)
> > + * on cpu 0:
> > + * SCRATCH38 = r2 = flag[0]
> > + * SCRATCH39 = r3 = flag[1]
> > + * on cpu1:
> > + * SCRATCH39 = r2 = flag[1]
> > + * SCRATCH38 = r3 = flag[0]
> > + *
> > + * must be called with MMU on
> > + * corrupts r0-r3, r12
> > + */
> > +ENTRY(tegra_pen_lock)
> > +       mov32   r3, TEGRA_PMC_VIRT
> > +       cpu_id  r0
> > +       add     r1, r3, #PMC_SCRATCH37
> > +       cmp     r0, #0
> > +       addeq   r2, r3, #PMC_SCRATCH38
> > +       addeq   r3, r3, #PMC_SCRATCH39
> > +       addne   r2, r3, #PMC_SCRATCH39
> > +       addne   r3, r3, #PMC_SCRATCH38
> > +
> > +       mov     r12, #1
> > +       str     r12, [r2]               @ flag[cpu] = 1
> > +       dsb
> > +       str     r12, [r1]               @ !turn = cpu
> > +1:     dsb
> > +       ldr     r12, [r3]
> > +       cmp     r12, #1                 @ flag[!cpu] == 1?
> > +       ldreq   r12, [r1]
> > +       cmpeq   r12, r0                 @ !turn == cpu?
> > +       beq     1b                      @ while !turn == cpu && flag[!cpu] == 1
> > +
> > +       mov     pc, lr                  @ locked
> > +ENDPROC(tegra_pen_lock)
> > +
> > +ENTRY(tegra_pen_unlock)
> > +       dsb
> > +       mov32   r3, TEGRA_PMC_VIRT
> > +       cpu_id  r0
> > +       cmp     r0, #0
> > +       addeq   r2, r3, #PMC_SCRATCH38
> > +       addne   r2, r3, #PMC_SCRATCH39
> > +       mov     r12, #0
> > +       str     r12, [r2]
> > +       mov     pc, lr
> > +ENDPROC(tegra_pen_unlock)
> 
> There is an ongoing work to make this locking scheme for MMU/coherency off
> paths ARM generic, and we do not want to merge yet another platform specific
> locking mechanism. I will point you to the patchset when it hits LAK.
> 

You did mention there is an ARM generic locking scheme for MMU/coherency
off case before. Do you mean the patch below?

https://patchwork.kernel.org/patch/1957911/
https://patchwork.kernel.org/patch/1957901/

I gave it a review today. It looks like it can't fit our usage for the
CPU idle powered-down mode on Tegra20.

The generic mechanism can only be used when the CPUs are in the
non-coherent world. But our usage needs a mechanism that can be used in
both the coherent and non-coherent cases. OK. The case is that we need to
sync the status that CPU1 is ready to power down. In this case, CPU0 is
still in the coherent world but CPU1 isn't. So we need a locking scheme
that is still safe in this situation.

And the proposed scheme looks to be intended only for b.L systems?

Thanks,
Joseph
Lorenzo Pieralisi Jan. 11, 2013, 12:24 p.m. UTC | #8
On Fri, Jan 11, 2013 at 07:20:29AM +0000, Joseph Lo wrote:
> Hi Lorenzo,
> 
> On Wed, 2012-12-05 at 18:50 +0800, Lorenzo Pieralisi wrote:
> > On Wed, Dec 05, 2012 at 10:01:49AM +0000, Joseph Lo wrote:
> > > The powered-down state of Tegra20 requires power gating both CPU cores.
> > > When the secondary CPU requests to enter powered-down state, it saves
> > > its own contexts and then enters WFI. The Tegra20 had a limition to
> > > power down both CPU cores. The secondary CPU must waits for CPU0 in
> > > powered-down state too. If the secondary CPU be woken up before CPU0
> > > entering powered-down state, then it needs to restore its CPU states
> > > and waits for next chance.
> > >
> > > Be aware of that, you may see the legacy power state "LP2" in the code
> > > which is exactly the same meaning of "CPU power down".
> > >
> > > Based on the work by:
> > > Colin Cross <ccross@android.com>
> > > Gary King <gking@nvidia.com>
> > >
> > > Signed-off-by: Joseph Lo <josephl@nvidia.com>
> > > +
> > > +#ifdef CONFIG_PM_SLEEP
> > > +/*
> > > + * tegra_pen_lock
> > > + *
> > > + * spinlock implementation with no atomic test-and-set and no coherence
> > > + * using Peterson's algorithm on strongly-ordered registers
> > > + * used to synchronize a cpu waking up from wfi with entering lp2 on idle
> > > + *
> > > + * SCRATCH37 = r1 = !turn (inverted from Peterson's algorithm)
> > > + * on cpu 0:
> > > + * SCRATCH38 = r2 = flag[0]
> > > + * SCRATCH39 = r3 = flag[1]
> > > + * on cpu1:
> > > + * SCRATCH39 = r2 = flag[1]
> > > + * SCRATCH38 = r3 = flag[0]
> > > + *
> > > + * must be called with MMU on
> > > + * corrupts r0-r3, r12
> > > + */
> > > +ENTRY(tegra_pen_lock)
> > > +       mov32   r3, TEGRA_PMC_VIRT
> > > +       cpu_id  r0
> > > +       add     r1, r3, #PMC_SCRATCH37
> > > +       cmp     r0, #0
> > > +       addeq   r2, r3, #PMC_SCRATCH38
> > > +       addeq   r3, r3, #PMC_SCRATCH39
> > > +       addne   r2, r3, #PMC_SCRATCH39
> > > +       addne   r3, r3, #PMC_SCRATCH38
> > > +
> > > +       mov     r12, #1
> > > +       str     r12, [r2]               @ flag[cpu] = 1
> > > +       dsb
> > > +       str     r12, [r1]               @ !turn = cpu
> > > +1:     dsb
> > > +       ldr     r12, [r3]
> > > +       cmp     r12, #1                 @ flag[!cpu] == 1?
> > > +       ldreq   r12, [r1]
> > > +       cmpeq   r12, r0                 @ !turn == cpu?
> > > +       beq     1b                      @ while !turn == cpu && flag[!cpu] == 1
> > > +
> > > +       mov     pc, lr                  @ locked
> > > +ENDPROC(tegra_pen_lock)
> > > +
> > > +ENTRY(tegra_pen_unlock)
> > > +       dsb
> > > +       mov32   r3, TEGRA_PMC_VIRT
> > > +       cpu_id  r0
> > > +       cmp     r0, #0
> > > +       addeq   r2, r3, #PMC_SCRATCH38
> > > +       addne   r2, r3, #PMC_SCRATCH39
> > > +       mov     r12, #0
> > > +       str     r12, [r2]
> > > +       mov     pc, lr
> > > +ENDPROC(tegra_pen_unlock)
> > 
> > There is an ongoing work to make this locking scheme for MMU/coherency off
> > paths ARM generic, and we do not want to merge yet another platform specific
> > locking mechanism. I will point you to the patchset when it hits LAK.
> > 
> 
> You did mention there is an ARM generic locking scheme for MMU/coherency
> off case before. Do you mean the patch below?
> 
> https://patchwork.kernel.org/patch/1957911/
> https://patchwork.kernel.org/patch/1957901/

Those are used for first-man election when multiple CPUs come out of
idle at once.

You should have a look at the entire series and in particular:

https://patchwork.kernel.org/patch/1957891/
https://patchwork.kernel.org/patch/1957951/

> I gave it a review today. Looks it can't fit our usage for CPU idle
> powered-down mode on Tegra20.
> 
> The generic mechanism only can be used when CPUs in non coherent world.
> But our usage needs the mechanism could be used in both coherent and non
> coherent case. OK. The case is we need to sync the status about the CPU1
> was ready to power down. In this case, the CPU0 is still in coherent
> world but the CPU1 isn't. So we need the locking scheme could be still
> safe in this situation.

I know, I implemented something of that sort for an A9 based development
platform, that's why I pointed you to this new patchset.

Have a look at the series, it should do what you want.

> And the proposed scheme looks only for b.L system?

No it works also for single cluster systems. Please have a look and let
me know if you have any questions or comment on the thread so that
authors can help you understand the code.

Lorenzo
Nicolas Pitre Jan. 12, 2013, 5:33 p.m. UTC | #9
On Fri, 11 Jan 2013, Joseph Lo wrote:

> Hi Lorenzo,
> 
> On Wed, 2012-12-05 at 18:50 +0800, Lorenzo Pieralisi wrote:
> > On Wed, Dec 05, 2012 at 10:01:49AM +0000, Joseph Lo wrote:
> > > The powered-down state of Tegra20 requires power gating both CPU cores.
> > > When the secondary CPU requests to enter powered-down state, it saves
> > > its own contexts and then enters WFI. The Tegra20 had a limition to
> > > power down both CPU cores. The secondary CPU must waits for CPU0 in
> > > powered-down state too. If the secondary CPU be woken up before CPU0
> > > entering powered-down state, then it needs to restore its CPU states
> > > and waits for next chance.
> > >
> > > Be aware of that, you may see the legacy power state "LP2" in the code
> > > which is exactly the same meaning of "CPU power down".
> > >
> > > Based on the work by:
> > > Colin Cross <ccross@android.com>
> > > Gary King <gking@nvidia.com>
> > >
> > > Signed-off-by: Joseph Lo <josephl@nvidia.com>
> > > +
> > > +#ifdef CONFIG_PM_SLEEP
> > > +/*
> > > + * tegra_pen_lock
> > > + *
> > > + * spinlock implementation with no atomic test-and-set and no coherence
> > > + * using Peterson's algorithm on strongly-ordered registers
> > > + * used to synchronize a cpu waking up from wfi with entering lp2 on idle
> > > + *
> > > + * SCRATCH37 = r1 = !turn (inverted from Peterson's algorithm)
> > > + * on cpu 0:
> > > + * SCRATCH38 = r2 = flag[0]
> > > + * SCRATCH39 = r3 = flag[1]
> > > + * on cpu1:
> > > + * SCRATCH39 = r2 = flag[1]
> > > + * SCRATCH38 = r3 = flag[0]
> > > + *
> > > + * must be called with MMU on
> > > + * corrupts r0-r3, r12
> > > + */
> > > +ENTRY(tegra_pen_lock)
> > > +       mov32   r3, TEGRA_PMC_VIRT
> > > +       cpu_id  r0
> > > +       add     r1, r3, #PMC_SCRATCH37
> > > +       cmp     r0, #0
> > > +       addeq   r2, r3, #PMC_SCRATCH38
> > > +       addeq   r3, r3, #PMC_SCRATCH39
> > > +       addne   r2, r3, #PMC_SCRATCH39
> > > +       addne   r3, r3, #PMC_SCRATCH38
> > > +
> > > +       mov     r12, #1
> > > +       str     r12, [r2]               @ flag[cpu] = 1
> > > +       dsb
> > > +       str     r12, [r1]               @ !turn = cpu
> > > +1:     dsb
> > > +       ldr     r12, [r3]
> > > +       cmp     r12, #1                 @ flag[!cpu] == 1?
> > > +       ldreq   r12, [r1]
> > > +       cmpeq   r12, r0                 @ !turn == cpu?
> > > +       beq     1b                      @ while !turn == cpu && flag[!cpu] == 1
> > > +
> > > +       mov     pc, lr                  @ locked
> > > +ENDPROC(tegra_pen_lock)
> > > +
> > > +ENTRY(tegra_pen_unlock)
> > > +       dsb
> > > +       mov32   r3, TEGRA_PMC_VIRT
> > > +       cpu_id  r0
> > > +       cmp     r0, #0
> > > +       addeq   r2, r3, #PMC_SCRATCH38
> > > +       addne   r2, r3, #PMC_SCRATCH39
> > > +       mov     r12, #0
> > > +       str     r12, [r2]
> > > +       mov     pc, lr
> > > +ENDPROC(tegra_pen_unlock)
> > 
> > There is an ongoing work to make this locking scheme for MMU/coherency off
> > paths ARM generic, and we do not want to merge yet another platform specific
> > locking mechanism. I will point you to the patchset when it hits LAK.
> > 
> 
> You did mention there is an ARM generic locking scheme for MMU/coherency
> off case before. Do you mean the patch below?
> 
> https://patchwork.kernel.org/patch/1957911/
> https://patchwork.kernel.org/patch/1957901/
> 
> I gave it a review today. Looks it can't fit our usage for CPU idle
> powered-down mode on Tegra20.
> 
> The generic mechanism only can be used when CPUs in non coherent world.
> But our usage needs the mechanism could be used in both coherent and non
> coherent case. OK. The case is we need to sync the status about the CPU1
> was ready to power down. In this case, the CPU0 is still in coherent
> world but the CPU1 isn't. So we need the locking scheme could be still
> safe in this situation.

Those patches above are dealing with CPUs coming back up.  If you look 
at the rest of the series, especially the DCSCB one providing real usage 
example, you'll see that we do have a mix of coherent and non-coherent 
states that need to coordinate amongst themselves.

> And the proposed scheme looks only for b.L system?

It was initiated for b.L hence the name, but that is applicable to other 
systems as well.


Nicolas
Joseph Lo Jan. 15, 2013, 3 a.m. UTC | #10
On Fri, 2013-01-11 at 20:24 +0800, Lorenzo Pieralisi wrote:
> On Fri, Jan 11, 2013 at 07:20:29AM +0000, Joseph Lo wrote:
> > On Wed, 2012-12-05 at 18:50 +0800, Lorenzo Pieralisi wrote:
> > > On Wed, Dec 05, 2012 at 10:01:49AM +0000, Joseph Lo wrote:
> > > > The powered-down state of Tegra20 requires power gating both CPU cores.
> > > > When the secondary CPU requests to enter powered-down state, it saves
> > > > its own contexts and then enters WFI. The Tegra20 had a limition to
> > > > power down both CPU cores. The secondary CPU must waits for CPU0 in
> > > > powered-down state too. If the secondary CPU be woken up before CPU0
> > > > entering powered-down state, then it needs to restore its CPU states
> > > > and waits for next chance.
> > > >
> > > > Be aware of that, you may see the legacy power state "LP2" in the code
> > > > which is exactly the same meaning of "CPU power down".
> > > >
> > > > Based on the work by:
> > > > Colin Cross <ccross@android.com>
> > > > Gary King <gking@nvidia.com>
> > > >
> > > > Signed-off-by: Joseph Lo <josephl@nvidia.com>
> > > > +
> > > > +#ifdef CONFIG_PM_SLEEP
> > > > +/*
> > > > + * tegra_pen_lock
> > > > + *
> > > > + * spinlock implementation with no atomic test-and-set and no coherence
> > > > + * using Peterson's algorithm on strongly-ordered registers
> > > > + * used to synchronize a cpu waking up from wfi with entering lp2 on idle
> > > > + *
> > > > + * SCRATCH37 = r1 = !turn (inverted from Peterson's algorithm)
> > > > + * on cpu 0:
> > > > + * SCRATCH38 = r2 = flag[0]
> > > > + * SCRATCH39 = r3 = flag[1]
> > > > + * on cpu1:
> > > > + * SCRATCH39 = r2 = flag[1]
> > > > + * SCRATCH38 = r3 = flag[0]
> > > > + *
> > > > + * must be called with MMU on
> > > > + * corrupts r0-r3, r12
> > > > + */
> > > > +ENTRY(tegra_pen_lock)
> > > > +       mov32   r3, TEGRA_PMC_VIRT
> > > > +       cpu_id  r0
> > > > +       add     r1, r3, #PMC_SCRATCH37
> > > > +       cmp     r0, #0
> > > > +       addeq   r2, r3, #PMC_SCRATCH38
> > > > +       addeq   r3, r3, #PMC_SCRATCH39
> > > > +       addne   r2, r3, #PMC_SCRATCH39
> > > > +       addne   r3, r3, #PMC_SCRATCH38
> > > > +
> > > > +       mov     r12, #1
> > > > +       str     r12, [r2]               @ flag[cpu] = 1
> > > > +       dsb
> > > > +       str     r12, [r1]               @ !turn = cpu
> > > > +1:     dsb
> > > > +       ldr     r12, [r3]
> > > > +       cmp     r12, #1                 @ flag[!cpu] == 1?
> > > > +       ldreq   r12, [r1]
> > > > +       cmpeq   r12, r0                 @ !turn == cpu?
> > > > +       beq     1b                      @ while !turn == cpu && flag[!cpu] == 1
> > > > +
> > > > +       mov     pc, lr                  @ locked
> > > > +ENDPROC(tegra_pen_lock)
> > > > +
> > > > +ENTRY(tegra_pen_unlock)
> > > > +       dsb
> > > > +       mov32   r3, TEGRA_PMC_VIRT
> > > > +       cpu_id  r0
> > > > +       cmp     r0, #0
> > > > +       addeq   r2, r3, #PMC_SCRATCH38
> > > > +       addne   r2, r3, #PMC_SCRATCH39
> > > > +       mov     r12, #0
> > > > +       str     r12, [r2]
> > > > +       mov     pc, lr
> > > > +ENDPROC(tegra_pen_unlock)
> > > 
> > > There is an ongoing work to make this locking scheme for MMU/coherency off
> > > paths ARM generic, and we do not want to merge yet another platform specific
> > > locking mechanism. I will point you to the patchset when it hits LAK.
> > > 
> > 
> > You did mention there is an ARM generic locking scheme for MMU/coherency
> > off case before. Do you mean the patch below?
> > 
> > https://patchwork.kernel.org/patch/1957911/
> > https://patchwork.kernel.org/patch/1957901/
> 
> Those are used for first-man election when multiple CPUs come out of
> idle at once.
> 
> You should have a look at the entire series and in particular:
> 
> https://patchwork.kernel.org/patch/1957891/
> https://patchwork.kernel.org/patch/1957951/
> 
> > I gave it a review today. Looks it can't fit our usage for CPU idle
> > powered-down mode on Tegra20.
> > 
> > The generic mechanism only can be used when CPUs in non coherent world.
> > But our usage needs the mechanism could be used in both coherent and non
> > coherent case. OK. The case is we need to sync the status about the CPU1
> > was ready to power down. In this case, the CPU0 is still in coherent
> > world but the CPU1 isn't. So we need the locking scheme could be still
> > safe in this situation.
> 
> I know, I implemented something of that sort for an A9 based development
> platform, that's why I pointed you to this new patchset.
> 
> Have a look at the series, it should do what you want.
> 
Hi Lorenzo,

May I upstream this stuff first? I promise you I will re-work common
CPU and cluster power sync wrappers (platform_power_ops) for all
Tegra series based on the generic framework, because we don't have
this work in the Tegra tree yet and we indeed need it to support cluster
power down and switching.

But it needs lots of time for re-work, testing and verification. And I
have lots of patches that need the CPU suspend function introduced in
this patch series to support platform suspend for Tegra.

So I am asking for permission here to upstream this series first, and I
will continue the job to come up with common CPU and cluster power sync
wrappers for Tegra, which we indeed need to support cluster power down
and switching.

Thanks,
Joseph
Lorenzo Pieralisi Jan. 15, 2013, 11:34 a.m. UTC | #11
On Tue, Jan 15, 2013 at 03:00:44AM +0000, Joseph Lo wrote:
> On Fri, 2013-01-11 at 20:24 +0800, Lorenzo Pieralisi wrote:
> > On Fri, Jan 11, 2013 at 07:20:29AM +0000, Joseph Lo wrote:
> > > On Wed, 2012-12-05 at 18:50 +0800, Lorenzo Pieralisi wrote:
> > > > On Wed, Dec 05, 2012 at 10:01:49AM +0000, Joseph Lo wrote:
> > > > > The powered-down state of Tegra20 requires power gating both CPU cores.
> > > > > When the secondary CPU requests to enter powered-down state, it saves
> > > > > its own contexts and then enters WFI. The Tegra20 had a limition to
> > > > > power down both CPU cores. The secondary CPU must waits for CPU0 in
> > > > > powered-down state too. If the secondary CPU be woken up before CPU0
> > > > > entering powered-down state, then it needs to restore its CPU states
> > > > > and waits for next chance.
> > > > >
> > > > > Be aware of that, you may see the legacy power state "LP2" in the code
> > > > > which is exactly the same meaning of "CPU power down".
> > > > >
> > > > > Based on the work by:
> > > > > Colin Cross <ccross@android.com>
> > > > > Gary King <gking@nvidia.com>
> > > > >
> > > > > Signed-off-by: Joseph Lo <josephl@nvidia.com>
> > > > > +
> > > > > +#ifdef CONFIG_PM_SLEEP
> > > > > +/*
> > > > > + * tegra_pen_lock
> > > > > + *
> > > > > + * spinlock implementation with no atomic test-and-set and no coherence
> > > > > + * using Peterson's algorithm on strongly-ordered registers
> > > > > + * used to synchronize a cpu waking up from wfi with entering lp2 on idle
> > > > > + *
> > > > > + * SCRATCH37 = r1 = !turn (inverted from Peterson's algorithm)
> > > > > + * on cpu 0:
> > > > > + * SCRATCH38 = r2 = flag[0]
> > > > > + * SCRATCH39 = r3 = flag[1]
> > > > > + * on cpu1:
> > > > > + * SCRATCH39 = r2 = flag[1]
> > > > > + * SCRATCH38 = r3 = flag[0]
> > > > > + *
> > > > > + * must be called with MMU on
> > > > > + * corrupts r0-r3, r12
> > > > > + */
> > > > > +ENTRY(tegra_pen_lock)
> > > > > +       mov32   r3, TEGRA_PMC_VIRT
> > > > > +       cpu_id  r0
> > > > > +       add     r1, r3, #PMC_SCRATCH37
> > > > > +       cmp     r0, #0
> > > > > +       addeq   r2, r3, #PMC_SCRATCH38
> > > > > +       addeq   r3, r3, #PMC_SCRATCH39
> > > > > +       addne   r2, r3, #PMC_SCRATCH39
> > > > > +       addne   r3, r3, #PMC_SCRATCH38
> > > > > +
> > > > > +       mov     r12, #1
> > > > > +       str     r12, [r2]               @ flag[cpu] = 1
> > > > > +       dsb
> > > > > +       str     r12, [r1]               @ !turn = cpu
> > > > > +1:     dsb
> > > > > +       ldr     r12, [r3]
> > > > > +       cmp     r12, #1                 @ flag[!cpu] == 1?
> > > > > +       ldreq   r12, [r1]
> > > > > +       cmpeq   r12, r0                 @ !turn == cpu?
> > > > > +       beq     1b                      @ while !turn == cpu && flag[!cpu] == 1
> > > > > +
> > > > > +       mov     pc, lr                  @ locked
> > > > > +ENDPROC(tegra_pen_lock)
> > > > > +
> > > > > +ENTRY(tegra_pen_unlock)
> > > > > +       dsb
> > > > > +       mov32   r3, TEGRA_PMC_VIRT
> > > > > +       cpu_id  r0
> > > > > +       cmp     r0, #0
> > > > > +       addeq   r2, r3, #PMC_SCRATCH38
> > > > > +       addne   r2, r3, #PMC_SCRATCH39
> > > > > +       mov     r12, #0
> > > > > +       str     r12, [r2]
> > > > > +       mov     pc, lr
> > > > > +ENDPROC(tegra_pen_unlock)
> > > > 
> > > > There is an ongoing work to make this locking scheme for MMU/coherency off
> > > > paths ARM generic, and we do not want to merge yet another platform specific
> > > > locking mechanism. I will point you to the patchset when it hits LAK.
> > > > 
> > > 
> > > You did mention there is an ARM generic locking scheme for MMU/coherency
> > > off case before. Do you mean the patch below?
> > > 
> > > https://patchwork.kernel.org/patch/1957911/
> > > https://patchwork.kernel.org/patch/1957901/
> > 
> > Those are used for first-man election when multiple CPUs come out of
> > idle at once.
> > 
> > You should have a look at the entire series and in particular:
> > 
> > https://patchwork.kernel.org/patch/1957891/
> > https://patchwork.kernel.org/patch/1957951/
> > 
> > > I gave it a review today. Looks it can't fit our usage for CPU idle
> > > powered-down mode on Tegra20.
> > > 
> > > The generic mechanism only can be used when CPUs in non coherent world.
> > > But our usage needs the mechanism could be used in both coherent and non
> > > coherent case. OK. The case is we need to sync the status about the CPU1
> > > was ready to power down. In this case, the CPU0 is still in coherent
> > > world but the CPU1 isn't. So we need the locking scheme could be still
> > > safe in this situation.
> > 
> > I know, I implemented something of that sort for an A9 based development
> > platform, that's why I pointed you to this new patchset.
> > 
> > Have a look at the series, it should do what you want.
> > 
> Hi Lorenzo,
> 
> May I upstream this stuff first? I can promise to you I will re-work a
> common CPUs and cluster power sync wrappers (platform_power_ops) for all
> Tegra series that based on the generic framework. Because we didn't have
> this work in Tegra tree yet and we indeed need it for supporting cluster
> power down and switching.
> 
> But it need lots of time for re-work, testing and verification. And I
> have lots of patches that need the function of cpu suspend that be
> introduced in this patch series to support platform suspend for Tegra.
> 
> So I am asking the permission here for upstream this series first and I
> will continue the job to come out a common CPUs and cluster power sync
> wrappers for Tegra that we indeed need it to support cluster power down
> and switching.

I understand that and you should not ask me permission :-) since I am
not a maintainer. I just wanted to save you some effort, you will end
up rewriting the whole thing anyway IMHO and I think the power API would
have saved you time if it were there before.

Again, it is not my call, I am more than happy to review your code but
that's all I can do.

If you need help to integrate the power API please do ask questions.

Lorenzo
Joseph Lo Jan. 16, 2013, 3:17 a.m. UTC | #12
On Tue, 2013-01-15 at 19:34 +0800, Lorenzo Pieralisi wrote:
> On Tue, Jan 15, 2013 at 03:00:44AM +0000, Joseph Lo wrote:
> > On Fri, 2013-01-11 at 20:24 +0800, Lorenzo Pieralisi wrote:
> > > On Fri, Jan 11, 2013 at 07:20:29AM +0000, Joseph Lo wrote:
> > > > On Wed, 2012-12-05 at 18:50 +0800, Lorenzo Pieralisi wrote:
> > > > > On Wed, Dec 05, 2012 at 10:01:49AM +0000, Joseph Lo wrote:
> > > > You did mention there is an ARM generic locking scheme for MMU/coherency
> > > > off case before. Do you mean the patch below?
> > > > 
> > > > https://patchwork.kernel.org/patch/1957911/
> > > > https://patchwork.kernel.org/patch/1957901/
> > > 
> > > Those are used for first-man election when multiple CPUs come out of
> > > idle at once.
> > > 
> > > You should have a look at the entire series and in particular:
> > > 
> > > https://patchwork.kernel.org/patch/1957891/
> > > https://patchwork.kernel.org/patch/1957951/
> > > 
> > > > I gave it a review today. Looks it can't fit our usage for CPU idle
> > > > powered-down mode on Tegra20.
> > > > 
> > > > The generic mechanism only can be used when CPUs in non coherent world.
> > > > But our usage needs the mechanism could be used in both coherent and non
> > > > coherent case. OK. The case is we need to sync the status about the CPU1
> > > > was ready to power down. In this case, the CPU0 is still in coherent
> > > > world but the CPU1 isn't. So we need the locking scheme could be still
> > > > safe in this situation.
> > > 
> > > I know, I implemented something of that sort for an A9 based development
> > > platform, that's why I pointed you to this new patchset.
> > > 
> > > Have a look at the series, it should do what you want.
> > > 
> > Hi Lorenzo,
> > 
> > May I upstream this stuff first? I can promise to you I will re-work a
> > common CPUs and cluster power sync wrappers (platform_power_ops) for all
> > Tegra series that based on the generic framework. Because we didn't have
> > this work in Tegra tree yet and we indeed need it for supporting cluster
> > power down and switching.
> > 
> > But it need lots of time for re-work, testing and verification. And I
> > have lots of patches that need the function of cpu suspend that be
> > introduced in this patch series to support platform suspend for Tegra.
> > 
> > So I am asking the permission here for upstream this series first and I
> > will continue the job to come out a common CPUs and cluster power sync
> > wrappers for Tegra that we indeed need it to support cluster power down
> > and switching.
> 
> I understand that and you should not ask me permission :-) since I am
> not a maintainer. I just wanted to save you some effort, you will end
> up rewriting the whole thing anyway IMHO and I think the power API would
> have saved you time if it were there before.
> 
I think we always take care of the comments from the community. And
it's really worthwhile for us to reconsider the code design and re-use
the common framework to reduce the effort.
> 
> If you need help to integrate the power API please do ask questions.
> 
Thanks again.

Joseph
diff mbox

Patch

diff --git a/arch/arm/mach-tegra/cpuidle-tegra20.c b/arch/arm/mach-tegra/cpuidle-tegra20.c
index d32e8b0..9d59a46 100644
--- a/arch/arm/mach-tegra/cpuidle-tegra20.c
+++ b/arch/arm/mach-tegra/cpuidle-tegra20.c
@@ -22,21 +22,105 @@ 
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/cpuidle.h>
+#include <linux/cpu_pm.h>
+#include <linux/clockchips.h>
 
 #include <asm/cpuidle.h>
+#include <asm/proc-fns.h>
+#include <asm/suspend.h>
+#include <asm/smp_plat.h>
+
+#include "pm.h"
+#include "sleep.h"
+
+#ifdef CONFIG_PM_SLEEP
+static int tegra20_idle_lp2(struct cpuidle_device *dev,
+			    struct cpuidle_driver *drv,
+			    int index);
+#endif
 
 static struct cpuidle_driver tegra_idle_driver = {
 	.name = "tegra_idle",
 	.owner = THIS_MODULE,
 	.en_core_tk_irqen = 1,
+#ifdef CONFIG_PM_SLEEP
+	.state_count = 2,
+#else
 	.state_count = 1,
+#endif
 	.states = {
 		[0] = ARM_CPUIDLE_WFI_STATE_PWR(600),
+#ifdef CONFIG_PM_SLEEP
+		[1] = {
+			.enter			= tegra20_idle_lp2,
+			.exit_latency		= 5000,
+			.target_residency	= 10000,
+			.power_usage		= 0,
+			.flags			= CPUIDLE_FLAG_TIME_VALID,
+			.name			= "powered-down",
+			.desc			= "CPU power gated",
+		},
+#endif
 	},
 };
 
 static DEFINE_PER_CPU(struct cpuidle_device, tegra_idle_device);
 
+#ifdef CONFIG_PM_SLEEP
+#ifdef CONFIG_SMP
+static bool tegra20_idle_enter_lp2_cpu_1(struct cpuidle_device *dev,
+					 struct cpuidle_driver *drv,
+					 int index)
+{
+	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
+
+	smp_wmb();
+
+	cpu_suspend(0, tegra20_sleep_cpu_secondary_finish);
+
+	tegra20_cpu_clear_resettable();
+
+	clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+
+	return true;
+}
+#else
+static inline bool tegra20_idle_enter_lp2_cpu_1(struct cpuidle_device *dev,
+						struct cpuidle_driver *drv,
+						int index)
+{
+	return true;
+}
+#endif
+
+static int __cpuinit tegra20_idle_lp2(struct cpuidle_device *dev,
+				      struct cpuidle_driver *drv,
+				      int index)
+{
+	u32 cpu = is_smp() ? cpu_logical_map(dev->cpu) : dev->cpu;
+	bool entered_lp2 = false;
+
+	local_fiq_disable();
+
+	tegra_set_cpu_in_lp2(cpu);
+	cpu_pm_enter();
+
+	if (cpu == 0)
+		cpu_do_idle();
+	else
+		entered_lp2 = tegra20_idle_enter_lp2_cpu_1(dev, drv, index);
+
+	cpu_pm_exit();
+	tegra_clear_cpu_in_lp2(cpu);
+
+	local_fiq_enable();
+
+	smp_rmb();
+
+	return (entered_lp2) ? index : 0;
+}
+#endif
+
 int __init tegra20_cpuidle_init(void)
 {
 	int ret;
diff --git a/arch/arm/mach-tegra/pm.c b/arch/arm/mach-tegra/pm.c
index 1b11707..db72ea9 100644
--- a/arch/arm/mach-tegra/pm.c
+++ b/arch/arm/mach-tegra/pm.c
@@ -173,6 +173,8 @@  bool __cpuinit tegra_set_cpu_in_lp2(int phy_cpu_id)
 
 	if ((phy_cpu_id == 0) && cpumask_equal(cpu_lp2_mask, cpu_online_mask))
 		last_cpu = true;
+	else if (phy_cpu_id == 1)
+		tegra20_cpu_set_resettable_soon();
 
 	spin_unlock(&tegra_lp2_lock);
 	return last_cpu;
diff --git a/arch/arm/mach-tegra/sleep-tegra20.S b/arch/arm/mach-tegra/sleep-tegra20.S
index 72ce709..dfb2be5 100644
--- a/arch/arm/mach-tegra/sleep-tegra20.S
+++ b/arch/arm/mach-tegra/sleep-tegra20.S
@@ -21,6 +21,8 @@ 
 #include <linux/linkage.h>
 
 #include <asm/assembler.h>
+#include <asm/proc-fns.h>
+#include <asm/cp15.h>
 
 #include "sleep.h"
 #include "flowctrl.h"
@@ -78,3 +80,146 @@  ENTRY(tegra20_cpu_shutdown)
 	mov	pc, lr
 ENDPROC(tegra20_cpu_shutdown)
 #endif
+
+#ifdef CONFIG_PM_SLEEP
+/*
+ * tegra_pen_lock
+ *
+ * spinlock implementation with no atomic test-and-set and no coherence
+ * using Peterson's algorithm on strongly-ordered registers
+ * used to synchronize a cpu waking up from wfi with entering lp2 on idle
+ *
+ * SCRATCH37 = r1 = !turn (inverted from Peterson's algorithm)
+ * on cpu 0:
+ * SCRATCH38 = r2 = flag[0]
+ * SCRATCH39 = r3 = flag[1]
+ * on cpu1:
+ * SCRATCH39 = r2 = flag[1]
+ * SCRATCH38 = r3 = flag[0]
+ *
+ * must be called with MMU on
+ * corrupts r0-r3, r12
+ */
+ENTRY(tegra_pen_lock)
+	mov32	r3, TEGRA_PMC_VIRT
+	cpu_id	r0
+	add	r1, r3, #PMC_SCRATCH37
+	cmp	r0, #0
+	addeq	r2, r3, #PMC_SCRATCH38
+	addeq	r3, r3, #PMC_SCRATCH39
+	addne	r2, r3, #PMC_SCRATCH39
+	addne	r3, r3, #PMC_SCRATCH38
+
+	mov	r12, #1
+	str	r12, [r2]		@ flag[cpu] = 1
+	dsb
+	str	r12, [r1]		@ !turn = cpu
+1:	dsb
+	ldr	r12, [r3]
+	cmp	r12, #1			@ flag[!cpu] == 1?
+	ldreq	r12, [r1]
+	cmpeq	r12, r0			@ !turn == cpu?
+	beq	1b			@ while !turn == cpu && flag[!cpu] == 1
+
+	mov	pc, lr			@ locked
+ENDPROC(tegra_pen_lock)
+
+ENTRY(tegra_pen_unlock)
+	dsb
+	mov32	r3, TEGRA_PMC_VIRT
+	cpu_id	r0
+	cmp	r0, #0
+	addeq	r2, r3, #PMC_SCRATCH38
+	addne	r2, r3, #PMC_SCRATCH39
+	mov	r12, #0
+	str	r12, [r2]
+	mov     pc, lr
+ENDPROC(tegra_pen_unlock)
+
+/*
+ * tegra20_cpu_clear_resettable(void)
+ *
+ * Called to clear the "resettable soon" flag in PMC_SCRATCH41 when
+ * it is expected that the secondary CPU will be idle soon.
+ */
+ENTRY(tegra20_cpu_clear_resettable)
+	mov32	r1, TEGRA_PMC_VIRT + PMC_SCRATCH41
+	mov	r12, #CPU_NOT_RESETTABLE
+	str	r12, [r1]
+	mov	pc, lr
+ENDPROC(tegra20_cpu_clear_resettable)
+
+/*
+ * tegra20_cpu_set_resettable_soon(void)
+ *
+ * Called to set the "resettable soon" flag in PMC_SCRATCH41 when
+ * it is expected that the secondary CPU will be idle soon.
+ */
+ENTRY(tegra20_cpu_set_resettable_soon)
+	mov32	r1, TEGRA_PMC_VIRT + PMC_SCRATCH41
+	mov	r12, #CPU_RESETTABLE_SOON
+	str	r12, [r1]
+	mov	pc, lr
+ENDPROC(tegra20_cpu_set_resettable_soon)
+
+/*
+ * tegra20_sleep_cpu_secondary_finish(unsigned long v2p)
+ *
+ * Enters WFI on secondary CPU by exiting coherency.
+ */
+ENTRY(tegra20_sleep_cpu_secondary_finish)
+	mrc	p15, 0, r11, c1, c0, 1  @ save actlr before exiting coherency
+
+	/* Flush and disable the L1 data cache */
+	bl	tegra_disable_clean_inv_dcache
+
+	mov32	r0, TEGRA_PMC_VIRT + PMC_SCRATCH41
+	mov	r3, #CPU_RESETTABLE
+	str	r3, [r0]
+
+	bl	cpu_do_idle
+
+	/*
+	 * cpu may be reset while in wfi, which will return through
+	 * tegra_resume to cpu_resume
+	 * or interrupt may wake wfi, which will return here
+	 * cpu state is unchanged - MMU is on, cache is on, coherency
+	 * is off, and the data cache is off
+	 *
+	 * r11 contains the original actlr
+	 */
+
+	bl	tegra_pen_lock
+
+	mov32	r3, TEGRA_PMC_VIRT
+	add	r0, r3, #PMC_SCRATCH41
+	mov	r3, #CPU_NOT_RESETTABLE
+	str	r3, [r0]
+
+	bl	tegra_pen_unlock
+
+	/* Re-enable the data cache */
+	mrc	p15, 0, r10, c1, c0, 0
+	orr	r10, r10, #CR_C
+	mcr	p15, 0, r10, c1, c0, 0
+	isb
+
+	mcr	p15, 0, r11, c1, c0, 1	@ reenable coherency
+
+	/* Invalidate the TLBs & BTAC */
+	mov	r1, #0
+	mcr	p15, 0, r1, c8, c3, 0	@ invalidate shared TLBs
+	mcr	p15, 0, r1, c7, c1, 6	@ invalidate shared BTAC
+	dsb
+	isb
+
+	/* the cpu was running with coherency disabled,
+	 * caches may be out of date */
+	bl	v7_flush_kern_cache_louis
+
+	ldmia	sp!, {r1 - r3}		@ pop phys pgd, virt SP, phys resume fn
+	mov     r0, #0			@ return success
+	mov     sp, r2			@ sp is stored in r2 by __cpu_suspend
+	ldmfd	sp!, {r4 - r11, pc}
+ENDPROC(tegra20_sleep_cpu_secondary_finish)
+#endif
diff --git a/arch/arm/mach-tegra/sleep.h b/arch/arm/mach-tegra/sleep.h
index 9821ee7..a02f71a 100644
--- a/arch/arm/mach-tegra/sleep.h
+++ b/arch/arm/mach-tegra/sleep.h
@@ -25,6 +25,19 @@ 
 					+ IO_PPSB_VIRT)
 #define TEGRA_CLK_RESET_VIRT (TEGRA_CLK_RESET_BASE - IO_PPSB_PHYS \
 					+ IO_PPSB_VIRT)
+#define TEGRA_PMC_VIRT	(TEGRA_PMC_BASE - IO_APB_PHYS + IO_APB_VIRT)
+
+/* PMC_SCRATCH37-39 and 41 are used for tegra_pen_lock and idle */
+#define PMC_SCRATCH37	0x130
+#define PMC_SCRATCH38	0x134
+#define PMC_SCRATCH39	0x138
+#define PMC_SCRATCH41	0x140
+
+#ifdef CONFIG_ARCH_TEGRA_2x_SOC
+#define CPU_RESETTABLE		2
+#define CPU_RESETTABLE_SOON	1
+#define CPU_NOT_RESETTABLE	0
+#endif
 
 #ifdef __ASSEMBLY__
 /* returns the offset of the flow controller halt register for a cpu */
@@ -104,6 +117,8 @@  exit_l2_resume:
 .endm
 #endif /* CONFIG_CACHE_L2X0 */
 #else
+void tegra_pen_lock(void);
+void tegra_pen_unlock(void);
 void tegra_resume(void);
 int tegra_sleep_cpu_finish(unsigned long);
 
@@ -115,6 +130,14 @@  static inline void tegra20_hotplug_init(void) {}
 static inline void tegra30_hotplug_init(void) {}
 #endif
 
+void tegra20_cpu_clear_resettable(void);
+#ifdef CONFIG_ARCH_TEGRA_2x_SOC
+void tegra20_cpu_set_resettable_soon(void);
+#else
+static inline void tegra20_cpu_set_resettable_soon(void) {}
+#endif
+
+int tegra20_sleep_cpu_secondary_finish(unsigned long);
 int tegra30_sleep_cpu_secondary_finish(unsigned long);
 void tegra30_tear_down_cpu(void);