diff mbox

[v2,3/9] ARM: tegra: # of CPU cores detection w/ & w/o HAVE_ARM_SCU

Message ID 1357649263-1098-4-git-send-email-hdoyu@nvidia.com (mailing list archive)
State New, archived
Headers show

Commit Message

Hiroshi DOYU Jan. 8, 2013, 12:47 p.m. UTC
The method to detect the number of CPU cores on Cortex-A9 MPCore and
Cortex-A15 MPCore is different. On Cortex-A9 MPCore we can get this
information from the Snoop Control Unit(SCU). On Cortex-A15 MPCore we
have to read it from the system coprocessor(CP15), because the SCU on
Cortex-A15 MPCore does not have software readable registers. This
patch selects the correct method at runtime based on the CPU ID.

Signed-off-by: Hiroshi Doyu <hdoyu@nvidia.com>
---
 arch/arm/mach-tegra/platsmp.c |   31 ++++++++++++++++++++++++++++---
 1 file changed, 28 insertions(+), 3 deletions(-)

Comments

Russell King - ARM Linux Jan. 8, 2013, 2:26 p.m. UTC | #1
On Tue, Jan 08, 2013 at 02:47:37PM +0200, Hiroshi Doyu wrote:
> The method to detect the number of CPU cores on Cortex-A9 MPCore and
> Cortex-A15 MPCore is different. On Cortex-A9 MPCore we can get this
> information from the Snoop Control Unit(SCU). On Cortex-A15 MPCore we
> have to read it from the system coprocessor(CP15), because the SCU on
> Cortex-A15 MPCore does not have software readable registers. This
> patch selects the correct method at runtime based on the CPU ID.
> 
> Signed-off-by: Hiroshi Doyu <hdoyu@nvidia.com>
> ---
>  arch/arm/mach-tegra/platsmp.c |   31 ++++++++++++++++++++++++++++---
>  1 file changed, 28 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/arm/mach-tegra/platsmp.c b/arch/arm/mach-tegra/platsmp.c
> index 1b926df..68e76ef 100644
> --- a/arch/arm/mach-tegra/platsmp.c
> +++ b/arch/arm/mach-tegra/platsmp.c
> @@ -23,6 +23,7 @@
>  #include <asm/hardware/gic.h>
>  #include <asm/mach-types.h>
>  #include <asm/smp_scu.h>
> +#include <asm/cputype.h>
>  
>  #include <mach/powergate.h>
>  
> @@ -34,9 +35,13 @@
>  #include "common.h"
>  #include "iomap.h"
>  
> +#define CPU_MASK		0xff0ffff0
> +#define CPU_CORTEX_A9		0x410fc090
> +#define CPU_CORTEX_A15		0x410fc0f0

NAK. There's some patches around to make this stuff generic, we don't
need more ifdefs springing up.  We need to get those generic patches
in.
Mark Rutland Jan. 8, 2013, 2:28 p.m. UTC | #2
Hello,

On Tue, Jan 08, 2013 at 12:47:37PM +0000, Hiroshi Doyu wrote:
> The method to detect the number of CPU cores on Cortex-A9 MPCore and
> Cortex-A15 MPCore is different. On Cortex-A9 MPCore we can get this
> information from the Snoop Control Unit(SCU). On Cortex-A15 MPCore we
> have to read it from the system coprocessor(CP15), because the SCU on
> Cortex-A15 MPCore does not have software readable registers. This
> patch selects the correct method at runtime based on the CPU ID.
> 
> Signed-off-by: Hiroshi Doyu <hdoyu@nvidia.com>
> ---
>  arch/arm/mach-tegra/platsmp.c |   31 ++++++++++++++++++++++++++++---
>  1 file changed, 28 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/arm/mach-tegra/platsmp.c b/arch/arm/mach-tegra/platsmp.c
> index 1b926df..68e76ef 100644
> --- a/arch/arm/mach-tegra/platsmp.c
> +++ b/arch/arm/mach-tegra/platsmp.c
> @@ -23,6 +23,7 @@
>  #include <asm/hardware/gic.h>
>  #include <asm/mach-types.h>
>  #include <asm/smp_scu.h>
> +#include <asm/cputype.h>
>  
>  #include <mach/powergate.h>
>  
> @@ -34,9 +35,13 @@
>  #include "common.h"
>  #include "iomap.h"
>  
> +#define CPU_MASK		0xff0ffff0
> +#define CPU_CORTEX_A9		0x410fc090
> +#define CPU_CORTEX_A15		0x410fc0f0
> +
>  extern void tegra_secondary_startup(void);
>  
> -static void __iomem *scu_base = IO_ADDRESS(TEGRA_ARM_PERIF_BASE);
> +static void __iomem *scu_base;
>  
>  #define EVP_CPU_RESET_VECTOR \
>  	(IO_ADDRESS(TEGRA_EXCEPTION_VECTORS_BASE) + 0x100)
> @@ -149,7 +154,26 @@ done:
>   */
>  static void __init tegra_smp_init_cpus(void)
>  {
> -	unsigned int i, ncores = scu_get_core_count(scu_base);
> +	unsigned int i, cpu_id, ncores;
> +	u32 l2ctlr;
> +	phys_addr_t pa;
> +
> +	cpu_id = read_cpuid(CPUID_ID) & CPU_MASK;
> +	switch (cpu_id) {
> +	case CPU_CORTEX_A15:
> +		asm("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
> +		ncores = ((l2ctlr >> 24) & 3) + 1;
> +		break;

[...]

As mentioned last time [1], you should get this information from the dt
instead.

Thanks,
Mark.

[1] http://lists.infradead.org/pipermail/linux-arm-kernel/2012-December/138319.html
Hiroshi DOYU Jan. 8, 2013, 2:53 p.m. UTC | #3
Hi Mark,

Mark Rutland <mark.rutland@arm.com> wrote @ Tue, 8 Jan 2013 15:28:28 +0100:

> Hello,
> 
> On Tue, Jan 08, 2013 at 12:47:37PM +0000, Hiroshi Doyu wrote:
> > The method to detect the number of CPU cores on Cortex-A9 MPCore and
> > Cortex-A15 MPCore is different. On Cortex-A9 MPCore we can get this
> > information from the Snoop Control Unit(SCU). On Cortex-A15 MPCore we
> > have to read it from the system coprocessor(CP15), because the SCU on
> > Cortex-A15 MPCore does not have software readable registers. This
> > patch selects the correct method at runtime based on the CPU ID.
> > 
> > Signed-off-by: Hiroshi Doyu <hdoyu@nvidia.com>
> > ---
> >  arch/arm/mach-tegra/platsmp.c |   31 ++++++++++++++++++++++++++++---
> >  1 file changed, 28 insertions(+), 3 deletions(-)
> > 
> > diff --git a/arch/arm/mach-tegra/platsmp.c b/arch/arm/mach-tegra/platsmp.c
> > index 1b926df..68e76ef 100644
> > --- a/arch/arm/mach-tegra/platsmp.c
> > +++ b/arch/arm/mach-tegra/platsmp.c
> > @@ -23,6 +23,7 @@
> >  #include <asm/hardware/gic.h>
> >  #include <asm/mach-types.h>
> >  #include <asm/smp_scu.h>
> > +#include <asm/cputype.h>
> >  
> >  #include <mach/powergate.h>
> >  
> > @@ -34,9 +35,13 @@
> >  #include "common.h"
> >  #include "iomap.h"
> >  
> > +#define CPU_MASK		0xff0ffff0
> > +#define CPU_CORTEX_A9		0x410fc090
> > +#define CPU_CORTEX_A15		0x410fc0f0
> > +
> >  extern void tegra_secondary_startup(void);
> >  
> > -static void __iomem *scu_base = IO_ADDRESS(TEGRA_ARM_PERIF_BASE);
> > +static void __iomem *scu_base;
> >  
> >  #define EVP_CPU_RESET_VECTOR \
> >  	(IO_ADDRESS(TEGRA_EXCEPTION_VECTORS_BASE) + 0x100)
> > @@ -149,7 +154,26 @@ done:
> >   */
> >  static void __init tegra_smp_init_cpus(void)
> >  {
> > -	unsigned int i, ncores = scu_get_core_count(scu_base);
> > +	unsigned int i, cpu_id, ncores;
> > +	u32 l2ctlr;
> > +	phys_addr_t pa;
> > +
> > +	cpu_id = read_cpuid(CPUID_ID) & CPU_MASK;
> > +	switch (cpu_id) {
> > +	case CPU_CORTEX_A15:
> > +		asm("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
> > +		ncores = ((l2ctlr >> 24) & 3) + 1;
> > +		break;
> 
> [...]
> 
> As mentioned last time [1], you should get this information from the dt
> instead.

Most of platsmp.c:.smp_init_cpus() implementations seem just to
overwrite # of cores by SCU/MRC detection. Is there any implementation
to use the DT's # and skip SCU/MRC detection in .smp_init_cpus()?
Mark Rutland Jan. 8, 2013, 4:21 p.m. UTC | #4
On Tue, Jan 08, 2013 at 02:53:42PM +0000, Hiroshi Doyu wrote:
> Hi Mark,
> 
> Mark Rutland <mark.rutland@arm.com> wrote @ Tue, 8 Jan 2013 15:28:28 +0100:
> 
> > Hello,
> > 
> > On Tue, Jan 08, 2013 at 12:47:37PM +0000, Hiroshi Doyu wrote:
> > > The method to detect the number of CPU cores on Cortex-A9 MPCore and
> > > Cortex-A15 MPCore is different. On Cortex-A9 MPCore we can get this
> > > information from the Snoop Control Unit(SCU). On Cortex-A15 MPCore we
> > > have to read it from the system coprocessor(CP15), because the SCU on
> > > Cortex-A15 MPCore does not have software readable registers. This
> > > patch selects the correct method at runtime based on the CPU ID.
> > > 
> > > Signed-off-by: Hiroshi Doyu <hdoyu@nvidia.com>
> > > ---
> > >  arch/arm/mach-tegra/platsmp.c |   31 ++++++++++++++++++++++++++++---
> > >  1 file changed, 28 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/arch/arm/mach-tegra/platsmp.c b/arch/arm/mach-tegra/platsmp.c
> > > index 1b926df..68e76ef 100644
> > > --- a/arch/arm/mach-tegra/platsmp.c
> > > +++ b/arch/arm/mach-tegra/platsmp.c
> > > @@ -23,6 +23,7 @@
> > >  #include <asm/hardware/gic.h>
> > >  #include <asm/mach-types.h>
> > >  #include <asm/smp_scu.h>
> > > +#include <asm/cputype.h>
> > >  
> > >  #include <mach/powergate.h>
> > >  
> > > @@ -34,9 +35,13 @@
> > >  #include "common.h"
> > >  #include "iomap.h"
> > >  
> > > +#define CPU_MASK		0xff0ffff0
> > > +#define CPU_CORTEX_A9		0x410fc090
> > > +#define CPU_CORTEX_A15		0x410fc0f0
> > > +
> > >  extern void tegra_secondary_startup(void);
> > >  
> > > -static void __iomem *scu_base = IO_ADDRESS(TEGRA_ARM_PERIF_BASE);
> > > +static void __iomem *scu_base;
> > >  
> > >  #define EVP_CPU_RESET_VECTOR \
> > >  	(IO_ADDRESS(TEGRA_EXCEPTION_VECTORS_BASE) + 0x100)
> > > @@ -149,7 +154,26 @@ done:
> > >   */
> > >  static void __init tegra_smp_init_cpus(void)
> > >  {
> > > -	unsigned int i, ncores = scu_get_core_count(scu_base);
> > > +	unsigned int i, cpu_id, ncores;
> > > +	u32 l2ctlr;
> > > +	phys_addr_t pa;
> > > +
> > > +	cpu_id = read_cpuid(CPUID_ID) & CPU_MASK;
> > > +	switch (cpu_id) {
> > > +	case CPU_CORTEX_A15:
> > > +		asm("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
> > > +		ncores = ((l2ctlr >> 24) & 3) + 1;
> > > +		break;
> > 
> > [...]
> > 
> > As mentioned last time [1], you should get this information from the dt
> > instead.
> 
> Most of platsmp.c:.smp_init_cpus() implementations seem just to
> overwrite # of cores by SCU/MRC detection. Is there any implementation
> to use the DT's # and skip SCU/MRC detection in .smp_init_cpus()?

As far as I can see, there's no other platform which just relies on
arm_dt_init_cpu_maps. Until recently, it didn't exist, so that makes some
sense. As far as I can see, for the Tegra 114 you only need your smp_init_cpus
to call set_smp_cross_call(gic_raise_softirq). Everything else you do seems to
be handled by arm_dt_init_cpus.

I think the best option would be to have a separate smp_ops for your dt
platforms where we know cpu nodes are populated (e.g. Tegra 114), where
smp_init_cpus is different to that for non-dt platforms. That way non dt
platforms can keep the SCU hack for now, and won't be broken, and the dt
platforms are far removed from the SCU hack and just use common infrastructure.

Maybe someone else has a better idea?

Thanks,
Mark.
Lorenzo Pieralisi Jan. 8, 2013, 5:11 p.m. UTC | #5
On Tue, Jan 08, 2013 at 04:21:38PM +0000, Mark Rutland wrote:
> On Tue, Jan 08, 2013 at 02:53:42PM +0000, Hiroshi Doyu wrote:

[...]

> > > >  static void __init tegra_smp_init_cpus(void)
> > > >  {
> > > > -	unsigned int i, ncores = scu_get_core_count(scu_base);
> > > > +	unsigned int i, cpu_id, ncores;
> > > > +	u32 l2ctlr;
> > > > +	phys_addr_t pa;
> > > > +
> > > > +	cpu_id = read_cpuid(CPUID_ID) & CPU_MASK;
> > > > +	switch (cpu_id) {
> > > > +	case CPU_CORTEX_A15:
> > > > +		asm("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
> > > > +		ncores = ((l2ctlr >> 24) & 3) + 1;
> > > > +		break;
> > > 
> > > [...]
> > > 
> > > As mentioned last time [1], you should get this information from the dt
> > > instead.
> > 
> > Most of platsmp.c:.smp_init_cpus() implementations seem just to
> > overwrite # of cores by SCU/MRC detection. Is there any implementation
> > to use the DT's # and skip SCU/MRC detection in .smp_init_cpus()?
> 
> As far as I can see, there's no other platform which just relies on
> arm_dt_init_cpu_maps. Until recently, it didn't exist, so that makes some
> sense. As far as I can see, for the Tegra 114 you only need your smp_init_cpus
> to call set_smp_cross_call(gic_raise_softirq). Everything else you do seems to
> be handled by arm_dt_init_cpus.
> 
> I think the best option would be to have a separate smp_ops for your dt
> platforms where we know cpu nodes are populated (e.g. Tegra 114), where
> smp_init_cpus is different to that for non-dt platforms. That way non dt
> platforms can keep the SCU hack for now, and won't be broken, and the dt
> platforms are far removed from the SCU hack and just use common infrastructure.
> 
> Maybe someone else has a better idea?

Have a look at mach-vexpress/platsmp.c (keeping in mind that the
GENERIC_SCU case will be refactored/removed since arm_dt_init_cpu_maps() is
doing most of the required steps), please let me know what you think.

Thanks,
Lorenzo
Stephen Warren Jan. 8, 2013, 7:32 p.m. UTC | #6
On 01/08/2013 09:21 AM, Mark Rutland wrote:
> On Tue, Jan 08, 2013 at 02:53:42PM +0000, Hiroshi Doyu wrote:
>> Mark Rutland <mark.rutland@arm.com> wrote @ Tue, 8 Jan 2013 15:28:28 +0100:
>>> On Tue, Jan 08, 2013 at 12:47:37PM +0000, Hiroshi Doyu wrote:
>>>> The method to detect the number of CPU cores on Cortex-A9 MPCore and
>>>> Cortex-A15 MPCore is different. On Cortex-A9 MPCore we can get this
>>>> information from the Snoop Control Unit(SCU). On Cortex-A15 MPCore we
>>>> have to read it from the system coprocessor(CP15), because the SCU on
>>>> Cortex-A15 MPCore does not have software readable registers. This
>>>> patch selects the correct method at runtime based on the CPU ID.
...
>>>>  static void __init tegra_smp_init_cpus(void)
>>>>  {
>>>> -	unsigned int i, ncores = scu_get_core_count(scu_base);
>>>> +	unsigned int i, cpu_id, ncores;
>>>> +	u32 l2ctlr;
>>>> +	phys_addr_t pa;
>>>> +
>>>> +	cpu_id = read_cpuid(CPUID_ID) & CPU_MASK;
>>>> +	switch (cpu_id) {
>>>> +	case CPU_CORTEX_A15:
>>>> +		asm("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
>>>> +		ncores = ((l2ctlr >> 24) & 3) + 1;
>>>> +		break;
>>>
>>> [...]
>>>
>>> As mentioned last time [1], you should get this information from the dt
>>> instead.
>>
>> Most of platsmp.c:.smp_init_cpus() implementations seem just to
>> overwrite # of cores by SCU/MRC detection. Is there any implementation
>> to use the DT's # and skip SCU/MRC detection in .smp_init_cpus()?
> 
> As far as I can see, there's no other platform which just relies on
> arm_dt_init_cpu_maps. Until recently, it didn't exist, so that makes some
> sense. As far as I can see, for the Tegra 114 you only need your smp_init_cpus
> to call set_smp_cross_call(gic_raise_softirq). Everything else you do seems to
> be handled by arm_dt_init_cpus.
> 
> I think the best option would be to have a separate smp_ops for your dt
> platforms where we know cpu nodes are populated (e.g. Tegra 114), where
> smp_init_cpus is different to that for non-dt platforms. That way non dt
> platforms can keep the SCU hack for now, and won't be broken, and the dt
> platforms are far removed from the SCU hack and just use common infrastructure.

Tegra doesn't have any non-DT support now.

If we're going to make Tegra114 read the CPU information from DT, then
I'd rather just switch over all Tegra platforms to requiring CPU nodes
in the DT. We have few enough SoC variants that it should be easy to
switch everything to DT before adding Tegra114 support without causing
any problems.

But that all said, I'm not convinced it's a good idea to force the
information to be present in DT when it's just duplicating stuff that
can be runtime-probed from the HW...
Hiroshi DOYU Jan. 9, 2013, 5:46 a.m. UTC | #7
Russell King - ARM Linux <linux@arm.linux.org.uk> wrote @ Tue, 8 Jan 2013 15:26:51 +0100:

> On Tue, Jan 08, 2013 at 02:47:37PM +0200, Hiroshi Doyu wrote:
> > The method to detect the number of CPU cores on Cortex-A9 MPCore and
> > Cortex-A15 MPCore is different. On Cortex-A9 MPCore we can get this
> > information from the Snoop Control Unit(SCU). On Cortex-A15 MPCore we
> > have to read it from the system coprocessor(CP15), because the SCU on
> > Cortex-A15 MPCore does not have software readable registers. This
> > patch selects the correct method at runtime based on the CPU ID.
> > 
> > Signed-off-by: Hiroshi Doyu <hdoyu@nvidia.com>
> > ---
> >  arch/arm/mach-tegra/platsmp.c |   31 ++++++++++++++++++++++++++++---
> >  1 file changed, 28 insertions(+), 3 deletions(-)
> > 
> > diff --git a/arch/arm/mach-tegra/platsmp.c b/arch/arm/mach-tegra/platsmp.c
> > index 1b926df..68e76ef 100644
> > --- a/arch/arm/mach-tegra/platsmp.c
> > +++ b/arch/arm/mach-tegra/platsmp.c
> > @@ -23,6 +23,7 @@
> >  #include <asm/hardware/gic.h>
> >  #include <asm/mach-types.h>
> >  #include <asm/smp_scu.h>
> > +#include <asm/cputype.h>
> >  
> >  #include <mach/powergate.h>
> >  
> > @@ -34,9 +35,13 @@
> >  #include "common.h"
> >  #include "iomap.h"
> >  
> > +#define CPU_MASK		0xff0ffff0
> > +#define CPU_CORTEX_A9		0x410fc090
> > +#define CPU_CORTEX_A15		0x410fc0f0
> 
> NAK. There's some patches around to make this stuff generic, we don't
> need more ifdefs springing up.  We need to get those generic patches
> in.

Does anyone give some pointers to the above patches?
Hiroshi DOYU Jan. 9, 2013, 5:49 a.m. UTC | #8
Hi,

Stephen Warren <swarren@wwwdotorg.org> wrote @ Tue, 8 Jan 2013 20:32:32 +0100:

> On 01/08/2013 09:21 AM, Mark Rutland wrote:
> > On Tue, Jan 08, 2013 at 02:53:42PM +0000, Hiroshi Doyu wrote:
> >> Mark Rutland <mark.rutland@arm.com> wrote @ Tue, 8 Jan 2013 15:28:28 +0100:
> >>> On Tue, Jan 08, 2013 at 12:47:37PM +0000, Hiroshi Doyu wrote:
> >>>> The method to detect the number of CPU cores on Cortex-A9 MPCore and
> >>>> Cortex-A15 MPCore is different. On Cortex-A9 MPCore we can get this
> >>>> information from the Snoop Control Unit(SCU). On Cortex-A15 MPCore we
> >>>> have to read it from the system coprocessor(CP15), because the SCU on
> >>>> Cortex-A15 MPCore does not have software readable registers. This
> >>>> patch selects the correct method at runtime based on the CPU ID.
> ...
> >>>>  static void __init tegra_smp_init_cpus(void)
> >>>>  {
> >>>> -	unsigned int i, ncores = scu_get_core_count(scu_base);
> >>>> +	unsigned int i, cpu_id, ncores;
> >>>> +	u32 l2ctlr;
> >>>> +	phys_addr_t pa;
> >>>> +
> >>>> +	cpu_id = read_cpuid(CPUID_ID) & CPU_MASK;
> >>>> +	switch (cpu_id) {
> >>>> +	case CPU_CORTEX_A15:
> >>>> +		asm("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
> >>>> +		ncores = ((l2ctlr >> 24) & 3) + 1;
> >>>> +		break;
> >>>
> >>> [...]
> >>>
> >>> As mentioned last time [1], you should get this information from the dt
> >>> instead.
> >>
> >> Most of platsmp.c:.smp_init_cpus() implementations seem just to
> >> overwrite # of cores by SCU/MRC detection. Is there any implementation
> >> to use the DT's # and skip SCU/MRC detection in .smp_init_cpus()?
> > 
> > As far as I can see, there's no other platform which just relies on
> > arm_dt_init_cpu_maps. Until recently, it didn't exist, so that makes some
> > sense. As far as I can see, for the Tegra 114 you only need your smp_init_cpus
> > to call set_smp_cross_call(gic_raise_softirq). Everything else you do seems to
> > be handled by arm_dt_init_cpus.

True.

> > I think the best option would be to have a separate smp_ops for your dt
> > platforms where we know cpu nodes are populated (e.g. Tegra 114), where
> > smp_init_cpus is different to that for non-dt platforms. That way non dt
> > platforms can keep the SCU hack for now, and won't be broken, and the dt
> > platforms are far removed from the SCU hack and just use common
> > infrastructure.
>
> Tegra doesn't have any non-DT support now.

What about falling down to SCU/MRC hack only when DT detection fails
or no /cpus entry in DT?

Can arm_dt_init_cpu_maps() return if it suceeds or not?

> 
> If we're going to make Tegra114 read the CPU information from DT, then
> I'd rather just switch over all Tegra platforms to requiring CPU nodes
> in the DT. We have few enough SoC variants that it should be easy to
> switch everything to DT before adding Tegra114 support without causing
> any problems.

Possible.

> But that all said, I'm not convinced it's a good idea to force the
> information to be present in DT when it's just duplicating stuff that
> can be runtime-probed from the HW...

That's also my concern somewhat.

The main purpose to DT CPU core detection is for multiple CPU
clusters. SCU/MRC cannot know anything outside of their own
clusters. In tegra, we have 4 Cortex-A9/A15 core plus 1 shadow
core. Switching those clusters is done transparently. The last A9/A15
core is transparently switched back and forth to the shadow one. So in
this case, SCU/MRC detection seems to work fine.
Joseph Lo Jan. 9, 2013, 6:07 a.m. UTC | #9
On Wed, 2013-01-09 at 13:46 +0800, Hiroshi Doyu wrote:
> Russell King - ARM Linux <linux@arm.linux.org.uk> wrote @ Tue, 8 Jan 2013 15:26:51 +0100:
> 
> > On Tue, Jan 08, 2013 at 02:47:37PM +0200, Hiroshi Doyu wrote:
> > > The method to detect the number of CPU cores on Cortex-A9 MPCore and
> > > Cortex-A15 MPCore is different. On Cortex-A9 MPCore we can get this
> > > information from the Snoop Control Unit(SCU). On Cortex-A15 MPCore we
> > > have to read it from the system coprocessor(CP15), because the SCU on
> > > Cortex-A15 MPCore does not have software readable registers. This
> > > patch selects the correct method at runtime based on the CPU ID.
> > > 
> > > Signed-off-by: Hiroshi Doyu <hdoyu@nvidia.com>
> > > ---
> > >  arch/arm/mach-tegra/platsmp.c |   31 ++++++++++++++++++++++++++++---
> > >  1 file changed, 28 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/arch/arm/mach-tegra/platsmp.c b/arch/arm/mach-tegra/platsmp.c
> > > index 1b926df..68e76ef 100644
> > > --- a/arch/arm/mach-tegra/platsmp.c
> > > +++ b/arch/arm/mach-tegra/platsmp.c
> > > @@ -23,6 +23,7 @@
> > >  #include <asm/hardware/gic.h>
> > >  #include <asm/mach-types.h>
> > >  #include <asm/smp_scu.h>
> > > +#include <asm/cputype.h>
> > >  
> > >  #include <mach/powergate.h>
> > >  
> > > @@ -34,9 +35,13 @@
> > >  #include "common.h"
> > >  #include "iomap.h"
> > >  
> > > +#define CPU_MASK		0xff0ffff0
> > > +#define CPU_CORTEX_A9		0x410fc090
> > > +#define CPU_CORTEX_A15		0x410fc0f0
> > 
> > NAK. There's some patches around to make this stuff generic, we don't
> > need more ifdefs springing up.  We need to get those generic patches
> > in.
> 
> Does anyone give some pointers to the above patches?

Hi Hiroshi,

Please check "arch/arm/include/asm/cputype.h" in next-20130109. It just
be merged.
Hiroshi DOYU Jan. 9, 2013, 6:25 a.m. UTC | #10
Hi Joseph,

Joseph Lo <josephl@nvidia.com> wrote @ Wed, 9 Jan 2013 07:07:41 +0100:

> On Wed, 2013-01-09 at 13:46 +0800, Hiroshi Doyu wrote:
> > Russell King - ARM Linux <linux@arm.linux.org.uk> wrote @ Tue, 8 Jan 2013 15:26:51 +0100:
> > 
> > > On Tue, Jan 08, 2013 at 02:47:37PM +0200, Hiroshi Doyu wrote:
> > > > The method to detect the number of CPU cores on Cortex-A9 MPCore and
> > > > Cortex-A15 MPCore is different. On Cortex-A9 MPCore we can get this
> > > > information from the Snoop Control Unit(SCU). On Cortex-A15 MPCore we
> > > > have to read it from the system coprocessor(CP15), because the SCU on
> > > > Cortex-A15 MPCore does not have software readable registers. This
> > > > patch selects the correct method at runtime based on the CPU ID.
> > > > 
> > > > Signed-off-by: Hiroshi Doyu <hdoyu@nvidia.com>
> > > > ---
> > > >  arch/arm/mach-tegra/platsmp.c |   31 ++++++++++++++++++++++++++++---
> > > >  1 file changed, 28 insertions(+), 3 deletions(-)
> > > > 
> > > > diff --git a/arch/arm/mach-tegra/platsmp.c b/arch/arm/mach-tegra/platsmp.c
> > > > index 1b926df..68e76ef 100644
> > > > --- a/arch/arm/mach-tegra/platsmp.c
> > > > +++ b/arch/arm/mach-tegra/platsmp.c
> > > > @@ -23,6 +23,7 @@
> > > >  #include <asm/hardware/gic.h>
> > > >  #include <asm/mach-types.h>
> > > >  #include <asm/smp_scu.h>
> > > > +#include <asm/cputype.h>
> > > >  
> > > >  #include <mach/powergate.h>
> > > >  
> > > > @@ -34,9 +35,13 @@
> > > >  #include "common.h"
> > > >  #include "iomap.h"
> > > >  
> > > > +#define CPU_MASK		0xff0ffff0
> > > > +#define CPU_CORTEX_A9		0x410fc090
> > > > +#define CPU_CORTEX_A15		0x410fc0f0
> > > 
> > > NAK. There's some patches around to make this stuff generic, we don't
> > > need more ifdefs springing up.  We need to get those generic patches
> > > in.
> > 
> > Does anyone give some pointers to the above patches?
> 
> Hi Hiroshi,
> 
> Please check "arch/arm/include/asm/cputype.h" in next-20130109. It just
> be merged.

I found the following commit. Thanks.

commit cd8dde6fc347b56b6cb9afa7cc0f9073da163176
Author: Christoffer Dall <c.dall@virtualopensystems.com>
Date:   Tue Dec 18 04:06:37 2012 +0000

    ARM: Define CPU part numbers and implementors
    
    Define implementor IDs, part numbers and Xscale architecture versions in
    cputype.h.  Also create accessor functions for reading the implementor,
    part number, and Xscale architecture versions from the CPUID regiser.
    
    Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
    Signed-off-by: Will Deacon <will.deacon@arm.com>
Lorenzo Pieralisi Jan. 9, 2013, 11:34 a.m. UTC | #11
On Wed, Jan 09, 2013 at 05:49:46AM +0000, Hiroshi Doyu wrote:
> Hi,
> 
> Stephen Warren <swarren@wwwdotorg.org> wrote @ Tue, 8 Jan 2013 20:32:32 +0100:
> 
> > On 01/08/2013 09:21 AM, Mark Rutland wrote:
> > > On Tue, Jan 08, 2013 at 02:53:42PM +0000, Hiroshi Doyu wrote:
> > >> Mark Rutland <mark.rutland@arm.com> wrote @ Tue, 8 Jan 2013 15:28:28 +0100:
> > >>> On Tue, Jan 08, 2013 at 12:47:37PM +0000, Hiroshi Doyu wrote:
> > >>>> The method to detect the number of CPU cores on Cortex-A9 MPCore and
> > >>>> Cortex-A15 MPCore is different. On Cortex-A9 MPCore we can get this
> > >>>> information from the Snoop Control Unit(SCU). On Cortex-A15 MPCore we
> > >>>> have to read it from the system coprocessor(CP15), because the SCU on
> > >>>> Cortex-A15 MPCore does not have software readable registers. This
> > >>>> patch selects the correct method at runtime based on the CPU ID.
> > ...
> > >>>>  static void __init tegra_smp_init_cpus(void)
> > >>>>  {
> > >>>> -	unsigned int i, ncores = scu_get_core_count(scu_base);
> > >>>> +	unsigned int i, cpu_id, ncores;
> > >>>> +	u32 l2ctlr;
> > >>>> +	phys_addr_t pa;
> > >>>> +
> > >>>> +	cpu_id = read_cpuid(CPUID_ID) & CPU_MASK;
> > >>>> +	switch (cpu_id) {
> > >>>> +	case CPU_CORTEX_A15:
> > >>>> +		asm("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
> > >>>> +		ncores = ((l2ctlr >> 24) & 3) + 1;
> > >>>> +		break;
> > >>>
> > >>> [...]
> > >>>
> > >>> As mentioned last time [1], you should get this information from the dt
> > >>> instead.
> > >>
> > >> Most of platsmp.c:.smp_init_cpus() implementations seem just to
> > >> overwrite # of cores by SCU/MRC detection. Is there any implementation
> > >> to use the DT's # and skip SCU/MRC detection in .smp_init_cpus()?
> > > 
> > > As far as I can see, there's no other platform which just relies on
> > > arm_dt_init_cpu_maps. Until recently, it didn't exist, so that makes some
> > > sense. As far as I can see, for the Tegra 114 you only need your smp_init_cpus
> > > to call set_smp_cross_call(gic_raise_softirq). Everything else you do seems to
> > > be handled by arm_dt_init_cpus.
> 
> True.
> 
> > > I think the best option would be to have a separate smp_ops for your dt
> > > platforms where we know cpu nodes are populated (e.g. Tegra 114), where
> > > smp_init_cpus is different to that for non-dt platforms. That way non dt
> > > platforms can keep the SCU hack for now, and won't be broken, and the dt
> > > platforms are far removed from the SCU hack and just use common
> > > infrastructure.
> >
> > Tegra doesn't have any non-DT support now.
> 
> What about falling down to SCU/MRC hack only when DT detection fails
> or no /cpus entry in DT?
> 
> Can arm_dt_init_cpu_maps() return if it suceeds or not?

I think the best solution to this problem consists in /me adding a
function, say, arm_dt_nb_cores(), that returns an error if DT probing
failed and the number of cores if it succeeded.

If it fails, you can fall back to SCU detection of cores for legacy
reasons (A9), but from A15 onwards I have quite a strong feeling against
using L2 as number of cores detection mechanism since it does not work
for multi-cluster systems. Fair enough, on Tegra this is not a problem
(now) but I do not want this method to propagate to other platforms where
multi-cluster number of cores detection will fail if we rely on the L2
mechanism.

This means that if the DT /cpu nodes are updated for all Tegra
platforms, DT becomes the mechanism to detect the number of cores
for all of them.

If you allow the A9 SCU mechanism to override the DT information (even
if it is correct) we might end up in the rather unwieldy situation where
there is a disconnect between the cpu_logical_map initialized entries and
the HW probed number of cores (if the DT bindings are correct the
cpu_logical_map entries are initialized by DT).

The gist is:

- if DT /cpu nodes are there and defined properly, we build the
  cpu_logical_map and count the number of cores from DT
- if DT /cpu nodes are missing or bogus we rely on the cpu_logical_map
  in smp_setup_processor_id() and fall back to legacy mechanism to detect
  the number of cores. This just for legacy boards/SoC.

Let me know if this is reasonable please.

> 
> > 
> > If we're going to make Tegra114 read the CPU information from DT, then
> > I'd rather just switch over all Tegra platforms to requiring CPU nodes
> > in the DT. We have few enough SoC variants that it should be easy to
> > switch everything to DT before adding Tegra114 support without causing
> > any problems.
> 
> Possible.
> 
> > But that all said, I'm not convinced it's a good idea to force the
> > information to be present in DT when it's just duplicating stuff that
> > can be runtime-probed from the HW...
> 
> That's also my concern somewhat.
> 
> The main purpose to DT CPU core detection is for multiple CPU
> clusters. SCU/MRC cannot know anything outside of their own
> clusters. In tegra, we have 4 Cortex-A9/A15 core plus 1 shadow
> core. Switching those clusters is done transparently. The last A9/A15
> core is transparently switched back and forth to the shadow one. So in
> this case, SCU/MRC detection seems to work fine.

You can probe the number of cores through SCU but remember
that MPIDRs of existing CPUs are generated, not probed if the DT is not
present (smp_setup_processor_id()). cpu_logical_map content, before the
DT bindings were introduced, or even now if DT /cpu nodes are bogus, is
simply generated as a sequential number but is not *probeable* per-se.
To tell the truth, this works well for all existing ARM platforms in the
mainline but moving forward we'd better stick at bits of information provided
by the DT, at least that's my opinion.

Thoughts ?

Lorenzo
Stephen Warren Jan. 9, 2013, 4:17 p.m. UTC | #12
On 01/09/2013 04:34 AM, Lorenzo Pieralisi wrote:
> On Wed, Jan 09, 2013 at 05:49:46AM +0000, Hiroshi Doyu wrote:
>> Hi,
>>
>> Stephen Warren <swarren@wwwdotorg.org> wrote @ Tue, 8 Jan 2013 20:32:32 +0100:
>>
>>> On 01/08/2013 09:21 AM, Mark Rutland wrote:
>>>> On Tue, Jan 08, 2013 at 02:53:42PM +0000, Hiroshi Doyu wrote:
>>>>> Mark Rutland <mark.rutland@arm.com> wrote @ Tue, 8 Jan 2013 15:28:28 +0100:
>>>>>> On Tue, Jan 08, 2013 at 12:47:37PM +0000, Hiroshi Doyu wrote:
>>>>>>> The method to detect the number of CPU cores on Cortex-A9 MPCore and
>>>>>>> Cortex-A15 MPCore is different. On Cortex-A9 MPCore we can get this
>>>>>>> information from the Snoop Control Unit(SCU). On Cortex-A15 MPCore we
>>>>>>> have to read it from the system coprocessor(CP15), because the SCU on
>>>>>>> Cortex-A15 MPCore does not have software readable registers. This
>>>>>>> patch selects the correct method at runtime based on the CPU ID.
>>> ...
>>>>>>>  static void __init tegra_smp_init_cpus(void)
>>>>>>>  {
>>>>>>> -	unsigned int i, ncores = scu_get_core_count(scu_base);
>>>>>>> +	unsigned int i, cpu_id, ncores;
>>>>>>> +	u32 l2ctlr;
>>>>>>> +	phys_addr_t pa;
>>>>>>> +
>>>>>>> +	cpu_id = read_cpuid(CPUID_ID) & CPU_MASK;
>>>>>>> +	switch (cpu_id) {
>>>>>>> +	case CPU_CORTEX_A15:
>>>>>>> +		asm("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
>>>>>>> +		ncores = ((l2ctlr >> 24) & 3) + 1;
>>>>>>> +		break;
>>>>>>
>>>>>> [...]
>>>>>>
>>>>>> As mentioned last time [1], you should get this information from the dt
>>>>>> instead.
>>>>>
>>>>> Most of platsmp.c:.smp_init_cpus() implementations seem just to
>>>>> overwrite # of cores by SCU/MRC detection. Is there any implementation
>>>>> to use the DT's # and skip SCU/MRC detection in .smp_init_cpus()?
>>>>
>>>> As far as I can see, there's no other platform which just relies on
>>>> arm_dt_init_cpu_maps. Until recently, it didn't exist, so that makes some
>>>> sense. As far as I can see, for the Tegra 114 you only need your smp_init_cpus
>>>> to call set_smp_cross_call(gic_raise_softirq). Everything else you do seems to
>>>> be handled by arm_dt_init_cpus.
>>
>> True.
>>
>>>> I think the best option would be to have a separate smp_ops for your dt
>>>> platforms where we know cpu nodes are populated (e.g. Tegra 114), where
>>>> smp_init_cpus is different to that for non-dt platforms. That way non dt
>>>> platforms can keep the SCU hack for now, and won't be broken, and the dt
>>>> platforms are far removed from the SCU hack and just use common
>>>> infrastructure.
>>>
>>> Tegra doesn't have any non-DT support now.
>>
>> What about falling down to SCU/MRC hack only when DT detection fails
>> or no /cpus entry in DT?
>>
>> Can arm_dt_init_cpu_maps() return if it suceeds or not?
> 
> I think the best solution to this problem consists in /me adding a
> function, say, arm_dt_nb_cores(), that returns an error if DT probing
> failed and the number of cores if it succeeded.

Well, if the primary mechanism needs to be DT-based going forward, I'd
say just require the DT nodes to be present to use the new function, and
outright fail if they aren't. That way, everything always works one way.
It shouldn't be hard to add the CPU nodes for Tegra, right?
Lorenzo Pieralisi Jan. 9, 2013, 6:07 p.m. UTC | #13
On Wed, Jan 09, 2013 at 04:17:15PM +0000, Stephen Warren wrote:
> On 01/09/2013 04:34 AM, Lorenzo Pieralisi wrote:
> > On Wed, Jan 09, 2013 at 05:49:46AM +0000, Hiroshi Doyu wrote:
> >> Hi,
> >>
> >> Stephen Warren <swarren@wwwdotorg.org> wrote @ Tue, 8 Jan 2013 20:32:32 +0100:
> >>
> >>> On 01/08/2013 09:21 AM, Mark Rutland wrote:
> >>>> On Tue, Jan 08, 2013 at 02:53:42PM +0000, Hiroshi Doyu wrote:
> >>>>> Mark Rutland <mark.rutland@arm.com> wrote @ Tue, 8 Jan 2013 15:28:28 +0100:
> >>>>>> On Tue, Jan 08, 2013 at 12:47:37PM +0000, Hiroshi Doyu wrote:
> >>>>>>> The method to detect the number of CPU cores on Cortex-A9 MPCore and
> >>>>>>> Cortex-A15 MPCore is different. On Cortex-A9 MPCore we can get this
> >>>>>>> information from the Snoop Control Unit(SCU). On Cortex-A15 MPCore we
> >>>>>>> have to read it from the system coprocessor(CP15), because the SCU on
> >>>>>>> Cortex-A15 MPCore does not have software readable registers. This
> >>>>>>> patch selects the correct method at runtime based on the CPU ID.
> >>> ...
> >>>>>>>  static void __init tegra_smp_init_cpus(void)
> >>>>>>>  {
> >>>>>>> -	unsigned int i, ncores = scu_get_core_count(scu_base);
> >>>>>>> +	unsigned int i, cpu_id, ncores;
> >>>>>>> +	u32 l2ctlr;
> >>>>>>> +	phys_addr_t pa;
> >>>>>>> +
> >>>>>>> +	cpu_id = read_cpuid(CPUID_ID) & CPU_MASK;
> >>>>>>> +	switch (cpu_id) {
> >>>>>>> +	case CPU_CORTEX_A15:
> >>>>>>> +		asm("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
> >>>>>>> +		ncores = ((l2ctlr >> 24) & 3) + 1;
> >>>>>>> +		break;
> >>>>>>
> >>>>>> [...]
> >>>>>>
> >>>>>> As mentioned last time [1], you should get this information from the dt
> >>>>>> instead.
> >>>>>
> >>>>> Most of platsmp.c:.smp_init_cpus() implementations seem just to
> >>>>> overwrite # of cores by SCU/MRC detection. Is there any implementation
> >>>>> to use the DT's # and skip SCU/MRC detection in .smp_init_cpus()?
> >>>>
> >>>> As far as I can see, there's no other platform which just relies on
> >>>> arm_dt_init_cpu_maps. Until recently, it didn't exist, so that makes some
> >>>> sense. As far as I can see, for the Tegra 114 you only need your smp_init_cpus
> >>>> to call set_smp_cross_call(gic_raise_softirq). Everything else you do seems to
> >>>> be handled by arm_dt_init_cpus.
> >>
> >> True.
> >>
> >>>> I think the best option would be to have a separate smp_ops for your dt
> >>>> platforms where we know cpu nodes are populated (e.g. Tegra 114), where
> >>>> smp_init_cpus is different to that for non-dt platforms. That way non dt
> >>>> platforms can keep the SCU hack for now, and won't be broken, and the dt
> >>>> platforms are far removed from the SCU hack and just use common
> >>>> infrastructure.
> >>>
> >>> Tegra doesn't have any non-DT support now.
> >>
> >> What about falling down to SCU/MRC hack only when DT detection fails
> >> or no /cpus entry in DT?
> >>
> >> Can arm_dt_init_cpu_maps() return if it suceeds or not?
> > 
> > I think the best solution to this problem consists in /me adding a
> > function, say, arm_dt_nb_cores(), that returns an error if DT probing
> > failed and the number of cores if it succeeded.
> 
> Well, if the primary mechanism needs to be DT-based going forward, I'd
> say just require the DT nodes to be present to use the new function, and
> outright fail if they aren't. That way, everything always works one way.
> It shouldn't be hard to add the CPU nodes for Tegra, right?

Adding /cpu nodes to DT is trivial, but I would like to keep a fall back
mechanism in place for existing platforms to carry out the transition
as smoothly as possible (eg people using new kernels with old DTS).

/cpu nodes will have priority over HW based probing for cores counting.

As discussed with Hiroshi I will post a patch to provide platforms with
a DT cpu map validity check so that the fall back mechanism can be
triggered properly.

Thank you very much,
Lorenzo
Hiroshi DOYU Jan. 10, 2013, 6:31 a.m. UTC | #14
Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> wrote @ Wed, 9 Jan 2013 12:34:32 +0100:

> On Wed, Jan 09, 2013 at 05:49:46AM +0000, Hiroshi Doyu wrote:
> > Hi,
> > 
> > Stephen Warren <swarren@wwwdotorg.org> wrote @ Tue, 8 Jan 2013 20:32:32 +0100:
> > 
> > > On 01/08/2013 09:21 AM, Mark Rutland wrote:
> > > > On Tue, Jan 08, 2013 at 02:53:42PM +0000, Hiroshi Doyu wrote:
> > > >> Mark Rutland <mark.rutland@arm.com> wrote @ Tue, 8 Jan 2013 15:28:28 +0100:
> > > >>> On Tue, Jan 08, 2013 at 12:47:37PM +0000, Hiroshi Doyu wrote:
> > > >>>> The method to detect the number of CPU cores on Cortex-A9 MPCore and
> > > >>>> Cortex-A15 MPCore is different. On Cortex-A9 MPCore we can get this
> > > >>>> information from the Snoop Control Unit(SCU). On Cortex-A15 MPCore we
> > > >>>> have to read it from the system coprocessor(CP15), because the SCU on
> > > >>>> Cortex-A15 MPCore does not have software readable registers. This
> > > >>>> patch selects the correct method at runtime based on the CPU ID.
> > > ...
> > > >>>>  static void __init tegra_smp_init_cpus(void)
> > > >>>>  {
> > > >>>> -	unsigned int i, ncores = scu_get_core_count(scu_base);
> > > >>>> +	unsigned int i, cpu_id, ncores;
> > > >>>> +	u32 l2ctlr;
> > > >>>> +	phys_addr_t pa;
> > > >>>> +
> > > >>>> +	cpu_id = read_cpuid(CPUID_ID) & CPU_MASK;
> > > >>>> +	switch (cpu_id) {
> > > >>>> +	case CPU_CORTEX_A15:
> > > >>>> +		asm("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
> > > >>>> +		ncores = ((l2ctlr >> 24) & 3) + 1;
> > > >>>> +		break;
> > > >>>
> > > >>> [...]
> > > >>>
> > > >>> As mentioned last time [1], you should get this information from the dt
> > > >>> instead.
> > > >>
> > > >> Most of platsmp.c:.smp_init_cpus() implementations seem just to
> > > >> overwrite # of cores by SCU/MRC detection. Is there any implementation
> > > >> to use the DT's # and skip SCU/MRC detection in .smp_init_cpus()?
> > > > 
> > > > As far as I can see, there's no other platform which just relies on
> > > > arm_dt_init_cpu_maps. Until recently, it didn't exist, so that makes some
> > > > sense. As far as I can see, for the Tegra 114 you only need your smp_init_cpus
> > > > to call set_smp_cross_call(gic_raise_softirq). Everything else you do seems to
> > > > be handled by arm_dt_init_cpus.
> > 
> > True.
> > 
> > > > I think the best option would be to have a separate smp_ops for your dt
> > > > platforms where we know cpu nodes are populated (e.g. Tegra 114), where
> > > > smp_init_cpus is different to that for non-dt platforms. That way non dt
> > > > platforms can keep the SCU hack for now, and won't be broken, and the dt
> > > > platforms are far removed from the SCU hack and just use common
> > > > infrastructure.
> > >
> > > Tegra doesn't have any non-DT support now.
> > 
> > What about falling down to SCU/MRC hack only when DT detection fails
> > or no /cpus entry in DT?
> > 
> > Can arm_dt_init_cpu_maps() return if it suceeds or not?
> 
> I think the best solution to this problem consists in /me adding a
> function, say, arm_dt_nb_cores(), that returns an error if DT probing
> failed and the number of cores if it succeeded.
> 
> If it fails, you can fall back to SCU detection of cores for legacy
> reasons (A9), but from A15 onwards I have quite a strong feeling against
> using L2 as number of cores detection mechanism since it does not work
> for multi-cluster systems. Fair enough, on Tegra this is not a problem
> (now) but I do not want this method to propagate to other platforms where
> multi-cluster number of cores detection will fail if we rely on the L2
> mechanism.
> 
> This means that if the DT /cpu nodes are updated for all Tegra
> platforms, DT becomes the mechanism to detect the number of cores
> for all of them.
>
> If you allow the A9 SCU mechanism to override the DT information (even
> if it is correct) we might end up in the rather unwieldy situation where
> there is a disconnect between the cpu_logical_map initialized entries and
> the HW probed number of cores (if the DT bindings are correct the
> cpu_logical_map entries are initialized by DT).
> 
> The gist is:
> 
> - if DT /cpu nodes are there and defined properly, we build the
>   cpu_logical_map and count the number of cores from DT
> - if DT /cpu nodes are missing or bogus we rely on the cpu_logical_map
>   in smp_setup_processor_id() and fall back to legacy mechanism to detect
>   the number of cores. This just for legacy boards/SoC.
> 
> Let me know if this is reasonable please.

Sounds quite reasonable to me.

Please review another mail which falls back to SCU/MRC detection when
arm_dt_init_cpu_maps() fails.

> > > If we're going to make Tegra114 read the CPU information from DT, then
> > > I'd rather just switch over all Tegra platforms to requiring CPU nodes
> > > in the DT. We have few enough SoC variants that it should be easy to
> > > switch everything to DT before adding Tegra114 support without causing
> > > any problems.
> > 
> > Possible.
> > 
> > > But that all said, I'm not convinced it's a good idea to force the
> > > information to be present in DT when it's just duplicating stuff that
> > > can be runtime-probed from the HW...
> > 
> > That's also my concern somewhat.
> > 
> > The main purpose to DT CPU core detection is for multiple CPU
> > clusters. SCU/MRC cannot know anything outside of their own
> > clusters. In tegra, we have 4 Cortex-A9/A15 core plus 1 shadow
> > core. Switching those clusters is done transparently. The last A9/A15
> > core is transparently switched back and forth to the shadow one. So in
> > this case, SCU/MRC detection seems to work fine.
> 
> You can probe the number of cores through SCU but remember
> that MPIDRs of existing CPUs are generated, not probed if the DT is not
> present (smp_setup_processor_id()). cpu_logical_map content, before the
> DT bindings were introduced, or even now if DT /cpu nodes are bogus, is
> simply generated as a sequential number but is not *probeable* per-se.

Honestly I don't understand the above so well. Could you please
explain a little bit more?
Hiroshi DOYU Jan. 10, 2013, 6:53 a.m. UTC | #15
Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> wrote @ Wed, 9 Jan 2013 19:07:13 +0100:

> On Wed, Jan 09, 2013 at 04:17:15PM +0000, Stephen Warren wrote:
> > On 01/09/2013 04:34 AM, Lorenzo Pieralisi wrote:
> > > On Wed, Jan 09, 2013 at 05:49:46AM +0000, Hiroshi Doyu wrote:
> > >> Hi,
> > >>
> > >> Stephen Warren <swarren@wwwdotorg.org> wrote @ Tue, 8 Jan 2013 20:32:32 +0100:
> > >>
> > >>> On 01/08/2013 09:21 AM, Mark Rutland wrote:
> > >>>> On Tue, Jan 08, 2013 at 02:53:42PM +0000, Hiroshi Doyu wrote:
> > >>>>> Mark Rutland <mark.rutland@arm.com> wrote @ Tue, 8 Jan 2013 15:28:28 +0100:
> > >>>>>> On Tue, Jan 08, 2013 at 12:47:37PM +0000, Hiroshi Doyu wrote:
> > >>>>>>> The method to detect the number of CPU cores on Cortex-A9 MPCore and
> > >>>>>>> Cortex-A15 MPCore is different. On Cortex-A9 MPCore we can get this
> > >>>>>>> information from the Snoop Control Unit(SCU). On Cortex-A15 MPCore we
> > >>>>>>> have to read it from the system coprocessor(CP15), because the SCU on
> > >>>>>>> Cortex-A15 MPCore does not have software readable registers. This
> > >>>>>>> patch selects the correct method at runtime based on the CPU ID.
> > >>> ...
> > >>>>>>>  static void __init tegra_smp_init_cpus(void)
> > >>>>>>>  {
> > >>>>>>> -	unsigned int i, ncores = scu_get_core_count(scu_base);
> > >>>>>>> +	unsigned int i, cpu_id, ncores;
> > >>>>>>> +	u32 l2ctlr;
> > >>>>>>> +	phys_addr_t pa;
> > >>>>>>> +
> > >>>>>>> +	cpu_id = read_cpuid(CPUID_ID) & CPU_MASK;
> > >>>>>>> +	switch (cpu_id) {
> > >>>>>>> +	case CPU_CORTEX_A15:
> > >>>>>>> +		asm("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
> > >>>>>>> +		ncores = ((l2ctlr >> 24) & 3) + 1;
> > >>>>>>> +		break;
> > >>>>>>
> > >>>>>> [...]
> > >>>>>>
> > >>>>>> As mentioned last time [1], you should get this information from the dt
> > >>>>>> instead.
> > >>>>>
> > >>>>> Most of platsmp.c:.smp_init_cpus() implementations seem just to
> > >>>>> overwrite # of cores by SCU/MRC detection. Is there any implementation
> > >>>>> to use the DT's # and skip SCU/MRC detection in .smp_init_cpus()?
> > >>>>
> > >>>> As far as I can see, there's no other platform which just relies on
> > >>>> arm_dt_init_cpu_maps. Until recently, it didn't exist, so that makes some
> > >>>> sense. As far as I can see, for the Tegra 114 you only need your smp_init_cpus
> > >>>> to call set_smp_cross_call(gic_raise_softirq). Everything else you do seems to
> > >>>> be handled by arm_dt_init_cpus.
> > >>
> > >> True.
> > >>
> > >>>> I think the best option would be to have a separate smp_ops for your dt
> > >>>> platforms where we know cpu nodes are populated (e.g. Tegra 114), where
> > >>>> smp_init_cpus is different to that for non-dt platforms. That way non dt
> > >>>> platforms can keep the SCU hack for now, and won't be broken, and the dt
> > >>>> platforms are far removed from the SCU hack and just use common
> > >>>> infrastructure.
> > >>>
> > >>> Tegra doesn't have any non-DT support now.
> > >>
> > >> What about falling down to SCU/MRC hack only when DT detection fails
> > >> or no /cpus entry in DT?
> > >>
> > >> Can arm_dt_init_cpu_maps() return if it suceeds or not?
> > > 
> > > I think the best solution to this problem consists in /me adding a
> > > function, say, arm_dt_nb_cores(), that returns an error if DT probing
> > > failed and the number of cores if it succeeded.
> > 
> > Well, if the primary mechanism needs to be DT-based going forward, I'd
> > say just require the DT nodes to be present to use the new function, and
> > outright fail if they aren't. That way, everything always works one way.
> > It shouldn't be hard to add the CPU nodes for Tegra, right?
> 
> Adding /cpu nodes to DT is trivial, but I would like to keep a fall back
> mechanism in place for existing platforms to carry out the transition
> as smoothly as possible (eg people using new kernels with old DTS).
> 
> /cpu nodes will have priority over HW based probing for cores counting.
> 
> As discussed with Hiroshi I will post a patch to provide platforms with
> a DT cpu map validity check so that the fall back mechanism can be
> triggered properly.

I'll add /cpu nodes for all tegra, anyway.

Until your patch comes, I'll make a code just consider a single core
if DT fails at first. So T114 initial support patch could be merged as
well.
Lorenzo Pieralisi Jan. 10, 2013, 9:51 a.m. UTC | #16
On Thu, Jan 10, 2013 at 06:31:43AM +0000, Hiroshi Doyu wrote:
> Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> wrote @ Wed, 9 Jan 2013 12:34:32 +0100:

[...]

> > > > But that all said, I'm not convinced it's a good idea to force the
> > > > information to be present in DT when it's just duplicating stuff that
> > > > can be runtime-probed from the HW...
> > > 
> > > That's also my concern somewhat.
> > > 
> > > The main purpose to DT CPU core detection is for multiple CPU
> > > clusters. SCU/MRC cannot know anything outside of their own
> > > clusters. In tegra, we have 4 Cortex-A9/A15 core plus 1 shadow
> > > core. Switching those clusters is done transparently. The last A9/A15
> > > core is transparently switched back and forth to the shadow one. So in
> > > this case, SCU/MRC detection seems to work fine.
> > 
> > You can probe the number of cores through SCU but remember
> > that MPIDRs of existing CPUs are generated, not probed if the DT is not
> > present (smp_setup_processor_id()). cpu_logical_map content, before the
> > DT bindings were introduced, or even now if DT /cpu nodes are bogus, is
> > simply generated as a sequential number but is not *probeable* per-se.
> 
> Honestly I don't understand the above so well. Could you please
> explain a little bit more?

The DT /cpu bindings provide the kernel with a list of MPIDR ("reg"
property) of all CPUs in the system. What I wanted to say is that, you
can probe in HW the number of cores in some situations, but the DT
provides more information than that, so I was debating the "duplicating
stuff that can be runtime probed" point.

In some platforms you can probe the number of cores in HW, but the MPIDR
of existing CPUs is "generated" in smp_setup_processor_id() (you cannot
probe the MPIDR of CPUs that are off or held in a pen), as a sequential number
and stored in the cpu_logical_map. This works well for single cluster systems,
since the MPIDR[23:0] of processors in a single cluster system is a sequential
number starting from 0 (well, at least MPIDR[7:0], but let's not get bogged
down into details).

For multi-cluster systems this fails, and DT comes to the rescue along
with its companion parsing function, arm_dt_init_cpu_maps().

The long and short of it is: DT /cpu nodes are not there just for number
of cores counting, but there is more. Probably on Tegra this is not
needed at the moment, but this provides the groundwork for a generic and
solid solution for all upcoming ARM platforms.

I hope this helps, let me know if it is not clear.

Lorenzo
diff mbox

Patch

diff --git a/arch/arm/mach-tegra/platsmp.c b/arch/arm/mach-tegra/platsmp.c
index 1b926df..68e76ef 100644
--- a/arch/arm/mach-tegra/platsmp.c
+++ b/arch/arm/mach-tegra/platsmp.c
@@ -23,6 +23,7 @@ 
 #include <asm/hardware/gic.h>
 #include <asm/mach-types.h>
 #include <asm/smp_scu.h>
+#include <asm/cputype.h>
 
 #include <mach/powergate.h>
 
@@ -34,9 +35,13 @@ 
 #include "common.h"
 #include "iomap.h"
 
+#define CPU_MASK		0xff0ffff0
+#define CPU_CORTEX_A9		0x410fc090
+#define CPU_CORTEX_A15		0x410fc0f0
+
 extern void tegra_secondary_startup(void);
 
-static void __iomem *scu_base = IO_ADDRESS(TEGRA_ARM_PERIF_BASE);
+static void __iomem *scu_base;
 
 #define EVP_CPU_RESET_VECTOR \
 	(IO_ADDRESS(TEGRA_EXCEPTION_VECTORS_BASE) + 0x100)
@@ -149,7 +154,26 @@  done:
  */
 static void __init tegra_smp_init_cpus(void)
 {
-	unsigned int i, ncores = scu_get_core_count(scu_base);
+	unsigned int i, cpu_id, ncores;
+	u32 l2ctlr;
+	phys_addr_t pa;
+
+	cpu_id = read_cpuid(CPUID_ID) & CPU_MASK;
+	switch (cpu_id) {
+	case CPU_CORTEX_A15:
+		asm("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr));
+		ncores = ((l2ctlr >> 24) & 3) + 1;
+		break;
+	case CPU_CORTEX_A9:
+		/* Get SCU physical base */
+		asm("mrc p15, 4, %0, c15, c0, 0" : "=r" (pa));
+		scu_base = IO_ADDRESS(pa);
+		ncores = scu_get_core_count(scu_base);
+		break;
+	default:
+		BUG();
+		break;
+	}
 
 	if (ncores > nr_cpu_ids) {
 		pr_warn("SMP: %u cores greater than maximum (%u), clipping\n",
@@ -166,7 +190,8 @@  static void __init tegra_smp_init_cpus(void)
 static void __init tegra_smp_prepare_cpus(unsigned int max_cpus)
 {
 	tegra_cpu_reset_handler_init();
-	scu_enable(scu_base);
+	if (scu_base)
+		scu_enable(scu_base);
 }
 
 struct smp_operations tegra_smp_ops __initdata = {