Message ID | 1357649263-1098-4-git-send-email-hdoyu@nvidia.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Tue, Jan 08, 2013 at 02:47:37PM +0200, Hiroshi Doyu wrote: > The method to detect the number of CPU cores on Cortex-A9 MPCore and > Cortex-A15 MPCore is different. On Cortex-A9 MPCore we can get this > information from the Snoop Control Unit(SCU). On Cortex-A15 MPCore we > have to read it from the system coprocessor(CP15), because the SCU on > Cortex-A15 MPCore does not have software readable registers. This > patch selects the correct method at runtime based on the CPU ID. > > Signed-off-by: Hiroshi Doyu <hdoyu@nvidia.com> > --- > arch/arm/mach-tegra/platsmp.c | 31 ++++++++++++++++++++++++++++--- > 1 file changed, 28 insertions(+), 3 deletions(-) > > diff --git a/arch/arm/mach-tegra/platsmp.c b/arch/arm/mach-tegra/platsmp.c > index 1b926df..68e76ef 100644 > --- a/arch/arm/mach-tegra/platsmp.c > +++ b/arch/arm/mach-tegra/platsmp.c > @@ -23,6 +23,7 @@ > #include <asm/hardware/gic.h> > #include <asm/mach-types.h> > #include <asm/smp_scu.h> > +#include <asm/cputype.h> > > #include <mach/powergate.h> > > @@ -34,9 +35,13 @@ > #include "common.h" > #include "iomap.h" > > +#define CPU_MASK 0xff0ffff0 > +#define CPU_CORTEX_A9 0x410fc090 > +#define CPU_CORTEX_A15 0x410fc0f0 NAK. There's some patches around to make this stuff generic, we don't need more ifdefs springing up. We need to get those generic patches in.
Hello, On Tue, Jan 08, 2013 at 12:47:37PM +0000, Hiroshi Doyu wrote: > The method to detect the number of CPU cores on Cortex-A9 MPCore and > Cortex-A15 MPCore is different. On Cortex-A9 MPCore we can get this > information from the Snoop Control Unit(SCU). On Cortex-A15 MPCore we > have to read it from the system coprocessor(CP15), because the SCU on > Cortex-A15 MPCore does not have software readable registers. This > patch selects the correct method at runtime based on the CPU ID. > > Signed-off-by: Hiroshi Doyu <hdoyu@nvidia.com> > --- > arch/arm/mach-tegra/platsmp.c | 31 ++++++++++++++++++++++++++++--- > 1 file changed, 28 insertions(+), 3 deletions(-) > > diff --git a/arch/arm/mach-tegra/platsmp.c b/arch/arm/mach-tegra/platsmp.c > index 1b926df..68e76ef 100644 > --- a/arch/arm/mach-tegra/platsmp.c > +++ b/arch/arm/mach-tegra/platsmp.c > @@ -23,6 +23,7 @@ > #include <asm/hardware/gic.h> > #include <asm/mach-types.h> > #include <asm/smp_scu.h> > +#include <asm/cputype.h> > > #include <mach/powergate.h> > > @@ -34,9 +35,13 @@ > #include "common.h" > #include "iomap.h" > > +#define CPU_MASK 0xff0ffff0 > +#define CPU_CORTEX_A9 0x410fc090 > +#define CPU_CORTEX_A15 0x410fc0f0 > + > extern void tegra_secondary_startup(void); > > -static void __iomem *scu_base = IO_ADDRESS(TEGRA_ARM_PERIF_BASE); > +static void __iomem *scu_base; > > #define EVP_CPU_RESET_VECTOR \ > (IO_ADDRESS(TEGRA_EXCEPTION_VECTORS_BASE) + 0x100) > @@ -149,7 +154,26 @@ done: > */ > static void __init tegra_smp_init_cpus(void) > { > - unsigned int i, ncores = scu_get_core_count(scu_base); > + unsigned int i, cpu_id, ncores; > + u32 l2ctlr; > + phys_addr_t pa; > + > + cpu_id = read_cpuid(CPUID_ID) & CPU_MASK; > + switch (cpu_id) { > + case CPU_CORTEX_A15: > + asm("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr)); > + ncores = ((l2ctlr >> 24) & 3) + 1; > + break; [...] As mentioned last time [1], you should get this information from the dt instead. Thanks, Mark. [1] http://lists.infradead.org/pipermail/linux-arm-kernel/2012-December/138319.html
Hi Mark, Mark Rutland <mark.rutland@arm.com> wrote @ Tue, 8 Jan 2013 15:28:28 +0100: > Hello, > > On Tue, Jan 08, 2013 at 12:47:37PM +0000, Hiroshi Doyu wrote: > > The method to detect the number of CPU cores on Cortex-A9 MPCore and > > Cortex-A15 MPCore is different. On Cortex-A9 MPCore we can get this > > information from the Snoop Control Unit(SCU). On Cortex-A15 MPCore we > > have to read it from the system coprocessor(CP15), because the SCU on > > Cortex-A15 MPCore does not have software readable registers. This > > patch selects the correct method at runtime based on the CPU ID. > > > > Signed-off-by: Hiroshi Doyu <hdoyu@nvidia.com> > > --- > > arch/arm/mach-tegra/platsmp.c | 31 ++++++++++++++++++++++++++++--- > > 1 file changed, 28 insertions(+), 3 deletions(-) > > > > diff --git a/arch/arm/mach-tegra/platsmp.c b/arch/arm/mach-tegra/platsmp.c > > index 1b926df..68e76ef 100644 > > --- a/arch/arm/mach-tegra/platsmp.c > > +++ b/arch/arm/mach-tegra/platsmp.c > > @@ -23,6 +23,7 @@ > > #include <asm/hardware/gic.h> > > #include <asm/mach-types.h> > > #include <asm/smp_scu.h> > > +#include <asm/cputype.h> > > > > #include <mach/powergate.h> > > > > @@ -34,9 +35,13 @@ > > #include "common.h" > > #include "iomap.h" > > > > +#define CPU_MASK 0xff0ffff0 > > +#define CPU_CORTEX_A9 0x410fc090 > > +#define CPU_CORTEX_A15 0x410fc0f0 > > + > > extern void tegra_secondary_startup(void); > > > > -static void __iomem *scu_base = IO_ADDRESS(TEGRA_ARM_PERIF_BASE); > > +static void __iomem *scu_base; > > > > #define EVP_CPU_RESET_VECTOR \ > > (IO_ADDRESS(TEGRA_EXCEPTION_VECTORS_BASE) + 0x100) > > @@ -149,7 +154,26 @@ done: > > */ > > static void __init tegra_smp_init_cpus(void) > > { > > - unsigned int i, ncores = scu_get_core_count(scu_base); > > + unsigned int i, cpu_id, ncores; > > + u32 l2ctlr; > > + phys_addr_t pa; > > + > > + cpu_id = read_cpuid(CPUID_ID) & CPU_MASK; > > + switch (cpu_id) { > > + case CPU_CORTEX_A15: > > + asm("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr)); > > + ncores = ((l2ctlr >> 24) & 3) + 1; > > + break; > > [...] > > As mentioned last time [1], you should get this information from the dt > instead. Most of platsmp.c:.smp_init_cpus() implementations seem just to overwrite # of cores by SCU/MRC detection. Is there any implementation to use the DT's # and skip SCU/MRC detection in .smp_init_cpus()?
On Tue, Jan 08, 2013 at 02:53:42PM +0000, Hiroshi Doyu wrote: > Hi Mark, > > Mark Rutland <mark.rutland@arm.com> wrote @ Tue, 8 Jan 2013 15:28:28 +0100: > > > Hello, > > > > On Tue, Jan 08, 2013 at 12:47:37PM +0000, Hiroshi Doyu wrote: > > > The method to detect the number of CPU cores on Cortex-A9 MPCore and > > > Cortex-A15 MPCore is different. On Cortex-A9 MPCore we can get this > > > information from the Snoop Control Unit(SCU). On Cortex-A15 MPCore we > > > have to read it from the system coprocessor(CP15), because the SCU on > > > Cortex-A15 MPCore does not have software readable registers. This > > > patch selects the correct method at runtime based on the CPU ID. > > > > > > Signed-off-by: Hiroshi Doyu <hdoyu@nvidia.com> > > > --- > > > arch/arm/mach-tegra/platsmp.c | 31 ++++++++++++++++++++++++++++--- > > > 1 file changed, 28 insertions(+), 3 deletions(-) > > > > > > diff --git a/arch/arm/mach-tegra/platsmp.c b/arch/arm/mach-tegra/platsmp.c > > > index 1b926df..68e76ef 100644 > > > --- a/arch/arm/mach-tegra/platsmp.c > > > +++ b/arch/arm/mach-tegra/platsmp.c > > > @@ -23,6 +23,7 @@ > > > #include <asm/hardware/gic.h> > > > #include <asm/mach-types.h> > > > #include <asm/smp_scu.h> > > > +#include <asm/cputype.h> > > > > > > #include <mach/powergate.h> > > > > > > @@ -34,9 +35,13 @@ > > > #include "common.h" > > > #include "iomap.h" > > > > > > +#define CPU_MASK 0xff0ffff0 > > > +#define CPU_CORTEX_A9 0x410fc090 > > > +#define CPU_CORTEX_A15 0x410fc0f0 > > > + > > > extern void tegra_secondary_startup(void); > > > > > > -static void __iomem *scu_base = IO_ADDRESS(TEGRA_ARM_PERIF_BASE); > > > +static void __iomem *scu_base; > > > > > > #define EVP_CPU_RESET_VECTOR \ > > > (IO_ADDRESS(TEGRA_EXCEPTION_VECTORS_BASE) + 0x100) > > > @@ -149,7 +154,26 @@ done: > > > */ > > > static void __init tegra_smp_init_cpus(void) > > > { > > > - unsigned int i, ncores = scu_get_core_count(scu_base); > > > + unsigned int i, cpu_id, ncores; > > > + u32 l2ctlr; > > > + phys_addr_t pa; > > > + > > > + cpu_id = read_cpuid(CPUID_ID) & CPU_MASK; > > > + switch (cpu_id) { > > > + case CPU_CORTEX_A15: > > > + asm("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr)); > > > + ncores = ((l2ctlr >> 24) & 3) + 1; > > > + break; > > > > [...] > > > > As mentioned last time [1], you should get this information from the dt > > instead. > > Most of platsmp.c:.smp_init_cpus() implementations seem just to > overwrite # of cores by SCU/MRC detection. Is there any implementation > to use the DT's # and skip SCU/MRC detection in .smp_init_cpus()? As far as I can see, there's no other platform which just relies on arm_dt_init_cpu_maps. Until recently, it didn't exist, so that makes some sense. As far as I can see, for the Tegra 114 you only need your smp_init_cpus to call set_smp_cross_call(gic_raise_softirq). Everything else you do seems to be handled by arm_dt_init_cpus. I think the best option would be to have a separate smp_ops for your dt platforms where we know cpu nodes are populated (e.g. Tegra 114), where smp_init_cpus is different to that for non-dt platforms. That way non dt platforms can keep the SCU hack for now, and won't be broken, and the dt platforms are far removed from the SCU hack and just use common infrastructure. Maybe someone else has a better idea? Thanks, Mark.
On Tue, Jan 08, 2013 at 04:21:38PM +0000, Mark Rutland wrote: > On Tue, Jan 08, 2013 at 02:53:42PM +0000, Hiroshi Doyu wrote: [...] > > > > static void __init tegra_smp_init_cpus(void) > > > > { > > > > - unsigned int i, ncores = scu_get_core_count(scu_base); > > > > + unsigned int i, cpu_id, ncores; > > > > + u32 l2ctlr; > > > > + phys_addr_t pa; > > > > + > > > > + cpu_id = read_cpuid(CPUID_ID) & CPU_MASK; > > > > + switch (cpu_id) { > > > > + case CPU_CORTEX_A15: > > > > + asm("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr)); > > > > + ncores = ((l2ctlr >> 24) & 3) + 1; > > > > + break; > > > > > > [...] > > > > > > As mentioned last time [1], you should get this information from the dt > > > instead. > > > > Most of platsmp.c:.smp_init_cpus() implementations seem just to > > overwrite # of cores by SCU/MRC detection. Is there any implementation > > to use the DT's # and skip SCU/MRC detection in .smp_init_cpus()? > > As far as I can see, there's no other platform which just relies on > arm_dt_init_cpu_maps. Until recently, it didn't exist, so that makes some > sense. As far as I can see, for the Tegra 114 you only need your smp_init_cpus > to call set_smp_cross_call(gic_raise_softirq). Everything else you do seems to > be handled by arm_dt_init_cpus. > > I think the best option would be to have a separate smp_ops for your dt > platforms where we know cpu nodes are populated (e.g. Tegra 114), where > smp_init_cpus is different to that for non-dt platforms. That way non dt > platforms can keep the SCU hack for now, and won't be broken, and the dt > platforms are far removed from the SCU hack and just use common infrastructure. > > Maybe someone else has a better idea? Have a look at mach-vexpress/platsmp.c (keeping in mind that the GENERIC_SCU case will be refactored/removed since arm_dt_init_cpu_maps() is doing most of the required steps), please let me know what you think. Thanks, Lorenzo
On 01/08/2013 09:21 AM, Mark Rutland wrote: > On Tue, Jan 08, 2013 at 02:53:42PM +0000, Hiroshi Doyu wrote: >> Mark Rutland <mark.rutland@arm.com> wrote @ Tue, 8 Jan 2013 15:28:28 +0100: >>> On Tue, Jan 08, 2013 at 12:47:37PM +0000, Hiroshi Doyu wrote: >>>> The method to detect the number of CPU cores on Cortex-A9 MPCore and >>>> Cortex-A15 MPCore is different. On Cortex-A9 MPCore we can get this >>>> information from the Snoop Control Unit(SCU). On Cortex-A15 MPCore we >>>> have to read it from the system coprocessor(CP15), because the SCU on >>>> Cortex-A15 MPCore does not have software readable registers. This >>>> patch selects the correct method at runtime based on the CPU ID. ... >>>> static void __init tegra_smp_init_cpus(void) >>>> { >>>> - unsigned int i, ncores = scu_get_core_count(scu_base); >>>> + unsigned int i, cpu_id, ncores; >>>> + u32 l2ctlr; >>>> + phys_addr_t pa; >>>> + >>>> + cpu_id = read_cpuid(CPUID_ID) & CPU_MASK; >>>> + switch (cpu_id) { >>>> + case CPU_CORTEX_A15: >>>> + asm("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr)); >>>> + ncores = ((l2ctlr >> 24) & 3) + 1; >>>> + break; >>> >>> [...] >>> >>> As mentioned last time [1], you should get this information from the dt >>> instead. >> >> Most of platsmp.c:.smp_init_cpus() implementations seem just to >> overwrite # of cores by SCU/MRC detection. Is there any implementation >> to use the DT's # and skip SCU/MRC detection in .smp_init_cpus()? > > As far as I can see, there's no other platform which just relies on > arm_dt_init_cpu_maps. Until recently, it didn't exist, so that makes some > sense. As far as I can see, for the Tegra 114 you only need your smp_init_cpus > to call set_smp_cross_call(gic_raise_softirq). Everything else you do seems to > be handled by arm_dt_init_cpus. > > I think the best option would be to have a separate smp_ops for your dt > platforms where we know cpu nodes are populated (e.g. Tegra 114), where > smp_init_cpus is different to that for non-dt platforms. That way non dt > platforms can keep the SCU hack for now, and won't be broken, and the dt > platforms are far removed from the SCU hack and just use common infrastructure. Tegra doesn't have any non-DT support now. If we're going to make Tegra114 read the CPU information from DT, then I'd rather just switch over all Tegra platforms to requiring CPU nodes in the DT. We have few enough SoC variants that it should be easy to switch everything to DT before adding Tegra114 support without causing any problems. But that all said, I'm not convinced it's a good idea to force the information to be present in DT when it's just duplicating stuff that can be runtime-probed from the HW...
Russell King - ARM Linux <linux@arm.linux.org.uk> wrote @ Tue, 8 Jan 2013 15:26:51 +0100: > On Tue, Jan 08, 2013 at 02:47:37PM +0200, Hiroshi Doyu wrote: > > The method to detect the number of CPU cores on Cortex-A9 MPCore and > > Cortex-A15 MPCore is different. On Cortex-A9 MPCore we can get this > > information from the Snoop Control Unit(SCU). On Cortex-A15 MPCore we > > have to read it from the system coprocessor(CP15), because the SCU on > > Cortex-A15 MPCore does not have software readable registers. This > > patch selects the correct method at runtime based on the CPU ID. > > > > Signed-off-by: Hiroshi Doyu <hdoyu@nvidia.com> > > --- > > arch/arm/mach-tegra/platsmp.c | 31 ++++++++++++++++++++++++++++--- > > 1 file changed, 28 insertions(+), 3 deletions(-) > > > > diff --git a/arch/arm/mach-tegra/platsmp.c b/arch/arm/mach-tegra/platsmp.c > > index 1b926df..68e76ef 100644 > > --- a/arch/arm/mach-tegra/platsmp.c > > +++ b/arch/arm/mach-tegra/platsmp.c > > @@ -23,6 +23,7 @@ > > #include <asm/hardware/gic.h> > > #include <asm/mach-types.h> > > #include <asm/smp_scu.h> > > +#include <asm/cputype.h> > > > > #include <mach/powergate.h> > > > > @@ -34,9 +35,13 @@ > > #include "common.h" > > #include "iomap.h" > > > > +#define CPU_MASK 0xff0ffff0 > > +#define CPU_CORTEX_A9 0x410fc090 > > +#define CPU_CORTEX_A15 0x410fc0f0 > > NAK. There's some patches around to make this stuff generic, we don't > need more ifdefs springing up. We need to get those generic patches > in. Does anyone give some pointers to the above patches?
Hi, Stephen Warren <swarren@wwwdotorg.org> wrote @ Tue, 8 Jan 2013 20:32:32 +0100: > On 01/08/2013 09:21 AM, Mark Rutland wrote: > > On Tue, Jan 08, 2013 at 02:53:42PM +0000, Hiroshi Doyu wrote: > >> Mark Rutland <mark.rutland@arm.com> wrote @ Tue, 8 Jan 2013 15:28:28 +0100: > >>> On Tue, Jan 08, 2013 at 12:47:37PM +0000, Hiroshi Doyu wrote: > >>>> The method to detect the number of CPU cores on Cortex-A9 MPCore and > >>>> Cortex-A15 MPCore is different. On Cortex-A9 MPCore we can get this > >>>> information from the Snoop Control Unit(SCU). On Cortex-A15 MPCore we > >>>> have to read it from the system coprocessor(CP15), because the SCU on > >>>> Cortex-A15 MPCore does not have software readable registers. This > >>>> patch selects the correct method at runtime based on the CPU ID. > ... > >>>> static void __init tegra_smp_init_cpus(void) > >>>> { > >>>> - unsigned int i, ncores = scu_get_core_count(scu_base); > >>>> + unsigned int i, cpu_id, ncores; > >>>> + u32 l2ctlr; > >>>> + phys_addr_t pa; > >>>> + > >>>> + cpu_id = read_cpuid(CPUID_ID) & CPU_MASK; > >>>> + switch (cpu_id) { > >>>> + case CPU_CORTEX_A15: > >>>> + asm("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr)); > >>>> + ncores = ((l2ctlr >> 24) & 3) + 1; > >>>> + break; > >>> > >>> [...] > >>> > >>> As mentioned last time [1], you should get this information from the dt > >>> instead. > >> > >> Most of platsmp.c:.smp_init_cpus() implementations seem just to > >> overwrite # of cores by SCU/MRC detection. Is there any implementation > >> to use the DT's # and skip SCU/MRC detection in .smp_init_cpus()? > > > > As far as I can see, there's no other platform which just relies on > > arm_dt_init_cpu_maps. Until recently, it didn't exist, so that makes some > > sense. As far as I can see, for the Tegra 114 you only need your smp_init_cpus > > to call set_smp_cross_call(gic_raise_softirq). Everything else you do seems to > > be handled by arm_dt_init_cpus. True. > > I think the best option would be to have a separate smp_ops for your dt > > platforms where we know cpu nodes are populated (e.g. Tegra 114), where > > smp_init_cpus is different to that for non-dt platforms. That way non dt > > platforms can keep the SCU hack for now, and won't be broken, and the dt > > platforms are far removed from the SCU hack and just use common > > infrastructure. > > Tegra doesn't have any non-DT support now. What about falling down to SCU/MRC hack only when DT detection fails or no /cpus entry in DT? Can arm_dt_init_cpu_maps() return if it suceeds or not? > > If we're going to make Tegra114 read the CPU information from DT, then > I'd rather just switch over all Tegra platforms to requiring CPU nodes > in the DT. We have few enough SoC variants that it should be easy to > switch everything to DT before adding Tegra114 support without causing > any problems. Possible. > But that all said, I'm not convinced it's a good idea to force the > information to be present in DT when it's just duplicating stuff that > can be runtime-probed from the HW... That's also my concern somewhat. The main purpose to DT CPU core detection is for multiple CPU clusters. SCU/MRC cannot know anything outside of their own clusters. In tegra, we have 4 Cortex-A9/A15 core plus 1 shadow core. Switching those clusters is done transparently. The last A9/A15 core is transparently switched back and forth to the shadow one. So in this case, SCU/MRC detection seems to work fine.
On Wed, 2013-01-09 at 13:46 +0800, Hiroshi Doyu wrote: > Russell King - ARM Linux <linux@arm.linux.org.uk> wrote @ Tue, 8 Jan 2013 15:26:51 +0100: > > > On Tue, Jan 08, 2013 at 02:47:37PM +0200, Hiroshi Doyu wrote: > > > The method to detect the number of CPU cores on Cortex-A9 MPCore and > > > Cortex-A15 MPCore is different. On Cortex-A9 MPCore we can get this > > > information from the Snoop Control Unit(SCU). On Cortex-A15 MPCore we > > > have to read it from the system coprocessor(CP15), because the SCU on > > > Cortex-A15 MPCore does not have software readable registers. This > > > patch selects the correct method at runtime based on the CPU ID. > > > > > > Signed-off-by: Hiroshi Doyu <hdoyu@nvidia.com> > > > --- > > > arch/arm/mach-tegra/platsmp.c | 31 ++++++++++++++++++++++++++++--- > > > 1 file changed, 28 insertions(+), 3 deletions(-) > > > > > > diff --git a/arch/arm/mach-tegra/platsmp.c b/arch/arm/mach-tegra/platsmp.c > > > index 1b926df..68e76ef 100644 > > > --- a/arch/arm/mach-tegra/platsmp.c > > > +++ b/arch/arm/mach-tegra/platsmp.c > > > @@ -23,6 +23,7 @@ > > > #include <asm/hardware/gic.h> > > > #include <asm/mach-types.h> > > > #include <asm/smp_scu.h> > > > +#include <asm/cputype.h> > > > > > > #include <mach/powergate.h> > > > > > > @@ -34,9 +35,13 @@ > > > #include "common.h" > > > #include "iomap.h" > > > > > > +#define CPU_MASK 0xff0ffff0 > > > +#define CPU_CORTEX_A9 0x410fc090 > > > +#define CPU_CORTEX_A15 0x410fc0f0 > > > > NAK. There's some patches around to make this stuff generic, we don't > > need more ifdefs springing up. We need to get those generic patches > > in. > > Does anyone give some pointers to the above patches? Hi Hiroshi, Please check "arch/arm/include/asm/cputype.h" in next-20130109. It just be merged.
Hi Joseph, Joseph Lo <josephl@nvidia.com> wrote @ Wed, 9 Jan 2013 07:07:41 +0100: > On Wed, 2013-01-09 at 13:46 +0800, Hiroshi Doyu wrote: > > Russell King - ARM Linux <linux@arm.linux.org.uk> wrote @ Tue, 8 Jan 2013 15:26:51 +0100: > > > > > On Tue, Jan 08, 2013 at 02:47:37PM +0200, Hiroshi Doyu wrote: > > > > The method to detect the number of CPU cores on Cortex-A9 MPCore and > > > > Cortex-A15 MPCore is different. On Cortex-A9 MPCore we can get this > > > > information from the Snoop Control Unit(SCU). On Cortex-A15 MPCore we > > > > have to read it from the system coprocessor(CP15), because the SCU on > > > > Cortex-A15 MPCore does not have software readable registers. This > > > > patch selects the correct method at runtime based on the CPU ID. > > > > > > > > Signed-off-by: Hiroshi Doyu <hdoyu@nvidia.com> > > > > --- > > > > arch/arm/mach-tegra/platsmp.c | 31 ++++++++++++++++++++++++++++--- > > > > 1 file changed, 28 insertions(+), 3 deletions(-) > > > > > > > > diff --git a/arch/arm/mach-tegra/platsmp.c b/arch/arm/mach-tegra/platsmp.c > > > > index 1b926df..68e76ef 100644 > > > > --- a/arch/arm/mach-tegra/platsmp.c > > > > +++ b/arch/arm/mach-tegra/platsmp.c > > > > @@ -23,6 +23,7 @@ > > > > #include <asm/hardware/gic.h> > > > > #include <asm/mach-types.h> > > > > #include <asm/smp_scu.h> > > > > +#include <asm/cputype.h> > > > > > > > > #include <mach/powergate.h> > > > > > > > > @@ -34,9 +35,13 @@ > > > > #include "common.h" > > > > #include "iomap.h" > > > > > > > > +#define CPU_MASK 0xff0ffff0 > > > > +#define CPU_CORTEX_A9 0x410fc090 > > > > +#define CPU_CORTEX_A15 0x410fc0f0 > > > > > > NAK. There's some patches around to make this stuff generic, we don't > > > need more ifdefs springing up. We need to get those generic patches > > > in. > > > > Does anyone give some pointers to the above patches? > > Hi Hiroshi, > > Please check "arch/arm/include/asm/cputype.h" in next-20130109. It just > be merged. I found the following commit. Thanks. commit cd8dde6fc347b56b6cb9afa7cc0f9073da163176 Author: Christoffer Dall <c.dall@virtualopensystems.com> Date: Tue Dec 18 04:06:37 2012 +0000 ARM: Define CPU part numbers and implementors Define implementor IDs, part numbers and Xscale architecture versions in cputype.h. Also create accessor functions for reading the implementor, part number, and Xscale architecture versions from the CPUID regiser. Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com> Signed-off-by: Will Deacon <will.deacon@arm.com>
On Wed, Jan 09, 2013 at 05:49:46AM +0000, Hiroshi Doyu wrote: > Hi, > > Stephen Warren <swarren@wwwdotorg.org> wrote @ Tue, 8 Jan 2013 20:32:32 +0100: > > > On 01/08/2013 09:21 AM, Mark Rutland wrote: > > > On Tue, Jan 08, 2013 at 02:53:42PM +0000, Hiroshi Doyu wrote: > > >> Mark Rutland <mark.rutland@arm.com> wrote @ Tue, 8 Jan 2013 15:28:28 +0100: > > >>> On Tue, Jan 08, 2013 at 12:47:37PM +0000, Hiroshi Doyu wrote: > > >>>> The method to detect the number of CPU cores on Cortex-A9 MPCore and > > >>>> Cortex-A15 MPCore is different. On Cortex-A9 MPCore we can get this > > >>>> information from the Snoop Control Unit(SCU). On Cortex-A15 MPCore we > > >>>> have to read it from the system coprocessor(CP15), because the SCU on > > >>>> Cortex-A15 MPCore does not have software readable registers. This > > >>>> patch selects the correct method at runtime based on the CPU ID. > > ... > > >>>> static void __init tegra_smp_init_cpus(void) > > >>>> { > > >>>> - unsigned int i, ncores = scu_get_core_count(scu_base); > > >>>> + unsigned int i, cpu_id, ncores; > > >>>> + u32 l2ctlr; > > >>>> + phys_addr_t pa; > > >>>> + > > >>>> + cpu_id = read_cpuid(CPUID_ID) & CPU_MASK; > > >>>> + switch (cpu_id) { > > >>>> + case CPU_CORTEX_A15: > > >>>> + asm("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr)); > > >>>> + ncores = ((l2ctlr >> 24) & 3) + 1; > > >>>> + break; > > >>> > > >>> [...] > > >>> > > >>> As mentioned last time [1], you should get this information from the dt > > >>> instead. > > >> > > >> Most of platsmp.c:.smp_init_cpus() implementations seem just to > > >> overwrite # of cores by SCU/MRC detection. Is there any implementation > > >> to use the DT's # and skip SCU/MRC detection in .smp_init_cpus()? > > > > > > As far as I can see, there's no other platform which just relies on > > > arm_dt_init_cpu_maps. Until recently, it didn't exist, so that makes some > > > sense. As far as I can see, for the Tegra 114 you only need your smp_init_cpus > > > to call set_smp_cross_call(gic_raise_softirq). Everything else you do seems to > > > be handled by arm_dt_init_cpus. > > True. > > > > I think the best option would be to have a separate smp_ops for your dt > > > platforms where we know cpu nodes are populated (e.g. Tegra 114), where > > > smp_init_cpus is different to that for non-dt platforms. That way non dt > > > platforms can keep the SCU hack for now, and won't be broken, and the dt > > > platforms are far removed from the SCU hack and just use common > > > infrastructure. > > > > Tegra doesn't have any non-DT support now. > > What about falling down to SCU/MRC hack only when DT detection fails > or no /cpus entry in DT? > > Can arm_dt_init_cpu_maps() return if it suceeds or not? I think the best solution to this problem consists in /me adding a function, say, arm_dt_nb_cores(), that returns an error if DT probing failed and the number of cores if it succeeded. If it fails, you can fall back to SCU detection of cores for legacy reasons (A9), but from A15 onwards I have quite a strong feeling against using L2 as number of cores detection mechanism since it does not work for multi-cluster systems. Fair enough, on Tegra this is not a problem (now) but I do not want this method to propagate to other platforms where multi-cluster number of cores detection will fail if we rely on the L2 mechanism. This means that if the DT /cpu nodes are updated for all Tegra platforms, DT becomes the mechanism to detect the number of cores for all of them. If you allow the A9 SCU mechanism to override the DT information (even if it is correct) we might end up in the rather unwieldy situation where there is a disconnect between the cpu_logical_map initialized entries and the HW probed number of cores (if the DT bindings are correct the cpu_logical_map entries are initialized by DT). The gist is: - if DT /cpu nodes are there and defined properly, we build the cpu_logical_map and count the number of cores from DT - if DT /cpu nodes are missing or bogus we rely on the cpu_logical_map in smp_setup_processor_id() and fall back to legacy mechanism to detect the number of cores. This just for legacy boards/SoC. Let me know if this is reasonable please. > > > > > If we're going to make Tegra114 read the CPU information from DT, then > > I'd rather just switch over all Tegra platforms to requiring CPU nodes > > in the DT. We have few enough SoC variants that it should be easy to > > switch everything to DT before adding Tegra114 support without causing > > any problems. > > Possible. > > > But that all said, I'm not convinced it's a good idea to force the > > information to be present in DT when it's just duplicating stuff that > > can be runtime-probed from the HW... > > That's also my concern somewhat. > > The main purpose to DT CPU core detection is for multiple CPU > clusters. SCU/MRC cannot know anything outside of their own > clusters. In tegra, we have 4 Cortex-A9/A15 core plus 1 shadow > core. Switching those clusters is done transparently. The last A9/A15 > core is transparently switched back and forth to the shadow one. So in > this case, SCU/MRC detection seems to work fine. You can probe the number of cores through SCU but remember that MPIDRs of existing CPUs are generated, not probed if the DT is not present (smp_setup_processor_id()). cpu_logical_map content, before the DT bindings were introduced, or even now if DT /cpu nodes are bogus, is simply generated as a sequential number but is not *probeable* per-se. To tell the truth, this works well for all existing ARM platforms in the mainline but moving forward we'd better stick at bits of information provided by the DT, at least that's my opinion. Thoughts ? Lorenzo
On 01/09/2013 04:34 AM, Lorenzo Pieralisi wrote: > On Wed, Jan 09, 2013 at 05:49:46AM +0000, Hiroshi Doyu wrote: >> Hi, >> >> Stephen Warren <swarren@wwwdotorg.org> wrote @ Tue, 8 Jan 2013 20:32:32 +0100: >> >>> On 01/08/2013 09:21 AM, Mark Rutland wrote: >>>> On Tue, Jan 08, 2013 at 02:53:42PM +0000, Hiroshi Doyu wrote: >>>>> Mark Rutland <mark.rutland@arm.com> wrote @ Tue, 8 Jan 2013 15:28:28 +0100: >>>>>> On Tue, Jan 08, 2013 at 12:47:37PM +0000, Hiroshi Doyu wrote: >>>>>>> The method to detect the number of CPU cores on Cortex-A9 MPCore and >>>>>>> Cortex-A15 MPCore is different. On Cortex-A9 MPCore we can get this >>>>>>> information from the Snoop Control Unit(SCU). On Cortex-A15 MPCore we >>>>>>> have to read it from the system coprocessor(CP15), because the SCU on >>>>>>> Cortex-A15 MPCore does not have software readable registers. This >>>>>>> patch selects the correct method at runtime based on the CPU ID. >>> ... >>>>>>> static void __init tegra_smp_init_cpus(void) >>>>>>> { >>>>>>> - unsigned int i, ncores = scu_get_core_count(scu_base); >>>>>>> + unsigned int i, cpu_id, ncores; >>>>>>> + u32 l2ctlr; >>>>>>> + phys_addr_t pa; >>>>>>> + >>>>>>> + cpu_id = read_cpuid(CPUID_ID) & CPU_MASK; >>>>>>> + switch (cpu_id) { >>>>>>> + case CPU_CORTEX_A15: >>>>>>> + asm("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr)); >>>>>>> + ncores = ((l2ctlr >> 24) & 3) + 1; >>>>>>> + break; >>>>>> >>>>>> [...] >>>>>> >>>>>> As mentioned last time [1], you should get this information from the dt >>>>>> instead. >>>>> >>>>> Most of platsmp.c:.smp_init_cpus() implementations seem just to >>>>> overwrite # of cores by SCU/MRC detection. Is there any implementation >>>>> to use the DT's # and skip SCU/MRC detection in .smp_init_cpus()? >>>> >>>> As far as I can see, there's no other platform which just relies on >>>> arm_dt_init_cpu_maps. Until recently, it didn't exist, so that makes some >>>> sense. As far as I can see, for the Tegra 114 you only need your smp_init_cpus >>>> to call set_smp_cross_call(gic_raise_softirq). Everything else you do seems to >>>> be handled by arm_dt_init_cpus. >> >> True. >> >>>> I think the best option would be to have a separate smp_ops for your dt >>>> platforms where we know cpu nodes are populated (e.g. Tegra 114), where >>>> smp_init_cpus is different to that for non-dt platforms. That way non dt >>>> platforms can keep the SCU hack for now, and won't be broken, and the dt >>>> platforms are far removed from the SCU hack and just use common >>>> infrastructure. >>> >>> Tegra doesn't have any non-DT support now. >> >> What about falling down to SCU/MRC hack only when DT detection fails >> or no /cpus entry in DT? >> >> Can arm_dt_init_cpu_maps() return if it suceeds or not? > > I think the best solution to this problem consists in /me adding a > function, say, arm_dt_nb_cores(), that returns an error if DT probing > failed and the number of cores if it succeeded. Well, if the primary mechanism needs to be DT-based going forward, I'd say just require the DT nodes to be present to use the new function, and outright fail if they aren't. That way, everything always works one way. It shouldn't be hard to add the CPU nodes for Tegra, right?
On Wed, Jan 09, 2013 at 04:17:15PM +0000, Stephen Warren wrote: > On 01/09/2013 04:34 AM, Lorenzo Pieralisi wrote: > > On Wed, Jan 09, 2013 at 05:49:46AM +0000, Hiroshi Doyu wrote: > >> Hi, > >> > >> Stephen Warren <swarren@wwwdotorg.org> wrote @ Tue, 8 Jan 2013 20:32:32 +0100: > >> > >>> On 01/08/2013 09:21 AM, Mark Rutland wrote: > >>>> On Tue, Jan 08, 2013 at 02:53:42PM +0000, Hiroshi Doyu wrote: > >>>>> Mark Rutland <mark.rutland@arm.com> wrote @ Tue, 8 Jan 2013 15:28:28 +0100: > >>>>>> On Tue, Jan 08, 2013 at 12:47:37PM +0000, Hiroshi Doyu wrote: > >>>>>>> The method to detect the number of CPU cores on Cortex-A9 MPCore and > >>>>>>> Cortex-A15 MPCore is different. On Cortex-A9 MPCore we can get this > >>>>>>> information from the Snoop Control Unit(SCU). On Cortex-A15 MPCore we > >>>>>>> have to read it from the system coprocessor(CP15), because the SCU on > >>>>>>> Cortex-A15 MPCore does not have software readable registers. This > >>>>>>> patch selects the correct method at runtime based on the CPU ID. > >>> ... > >>>>>>> static void __init tegra_smp_init_cpus(void) > >>>>>>> { > >>>>>>> - unsigned int i, ncores = scu_get_core_count(scu_base); > >>>>>>> + unsigned int i, cpu_id, ncores; > >>>>>>> + u32 l2ctlr; > >>>>>>> + phys_addr_t pa; > >>>>>>> + > >>>>>>> + cpu_id = read_cpuid(CPUID_ID) & CPU_MASK; > >>>>>>> + switch (cpu_id) { > >>>>>>> + case CPU_CORTEX_A15: > >>>>>>> + asm("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr)); > >>>>>>> + ncores = ((l2ctlr >> 24) & 3) + 1; > >>>>>>> + break; > >>>>>> > >>>>>> [...] > >>>>>> > >>>>>> As mentioned last time [1], you should get this information from the dt > >>>>>> instead. > >>>>> > >>>>> Most of platsmp.c:.smp_init_cpus() implementations seem just to > >>>>> overwrite # of cores by SCU/MRC detection. Is there any implementation > >>>>> to use the DT's # and skip SCU/MRC detection in .smp_init_cpus()? > >>>> > >>>> As far as I can see, there's no other platform which just relies on > >>>> arm_dt_init_cpu_maps. Until recently, it didn't exist, so that makes some > >>>> sense. As far as I can see, for the Tegra 114 you only need your smp_init_cpus > >>>> to call set_smp_cross_call(gic_raise_softirq). Everything else you do seems to > >>>> be handled by arm_dt_init_cpus. > >> > >> True. > >> > >>>> I think the best option would be to have a separate smp_ops for your dt > >>>> platforms where we know cpu nodes are populated (e.g. Tegra 114), where > >>>> smp_init_cpus is different to that for non-dt platforms. That way non dt > >>>> platforms can keep the SCU hack for now, and won't be broken, and the dt > >>>> platforms are far removed from the SCU hack and just use common > >>>> infrastructure. > >>> > >>> Tegra doesn't have any non-DT support now. > >> > >> What about falling down to SCU/MRC hack only when DT detection fails > >> or no /cpus entry in DT? > >> > >> Can arm_dt_init_cpu_maps() return if it suceeds or not? > > > > I think the best solution to this problem consists in /me adding a > > function, say, arm_dt_nb_cores(), that returns an error if DT probing > > failed and the number of cores if it succeeded. > > Well, if the primary mechanism needs to be DT-based going forward, I'd > say just require the DT nodes to be present to use the new function, and > outright fail if they aren't. That way, everything always works one way. > It shouldn't be hard to add the CPU nodes for Tegra, right? Adding /cpu nodes to DT is trivial, but I would like to keep a fall back mechanism in place for existing platforms to carry out the transition as smoothly as possible (eg people using new kernels with old DTS). /cpu nodes will have priority over HW based probing for cores counting. As discussed with Hiroshi I will post a patch to provide platforms with a DT cpu map validity check so that the fall back mechanism can be triggered properly. Thank you very much, Lorenzo
Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> wrote @ Wed, 9 Jan 2013 12:34:32 +0100: > On Wed, Jan 09, 2013 at 05:49:46AM +0000, Hiroshi Doyu wrote: > > Hi, > > > > Stephen Warren <swarren@wwwdotorg.org> wrote @ Tue, 8 Jan 2013 20:32:32 +0100: > > > > > On 01/08/2013 09:21 AM, Mark Rutland wrote: > > > > On Tue, Jan 08, 2013 at 02:53:42PM +0000, Hiroshi Doyu wrote: > > > >> Mark Rutland <mark.rutland@arm.com> wrote @ Tue, 8 Jan 2013 15:28:28 +0100: > > > >>> On Tue, Jan 08, 2013 at 12:47:37PM +0000, Hiroshi Doyu wrote: > > > >>>> The method to detect the number of CPU cores on Cortex-A9 MPCore and > > > >>>> Cortex-A15 MPCore is different. On Cortex-A9 MPCore we can get this > > > >>>> information from the Snoop Control Unit(SCU). On Cortex-A15 MPCore we > > > >>>> have to read it from the system coprocessor(CP15), because the SCU on > > > >>>> Cortex-A15 MPCore does not have software readable registers. This > > > >>>> patch selects the correct method at runtime based on the CPU ID. > > > ... > > > >>>> static void __init tegra_smp_init_cpus(void) > > > >>>> { > > > >>>> - unsigned int i, ncores = scu_get_core_count(scu_base); > > > >>>> + unsigned int i, cpu_id, ncores; > > > >>>> + u32 l2ctlr; > > > >>>> + phys_addr_t pa; > > > >>>> + > > > >>>> + cpu_id = read_cpuid(CPUID_ID) & CPU_MASK; > > > >>>> + switch (cpu_id) { > > > >>>> + case CPU_CORTEX_A15: > > > >>>> + asm("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr)); > > > >>>> + ncores = ((l2ctlr >> 24) & 3) + 1; > > > >>>> + break; > > > >>> > > > >>> [...] > > > >>> > > > >>> As mentioned last time [1], you should get this information from the dt > > > >>> instead. > > > >> > > > >> Most of platsmp.c:.smp_init_cpus() implementations seem just to > > > >> overwrite # of cores by SCU/MRC detection. Is there any implementation > > > >> to use the DT's # and skip SCU/MRC detection in .smp_init_cpus()? > > > > > > > > As far as I can see, there's no other platform which just relies on > > > > arm_dt_init_cpu_maps. Until recently, it didn't exist, so that makes some > > > > sense. As far as I can see, for the Tegra 114 you only need your smp_init_cpus > > > > to call set_smp_cross_call(gic_raise_softirq). Everything else you do seems to > > > > be handled by arm_dt_init_cpus. > > > > True. > > > > > > I think the best option would be to have a separate smp_ops for your dt > > > > platforms where we know cpu nodes are populated (e.g. Tegra 114), where > > > > smp_init_cpus is different to that for non-dt platforms. That way non dt > > > > platforms can keep the SCU hack for now, and won't be broken, and the dt > > > > platforms are far removed from the SCU hack and just use common > > > > infrastructure. > > > > > > Tegra doesn't have any non-DT support now. > > > > What about falling down to SCU/MRC hack only when DT detection fails > > or no /cpus entry in DT? > > > > Can arm_dt_init_cpu_maps() return if it suceeds or not? > > I think the best solution to this problem consists in /me adding a > function, say, arm_dt_nb_cores(), that returns an error if DT probing > failed and the number of cores if it succeeded. > > If it fails, you can fall back to SCU detection of cores for legacy > reasons (A9), but from A15 onwards I have quite a strong feeling against > using L2 as number of cores detection mechanism since it does not work > for multi-cluster systems. Fair enough, on Tegra this is not a problem > (now) but I do not want this method to propagate to other platforms where > multi-cluster number of cores detection will fail if we rely on the L2 > mechanism. > > This means that if the DT /cpu nodes are updated for all Tegra > platforms, DT becomes the mechanism to detect the number of cores > for all of them. > > If you allow the A9 SCU mechanism to override the DT information (even > if it is correct) we might end up in the rather unwieldy situation where > there is a disconnect between the cpu_logical_map initialized entries and > the HW probed number of cores (if the DT bindings are correct the > cpu_logical_map entries are initialized by DT). > > The gist is: > > - if DT /cpu nodes are there and defined properly, we build the > cpu_logical_map and count the number of cores from DT > - if DT /cpu nodes are missing or bogus we rely on the cpu_logical_map > in smp_setup_processor_id() and fall back to legacy mechanism to detect > the number of cores. This just for legacy boards/SoC. > > Let me know if this is reasonable please. Sounds quite reasonable to me. Please review another mail which falls back to SCU/MRC detection when arm_dt_init_cpu_maps() fails. > > > If we're going to make Tegra114 read the CPU information from DT, then > > > I'd rather just switch over all Tegra platforms to requiring CPU nodes > > > in the DT. We have few enough SoC variants that it should be easy to > > > switch everything to DT before adding Tegra114 support without causing > > > any problems. > > > > Possible. > > > > > But that all said, I'm not convinced it's a good idea to force the > > > information to be present in DT when it's just duplicating stuff that > > > can be runtime-probed from the HW... > > > > That's also my concern somewhat. > > > > The main purpose to DT CPU core detection is for multiple CPU > > clusters. SCU/MRC cannot know anything outside of their own > > clusters. In tegra, we have 4 Cortex-A9/A15 core plus 1 shadow > > core. Switching those clusters is done transparently. The last A9/A15 > > core is transparently switched back and forth to the shadow one. So in > > this case, SCU/MRC detection seems to work fine. > > You can probe the number of cores through SCU but remember > that MPIDRs of existing CPUs are generated, not probed if the DT is not > present (smp_setup_processor_id()). cpu_logical_map content, before the > DT bindings were introduced, or even now if DT /cpu nodes are bogus, is > simply generated as a sequential number but is not *probeable* per-se. Honestly I don't understand the above so well. Could you please explain a little bit more?
Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> wrote @ Wed, 9 Jan 2013 19:07:13 +0100: > On Wed, Jan 09, 2013 at 04:17:15PM +0000, Stephen Warren wrote: > > On 01/09/2013 04:34 AM, Lorenzo Pieralisi wrote: > > > On Wed, Jan 09, 2013 at 05:49:46AM +0000, Hiroshi Doyu wrote: > > >> Hi, > > >> > > >> Stephen Warren <swarren@wwwdotorg.org> wrote @ Tue, 8 Jan 2013 20:32:32 +0100: > > >> > > >>> On 01/08/2013 09:21 AM, Mark Rutland wrote: > > >>>> On Tue, Jan 08, 2013 at 02:53:42PM +0000, Hiroshi Doyu wrote: > > >>>>> Mark Rutland <mark.rutland@arm.com> wrote @ Tue, 8 Jan 2013 15:28:28 +0100: > > >>>>>> On Tue, Jan 08, 2013 at 12:47:37PM +0000, Hiroshi Doyu wrote: > > >>>>>>> The method to detect the number of CPU cores on Cortex-A9 MPCore and > > >>>>>>> Cortex-A15 MPCore is different. On Cortex-A9 MPCore we can get this > > >>>>>>> information from the Snoop Control Unit(SCU). On Cortex-A15 MPCore we > > >>>>>>> have to read it from the system coprocessor(CP15), because the SCU on > > >>>>>>> Cortex-A15 MPCore does not have software readable registers. This > > >>>>>>> patch selects the correct method at runtime based on the CPU ID. > > >>> ... > > >>>>>>> static void __init tegra_smp_init_cpus(void) > > >>>>>>> { > > >>>>>>> - unsigned int i, ncores = scu_get_core_count(scu_base); > > >>>>>>> + unsigned int i, cpu_id, ncores; > > >>>>>>> + u32 l2ctlr; > > >>>>>>> + phys_addr_t pa; > > >>>>>>> + > > >>>>>>> + cpu_id = read_cpuid(CPUID_ID) & CPU_MASK; > > >>>>>>> + switch (cpu_id) { > > >>>>>>> + case CPU_CORTEX_A15: > > >>>>>>> + asm("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr)); > > >>>>>>> + ncores = ((l2ctlr >> 24) & 3) + 1; > > >>>>>>> + break; > > >>>>>> > > >>>>>> [...] > > >>>>>> > > >>>>>> As mentioned last time [1], you should get this information from the dt > > >>>>>> instead. > > >>>>> > > >>>>> Most of platsmp.c:.smp_init_cpus() implementations seem just to > > >>>>> overwrite # of cores by SCU/MRC detection. Is there any implementation > > >>>>> to use the DT's # and skip SCU/MRC detection in .smp_init_cpus()? > > >>>> > > >>>> As far as I can see, there's no other platform which just relies on > > >>>> arm_dt_init_cpu_maps. Until recently, it didn't exist, so that makes some > > >>>> sense. As far as I can see, for the Tegra 114 you only need your smp_init_cpus > > >>>> to call set_smp_cross_call(gic_raise_softirq). Everything else you do seems to > > >>>> be handled by arm_dt_init_cpus. > > >> > > >> True. > > >> > > >>>> I think the best option would be to have a separate smp_ops for your dt > > >>>> platforms where we know cpu nodes are populated (e.g. Tegra 114), where > > >>>> smp_init_cpus is different to that for non-dt platforms. That way non dt > > >>>> platforms can keep the SCU hack for now, and won't be broken, and the dt > > >>>> platforms are far removed from the SCU hack and just use common > > >>>> infrastructure. > > >>> > > >>> Tegra doesn't have any non-DT support now. > > >> > > >> What about falling down to SCU/MRC hack only when DT detection fails > > >> or no /cpus entry in DT? > > >> > > >> Can arm_dt_init_cpu_maps() return if it suceeds or not? > > > > > > I think the best solution to this problem consists in /me adding a > > > function, say, arm_dt_nb_cores(), that returns an error if DT probing > > > failed and the number of cores if it succeeded. > > > > Well, if the primary mechanism needs to be DT-based going forward, I'd > > say just require the DT nodes to be present to use the new function, and > > outright fail if they aren't. That way, everything always works one way. > > It shouldn't be hard to add the CPU nodes for Tegra, right? > > Adding /cpu nodes to DT is trivial, but I would like to keep a fall back > mechanism in place for existing platforms to carry out the transition > as smoothly as possible (eg people using new kernels with old DTS). > > /cpu nodes will have priority over HW based probing for cores counting. > > As discussed with Hiroshi I will post a patch to provide platforms with > a DT cpu map validity check so that the fall back mechanism can be > triggered properly. I'll add /cpu nodes for all tegra, anyway. Until your patch comes, I'll make a code just consider a single core if DT fails at first. So T114 initial support patch could be merged as well.
On Thu, Jan 10, 2013 at 06:31:43AM +0000, Hiroshi Doyu wrote: > Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> wrote @ Wed, 9 Jan 2013 12:34:32 +0100: [...] > > > > But that all said, I'm not convinced it's a good idea to force the > > > > information to be present in DT when it's just duplicating stuff that > > > > can be runtime-probed from the HW... > > > > > > That's also my concern somewhat. > > > > > > The main purpose to DT CPU core detection is for multiple CPU > > > clusters. SCU/MRC cannot know anything outside of their own > > > clusters. In tegra, we have 4 Cortex-A9/A15 core plus 1 shadow > > > core. Switching those clusters is done transparently. The last A9/A15 > > > core is transparently switched back and forth to the shadow one. So in > > > this case, SCU/MRC detection seems to work fine. > > > > You can probe the number of cores through SCU but remember > > that MPIDRs of existing CPUs are generated, not probed if the DT is not > > present (smp_setup_processor_id()). cpu_logical_map content, before the > > DT bindings were introduced, or even now if DT /cpu nodes are bogus, is > > simply generated as a sequential number but is not *probeable* per-se. > > Honestly I don't understand the above so well. Could you please > explain a little bit more? The DT /cpu bindings provide the kernel with a list of MPIDR ("reg" property) of all CPUs in the system. What I wanted to say is that, you can probe in HW the number of cores in some situations, but the DT provides more information than that, so I was debating the "duplicating stuff that can be runtime probed" point. In some platforms you can probe the number of cores in HW, but the MPIDR of existing CPUs is "generated" in smp_setup_processor_id() (you cannot probe the MPIDR of CPUs that are off or held in a pen), as a sequential number and stored in the cpu_logical_map. This works well for single cluster systems, since the MPIDR[23:0] of processors in a single cluster system is a sequential number starting from 0 (well, at least MPIDR[7:0], but let's not get bogged down into details). For multi-cluster systems this fails, and DT comes to the rescue along with its companion parsing function, arm_dt_init_cpu_maps(). The long and short of it is: DT /cpu nodes are not there just for number of cores counting, but there is more. Probably on Tegra this is not needed at the moment, but this provides the groundwork for a generic and solid solution for all upcoming ARM platforms. I hope this helps, let me know if it is not clear. Lorenzo
diff --git a/arch/arm/mach-tegra/platsmp.c b/arch/arm/mach-tegra/platsmp.c index 1b926df..68e76ef 100644 --- a/arch/arm/mach-tegra/platsmp.c +++ b/arch/arm/mach-tegra/platsmp.c @@ -23,6 +23,7 @@ #include <asm/hardware/gic.h> #include <asm/mach-types.h> #include <asm/smp_scu.h> +#include <asm/cputype.h> #include <mach/powergate.h> @@ -34,9 +35,13 @@ #include "common.h" #include "iomap.h" +#define CPU_MASK 0xff0ffff0 +#define CPU_CORTEX_A9 0x410fc090 +#define CPU_CORTEX_A15 0x410fc0f0 + extern void tegra_secondary_startup(void); -static void __iomem *scu_base = IO_ADDRESS(TEGRA_ARM_PERIF_BASE); +static void __iomem *scu_base; #define EVP_CPU_RESET_VECTOR \ (IO_ADDRESS(TEGRA_EXCEPTION_VECTORS_BASE) + 0x100) @@ -149,7 +154,26 @@ done: */ static void __init tegra_smp_init_cpus(void) { - unsigned int i, ncores = scu_get_core_count(scu_base); + unsigned int i, cpu_id, ncores; + u32 l2ctlr; + phys_addr_t pa; + + cpu_id = read_cpuid(CPUID_ID) & CPU_MASK; + switch (cpu_id) { + case CPU_CORTEX_A15: + asm("mrc p15, 1, %0, c9, c0, 2\n" : "=r" (l2ctlr)); + ncores = ((l2ctlr >> 24) & 3) + 1; + break; + case CPU_CORTEX_A9: + /* Get SCU physical base */ + asm("mrc p15, 4, %0, c15, c0, 0" : "=r" (pa)); + scu_base = IO_ADDRESS(pa); + ncores = scu_get_core_count(scu_base); + break; + default: + BUG(); + break; + } if (ncores > nr_cpu_ids) { pr_warn("SMP: %u cores greater than maximum (%u), clipping\n", @@ -166,7 +190,8 @@ static void __init tegra_smp_init_cpus(void) static void __init tegra_smp_prepare_cpus(unsigned int max_cpus) { tegra_cpu_reset_handler_init(); - scu_enable(scu_base); + if (scu_base) + scu_enable(scu_base); } struct smp_operations tegra_smp_ops __initdata = {
The method to detect the number of CPU cores on Cortex-A9 MPCore and Cortex-A15 MPCore is different. On Cortex-A9 MPCore we can get this information from the Snoop Control Unit(SCU). On Cortex-A15 MPCore we have to read it from the system coprocessor(CP15), because the SCU on Cortex-A15 MPCore does not have software readable registers. This patch selects the correct method at runtime based on the CPU ID. Signed-off-by: Hiroshi Doyu <hdoyu@nvidia.com> --- arch/arm/mach-tegra/platsmp.c | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-)