Message ID: 1399981484-31628-1-git-send-email-haojian.zhuang@linaro.org (mailing list archive)
State: New, archived
On Tue, 13 May 2014, Haojian Zhuang wrote:

> Multiple CPU clusters are used in Hisilicon HiP04 SoC. Now use MCPM
> framework to manage power on HiP04 SoC.
>
> Signed-off-by: Haojian Zhuang <haojian.zhuang@linaro.org>

Some more comments...

[...]

> +static void hip04_set_snoop_filter(unsigned int cluster, unsigned int on)
> +{
> +        unsigned long data;
> +
> +        if (!fabric)
> +                return;

How could this validly be NULL?

> +        data = readl_relaxed(fabric + FAB_SF_MODE);
> +        if (on)
> +                data |= 1 << cluster;
> +        else
> +                data &= ~(1 << cluster);
> +        writel_relaxed(data, fabric + FAB_SF_MODE);
> +        while (1) {
> +                if (data == readl_relaxed(fabric + FAB_SF_MODE))
> +                        break;
> +        }
> +}

The above could be easily coded in assembly for the power_up_setup
callback thusly:

hip04_power_up_setup:

        cmp     r0, #0                  @ check affinity level
        bxeq    lr                      @ nothing to do at CPU level

        mrc     p15, 0, r0, c0, c0, 5   @ get MPIDR
        ubfx    r0, r0, #8, #8          @ extract cluster number

        adr     r1, .LC0
        ldmia   r1, {r2, r3}
        sub     r2, r2, r1              @ virt_addr - phys_addr
        ldr     r1, [r2, r3]            @ get fabric_phys_addr
        mov     r2, #1
        ldr     r3, [r1, #FAB_SF_MODE]  @ read "data"
        orr     r3, r3, r2, lsl r0      @ set cluster bit
        str     r3, [r1, #FAB_SF_MODE]  @ write it back

1:      ldr     r2, [r1, #FAB_SF_MODE]  @ read register content
        cmp     r2, r3                  @ make sure it matches
        bne     1b                      @ otherwise retry

        bx      lr

.LC0:   .word   .
        .word   fabric_phys_addr - .LC0

That should be it.

> +static int hip04_mcpm_power_up(unsigned int cpu, unsigned int cluster)
> +{
> +        unsigned long data, mask;
> +
> +        if (!relocation || !sysctrl)
> +                return -ENODEV;
> +        if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER)
> +                return -EINVAL;
> +
> +        spin_lock_irq(&boot_lock);
> +        writel_relaxed(hip04_boot.bootwrapper_phys, relocation);
> +        writel_relaxed(hip04_boot.bootwrapper_magic, relocation + 4);
> +        writel_relaxed(virt_to_phys(mcpm_entry_point), relocation + 8);
> +        writel_relaxed(0, relocation + 12);

Shouldn't you do the above writes only when
hip04_cpu_table[cluster][cpu] is zero? Please see the comment in
mcpm_cpu_power_down() about unordered calls.

> +        if (hip04_cluster_down(cluster)) {
> +                data = CLUSTER_DEBUG_RESET_BIT;
> +                writel_relaxed(data, sysctrl + SC_CPU_RESET_DREQ(cluster));
> +                do {
> +                        mask = CLUSTER_DEBUG_RESET_STATUS;
> +                        data = readl_relaxed(sysctrl + \
> +                                             SC_CPU_RESET_STATUS(cluster));
> +                } while (data & mask);
> +                hip04_set_snoop_filter(cluster, 1);
> +        }
> +
> +        hip04_cpu_table[cluster][cpu]++;
> +
> +        data = CORE_RESET_BIT(cpu) | NEON_RESET_BIT(cpu) | \
> +               CORE_DEBUG_RESET_BIT(cpu);
> +        writel_relaxed(data, sysctrl + SC_CPU_RESET_DREQ(cluster));
> +        spin_unlock_irq(&boot_lock);
> +        msleep(POLL_MSEC);
> +
> +        return 0;
> +}
> +
> +static void hip04_mcpm_power_down(void)
> +{
> +        unsigned int mpidr, cpu, cluster, data = 0;
> +        bool skip_reset = false;
> +
> +        mpidr = read_cpuid_mpidr();
> +        cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
> +        cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
> +
> +        __mcpm_cpu_going_down(cpu, cluster);
> +
> +        spin_lock(&boot_lock);
> +        BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
> +        hip04_cpu_table[cluster][cpu]--;
> +        if (hip04_cpu_table[cluster][cpu] == 1) {
> +                /* A power_up request went ahead of us. */
> +                skip_reset = true;
> +        } else if (hip04_cpu_table[cluster][cpu] > 1) {
> +                pr_err("Cluster %d CPU%d is still running\n", cluster, cpu);

This message is misleading. If execution gets here, that means
mcpm_cpu_power_up() was called more than twice in a row for the same CPU
which should never happen.

> +                BUG();
> +        }
> +
> +        spin_unlock(&boot_lock);
> +
> +        v7_exit_coherency_flush(louis);
> +
> +        __mcpm_cpu_down(cpu, cluster);
> +
> +        if (!skip_reset) {
> +                data = CORE_RESET_BIT(cpu) | NEON_RESET_BIT(cpu) | \
> +                       CORE_DEBUG_RESET_BIT(cpu);
> +                writel_relaxed(data, sysctrl + SC_CPU_RESET_REQ(cluster));

You should not perform this outside the lock protected region as this
could race with hip04_mcpm_power_up(). Instead, this should be done
above when hip04_cpu_table[cluster][cpu] == 0 after being decremented.

> +        }
> +}
> +
> +static int hip04_mcpm_wait_for_powerdown(unsigned int cpu, unsigned int cluster)
> +{
> +        unsigned int data, tries;
> +
> +        BUG_ON(cluster >= HIP04_MAX_CLUSTERS ||
> +               cpu >= HIP04_MAX_CPUS_PER_CLUSTER);
> +
> +        for (tries = 0; tries < TIMEOUT_MSEC / POLL_MSEC; tries++) {
> +                data = readl_relaxed(sysctrl + SC_CPU_RESET_STATUS(cluster));
> +                if (!(data & CORE_RESET_STATUS(cpu))) {
> +                        msleep(POLL_MSEC);
> +                        continue;
> +                }
> +                return 0;
> +        }
> +        return -ETIMEDOUT;
> +}
> +
> +static void hip04_mcpm_powered_up(void)
> +{
> +        if (!relocation)
> +                return;
> +        spin_lock(&boot_lock);
> +        writel_relaxed(0, relocation);
> +        writel_relaxed(0, relocation + 4);
> +        writel_relaxed(0, relocation + 8);
> +        writel_relaxed(0, relocation + 12);
> +        spin_unlock(&boot_lock);
> +}
> +
> +static const struct mcpm_platform_ops hip04_mcpm_ops = {
> +        .power_up               = hip04_mcpm_power_up,
> +        .power_down             = hip04_mcpm_power_down,
> +        .wait_for_powerdown     = hip04_mcpm_wait_for_powerdown,
> +        .powered_up             = hip04_mcpm_powered_up,
> +};
> +
> +static bool __init hip04_cpu_table_init(void)
> +{
> +        unsigned int mpidr, cpu, cluster;
> +
> +        mpidr = read_cpuid_mpidr();
> +        cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
> +        cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
> +
> +        if (cluster >= HIP04_MAX_CLUSTERS ||
> +            cpu >= HIP04_MAX_CPUS_PER_CLUSTER) {
> +                pr_err("%s: boot CPU is out of bound!\n", __func__);
> +                return false;
> +        }
> +        hip04_set_snoop_filter(cluster, 1);
> +        hip04_cpu_table[cluster][cpu] = 1;
> +        return true;
> +}
> +
> +static int __init hip04_mcpm_init(void)
> +{
> +        struct device_node *np, *np_fab;
> +        int ret = -ENODEV;
> +
> +        np = of_find_compatible_node(NULL, NULL, "hisilicon,sysctrl");
> +        if (!np)
> +                goto err;
> +        np_fab = of_find_compatible_node(NULL, NULL, "hisilicon,hip04-fabric");
> +        if (!np_fab)
> +                goto err;
> +
> +        if (of_property_read_u32(np, "bootwrapper-phys",
> +                                 &hip04_boot.bootwrapper_phys)) {
> +                pr_err("failed to get bootwrapper-phys\n");
> +                ret = -EINVAL;
> +                goto err;
> +        }
> +        if (of_property_read_u32(np, "bootwrapper-size",
> +                                 &hip04_boot.bootwrapper_size)) {
> +                pr_err("failed to get bootwrapper-size\n");
> +                ret = -EINVAL;
> +                goto err;
> +        }
> +        if (of_property_read_u32(np, "bootwrapper-magic",
> +                                 &hip04_boot.bootwrapper_magic)) {
> +                pr_err("failed to get bootwrapper-magic\n");
> +                ret = -EINVAL;
> +                goto err;
> +        }
> +        if (of_property_read_u32(np, "relocation-entry",
> +                                 &hip04_boot.relocation_entry)) {
> +                pr_err("failed to get relocation-entry\n");
> +                ret = -EINVAL;
> +                goto err;
> +        }
> +        if (of_property_read_u32(np, "relocation-size",
> +                                 &hip04_boot.relocation_size)) {
> +                pr_err("failed to get relocation-size\n");
> +                ret = -EINVAL;
> +                goto err;
> +        }
> +
> +        relocation = ioremap(hip04_boot.relocation_entry,
> +                             hip04_boot.relocation_size);
> +        if (!relocation) {
> +                pr_err("failed to map relocation space\n");
> +                ret = -ENOMEM;
> +                goto err;
> +        }
> +        sysctrl = of_iomap(np, 0);
> +        if (!sysctrl) {
> +                pr_err("failed to get sysctrl base\n");
> +                ret = -ENOMEM;
> +                goto err_sysctrl;
> +        }
> +        fabric = of_iomap(np_fab, 0);
> +        if (!fabric) {
> +                pr_err("failed to get fabric base\n");
> +                ret = -ENOMEM;
> +                goto err_fabric;
> +        }
> +
> +        if (!hip04_cpu_table_init())
> +                return -EINVAL;
> +        ret = mcpm_platform_register(&hip04_mcpm_ops);
> +        if (!ret) {
> +                mcpm_sync_init(NULL);
> +                pr_info("HiP04 MCPM initialized\n");
> +        }
> +        mcpm_smp_set_ops();
> +        return ret;
> +err_fabric:
> +        iounmap(sysctrl);
> +err_sysctrl:
> +        iounmap(relocation);
> +err:
> +        return ret;
> +}
> +early_initcall(hip04_mcpm_init);
> --
> 1.9.1
>
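For concreteness, a minimal sketch of the power_up change requested in
the review above, reusing the identifiers from the patch; the
zero-count guard around the mailbox writes is the only new part, and
this is only an illustration of the suggestion, not code from the
series:

static int hip04_mcpm_power_up(unsigned int cpu, unsigned int cluster)
{
        unsigned long data, mask;

        if (!relocation || !sysctrl)
                return -ENODEV;
        if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER)
                return -EINVAL;

        spin_lock_irq(&boot_lock);

        if (hip04_cpu_table[cluster][cpu] == 0) {
                /*
                 * Post the boot address only on the 0 -> 1 transition
                 * of the use count, so an unordered power_up following
                 * a power_down does not rewrite the mailbox while the
                 * CPU never actually went through reset.
                 */
                writel_relaxed(hip04_boot.bootwrapper_phys, relocation);
                writel_relaxed(hip04_boot.bootwrapper_magic, relocation + 4);
                writel_relaxed(virt_to_phys(mcpm_entry_point), relocation + 8);
                writel_relaxed(0, relocation + 12);
        }

        if (hip04_cluster_down(cluster)) {
                data = CLUSTER_DEBUG_RESET_BIT;
                writel_relaxed(data, sysctrl + SC_CPU_RESET_DREQ(cluster));
                do {
                        mask = CLUSTER_DEBUG_RESET_STATUS;
                        data = readl_relaxed(sysctrl +
                                             SC_CPU_RESET_STATUS(cluster));
                } while (data & mask);
                hip04_set_snoop_filter(cluster, 1);
        }

        hip04_cpu_table[cluster][cpu]++;

        data = CORE_RESET_BIT(cpu) | NEON_RESET_BIT(cpu) |
               CORE_DEBUG_RESET_BIT(cpu);
        writel_relaxed(data, sysctrl + SC_CPU_RESET_DREQ(cluster));
        spin_unlock_irq(&boot_lock);
        msleep(POLL_MSEC);

        return 0;
}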
On 14 May 2014 03:43, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> On Tue, 13 May 2014, Haojian Zhuang wrote:
>
>> Multiple CPU clusters are used in Hisilicon HiP04 SoC. Now use MCPM
>> framework to manage power on HiP04 SoC.
>>
>> Signed-off-by: Haojian Zhuang <haojian.zhuang@linaro.org>
>
> Some more comments...
>
> [...]
>> +static void hip04_set_snoop_filter(unsigned int cluster, unsigned int on)
>> +{
>> +        unsigned long data;
>> +
>> +        if (!fabric)
>> +                return;
>
> How could this validly be NULL?
>
OK. I'll make it report BUG.

>> +        data = readl_relaxed(fabric + FAB_SF_MODE);
>> +        if (on)
>> +                data |= 1 << cluster;
>> +        else
>> +                data &= ~(1 << cluster);
>> +        writel_relaxed(data, fabric + FAB_SF_MODE);
>> +        while (1) {
>> +                if (data == readl_relaxed(fabric + FAB_SF_MODE))
>> +                        break;
>> +        }
>> +}
>
> The above could be easily coded in assembly for the power_up_setup
> callback thusly:
>
> hip04_power_up_setup:
>
>         cmp     r0, #0                  @ check affinity level
>         bxeq    lr                      @ nothing to do at CPU level
>
>         mrc     p15, 0, r0, c0, c0, 5   @ get MPIDR
>         ubfx    r0, r0, #8, #8          @ extract cluster number
>
>         adr     r1, .LC0
>         ldmia   r1, {r2, r3}
>         sub     r2, r2, r1              @ virt_addr - phys_addr
>         ldr     r1, [r2, r3]            @ get fabric_phys_addr
>         mov     r2, #1
>         ldr     r3, [r1, #FAB_SF_MODE]  @ read "data"
>         orr     r3, r3, r2, lsl r0      @ set cluster bit
>         str     r3, [r1, #FAB_SF_MODE]  @ write it back
>
> 1:      ldr     r2, [r1, #FAB_SF_MODE]  @ read register content
>         cmp     r2, r3                  @ make sure it matches
>         bne     1b                      @ otherwise retry
>
>         bx      lr
>
> .LC0:   .word   .
>         .word   fabric_phys_addr - .LC0
>
> That should be it.
>
No. This code should be executed before the new CPU comes up. If I
move it into assembly, it will be executed after the new CPU is up.
Then the new CPU fails to come online.

>> +static int hip04_mcpm_power_up(unsigned int cpu, unsigned int cluster)
>> +{
>> +        unsigned long data, mask;
>> +
>> +        if (!relocation || !sysctrl)
>> +                return -ENODEV;
>> +        if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER)
>> +                return -EINVAL;
>> +
>> +        spin_lock_irq(&boot_lock);
>> +        writel_relaxed(hip04_boot.bootwrapper_phys, relocation);
>> +        writel_relaxed(hip04_boot.bootwrapper_magic, relocation + 4);
>> +        writel_relaxed(virt_to_phys(mcpm_entry_point), relocation + 8);
>> +        writel_relaxed(0, relocation + 12);
>
> Shouldn't you do the above writes only when
> hip04_cpu_table[cluster][cpu] is zero? Please see the comment in
> mcpm_cpu_power_down() about unordered calls.
>
OK. I can add the check.

>> +        if (hip04_cluster_down(cluster)) {
>> +                data = CLUSTER_DEBUG_RESET_BIT;
>> +                writel_relaxed(data, sysctrl + SC_CPU_RESET_DREQ(cluster));
>> +                do {
>> +                        mask = CLUSTER_DEBUG_RESET_STATUS;
>> +                        data = readl_relaxed(sysctrl + \
>> +                                             SC_CPU_RESET_STATUS(cluster));
>> +                } while (data & mask);
>> +                hip04_set_snoop_filter(cluster, 1);
>> +        }
>> +
>> +        hip04_cpu_table[cluster][cpu]++;
>> +
>> +        data = CORE_RESET_BIT(cpu) | NEON_RESET_BIT(cpu) | \
>> +               CORE_DEBUG_RESET_BIT(cpu);
>> +        writel_relaxed(data, sysctrl + SC_CPU_RESET_DREQ(cluster));
>> +        spin_unlock_irq(&boot_lock);
>> +        msleep(POLL_MSEC);
>> +
>> +        return 0;
>> +}
>> +
>> +static void hip04_mcpm_power_down(void)
>> +{
>> +        unsigned int mpidr, cpu, cluster, data = 0;
>> +        bool skip_reset = false;
>> +
>> +        mpidr = read_cpuid_mpidr();
>> +        cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
>> +        cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
>> +
>> +        __mcpm_cpu_going_down(cpu, cluster);
>> +
>> +        spin_lock(&boot_lock);
>> +        BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
>> +        hip04_cpu_table[cluster][cpu]--;
>> +        if (hip04_cpu_table[cluster][cpu] == 1) {
>> +                /* A power_up request went ahead of us. */
>> +                skip_reset = true;
>> +        } else if (hip04_cpu_table[cluster][cpu] > 1) {
>> +                pr_err("Cluster %d CPU%d is still running\n", cluster, cpu);
>
> This message is misleading. If execution gets here, that means
> mcpm_cpu_power_up() was called more than twice in a row for the same CPU
> which should never happen.
>
OK. I'll replace the comments.

>> +                BUG();
>> +        }
>> +
>> +        spin_unlock(&boot_lock);
>> +
>> +        v7_exit_coherency_flush(louis);
>> +
>> +        __mcpm_cpu_down(cpu, cluster);
>> +
>> +        if (!skip_reset) {
>> +                data = CORE_RESET_BIT(cpu) | NEON_RESET_BIT(cpu) | \
>> +                       CORE_DEBUG_RESET_BIT(cpu);
>> +                writel_relaxed(data, sysctrl + SC_CPU_RESET_REQ(cluster));
>
> You should not perform this outside the lock protected region as this
> could race with hip04_mcpm_power_up(). Instead, this should be done
> above when hip04_cpu_table[cluster][cpu] == 0 after being decremented.
>
No. power_down() is executed on the specified CPU. If spin_unlock() is
placed after the reset operation, there's no chance to execute
spin_unlock(), because the CPU is already in reset mode at that time.

Regards
Haojian
On Thu, 15 May 2014, Haojian Zhuang wrote:

> On 14 May 2014 03:43, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> > On Tue, 13 May 2014, Haojian Zhuang wrote:
> >
> >> +        data = readl_relaxed(fabric + FAB_SF_MODE);
> >> +        if (on)
> >> +                data |= 1 << cluster;
> >> +        else
> >> +                data &= ~(1 << cluster);
> >> +        writel_relaxed(data, fabric + FAB_SF_MODE);
> >> +        while (1) {
> >> +                if (data == readl_relaxed(fabric + FAB_SF_MODE))
> >> +                        break;
> >> +        }
> >> +}
> >
> > The above could be easily coded in assembly for the power_up_setup
> > callback thusly:
> >
> > hip04_power_up_setup:
> >
> >         cmp     r0, #0                  @ check affinity level
> >         bxeq    lr                      @ nothing to do at CPU level
> >
> >         mrc     p15, 0, r0, c0, c0, 5   @ get MPIDR
> >         ubfx    r0, r0, #8, #8          @ extract cluster number
> >
> >         adr     r1, .LC0
> >         ldmia   r1, {r2, r3}
> >         sub     r2, r2, r1              @ virt_addr - phys_addr
> >         ldr     r1, [r2, r3]            @ get fabric_phys_addr
> >         mov     r2, #1
> >         ldr     r3, [r1, #FAB_SF_MODE]  @ read "data"
> >         orr     r3, r3, r2, lsl r0      @ set cluster bit
> >         str     r3, [r1, #FAB_SF_MODE]  @ write it back
> >
> > 1:      ldr     r2, [r1, #FAB_SF_MODE]  @ read register content
> >         cmp     r2, r3                  @ make sure it matches
> >         bne     1b                      @ otherwise retry
> >
> >         bx      lr
> >
> > .LC0:   .word   .
> >         .word   fabric_phys_addr - .LC0
> >
> > That should be it.
> >
>
> No. This code should be executed before the new CPU comes up. If I
> move it into assembly, it will be executed after the new CPU is up.

Exactly.

> Then the new CPU fails to come online.

The assembly code could be wrong as well. Are you sure this is not the
actual reason?

Is there some documentation for this stuff?

> >> +static int hip04_mcpm_power_up(unsigned int cpu, unsigned int cluster)
> >> +{
> >> +        unsigned long data, mask;
> >> +
> >> +        if (!relocation || !sysctrl)
> >> +                return -ENODEV;
> >> +        if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER)
> >> +                return -EINVAL;
> >> +
> >> +        spin_lock_irq(&boot_lock);
> >> +        writel_relaxed(hip04_boot.bootwrapper_phys, relocation);
> >> +        writel_relaxed(hip04_boot.bootwrapper_magic, relocation + 4);
> >> +        writel_relaxed(virt_to_phys(mcpm_entry_point), relocation + 8);
> >> +        writel_relaxed(0, relocation + 12);
> >
> > Shouldn't you do the above writes only when
> > hip04_cpu_table[cluster][cpu] is zero? Please see the comment in
> > mcpm_cpu_power_down() about unordered calls.
> >
> OK. I can add the check.
>
> >> +        if (hip04_cluster_down(cluster)) {
> >> +                data = CLUSTER_DEBUG_RESET_BIT;
> >> +                writel_relaxed(data, sysctrl + SC_CPU_RESET_DREQ(cluster));
> >> +                do {
> >> +                        mask = CLUSTER_DEBUG_RESET_STATUS;
> >> +                        data = readl_relaxed(sysctrl + \
> >> +                                             SC_CPU_RESET_STATUS(cluster));
> >> +                } while (data & mask);
> >> +                hip04_set_snoop_filter(cluster, 1);
> >> +        }
> >> +
> >> +        hip04_cpu_table[cluster][cpu]++;
> >> +
> >> +        data = CORE_RESET_BIT(cpu) | NEON_RESET_BIT(cpu) | \
> >> +               CORE_DEBUG_RESET_BIT(cpu);
> >> +        writel_relaxed(data, sysctrl + SC_CPU_RESET_DREQ(cluster));
> >> +        spin_unlock_irq(&boot_lock);
> >> +        msleep(POLL_MSEC);
> >> +
> >> +        return 0;
> >> +}
> >> +
> >> +static void hip04_mcpm_power_down(void)
> >> +{
> >> +        unsigned int mpidr, cpu, cluster, data = 0;
> >> +        bool skip_reset = false;
> >> +
> >> +        mpidr = read_cpuid_mpidr();
> >> +        cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
> >> +        cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
> >> +
> >> +        __mcpm_cpu_going_down(cpu, cluster);
> >> +
> >> +        spin_lock(&boot_lock);
> >> +        BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
> >> +        hip04_cpu_table[cluster][cpu]--;
> >> +        if (hip04_cpu_table[cluster][cpu] == 1) {
> >> +                /* A power_up request went ahead of us. */
> >> +                skip_reset = true;
> >> +        } else if (hip04_cpu_table[cluster][cpu] > 1) {
> >> +                pr_err("Cluster %d CPU%d is still running\n", cluster, cpu);
> >
> > This message is misleading. If execution gets here, that means
> > mcpm_cpu_power_up() was called more than twice in a row for the same CPU
> > which should never happen.
> >
> OK. I'll replace the comments.
>
> >> +                BUG();
> >> +        }
> >> +
> >> +        spin_unlock(&boot_lock);
> >> +
> >> +        v7_exit_coherency_flush(louis);
> >> +
> >> +        __mcpm_cpu_down(cpu, cluster);
> >> +
> >> +        if (!skip_reset) {
> >> +                data = CORE_RESET_BIT(cpu) | NEON_RESET_BIT(cpu) | \
> >> +                       CORE_DEBUG_RESET_BIT(cpu);
> >> +                writel_relaxed(data, sysctrl + SC_CPU_RESET_REQ(cluster));
> >
> > You should not perform this outside the lock protected region as this
> > could race with hip04_mcpm_power_up(). Instead, this should be done
> > above when hip04_cpu_table[cluster][cpu] == 0 after being decremented.
> >
>
> No. power_down() is executed on the specified CPU. If spin_unlock() is
> placed after the reset operation, there's no chance to execute
> spin_unlock(), because the CPU is already in reset mode at that time.

Normally, reset is effective only when WFI is later executed. Are you
sure this is not the case on hip04 as well?


Nicolas
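Assuming Nicolas is right that the reset request is latched and only
takes effect once the CPU later executes WFI, the reset write could
stay inside the locked region as he suggests. A sketch under that
assumption only, reusing the patch's identifiers; the trailing wfi()
mirrors what the in-tree vexpress TC2 MCPM backend does at the end of
its power_down path:

static void hip04_mcpm_power_down(void)
{
        unsigned int mpidr, cpu, cluster, data;

        mpidr = read_cpuid_mpidr();
        cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
        cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);

        __mcpm_cpu_going_down(cpu, cluster);

        spin_lock(&boot_lock);
        BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
        hip04_cpu_table[cluster][cpu]--;
        if (hip04_cpu_table[cluster][cpu] == 0) {
                /*
                 * No power_up request raced ahead of us: request the
                 * reset while boot_lock is still held so it cannot
                 * race with hip04_mcpm_power_up(). This assumes the
                 * reset takes effect only once this CPU enters WFI.
                 */
                data = CORE_RESET_BIT(cpu) | NEON_RESET_BIT(cpu) |
                       CORE_DEBUG_RESET_BIT(cpu);
                writel_relaxed(data, sysctrl + SC_CPU_RESET_REQ(cluster));
        } else if (hip04_cpu_table[cluster][cpu] != 1) {
                /* power_up was called repeatedly for this CPU: a usage bug */
                BUG();
        }
        spin_unlock(&boot_lock);

        v7_exit_coherency_flush(louis);

        __mcpm_cpu_down(cpu, cluster);

        /* Now we are prepared for power-down, do it: */
        wfi();

        /* Not dead at this point? Let the caller cope. */
}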
On 16 May 2014 04:01, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> On Thu, 15 May 2014, Haojian Zhuang wrote:
>
>> On 14 May 2014 03:43, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
>> > On Tue, 13 May 2014, Haojian Zhuang wrote:
>> >
>> >> +        data = readl_relaxed(fabric + FAB_SF_MODE);
>> >> +        if (on)
>> >> +                data |= 1 << cluster;
>> >> +        else
>> >> +                data &= ~(1 << cluster);
>> >> +        writel_relaxed(data, fabric + FAB_SF_MODE);
>> >> +        while (1) {
>> >> +                if (data == readl_relaxed(fabric + FAB_SF_MODE))
>> >> +                        break;
>> >> +        }
>> >> +}
>> >
>> > The above could be easily coded in assembly for the power_up_setup
>> > callback thusly:
>> >
>> > hip04_power_up_setup:
>> >
>> >         cmp     r0, #0                  @ check affinity level
>> >         bxeq    lr                      @ nothing to do at CPU level
>> >
>> >         mrc     p15, 0, r0, c0, c0, 5   @ get MPIDR
>> >         ubfx    r0, r0, #8, #8          @ extract cluster number
>> >
>> >         adr     r1, .LC0
>> >         ldmia   r1, {r2, r3}
>> >         sub     r2, r2, r1              @ virt_addr - phys_addr
>> >         ldr     r1, [r2, r3]            @ get fabric_phys_addr
>> >         mov     r2, #1
>> >         ldr     r3, [r1, #FAB_SF_MODE]  @ read "data"
>> >         orr     r3, r3, r2, lsl r0      @ set cluster bit
>> >         str     r3, [r1, #FAB_SF_MODE]  @ write it back
>> >
>> > 1:      ldr     r2, [r1, #FAB_SF_MODE]  @ read register content
>> >         cmp     r2, r3                  @ make sure it matches
>> >         bne     1b                      @ otherwise retry
>> >
>> >         bx      lr
>> >
>> > .LC0:   .word   .
>> >         .word   fabric_phys_addr - .LC0
>> >
>> > That should be it.
>> >
>>
>> No. This code should be executed before the new CPU comes up. If I
>> move it into assembly, it will be executed after the new CPU is up.
>
> Exactly.
>
>> Then the new CPU fails to come online.
>
> The assembly code could be wrong as well. Are you sure this is not the
> actual reason?
>
> Is there some documentation for this stuff?
>
There's no problem in the assembly code. I even rewrote your assembly code.

If I keep my C code together with the assembly code, the new CPU comes
online fine. If I use only the assembly code, I get a kernel panic. So
it's not caused by the assembly code; it's caused by executing the code
after the new CPU is up.

There's no documentation on this. The documents are not well prepared
yet; I think they'll improve in the future.

>> >> +static int hip04_mcpm_power_up(unsigned int cpu, unsigned int cluster)
>> >> +{
>> >> +        unsigned long data, mask;
>> >> +
>> >> +        if (!relocation || !sysctrl)
>> >> +                return -ENODEV;
>> >> +        if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER)
>> >> +                return -EINVAL;
>> >> +
>> >> +        spin_lock_irq(&boot_lock);
>> >> +        writel_relaxed(hip04_boot.bootwrapper_phys, relocation);
>> >> +        writel_relaxed(hip04_boot.bootwrapper_magic, relocation + 4);
>> >> +        writel_relaxed(virt_to_phys(mcpm_entry_point), relocation + 8);
>> >> +        writel_relaxed(0, relocation + 12);
>> >
>> > Shouldn't you do the above writes only when
>> > hip04_cpu_table[cluster][cpu] is zero? Please see the comment in
>> > mcpm_cpu_power_down() about unordered calls.
>> >
>> OK. I can add the check.
>>
>> >> +        if (hip04_cluster_down(cluster)) {
>> >> +                data = CLUSTER_DEBUG_RESET_BIT;
>> >> +                writel_relaxed(data, sysctrl + SC_CPU_RESET_DREQ(cluster));
>> >> +                do {
>> >> +                        mask = CLUSTER_DEBUG_RESET_STATUS;
>> >> +                        data = readl_relaxed(sysctrl + \
>> >> +                                             SC_CPU_RESET_STATUS(cluster));
>> >> +                } while (data & mask);
>> >> +                hip04_set_snoop_filter(cluster, 1);
>> >> +        }
>> >> +
>> >> +        hip04_cpu_table[cluster][cpu]++;
>> >> +
>> >> +        data = CORE_RESET_BIT(cpu) | NEON_RESET_BIT(cpu) | \
>> >> +               CORE_DEBUG_RESET_BIT(cpu);
>> >> +        writel_relaxed(data, sysctrl + SC_CPU_RESET_DREQ(cluster));
>> >> +        spin_unlock_irq(&boot_lock);
>> >> +        msleep(POLL_MSEC);
>> >> +
>> >> +        return 0;
>> >> +}
>> >> +
>> >> +static void hip04_mcpm_power_down(void)
>> >> +{
>> >> +        unsigned int mpidr, cpu, cluster, data = 0;
>> >> +        bool skip_reset = false;
>> >> +
>> >> +        mpidr = read_cpuid_mpidr();
>> >> +        cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
>> >> +        cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
>> >> +
>> >> +        __mcpm_cpu_going_down(cpu, cluster);
>> >> +
>> >> +        spin_lock(&boot_lock);
>> >> +        BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
>> >> +        hip04_cpu_table[cluster][cpu]--;
>> >> +        if (hip04_cpu_table[cluster][cpu] == 1) {
>> >> +                /* A power_up request went ahead of us. */
>> >> +                skip_reset = true;
>> >> +        } else if (hip04_cpu_table[cluster][cpu] > 1) {
>> >> +                pr_err("Cluster %d CPU%d is still running\n", cluster, cpu);
>> >
>> > This message is misleading. If execution gets here, that means
>> > mcpm_cpu_power_up() was called more than twice in a row for the same CPU
>> > which should never happen.
>> >
>> OK. I'll replace the comments.
>>
>> >> +                BUG();
>> >> +        }
>> >> +
>> >> +        spin_unlock(&boot_lock);
>> >> +
>> >> +        v7_exit_coherency_flush(louis);
>> >> +
>> >> +        __mcpm_cpu_down(cpu, cluster);
>> >> +
>> >> +        if (!skip_reset) {
>> >> +                data = CORE_RESET_BIT(cpu) | NEON_RESET_BIT(cpu) | \
>> >> +                       CORE_DEBUG_RESET_BIT(cpu);
>> >> +                writel_relaxed(data, sysctrl + SC_CPU_RESET_REQ(cluster));
>> >
>> > You should not perform this outside the lock protected region as this
>> > could race with hip04_mcpm_power_up(). Instead, this should be done
>> > above when hip04_cpu_table[cluster][cpu] == 0 after being decremented.
>> >
>>
>> No. power_down() is executed on the specified CPU. If spin_unlock() is
>> placed after the reset operation, there's no chance to execute
>> spin_unlock(), because the CPU is already in reset mode at that time.
>
> Normally, reset is effective only when WFI is later executed. Are you
> sure this is not the case on hip04 as well?
>
>
Oh, it's different. cpu_v7_reset() is like giving a reset pulse signal
to the CPU core logic. The operations on the SC_CPU_RESET_REQ/DREQ
registers instead hold the CPU in reset mode or take it out of reset
mode. After the system powers on, all CPUs except CPU0 stay in reset
mode.

Regards
Haojian
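Restated, the description above implies that the REQ/DREQ register pair
acts as a level-style hold/release control rather than a pulse. A
hedged restatement as two helpers, using only the register macros from
the patch; the helper names themselves are illustrative and not part of
the series:

/* Hold the core, its NEON unit and debug logic in reset: it stays down. */
static void hip04_cpu_reset_assert(unsigned int cpu, unsigned int cluster)
{
        unsigned int data = CORE_RESET_BIT(cpu) | NEON_RESET_BIT(cpu) |
                            CORE_DEBUG_RESET_BIT(cpu);

        writel_relaxed(data, sysctrl + SC_CPU_RESET_REQ(cluster));
}

/* Release the core from reset: it starts fetching from the boot address. */
static void hip04_cpu_reset_deassert(unsigned int cpu, unsigned int cluster)
{
        unsigned int data = CORE_RESET_BIT(cpu) | NEON_RESET_BIT(cpu) |
                            CORE_DEBUG_RESET_BIT(cpu);

        writel_relaxed(data, sysctrl + SC_CPU_RESET_DREQ(cluster));
}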
On Tue, May 20, 2014 at 12:43:59PM +0800, Haojian Zhuang wrote:
> On 16 May 2014 04:01, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> > On Thu, 15 May 2014, Haojian Zhuang wrote:
> >
> >> On 14 May 2014 03:43, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> >> > On Tue, 13 May 2014, Haojian Zhuang wrote:
> >> >
> >> >> +        data = readl_relaxed(fabric + FAB_SF_MODE);
> >> >> +        if (on)
> >> >> +                data |= 1 << cluster;
> >> >> +        else
> >> >> +                data &= ~(1 << cluster);
> >> >> +        writel_relaxed(data, fabric + FAB_SF_MODE);
> >> >> +        while (1) {
> >> >> +                if (data == readl_relaxed(fabric + FAB_SF_MODE))
> >> >> +                        break;
> >> >> +        }
> >> >> +}
> >> >
> >> > The above could be easily coded in assembly for the power_up_setup
> >> > callback thusly:
> >> >
> >> > hip04_power_up_setup:
> >> >
> >> >         cmp     r0, #0                  @ check affinity level
> >> >         bxeq    lr                      @ nothing to do at CPU level
> >> >
> >> >         mrc     p15, 0, r0, c0, c0, 5   @ get MPIDR
> >> >         ubfx    r0, r0, #8, #8          @ extract cluster number
> >> >
> >> >         adr     r1, .LC0
> >> >         ldmia   r1, {r2, r3}
> >> >         sub     r2, r2, r1              @ virt_addr - phys_addr
> >> >         ldr     r1, [r2, r3]            @ get fabric_phys_addr
> >> >         mov     r2, #1
> >> >         ldr     r3, [r1, #FAB_SF_MODE]  @ read "data"
> >> >         orr     r3, r3, r2, lsl r0      @ set cluster bit
> >> >         str     r3, [r1, #FAB_SF_MODE]  @ write it back
> >> >
> >> > 1:      ldr     r2, [r1, #FAB_SF_MODE]  @ read register content
> >> >         cmp     r2, r3                  @ make sure it matches
> >> >         bne     1b                      @ otherwise retry
> >> >
> >> >         bx      lr
> >> >
> >> > .LC0:   .word   .
> >> >         .word   fabric_phys_addr - .LC0
> >> >
> >> > That should be it.
> >> >
> >>
> >> No. This code should be executed before the new CPU comes up. If I
> >> move it into assembly, it will be executed after the new CPU is up.
> >
> > Exactly.
> >
> >> Then the new CPU fails to come online.
> >
> > The assembly code could be wrong as well. Are you sure this is not the
> > actual reason?
> >
> > Is there some documentation for this stuff?
> >
>
> There's no problem in the assembly code. I even rewrote your assembly code.
>
> If I keep my C code together with the assembly code, the new CPU comes
> online fine. If I use only the assembly code, I get a kernel panic. So
> it's not caused by the assembly code; it's caused by executing the code
> after the new CPU is up.
>
> There's no documentation on this. The documents are not well prepared
> yet; I think they'll improve in the future.

It's essential to understand what the hardware is actually doing here.
If we don't understand exactly what toggling those bits in FAB_SF_MODE
actually does, then it's impossible to judge how to do it safely.

Cheers
---Dave
[ I somehow missed this email yesterday. Sorry if I asked the same
  questions for which you already had provided answers. ]

On Tue, 20 May 2014, Haojian Zhuang wrote:

> On 16 May 2014 04:01, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> > On Thu, 15 May 2014, Haojian Zhuang wrote:
> >
> >> On 14 May 2014 03:43, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> >> > On Tue, 13 May 2014, Haojian Zhuang wrote:
> >> >
> >> >> +        data = readl_relaxed(fabric + FAB_SF_MODE);
> >> >> +        if (on)
> >> >> +                data |= 1 << cluster;
> >> >> +        else
> >> >> +                data &= ~(1 << cluster);
> >> >> +        writel_relaxed(data, fabric + FAB_SF_MODE);
> >> >> +        while (1) {
> >> >> +                if (data == readl_relaxed(fabric + FAB_SF_MODE))
> >> >> +                        break;
> >> >> +        }
> >> >> +}
> >> >
> >> > The above could be easily coded in assembly for the power_up_setup
> >> > callback thusly:
> >> >
> >> > hip04_power_up_setup:
> >> >
> >> >         cmp     r0, #0                  @ check affinity level
> >> >         bxeq    lr                      @ nothing to do at CPU level
> >> >
> >> >         mrc     p15, 0, r0, c0, c0, 5   @ get MPIDR
> >> >         ubfx    r0, r0, #8, #8          @ extract cluster number
> >> >
> >> >         adr     r1, .LC0
> >> >         ldmia   r1, {r2, r3}
> >> >         sub     r2, r2, r1              @ virt_addr - phys_addr
> >> >         ldr     r1, [r2, r3]            @ get fabric_phys_addr
> >> >         mov     r2, #1
> >> >         ldr     r3, [r1, #FAB_SF_MODE]  @ read "data"
> >> >         orr     r3, r3, r2, lsl r0      @ set cluster bit
> >> >         str     r3, [r1, #FAB_SF_MODE]  @ write it back
> >> >
> >> > 1:      ldr     r2, [r1, #FAB_SF_MODE]  @ read register content
> >> >         cmp     r2, r3                  @ make sure it matches
> >> >         bne     1b                      @ otherwise retry
> >> >
> >> >         bx      lr
> >> >
> >> > .LC0:   .word   .
> >> >         .word   fabric_phys_addr - .LC0
> >> >
> >> > That should be it.
> >> >
> >>
> >> No. This code should be executed before the new CPU comes up. If I
> >> move it into assembly, it will be executed after the new CPU is up.
> >
> > Exactly.
> >
> >> Then the new CPU fails to come online.
> >
> > The assembly code could be wrong as well. Are you sure this is not the
> > actual reason?
> >
> > Is there some documentation for this stuff?
> >
>
> There's no problem in the assembly code. I even rewrote your assembly code.
>
> If I keep my C code together with the assembly code, the new CPU comes
> online fine. If I use only the assembly code, I get a kernel panic. So
> it's not caused by the assembly code; it's caused by executing the code
> after the new CPU is up.

Beware. The assembly code, when invoked via the MCPM layer during early
boot of a CPU, is executing with the MMU still disabled. That means all
addresses must be physical addresses. This is where things might be
tricky.

And then that code should not work if invoked from C code because it
then has to deal with virtual addresses.

So if you tested the assembly code by calling it from C code and it
worked then the assembly code is wrong.

To be sure please post the code you tested (mine wasn't complete) so we
could tell you if it is right.

> cpu_v7_reset() is like giving a reset pulse signal to the CPU core
> logic. The operations on the SC_CPU_RESET_REQ/DREQ registers instead
> hold the CPU in reset mode or take it out of reset mode. After the
> system powers on, all CPUs except CPU0 stay in reset mode.

Sorry, I don't fully understand the above.

I also note in your code that you write the same bits to
SC_CPU_RESET_REQ in both the power_up and power_down methods. So if
this is about sending a reset pulse only, how do you keep a CPU down
for a long period?


Nicolas
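Nicolas's assembly sketch depends on a fabric_phys_addr word that is
valid and visible with the MMU and caches off. A sketch of the C-side
plumbing that implies, where hip04_record_fabric_phys() is a
hypothetical helper name and sync_cache_w() is the idiom other MCPM
backends use to push such a value out to main memory (this needs
<linux/of_address.h> and <asm/cacheflush.h>):

phys_addr_t fabric_phys_addr;   /* read by hip04_power_up_setup, MMU off */

static int __init hip04_record_fabric_phys(struct device_node *np_fab)
{
        struct resource res;

        if (of_address_to_resource(np_fab, 0, &res))
                return -EINVAL;
        fabric_phys_addr = res.start;

        /*
         * Push the value out to main memory so that a CPU coming out
         * of reset, with caches and MMU still disabled, reads the
         * correct physical fabric base.
         */
        sync_cache_w(&fabric_phys_addr);
        return 0;
}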
Multiple CPU clusters are used in Hisilicon HiP04 SoC. Now use MCPM
framework to manage power on HiP04 SoC.

Signed-off-by: Haojian Zhuang <haojian.zhuang@linaro.org>
---
 arch/arm/mach-hisi/Makefile   |   1 +
 arch/arm/mach-hisi/platmcpm.c | 304 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 305 insertions(+)
 create mode 100644 arch/arm/mach-hisi/platmcpm.c

diff --git a/arch/arm/mach-hisi/Makefile b/arch/arm/mach-hisi/Makefile
index 2ae1b59..e7a8640 100644
--- a/arch/arm/mach-hisi/Makefile
+++ b/arch/arm/mach-hisi/Makefile
@@ -3,4 +3,5 @@
 #
 
 obj-y                           += hisilicon.o
+obj-$(CONFIG_MCPM)              += platmcpm.o
 obj-$(CONFIG_SMP)               += platsmp.o hotplug.o
diff --git a/arch/arm/mach-hisi/platmcpm.c b/arch/arm/mach-hisi/platmcpm.c
new file mode 100644
index 0000000..3b42977
--- /dev/null
+++ b/arch/arm/mach-hisi/platmcpm.c
@@ -0,0 +1,304 @@
+/*
+ * Copyright (c) 2013-2014 Linaro Ltd.
+ * Copyright (c) 2013-2014 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ */
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/of_address.h>
+
+#include <asm/cputype.h>
+#include <asm/cp15.h>
+#include <asm/mcpm.h>
+
+#include "core.h"
+
+/* bits definition in SC_CPU_RESET_REQ[x]/SC_CPU_RESET_DREQ[x]
+ * 1 -- unreset; 0 -- reset
+ */
+#define CORE_RESET_BIT(x)               (1 << x)
+#define NEON_RESET_BIT(x)               (1 << (x + 4))
+#define CORE_DEBUG_RESET_BIT(x)         (1 << (x + 9))
+#define CLUSTER_L2_RESET_BIT            (1 << 8)
+#define CLUSTER_DEBUG_RESET_BIT         (1 << 13)
+
+/*
+ * bits definition in SC_CPU_RESET_STATUS[x]
+ * 1 -- reset status; 0 -- unreset status
+ */
+#define CORE_RESET_STATUS(x)            (1 << x)
+#define NEON_RESET_STATUS(x)            (1 << (x + 4))
+#define CORE_DEBUG_RESET_STATUS(x)      (1 << (x + 9))
+#define CLUSTER_L2_RESET_STATUS         (1 << 8)
+#define CLUSTER_DEBUG_RESET_STATUS      (1 << 13)
+#define CORE_WFI_STATUS(x)              (1 << (x + 16))
+#define CORE_WFE_STATUS(x)              (1 << (x + 20))
+#define CORE_DEBUG_ACK(x)               (1 << (x + 24))
+
+#define SC_CPU_RESET_REQ(x)             (0x520 + (x << 3))      /* reset */
+#define SC_CPU_RESET_DREQ(x)            (0x524 + (x << 3))      /* unreset */
+#define SC_CPU_RESET_STATUS(x)          (0x1520 + (x << 3))
+
+#define FAB_SF_MODE                     0x0c
+#define FAB_SF_INVLD                    0x10
+
+/* bits definition in FB_SF_INVLD */
+#define FB_SF_INVLD_START               (1 << 8)
+
+#define HIP04_MAX_CLUSTERS              4
+#define HIP04_MAX_CPUS_PER_CLUSTER      4
+
+#define POLL_MSEC       10
+#define TIMEOUT_MSEC    1000
+
+struct hip04_secondary_cpu_data {
+        u32     bootwrapper_phys;
+        u32     bootwrapper_size;
+        u32     bootwrapper_magic;
+        u32     relocation_entry;
+        u32     relocation_size;
+};
+
+static void __iomem *relocation, *sysctrl, *fabric;
+static int hip04_cpu_table[HIP04_MAX_CLUSTERS][HIP04_MAX_CPUS_PER_CLUSTER];
+static DEFINE_SPINLOCK(boot_lock);
+static struct hip04_secondary_cpu_data hip04_boot;
+
+static bool hip04_cluster_down(unsigned int cluster)
+{
+        int i;
+
+        for (i = 0; i < HIP04_MAX_CPUS_PER_CLUSTER; i++)
+                if (hip04_cpu_table[cluster][i])
+                        return false;
+        return true;
+}
+
+static void hip04_set_snoop_filter(unsigned int cluster, unsigned int on)
+{
+        unsigned long data;
+
+        if (!fabric)
+                return;
+        data = readl_relaxed(fabric + FAB_SF_MODE);
+        if (on)
+                data |= 1 << cluster;
+        else
+                data &= ~(1 << cluster);
+        writel_relaxed(data, fabric + FAB_SF_MODE);
+        while (1) {
+                if (data == readl_relaxed(fabric + FAB_SF_MODE))
+                        break;
+        }
+}
+
+static int hip04_mcpm_power_up(unsigned int cpu, unsigned int cluster)
+{
+        unsigned long data, mask;
+
+        if (!relocation || !sysctrl)
+                return -ENODEV;
+        if (cluster >= HIP04_MAX_CLUSTERS || cpu >= HIP04_MAX_CPUS_PER_CLUSTER)
+                return -EINVAL;
+
+        spin_lock_irq(&boot_lock);
+        writel_relaxed(hip04_boot.bootwrapper_phys, relocation);
+        writel_relaxed(hip04_boot.bootwrapper_magic, relocation + 4);
+        writel_relaxed(virt_to_phys(mcpm_entry_point), relocation + 8);
+        writel_relaxed(0, relocation + 12);
+
+        if (hip04_cluster_down(cluster)) {
+                data = CLUSTER_DEBUG_RESET_BIT;
+                writel_relaxed(data, sysctrl + SC_CPU_RESET_DREQ(cluster));
+                do {
+                        mask = CLUSTER_DEBUG_RESET_STATUS;
+                        data = readl_relaxed(sysctrl + \
+                                             SC_CPU_RESET_STATUS(cluster));
+                } while (data & mask);
+                hip04_set_snoop_filter(cluster, 1);
+        }
+
+        hip04_cpu_table[cluster][cpu]++;
+
+        data = CORE_RESET_BIT(cpu) | NEON_RESET_BIT(cpu) | \
+               CORE_DEBUG_RESET_BIT(cpu);
+        writel_relaxed(data, sysctrl + SC_CPU_RESET_DREQ(cluster));
+        spin_unlock_irq(&boot_lock);
+        msleep(POLL_MSEC);
+
+        return 0;
+}
+
+static void hip04_mcpm_power_down(void)
+{
+        unsigned int mpidr, cpu, cluster, data = 0;
+        bool skip_reset = false;
+
+        mpidr = read_cpuid_mpidr();
+        cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+        cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
+        __mcpm_cpu_going_down(cpu, cluster);
+
+        spin_lock(&boot_lock);
+        BUG_ON(__mcpm_cluster_state(cluster) != CLUSTER_UP);
+        hip04_cpu_table[cluster][cpu]--;
+        if (hip04_cpu_table[cluster][cpu] == 1) {
+                /* A power_up request went ahead of us. */
+                skip_reset = true;
+        } else if (hip04_cpu_table[cluster][cpu] > 1) {
+                pr_err("Cluster %d CPU%d is still running\n", cluster, cpu);
+                BUG();
+        }
+
+        spin_unlock(&boot_lock);
+
+        v7_exit_coherency_flush(louis);
+
+        __mcpm_cpu_down(cpu, cluster);
+
+        if (!skip_reset) {
+                data = CORE_RESET_BIT(cpu) | NEON_RESET_BIT(cpu) | \
+                       CORE_DEBUG_RESET_BIT(cpu);
+                writel_relaxed(data, sysctrl + SC_CPU_RESET_REQ(cluster));
+        }
+}
+
+static int hip04_mcpm_wait_for_powerdown(unsigned int cpu, unsigned int cluster)
+{
+        unsigned int data, tries;
+
+        BUG_ON(cluster >= HIP04_MAX_CLUSTERS ||
+               cpu >= HIP04_MAX_CPUS_PER_CLUSTER);
+
+        for (tries = 0; tries < TIMEOUT_MSEC / POLL_MSEC; tries++) {
+                data = readl_relaxed(sysctrl + SC_CPU_RESET_STATUS(cluster));
+                if (!(data & CORE_RESET_STATUS(cpu))) {
+                        msleep(POLL_MSEC);
+                        continue;
+                }
+                return 0;
+        }
+        return -ETIMEDOUT;
+}
+
+static void hip04_mcpm_powered_up(void)
+{
+        if (!relocation)
+                return;
+        spin_lock(&boot_lock);
+        writel_relaxed(0, relocation);
+        writel_relaxed(0, relocation + 4);
+        writel_relaxed(0, relocation + 8);
+        writel_relaxed(0, relocation + 12);
+        spin_unlock(&boot_lock);
+}
+
+static const struct mcpm_platform_ops hip04_mcpm_ops = {
+        .power_up               = hip04_mcpm_power_up,
+        .power_down             = hip04_mcpm_power_down,
+        .wait_for_powerdown     = hip04_mcpm_wait_for_powerdown,
+        .powered_up             = hip04_mcpm_powered_up,
+};
+
+static bool __init hip04_cpu_table_init(void)
+{
+        unsigned int mpidr, cpu, cluster;
+
+        mpidr = read_cpuid_mpidr();
+        cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+        cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+
+        if (cluster >= HIP04_MAX_CLUSTERS ||
+            cpu >= HIP04_MAX_CPUS_PER_CLUSTER) {
+                pr_err("%s: boot CPU is out of bound!\n", __func__);
+                return false;
+        }
+        hip04_set_snoop_filter(cluster, 1);
+        hip04_cpu_table[cluster][cpu] = 1;
+        return true;
+}
+
+static int __init hip04_mcpm_init(void)
+{
+        struct device_node *np, *np_fab;
+        int ret = -ENODEV;
+
+        np = of_find_compatible_node(NULL, NULL, "hisilicon,sysctrl");
+        if (!np)
+                goto err;
+        np_fab = of_find_compatible_node(NULL, NULL, "hisilicon,hip04-fabric");
+        if (!np_fab)
+                goto err;
+
+        if (of_property_read_u32(np, "bootwrapper-phys",
+                                 &hip04_boot.bootwrapper_phys)) {
+                pr_err("failed to get bootwrapper-phys\n");
+                ret = -EINVAL;
+                goto err;
+        }
+        if (of_property_read_u32(np, "bootwrapper-size",
+                                 &hip04_boot.bootwrapper_size)) {
+                pr_err("failed to get bootwrapper-size\n");
+                ret = -EINVAL;
+                goto err;
+        }
+        if (of_property_read_u32(np, "bootwrapper-magic",
+                                 &hip04_boot.bootwrapper_magic)) {
+                pr_err("failed to get bootwrapper-magic\n");
+                ret = -EINVAL;
+                goto err;
+        }
+        if (of_property_read_u32(np, "relocation-entry",
+                                 &hip04_boot.relocation_entry)) {
+                pr_err("failed to get relocation-entry\n");
+                ret = -EINVAL;
+                goto err;
+        }
+        if (of_property_read_u32(np, "relocation-size",
+                                 &hip04_boot.relocation_size)) {
+                pr_err("failed to get relocation-size\n");
+                ret = -EINVAL;
+                goto err;
+        }
+
+        relocation = ioremap(hip04_boot.relocation_entry,
+                             hip04_boot.relocation_size);
+        if (!relocation) {
+                pr_err("failed to map relocation space\n");
+                ret = -ENOMEM;
+                goto err;
+        }
+        sysctrl = of_iomap(np, 0);
+        if (!sysctrl) {
+                pr_err("failed to get sysctrl base\n");
+                ret = -ENOMEM;
+                goto err_sysctrl;
+        }
+        fabric = of_iomap(np_fab, 0);
+        if (!fabric) {
+                pr_err("failed to get fabric base\n");
+                ret = -ENOMEM;
+                goto err_fabric;
+        }
+
+        if (!hip04_cpu_table_init())
+                return -EINVAL;
+        ret = mcpm_platform_register(&hip04_mcpm_ops);
+        if (!ret) {
+                mcpm_sync_init(NULL);
+                pr_info("HiP04 MCPM initialized\n");
+        }
+        mcpm_smp_set_ops();
+        return ret;
+err_fabric:
+        iounmap(sysctrl);
+err_sysctrl:
+        iounmap(relocation);
+err:
+        return ret;
+}
+early_initcall(hip04_mcpm_init);