Message ID | 20211204002038.113653-4-atishp@atishpatra.org (mailing list archive)
---|---
State | New, archived
Series | Sparse HART id support
On Sat, Dec 4, 2021 at 5:51 AM Atish Patra <atishp@atishpatra.org> wrote: > > From: Atish Patra <atishp@rivosinc.com> > > The __cpu_up_stack/task_pointer array is only used for spinwait method > now. The per cpu array based lookup is also fragile for platforms with > discontiguous/sparse hartids. The spinwait method is only used for > M-mode Linux or older firmwares without SBI HSM extension. For general > Linux systems, ordered booting method is preferred anyways to support > cpu hotplug and kexec. > > Make sure that __cpu_up_stack/task_pointer is only used for spinwait > method. Take this opportunity to rename it to > __cpu_spinwait_stack/task_pointer to emphasize the purpose as well. > > Signed-off-by: Atish Patra <atishp@rivosinc.com> Looks good to me. Reviewed-by: Anup Patel <anup@brainfault.org> Regards, Anup > --- > arch/riscv/include/asm/cpu_ops.h | 2 -- > arch/riscv/kernel/cpu_ops.c | 16 ---------------- > arch/riscv/kernel/cpu_ops_spinwait.c | 27 ++++++++++++++++++++++++++- > arch/riscv/kernel/head.S | 4 ++-- > arch/riscv/kernel/head.h | 4 ++-- > 5 files changed, 30 insertions(+), 23 deletions(-) > > diff --git a/arch/riscv/include/asm/cpu_ops.h b/arch/riscv/include/asm/cpu_ops.h > index a8ec3c5c1bd2..134590f1b843 100644 > --- a/arch/riscv/include/asm/cpu_ops.h > +++ b/arch/riscv/include/asm/cpu_ops.h > @@ -40,7 +40,5 @@ struct cpu_operations { > > extern const struct cpu_operations *cpu_ops[NR_CPUS]; > void __init cpu_set_ops(int cpu); > -void cpu_update_secondary_bootdata(unsigned int cpuid, > - struct task_struct *tidle); > > #endif /* ifndef __ASM_CPU_OPS_H */ > diff --git a/arch/riscv/kernel/cpu_ops.c b/arch/riscv/kernel/cpu_ops.c > index 3f5a38b03044..c1e30f403c3b 100644 > --- a/arch/riscv/kernel/cpu_ops.c > +++ b/arch/riscv/kernel/cpu_ops.c > @@ -8,31 +8,15 @@ > #include <linux/of.h> > #include <linux/string.h> > #include <linux/sched.h> > -#include <linux/sched/task_stack.h> > #include <asm/cpu_ops.h> > #include <asm/sbi.h> > #include <asm/smp.h> > > const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init; > > -void *__cpu_up_stack_pointer[NR_CPUS] __section(".data"); > -void *__cpu_up_task_pointer[NR_CPUS] __section(".data"); > - > extern const struct cpu_operations cpu_ops_sbi; > extern const struct cpu_operations cpu_ops_spinwait; > > -void cpu_update_secondary_bootdata(unsigned int cpuid, > - struct task_struct *tidle) > -{ > - int hartid = cpuid_to_hartid_map(cpuid); > - > - /* Make sure tidle is updated */ > - smp_mb(); > - WRITE_ONCE(__cpu_up_stack_pointer[hartid], > - task_stack_page(tidle) + THREAD_SIZE); > - WRITE_ONCE(__cpu_up_task_pointer[hartid], tidle); > -} > - > void __init cpu_set_ops(int cpuid) > { > #if IS_ENABLED(CONFIG_RISCV_SBI) > diff --git a/arch/riscv/kernel/cpu_ops_spinwait.c b/arch/riscv/kernel/cpu_ops_spinwait.c > index b2c957bb68c1..9f398eb94f7a 100644 > --- a/arch/riscv/kernel/cpu_ops_spinwait.c > +++ b/arch/riscv/kernel/cpu_ops_spinwait.c > @@ -6,11 +6,36 @@ > #include <linux/errno.h> > #include <linux/of.h> > #include <linux/string.h> > +#include <linux/sched/task_stack.h> > #include <asm/cpu_ops.h> > #include <asm/sbi.h> > #include <asm/smp.h> > > const struct cpu_operations cpu_ops_spinwait; > +void *__cpu_spinwait_stack_pointer[NR_CPUS] __section(".data"); > +void *__cpu_spinwait_task_pointer[NR_CPUS] __section(".data"); > + > +static void cpu_update_secondary_bootdata(unsigned int cpuid, > + struct task_struct *tidle) > +{ > + int hartid = cpuid_to_hartid_map(cpuid); > + > + /* > + * The hartid must be less than NR_CPUS to avoid 
out-of-bound access > + * errors for __cpu_spinwait_stack/task_pointer. That is not always possible > + * for platforms with discontiguous hartid numbering scheme. That's why > + * spinwait booting is not the recommended approach for any platforms > + * and will be removed in future. > + */ > + if (hartid == INVALID_HARTID || hartid >= NR_CPUS) > + return; > + > + /* Make sure tidle is updated */ > + smp_mb(); > + WRITE_ONCE(__cpu_spinwait_stack_pointer[hartid], > + task_stack_page(tidle) + THREAD_SIZE); > + WRITE_ONCE(__cpu_spinwait_task_pointer[hartid], tidle); > +} > > static int spinwait_cpu_prepare(unsigned int cpuid) > { > @@ -28,7 +53,7 @@ static int spinwait_cpu_start(unsigned int cpuid, struct task_struct *tidle) > * selects the first cpu to boot the kernel and causes the remainder > * of the cpus to spin in a loop waiting for their stack pointer to be > * setup by that main cpu. Writing to bootdata > - * (i.e __cpu_up_stack_pointer) signals to the spinning cpus that they > + * (i.e __cpu_spinwait_stack_pointer) signals to the spinning cpus that they > * can continue the boot process. > */ > cpu_update_secondary_bootdata(cpuid, tidle); > diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S > index 40d4c625513c..6f8e99eac6a1 100644 > --- a/arch/riscv/kernel/head.S > +++ b/arch/riscv/kernel/head.S > @@ -347,9 +347,9 @@ clear_bss_done: > csrw CSR_TVEC, a3 > > slli a3, a0, LGREG > - la a1, __cpu_up_stack_pointer > + la a1, __cpu_spinwait_stack_pointer > XIP_FIXUP_OFFSET a1 > - la a2, __cpu_up_task_pointer > + la a2, __cpu_spinwait_task_pointer > XIP_FIXUP_OFFSET a2 > add a1, a3, a1 > add a2, a3, a2 > diff --git a/arch/riscv/kernel/head.h b/arch/riscv/kernel/head.h > index aabbc3ac3e48..5393cca77790 100644 > --- a/arch/riscv/kernel/head.h > +++ b/arch/riscv/kernel/head.h > @@ -16,7 +16,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa); > asmlinkage void __init __copy_data(void); > #endif > > -extern void *__cpu_up_stack_pointer[]; > -extern void *__cpu_up_task_pointer[]; > +extern void *__cpu_spinwait_stack_pointer[]; > +extern void *__cpu_spinwait_task_pointer[]; > > #endif /* __ASM_HEAD_H */ > -- > 2.33.1 >
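For reference, a standalone sketch (plain userspace C, not kernel code) of the hazard the new bounds check guards against: logical cpuids are dense, but hartids may be sparse, so a perfectly valid hart can map to an index at or past NR_CPUS. The NR_CPUS value, the mapping table, and main() below are illustrative assumptions, not the kernel's data.

#include <stdio.h>

#define NR_CPUS        4
#define INVALID_HARTID (-1)

static void *spinwait_stack_pointer[NR_CPUS];

/* Illustrative sparse mapping: logical cpu 3 lives on hart 64. */
static const int cpuid_to_hartid[NR_CPUS] = { 0, 1, 2, 64 };

static void update_bootdata(unsigned int cpuid, void *stack)
{
	int hartid = cpuid_to_hartid[cpuid];

	/* Without this guard, cpuid 3 would index past the array. */
	if (hartid == INVALID_HARTID || hartid >= NR_CPUS) {
		printf("cpu %u: hartid %d out of range, spinwait boot skipped\n",
		       cpuid, hartid);
		return;
	}
	spinwait_stack_pointer[hartid] = stack;
	printf("cpu %u: hartid %d stack %p published\n", cpuid, hartid, stack);
}

int main(void)
{
	static char stacks[NR_CPUS][4096];

	for (unsigned int cpu = 0; cpu < NR_CPUS; cpu++)
		update_bootdata(cpu, stacks[cpu] + sizeof(stacks[cpu]));
	return 0;
}

Run as-is, cpu 3 is skipped rather than writing past the end of the array, which is exactly the behaviour the patch adds.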
On 2021-12-04 00:20, Atish Patra wrote: > From: Atish Patra <atishp@rivosinc.com> > > The __cpu_up_stack/task_pointer array is only used for spinwait method > now. The per cpu array based lookup is also fragile for platforms with > discontiguous/sparse hartids. The spinwait method is only used for > M-mode Linux or older firmwares without SBI HSM extension. For general > Linux systems, ordered booting method is preferred anyways to support > cpu hotplug and kexec. > > Make sure that __cpu_up_stack/task_pointer is only used for spinwait > method. Take this opportunity to rename it to > __cpu_spinwait_stack/task_pointer to emphasize the purpose as well. > > Signed-off-by: Atish Patra <atishp@rivosinc.com> > --- > arch/riscv/include/asm/cpu_ops.h | 2 -- > arch/riscv/kernel/cpu_ops.c | 16 ---------------- > arch/riscv/kernel/cpu_ops_spinwait.c | 27 ++++++++++++++++++++++++++- > arch/riscv/kernel/head.S | 4 ++-- > arch/riscv/kernel/head.h | 4 ++-- > 5 files changed, 30 insertions(+), 23 deletions(-) > > diff --git a/arch/riscv/include/asm/cpu_ops.h > b/arch/riscv/include/asm/cpu_ops.h > index a8ec3c5c1bd2..134590f1b843 100644 > --- a/arch/riscv/include/asm/cpu_ops.h > +++ b/arch/riscv/include/asm/cpu_ops.h > @@ -40,7 +40,5 @@ struct cpu_operations { > > extern const struct cpu_operations *cpu_ops[NR_CPUS]; > void __init cpu_set_ops(int cpu); > -void cpu_update_secondary_bootdata(unsigned int cpuid, > - struct task_struct *tidle); > > #endif /* ifndef __ASM_CPU_OPS_H */ > diff --git a/arch/riscv/kernel/cpu_ops.c b/arch/riscv/kernel/cpu_ops.c > index 3f5a38b03044..c1e30f403c3b 100644 > --- a/arch/riscv/kernel/cpu_ops.c > +++ b/arch/riscv/kernel/cpu_ops.c > @@ -8,31 +8,15 @@ > #include <linux/of.h> > #include <linux/string.h> > #include <linux/sched.h> > -#include <linux/sched/task_stack.h> > #include <asm/cpu_ops.h> > #include <asm/sbi.h> > #include <asm/smp.h> > > const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init; > > -void *__cpu_up_stack_pointer[NR_CPUS] __section(".data"); > -void *__cpu_up_task_pointer[NR_CPUS] __section(".data"); > - > extern const struct cpu_operations cpu_ops_sbi; > extern const struct cpu_operations cpu_ops_spinwait; > > -void cpu_update_secondary_bootdata(unsigned int cpuid, > - struct task_struct *tidle) > -{ > - int hartid = cpuid_to_hartid_map(cpuid); > - > - /* Make sure tidle is updated */ > - smp_mb(); > - WRITE_ONCE(__cpu_up_stack_pointer[hartid], > - task_stack_page(tidle) + THREAD_SIZE); > - WRITE_ONCE(__cpu_up_task_pointer[hartid], tidle); > -} > - > void __init cpu_set_ops(int cpuid) > { > #if IS_ENABLED(CONFIG_RISCV_SBI) > diff --git a/arch/riscv/kernel/cpu_ops_spinwait.c > b/arch/riscv/kernel/cpu_ops_spinwait.c > index b2c957bb68c1..9f398eb94f7a 100644 > --- a/arch/riscv/kernel/cpu_ops_spinwait.c > +++ b/arch/riscv/kernel/cpu_ops_spinwait.c > @@ -6,11 +6,36 @@ > #include <linux/errno.h> > #include <linux/of.h> > #include <linux/string.h> > +#include <linux/sched/task_stack.h> > #include <asm/cpu_ops.h> > #include <asm/sbi.h> > #include <asm/smp.h> > > const struct cpu_operations cpu_ops_spinwait; > +void *__cpu_spinwait_stack_pointer[NR_CPUS] __section(".data"); > +void *__cpu_spinwait_task_pointer[NR_CPUS] __section(".data"); > + > +static void cpu_update_secondary_bootdata(unsigned int cpuid, > + struct task_struct *tidle) > +{ > + int hartid = cpuid_to_hartid_map(cpuid); > + > + /* > + * The hartid must be less than NR_CPUS to avoid out-of-bound access > + * errors for __cpu_spinwait_stack/task_pointer. 
That is not always > possible > + * for platforms with discontiguous hartid numbering scheme. That's > why > + * spinwait booting is not the recommended approach for any platforms > + * and will be removed in future. How can you do that? Yes, spinning schemes are terrible. However, once you started supporting them, you are stuck. Best case, you can have an allow-list and only allow some older platforms to use them. You can also make some features dependent on non-spin schemes (kexec being one). But dropping support isn't a valid option, I'm afraid. Thanks, M.
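A minimal standalone model (plain C, not kernel code) of the feature gating Marc suggests: record which method brought each cpu up and let a feature such as kexec refuse when any cpu used spinwait. The enum, the table contents, and the kexec_allowed() helper are assumptions for illustration only.

#include <stdio.h>
#include <stdbool.h>

#define NR_CPUS 4

enum boot_method { BOOT_SBI_HSM, BOOT_SPINWAIT };

/* Illustrative: cpu 2 came up via spinwait, the rest via SBI HSM. */
static const enum boot_method cpu_boot_method[NR_CPUS] = {
	BOOT_SBI_HSM, BOOT_SBI_HSM, BOOT_SPINWAIT, BOOT_SBI_HSM,
};

static bool kexec_allowed(void)
{
	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		if (cpu_boot_method[cpu] == BOOT_SPINWAIT)
			return false;	/* ordered (SBI HSM) booting required */
	return true;
}

int main(void)
{
	printf("kexec %s\n",
	       kexec_allowed() ? "allowed" : "blocked: a cpu booted via spinwait");
	return 0;
}

Run as-is it reports kexec blocked; change cpu 2 to BOOT_SBI_HSM and it reports allowed.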
On Mon, Dec 13, 2021 at 4:59 AM Marc Zyngier <maz@kernel.org> wrote: > > On 2021-12-04 00:20, Atish Patra wrote: > > From: Atish Patra <atishp@rivosinc.com> > > > > The __cpu_up_stack/task_pointer array is only used for spinwait method > > now. The per cpu array based lookup is also fragile for platforms with > > discontiguous/sparse hartids. The spinwait method is only used for > > M-mode Linux or older firmwares without SBI HSM extension. For general > > Linux systems, ordered booting method is preferred anyways to support > > cpu hotplug and kexec. > > > > Make sure that __cpu_up_stack/task_pointer is only used for spinwait > > method. Take this opportunity to rename it to > > __cpu_spinwait_stack/task_pointer to emphasize the purpose as well. > > > > Signed-off-by: Atish Patra <atishp@rivosinc.com> > > --- > > arch/riscv/include/asm/cpu_ops.h | 2 -- > > arch/riscv/kernel/cpu_ops.c | 16 ---------------- > > arch/riscv/kernel/cpu_ops_spinwait.c | 27 ++++++++++++++++++++++++++- > > arch/riscv/kernel/head.S | 4 ++-- > > arch/riscv/kernel/head.h | 4 ++-- > > 5 files changed, 30 insertions(+), 23 deletions(-) > > > > diff --git a/arch/riscv/include/asm/cpu_ops.h > > b/arch/riscv/include/asm/cpu_ops.h > > index a8ec3c5c1bd2..134590f1b843 100644 > > --- a/arch/riscv/include/asm/cpu_ops.h > > +++ b/arch/riscv/include/asm/cpu_ops.h > > @@ -40,7 +40,5 @@ struct cpu_operations { > > > > extern const struct cpu_operations *cpu_ops[NR_CPUS]; > > void __init cpu_set_ops(int cpu); > > -void cpu_update_secondary_bootdata(unsigned int cpuid, > > - struct task_struct *tidle); > > > > #endif /* ifndef __ASM_CPU_OPS_H */ > > diff --git a/arch/riscv/kernel/cpu_ops.c b/arch/riscv/kernel/cpu_ops.c > > index 3f5a38b03044..c1e30f403c3b 100644 > > --- a/arch/riscv/kernel/cpu_ops.c > > +++ b/arch/riscv/kernel/cpu_ops.c > > @@ -8,31 +8,15 @@ > > #include <linux/of.h> > > #include <linux/string.h> > > #include <linux/sched.h> > > -#include <linux/sched/task_stack.h> > > #include <asm/cpu_ops.h> > > #include <asm/sbi.h> > > #include <asm/smp.h> > > > > const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init; > > > > -void *__cpu_up_stack_pointer[NR_CPUS] __section(".data"); > > -void *__cpu_up_task_pointer[NR_CPUS] __section(".data"); > > - > > extern const struct cpu_operations cpu_ops_sbi; > > extern const struct cpu_operations cpu_ops_spinwait; > > > > -void cpu_update_secondary_bootdata(unsigned int cpuid, > > - struct task_struct *tidle) > > -{ > > - int hartid = cpuid_to_hartid_map(cpuid); > > - > > - /* Make sure tidle is updated */ > > - smp_mb(); > > - WRITE_ONCE(__cpu_up_stack_pointer[hartid], > > - task_stack_page(tidle) + THREAD_SIZE); > > - WRITE_ONCE(__cpu_up_task_pointer[hartid], tidle); > > -} > > - > > void __init cpu_set_ops(int cpuid) > > { > > #if IS_ENABLED(CONFIG_RISCV_SBI) > > diff --git a/arch/riscv/kernel/cpu_ops_spinwait.c > > b/arch/riscv/kernel/cpu_ops_spinwait.c > > index b2c957bb68c1..9f398eb94f7a 100644 > > --- a/arch/riscv/kernel/cpu_ops_spinwait.c > > +++ b/arch/riscv/kernel/cpu_ops_spinwait.c > > @@ -6,11 +6,36 @@ > > #include <linux/errno.h> > > #include <linux/of.h> > > #include <linux/string.h> > > +#include <linux/sched/task_stack.h> > > #include <asm/cpu_ops.h> > > #include <asm/sbi.h> > > #include <asm/smp.h> > > > > const struct cpu_operations cpu_ops_spinwait; > > +void *__cpu_spinwait_stack_pointer[NR_CPUS] __section(".data"); > > +void *__cpu_spinwait_task_pointer[NR_CPUS] __section(".data"); > > + > > +static void cpu_update_secondary_bootdata(unsigned int cpuid, > 
> + struct task_struct *tidle) > > +{ > > + int hartid = cpuid_to_hartid_map(cpuid); > > + > > + /* > > + * The hartid must be less than NR_CPUS to avoid out-of-bound access > > + * errors for __cpu_spinwait_stack/task_pointer. That is not always > > possible > > + * for platforms with discontiguous hartid numbering scheme. That's > > why > > + * spinwait booting is not the recommended approach for any platforms > > + * and will be removed in future. > > How can you do that? Yes, spinning schemes are terrible. > However, once you started supporting them, you are stuck. > This was a typo. It is supposed to say "disabled" in the future based on other configs. For now, it is enabled by default. Any platform with sparse hartid needs to disable it in their own config. In the future, we may be able to make it only available to M-mode Linux if we can ensure that nobody is running older firmware without SBI HSM extension. > Best case, you can have an allow-list and only allow some > older platforms to use them. You can also make some features > dependent on non-spin schemes (kexec being one). > > But dropping support isn't a valid option, I'm afraid. > > Thanks, > > M. > -- > Jazz is not dead. It just smells funny...
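A standalone sketch (plain C) of the config gating Atish describes: with a hypothetical build option such as CONFIG_RISCV_BOOT_SPINWAIT turned off, boot-method selection never falls back to spinwait, so sparse-hartid platforms never touch the per-hart arrays. The option name, the has_sbi_hsm flag, and pick_boot_method() are illustrative assumptions, not the kernel's selection code.

#include <stdio.h>
#include <stdbool.h>

/* #define CONFIG_RISCV_BOOT_SPINWAIT 1   -- uncomment to allow the fallback */

static const char *pick_boot_method(bool has_sbi_hsm)
{
	if (has_sbi_hsm)
		return "sbi-hsm (ordered booting)";
#ifdef CONFIG_RISCV_BOOT_SPINWAIT
	return "spinwait";
#else
	return "none (secondary cpus stay offline)";
#endif
}

int main(void)
{
	printf("firmware with HSM:    %s\n", pick_boot_method(true));
	printf("firmware without HSM: %s\n", pick_boot_method(false));
	return 0;
}

With the macro left undefined, the no-HSM case reports that secondary cpus stay offline instead of falling back to spinwait.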
diff --git a/arch/riscv/include/asm/cpu_ops.h b/arch/riscv/include/asm/cpu_ops.h index a8ec3c5c1bd2..134590f1b843 100644 --- a/arch/riscv/include/asm/cpu_ops.h +++ b/arch/riscv/include/asm/cpu_ops.h @@ -40,7 +40,5 @@ struct cpu_operations { extern const struct cpu_operations *cpu_ops[NR_CPUS]; void __init cpu_set_ops(int cpu); -void cpu_update_secondary_bootdata(unsigned int cpuid, - struct task_struct *tidle); #endif /* ifndef __ASM_CPU_OPS_H */ diff --git a/arch/riscv/kernel/cpu_ops.c b/arch/riscv/kernel/cpu_ops.c index 3f5a38b03044..c1e30f403c3b 100644 --- a/arch/riscv/kernel/cpu_ops.c +++ b/arch/riscv/kernel/cpu_ops.c @@ -8,31 +8,15 @@ #include <linux/of.h> #include <linux/string.h> #include <linux/sched.h> -#include <linux/sched/task_stack.h> #include <asm/cpu_ops.h> #include <asm/sbi.h> #include <asm/smp.h> const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init; -void *__cpu_up_stack_pointer[NR_CPUS] __section(".data"); -void *__cpu_up_task_pointer[NR_CPUS] __section(".data"); - extern const struct cpu_operations cpu_ops_sbi; extern const struct cpu_operations cpu_ops_spinwait; -void cpu_update_secondary_bootdata(unsigned int cpuid, - struct task_struct *tidle) -{ - int hartid = cpuid_to_hartid_map(cpuid); - - /* Make sure tidle is updated */ - smp_mb(); - WRITE_ONCE(__cpu_up_stack_pointer[hartid], - task_stack_page(tidle) + THREAD_SIZE); - WRITE_ONCE(__cpu_up_task_pointer[hartid], tidle); -} - void __init cpu_set_ops(int cpuid) { #if IS_ENABLED(CONFIG_RISCV_SBI) diff --git a/arch/riscv/kernel/cpu_ops_spinwait.c b/arch/riscv/kernel/cpu_ops_spinwait.c index b2c957bb68c1..9f398eb94f7a 100644 --- a/arch/riscv/kernel/cpu_ops_spinwait.c +++ b/arch/riscv/kernel/cpu_ops_spinwait.c @@ -6,11 +6,36 @@ #include <linux/errno.h> #include <linux/of.h> #include <linux/string.h> +#include <linux/sched/task_stack.h> #include <asm/cpu_ops.h> #include <asm/sbi.h> #include <asm/smp.h> const struct cpu_operations cpu_ops_spinwait; +void *__cpu_spinwait_stack_pointer[NR_CPUS] __section(".data"); +void *__cpu_spinwait_task_pointer[NR_CPUS] __section(".data"); + +static void cpu_update_secondary_bootdata(unsigned int cpuid, + struct task_struct *tidle) +{ + int hartid = cpuid_to_hartid_map(cpuid); + + /* + * The hartid must be less than NR_CPUS to avoid out-of-bound access + * errors for __cpu_spinwait_stack/task_pointer. That is not always possible + * for platforms with discontiguous hartid numbering scheme. That's why + * spinwait booting is not the recommended approach for any platforms + * and will be removed in future. + */ + if (hartid == INVALID_HARTID || hartid >= NR_CPUS) + return; + + /* Make sure tidle is updated */ + smp_mb(); + WRITE_ONCE(__cpu_spinwait_stack_pointer[hartid], + task_stack_page(tidle) + THREAD_SIZE); + WRITE_ONCE(__cpu_spinwait_task_pointer[hartid], tidle); +} static int spinwait_cpu_prepare(unsigned int cpuid) { @@ -28,7 +53,7 @@ static int spinwait_cpu_start(unsigned int cpuid, struct task_struct *tidle) * selects the first cpu to boot the kernel and causes the remainder * of the cpus to spin in a loop waiting for their stack pointer to be * setup by that main cpu. Writing to bootdata - * (i.e __cpu_up_stack_pointer) signals to the spinning cpus that they + * (i.e __cpu_spinwait_stack_pointer) signals to the spinning cpus that they * can continue the boot process. 
*/ cpu_update_secondary_bootdata(cpuid, tidle); diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 40d4c625513c..6f8e99eac6a1 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -347,9 +347,9 @@ clear_bss_done: csrw CSR_TVEC, a3 slli a3, a0, LGREG - la a1, __cpu_up_stack_pointer + la a1, __cpu_spinwait_stack_pointer XIP_FIXUP_OFFSET a1 - la a2, __cpu_up_task_pointer + la a2, __cpu_spinwait_task_pointer XIP_FIXUP_OFFSET a2 add a1, a3, a1 add a2, a3, a2 diff --git a/arch/riscv/kernel/head.h b/arch/riscv/kernel/head.h index aabbc3ac3e48..5393cca77790 100644 --- a/arch/riscv/kernel/head.h +++ b/arch/riscv/kernel/head.h @@ -16,7 +16,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa); asmlinkage void __init __copy_data(void); #endif -extern void *__cpu_up_stack_pointer[]; -extern void *__cpu_up_task_pointer[]; +extern void *__cpu_spinwait_stack_pointer[]; +extern void *__cpu_spinwait_task_pointer[]; #endif /* __ASM_HEAD_H */
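For readers unfamiliar with the handshake the head.S hunk implements, here is a standalone userspace model (pthreads plus C11 atomics, not the kernel code): each secondary "hart" spins until its slot in the stack-pointer table becomes non-NULL, and the boot "hart" publishes the pointer with release ordering, standing in for the WRITE_ONCE()/smp_mb() pairing in the patch. All names and the thread setup are illustrative assumptions.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NR_CPUS 2

static _Atomic(void *) spinwait_stack_pointer[NR_CPUS];

static void *secondary_hart(void *arg)
{
	long hartid = (long)arg;
	void *sp;

	/* Mirrors the head.S loop: re-read our slot until the boot hart fills it. */
	while (!(sp = atomic_load_explicit(&spinwait_stack_pointer[hartid],
					   memory_order_acquire)))
		;
	printf("hart %ld released, stack %p\n", hartid, sp);
	return NULL;
}

int main(void)
{
	static char stack[4096];
	pthread_t t;

	pthread_create(&t, NULL, secondary_hart, (void *)1L);
	/* Publish the stack pointer; release ordering plays the smp_mb() role. */
	atomic_store_explicit(&spinwait_stack_pointer[1],
			      (void *)(stack + sizeof(stack)), memory_order_release);
	pthread_join(t, NULL);
	return 0;
}

Compile with cc -pthread; the secondary thread only prints its message once the main thread has published the stack pointer, which is the release point the renamed __cpu_spinwait_stack_pointer array provides in the kernel.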