Message ID | 20220925175356.681-3-jszhang@kernel.org (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | riscv: entry: further clean up and VMAP_STACK fix | expand |
On Mon, Sep 26, 2022 at 2:03 AM Jisheng Zhang <jszhang@kernel.org> wrote: > > The ret_from_kernel_thread() behaves similarly with ret_from_fork(), > the only difference is whether call the fn(arg) or not, this can be > acchieved by testing fn is NULL or not, I.E s0 is 0 or not. > > Signed-off-by: Jisheng Zhang <jszhang@kernel.org> > --- > arch/riscv/kernel/entry.S | 11 +++-------- > arch/riscv/kernel/process.c | 5 ++--- > 2 files changed, 5 insertions(+), 11 deletions(-) > > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S > index 2207cf44a3bc..a3e1ed2fa2ac 100644 > --- a/arch/riscv/kernel/entry.S > +++ b/arch/riscv/kernel/entry.S > @@ -323,20 +323,15 @@ END(handle_kernel_stack_overflow) > > ENTRY(ret_from_fork) > call schedule_tail > - move a0, sp /* pt_regs */ > - la ra, ret_from_exception > - tail syscall_exit_to_user_mode > -ENDPROC(ret_from_fork) > - > -ENTRY(ret_from_kernel_thread) > - call schedule_tail > + beqz s0, 1f /* not from kernel thread */ We can't use s0 as condition for ret_from_fork/ret_from_kernel_thread. The s0=0 is also okay for ret_from_fork. 
/* p->thread holds context to be restored by __switch_to() */ if (unlikely(args->fn)) { /* Kernel thread */ memset(childregs, 0, sizeof(struct pt_regs)); childregs->gp = gp_in_global; /* Supervisor/Machine, irqs on: */ childregs->status = SR_PP | SR_PIE; p->thread.ra = (unsigned long)ret_from_kernel_thread; p->thread.s[0] = (unsigned long)args->fn; p->thread.s[1] = (unsigned long)args->fn_arg; } else { *childregs = *(current_pt_regs()); ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ if (usp) /* User fork */ childregs->sp = usp; if (clone_flags & CLONE_SETTLS) childregs->tp = tls; childregs->a0 = 0; /* Return value of fork() */ p->thread.ra = (unsigned long)ret_from_fork; } p->thread.sp = (unsigned long)childregs; /* kernel sp */ > /* Call fn(arg) */ > move a0, s1 > jalr s0 > +1: > move a0, sp /* pt_regs */ > la ra, ret_from_exception > tail syscall_exit_to_user_mode > -ENDPROC(ret_from_kernel_thread) > +ENDPROC(ret_from_fork) > > #ifdef CONFIG_IRQ_STACKS > ENTRY(call_on_stack) > diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c > index ceb9ebab6558..67e7cd123ceb 100644 > --- a/arch/riscv/kernel/process.c > +++ b/arch/riscv/kernel/process.c > @@ -34,7 +34,6 @@ EXPORT_SYMBOL(__stack_chk_guard); > #endif > > extern asmlinkage void ret_from_fork(void); > -extern asmlinkage void ret_from_kernel_thread(void); > > void arch_cpu_idle(void) > { > @@ -172,7 +171,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) > /* Supervisor/Machine, irqs on: */ > childregs->status = SR_PP | SR_PIE; > > - p->thread.ra = (unsigned long)ret_from_kernel_thread; > p->thread.s[0] = (unsigned long)args->fn; > p->thread.s[1] = (unsigned long)args->fn_arg; > } else { > @@ -182,8 +180,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) > if (clone_flags & CLONE_SETTLS) > childregs->tp = tls; > childregs->a0 = 0; /* Return value of fork() */ > - p->thread.ra = (unsigned long)ret_from_fork; > + p->thread.s[0] = 0; > } > + 
p->thread.ra = (unsigned long)ret_from_fork; > p->thread.sp = (unsigned long)childregs; /* kernel sp */ > return 0; > } > -- > 2.34.1 >
On Mon, Sep 26, 2022 at 07:25:30AM +0800, Guo Ren wrote: > On Mon, Sep 26, 2022 at 2:03 AM Jisheng Zhang <jszhang@kernel.org> wrote: > > > > The ret_from_kernel_thread() behaves similarly with ret_from_fork(), > > the only difference is whether call the fn(arg) or not, this can be > > acchieved by testing fn is NULL or not, I.E s0 is 0 or not. > > > > Signed-off-by: Jisheng Zhang <jszhang@kernel.org> > > --- > > arch/riscv/kernel/entry.S | 11 +++-------- > > arch/riscv/kernel/process.c | 5 ++--- > > 2 files changed, 5 insertions(+), 11 deletions(-) > > > > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S > > index 2207cf44a3bc..a3e1ed2fa2ac 100644 > > --- a/arch/riscv/kernel/entry.S > > +++ b/arch/riscv/kernel/entry.S > > @@ -323,20 +323,15 @@ END(handle_kernel_stack_overflow) > > > > ENTRY(ret_from_fork) > > call schedule_tail > > - move a0, sp /* pt_regs */ > > - la ra, ret_from_exception > > - tail syscall_exit_to_user_mode > > -ENDPROC(ret_from_fork) > > - > > -ENTRY(ret_from_kernel_thread) > > - call schedule_tail > > + beqz s0, 1f /* not from kernel thread */ Hi Guo, > We can't use s0 as condition for ret_from_fork/ret_from_kernel_thread. > The s0=0 is also okay for ret_from_fork. IIUC, in ret_from_fork, the s0 comes p->thread.s[0] rather than s0 in pt_regs. 
> > /* p->thread holds context to be restored by __switch_to() */ > if (unlikely(args->fn)) { > /* Kernel thread */ > memset(childregs, 0, sizeof(struct pt_regs)); > childregs->gp = gp_in_global; > /* Supervisor/Machine, irqs on: */ > childregs->status = SR_PP | SR_PIE; > > p->thread.ra = (unsigned long)ret_from_kernel_thread; > p->thread.s[0] = (unsigned long)args->fn; > p->thread.s[1] = (unsigned long)args->fn_arg; > } else { > *childregs = *(current_pt_regs()); > ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ > if (usp) /* User fork */ > childregs->sp = usp; > if (clone_flags & CLONE_SETTLS) > childregs->tp = tls; > childregs->a0 = 0; /* Return value of fork() */ > p->thread.ra = (unsigned long)ret_from_fork; > } > p->thread.sp = (unsigned long)childregs; /* kernel sp */ > <snip> > > @@ -182,8 +180,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) > > if (clone_flags & CLONE_SETTLS) > > childregs->tp = tls; > > childregs->a0 = 0; /* Return value of fork() */ > > - p->thread.ra = (unsigned long)ret_from_fork; > > + p->thread.s[0] = 0; Here we assign 0 to p->thread.s[0] Thanks
On Tue, Sep 27, 2022 at 12:14 AM Jisheng Zhang <jszhang@kernel.org> wrote: > > On Mon, Sep 26, 2022 at 07:25:30AM +0800, Guo Ren wrote: > > On Mon, Sep 26, 2022 at 2:03 AM Jisheng Zhang <jszhang@kernel.org> wrote: > > > > > > The ret_from_kernel_thread() behaves similarly with ret_from_fork(), > > > the only difference is whether call the fn(arg) or not, this can be > > > acchieved by testing fn is NULL or not, I.E s0 is 0 or not. > > > > > > Signed-off-by: Jisheng Zhang <jszhang@kernel.org> > > > --- > > > arch/riscv/kernel/entry.S | 11 +++-------- > > > arch/riscv/kernel/process.c | 5 ++--- > > > 2 files changed, 5 insertions(+), 11 deletions(-) > > > > > > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S > > > index 2207cf44a3bc..a3e1ed2fa2ac 100644 > > > --- a/arch/riscv/kernel/entry.S > > > +++ b/arch/riscv/kernel/entry.S > > > @@ -323,20 +323,15 @@ END(handle_kernel_stack_overflow) > > > > > > ENTRY(ret_from_fork) > > > call schedule_tail > > > - move a0, sp /* pt_regs */ > > > - la ra, ret_from_exception > > > - tail syscall_exit_to_user_mode > > > -ENDPROC(ret_from_fork) > > > - > > > -ENTRY(ret_from_kernel_thread) > > > - call schedule_tail > > > + beqz s0, 1f /* not from kernel thread */ > > Hi Guo, > > > We can't use s0 as condition for ret_from_fork/ret_from_kernel_thread. > > The s0=0 is also okay for ret_from_fork. > > IIUC, in ret_from_fork, the s0 comes p->thread.s[0] rather than s0 in > pt_regs. Yes, you are correct. 
> > > > > /* p->thread holds context to be restored by __switch_to() */ > > if (unlikely(args->fn)) { > > /* Kernel thread */ > > memset(childregs, 0, sizeof(struct pt_regs)); > > childregs->gp = gp_in_global; > > /* Supervisor/Machine, irqs on: */ > > childregs->status = SR_PP | SR_PIE; > > > > p->thread.ra = (unsigned long)ret_from_kernel_thread; > > p->thread.s[0] = (unsigned long)args->fn; > > p->thread.s[1] = (unsigned long)args->fn_arg; > > } else { > > *childregs = *(current_pt_regs()); > > ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Oh, I'm wrong, It's switch_to_restore -> exception_restore. > > if (usp) /* User fork */ > > childregs->sp = usp; > > if (clone_flags & CLONE_SETTLS) > > childregs->tp = tls; > > childregs->a0 = 0; /* Return value of fork() */ > > p->thread.ra = (unsigned long)ret_from_fork; > > } > > p->thread.sp = (unsigned long)childregs; /* kernel sp */ > > > > <snip> > > > > @@ -182,8 +180,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) > > > if (clone_flags & CLONE_SETTLS) > > > childregs->tp = tls; > > > childregs->a0 = 0; /* Return value of fork() */ > > > - p->thread.ra = (unsigned long)ret_from_fork; > > > + p->thread.s[0] = 0; > > Here we assign 0 to p->thread.s[0] I missed that. Merge thread & fork is not a good idea, and using fp as the flag is so implicit. 
➜ linux git:(rv64sv32) grep ret_from_fork arch -r | grep entry.S arch/arc/kernel/entry.S:ENTRY(ret_from_fork) arch/arc/kernel/entry.S:END(ret_from_fork) arch/csky/kernel/entry.S:ENTRY(ret_from_fork) arch/x86/kernel/process_32.c: * the task-switch, and shows up in ret_from_fork in entry.S, arch/alpha/kernel/entry.S: .globl ret_from_fork arch/alpha/kernel/entry.S: .ent ret_from_fork arch/alpha/kernel/entry.S:ret_from_fork: arch/alpha/kernel/entry.S:.end ret_from_fork arch/loongarch/kernel/entry.S:SYM_CODE_START(ret_from_fork) arch/loongarch/kernel/entry.S:SYM_CODE_END(ret_from_fork) arch/hexagon/kernel/vm_entry.S: .globl ret_from_fork arch/hexagon/kernel/vm_entry.S:ret_from_fork: arch/microblaze/kernel/entry.S: (copy_thread makes ret_from_fork the return address in each new thread's arch/microblaze/kernel/entry.S:C_ENTRY(ret_from_fork): arch/m68k/kernel/entry.S:ENTRY(ret_from_fork) arch/arm64/kernel/entry.S:SYM_CODE_START(ret_from_fork) arch/arm64/kernel/entry.S:SYM_CODE_END(ret_from_fork) arch/arm64/kernel/entry.S:NOKPROBE(ret_from_fork) arch/riscv/kernel/entry.S:ENTRY(ret_from_fork) arch/riscv/kernel/entry.S:ENDPROC(ret_from_fork) arch/s390/kernel/entry.S:# a new process exits the kernel with ret_from_fork arch/s390/kernel/entry.S:ENTRY(ret_from_fork) arch/s390/kernel/entry.S: brasl %r14,__ret_from_fork arch/s390/kernel/entry.S:ENDPROC(ret_from_fork) arch/mips/kernel/entry.S:FEXPORT(ret_from_fork) arch/openrisc/kernel/entry.S: /* All syscalls return here... 
just pay attention to ret_from_fork arch/openrisc/kernel/entry.S:ENTRY(ret_from_fork) arch/openrisc/kernel/entry.S: * that may be either schedule(), ret_from_fork(), or arch/nios2/kernel/entry.S:ENTRY(ret_from_fork) arch/xtensa/kernel/entry.S:ENTRY(ret_from_fork) arch/xtensa/kernel/entry.S:ENDPROC(ret_from_fork) arch/sparc/kernel/entry.S: .globl ret_from_fork arch/sparc/kernel/entry.S:ret_from_fork: ➜ linux git:(rv64sv32) grep ret_from_kernel_thread arch -r | grep entry.S arch/csky/kernel/entry.S:ENTRY(ret_from_kernel_thread) arch/alpha/kernel/entry.S: .globl ret_from_kernel_thread arch/alpha/kernel/entry.S: .ent ret_from_kernel_thread arch/alpha/kernel/entry.S:ret_from_kernel_thread: arch/alpha/kernel/entry.S:.end ret_from_kernel_thread arch/parisc/kernel/entry.S:ENTRY(ret_from_kernel_thread) arch/parisc/kernel/entry.S:END(ret_from_kernel_thread) arch/loongarch/kernel/entry.S:SYM_CODE_START(ret_from_kernel_thread) arch/loongarch/kernel/entry.S:SYM_CODE_END(ret_from_kernel_thread) arch/microblaze/kernel/entry.S:C_ENTRY(ret_from_kernel_thread): arch/m68k/kernel/entry.S:ENTRY(ret_from_kernel_thread) arch/riscv/kernel/entry.S:ENTRY(ret_from_kernel_thread) arch/riscv/kernel/entry.S:ENDPROC(ret_from_kernel_thread) arch/mips/kernel/entry.S:FEXPORT(ret_from_kernel_thread) arch/openrisc/kernel/entry.S: * ret_from_kernel_thread(). If we are returning to a new thread, arch/nios2/kernel/entry.S:ENTRY(ret_from_kernel_thread) arch/xtensa/kernel/entry.S:ENTRY(ret_from_kernel_thread) arch/xtensa/kernel/entry.S:ENDPROC(ret_from_kernel_thread) arch/sparc/kernel/entry.S: .globl ret_from_kernel_thread arch/sparc/kernel/entry.S:ret_from_kernel_thread: Many architectures use a similar style. If you want to continue the patch, I think you should first rename ret_from_fork properly, and give an explicit flag definition, not just setting fp = 0. > > Thanks
On Tue, Sep 27, 2022 at 07:55:27AM +0800, Guo Ren wrote: > On Tue, Sep 27, 2022 at 12:14 AM Jisheng Zhang <jszhang@kernel.org> wrote: > > > > On Mon, Sep 26, 2022 at 07:25:30AM +0800, Guo Ren wrote: > > > On Mon, Sep 26, 2022 at 2:03 AM Jisheng Zhang <jszhang@kernel.org> wrote: > > > > > > > > The ret_from_kernel_thread() behaves similarly with ret_from_fork(), > > > > the only difference is whether call the fn(arg) or not, this can be > > > > acchieved by testing fn is NULL or not, I.E s0 is 0 or not. > > > > > > > > Signed-off-by: Jisheng Zhang <jszhang@kernel.org> > > > > --- > > > > arch/riscv/kernel/entry.S | 11 +++-------- > > > > arch/riscv/kernel/process.c | 5 ++--- > > > > 2 files changed, 5 insertions(+), 11 deletions(-) > > > > > > > > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S > > > > index 2207cf44a3bc..a3e1ed2fa2ac 100644 > > > > --- a/arch/riscv/kernel/entry.S > > > > +++ b/arch/riscv/kernel/entry.S > > > > @@ -323,20 +323,15 @@ END(handle_kernel_stack_overflow) > > > > > > > > ENTRY(ret_from_fork) > > > > call schedule_tail > > > > - move a0, sp /* pt_regs */ > > > > - la ra, ret_from_exception > > > > - tail syscall_exit_to_user_mode > > > > -ENDPROC(ret_from_fork) > > > > - > > > > -ENTRY(ret_from_kernel_thread) > > > > - call schedule_tail > > > > + beqz s0, 1f /* not from kernel thread */ > > > > Hi Guo, > > > > > We can't use s0 as condition for ret_from_fork/ret_from_kernel_thread. > > > The s0=0 is also okay for ret_from_fork. > > > > IIUC, in ret_from_fork, the s0 comes p->thread.s[0] rather than s0 in > > pt_regs. > Yes, you are correct. 
> > > > > > > > > /* p->thread holds context to be restored by __switch_to() */ > > > if (unlikely(args->fn)) { > > > /* Kernel thread */ > > > memset(childregs, 0, sizeof(struct pt_regs)); > > > childregs->gp = gp_in_global; > > > /* Supervisor/Machine, irqs on: */ > > > childregs->status = SR_PP | SR_PIE; > > > > > > p->thread.ra = (unsigned long)ret_from_kernel_thread; > > > p->thread.s[0] = (unsigned long)args->fn; > > > p->thread.s[1] = (unsigned long)args->fn_arg; > > > } else { > > > *childregs = *(current_pt_regs()); > > > ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ > Oh, I'm wrong, It's switch_to_restore -> exception_restore. > > > > if (usp) /* User fork */ > > > childregs->sp = usp; > > > if (clone_flags & CLONE_SETTLS) > > > childregs->tp = tls; > > > childregs->a0 = 0; /* Return value of fork() */ > > > p->thread.ra = (unsigned long)ret_from_fork; > > > } > > > p->thread.sp = (unsigned long)childregs; /* kernel sp */ > > > > > > > <snip> > > > > > > @@ -182,8 +180,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) > > > > if (clone_flags & CLONE_SETTLS) > > > > childregs->tp = tls; > > > > childregs->a0 = 0; /* Return value of fork() */ > > > > - p->thread.ra = (unsigned long)ret_from_fork; > > > > + p->thread.s[0] = 0; > > > > Here we assign 0 to p->thread.s[0] > I missed that. > > Merge thread & fork is not a good idea, and using fp as the flag is so implicit. 
> > ➜ linux git:(rv64sv32) grep ret_from_fork arch -r | grep entry.S > arch/arc/kernel/entry.S:ENTRY(ret_from_fork) > arch/arc/kernel/entry.S:END(ret_from_fork) > arch/csky/kernel/entry.S:ENTRY(ret_from_fork) > arch/x86/kernel/process_32.c: * the task-switch, and shows up in > ret_from_fork in entry.S, > arch/alpha/kernel/entry.S: .globl ret_from_fork > arch/alpha/kernel/entry.S: .ent ret_from_fork > arch/alpha/kernel/entry.S:ret_from_fork: > arch/alpha/kernel/entry.S:.end ret_from_fork > arch/loongarch/kernel/entry.S:SYM_CODE_START(ret_from_fork) > arch/loongarch/kernel/entry.S:SYM_CODE_END(ret_from_fork) > arch/hexagon/kernel/vm_entry.S: .globl ret_from_fork > arch/hexagon/kernel/vm_entry.S:ret_from_fork: > arch/microblaze/kernel/entry.S: (copy_thread makes ret_from_fork the > return address in each new thread's > arch/microblaze/kernel/entry.S:C_ENTRY(ret_from_fork): > arch/m68k/kernel/entry.S:ENTRY(ret_from_fork) > arch/arm64/kernel/entry.S:SYM_CODE_START(ret_from_fork) > arch/arm64/kernel/entry.S:SYM_CODE_END(ret_from_fork) > arch/arm64/kernel/entry.S:NOKPROBE(ret_from_fork) > arch/riscv/kernel/entry.S:ENTRY(ret_from_fork) > arch/riscv/kernel/entry.S:ENDPROC(ret_from_fork) > arch/s390/kernel/entry.S:# a new process exits the kernel with ret_from_fork > arch/s390/kernel/entry.S:ENTRY(ret_from_fork) > arch/s390/kernel/entry.S: brasl %r14,__ret_from_fork > arch/s390/kernel/entry.S:ENDPROC(ret_from_fork) > arch/mips/kernel/entry.S:FEXPORT(ret_from_fork) > arch/openrisc/kernel/entry.S: /* All syscalls return here... 
just > pay attention to ret_from_fork > arch/openrisc/kernel/entry.S:ENTRY(ret_from_fork) > arch/openrisc/kernel/entry.S: * that may be either schedule(), > ret_from_fork(), or > arch/nios2/kernel/entry.S:ENTRY(ret_from_fork) > arch/xtensa/kernel/entry.S:ENTRY(ret_from_fork) > arch/xtensa/kernel/entry.S:ENDPROC(ret_from_fork) > arch/sparc/kernel/entry.S: .globl ret_from_fork > arch/sparc/kernel/entry.S:ret_from_fork: > ➜ linux git:(rv64sv32) grep ret_from_kernel_thread arch -r | grep entry.S > arch/csky/kernel/entry.S:ENTRY(ret_from_kernel_thread) > arch/alpha/kernel/entry.S: .globl ret_from_kernel_thread > arch/alpha/kernel/entry.S: .ent ret_from_kernel_thread > arch/alpha/kernel/entry.S:ret_from_kernel_thread: > arch/alpha/kernel/entry.S:.end ret_from_kernel_thread > arch/parisc/kernel/entry.S:ENTRY(ret_from_kernel_thread) > arch/parisc/kernel/entry.S:END(ret_from_kernel_thread) > arch/loongarch/kernel/entry.S:SYM_CODE_START(ret_from_kernel_thread) > arch/loongarch/kernel/entry.S:SYM_CODE_END(ret_from_kernel_thread) > arch/microblaze/kernel/entry.S:C_ENTRY(ret_from_kernel_thread): > arch/m68k/kernel/entry.S:ENTRY(ret_from_kernel_thread) > arch/riscv/kernel/entry.S:ENTRY(ret_from_kernel_thread) > arch/riscv/kernel/entry.S:ENDPROC(ret_from_kernel_thread) > arch/mips/kernel/entry.S:FEXPORT(ret_from_kernel_thread) > arch/openrisc/kernel/entry.S: * ret_from_kernel_thread(). If we > are returning to a new thread, > arch/nios2/kernel/entry.S:ENTRY(ret_from_kernel_thread) > arch/xtensa/kernel/entry.S:ENTRY(ret_from_kernel_thread) > arch/xtensa/kernel/entry.S:ENDPROC(ret_from_kernel_thread) > arch/sparc/kernel/entry.S: .globl ret_from_kernel_thread > arch/sparc/kernel/entry.S:ret_from_kernel_thread: > > Many architectures use a similar style. If you want to continue the > patch, I think you should first rename ret_from_fork properly, and > give an explicit flag definition, not just setting fp = 0. 
> The above list also shows that many architectures don't have a ret_from_kernel_thread; I think the reason is simply that it behaves similarly to ret_from_fork. As for the flag, IMHO, we may have missed something, namely clearing the s[12] array in thread_struct on user fork: s[12] may contain random kernel memory content, which may eventually be leaked to userspace. This is a security hole. A trivial memset(0) patch can fix it; after this fix, checking s[0] is straightforward. diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 67e7cd123ceb..50a0f7e4327c 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -174,6 +174,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) p->thread.s[0] = (unsigned long)args->fn; p->thread.s[1] = (unsigned long)args->fn_arg; } else { + memset(&p->thread.s, 0, sizeof(p->thread.s)); *childregs = *(current_pt_regs()); if (usp) /* User fork */ childregs->sp = usp;
On Thu, Sep 29, 2022 at 12:49 AM Jisheng Zhang <jszhang@kernel.org> wrote: > > On Tue, Sep 27, 2022 at 07:55:27AM +0800, Guo Ren wrote: > > On Tue, Sep 27, 2022 at 12:14 AM Jisheng Zhang <jszhang@kernel.org> wrote: > > > > > > On Mon, Sep 26, 2022 at 07:25:30AM +0800, Guo Ren wrote: > > > > On Mon, Sep 26, 2022 at 2:03 AM Jisheng Zhang <jszhang@kernel.org> wrote: > > > > > > > > > > The ret_from_kernel_thread() behaves similarly with ret_from_fork(), > > > > > the only difference is whether call the fn(arg) or not, this can be > > > > > acchieved by testing fn is NULL or not, I.E s0 is 0 or not. > > > > > > > > > > Signed-off-by: Jisheng Zhang <jszhang@kernel.org> > > > > > --- > > > > > arch/riscv/kernel/entry.S | 11 +++-------- > > > > > arch/riscv/kernel/process.c | 5 ++--- > > > > > 2 files changed, 5 insertions(+), 11 deletions(-) > > > > > > > > > > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S > > > > > index 2207cf44a3bc..a3e1ed2fa2ac 100644 > > > > > --- a/arch/riscv/kernel/entry.S > > > > > +++ b/arch/riscv/kernel/entry.S > > > > > @@ -323,20 +323,15 @@ END(handle_kernel_stack_overflow) > > > > > > > > > > ENTRY(ret_from_fork) > > > > > call schedule_tail > > > > > - move a0, sp /* pt_regs */ > > > > > - la ra, ret_from_exception > > > > > - tail syscall_exit_to_user_mode > > > > > -ENDPROC(ret_from_fork) > > > > > - > > > > > -ENTRY(ret_from_kernel_thread) > > > > > - call schedule_tail > > > > > + beqz s0, 1f /* not from kernel thread */ > > > > > > Hi Guo, > > > > > > > We can't use s0 as condition for ret_from_fork/ret_from_kernel_thread. > > > > The s0=0 is also okay for ret_from_fork. > > > > > > IIUC, in ret_from_fork, the s0 comes p->thread.s[0] rather than s0 in > > > pt_regs. > > Yes, you are correct. 
> > > > > > > > > > > > > /* p->thread holds context to be restored by __switch_to() */ > > > > if (unlikely(args->fn)) { > > > > /* Kernel thread */ > > > > memset(childregs, 0, sizeof(struct pt_regs)); > > > > childregs->gp = gp_in_global; > > > > /* Supervisor/Machine, irqs on: */ > > > > childregs->status = SR_PP | SR_PIE; > > > > > > > > p->thread.ra = (unsigned long)ret_from_kernel_thread; > > > > p->thread.s[0] = (unsigned long)args->fn; > > > > p->thread.s[1] = (unsigned long)args->fn_arg; > > > > } else { > > > > *childregs = *(current_pt_regs()); > > > > ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ > > Oh, I'm wrong, It's switch_to_restore -> exception_restore. > > > > > > if (usp) /* User fork */ > > > > childregs->sp = usp; > > > > if (clone_flags & CLONE_SETTLS) > > > > childregs->tp = tls; > > > > childregs->a0 = 0; /* Return value of fork() */ > > > > p->thread.ra = (unsigned long)ret_from_fork; > > > > } > > > > p->thread.sp = (unsigned long)childregs; /* kernel sp */ > > > > > > > > > > <snip> > > > > > > > > @@ -182,8 +180,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) > > > > > if (clone_flags & CLONE_SETTLS) > > > > > childregs->tp = tls; > > > > > childregs->a0 = 0; /* Return value of fork() */ > > > > > - p->thread.ra = (unsigned long)ret_from_fork; > > > > > + p->thread.s[0] = 0; > > > > > > Here we assign 0 to p->thread.s[0] > > I missed that. > > > > Merge thread & fork is not a good idea, and using fp as the flag is so implicit. 
> > > > ➜ linux git:(rv64sv32) grep ret_from_fork arch -r | grep entry.S > > arch/arc/kernel/entry.S:ENTRY(ret_from_fork) > > arch/arc/kernel/entry.S:END(ret_from_fork) > > arch/csky/kernel/entry.S:ENTRY(ret_from_fork) > > arch/x86/kernel/process_32.c: * the task-switch, and shows up in > > ret_from_fork in entry.S, > > arch/alpha/kernel/entry.S: .globl ret_from_fork > > arch/alpha/kernel/entry.S: .ent ret_from_fork > > arch/alpha/kernel/entry.S:ret_from_fork: > > arch/alpha/kernel/entry.S:.end ret_from_fork > > arch/loongarch/kernel/entry.S:SYM_CODE_START(ret_from_fork) > > arch/loongarch/kernel/entry.S:SYM_CODE_END(ret_from_fork) > > arch/hexagon/kernel/vm_entry.S: .globl ret_from_fork > > arch/hexagon/kernel/vm_entry.S:ret_from_fork: > > arch/microblaze/kernel/entry.S: (copy_thread makes ret_from_fork the > > return address in each new thread's > > arch/microblaze/kernel/entry.S:C_ENTRY(ret_from_fork): > > arch/m68k/kernel/entry.S:ENTRY(ret_from_fork) > > arch/arm64/kernel/entry.S:SYM_CODE_START(ret_from_fork) > > arch/arm64/kernel/entry.S:SYM_CODE_END(ret_from_fork) > > arch/arm64/kernel/entry.S:NOKPROBE(ret_from_fork) > > arch/riscv/kernel/entry.S:ENTRY(ret_from_fork) > > arch/riscv/kernel/entry.S:ENDPROC(ret_from_fork) > > arch/s390/kernel/entry.S:# a new process exits the kernel with ret_from_fork > > arch/s390/kernel/entry.S:ENTRY(ret_from_fork) > > arch/s390/kernel/entry.S: brasl %r14,__ret_from_fork > > arch/s390/kernel/entry.S:ENDPROC(ret_from_fork) > > arch/mips/kernel/entry.S:FEXPORT(ret_from_fork) > > arch/openrisc/kernel/entry.S: /* All syscalls return here... 
just > > pay attention to ret_from_fork > > arch/openrisc/kernel/entry.S:ENTRY(ret_from_fork) > > arch/openrisc/kernel/entry.S: * that may be either schedule(), > > ret_from_fork(), or > > arch/nios2/kernel/entry.S:ENTRY(ret_from_fork) > > arch/xtensa/kernel/entry.S:ENTRY(ret_from_fork) > > arch/xtensa/kernel/entry.S:ENDPROC(ret_from_fork) > > arch/sparc/kernel/entry.S: .globl ret_from_fork > > arch/sparc/kernel/entry.S:ret_from_fork: > > ➜ linux git:(rv64sv32) grep ret_from_kernel_thread arch -r | grep entry.S > > arch/csky/kernel/entry.S:ENTRY(ret_from_kernel_thread) > > arch/alpha/kernel/entry.S: .globl ret_from_kernel_thread > > arch/alpha/kernel/entry.S: .ent ret_from_kernel_thread > > arch/alpha/kernel/entry.S:ret_from_kernel_thread: > > arch/alpha/kernel/entry.S:.end ret_from_kernel_thread > > arch/parisc/kernel/entry.S:ENTRY(ret_from_kernel_thread) > > arch/parisc/kernel/entry.S:END(ret_from_kernel_thread) > > arch/loongarch/kernel/entry.S:SYM_CODE_START(ret_from_kernel_thread) > > arch/loongarch/kernel/entry.S:SYM_CODE_END(ret_from_kernel_thread) > > arch/microblaze/kernel/entry.S:C_ENTRY(ret_from_kernel_thread): > > arch/m68k/kernel/entry.S:ENTRY(ret_from_kernel_thread) > > arch/riscv/kernel/entry.S:ENTRY(ret_from_kernel_thread) > > arch/riscv/kernel/entry.S:ENDPROC(ret_from_kernel_thread) > > arch/mips/kernel/entry.S:FEXPORT(ret_from_kernel_thread) > > arch/openrisc/kernel/entry.S: * ret_from_kernel_thread(). If we > > are returning to a new thread, > > arch/nios2/kernel/entry.S:ENTRY(ret_from_kernel_thread) > > arch/xtensa/kernel/entry.S:ENTRY(ret_from_kernel_thread) > > arch/xtensa/kernel/entry.S:ENDPROC(ret_from_kernel_thread) > > arch/sparc/kernel/entry.S: .globl ret_from_kernel_thread > > arch/sparc/kernel/entry.S:ret_from_kernel_thread: > > > > Many architectures use a similar style. If you want to continue the > > patch, I think you should first rename ret_from_fork properly, and > > give an explicit flag definition, not just setting fp = 0. 
> > > > Above list also shows many architectures don't have a > ret_from_kernel_thread, I think the reason is simple it behaves > similarly as ret_from_fork. After looking at x86 & arm64, you've convinced me. Acked-by: Guo Ren <guoren@kernel.org> > As for flag, IMHO, we may missed something as clearing the s[12] > array in thread_struct when user fork, because s[12] may contain > random kernel memory content, which may be finally leaked to > userspace. This is a security hole. > > A trivial patch of memset(0) can fix it, after this fix, checking the > s[0] is straightforward. > > diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c > index 67e7cd123ceb..50a0f7e4327c 100644 > --- a/arch/riscv/kernel/process.c > +++ b/arch/riscv/kernel/process.c > @@ -174,6 +174,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) > p->thread.s[0] = (unsigned long)args->fn; > p->thread.s[1] = (unsigned long)args->fn_arg; > } else { > + memset(&p->thread.s, 0, sizeof(p->thread.s)); Good catch. s[12] may leave some information about the kernel. It could be a separate patch with a Fixes flag. > *childregs = *(current_pt_regs()); > if (usp) /* User fork */ > childregs->sp = usp; >
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 2207cf44a3bc..a3e1ed2fa2ac 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -323,20 +323,15 @@ END(handle_kernel_stack_overflow) ENTRY(ret_from_fork) call schedule_tail - move a0, sp /* pt_regs */ - la ra, ret_from_exception - tail syscall_exit_to_user_mode -ENDPROC(ret_from_fork) - -ENTRY(ret_from_kernel_thread) - call schedule_tail + beqz s0, 1f /* not from kernel thread */ /* Call fn(arg) */ move a0, s1 jalr s0 +1: move a0, sp /* pt_regs */ la ra, ret_from_exception tail syscall_exit_to_user_mode -ENDPROC(ret_from_kernel_thread) +ENDPROC(ret_from_fork) #ifdef CONFIG_IRQ_STACKS ENTRY(call_on_stack) diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index ceb9ebab6558..67e7cd123ceb 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -34,7 +34,6 @@ EXPORT_SYMBOL(__stack_chk_guard); #endif extern asmlinkage void ret_from_fork(void); -extern asmlinkage void ret_from_kernel_thread(void); void arch_cpu_idle(void) { @@ -172,7 +171,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) /* Supervisor/Machine, irqs on: */ childregs->status = SR_PP | SR_PIE; - p->thread.ra = (unsigned long)ret_from_kernel_thread; p->thread.s[0] = (unsigned long)args->fn; p->thread.s[1] = (unsigned long)args->fn_arg; } else { @@ -182,8 +180,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) if (clone_flags & CLONE_SETTLS) childregs->tp = tls; childregs->a0 = 0; /* Return value of fork() */ - p->thread.ra = (unsigned long)ret_from_fork; + p->thread.s[0] = 0; } + p->thread.ra = (unsigned long)ret_from_fork; p->thread.sp = (unsigned long)childregs; /* kernel sp */ return 0; }
ret_from_kernel_thread() behaves similarly to ret_from_fork(); the only difference is whether fn(arg) is called or not. This can be achieved by testing whether fn is NULL, i.e. whether s0 is 0. Signed-off-by: Jisheng Zhang <jszhang@kernel.org> --- arch/riscv/kernel/entry.S | 11 +++-------- arch/riscv/kernel/process.c | 5 ++--- 2 files changed, 5 insertions(+), 11 deletions(-)