diff mbox series

riscv: fix seccomp reject syscall code path

Message ID 20200208151817.12383-1-tycho@tycho.ws (mailing list archive)
State New, archived
Headers show
Series riscv: fix seccomp reject syscall code path | expand

Commit Message

Tycho Andersen Feb. 8, 2020, 3:18 p.m. UTC
If secure_computing() rejected a system call, we were previously setting
the system call number to -1, to indicate to later code that the syscall
failed. However, if something (e.g. a user notification) was sleeping, and
received a signal, we may set a0 to -ERESTARTSYS and re-try the system call
again.

In this case, seccomp "denies" the syscall (because of the signal), and we
would set a7 to -1, thus losing the value of the system call we want to
restart.

Instead, let's return -1 from do_syscall_trace_enter() to indicate that the
syscall was rejected, so we don't clobber the value in case of -ERESTARTSYS
or whatever.

This commit fixes the user_notification_signal seccomp selftest on riscv to
no longer hang. That test expects the system call to be re-issued after the
signal, and it wasn't due to the above bug. Now that it is, everything
works normally.

Note that in the ptrace (tracer) case, the tracer can set the register
values to whatever they want, so we still need to keep the code that
handles out-of-bounds syscalls. However, we can drop the comment.

We can also drop syscall_set_nr(), since it is no longer used anywhere, and
the code that re-loads the value in a7 because of it.

Reported in: https://lore.kernel.org/bpf/CAEn-LTp=ss0Dfv6J00=rCAy+N78U2AmhqJNjfqjr2FDpPYjxEQ@mail.gmail.com/

Reported-by: David Abdurachmanov <david.abdurachmanov@gmail.com>
Signed-off-by: Tycho Andersen <tycho@tycho.ws>
CC: Kees Cook <keescook@chromium.org>
CC: Andy Lutomirski <luto@amacapital.net>
CC: Paul Walmsley <paul.walmsley@sifive.com>
CC: Oleg Nesterov <oleg@redhat.com>
---
 arch/riscv/include/asm/syscall.h |  7 -------
 arch/riscv/kernel/entry.S        | 11 +++--------
 arch/riscv/kernel/ptrace.c       | 11 +++++------
 3 files changed, 8 insertions(+), 21 deletions(-)

Comments

Kees Cook Feb. 10, 2020, 1:11 a.m. UTC | #1
On Sat, Feb 08, 2020 at 08:18:17AM -0700, Tycho Andersen wrote:
> If secure_computing() rejected a system call, we were previously setting
> the system call number to -1, to indicate to later code that the syscall
> failed. However, if something (e.g. a user notification) was sleeping, and
> received a signal, we may set a0 to -ERESTARTSYS and re-try the system call
> again.
> 
> In this case, seccomp "denies" the syscall (because of the signal), and we
> would set a7 to -1, thus losing the value of the system call we want to
> restart.
> 
> Instead, let's return -1 from do_syscall_trace_enter() to indicate that the
> syscall was rejected, so we don't clobber the value in case of -ERESTARTSYS
> or whatever.
> 
> This commit fixes the user_notification_signal seccomp selftest on riscv to
> no longer hang. That test expects the system call to be re-issued after the
> signal, and it wasn't due to the above bug. Now that it is, everything
> works normally.
> 
> Note that in the ptrace (tracer) case, the tracer can set the register
> values to whatever they want, so we still need to keep the code that
> handles out-of-bounds syscalls. However, we can drop the comment.
> 
> We can also drop syscall_set_nr(), since it is no longer used anywhere, and
> the code that re-loads the value in a7 because of it.
> 
> Reported in: https://lore.kernel.org/bpf/CAEn-LTp=ss0Dfv6J00=rCAy+N78U2AmhqJNjfqjr2FDpPYjxEQ@mail.gmail.com/
> 
> Reported-by: David Abdurachmanov <david.abdurachmanov@gmail.com>
> Signed-off-by: Tycho Andersen <tycho@tycho.ws>

Funky! Good catch. :)

Reviewed-by: Kees Cook <keescook@chromium.org>

-Kees

> CC: Kees Cook <keescook@chromium.org>
> CC: Andy Lutomirski <luto@amacapital.net>
> CC: Paul Walmsley <paul.walmsley@sifive.com>
> CC: Oleg Nesterov <oleg@redhat.com>
> ---
>  arch/riscv/include/asm/syscall.h |  7 -------
>  arch/riscv/kernel/entry.S        | 11 +++--------
>  arch/riscv/kernel/ptrace.c       | 11 +++++------
>  3 files changed, 8 insertions(+), 21 deletions(-)
> 
> diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h
> index 42347d0981e7..49350c8bd7b0 100644
> --- a/arch/riscv/include/asm/syscall.h
> +++ b/arch/riscv/include/asm/syscall.h
> @@ -28,13 +28,6 @@ static inline int syscall_get_nr(struct task_struct *task,
>  	return regs->a7;
>  }
>  
> -static inline void syscall_set_nr(struct task_struct *task,
> -				  struct pt_regs *regs,
> -				  int sysno)
> -{
> -	regs->a7 = sysno;
> -}
> -
>  static inline void syscall_rollback(struct task_struct *task,
>  				    struct pt_regs *regs)
>  {
> diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> index bad4d85b5e91..208702d8c18e 100644
> --- a/arch/riscv/kernel/entry.S
> +++ b/arch/riscv/kernel/entry.S
> @@ -228,20 +228,13 @@ check_syscall_nr:
>  	/* Check to make sure we don't jump to a bogus syscall number. */
>  	li t0, __NR_syscalls
>  	la s0, sys_ni_syscall
> -	/*
> -	 * The tracer can change syscall number to valid/invalid value.
> -	 * We use syscall_set_nr helper in syscall_trace_enter thus we
> -	 * cannot trust the current value in a7 and have to reload from
> -	 * the current task pt_regs.
> -	 */
> -	REG_L a7, PT_A7(sp)
>  	/*
>  	 * Syscall number held in a7.
>  	 * If syscall number is above allowed value, redirect to ni_syscall.
>  	 */
>  	bge a7, t0, 1f
>  	/*
> -	 * Check if syscall is rejected by tracer or seccomp, i.e., a7 == -1.
> +	 * Check if syscall is rejected by tracer, i.e., a7 == -1.
>  	 * If yes, we pretend it was executed.
>  	 */
>  	li t1, -1
> @@ -334,6 +327,7 @@ work_resched:
>  handle_syscall_trace_enter:
>  	move a0, sp
>  	call do_syscall_trace_enter
> +	move t0, a0
>  	REG_L a0, PT_A0(sp)
>  	REG_L a1, PT_A1(sp)
>  	REG_L a2, PT_A2(sp)
> @@ -342,6 +336,7 @@ handle_syscall_trace_enter:
>  	REG_L a5, PT_A5(sp)
>  	REG_L a6, PT_A6(sp)
>  	REG_L a7, PT_A7(sp)
> +	bnez t0, ret_from_syscall_rejected
>  	j check_syscall_nr
>  handle_syscall_trace_exit:
>  	move a0, sp
> diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
> index 407464201b91..444dc7b0fd78 100644
> --- a/arch/riscv/kernel/ptrace.c
> +++ b/arch/riscv/kernel/ptrace.c
> @@ -148,21 +148,19 @@ long arch_ptrace(struct task_struct *child, long request,
>   * Allows PTRACE_SYSCALL to work.  These are called from entry.S in
>   * {handle,ret_from}_syscall.
>   */
> -__visible void do_syscall_trace_enter(struct pt_regs *regs)
> +__visible int do_syscall_trace_enter(struct pt_regs *regs)
>  {
>  	if (test_thread_flag(TIF_SYSCALL_TRACE))
>  		if (tracehook_report_syscall_entry(regs))
> -			syscall_set_nr(current, regs, -1);
> +			return -1;
>  
>  	/*
>  	 * Do the secure computing after ptrace; failures should be fast.
>  	 * If this fails we might have return value in a0 from seccomp
>  	 * (via SECCOMP_RET_ERRNO/TRACE).
>  	 */
> -	if (secure_computing() == -1) {
> -		syscall_set_nr(current, regs, -1);
> -		return;
> -	}
> +	if (secure_computing() == -1)
> +		return -1;
>  
>  #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
>  	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
> @@ -170,6 +168,7 @@ __visible void do_syscall_trace_enter(struct pt_regs *regs)
>  #endif
>  
>  	audit_syscall_entry(regs->a7, regs->a0, regs->a1, regs->a2, regs->a3);
> +	return 0;
>  }
>  
>  __visible void do_syscall_trace_exit(struct pt_regs *regs)
> -- 
> 2.20.1
>
Tycho Andersen Feb. 23, 2020, 5:17 p.m. UTC | #2
On Sat, Feb 08, 2020 at 08:18:17AM -0700, Tycho Andersen wrote:
> ...

Ping, any risc-v people have thoughts on this?

Tycho
Kees Cook March 3, 2020, 4:46 a.m. UTC | #3
On Sun, Feb 23, 2020 at 10:17:57AM -0700, Tycho Andersen wrote:
> On Sat, Feb 08, 2020 at 08:18:17AM -0700, Tycho Andersen wrote:
> > ...
> 
> Ping, any risc-v people have thoughts on this?
> 
> Tycho

Re-ping. :) Can someone please pick this up? Original patch here:
https://lore.kernel.org/lkml/20200208151817.12383-1-tycho@tycho.ws/
Palmer Dabbelt March 3, 2020, 5:55 p.m. UTC | #4
On Mon, 02 Mar 2020 20:46:46 PST (-0800), keescook@chromium.org wrote:
> On Sun, Feb 23, 2020 at 10:17:57AM -0700, Tycho Andersen wrote:
>> On Sat, Feb 08, 2020 at 08:18:17AM -0700, Tycho Andersen wrote:
>> > ...
>>
>> Ping, any risc-v people have thoughts on this?
>>
>> Tycho
>
> Re-ping. :) Can someone please pick this up? Original patch here:
> https://lore.kernel.org/lkml/20200208151817.12383-1-tycho@tycho.ws/

Sorry, the other messages didn't end up in my inbox.  I'll take a look, as this
seems like a good candidate for rc5.
Kees Cook March 3, 2020, 9:12 p.m. UTC | #5
On Tue, Mar 03, 2020 at 09:55:10AM -0800, Palmer Dabbelt wrote:
> On Mon, 02 Mar 2020 20:46:46 PST (-0800), keescook@chromium.org wrote:
> > On Sun, Feb 23, 2020 at 10:17:57AM -0700, Tycho Andersen wrote:
> > > On Sat, Feb 08, 2020 at 08:18:17AM -0700, Tycho Andersen wrote:
> > > > ...
> > > 
> > > Ping, any risc-v people have thoughts on this?
> > > 
> > > Tycho
> > 
> > Re-ping. :) Can someone please pick this up? Original patch here:
> > https://lore.kernel.org/lkml/20200208151817.12383-1-tycho@tycho.ws/
> 
> Sorry, the other messages didn't end up in my inbox.  I'll take a look, as this
> seems like a good candidate for rc5.

Awesome; thanks!

-Kees
Palmer Dabbelt March 6, 2020, 12:02 a.m. UTC | #6
On Sun, 23 Feb 2020 09:17:57 PST (-0800), tycho@tycho.ws wrote:
> On Sat, Feb 08, 2020 at 08:18:17AM -0700, Tycho Andersen wrote:
>> ...
>
> Ping, any risc-v people have thoughts on this?

Thanks for fixing this.  It ended up escaping my inbox, but it's on fixes now
and targeted for the next RC.
diff mbox series

Patch

diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h
index 42347d0981e7..49350c8bd7b0 100644
--- a/arch/riscv/include/asm/syscall.h
+++ b/arch/riscv/include/asm/syscall.h
@@ -28,13 +28,6 @@  static inline int syscall_get_nr(struct task_struct *task,
 	return regs->a7;
 }
 
-static inline void syscall_set_nr(struct task_struct *task,
-				  struct pt_regs *regs,
-				  int sysno)
-{
-	regs->a7 = sysno;
-}
-
 static inline void syscall_rollback(struct task_struct *task,
 				    struct pt_regs *regs)
 {
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index bad4d85b5e91..208702d8c18e 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -228,20 +228,13 @@  check_syscall_nr:
 	/* Check to make sure we don't jump to a bogus syscall number. */
 	li t0, __NR_syscalls
 	la s0, sys_ni_syscall
-	/*
-	 * The tracer can change syscall number to valid/invalid value.
-	 * We use syscall_set_nr helper in syscall_trace_enter thus we
-	 * cannot trust the current value in a7 and have to reload from
-	 * the current task pt_regs.
-	 */
-	REG_L a7, PT_A7(sp)
 	/*
 	 * Syscall number held in a7.
 	 * If syscall number is above allowed value, redirect to ni_syscall.
 	 */
 	bge a7, t0, 1f
 	/*
-	 * Check if syscall is rejected by tracer or seccomp, i.e., a7 == -1.
+	 * Check if syscall is rejected by tracer, i.e., a7 == -1.
 	 * If yes, we pretend it was executed.
 	 */
 	li t1, -1
@@ -334,6 +327,7 @@  work_resched:
 handle_syscall_trace_enter:
 	move a0, sp
 	call do_syscall_trace_enter
+	move t0, a0
 	REG_L a0, PT_A0(sp)
 	REG_L a1, PT_A1(sp)
 	REG_L a2, PT_A2(sp)
@@ -342,6 +336,7 @@  handle_syscall_trace_enter:
 	REG_L a5, PT_A5(sp)
 	REG_L a6, PT_A6(sp)
 	REG_L a7, PT_A7(sp)
+	bnez t0, ret_from_syscall_rejected
 	j check_syscall_nr
 handle_syscall_trace_exit:
 	move a0, sp
diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
index 407464201b91..444dc7b0fd78 100644
--- a/arch/riscv/kernel/ptrace.c
+++ b/arch/riscv/kernel/ptrace.c
@@ -148,21 +148,19 @@  long arch_ptrace(struct task_struct *child, long request,
  * Allows PTRACE_SYSCALL to work.  These are called from entry.S in
  * {handle,ret_from}_syscall.
  */
-__visible void do_syscall_trace_enter(struct pt_regs *regs)
+__visible int do_syscall_trace_enter(struct pt_regs *regs)
 {
 	if (test_thread_flag(TIF_SYSCALL_TRACE))
 		if (tracehook_report_syscall_entry(regs))
-			syscall_set_nr(current, regs, -1);
+			return -1;
 
 	/*
 	 * Do the secure computing after ptrace; failures should be fast.
 	 * If this fails we might have return value in a0 from seccomp
 	 * (via SECCOMP_RET_ERRNO/TRACE).
 	 */
-	if (secure_computing() == -1) {
-		syscall_set_nr(current, regs, -1);
-		return;
-	}
+	if (secure_computing() == -1)
+		return -1;
 
 #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
 	if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
@@ -170,6 +168,7 @@  __visible void do_syscall_trace_enter(struct pt_regs *regs)
 #endif
 
 	audit_syscall_entry(regs->a7, regs->a0, regs->a1, regs->a2, regs->a3);
+	return 0;
 }
 
 __visible void do_syscall_trace_exit(struct pt_regs *regs)