Message ID | 20181206150156.28210-2-david.abdurachmanov@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | riscv: add support for SECCOMP and SECCOMP_FILTER | expand |
On Thu, Dec 6, 2018 at 7:02 AM David Abdurachmanov <david.abdurachmanov@gmail.com> wrote: > > The patch adds support for SECCOMP and SECCOMP_FILTER (BPF). > > Signed-off-by: David Abdurachmanov <david.abdurachmanov@gmail.com> > --- > arch/riscv/Kconfig | 14 ++++++++++++++ > arch/riscv/include/asm/thread_info.h | 5 ++++- > arch/riscv/kernel/entry.S | 27 +++++++++++++++++++++++++-- > arch/riscv/kernel/ptrace.c | 8 ++++++++ > 4 files changed, 51 insertions(+), 3 deletions(-) > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig > index a4f48f757204..49cd8e251547 100644 > --- a/arch/riscv/Kconfig > +++ b/arch/riscv/Kconfig > @@ -29,6 +29,7 @@ config RISCV > select GENERIC_SMP_IDLE_THREAD > select GENERIC_ATOMIC64 if !64BIT || !RISCV_ISA_A > select HAVE_ARCH_AUDITSYSCALL > + select HAVE_ARCH_SECCOMP_FILTER > select HAVE_MEMBLOCK_NODE_MAP > select HAVE_DMA_CONTIGUOUS > select HAVE_FUTEX_CMPXCHG if FUTEX > @@ -228,6 +229,19 @@ menu "Kernel features" > > source "kernel/Kconfig.hz" > > +config SECCOMP > + bool "Enable seccomp to safely compute untrusted bytecode" > + help > + This kernel feature is useful for number crunching applications > + that may need to compute untrusted bytecode during their > + execution. By using pipes or other transports made available to > + the process as file descriptors supporting the read/write > + syscalls, it's possible to isolate those applications in > + their own address space using seccomp. Once seccomp is > + enabled via prctl(PR_SET_SECCOMP), it cannot be disabled > + and the task is only allowed to execute a few safe syscalls > + defined by each seccomp mode. > + > endmenu > > menu "Boot options" > diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h > index 1c9cc8389928..1fd6e4130cab 100644 > --- a/arch/riscv/include/asm/thread_info.h > +++ b/arch/riscv/include/asm/thread_info.h > @@ -81,6 +81,7 @@ struct thread_info { > #define TIF_MEMDIE 5 /* is terminating due to OOM killer */ > #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ > #define TIF_SYSCALL_AUDIT 7 /* syscall auditing */ > +#define TIF_SECCOMP 8 /* syscall secure computing */ > > #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) > #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) > @@ -88,11 +89,13 @@ struct thread_info { > #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) > #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) > #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) > +#define _TIF_SECCOMP (1 << TIF_SECCOMP) > > #define _TIF_WORK_MASK \ > (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED) > > #define _TIF_SYSCALL_WORK \ > - (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT) > + (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT \ > + _TIF_SECCOMP ) > > #endif /* _ASM_RISCV_THREAD_INFO_H */ > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S > index 355166f57205..e88ccbfa61ee 100644 > --- a/arch/riscv/kernel/entry.S > +++ b/arch/riscv/kernel/entry.S > @@ -207,8 +207,25 @@ check_syscall_nr: > /* Check to make sure we don't jump to a bogus syscall number. */ > li t0, __NR_syscalls > la s0, sys_ni_syscall > - /* Syscall number held in a7 */ > - bgeu a7, t0, 1f > + /* > + * The tracer can change syscall number to valid/invalid value. > + * We use syscall_set_nr helper in syscall_trace_enter thus we > + * cannot trust the current value in a7 and have to reload from > + * the current task pt_regs. > + */ > + REG_L a7, PT_A7(sp) > + /* > + * Syscall number held in a7. > + * If syscall number is above allowed value, redirect to ni_syscall. > + */ > + bge a7, t0, 1f > + /* > + * Check if syscall is rejected by tracer or seccomp, i.e., a7 == -1. > + * If yes, we pretend it was executed. > + */ > + li t1, -1 > + beq a7, t1, ret_from_syscall_rejected > + /* Call syscall */ > la s0, sys_call_table > slli t0, a7, RISCV_LGPTR > add s0, s0, t0 > @@ -219,6 +236,12 @@ check_syscall_nr: > ret_from_syscall: > /* Set user a0 to kernel a0 */ > REG_S a0, PT_A0(sp) > + /* > + * We didn't execute the actual syscall. > + * Seccomp already set return value for the current task pt_regs. > + * (If it was configured with SECCOMP_RET_ERRNO/TRACE) > + */ > +ret_from_syscall_rejected: > /* Trace syscalls, but only if requested by the user. */ > REG_L t0, TASK_TI_FLAGS(tp) > andi t0, t0, _TIF_SYSCALL_WORK > diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c > index c1b51539c3e2..598e48b8ca2b 100644 > --- a/arch/riscv/kernel/ptrace.c > +++ b/arch/riscv/kernel/ptrace.c > @@ -160,6 +160,14 @@ void do_syscall_trace_enter(struct pt_regs *regs) > if (tracehook_report_syscall_entry(regs)) > syscall_set_nr(current, regs, -1); > > + /* > + * Do the secure computing after ptrace; failures should be fast. > + * If this fails we might have return value in a0 from seccomp > + * (via SECCOMP_RET_ERRNO/TRACE). > + */ > + if (secure_computing(NULL) == -1) > + syscall_set_nr(current, regs, -1); On a -1 return, this should return immediately -- it should not continue to process trace_sys_enter(), etc. -Kees > + > #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS > if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) > trace_sys_enter(regs, syscall_get_nr(current, regs)); > -- > 2.19.2 >
On Thu, Dec 6, 2018 at 7:02 AM David Abdurachmanov <david.abdurachmanov@gmail.com> wrote: > The patch adds support for SECCOMP and SECCOMP_FILTER (BPF). Can you add support to tools/testing/selftests/seccomp/seccomp_bpf.c as well? That selftest finds a lot of weird corner-cases... > diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h > index 1c9cc8389928..1fd6e4130cab 100644 > --- a/arch/riscv/include/asm/thread_info.h > +++ b/arch/riscv/include/asm/thread_info.h > @@ -81,6 +81,7 @@ struct thread_info { > #define TIF_MEMDIE 5 /* is terminating due to OOM killer */ > #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ > #define TIF_SYSCALL_AUDIT 7 /* syscall auditing */ > +#define TIF_SECCOMP 8 /* syscall secure computing */ Nit: extra tab needs to be removed.
On Thu, Dec 6, 2018 at 7:02 AM David Abdurachmanov
<david.abdurachmanov@gmail.com> wrote:
> The patch adds support for SECCOMP and SECCOMP_FILTER (BPF).
I built this against linux-next but it's missing seccomp.h. Was that
accidentally left out of the commit?
CC arch/riscv/kernel/asm-offsets.s
In file included from ./include/linux/sched.h:21:0,
from arch/riscv/kernel/asm-offsets.c:18:
./include/linux/seccomp.h:14:10: fatal error: asm/seccomp.h: No such
file or directory
#include <asm/seccomp.h>
^~~~~~~~~~~~~~~
On Thu, Dec 6, 2018 at 5:47 PM Kees Cook <keescook@chromium.org> wrote: > > On Thu, Dec 6, 2018 at 7:02 AM David Abdurachmanov > <david.abdurachmanov@gmail.com> wrote: > > > > The patch adds support for SECCOMP and SECCOMP_FILTER (BPF). > > > > Signed-off-by: David Abdurachmanov <david.abdurachmanov@gmail.com> > > --- > > arch/riscv/Kconfig | 14 ++++++++++++++ > > arch/riscv/include/asm/thread_info.h | 5 ++++- > > arch/riscv/kernel/entry.S | 27 +++++++++++++++++++++++++-- > > arch/riscv/kernel/ptrace.c | 8 ++++++++ > > 4 files changed, 51 insertions(+), 3 deletions(-) > > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig > > index a4f48f757204..49cd8e251547 100644 > > --- a/arch/riscv/Kconfig > > +++ b/arch/riscv/Kconfig > > @@ -29,6 +29,7 @@ config RISCV > > select GENERIC_SMP_IDLE_THREAD > > select GENERIC_ATOMIC64 if !64BIT || !RISCV_ISA_A > > select HAVE_ARCH_AUDITSYSCALL > > + select HAVE_ARCH_SECCOMP_FILTER > > select HAVE_MEMBLOCK_NODE_MAP > > select HAVE_DMA_CONTIGUOUS > > select HAVE_FUTEX_CMPXCHG if FUTEX > > @@ -228,6 +229,19 @@ menu "Kernel features" > > > > source "kernel/Kconfig.hz" > > > > +config SECCOMP > > + bool "Enable seccomp to safely compute untrusted bytecode" > > + help > > + This kernel feature is useful for number crunching applications > > + that may need to compute untrusted bytecode during their > > + execution. By using pipes or other transports made available to > > + the process as file descriptors supporting the read/write > > + syscalls, it's possible to isolate those applications in > > + their own address space using seccomp. Once seccomp is > > + enabled via prctl(PR_SET_SECCOMP), it cannot be disabled > > + and the task is only allowed to execute a few safe syscalls > > + defined by each seccomp mode. > > + > > endmenu > > > > menu "Boot options" > > diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h > > index 1c9cc8389928..1fd6e4130cab 100644 > > --- a/arch/riscv/include/asm/thread_info.h > > +++ b/arch/riscv/include/asm/thread_info.h > > @@ -81,6 +81,7 @@ struct thread_info { > > #define TIF_MEMDIE 5 /* is terminating due to OOM killer */ > > #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ > > #define TIF_SYSCALL_AUDIT 7 /* syscall auditing */ > > +#define TIF_SECCOMP 8 /* syscall secure computing */ > > > > #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) > > #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) > > @@ -88,11 +89,13 @@ struct thread_info { > > #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) > > #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) > > #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) > > +#define _TIF_SECCOMP (1 << TIF_SECCOMP) > > > > #define _TIF_WORK_MASK \ > > (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED) > > > > #define _TIF_SYSCALL_WORK \ > > - (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT) > > + (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT \ > > + _TIF_SECCOMP ) > > > > #endif /* _ASM_RISCV_THREAD_INFO_H */ > > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S > > index 355166f57205..e88ccbfa61ee 100644 > > --- a/arch/riscv/kernel/entry.S > > +++ b/arch/riscv/kernel/entry.S > > @@ -207,8 +207,25 @@ check_syscall_nr: > > /* Check to make sure we don't jump to a bogus syscall number. */ > > li t0, __NR_syscalls > > la s0, sys_ni_syscall > > - /* Syscall number held in a7 */ > > - bgeu a7, t0, 1f > > + /* > > + * The tracer can change syscall number to valid/invalid value. > > + * We use syscall_set_nr helper in syscall_trace_enter thus we > > + * cannot trust the current value in a7 and have to reload from > > + * the current task pt_regs. > > + */ > > + REG_L a7, PT_A7(sp) > > + /* > > + * Syscall number held in a7. > > + * If syscall number is above allowed value, redirect to ni_syscall. > > + */ > > + bge a7, t0, 1f > > + /* > > + * Check if syscall is rejected by tracer or seccomp, i.e., a7 == -1. > > + * If yes, we pretend it was executed. > > + */ > > + li t1, -1 > > + beq a7, t1, ret_from_syscall_rejected > > + /* Call syscall */ > > la s0, sys_call_table > > slli t0, a7, RISCV_LGPTR > > add s0, s0, t0 > > @@ -219,6 +236,12 @@ check_syscall_nr: > > ret_from_syscall: > > /* Set user a0 to kernel a0 */ > > REG_S a0, PT_A0(sp) > > + /* > > + * We didn't execute the actual syscall. > > + * Seccomp already set return value for the current task pt_regs. > > + * (If it was configured with SECCOMP_RET_ERRNO/TRACE) > > + */ > > +ret_from_syscall_rejected: > > /* Trace syscalls, but only if requested by the user. */ > > REG_L t0, TASK_TI_FLAGS(tp) > > andi t0, t0, _TIF_SYSCALL_WORK > > diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c > > index c1b51539c3e2..598e48b8ca2b 100644 > > --- a/arch/riscv/kernel/ptrace.c > > +++ b/arch/riscv/kernel/ptrace.c > > @@ -160,6 +160,14 @@ void do_syscall_trace_enter(struct pt_regs *regs) > > if (tracehook_report_syscall_entry(regs)) > > syscall_set_nr(current, regs, -1); > > > > + /* > > + * Do the secure computing after ptrace; failures should be fast. > > + * If this fails we might have return value in a0 from seccomp > > + * (via SECCOMP_RET_ERRNO/TRACE). > > + */ > > + if (secure_computing(NULL) == -1) > > + syscall_set_nr(current, regs, -1); > > On a -1 return, this should return immediately -- it should not > continue to process trace_sys_enter(), etc. Ops! No idea how I missed that. Will fix it. > -Kees > > > + > > #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS > > if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) > > trace_sys_enter(regs, syscall_get_nr(current, regs)); > > -- > > 2.19.2 > > > > > -- > Kees Cook
On Thu, Dec 6, 2018 at 5:52 PM Kees Cook <keescook@chromium.org> wrote: > > On Thu, Dec 6, 2018 at 7:02 AM David Abdurachmanov > <david.abdurachmanov@gmail.com> wrote: > > The patch adds support for SECCOMP and SECCOMP_FILTER (BPF). > > Can you add support to tools/testing/selftests/seccomp/seccomp_bpf.c > as well? That selftest finds a lot of weird corner-cases... > > > diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h > > index 1c9cc8389928..1fd6e4130cab 100644 > > --- a/arch/riscv/include/asm/thread_info.h > > +++ b/arch/riscv/include/asm/thread_info.h > > @@ -81,6 +81,7 @@ struct thread_info { > > #define TIF_MEMDIE 5 /* is terminating due to OOM killer */ > > #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ > > #define TIF_SYSCALL_AUDIT 7 /* syscall auditing */ > > +#define TIF_SECCOMP 8 /* syscall secure computing */ > > Nit: extra tab needs to be removed. Will fix it. I need to cleanup my vim configuration for tab with. david
On Thu, Dec 6, 2018 at 6:07 PM Kees Cook <keescook@chromium.org> wrote: > > On Thu, Dec 6, 2018 at 7:02 AM David Abdurachmanov > <david.abdurachmanov@gmail.com> wrote: > > The patch adds support for SECCOMP and SECCOMP_FILTER (BPF). > > I built this against linux-next but it's missing seccomp.h. Was that > accidentally left out of the commit? > > > CC arch/riscv/kernel/asm-offsets.s > In file included from ./include/linux/sched.h:21:0, > from arch/riscv/kernel/asm-offsets.c:18: > ./include/linux/seccomp.h:14:10: fatal error: asm/seccomp.h: No such > file or directory > #include <asm/seccomp.h> > ^~~~~~~~~~~~~~~ > I forgot to add it... Will fix it. david
On Thu, Dec 6, 2018 at 5:52 PM Kees Cook <keescook@chromium.org> wrote: > > On Thu, Dec 6, 2018 at 7:02 AM David Abdurachmanov > <david.abdurachmanov@gmail.com> wrote: > > The patch adds support for SECCOMP and SECCOMP_FILTER (BPF). > > Can you add support to tools/testing/selftests/seccomp/seccomp_bpf.c > as well? That selftest finds a lot of weird corner-cases... I hate it locally and will include in v2. The results see fine (tested in QEMU). TAP version 13 selftests: seccomp: seccomp_bpf ======================================== [==========] Running 64 tests from 1 test cases. [ RUN ] global.mode_strict_support [ OK ] global.mode_strict_support [ RUN ] global.mode_strict_cannot_call_prctl [ OK ] global.mode_strict_cannot_call_prctl [ RUN ] global.no_new_privs_support [ OK ] global.no_new_privs_support [ RUN ] global.mode_filter_support [ OK ] global.mode_filter_support [ RUN ] global.mode_filter_without_nnp [ OK ] global.mode_filter_without_nnp [ RUN ] global.filter_size_limits [ OK ] global.filter_size_limits [ RUN ] global.filter_chain_limits [ OK ] global.filter_chain_limits [ RUN ] global.mode_filter_cannot_move_to_strict [ OK ] global.mode_filter_cannot_move_to_strict [ RUN ] global.mode_filter_get_seccomp [ OK ] global.mode_filter_get_seccomp [ RUN ] global.ALLOW_all [ OK ] global.ALLOW_all [ RUN ] global.empty_prog [ OK ] global.empty_prog [ RUN ] global.log_all [ OK ] global.log_all [ RUN ] global.unknown_ret_is_kill_inside [ OK ] global.unknown_ret_is_kill_inside [ RUN ] global.unknown_ret_is_kill_above_allow [ OK ] global.unknown_ret_is_kill_above_allow [ RUN ] global.KILL_all [ OK ] global.KILL_all [ RUN ] global.KILL_one [ OK ] global.KILL_one [ RUN ] global.KILL_one_arg_one [ OK ] global.KILL_one_arg_one [ RUN ] global.KILL_one_arg_six [ OK ] global.KILL_one_arg_six [ RUN ] global.KILL_thread [ OK ] global.KILL_thread [ RUN ] global.KILL_process [ OK ] global.KILL_process [ RUN ] global.arg_out_of_range [ OK ] global.arg_out_of_range [ RUN ] global.ERRNO_valid [ OK ] global.ERRNO_valid [ RUN ] global.ERRNO_zero [ OK ] global.ERRNO_zero [ RUN ] global.ERRNO_capped [ OK ] global.ERRNO_capped [ RUN ] global.ERRNO_order [ OK ] global.ERRNO_order [ RUN ] TRAP.dfl [ OK ] TRAP.dfl [ RUN ] TRAP.ign [ OK ] TRAP.ign [ RUN ] TRAP.handler [ OK ] TRAP.handler [ RUN ] precedence.allow_ok [ OK ] precedence.allow_ok [ RUN ] precedence.kill_is_highest [ OK ] precedence.kill_is_highest [ RUN ] precedence.kill_is_highest_in_any_order [ OK ] precedence.kill_is_highest_in_any_order [ RUN ] precedence.trap_is_second [ OK ] precedence.trap_is_second [ RUN ] precedence.trap_is_second_in_any_order [ OK ] precedence.trap_is_second_in_any_order [ RUN ] precedence.errno_is_third [ OK ] precedence.errno_is_third [ RUN ] precedence.errno_is_third_in_any_order [ OK ] precedence.errno_is_third_in_any_order [ RUN ] precedence.trace_is_fourth [ OK ] precedence.trace_is_fourth [ RUN ] precedence.trace_is_fourth_in_any_order [ OK ] precedence.trace_is_fourth_in_any_order [ RUN ] precedence.log_is_fifth [ OK ] precedence.log_is_fifth [ RUN ] precedence.log_is_fifth_in_any_order [ OK ] precedence.log_is_fifth_in_any_order [ RUN ] TRACE_poke.read_has_side_effects [ OK ] TRACE_poke.read_has_side_effects [ RUN ] TRACE_poke.getpid_runs_normally [ OK ] TRACE_poke.getpid_runs_normally [ RUN ] TRACE_syscall.ptrace_syscall_redirected [ OK ] TRACE_syscall.ptrace_syscall_redirected [ RUN ] TRACE_syscall.ptrace_syscall_dropped [ OK ] TRACE_syscall.ptrace_syscall_dropped [ RUN ] TRACE_syscall.syscall_allowed [ OK ] TRACE_syscall.syscall_allowed [ RUN ] TRACE_syscall.syscall_redirected [ OK ] TRACE_syscall.syscall_redirected [ RUN ] TRACE_syscall.syscall_dropped [ OK ] TRACE_syscall.syscall_dropped [ RUN ] TRACE_syscall.skip_after_RET_TRACE [ OK ] TRACE_syscall.skip_after_RET_TRACE [ RUN ] TRACE_syscall.kill_after_RET_TRACE [ OK ] TRACE_syscall.kill_after_RET_TRACE [ RUN ] TRACE_syscall.skip_after_ptrace [ OK ] TRACE_syscall.skip_after_ptrace [ RUN ] TRACE_syscall.kill_after_ptrace [ OK ] TRACE_syscall.kill_after_ptrace [ RUN ] global.seccomp_syscall [ OK ] global.seccomp_syscall [ RUN ] global.seccomp_syscall_mode_lock [ OK ] global.seccomp_syscall_mode_lock [ RUN ] global.detect_seccomp_filter_flags [ OK ] global.detect_seccomp_filter_flags [ RUN ] global.TSYNC_first [ OK ] global.TSYNC_first [ RUN ] TSYNC.siblings_fail_prctl [ OK ] TSYNC.siblings_fail_prctl [ RUN ] TSYNC.two_siblings_with_ancestor [ OK ] TSYNC.two_siblings_with_ancestor [ RUN ] TSYNC.two_sibling_want_nnp [ OK ] TSYNC.two_sibling_want_nnp [ RUN ] TSYNC.two_siblings_with_no_filter [ OK ] TSYNC.two_siblings_with_no_filter [ RUN ] TSYNC.two_siblings_with_one_divergence [ OK ] TSYNC.two_siblings_with_one_divergence [ RUN ] TSYNC.two_siblings_not_under_filter [ OK ] TSYNC.two_siblings_not_under_filter [ RUN ] global.syscall_restart [ OK ] global.syscall_restart [ RUN ] global.filter_flag_log [ OK ] global.filter_flag_log [ RUN ] global.get_action_avail [ OK ] global.get_action_avail [ RUN ] global.get_metadata [ OK ] global.get_metadata [==========] 64 / 64 tests passed. [ PASSED ] ok 1..1 selftests: seccomp: seccomp_bpf [PASS] selftests: seccomp: seccomp_benchmark ======================================== Calibrating reasonable sample size... 1544120467.383132905 - 1544120467.382814604 = 318301 1544120467.384111505 - 1544120467.383931405 = 180100 1544120467.385728706 - 1544120467.384510905 = 1217801 1544120467.386858006 - 1544120467.386096506 = 761500 1544120467.388563407 - 1544120467.387171006 = 1392401 1544120467.392465908 - 1544120467.390143107 = 2322801 1544120467.397988410 - 1544120467.393666109 = 4322301 1544120467.406494614 - 1544120467.398347511 = 8147103 1544120467.427372522 - 1544120467.406955414 = 20417108 1544120467.467600338 - 1544120467.427772222 = 39828116 1544120467.542484667 - 1544120467.467954738 = 74529929 1544120467.693806026 - 1544120467.543004867 = 150801159 1544120467.970921334 - 1544120467.694244026 = 276677308 1544120468.522149049 - 1544120467.971549534 = 550599515 1544120469.637696984 - 1544120468.522606749 = 1115090235 1544120471.829467338 - 1544120469.638147084 = 2191320254 1544120476.263601568 - 1544120471.829850239 = 4433751329 1544120485.135465027 - 1544120476.263980268 = 8871484759 Benchmarking 4194304 samples... 26.716000000 - 17.812000000 = 8904000000 getpid native: 2122 ns 46.548000000 - 26.716000000 = 19832000000 getpid RET_ALLOW: 4728 ns Estimated seccomp overhead per syscall: 2606 ns ok 1..2 selftests: seccomp: seccomp_benchmark [PASS] > > > diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h > > index 1c9cc8389928..1fd6e4130cab 100644 > > --- a/arch/riscv/include/asm/thread_info.h > > +++ b/arch/riscv/include/asm/thread_info.h > > @@ -81,6 +81,7 @@ struct thread_info { > > #define TIF_MEMDIE 5 /* is terminating due to OOM killer */ > > #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ > > #define TIF_SYSCALL_AUDIT 7 /* syscall auditing */ > > +#define TIF_SECCOMP 8 /* syscall secure computing */ > > Nit: extra tab needs to be removed. > > -- > Kees Cook
On Thu, Dec 6, 2018 at 10:26 AM David Abdurachmanov <david.abdurachmanov@gmail.com> wrote: > > On Thu, Dec 6, 2018 at 5:52 PM Kees Cook <keescook@chromium.org> wrote: > > > > On Thu, Dec 6, 2018 at 7:02 AM David Abdurachmanov > > <david.abdurachmanov@gmail.com> wrote: > > > The patch adds support for SECCOMP and SECCOMP_FILTER (BPF). > > > > Can you add support to tools/testing/selftests/seccomp/seccomp_bpf.c > > as well? That selftest finds a lot of weird corner-cases... > > I hate it locally and will include in v2. I hate it too. ;) But seriously (reading through the "have" typo), that's awesome. Thanks! > The results see fine (tested in QEMU). Great!
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index a4f48f757204..49cd8e251547 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -29,6 +29,7 @@ config RISCV select GENERIC_SMP_IDLE_THREAD select GENERIC_ATOMIC64 if !64BIT || !RISCV_ISA_A select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_SECCOMP_FILTER select HAVE_MEMBLOCK_NODE_MAP select HAVE_DMA_CONTIGUOUS select HAVE_FUTEX_CMPXCHG if FUTEX @@ -228,6 +229,19 @@ menu "Kernel features" source "kernel/Kconfig.hz" +config SECCOMP + bool "Enable seccomp to safely compute untrusted bytecode" + help + This kernel feature is useful for number crunching applications + that may need to compute untrusted bytecode during their + execution. By using pipes or other transports made available to + the process as file descriptors supporting the read/write + syscalls, it's possible to isolate those applications in + their own address space using seccomp. Once seccomp is + enabled via prctl(PR_SET_SECCOMP), it cannot be disabled + and the task is only allowed to execute a few safe syscalls + defined by each seccomp mode. + endmenu menu "Boot options" diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index 1c9cc8389928..1fd6e4130cab 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -81,6 +81,7 @@ struct thread_info { #define TIF_MEMDIE 5 /* is terminating due to OOM killer */ #define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing */ +#define TIF_SECCOMP 8 /* syscall secure computing */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) @@ -88,11 +89,13 @@ struct thread_info { #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) +#define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_WORK_MASK \ (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED) #define _TIF_SYSCALL_WORK \ - (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT) + (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT \ + _TIF_SECCOMP ) #endif /* _ASM_RISCV_THREAD_INFO_H */ diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 355166f57205..e88ccbfa61ee 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -207,8 +207,25 @@ check_syscall_nr: /* Check to make sure we don't jump to a bogus syscall number. */ li t0, __NR_syscalls la s0, sys_ni_syscall - /* Syscall number held in a7 */ - bgeu a7, t0, 1f + /* + * The tracer can change syscall number to valid/invalid value. + * We use syscall_set_nr helper in syscall_trace_enter thus we + * cannot trust the current value in a7 and have to reload from + * the current task pt_regs. + */ + REG_L a7, PT_A7(sp) + /* + * Syscall number held in a7. + * If syscall number is above allowed value, redirect to ni_syscall. + */ + bge a7, t0, 1f + /* + * Check if syscall is rejected by tracer or seccomp, i.e., a7 == -1. + * If yes, we pretend it was executed. + */ + li t1, -1 + beq a7, t1, ret_from_syscall_rejected + /* Call syscall */ la s0, sys_call_table slli t0, a7, RISCV_LGPTR add s0, s0, t0 @@ -219,6 +236,12 @@ check_syscall_nr: ret_from_syscall: /* Set user a0 to kernel a0 */ REG_S a0, PT_A0(sp) + /* + * We didn't execute the actual syscall. + * Seccomp already set return value for the current task pt_regs. + * (If it was configured with SECCOMP_RET_ERRNO/TRACE) + */ +ret_from_syscall_rejected: /* Trace syscalls, but only if requested by the user. */ REG_L t0, TASK_TI_FLAGS(tp) andi t0, t0, _TIF_SYSCALL_WORK diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index c1b51539c3e2..598e48b8ca2b 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -160,6 +160,14 @@ void do_syscall_trace_enter(struct pt_regs *regs) if (tracehook_report_syscall_entry(regs)) syscall_set_nr(current, regs, -1); + /* + * Do the secure computing after ptrace; failures should be fast. + * If this fails we might have return value in a0 from seccomp + * (via SECCOMP_RET_ERRNO/TRACE). + */ + if (secure_computing(NULL) == -1) + syscall_set_nr(current, regs, -1); + #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) trace_sys_enter(regs, syscall_get_nr(current, regs));
The patch adds support for SECCOMP and SECCOMP_FILTER (BPF). Signed-off-by: David Abdurachmanov <david.abdurachmanov@gmail.com> --- arch/riscv/Kconfig | 14 ++++++++++++++ arch/riscv/include/asm/thread_info.h | 5 ++++- arch/riscv/kernel/entry.S | 27 +++++++++++++++++++++++++-- arch/riscv/kernel/ptrace.c | 8 ++++++++ 4 files changed, 51 insertions(+), 3 deletions(-)