Message ID | 20230713023232.1411523-6-memxor@gmail.com (mailing list archive)
---|---
State | Changes Requested
Delegated to: | BPF
Series | Exceptions - 1/2
On Thu, Jul 13, 2023 at 08:02:27AM +0530, Kumar Kartikeya Dwivedi wrote:
> The plumbing for offline unwinding when we throw an exception in
> programs would require walking the stack, hence introduce a new
> arch_bpf_stack_walk function. This is provided when the JIT supports
> exceptions, i.e. bpf_jit_supports_exceptions is true.
>
[...]
>
> +bool bpf_jit_supports_exceptions(void)
> +{
> +	return IS_ENABLED(CONFIG_UNWINDER_ORC) || IS_ENABLED(CONFIG_UNWINDER_FRAME_POINTER);
> +}
> +
> +void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie)
> +{
> +#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
> +	struct unwind_state state;
> +	unsigned long addr;
> +
> +	for (unwind_start(&state, current, NULL, NULL); !unwind_done(&state);
> +	     unwind_next_frame(&state)) {
> +		addr = unwind_get_return_address(&state);

I think these steps will work even with UNWINDER_GUESS.
What is the reason for #ifdef ?

> +		if (!addr || !consume_fn(cookie, (u64)addr, (u64)state.sp, (u64)state.bp))
> +			break;
> +	}
> +#endif
> +}

[...]
On Sat, 15 Jul 2023 at 03:35, Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Thu, Jul 13, 2023 at 08:02:27AM +0530, Kumar Kartikeya Dwivedi wrote:
> >
[...]
> > +	for (unwind_start(&state, current, NULL, NULL); !unwind_done(&state);
> > +	     unwind_next_frame(&state)) {
> > +		addr = unwind_get_return_address(&state);
>
> I think these steps will work even with UNWINDER_GUESS.
> What is the reason for #ifdef ?

I think we require both unwind_state::sp and unwind_state::bp, but
arch/x86/include/asm/unwind.h does not include unwind_state::bp when
both UNWINDER_ORC and UNWINDER_FRAME_POINTER are unset.

Although it might be possible to calculate and save bp offset during
JIT in bpf_prog_aux (by adding roundup(stack_depth) + 8 (push rax if
tail call reachable) + callee_regs_saved) for the subprog
corresponding to a frame. Then we can make it work everywhere.
The JIT will abstract get_prog_bp(sp) using an arch specific helper.

Let me know if I misunderstood something.

[...]
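For readers following the thread, here is a rough sketch of the alternative Kumar describes: record, at JIT time, how far the saved frame pointer sits above the stack pointer for each subprog, so bp could be recovered even without CONFIG_UNWINDER_ORC or CONFIG_UNWINDER_FRAME_POINTER. `stack_depth` and `tail_call_reachable` are real `bpf_prog_aux` members, but the `callee_regs_saved` field and the `get_prog_bp()` helper are hypothetical names from the email, not existing kernel code:

```c
#include <linux/bpf.h>
#include <linux/math.h>

/*
 * Hypothetical sketch only: neither aux->callee_regs_saved nor get_prog_bp()
 * exists in the kernel. It illustrates the per-subprog bookkeeping the email
 * suggests the x86 JIT could record so that bp can be derived from sp.
 */
static u64 get_prog_bp(const struct bpf_prog *prog, u64 sp)
{
	u64 off = round_up(prog->aux->stack_depth, 8);	/* BPF stack area */

	if (prog->aux->tail_call_reachable)
		off += 8;				/* "push rax" for the tail call count */

	off += prog->aux->callee_regs_saved;		/* bytes of pushed callee-saved regs (hypothetical field) */

	return sp + off;				/* frame pointer value for this subprog's frame */
}
```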
On Mon, Jul 17, 2023 at 9:29 AM Kumar Kartikeya Dwivedi
<memxor@gmail.com> wrote:
>
> On Sat, 15 Jul 2023 at 03:35, Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
> >
[...]
> > I think these steps will work even with UNWINDER_GUESS.
> > What is the reason for #ifdef ?
>
> I think we require both unwind_state::sp and unwind_state::bp, but
> arch/x86/include/asm/unwind.h does not include unwind_state::bp when
> both UNWINDER_ORC and UNWINDER_FRAME_POINTER are unset.
>
> Although it might be possible to calculate and save bp offset during
> JIT in bpf_prog_aux (by adding roundup(stack_depth) + 8 (push rax if
> tail call reachable) + callee_regs_saved) for the subprog
> corresponding to a frame. Then we can make it work everywhere.
> The JIT will abstract get_prog_bp(sp) using an arch specific helper.
>
> Let me know if I misunderstood something.

JITed progs always have frames. So we're effectively doing
unconditional UNWINDER_FRAME_POINTER.

I think the intended usage of arch_bpf_stack_walk() is to only walk
bpf frames _in this patch set_, if so the extra #ifdefs are misleading.
If in follow-ups we're going to unwind through JITed progs _and_
through kfunc/helpers then this ifdef will be necessary.

I suspect we might want something like this in the future.
So the ifdef is ok to have from the start, but the comment is necessary
to describe what it is for.
On Mon, 17 Jul 2023 at 22:59, Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
[...]
>
> JITed progs always have frames. So we're effectively doing
> unconditional UNWINDER_FRAME_POINTER.
>
> I think the intended usage of arch_bpf_stack_walk() is to only walk
> bpf frames _in this patch set_, if so the extra #ifdefs are misleading.
> If in follow-ups we're going to unwind through JITed progs _and_
> through kfunc/helpers then this ifdef will be necessary.
> I suspect we might want something like this in the future.

I think we actually do unwind through bpf_throw at the very least, so
we are going through both kernel and BPF frames.

> So the ifdef is ok to have from the start, but the comment is necessary
> to describe what it is for.

I'll add the comment in v2.
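For context, the comment being discussed would presumably sit just above the #ifdef in arch_bpf_stack_walk() and explain why the GUESS unwinder is excluded. A sketch of the sort of wording that could be added, a guess grounded in the exchange above rather than the actual v2 patch:

```c
void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie)
{
	/*
	 * Illustrative wording only: unwind_state::bp is only available when
	 * the ORC or frame-pointer unwinder is configured, and the consumer
	 * needs both sp and bp to locate the BPF frame, so the walk is
	 * compiled out under CONFIG_UNWINDER_GUESS.
	 */
#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
	struct unwind_state state;
	unsigned long addr;

	for (unwind_start(&state, current, NULL, NULL); !unwind_done(&state);
	     unwind_next_frame(&state)) {
		addr = unwind_get_return_address(&state);
		if (!addr || !consume_fn(cookie, (u64)addr, (u64)state.sp, (u64)state.bp))
			break;
	}
#endif
}
```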
```diff
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 438adb695daa..d326503ce242 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -16,6 +16,7 @@
 #include <asm/set_memory.h>
 #include <asm/nospec-branch.h>
 #include <asm/text-patching.h>
+#include <asm/unwind.h>
 
 static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
 {
@@ -2660,3 +2661,23 @@ void bpf_jit_free(struct bpf_prog *prog)
 
 	bpf_prog_unlock_free(prog);
 }
+
+bool bpf_jit_supports_exceptions(void)
+{
+	return IS_ENABLED(CONFIG_UNWINDER_ORC) || IS_ENABLED(CONFIG_UNWINDER_FRAME_POINTER);
+}
+
+void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie)
+{
+#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
+	struct unwind_state state;
+	unsigned long addr;
+
+	for (unwind_start(&state, current, NULL, NULL); !unwind_done(&state);
+	     unwind_next_frame(&state)) {
+		addr = unwind_get_return_address(&state);
+		if (!addr || !consume_fn(cookie, (u64)addr, (u64)state.sp, (u64)state.bp))
+			break;
+	}
+#endif
+}
diff --git a/include/linux/filter.h b/include/linux/filter.h
index f69114083ec7..21ac801330bb 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -920,6 +920,8 @@ bool bpf_jit_needs_zext(void);
 bool bpf_jit_supports_subprog_tailcalls(void);
 bool bpf_jit_supports_kfunc_call(void);
 bool bpf_jit_supports_far_kfunc_call(void);
+bool bpf_jit_supports_exceptions(void);
+void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie);
 bool bpf_helper_changes_pkt_data(void *func);
 
 static inline bool bpf_dump_raw_ok(const struct cred *cred)
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 5c484b2bc3d6..5e224cf0ec27 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2770,6 +2770,15 @@ int __weak bpf_arch_text_invalidate(void *dst, size_t len)
 	return -ENOTSUPP;
 }
 
+bool __weak bpf_jit_supports_exceptions(void)
+{
+	return false;
+}
+
+void __weak arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie)
+{
+}
+
 #ifdef CONFIG_BPF_SYSCALL
 static int __init bpf_global_ma_init(void)
 {
```
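To make the callback contract concrete: a caller passes a consume_fn that receives each frame's return address, stack pointer, and frame pointer, and returns false to stop the walk. The sketch below shows how a consumer could locate the first JITed BPF frame; `struct bpf_frame_ctx`, `consume_frame()`, and `find_bpf_frame()` are illustrative names and not code from this series, while `is_bpf_text_address()` is an existing kernel helper:

```c
#include <linux/bpf.h>
#include <linux/filter.h>

/* Hypothetical consumer of arch_bpf_stack_walk(); not part of this patch set. */
struct bpf_frame_ctx {
	u64 sp;
	u64 bp;
	bool found;
};

static bool consume_frame(void *cookie, u64 ip, u64 sp, u64 bp)
{
	struct bpf_frame_ctx *ctx = cookie;

	/* Keep walking until the return address falls inside JITed BPF text. */
	if (!is_bpf_text_address(ip))
		return true;

	ctx->sp = sp;
	ctx->bp = bp;
	ctx->found = true;
	return false;	/* returning false terminates the walk */
}

static bool find_bpf_frame(struct bpf_frame_ctx *ctx)
{
	ctx->found = false;
	arch_bpf_stack_walk(consume_frame, ctx);
	return ctx->found;
}
```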
The plumbing for offline unwinding when we throw an exception in
programs would require walking the stack, hence introduce a new
arch_bpf_stack_walk function. This is provided when the JIT supports
exceptions, i.e. bpf_jit_supports_exceptions is true. The arch-specific
code is really minimal, hence it should be straightforward to extend
this support to other architectures as well, as it reuses the logic of
arch_stack_walk but allows access to unwind_state data.

Once the stack pointer and frame pointer are known for the main subprog
during the unwinding, we know the stack layout and location of any
callee-saved registers which must be restored before we return to
the kernel.

This handling will be added in the next patch.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
---
 arch/x86/net/bpf_jit_comp.c | 21 +++++++++++++++++++++
 include/linux/filter.h      |  2 ++
 kernel/bpf/core.c           |  9 +++++++++
 3 files changed, 32 insertions(+)