
[RFC,bpf-next,v1,1/2] bpf: Explore PTR_TO_STACK as R0 for bpf_dynptr_slice_rdwr

Message ID 20250219125117.1956939-2-memxor@gmail.com (mailing list archive)
State RFC
Delegated to: BPF
Series Fix bpf_dynptr_slice_rdwr stack state

Checks

Context Check Description
bpf/vmtest-bpf-next-VM_Test-11 success Logs for aarch64-gcc / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / GCC BPF
bpf/vmtest-bpf-next-VM_Test-12 success Logs for aarch64-gcc / veristat-meta
bpf/vmtest-bpf-next-VM_Test-21 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / GCC BPF
bpf/vmtest-bpf-next-VM_Test-19 success Logs for s390x-gcc / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-17 / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-20 success Logs for s390x-gcc / veristat-meta
bpf/vmtest-bpf-next-VM_Test-44 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-43 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-17 / veristat-meta
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-49 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-50 success Logs for x86_64-llvm-18 / veristat-kernel
bpf/vmtest-bpf-next-VM_Test-51 success Logs for x86_64-llvm-18 / veristat-meta
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-31 fail Logs for x86_64-gcc / veristat-kernel / x86_64-gcc veristat_kernel
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / GCC BPF / GCC BPF
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / GCC BPF / GCC BPF
bpf/vmtest-bpf-next-VM_Test-27 fail Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-38 fail Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-47 fail Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-48 fail Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-26 fail Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-18 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-45 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / GCC BPF / GCC BPF
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-gcc / veristat-meta / x86_64-gcc veristat_meta
bpf/vmtest-bpf-next-VM_Test-37 fail Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-46 fail Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-9 fail Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 fail Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 fail Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 fail Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers fail 1 blamed authors not CCed: joannelkoong@gmail.com; 9 maintainers not CCed: kpsingh@kernel.org sdf@fomichev.me jolsa@kernel.org yonghong.song@linux.dev joannelkoong@gmail.com song@kernel.org john.fastabend@gmail.com haoluo@google.com martin.lau@linux.dev
netdev/build_clang success Errors and warnings before: 111 this patch: 111
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 10 this patch: 10
netdev/checkpatch warning CHECK: Blank lines aren't necessary after an open brace '{' WARNING: line length of 117 exceeds 80 columns WARNING: line length of 82 exceeds 80 columns WARNING: line length of 92 exceeds 80 columns WARNING: line length of 93 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Kumar Kartikeya Dwivedi Feb. 19, 2025, 12:51 p.m. UTC
The bpf_dynptr_slice_rdwr kfunc may return a pointer to the underlying
packet (if the requested slice is linear), or copy the data out into
the buffer passed into the kfunc. The verifier performs symbolic
execution assuming the returned value is a PTR_TO_MEM of a certain
size (passed into the kfunc), and ensures reads and writes are within
bounds.
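
For context, typical usage looks like this (a sketch, not taken from
the selftests):

	char buf[16];
	struct bpf_dynptr dptr;
	void *p;

	bpf_dynptr_from_skb(skb, 0, &dptr);
	p = bpf_dynptr_slice_rdwr(&dptr, 0, buf, sizeof(buf));
	if (!p)
		return 0;
	/* The verifier treats p as a PTR_TO_MEM of size 16; at runtime
	 * p may point either into the packet or at buf on the stack.
	 */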

A complication arises when the passed-in buffer is a stack pointer.
The returned pointer may be used to perform writes (unlike
bpf_dynptr_slice), but the verifier will be unaware of which stack
slots such writes may end up overwriting. As such, a program may
accidentally overwrite stack data (such as spilled pointers) through
such return values, which may cause undefined behavior.

Fix this by exploring an additional path whenever the passed-in
argument is a PTR_TO_STACK: a path where the returned buffer is the
same as this stack pointer. This allows us to ensure that the program
doesn't introduce unsafe behavior when this condition is triggered at
runtime.

The push_stack() call is performed after kfunc processing is over,
simply fixing up the return type to PTR_TO_STACK with proper frameno,
off, and var_off.

Fixes: 66e3a13e7c2c ("bpf: Add bpf_dynptr_slice and bpf_dynptr_slice_rdwr")
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
---
 kernel/bpf/verifier.c | 51 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

Comments

Kumar Kartikeya Dwivedi Feb. 19, 2025, 12:56 p.m. UTC | #1
On Wed, 19 Feb 2025 at 13:51, Kumar Kartikeya Dwivedi <memxor@gmail.com> wrote:
>
> [...]
>
> The push_stack() call is performed after kfunc processing is over,
> simply fixing up the return type to PTR_TO_STACK with proper frameno,
> off, and var_off.

I just sent this out to have a discussion with code and context (in a selftest).

The current approach has two problems:
It fails the xdp_dynptr selftest with a misaligned stack access
(-72+30 = -42) size 4 error, since an access at fp-42 of size 4 is not
4-byte aligned. This was happening before as well, but is surfaced now
that we see writes to the stack.

It also leads to a veristat regression (+80-100% in states) in two selftests.

We probably want to avoid doing push_stack due to the states increase,
and instead mark the stack slot whenever the returned PTR_TO_MEM is
used for writing; but we'll have to keep remarking whenever writes
happen, so it requires stashing some stack slot state in the register.
The other option is invalidating the returned PTR_TO_MEM when the
buffer on the stack is written to (i.e. the stack location gets
reused).
Kumar Kartikeya Dwivedi Feb. 19, 2025, 12:58 p.m. UTC | #2
On Wed, 19 Feb 2025 at 13:51, Kumar Kartikeya Dwivedi <memxor@gmail.com> wrote:
>
> [...]
>         }
>
> +       /* Push a state for exploration where the returned buffer is pointing to
> +        * the stack buffer passed into bpf_dynptr_slice_rdwr, otherwise we
> +        * cannot see writes to the stack solely through marking it as
> +        * PTR_TO_MEM. We don't do the same for bpf_dynptr_slice, because the
> +        * returned pointer is MEM_RDONLY, hence cannot be used to write to the
> +        * stack.
> +        */
> +       if (!insn->off && meta.arg_stack.found &&
> +           insn->imm == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) {
> +               struct bpf_verifier_state *branch;
> +               struct bpf_reg_state *regs, *r0;
> +
> +               branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false);
> +               if (!branch) {
> +                       verbose(env, "failed to push state to explore stack buffer in r0\n");
> +                       return -ENOMEM;
> +               }
> +
> +               regs = branch->frame[branch->curframe]->regs;
> +               r0 = &regs[BPF_REG_0];
> +
> +               r0->type = PTR_TO_STACK;

I forgot to fold in a hunk.
This needs to be marked PTR_MAYBE_NULL (preserve the set reg->id).
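
Roughly, the missing hunk would look like this (a sketch, not the
actual fold-in):

	r0->type = PTR_TO_STACK | PTR_MAYBE_NULL;
	/* keep the reg->id already set on r0 so the program's NULL
	 * check covers both the PTR_TO_MEM and PTR_TO_STACK branches
	 */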

> [...]
>
Alexei Starovoitov Feb. 19, 2025, 5:41 p.m. UTC | #3
On Wed, Feb 19, 2025 at 4:51 AM Kumar Kartikeya Dwivedi
<memxor@gmail.com> wrote:
>
> The bpf_dynptr_slice_rdwr kfunc may return a pointer to the underlying
> packet (if the requested slice is linear), or copy the data out into
> the buffer passed into the kfunc. The verifier performs symbolic
> execution assuming the returned value is a PTR_TO_MEM of a certain
> size (passed into the kfunc), and ensures reads and writes are within
> bounds.

sounds like
check_kfunc_mem_size_reg() -> check_mem_size_reg() ->
check_helper_mem_access()
   case PTR_TO_STACK:
      check_stack_range_initialized()
         clobber = true
             if (clobber) {
                  __mark_reg_unknown(env, &state->stack[spi].spilled_ptr);

is somehow broken?

ohh. It might be:
|| !is_kfunc_arg_optional(meta->btf, buff_arg)

This bit is wrong then.
When arg is not-null check_kfunc_mem_size_reg() should be called.
The PTR_TO_STACK abuse is a small subset of issues
if check_kfunc_mem_size_reg() is indeed not called.
Kumar Kartikeya Dwivedi Feb. 19, 2025, 6:09 p.m. UTC | #4
On Wed, 19 Feb 2025 at 18:41, Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> [...]
>
> sounds like
> check_kfunc_mem_size_reg() -> check_mem_size_reg() ->
> check_helper_mem_access()
>    case PTR_TO_STACK:
>       check_stack_range_initialized()
>          clobber = true
>              if (clobber) {
>                   __mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
>
> is somehow broken?
>
> ohh. It might be:
> || !is_kfunc_arg_optional(meta->btf, buff_arg)
>
> This bit is wrong then.
> When arg is not-null check_kfunc_mem_size_reg() should be called.
> The PTR_TO_STACK abuse is a small subset of issues
> if check_kfunc_mem_size_reg() is indeed not called.

The condition looks ok to me.

The condition to do check_mem_size_reg is !null || !opt.
So when it's null, and it's opt, it will be skipped.
When it's null, and it's not opt, the check will happen.
When arg is not-null, the said function is called, opt does not matter then.
So the stack slots are marked misc.
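
In code, the gating is roughly (a paraphrased sketch, not verbatim
verifier source):

	if (!register_is_null(buff_reg) ||
	    !is_kfunc_arg_optional(meta->btf, buff_arg)) {
		/* clobbers the overlapping stack slots, marking them
		 * STACK_MISC via check_stack_range_initialized()
		 */
		err = check_kfunc_mem_size_reg(env, size_reg, regno + 1);
		if (err)
			return err;
	}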

In our case we're not passing a NULL pointer in the selftest.

The problem occurs once we spill to that slot _after_ the call, and
then do a write through the returned mem pointer.

The final few lines from the selftest do the dirty thing, where r0 is
aliasing fp-8, and r1 = 0.

+ *(u64 *)(r10 - 8) = r8; \
+ *(u64 *)(r0 + 0) = r1; \
+ r8 = *(u64 *)(r10 - 8); \
+ r0 = *(u64 *)(r8 + 0); \

The write through r0 must re-mark the stack, but the verifier doesn't
know it's pointing to the stack.
push_stack was the conceptually cleaner/simpler fix, but it apparently
isn't good enough.

Remarking the stack on a write through the PTR_TO_MEM, or invalidating
the PTR_TO_MEM when r8 is spilled to fp-8 for the first time after the
call, are the two options. Both have some concerns (for the first, the
misaligned stack access is not caught; for the second, the PTR_TO_MEM
may outlive the stack frame).

I don't recall if there was a hardware/JIT-specific reason to care
about stack access alignment (on some architectures), but otherwise we
can over-approximately mark at 8-byte granularity any slot(s) that
overlap with the buffer to cover such a case. The second problem is
slightly trickier, which makes me lean towards invalidating the
returned PTR_TO_MEM when the stack slot is overwritten or the frame is
destroyed.
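
For the first option, a rough sketch of the over-approximate marking
(mark_stack_slot_misc() is a hypothetical helper; the slot math follows
the verifier's spi convention):

	/* Mark every 8-byte stack slot overlapping the buffer
	 * [off, off + size) as STACK_MISC; off is negative and spi
	 * counts 8-byte slots down from the frame pointer.
	 */
	for (int spi = (-(off + size)) / BPF_REG_SIZE;
	     spi <= (-off - 1) / BPF_REG_SIZE; spi++)
		mark_stack_slot_misc(state, spi); /* hypothetical helper */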
Eduard Zingerman Feb. 19, 2025, 9:20 p.m. UTC | #5
On Wed, 2025-02-19 at 13:56 +0100, Kumar Kartikeya Dwivedi wrote:

[...]

> It also leads to a veristat regression (+80-100% in states) in two selftests.
>
> We probably want to avoid doing push_stack due to the states increase,
> and instead mark the stack slot whenever the returned PTR_TO_MEM is
> used for writing; but we'll have to keep remarking whenever writes
> happen, so it requires stashing some stack slot state in the register.
> The other option is invalidating the returned PTR_TO_MEM when the
> buffer on the stack is written to (i.e. the stack location gets
> reused).

Would it be wrong to always consider r0 to be a pointer to the stack
if a buffer is provided? It's like modelling the PTR_TO_MEM with some
additional precision.

Otherwise, I think push_stack() is fine, as it keeps the implementation simple.
Alexei Starovoitov Feb. 19, 2025, 10:16 p.m. UTC | #6
On Wed, Feb 19, 2025 at 1:20 PM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> On Wed, 2025-02-19 at 13:56 +0100, Kumar Kartikeya Dwivedi wrote:
>
> [...]
>
> > [...]
>
> Would it be wrong to always consider r0 to be a pointer to the stack
> if a buffer is provided? It's like modelling the PTR_TO_MEM with some
> additional precision.
>
> Otherwise, I think push_stack() is fine, as it keeps the implementation simple.

Discussed plenty of options offline with Kumar.
The next step is to experiment with returning PTR_TO_STACK with mem_size.
Andrii Nakryiko Feb. 20, 2025, 12:12 a.m. UTC | #7
On Wed, Feb 19, 2025 at 10:10 AM Kumar Kartikeya Dwivedi
<memxor@gmail.com> wrote:
>
> [...]
>
> In our case we're not passing a NULL pointer in the selftest.
>
> The problem occurs once we spill to that slot _after_ the call, and
> then do a write through the returned mem pointer.
>
> The final few lines from the selftest do the dirty thing, where r0 is
> aliasing fp-8, and r1 = 0.
>
> + *(u64 *)(r10 - 8) = r8; \
> + *(u64 *)(r0 + 0) = r1; \
> + r8 = *(u64 *)(r10 - 8); \
> + r0 = *(u64 *)(r8 + 0); \
>
> The write through r0 must re-mark the stack, but the verifier doesn't
> know it's pointing to the stack.
> push_stack was the conceptually cleaner/simpler fix, but it apparently
> isn't good enough.
>
> Remarking the stack on a write through the PTR_TO_MEM, or invalidating
> the PTR_TO_MEM when r8 is spilled to fp-8 for the first time after the
> call, are the two options. Both have some concerns (for the first, the
> misaligned stack access is not caught; for the second, the PTR_TO_MEM
> may outlive the stack frame).

Reading the description of the problem my first instinct was to have
stack slots with associated obj_ref_id for such cases and when
something writes into that stack slot, invalidate everything with that
obj_ref_id. So that's probably what you mean by invalidating
PTR_TO_MEM, right?

Not sure I understand what "PTR_TO_STACK with mem_size" (that Alexei
mentioned in another email) means, though, so hard to compare.

> [...]
Kumar Kartikeya Dwivedi Feb. 20, 2025, 3:41 p.m. UTC | #8
On Thu, 20 Feb 2025 at 01:13, Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
>
> [...]
>
> Reading the description of the problem my first instinct was to have
> stack slots with associated obj_ref_id for such cases and when
> something writes into that stack slot, invalidate everything with that
> obj_ref_id. So that's probably what you mean by invalidating
> PTR_TO_MEM, right?
>
> Not sure I understand what "PTR_TO_STACK with mem_size" (that Alexei
> mentioned in another email) means, though, so hard to compare.
>

Invalidation is certainly one option. The one Alexei mentioned was
where we discussed bounding how much data can be read through the
PTR_TO_STACK (similar to PTR_TO_MEM), and marking r0 as PTR_TO_STACK.
This ends up combining the constraints of both types of pointers (it
may as well be called PTR_TO_STACK_OR_MEM) without forking paths.

The benefit over the push_stack approach is that we avoid the states
regression for cls_redirect and balancer_ingress.
For the selftest failure, I plan to just silence the error by changing it.

> > [...]
Andrii Nakryiko Feb. 21, 2025, 12:27 a.m. UTC | #9
On Thu, Feb 20, 2025 at 7:41 AM Kumar Kartikeya Dwivedi
<memxor@gmail.com> wrote:
> [...]
>
> Invalidation is certainly one option. The one Alexei mentioned was
> where we discussed bounding how much data can be read through the
> PTR_TO_STACK (similar to PTR_TO_MEM), and marking r0 as PTR_TO_STACK.
> This ends up combining the constraints of both types of pointers (it
> may as well be called PTR_TO_STACK_OR_MEM) without forking paths.

Yeah, PTR_TO_STACK_OR_MEM would be more precise. But how does that
help with this problem? Not sure I follow the idea of the solution
(but I can wait for patches to be posted).

>
> The benefit over the push_stack approach is that we avoid the states
> regression for cls_redirect and balancer_ingress.

yeah, of course, I don't particularly like the push_stack approach.

> [...]
Kumar Kartikeya Dwivedi Feb. 21, 2025, 12:36 a.m. UTC | #10
On Fri, 21 Feb 2025 at 01:27, Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
>
> [...]
>
> Yeah, PTR_TO_STACK_OR_MEM would be more precise. But how does that
> help with this problem? Not sure I follow the idea of the solution
> (but I can wait for patches to be posted).

The reason for push_stack was to ensure writes through the returned
pointer take effect on stack state.
By keeping it PTR_TO_STACK, we get that behavior.
However, in the other path of this patch, the verifier would verify
the pointer as PTR_TO_MEM, with a bounded mem_size.
Thus it would not allow writing beyond a certain size.
If we simply set r0 to PTR_TO_STACK now, it would possibly allow going
beyond the size that was passed to the kfunc.
E.g. say the buffer was at fp-24 and the size was 8: we could then also
modify fp-16 and fp-8 through it, not just the 8 bytes at fp-24.
Adding an additional mem_size field and checking it (when set) for
PTR_TO_STACK allows us to ensure we cannot write beyond 8 bytes
through r0=fp-24.
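
As a layout sketch for that example:

	/* r0 = fp-24, mem_size = 8:
	 *   fp-24 .. fp-17   the 8-byte buffer passed to the kfunc
	 *                    (the only bytes r0 should be able to touch)
	 *   fp-16 .. fp-9    unrelated slot, reachable as r0+8  .. r0+15
	 *   fp-8  .. fp-1    unrelated slot, reachable as r0+16 .. r0+23
	 */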

I hope this is clearer.

> [...]
Andrii Nakryiko Feb. 21, 2025, 1:02 a.m. UTC | #11
On Thu, Feb 20, 2025 at 4:37 PM Kumar Kartikeya Dwivedi
<memxor@gmail.com> wrote:
>
> [...]
>
> The reason for push_stack was to ensure writes through the returned
> pointer take effect on stack state.
> By keeping it PTR_TO_STACK, we get that behavior.
> However, in the other path of this patch, the verifier would verify
> the pointer as PTR_TO_MEM, with a bounded mem_size.
> Thus it would not allow writing beyond a certain size.
> If we simply set r0 to PTR_TO_STACK now, it would possibly allow going
> beyond the size that was passed to the kfunc.
> E.g. say the buffer was at fp-24 and the size was 8: we could then also
> modify fp-16 and fp-8 through it, not just the 8 bytes at fp-24.
> Adding an additional mem_size field and checking it (when set) for
> PTR_TO_STACK allows us to ensure we cannot write beyond 8 bytes
> through r0=fp-24.

yeah, this part is clear, of course, but I was actually more worried
about handling the following situation:


struct bpf_dynptr dptr;
void *slice;
union {
    char buf[32];
    void *map_value;
} blah;

bpf_dynptr_from_skb(..., &dptr);

slice = bpf_dynptr_slice_rdwr(&dptr, 0, &blah.buf, sizeof(blah.buf));
if (!slice) return 0; /* whatever */

/* now slice points to blah.{buf,map_value}, right? */

blah.map_value = bpf_map_lookup_elem(&some_map, ...);
if (!blah.map_value) return 0;

/* we shouldn't allow working with slice at this point */
*(u64 *)slice = 0xdeadbeef; /* overwrite map_value */

*(u64 *)blah.map_value = 123; /* BOOM */


Would this PTR_TO_STACK_OR_MEM approach handle this? If so, can you
briefly walk me through how? Thanks!

> [...]
Kumar Kartikeya Dwivedi Feb. 21, 2025, 1:13 a.m. UTC | #12
On Fri, 21 Feb 2025 at 02:02, Andrii Nakryiko <andrii.nakryiko@gmail.com> wrote:
>
> [...]
>
> yeah, this part is clear, of course, but I was actually more worried
> about handling the following situation:
>

For simplicity, let's just forget about stack_or_mem, and assume a
stack pointer with this extra mem_size.
I haven't written the code yet, but the idea would be to limit writes
through it to [fp-N, fp-N+mem_size).
A screening check would run before we pass through to the normal stack
access handling code.
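
Perhaps something like this (a hypothetical sketch, assuming the
existing mem_size field is also kept valid for PTR_TO_STACK, and
ignoring var_off for brevity):

	/* in check_stack_access_within_bounds(), before the usual checks */
	if (reg->type == PTR_TO_STACK && reg->mem_size) {
		s64 low = reg->off;                  /* fp-N            */
		s64 high = reg->off + reg->mem_size; /* fp-N + mem_size */

		if (off < low || off + (s64)access_size > high) {
			verbose(env, "stack access escapes the kfunc-provided buffer\n");
			return -EACCES;
		}
	}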

>
> struct bpf_dynptr dptr;
> void *slice;
> union {
>     char buf[32];
>     void *map_value;
> } blah;
>
> bpf_dynptr_from_skb(..., &dptr);

At this point we mark dptr.

>
> slice = bpf_dynptr_slice_rdwr(&dptr, 0, &blah.buf, sizeof(blah.buf));
> if (!slice) return 0; /* whatever */
>

slice is PTR_TO_STACK w/ mem_size = 32.

We mark the bytes for buf as STACK_MISC at this point.

> /* now slice points to blah.{buf,map_value}, right? */
>
> blah.map_value = bpf_map_lookup_elem(&some_map, ...);
> if (!blah.map_value) return 0;

The 8-byte slot for map_value within these 32 bytes becomes a spilled
map_value pointer. The rest remains misc.

>
> /* we shouldn't allow working with slice at this point */
> *(u64 *)slice = 0xdeadbeef; /* overwrite map_value */

And because slice is PTR_TO_STACK, we will notice the overwrite of the
map value with a scalar.

>
> *(u64 *)blah.map_value = 123; /* BOOM */
>

So this would fail as a deref of a scalar.

Let me know if I missed something you were trying to point out.

Patch

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e57b7c949860..ad57144f044c 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -329,6 +329,12 @@  struct bpf_kfunc_call_arg_meta {
 	struct {
 		struct btf_field *field;
 	} arg_rbtree_root;
+	struct {
+		u32 frameno;
+		s32 off;
+		struct tnum var_off;
+		bool found;
+	} arg_stack;
 	struct {
 		enum bpf_dynptr_type type;
 		u32 id;
@@ -7287,6 +7293,7 @@  static int check_stack_access_within_bounds(
 		min_off = (s64)reg->var_off.value + off;
 		max_off = min_off + access_size;
 	} else {
+
 		if (reg->smax_value >= BPF_MAX_VAR_OFF ||
 		    reg->smin_value <= -BPF_MAX_VAR_OFF) {
 			verbose(env, "invalid unbounded variable-offset%s stack R%d\n",
@@ -13017,6 +13024,22 @@  static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
 				meta->arg_constant.value = size_reg->var_off.value;
 			}
 
+			/* We need to simulate a path where return value is the
+			 * stack buffer passed into the kfunc, therefore store
+			 * its metadata here.
+			 */
+			if (buff_reg->type == PTR_TO_STACK &&
+			    meta->func_id == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) {
+				if (meta->arg_stack.found) {
+					verbose(env, "verifier internal error: only one stack argument permitted\n");
+					return -EFAULT;
+				}
+				meta->arg_stack.found = true;
+				meta->arg_stack.frameno = buff_reg->frameno;
+				meta->arg_stack.off = buff_reg->off;
+				meta->arg_stack.var_off = buff_reg->var_off;
+			}
+
 			/* Skip next '__sz' or '__szk' argument */
 			i++;
 			break;
@@ -13598,6 +13621,34 @@  static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 			return err;
 	}
 
+	/* Push a state for exploration where the returned buffer is pointing to
+	 * the stack buffer passed into bpf_dynptr_slice_rdwr, otherwise we
+	 * cannot see writes to the stack solely through marking it as
+	 * PTR_TO_MEM. We don't do the same for bpf_dynptr_slice, because the
+	 * returned pointer is MEM_RDONLY, hence cannot be used to write to the
+	 * stack.
+	 */
+	if (!insn->off && meta.arg_stack.found &&
+	    insn->imm == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) {
+		struct bpf_verifier_state *branch;
+		struct bpf_reg_state *regs, *r0;
+
+		branch = push_stack(env, env->insn_idx + 1, env->insn_idx, false);
+		if (!branch) {
+			verbose(env, "failed to push state to explore stack buffer in r0\n");
+			return -ENOMEM;
+		}
+
+		regs = branch->frame[branch->curframe]->regs;
+		r0 = &regs[BPF_REG_0];
+
+		r0->type = PTR_TO_STACK;
+		mark_reg_known_zero(env, regs, BPF_REG_0);
+		r0->frameno = meta.arg_stack.frameno;
+		r0->off = meta.arg_stack.off;
+		r0->var_off = meta.arg_stack.var_off;
+	}
+
 	return 0;
 }