diff mbox series

[bpf-next,3/7] bpf: states_equal() must build idmap for all function frames

Message ID 20221209135733.28851-4-eddyz87@gmail.com (mailing list archive)
State Accepted
Commit 5dd9cdbc9dec3e99b19e483767e247d15ca8cc0d
Delegated to: BPF
Headers show
Series stricter register ID checking in regsafe() | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-VM_Test-10 success Logs for test_maps on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-12 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-14 success Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for test_progs on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-17 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-18 success Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-19 success Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for test_progs_no_alu32 on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-22 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_progs_no_alu32_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-29 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for test_progs_parallel on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-32 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-34 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-35 success Logs for test_verifier on aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-36 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-37 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-38 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-16 success Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_progs_no_alu32_parallel on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for test_progs_parallel on s390x with gcc
netdev/tree_selection success Clearly marked for bpf-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 66 this patch: 66
netdev/cc_maintainers warning 7 maintainers not CCed: kpsingh@kernel.org haoluo@google.com song@kernel.org martin.lau@linux.dev sdf@google.com john.fastabend@gmail.com jolsa@kernel.org
netdev/build_clang success Errors and warnings before: 17 this patch: 17
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 66 this patch: 66
netdev/checkpatch warning WARNING: line length of 88 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-11 success Logs for test_maps on s390x with gcc
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ${{ matrix.test }} on ${{ matrix.arch }} with ${{ matrix.toolchain }}
bpf/vmtest-bpf-next-VM_Test-2 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for aarch64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-5 fail Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-8 success Logs for llvm-toolchain
bpf/vmtest-bpf-next-VM_Test-9 success Logs for set-matrix

Commit Message

Eduard Zingerman Dec. 9, 2022, 1:57 p.m. UTC
verifier.c:states_equal() must maintain register ID mapping across all
function frames. Otherwise the following example might be erroneously
marked as safe:

main:
    fp[-24] = map_lookup_elem(...)  ; frame[0].fp[-24].id == 1
    fp[-32] = map_lookup_elem(...)  ; frame[0].fp[-32].id == 2
    r1 = &fp[-24]
    r2 = &fp[-32]
    call foo()
    r0 = 0
    exit

foo:
  0: r9 = r1
  1: r8 = r2
  2: r7 = ktime_get_ns()
  3: r6 = ktime_get_ns()
  4: if (r6 > r7) goto skip_assign
  5: r9 = r8

skip_assign:                ; <--- checkpoint
  6: r9 = *r9               ; (a) frame[1].r9.id == 2
                            ; (b) frame[1].r9.id == 1

  7: if r9 == 0 goto exit:  ; mark_ptr_or_null_regs() transfers != 0 info
                            ; for all regs sharing ID:
                            ;   (a) r9 != 0 => &frame[0].fp[-32] != 0
                            ;   (b) r9 != 0 => &frame[0].fp[-24] != 0

  8: r8 = *r8               ; (a) r8 == &frame[0].fp[-32]
                            ; (b) r8 == &frame[0].fp[-32]
  9: r0 = *r8               ; (a) safe
                            ; (b) unsafe

exit:
 10: exit

While processing call to foo() verifier considers the following
execution paths:

(a) 0-10
(b) 0-4,6-10
(There is also path 0-7,10 but it is not interesting for the issue at
 hand. (a) is verified first.)

Suppose that checkpoint is created at (6) when path (a) is verified,
next path (b) is verified and (6) is reached.

If states_equal() maintains separate 'idmap' for each frame the
mapping at (6) for frame[1] would be empty and
regsafe(r9)::check_ids() would add a pair 2->1 and return true,
which is an error.

If states_equal() maintains single 'idmap' for all frames the mapping
at (6) would be { 1->1, 2->2 } and regsafe(r9)::check_ids() would
return false when trying to add a pair 2->1.

This issue was suggested in the following discussion:
https://lore.kernel.org/bpf/CAEf4BzbFB5g4oUfyxk9rHy-PJSLQ3h8q9mV=rVoXfr_JVm8+1Q@mail.gmail.com/

Suggested-by: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
---
 include/linux/bpf_verifier.h | 4 ++--
 kernel/bpf/verifier.c        | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

Comments

Andrii Nakryiko Dec. 14, 2022, 12:35 a.m. UTC | #1
On Fri, Dec 9, 2022 at 5:58 AM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> verifier.c:states_equal() must maintain register ID mapping across all
> function frames. Otherwise the following example might be erroneously
> marked as safe:
>
> main:
>     fp[-24] = map_lookup_elem(...)  ; frame[0].fp[-24].id == 1
>     fp[-32] = map_lookup_elem(...)  ; frame[0].fp[-32].id == 2
>     r1 = &fp[-24]
>     r2 = &fp[-32]
>     call foo()
>     r0 = 0
>     exit
>
> foo:
>   0: r9 = r1
>   1: r8 = r2
>   2: r7 = ktime_get_ns()
>   3: r6 = ktime_get_ns()
>   4: if (r6 > r7) goto skip_assign
>   5: r9 = r8
>
> skip_assign:                ; <--- checkpoint
>   6: r9 = *r9               ; (a) frame[1].r9.id == 2
>                             ; (b) frame[1].r9.id == 1
>
>   7: if r9 == 0 goto exit:  ; mark_ptr_or_null_regs() transfers != 0 info
>                             ; for all regs sharing ID:
>                             ;   (a) r9 != 0 => &frame[0].fp[-32] != 0
>                             ;   (b) r9 != 0 => &frame[0].fp[-24] != 0
>
>   8: r8 = *r8               ; (a) r8 == &frame[0].fp[-32]
>                             ; (b) r8 == &frame[0].fp[-32]
>   9: r0 = *r8               ; (a) safe
>                             ; (b) unsafe
>
> exit:
>  10: exit
>
> While processing call to foo() verifier considers the following
> execution paths:
>
> (a) 0-10
> (b) 0-4,6-10
> (There is also path 0-7,10 but it is not interesting for the issue at
>  hand. (a) is verified first.)
>
> Suppose that checkpoint is created at (6) when path (a) is verified,
> next path (b) is verified and (6) is reached.
>
> If states_equal() maintains separate 'idmap' for each frame the
> mapping at (6) for frame[1] would be empty and
> regsafe(r9)::check_ids() would add a pair 2->1 and return true,
> which is an error.
>
> If states_equal() maintains single 'idmap' for all frames the mapping
> at (6) would be { 1->1, 2->2 } and regsafe(r9)::check_ids() would
> return false when trying to add a pair 2->1.
>
> This issue was suggested in the following discussion:
> https://lore.kernel.org/bpf/CAEf4BzbFB5g4oUfyxk9rHy-PJSLQ3h8q9mV=rVoXfr_JVm8+1Q@mail.gmail.com/
>
> Suggested-by: Andrii Nakryiko <andrii.nakryiko@gmail.com>
> Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
> ---
>  include/linux/bpf_verifier.h | 4 ++--
>  kernel/bpf/verifier.c        | 3 ++-
>  2 files changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
> index 70d06a99f0b8..c1f769515beb 100644
> --- a/include/linux/bpf_verifier.h
> +++ b/include/linux/bpf_verifier.h
> @@ -273,9 +273,9 @@ struct bpf_id_pair {
>         u32 cur;
>  };
>
> -/* Maximum number of register states that can exist at once */
> -#define BPF_ID_MAP_SIZE (MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE)
>  #define MAX_CALL_FRAMES 8
> +/* Maximum number of register states that can exist at once */
> +#define BPF_ID_MAP_SIZE ((MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE) * MAX_CALL_FRAMES)

this is overly pessimistic, the total number of stack slots doesn't
change no matter how many call frames we have, it would be better to
define this as:

#define BPF_ID_MAP_SIZE (MAX_BPF_REG * MAX_CALL_FRAMES + MAX_BPF_STACK
/ BPF_REG_SIZE)

Unless I missed something.



>  struct bpf_verifier_state {
>         /* call stack tracking */
>         struct bpf_func_state *frame[MAX_CALL_FRAMES];
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index d05c5d0344c6..9188370a7ebe 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -13122,7 +13122,6 @@ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_stat
>  {
>         int i;
>
> -       memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
>         for (i = 0; i < MAX_BPF_REG; i++)
>                 if (!regsafe(env, &old->regs[i], &cur->regs[i],
>                              env->idmap_scratch))
> @@ -13146,6 +13145,8 @@ static bool states_equal(struct bpf_verifier_env *env,
>         if (old->curframe != cur->curframe)
>                 return false;
>
> +       memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
> +
>         /* Verification state from speculative execution simulation
>          * must never prune a non-speculative execution one.
>          */
> --
> 2.34.1
>
Eduard Zingerman Dec. 14, 2022, 3:33 p.m. UTC | #2
On Tue, 2022-12-13 at 16:35 -0800, Andrii Nakryiko wrote:
> On Fri, Dec 9, 2022 at 5:58 AM Eduard Zingerman <eddyz87@gmail.com> wrote:
> > 
> > verifier.c:states_equal() must maintain register ID mapping across all
> > function frames. Otherwise the following example might be erroneously
> > marked as safe:
> > 
> > main:
> >     fp[-24] = map_lookup_elem(...)  ; frame[0].fp[-24].id == 1
> >     fp[-32] = map_lookup_elem(...)  ; frame[0].fp[-32].id == 2
> >     r1 = &fp[-24]
> >     r2 = &fp[-32]
> >     call foo()
> >     r0 = 0
> >     exit
> > 
> > foo:
> >   0: r9 = r1
> >   1: r8 = r2
> >   2: r7 = ktime_get_ns()
> >   3: r6 = ktime_get_ns()
> >   4: if (r6 > r7) goto skip_assign
> >   5: r9 = r8
> > 
> > skip_assign:                ; <--- checkpoint
> >   6: r9 = *r9               ; (a) frame[1].r9.id == 2
> >                             ; (b) frame[1].r9.id == 1
> > 
> >   7: if r9 == 0 goto exit:  ; mark_ptr_or_null_regs() transfers != 0 info
> >                             ; for all regs sharing ID:
> >                             ;   (a) r9 != 0 => &frame[0].fp[-32] != 0
> >                             ;   (b) r9 != 0 => &frame[0].fp[-24] != 0
> > 
> >   8: r8 = *r8               ; (a) r8 == &frame[0].fp[-32]
> >                             ; (b) r8 == &frame[0].fp[-32]
> >   9: r0 = *r8               ; (a) safe
> >                             ; (b) unsafe
> > 
> > exit:
> >  10: exit
> > 
> > While processing call to foo() verifier considers the following
> > execution paths:
> > 
> > (a) 0-10
> > (b) 0-4,6-10
> > (There is also path 0-7,10 but it is not interesting for the issue at
> >  hand. (a) is verified first.)
> > 
> > Suppose that checkpoint is created at (6) when path (a) is verified,
> > next path (b) is verified and (6) is reached.
> > 
> > If states_equal() maintains separate 'idmap' for each frame the
> > mapping at (6) for frame[1] would be empty and
> > regsafe(r9)::check_ids() would add a pair 2->1 and return true,
> > which is an error.
> > 
> > If states_equal() maintains single 'idmap' for all frames the mapping
> > at (6) would be { 1->1, 2->2 } and regsafe(r9)::check_ids() would
> > return false when trying to add a pair 2->1.
> > 
> > This issue was suggested in the following discussion:
> > https://lore.kernel.org/bpf/CAEf4BzbFB5g4oUfyxk9rHy-PJSLQ3h8q9mV=rVoXfr_JVm8+1Q@mail.gmail.com/
> > 
> > Suggested-by: Andrii Nakryiko <andrii.nakryiko@gmail.com>
> > Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
> > ---
> >  include/linux/bpf_verifier.h | 4 ++--
> >  kernel/bpf/verifier.c        | 3 ++-
> >  2 files changed, 4 insertions(+), 3 deletions(-)
> > 
> > diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
> > index 70d06a99f0b8..c1f769515beb 100644
> > --- a/include/linux/bpf_verifier.h
> > +++ b/include/linux/bpf_verifier.h
> > @@ -273,9 +273,9 @@ struct bpf_id_pair {
> >         u32 cur;
> >  };
> > 
> > -/* Maximum number of register states that can exist at once */
> > -#define BPF_ID_MAP_SIZE (MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE)
> >  #define MAX_CALL_FRAMES 8
> > +/* Maximum number of register states that can exist at once */
> > +#define BPF_ID_MAP_SIZE ((MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE) * MAX_CALL_FRAMES)
> 
> this is overly pessimistic, the total number of stack slots doesn't
> change no matter how many call frames we have, it would be better to
> define this as:
> 
> #define BPF_ID_MAP_SIZE (MAX_BPF_REG * MAX_CALL_FRAMES + MAX_BPF_STACK
> / BPF_REG_SIZE)
> 
> Unless I missed something.

Current bpf_check() mechanics looks as follows:
- do_check_{subprogs,main}() (indirectly):
  - when a pseudo-function is called the call is processed by
    __check_func_call(), it allocates callee's struct bpf_func_state
    using kzalloc() and does not update ->stack and ->allocated_stack fields;
  - when a stack write is processed by check_mem_access():
    - check_stack_access_within_bounds() verifies that offset is within
      0..-MAX_BPF_STACK;
    - check_stack_write_{fixed,var}_off() calls grow_stack_state() which uses
      realloc_array() to increase ->stack as necessary;
    - update_stack_depth() is used to increase
      env->subprog_info[...].stack_depth as appropriate;
- check_max_stack_depth() verifies that cumulative stack depth does not
  exceed MAX_BPF_STACK using env->subprog_info[...].stack_depth values.

This means that during do_check_*() we can have MAX_CALL_FRAMES functions
each with a stack of size MAX_BPF_STACK. The following example could be
used to verify the above assumptions:

{
	"check_max_depth",
	.insns = {
		BPF_ST_MEM(BPF_DW, BPF_REG_FP, -512, 0),
		BPF_CALL_REL(2),
		BPF_MOV64_IMM(BPF_REG_0, 0),
		BPF_EXIT_INSN(),

		BPF_ST_MEM(BPF_DW, BPF_REG_FP, -512, 0),
		BPF_MOV64_IMM(BPF_REG_0, 0),
		BPF_EXIT_INSN(),
	},
	.result = REJECT,
},

Here is the verifier log that shows that two frames both of size
MAX_BPF_STACK slots were present during verification:

# ./test_verifier -vv 1030
#1030/p check_max_depth FAIL
Unexpected error message!
	EXP: (null)
	RES:
func#0 @0
func#1 @4
0: R1=ctx(off=0,imm=0) R10=fp0
0: (7a) *(u64 *)(r10 -512) = 0        ; R10=fp0 fp-512_w=mmmmmmmm
1: (85) call pc+2
caller:
 R10=fp0 fp-512_w=mmmmmmmm
callee:
 frame1: R1=ctx(off=0,imm=0) R10=fp0
4: (7a) *(u64 *)(r10 -512) = 0        ; frame1: R10=fp0 fp-512_w=mmmmmmmm
5: (b7) r0 = 0                        ; frame1: R0_w=0
6: (95) exit
returning from callee:
 frame1: R0_w=0 R1=ctx(off=0,imm=0) R10=fp0 fp-512_w=mmmmmmmm
to caller at 2:
 R0_w=0 R10=fp0 fp-512_w=mmmmmmmm

from 6 to 2: R0_w=0 R10=fp0 fp-512_w=mmmmmmmm
2: (b7) r0 = 0                        ; R0_w=0
3: (95) exit
combined stack size of 2 calls is 1024. Too large
verification time 541 usec
stack depth 512+512

> 
> 
> 
> >  struct bpf_verifier_state {
> >         /* call stack tracking */
> >         struct bpf_func_state *frame[MAX_CALL_FRAMES];
> > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > index d05c5d0344c6..9188370a7ebe 100644
> > --- a/kernel/bpf/verifier.c
> > +++ b/kernel/bpf/verifier.c
> > @@ -13122,7 +13122,6 @@ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_stat
> >  {
> >         int i;
> > 
> > -       memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
> >         for (i = 0; i < MAX_BPF_REG; i++)
> >                 if (!regsafe(env, &old->regs[i], &cur->regs[i],
> >                              env->idmap_scratch))
> > @@ -13146,6 +13145,8 @@ static bool states_equal(struct bpf_verifier_env *env,
> >         if (old->curframe != cur->curframe)
> >                 return false;
> > 
> > +       memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
> > +
> >         /* Verification state from speculative execution simulation
> >          * must never prune a non-speculative execution one.
> >          */
> > --
> > 2.34.1
> >
Andrii Nakryiko Dec. 14, 2022, 5:24 p.m. UTC | #3
On Wed, Dec 14, 2022 at 7:33 AM Eduard Zingerman <eddyz87@gmail.com> wrote:
>
> On Tue, 2022-12-13 at 16:35 -0800, Andrii Nakryiko wrote:
> > On Fri, Dec 9, 2022 at 5:58 AM Eduard Zingerman <eddyz87@gmail.com> wrote:
> > >
> > > verifier.c:states_equal() must maintain register ID mapping across all
> > > function frames. Otherwise the following example might be erroneously
> > > marked as safe:
> > >
> > > main:
> > >     fp[-24] = map_lookup_elem(...)  ; frame[0].fp[-24].id == 1
> > >     fp[-32] = map_lookup_elem(...)  ; frame[0].fp[-32].id == 2
> > >     r1 = &fp[-24]
> > >     r2 = &fp[-32]
> > >     call foo()
> > >     r0 = 0
> > >     exit
> > >
> > > foo:
> > >   0: r9 = r1
> > >   1: r8 = r2
> > >   2: r7 = ktime_get_ns()
> > >   3: r6 = ktime_get_ns()
> > >   4: if (r6 > r7) goto skip_assign
> > >   5: r9 = r8
> > >
> > > skip_assign:                ; <--- checkpoint
> > >   6: r9 = *r9               ; (a) frame[1].r9.id == 2
> > >                             ; (b) frame[1].r9.id == 1
> > >
> > >   7: if r9 == 0 goto exit:  ; mark_ptr_or_null_regs() transfers != 0 info
> > >                             ; for all regs sharing ID:
> > >                             ;   (a) r9 != 0 => &frame[0].fp[-32] != 0
> > >                             ;   (b) r9 != 0 => &frame[0].fp[-24] != 0
> > >
> > >   8: r8 = *r8               ; (a) r8 == &frame[0].fp[-32]
> > >                             ; (b) r8 == &frame[0].fp[-32]
> > >   9: r0 = *r8               ; (a) safe
> > >                             ; (b) unsafe
> > >
> > > exit:
> > >  10: exit
> > >
> > > While processing call to foo() verifier considers the following
> > > execution paths:
> > >
> > > (a) 0-10
> > > (b) 0-4,6-10
> > > (There is also path 0-7,10 but it is not interesting for the issue at
> > >  hand. (a) is verified first.)
> > >
> > > Suppose that checkpoint is created at (6) when path (a) is verified,
> > > next path (b) is verified and (6) is reached.
> > >
> > > If states_equal() maintains separate 'idmap' for each frame the
> > > mapping at (6) for frame[1] would be empty and
> > > regsafe(r9)::check_ids() would add a pair 2->1 and return true,
> > > which is an error.
> > >
> > > If states_equal() maintains single 'idmap' for all frames the mapping
> > > at (6) would be { 1->1, 2->2 } and regsafe(r9)::check_ids() would
> > > return false when trying to add a pair 2->1.
> > >
> > > This issue was suggested in the following discussion:
> > > https://lore.kernel.org/bpf/CAEf4BzbFB5g4oUfyxk9rHy-PJSLQ3h8q9mV=rVoXfr_JVm8+1Q@mail.gmail.com/
> > >
> > > Suggested-by: Andrii Nakryiko <andrii.nakryiko@gmail.com>
> > > Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
> > > ---
> > >  include/linux/bpf_verifier.h | 4 ++--
> > >  kernel/bpf/verifier.c        | 3 ++-
> > >  2 files changed, 4 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
> > > index 70d06a99f0b8..c1f769515beb 100644
> > > --- a/include/linux/bpf_verifier.h
> > > +++ b/include/linux/bpf_verifier.h
> > > @@ -273,9 +273,9 @@ struct bpf_id_pair {
> > >         u32 cur;
> > >  };
> > >
> > > -/* Maximum number of register states that can exist at once */
> > > -#define BPF_ID_MAP_SIZE (MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE)
> > >  #define MAX_CALL_FRAMES 8
> > > +/* Maximum number of register states that can exist at once */
> > > +#define BPF_ID_MAP_SIZE ((MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE) * MAX_CALL_FRAMES)
> >
> > this is overly pessimistic, the total number of stack slots doesn't
> > change no matter how many call frames we have, it would be better to
> > define this as:
> >
> > #define BPF_ID_MAP_SIZE (MAX_BPF_REG * MAX_CALL_FRAMES + MAX_BPF_STACK
> > / BPF_REG_SIZE)
> >
> > Unless I missed something.
>
> Current bpf_check() mechanics looks as follows:
> - do_check_{subprogs,main}() (indirectly):
>   - when a pseudo-function is called the call is processed by
>     __check_func_call(), it allocates callee's struct bpf_func_state
>     using kzalloc() and does not update ->stack and ->allocated_stack fields;
>   - when a stack write is processed by check_mem_access():
>     - check_stack_access_within_bounds() verifies that offset is within
>       0..-MAX_BPF_STACK;
>     - check_stack_write_{fixed,var}_off() calls grow_stack_state() which uses
>       realloc_array() to increase ->stack as necessary;
>     - update_stack_depth() is used to increase
>       env->subprog_info[...].stack_depth as appropriate;
> - check_max_stack_depth() verifies that cumulative stack depth does not
>   exceed MAX_BPF_STACK using env->subprog_info[...].stack_depth values.
>
> This means that during do_check_*() we can have MAX_CALL_FRAMES functions
> each with a stack of size MAX_BPF_STACK. The following example could be
> used to verify the above assumptions:
>
> {
>         "check_max_depth",
>         .insns = {
>                 BPF_ST_MEM(BPF_DW, BPF_REG_FP, -512, 0),
>                 BPF_CALL_REL(2),
>                 BPF_MOV64_IMM(BPF_REG_0, 0),
>                 BPF_EXIT_INSN(),
>
>                 BPF_ST_MEM(BPF_DW, BPF_REG_FP, -512, 0),
>                 BPF_MOV64_IMM(BPF_REG_0, 0),
>                 BPF_EXIT_INSN(),
>         },
>         .result = REJECT,
> },
>
> Here is the verifier log that shows that two frames both of size
> MAX_BPF_STACK slots were present during verification:
>
> # ./test_verifier -vv 1030
> #1030/p check_max_depth FAIL
> Unexpected error message!
>         EXP: (null)
>         RES:
> func#0 @0
> func#1 @4
> 0: R1=ctx(off=0,imm=0) R10=fp0
> 0: (7a) *(u64 *)(r10 -512) = 0        ; R10=fp0 fp-512_w=mmmmmmmm
> 1: (85) call pc+2
> caller:
>  R10=fp0 fp-512_w=mmmmmmmm
> callee:
>  frame1: R1=ctx(off=0,imm=0) R10=fp0
> 4: (7a) *(u64 *)(r10 -512) = 0        ; frame1: R10=fp0 fp-512_w=mmmmmmmm
> 5: (b7) r0 = 0                        ; frame1: R0_w=0
> 6: (95) exit
> returning from callee:
>  frame1: R0_w=0 R1=ctx(off=0,imm=0) R10=fp0 fp-512_w=mmmmmmmm
> to caller at 2:
>  R0_w=0 R10=fp0 fp-512_w=mmmmmmmm
>
> from 6 to 2: R0_w=0 R10=fp0 fp-512_w=mmmmmmmm
> 2: (b7) r0 = 0                        ; R0_w=0
> 3: (95) exit
> combined stack size of 2 calls is 1024. Too large
> verification time 541 usec
> stack depth 512+512
>

It's all true, but I'm not clear on what point you are trying to make.
BPF program did get rejected after using so much stack trace, right?
So it should be ok to reduce idmap size.

It's the difference between maintaining 152 (which is already quite
overpesimisstic for any real application) vs 600 entries (1216 bytes
vs 4800 bytes). On each state equality check call. We can probably
just drop that WARN_ON_ONCE(1) in check_ids and reduce the size of
idmap, no more changes, right?

> >
> >
> >
> > >  struct bpf_verifier_state {
> > >         /* call stack tracking */
> > >         struct bpf_func_state *frame[MAX_CALL_FRAMES];
> > > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > > index d05c5d0344c6..9188370a7ebe 100644
> > > --- a/kernel/bpf/verifier.c
> > > +++ b/kernel/bpf/verifier.c
> > > @@ -13122,7 +13122,6 @@ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_stat
> > >  {
> > >         int i;
> > >
> > > -       memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
> > >         for (i = 0; i < MAX_BPF_REG; i++)
> > >                 if (!regsafe(env, &old->regs[i], &cur->regs[i],
> > >                              env->idmap_scratch))
> > > @@ -13146,6 +13145,8 @@ static bool states_equal(struct bpf_verifier_env *env,
> > >         if (old->curframe != cur->curframe)
> > >                 return false;
> > >
> > > +       memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
> > > +
> > >         /* Verification state from speculative execution simulation
> > >          * must never prune a non-speculative execution one.
> > >          */
> > > --
> > > 2.34.1
> > >
>
diff mbox series

Patch

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 70d06a99f0b8..c1f769515beb 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -273,9 +273,9 @@  struct bpf_id_pair {
 	u32 cur;
 };
 
-/* Maximum number of register states that can exist at once */
-#define BPF_ID_MAP_SIZE (MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE)
 #define MAX_CALL_FRAMES 8
+/* Maximum number of register states that can exist at once */
+#define BPF_ID_MAP_SIZE ((MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE) * MAX_CALL_FRAMES)
 struct bpf_verifier_state {
 	/* call stack tracking */
 	struct bpf_func_state *frame[MAX_CALL_FRAMES];
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d05c5d0344c6..9188370a7ebe 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -13122,7 +13122,6 @@  static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_stat
 {
 	int i;
 
-	memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
 	for (i = 0; i < MAX_BPF_REG; i++)
 		if (!regsafe(env, &old->regs[i], &cur->regs[i],
 			     env->idmap_scratch))
@@ -13146,6 +13145,8 @@  static bool states_equal(struct bpf_verifier_env *env,
 	if (old->curframe != cur->curframe)
 		return false;
 
+	memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
+
 	/* Verification state from speculative execution simulation
 	 * must never prune a non-speculative execution one.
 	 */