diff mbox series

[bpf-next] selftests/bpf: Fix flaky test verif_scale_strobemeta_subprogs

Message ID 20240206063010.1352503-1-yonghong.song@linux.dev (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series [bpf-next] selftests/bpf: Fix flaky test verif_scale_strobemeta_subprogs | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for bpf-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1060 this patch: 1060
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 17 maintainers not CCed: john.fastabend@gmail.com nathan@kernel.org morbo@google.com aou@eecs.berkeley.edu kpsingh@kernel.org llvm@lists.linux.dev jolsa@kernel.org linux-riscv@lists.infradead.org martin.lau@linux.dev song@kernel.org palmer@dabbelt.com sdf@google.com justinstitt@google.com eddyz87@gmail.com paul.walmsley@sifive.com ndesaulniers@google.com haoluo@google.com
netdev/build_clang success Errors and warnings before: 1066 this patch: 1066
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1077 this patch: 1077
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 37 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 fail Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 fail Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 fail Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 fail Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 fail Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 fail Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-38 fail Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 fail Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 fail Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-14 fail Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 fail Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc

Commit Message

Yonghong Song Feb. 6, 2024, 6:30 a.m. UTC
With latest llvm19, I hit the following selftest failures with

  $ ./test_progs -j
  libbpf: prog 'on_event': BPF program load failed: Permission denied
  libbpf: prog 'on_event': -- BEGIN PROG LOAD LOG --
  combined stack size of 4 calls is 544. Too large
  verification time 1344153 usec
  stack depth 24+440+0+32
  processed 51008 insns (limit 1000000) max_states_per_insn 19 total_states 1467 peak_states 303 mark_read 146
  -- END PROG LOAD LOG --
  libbpf: prog 'on_event': failed to load: -13
  libbpf: failed to load object 'strobemeta_subprogs.bpf.o'
  scale_test:FAIL:expect_success unexpected error: -13 (errno 13)
  #498     verif_scale_strobemeta_subprogs:FAIL

The verifier complains too big of the combined stack size (544 bytes) which
exceeds the maximum stack limit 512. This is a regression from llvm19 ([1]).

In the above error log, the original stack depth is 24+440+0+32.
To satisfy interpreter's need, in verifier the stack depth is adjusted to
32+448+32+32=544 which exceeds 512, hence the error. The same adjusted
stack size is also used for jit case.

But the jitted codes could use smaller stack size.

  $ egrep -r stack_depth | grep round_up
  arm64/net/bpf_jit_comp.c:       ctx->stack_size = round_up(prog->aux->stack_depth, 16);
  loongarch/net/bpf_jit.c:        bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
  powerpc/net/bpf_jit_comp.c:     cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
  riscv/net/bpf_jit_comp32.c:             round_up(ctx->prog->aux->stack_depth, STACK_ALIGN);
  riscv/net/bpf_jit_comp64.c:     bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
  s390/net/bpf_jit_comp.c:        u32 stack_depth = round_up(fp->aux->stack_depth, 8);
  sparc/net/bpf_jit_comp_64.c:            stack_needed += round_up(stack_depth, 16);
  x86/net/bpf_jit_comp.c:         EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8));
  x86/net/bpf_jit_comp.c: int tcc_off = -4 - round_up(stack_depth, 8);
  x86/net/bpf_jit_comp.c:                     round_up(stack_depth, 8));
  x86/net/bpf_jit_comp.c: int tcc_off = -4 - round_up(stack_depth, 8);
  x86/net/bpf_jit_comp.c:         EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8));

In the above, STACK_ALIGN in riscv/net/bpf_jit_comp32.c is defined as 16.
So stack is aligned in either 8 or 16, x86/s390 having 8-byte stack alignment and
the rest having 16-byte alignment.

This patch calculates total stack depth based on 16-byte alignment if jit is requested.
For the above failing case, the new stack size will be 32+448+0+32=512 and no verification
failure. llvm19 regression will be discussed separately in llvm upstream.

  [1] https://lore.kernel.org/bpf/32bde0f0-1881-46c9-931a-673be566c61d@linux.dev/

Suggested-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
---
 kernel/bpf/verifier.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

Comments

Andrii Nakryiko Feb. 8, 2024, 12:10 a.m. UTC | #1
On Mon, Feb 5, 2024 at 10:30 PM Yonghong Song <yonghong.song@linux.dev> wrote:
>
> With latest llvm19, I hit the following selftest failures with
>
>   $ ./test_progs -j
>   libbpf: prog 'on_event': BPF program load failed: Permission denied
>   libbpf: prog 'on_event': -- BEGIN PROG LOAD LOG --
>   combined stack size of 4 calls is 544. Too large
>   verification time 1344153 usec
>   stack depth 24+440+0+32
>   processed 51008 insns (limit 1000000) max_states_per_insn 19 total_states 1467 peak_states 303 mark_read 146
>   -- END PROG LOAD LOG --
>   libbpf: prog 'on_event': failed to load: -13
>   libbpf: failed to load object 'strobemeta_subprogs.bpf.o'
>   scale_test:FAIL:expect_success unexpected error: -13 (errno 13)
>   #498     verif_scale_strobemeta_subprogs:FAIL
>
> The verifier complains too big of the combined stack size (544 bytes) which
> exceeds the maximum stack limit 512. This is a regression from llvm19 ([1]).
>
> In the above error log, the original stack depth is 24+440+0+32.
> To satisfy interpreter's need, in verifier the stack depth is adjusted to
> 32+448+32+32=544 which exceeds 512, hence the error. The same adjusted
> stack size is also used for jit case.
>
> But the jitted codes could use smaller stack size.
>
>   $ egrep -r stack_depth | grep round_up
>   arm64/net/bpf_jit_comp.c:       ctx->stack_size = round_up(prog->aux->stack_depth, 16);
>   loongarch/net/bpf_jit.c:        bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
>   powerpc/net/bpf_jit_comp.c:     cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
>   riscv/net/bpf_jit_comp32.c:             round_up(ctx->prog->aux->stack_depth, STACK_ALIGN);
>   riscv/net/bpf_jit_comp64.c:     bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
>   s390/net/bpf_jit_comp.c:        u32 stack_depth = round_up(fp->aux->stack_depth, 8);
>   sparc/net/bpf_jit_comp_64.c:            stack_needed += round_up(stack_depth, 16);
>   x86/net/bpf_jit_comp.c:         EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8));
>   x86/net/bpf_jit_comp.c: int tcc_off = -4 - round_up(stack_depth, 8);
>   x86/net/bpf_jit_comp.c:                     round_up(stack_depth, 8));
>   x86/net/bpf_jit_comp.c: int tcc_off = -4 - round_up(stack_depth, 8);
>   x86/net/bpf_jit_comp.c:         EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8));
>
> In the above, STACK_ALIGN in riscv/net/bpf_jit_comp32.c is defined as 16.
> So stack is aligned in either 8 or 16, x86/s390 having 8-byte stack alignment and
> the rest having 16-byte alignment.
>
> This patch calculates total stack depth based on 16-byte alignment if jit is requested.
> For the above failing case, the new stack size will be 32+448+0+32=512 and no verification
> failure. llvm19 regression will be discussed separately in llvm upstream.
>
>   [1] https://lore.kernel.org/bpf/32bde0f0-1881-46c9-931a-673be566c61d@linux.dev/
>
> Suggested-by: Alexei Starovoitov <ast@kernel.org>
> Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
> ---

Seems like a few selftests have to be adjusted, current BPF CI is unhappy ([0])

  [0] https://github.com/kernel-patches/bpf/actions/runs/7795686155/job/21259132248

pw-bot: cr

>  kernel/bpf/verifier.c | 19 ++++++++++++++-----
>  1 file changed, 14 insertions(+), 5 deletions(-)
>
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index ddaf09db1175..10e33d49ca21 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -5812,6 +5812,17 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
>                                            strict);
>  }
>
> +static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth)
> +{
> +       if (env->prog->jit_requested)
> +               return round_up(stack_depth, 16);
> +
> +       /* round up to 32-bytes, since this is granularity
> +        * of interpreter stack size
> +        */
> +       return round_up(max_t(u32, stack_depth, 1), 32);
> +}
> +
>  /* starting from main bpf function walk all instructions of the function
>   * and recursively walk all callees that given function can call.
>   * Ignore jump and exit insns.
> @@ -5855,10 +5866,8 @@ static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx)
>                         depth);
>                 return -EACCES;
>         }
> -       /* round up to 32-bytes, since this is granularity
> -        * of interpreter stack size
> -        */
> -       depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
> +
> +       depth += round_up_stack_depth(env, subprog[idx].stack_depth);
>         if (depth > MAX_BPF_STACK) {
>                 verbose(env, "combined stack size of %d calls is %d. Too large\n",
>                         frame + 1, depth);
> @@ -5952,7 +5961,7 @@ static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx)
>          */
>         if (frame == 0)
>                 return 0;
> -       depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
> +       depth -= round_up_stack_depth(env, subprog[idx].stack_depth);
>         frame--;
>         i = ret_insn[frame];
>         idx = ret_prog[frame];
> --
> 2.34.1
>
Yonghong Song Feb. 8, 2024, 1:43 a.m. UTC | #2
On 2/7/24 4:10 PM, Andrii Nakryiko wrote:
> On Mon, Feb 5, 2024 at 10:30 PM Yonghong Song <yonghong.song@linux.dev> wrote:
>> With latest llvm19, I hit the following selftest failures with
>>
>>    $ ./test_progs -j
>>    libbpf: prog 'on_event': BPF program load failed: Permission denied
>>    libbpf: prog 'on_event': -- BEGIN PROG LOAD LOG --
>>    combined stack size of 4 calls is 544. Too large
>>    verification time 1344153 usec
>>    stack depth 24+440+0+32
>>    processed 51008 insns (limit 1000000) max_states_per_insn 19 total_states 1467 peak_states 303 mark_read 146
>>    -- END PROG LOAD LOG --
>>    libbpf: prog 'on_event': failed to load: -13
>>    libbpf: failed to load object 'strobemeta_subprogs.bpf.o'
>>    scale_test:FAIL:expect_success unexpected error: -13 (errno 13)
>>    #498     verif_scale_strobemeta_subprogs:FAIL
>>
>> The verifier complains too big of the combined stack size (544 bytes) which
>> exceeds the maximum stack limit 512. This is a regression from llvm19 ([1]).
>>
>> In the above error log, the original stack depth is 24+440+0+32.
>> To satisfy interpreter's need, in verifier the stack depth is adjusted to
>> 32+448+32+32=544 which exceeds 512, hence the error. The same adjusted
>> stack size is also used for jit case.
>>
>> But the jitted codes could use smaller stack size.
>>
>>    $ egrep -r stack_depth | grep round_up
>>    arm64/net/bpf_jit_comp.c:       ctx->stack_size = round_up(prog->aux->stack_depth, 16);
>>    loongarch/net/bpf_jit.c:        bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
>>    powerpc/net/bpf_jit_comp.c:     cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
>>    riscv/net/bpf_jit_comp32.c:             round_up(ctx->prog->aux->stack_depth, STACK_ALIGN);
>>    riscv/net/bpf_jit_comp64.c:     bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
>>    s390/net/bpf_jit_comp.c:        u32 stack_depth = round_up(fp->aux->stack_depth, 8);
>>    sparc/net/bpf_jit_comp_64.c:            stack_needed += round_up(stack_depth, 16);
>>    x86/net/bpf_jit_comp.c:         EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8));
>>    x86/net/bpf_jit_comp.c: int tcc_off = -4 - round_up(stack_depth, 8);
>>    x86/net/bpf_jit_comp.c:                     round_up(stack_depth, 8));
>>    x86/net/bpf_jit_comp.c: int tcc_off = -4 - round_up(stack_depth, 8);
>>    x86/net/bpf_jit_comp.c:         EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8));
>>
>> In the above, STACK_ALIGN in riscv/net/bpf_jit_comp32.c is defined as 16.
>> So stack is aligned in either 8 or 16, x86/s390 having 8-byte stack alignment and
>> the rest having 16-byte alignment.
>>
>> This patch calculates total stack depth based on 16-byte alignment if jit is requested.
>> For the above failing case, the new stack size will be 32+448+0+32=512 and no verification
>> failure. llvm19 regression will be discussed separately in llvm upstream.
>>
>>    [1] https://lore.kernel.org/bpf/32bde0f0-1881-46c9-931a-673be566c61d@linux.dev/
>>
>> Suggested-by: Alexei Starovoitov <ast@kernel.org>
>> Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
>> ---
> Seems like a few selftests have to be adjusted, current BPF CI is unhappy ([0])
>
>    [0] https://github.com/kernel-patches/bpf/actions/runs/7795686155/job/21259132248
>
> pw-bot: cr

Thanks for reminder! Will take a look soon.

>
>>   kernel/bpf/verifier.c | 19 ++++++++++++++-----
>>   1 file changed, 14 insertions(+), 5 deletions(-)
>>
>> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
>> index ddaf09db1175..10e33d49ca21 100644
>> --- a/kernel/bpf/verifier.c
>> +++ b/kernel/bpf/verifier.c
>> @@ -5812,6 +5812,17 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
>>                                             strict);
>>   }
>>
>> +static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth)
>> +{
>> +       if (env->prog->jit_requested)
>> +               return round_up(stack_depth, 16);
>> +
>> +       /* round up to 32-bytes, since this is granularity
>> +        * of interpreter stack size
>> +        */
>> +       return round_up(max_t(u32, stack_depth, 1), 32);
>> +}
>> +
[...]
diff mbox series

Patch

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index ddaf09db1175..10e33d49ca21 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5812,6 +5812,17 @@  static int check_ptr_alignment(struct bpf_verifier_env *env,
 					   strict);
 }
 
+static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth)
+{
+	if (env->prog->jit_requested)
+		return round_up(stack_depth, 16);
+
+	/* round up to 32-bytes, since this is granularity
+	 * of interpreter stack size
+	 */
+	return round_up(max_t(u32, stack_depth, 1), 32);
+}
+
 /* starting from main bpf function walk all instructions of the function
  * and recursively walk all callees that given function can call.
  * Ignore jump and exit insns.
@@ -5855,10 +5866,8 @@  static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx)
 			depth);
 		return -EACCES;
 	}
-	/* round up to 32-bytes, since this is granularity
-	 * of interpreter stack size
-	 */
-	depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
+
+	depth += round_up_stack_depth(env, subprog[idx].stack_depth);
 	if (depth > MAX_BPF_STACK) {
 		verbose(env, "combined stack size of %d calls is %d. Too large\n",
 			frame + 1, depth);
@@ -5952,7 +5961,7 @@  static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx)
 	 */
 	if (frame == 0)
 		return 0;
-	depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
+	depth -= round_up_stack_depth(env, subprog[idx].stack_depth);
 	frame--;
 	i = ret_insn[frame];
 	idx = ret_prog[frame];