diff mbox series

[bpf-next,v8,3/9] bpf: Check potential private stack recursion for progs with async callback

Message ID 20241101031006.2678685-1-yonghong.song@linux.dev (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series bpf: Support private stack for bpf progs | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 5 this patch: 5
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 8 maintainers not CCed: sdf@fomichev.me kpsingh@kernel.org john.fastabend@gmail.com eddyz87@gmail.com martin.lau@linux.dev song@kernel.org haoluo@google.com jolsa@kernel.org
netdev/build_clang success Errors and warnings before: 3 this patch: 3
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 14 this patch: 14
netdev/checkpatch warning WARNING: line length of 85 exceeds 80 columns WARNING: line length of 93 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-17 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-18 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-10 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-7 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-8 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release

Commit Message

Yonghong Song Nov. 1, 2024, 3:10 a.m. UTC
In previous patch, tracing progs are enabled for private stack since
recursion checking ensures there exists no nested same bpf prog run on
the same cpu.

But it is still possible for nested bpf subprog run on the same cpu
if the same subprog is called in both main prog and async callback,
or in different async callbacks. For example,
  main_prog
   bpf_timer_set_callback(timer, timer_cb);
   call sub1
  sub1
   ...
  timer_cb
   call sub1

In the above case, nested subprog run for sub1 is possible with one in
process context and the other in softirq context. If this is the case,
the verifier will disable private stack for this bpf prog.

Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
---
 kernel/bpf/verifier.c | 46 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 41 insertions(+), 5 deletions(-)

Comments

Alexei Starovoitov Nov. 1, 2024, 7:55 p.m. UTC | #1
On Thu, Oct 31, 2024 at 8:12 PM Yonghong Song <yonghong.song@linux.dev> wrote:
>
> In previous patch, tracing progs are enabled for private stack since
> recursion checking ensures there exists no nested same bpf prog run on
> the same cpu.
>
> But it is still possible for nested bpf subprog run on the same cpu
> if the same subprog is called in both main prog and async callback,
> or in different async callbacks. For example,
>   main_prog
>    bpf_timer_set_callback(timer, timer_cb);
>    call sub1
>   sub1
>    ...
>   time_cb
>    call sub1
>
> In the above case, nested subprog run for sub1 is possible with one in
> process context and the other in softirq context. If this is the case,
> the verifier will disable private stack for this bpf prog.
>
> Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
> ---
>  kernel/bpf/verifier.c | 46 ++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 41 insertions(+), 5 deletions(-)
>
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index d3f4cbab97bc..596afd29f088 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -6070,7 +6070,8 @@ static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth)
>   */
>  static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx,
>                                          int *subtree_depth, int *depth_frame,
> -                                        int priv_stack_supported)
> +                                        int priv_stack_supported,
> +                                        char *subprog_visited)
>  {
>         struct bpf_subprog_info *subprog = env->subprog_info;
>         struct bpf_insn *insn = env->prog->insnsi;
> @@ -6120,8 +6121,12 @@ static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx,
>                                         idx, subprog_depth);
>                                 return -EACCES;
>                         }
> -                       if (subprog_depth >= BPF_PRIV_STACK_MIN_SIZE)
> +                       if (subprog_depth >= BPF_PRIV_STACK_MIN_SIZE) {
>                                 subprog[idx].use_priv_stack = true;
> +                               subprog_visited[idx] = 1;
> +                       }
> +               } else {
> +                       subprog_visited[idx] = 1;
>                 }
>         }
>  continue_func:
> @@ -6222,19 +6227,42 @@ static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx,
>  static int check_max_stack_depth(struct bpf_verifier_env *env)
>  {
>         struct bpf_subprog_info *si = env->subprog_info;
> +       char *subprogs1 = NULL, *subprogs2 = NULL;
>         int ret, subtree_depth = 0, depth_frame;
> +       int orig_priv_stack_supported;
>         int priv_stack_supported;
>
>         priv_stack_supported = bpf_enable_priv_stack(env);
>         if (priv_stack_supported < 0)
>                 return priv_stack_supported;
>
> +       orig_priv_stack_supported = priv_stack_supported;
> +       if (orig_priv_stack_supported != NO_PRIV_STACK) {
> +               subprogs1 = kvmalloc(env->subprog_cnt * 2, __GFP_ZERO);

Just __GFP_ZERO ?!

Overall the algo is messy. Pls think of a cleaner way of checking.
Add two bool flags to bpf_subprog_info:
visited_with_priv_stack
visited_without_priv_stack
and after walking all subprogs add another loop over subprogs
that checks for exclusivity of these flags?

Probably other algos are possible.

> +               if (!subprogs1)
> +                       priv_stack_supported = NO_PRIV_STACK;
> +               else
> +                       subprogs2 = subprogs1 + env->subprog_cnt;
> +       }
> +
>         for (int i = 0; i < env->subprog_cnt; i++) {
>                 if (!i || si[i].is_async_cb) {
>                         ret = check_max_stack_depth_subprog(env, i, &subtree_depth, &depth_frame,
> -                                                           priv_stack_supported);
> +                                                           priv_stack_supported, subprogs2);
>                         if (ret < 0)
> -                               return ret;
> +                               goto out;
> +
> +                       if (priv_stack_supported != NO_PRIV_STACK) {
> +                               for (int j = 0; j < env->subprog_cnt; j++) {
> +                                       if (subprogs1[j] && subprogs2[j]) {
> +                                               priv_stack_supported = NO_PRIV_STACK;
> +                                               break;
> +                                       }
> +                                       subprogs1[j] |= subprogs2[j];
> +                               }
> +                       }
> +                       if (priv_stack_supported != NO_PRIV_STACK)
> +                               memset(subprogs2, 0, env->subprog_cnt);
>                 }
>         }
>         if (priv_stack_supported == NO_PRIV_STACK) {
> @@ -6243,10 +6271,18 @@ static int check_max_stack_depth(struct bpf_verifier_env *env)
>                                 depth_frame, subtree_depth);
>                         return -EACCES;
>                 }
> +               if (orig_priv_stack_supported == PRIV_STACK_ADAPTIVE) {
> +                       for (int i = 0; i < env->subprog_cnt; i++)
> +                               si[i].use_priv_stack = false;
> +               }
>         }
>         if (si[0].use_priv_stack)
>                 env->prog->aux->use_priv_stack = true;
> -       return 0;
> +       ret = 0;
> +
> +out:
> +       kvfree(subprogs1);
> +       return ret;
>  }
>
>  #ifndef CONFIG_BPF_JIT_ALWAYS_ON
> --
> 2.43.5
>
Yonghong Song Nov. 4, 2024, 12:08 a.m. UTC | #2
On 11/1/24 12:55 PM, Alexei Starovoitov wrote:
> On Thu, Oct 31, 2024 at 8:12 PM Yonghong Song <yonghong.song@linux.dev> wrote:
>> In previous patch, tracing progs are enabled for private stack since
>> recursion checking ensures there exists no nested same bpf prog run on
>> the same cpu.
>>
>> But it is still possible for nested bpf subprog run on the same cpu
>> if the same subprog is called in both main prog and async callback,
>> or in different async callbacks. For example,
>>    main_prog
>>     bpf_timer_set_callback(timer, timer_cb);
>>     call sub1
>>    sub1
>>     ...
>>    time_cb
>>     call sub1
>>
>> In the above case, nested subprog run for sub1 is possible with one in
>> process context and the other in softirq context. If this is the case,
>> the verifier will disable private stack for this bpf prog.
>>
>> Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
>> ---
>>   kernel/bpf/verifier.c | 46 ++++++++++++++++++++++++++++++++++++++-----
>>   1 file changed, 41 insertions(+), 5 deletions(-)
>>
>> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
>> index d3f4cbab97bc..596afd29f088 100644
>> --- a/kernel/bpf/verifier.c
>> +++ b/kernel/bpf/verifier.c
>> @@ -6070,7 +6070,8 @@ static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth)
>>    */
>>   static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx,
>>                                           int *subtree_depth, int *depth_frame,
>> -                                        int priv_stack_supported)
>> +                                        int priv_stack_supported,
>> +                                        char *subprog_visited)
>>   {
>>          struct bpf_subprog_info *subprog = env->subprog_info;
>>          struct bpf_insn *insn = env->prog->insnsi;
>> @@ -6120,8 +6121,12 @@ static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx,
>>                                          idx, subprog_depth);
>>                                  return -EACCES;
>>                          }
>> -                       if (subprog_depth >= BPF_PRIV_STACK_MIN_SIZE)
>> +                       if (subprog_depth >= BPF_PRIV_STACK_MIN_SIZE) {
>>                                  subprog[idx].use_priv_stack = true;
>> +                               subprog_visited[idx] = 1;
>> +                       }
>> +               } else {
>> +                       subprog_visited[idx] = 1;
>>                  }
>>          }
>>   continue_func:
>> @@ -6222,19 +6227,42 @@ static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx,
>>   static int check_max_stack_depth(struct bpf_verifier_env *env)
>>   {
>>          struct bpf_subprog_info *si = env->subprog_info;
>> +       char *subprogs1 = NULL, *subprogs2 = NULL;
>>          int ret, subtree_depth = 0, depth_frame;
>> +       int orig_priv_stack_supported;
>>          int priv_stack_supported;
>>
>>          priv_stack_supported = bpf_enable_priv_stack(env);
>>          if (priv_stack_supported < 0)
>>                  return priv_stack_supported;
>>
>> +       orig_priv_stack_supported = priv_stack_supported;
>> +       if (orig_priv_stack_supported != NO_PRIV_STACK) {
>> +               subprogs1 = kvmalloc(env->subprog_cnt * 2, __GFP_ZERO);
> Just __GFP_ZERO ?!
>
> Overall the algo is messy. Pls think of a cleaner way of checking.
> Add two bool flags to bpf_subprog_info:
> visited_with_priv_stack
> visited_without_priv_stack

Actually this is what I thought initially as well with two bool flags
in bpf_subprog_info. Later on, I think since these two bool flags are
used ONLY in this function, probably not worthwhile to add them to
the header file.

But since you suggest this, I can go with the two-bool-flag approach.

> and after walking all subprogs add another loop over subprogs
> that checks for exclusivity of these flags?
>
> Probably other algos are possible.
>
>> +               if (!subprogs1)
>> +                       priv_stack_supported = NO_PRIV_STACK;
>> +               else
>> +                       subprogs2 = subprogs1 + env->subprog_cnt;
>> +       }
>> +
>>          for (int i = 0; i < env->subprog_cnt; i++) {
[...]
diff mbox series

Patch

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d3f4cbab97bc..596afd29f088 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6070,7 +6070,8 @@  static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth)
  */
 static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx,
 					 int *subtree_depth, int *depth_frame,
-					 int priv_stack_supported)
+					 int priv_stack_supported,
+					 char *subprog_visited)
 {
 	struct bpf_subprog_info *subprog = env->subprog_info;
 	struct bpf_insn *insn = env->prog->insnsi;
@@ -6120,8 +6121,12 @@  static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx,
 					idx, subprog_depth);
 				return -EACCES;
 			}
-			if (subprog_depth >= BPF_PRIV_STACK_MIN_SIZE)
+			if (subprog_depth >= BPF_PRIV_STACK_MIN_SIZE) {
 				subprog[idx].use_priv_stack = true;
+				subprog_visited[idx] = 1;
+			}
+		} else {
+			subprog_visited[idx] = 1;
 		}
 	}
 continue_func:
@@ -6222,19 +6227,42 @@  static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx,
 static int check_max_stack_depth(struct bpf_verifier_env *env)
 {
 	struct bpf_subprog_info *si = env->subprog_info;
+	char *subprogs1 = NULL, *subprogs2 = NULL;
 	int ret, subtree_depth = 0, depth_frame;
+	int orig_priv_stack_supported;
 	int priv_stack_supported;
 
 	priv_stack_supported = bpf_enable_priv_stack(env);
 	if (priv_stack_supported < 0)
 		return priv_stack_supported;
 
+	orig_priv_stack_supported = priv_stack_supported;
+	if (orig_priv_stack_supported != NO_PRIV_STACK) {
+		subprogs1 = kvmalloc(env->subprog_cnt * 2, __GFP_ZERO);
+		if (!subprogs1)
+			priv_stack_supported = NO_PRIV_STACK;
+		else
+			subprogs2 = subprogs1 + env->subprog_cnt;
+	}
+
 	for (int i = 0; i < env->subprog_cnt; i++) {
 		if (!i || si[i].is_async_cb) {
 			ret = check_max_stack_depth_subprog(env, i, &subtree_depth, &depth_frame,
-							    priv_stack_supported);
+							    priv_stack_supported, subprogs2);
 			if (ret < 0)
-				return ret;
+				goto out;
+
+			if (priv_stack_supported != NO_PRIV_STACK) {
+				for (int j = 0; j < env->subprog_cnt; j++) {
+					if (subprogs1[j] && subprogs2[j]) {
+						priv_stack_supported = NO_PRIV_STACK;
+						break;
+					}
+					subprogs1[j] |= subprogs2[j];
+				}
+			}
+			if (priv_stack_supported != NO_PRIV_STACK)
+				memset(subprogs2, 0, env->subprog_cnt);
 		}
 	}
 	if (priv_stack_supported == NO_PRIV_STACK) {
@@ -6243,10 +6271,18 @@  static int check_max_stack_depth(struct bpf_verifier_env *env)
 				depth_frame, subtree_depth);
 			return -EACCES;
 		}
+		if (orig_priv_stack_supported == PRIV_STACK_ADAPTIVE) {
+			for (int i = 0; i < env->subprog_cnt; i++)
+				si[i].use_priv_stack = false;
+		}
 	}
 	if (si[0].use_priv_stack)
 		env->prog->aux->use_priv_stack = true;
-	return 0;
+	ret = 0;
+
+out:
+	kvfree(subprogs1);
+	return ret;
 }
 
 #ifndef CONFIG_BPF_JIT_ALWAYS_ON