
[bpf-next,v4,07/10] bpf: Support calling non-tailcall bpf prog

Message ID 20241010175628.1898648-1-yonghong.song@linux.dev (mailing list archive)
State Superseded
Delegated to: BPF
Series bpf: Support private stack for bpf progs

Checks

Context Check Description
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 fail Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 fail Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 fail Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-15 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-16 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 fail Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 206 this patch: 206
netdev/build_tools success Errors and warnings before: 0 (+1) this patch: 0 (+1)
netdev/cc_maintainers warning 8 maintainers not CCed: song@kernel.org haoluo@google.com john.fastabend@gmail.com sdf@fomichev.me martin.lau@linux.dev kpsingh@kernel.org eddyz87@gmail.com jolsa@kernel.org
netdev/build_clang success Errors and warnings before: 257 this patch: 257
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 6963 this patch: 6964
netdev/checkpatch warning CHECK: Lines should not end with a '('; WARNING: line length of 82 exceeds 80 columns; WARNING: line length of 83 exceeds 80 columns; WARNING: line length of 89 exceeds 80 columns; WARNING: line length of 90 exceeds 80 columns; WARNING: line length of 93 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 16 this patch: 16
netdev/source_inline success Was 0 now: 0

Commit Message

Yonghong Song Oct. 10, 2024, 5:56 p.m. UTC
A new kfunc bpf_prog_call() is introduced so that one bpf prog can call
another bpf prog. It takes the same parameters as bpf_tail_call() but
acts like a normal function call.

But bpf_prog_call() could recurse into the caller prog itself. So if a
bpf prog calls bpf_prog_call(), that bpf prog will use private stacks
with a maximum recursion level of 4. Four levels of recursion should
work for most cases.

bpf_prog_call() cannot be used if a tail_call exists in the same prog,
since tail_call does not use the private stack. If both prog_call and
tail_call appear in the same prog, verification will fail.
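
A minimal usage sketch from the BPF side (the map layout, section name,
and cast below are illustrative and not part of this patch; only the
bpf_prog_call() signature comes from it):

  struct {
          __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
          __uint(max_entries, 1);
          __uint(key_size, sizeof(__u32));
          __uint(value_size, sizeof(__u32));
  } prog_array SEC(".maps");

  extern int bpf_prog_call(void *ctx, struct bpf_map *p__map, u32 index) __ksym;

  SEC("tc")
  int caller(struct __sk_buff *skb)
  {
          /* unlike bpf_tail_call(), control returns here with the
           * callee's return value (or a negative error)
           */
          int ret = bpf_prog_call(skb, (struct bpf_map *)&prog_array, 0);

          return ret < 0 ? 0 : ret;
  }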

Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
---
 include/linux/bpf.h   |  2 ++
 kernel/bpf/core.c     |  7 +++++--
 kernel/bpf/helpers.c  | 20 ++++++++++++++++++++
 kernel/bpf/verifier.c | 30 ++++++++++++++++++++++++++----
 4 files changed, 53 insertions(+), 6 deletions(-)

Comments

Alexei Starovoitov Oct. 10, 2024, 8:28 p.m. UTC | #1
On Thu, Oct 10, 2024 at 10:56 AM Yonghong Song <yonghong.song@linux.dev> wrote:
>
> A new kfunc bpf_prog_call() is introduced so that one bpf prog can call
> another bpf prog. It takes the same parameters as bpf_tail_call() but
> acts like a normal function call.
>
> But bpf_prog_call() could recurse into the caller prog itself. So if a
> bpf prog calls bpf_prog_call(), that bpf prog will use private stacks
> with a maximum recursion level of 4. Four levels of recursion should
> work for most cases.
>
> bpf_prog_call() cannot be used if a tail_call exists in the same prog,
> since tail_call does not use the private stack. If both prog_call and
> tail_call appear in the same prog, verification will fail.

..

> +__bpf_kfunc int bpf_prog_call(void *ctx, struct bpf_map *p__map, u32 index)
> +{
> +       struct bpf_array *array;
> +       struct bpf_prog *prog;
> +
> +       if (p__map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
> +               return -EINVAL;
> +
> +       array = container_of(p__map, struct bpf_array, map);
> +       if (unlikely(index >= array->map.max_entries))
> +               return -E2BIG;
> +
> +       prog = READ_ONCE(array->ptrs[index]);
> +       if (!prog)
> +               return -ENOENT;
> +
> +       return bpf_prog_run(prog, ctx);
> +}

bpf_tail_call() was a hack during the early days,
since I didn't know any better :(
I really don't want to use that as a pattern.
Prog lifetime rules, tail call cnt, prog_array_compatible, etc.
caused plenty of pain. Don't want to see a repeat.

Progs that need to call another prog can already use the freplace mechanism.
There is no need for bpf_prog_call().
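
For reference, a minimal freplace sketch (all names illustrative): the
target prog exposes a global noinline function, and a second prog
replaces it, attached from userspace with
bpf_program__set_attach_target() before load:

  /* target object */
  __noinline int subfunc(struct __sk_buff *skb)
  {
          return 0;
  }

  SEC("tc")
  int entry(struct __sk_buff *skb)
  {
          return subfunc(skb);
  }

  /* replacement object */
  SEC("freplace/subfunc")
  int new_subfunc(struct __sk_buff *skb)
  {
          return 1;
  }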

Let's get priv_stack in shape first (the first ~6 patches).

pw-bot: cr
Yonghong Song Oct. 11, 2024, 4:12 a.m. UTC | #2
On 10/10/24 1:28 PM, Alexei Starovoitov wrote:
> On Thu, Oct 10, 2024 at 10:56 AM Yonghong Song <yonghong.song@linux.dev> wrote:
>> A new kfunc bpf_prog_call() is introduced so that one bpf prog can call
>> another bpf prog. It takes the same parameters as bpf_tail_call() but
>> acts like a normal function call.
>>
>> But bpf_prog_call() could recurse into the caller prog itself. So if a
>> bpf prog calls bpf_prog_call(), that bpf prog will use private stacks
>> with a maximum recursion level of 4. Four levels of recursion should
>> work for most cases.
>>
>> bpf_prog_call() cannot be used if a tail_call exists in the same prog,
>> since tail_call does not use the private stack. If both prog_call and
>> tail_call appear in the same prog, verification will fail.
> ..
>
>> +__bpf_kfunc int bpf_prog_call(void *ctx, struct bpf_map *p__map, u32 index)
>> +{
>> +       struct bpf_array *array;
>> +       struct bpf_prog *prog;
>> +
>> +       if (p__map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
>> +               return -EINVAL;
>> +
>> +       array = container_of(p__map, struct bpf_array, map);
>> +       if (unlikely(index >= array->map.max_entries))
>> +               return -E2BIG;
>> +
>> +       prog = READ_ONCE(array->ptrs[index]);
>> +       if (!prog)
>> +               return -ENOENT;
>> +
>> +       return bpf_prog_run(prog, ctx);
>> +}
> bpf_tail_call() was a hack during the early days,
> since I didn't know any better :(
> I really don't want to use that as a pattern.
> Prog lifetime rules, tail call cnt, prog_array_compatible, etc.
> caused plenty of pain. Don't want to see a repeat.
>
> Progs that need to call another prog can already use the freplace mechanism.
> There is no need for bpf_prog_call().

In this case, the prog could call itself, which freplace does not allow.

>
> Let's get priv_stack in shape first (the first ~6 patches).

I am okay with focusing on the first 6 patches. But I would like to get
Tejun's comments about the best way to support a hierarchical
bpf-based scheduler.

>
> pw-bot: cr
Tejun Heo Oct. 15, 2024, 9:18 p.m. UTC | #3
Hello,

On Thu, Oct 10, 2024 at 09:12:19PM -0700, Yonghong Song wrote:
> > Let's get priv_stack in shape first (the first ~6 patches).
> 
> I am okay with focusing on the first 6 patches. But I would like to get
> Tejun's comments about the best way to support a hierarchical
> bpf-based scheduler.

There isn't a concrete design yet, so it's difficult to say anything
definitive, but I was thinking more along the lines of providing sched_ext
kfunc helpers that perform nesting calls rather than having each BPF
program directly call nested BPF programs.

For example, let's say the scheduler hierarchy looks like this:

  R + A + AA
    |   + AB
    + B

Let's say AB has a task waking up to it and is calling ops.select_cpu():

 ops.select_cpu()
 {
	if (does AB already have the perfect CPU sitting around)
		direct dispatch and return the CPU;
	if (scx_bpf_get_cpus(describe the perfect CPU))
		direct dispatch and return the CPU;
	if (is there any eligible idle CPU that AB is holding)
		direct dispatch and return the CPU;
	if (scx_bpf_get_cpus(any eligible CPUs))
		direct dispatch and return the CPU;
	// no idle CPU, proceed to enqueue
	return prev_cpu;
 }

Note that the scheduler at AB doesn't have any knowledge of what's up the
tree. It's just describing what it wants through the kfunc, which is then
responsible for nesting calls up the hierarchy. Up a layer, this can be
implemented like:

 ops.get_cpus(CPUs description)
 {
	if (has any CPUs matching the description)
		claim and return the CPUs;
	modify CPUs description to enforce e.g. cache sharing policy;
	and possibly to request more CPUs for batching;
	if (scx_bpf_get_cpus(CPUs description)) {
		store extra CPUs;
		claim and return some of the CPUs;
	}
	return no CPUs available;
 }

This way, the schedulers at different layers are isolated and each only has
to express what it wants.
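
Purely to make that shape concrete, a hypothetical sketch of the
kfunc-side nesting (no such types or helpers exist today; this is not a
proposed API):

  /* hypothetical: each layer records its parent and the BPF ops
   * attached at that layer
   */
  __bpf_kfunc int scx_bpf_get_cpus(struct scx_cpus_desc *desc)
  {
          struct scx_layer *parent = current_scx_layer()->parent;

          if (!parent || !parent->ops->get_cpus)
                  return 0;       /* root reached: no CPUs to hand out */

          /* nested call into the parent layer's BPF scheduler */
          return parent->ops->get_cpus(desc);
  }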

Thanks.
Alexei Starovoitov Oct. 15, 2024, 9:35 p.m. UTC | #4
On Tue, Oct 15, 2024 at 2:18 PM Tejun Heo <tj@kernel.org> wrote:
>
> Hello,
>
> On Thu, Oct 10, 2024 at 09:12:19PM -0700, Yonghong Song wrote:
> > > Let's get priv_stack in shape first (the first ~6 patches).
> >
> > I am okay with focusing on the first 6 patches. But I would like to get
> > Tejun's comments about the best way to support a hierarchical
> > bpf-based scheduler.
>
> There isn't a concrete design yet, so it's difficult to say anything
> definitive, but I was thinking more along the lines of providing sched_ext
> kfunc helpers that perform nesting calls rather than having each BPF
> program directly call nested BPF programs.
>
> For example, let's say the scheduler hierarchy looks like this:
>
>   R + A + AA
>     |   + AB
>     + B
>
> Let's say AB has a task waking up to it and is calling ops.select_cpu():
>
>  ops.select_cpu()
>  {
>         if (does AB already have the perfect CPU sitting around)
>                 direct dispatch and return the CPU;
>         if (scx_bpf_get_cpus(describe the perfect CPU))
>                 direct dispatch and return the CPU;
>         if (is there any eligible idle CPU that AB is holding)
>                 direct dispatch and return the CPU;
>         if (scx_bpf_get_cpus(any eligible CPUs))
>                 direct dispatch and return the CPU;
>         // no idle CPU, proceed to enqueue
>         return prev_cpu;
>  }
>
> Note that the scheduler at AB doesn't have any knowledge of what's up the
> tree. It's just describing what it wants through the kfunc, which is then
> responsible for nesting calls up the hierarchy. Up a layer, this can be
> implemented like:
>
>  ops.get_cpus(CPUs description)
>  {
>         if (has any CPUs matching the description)
>                 claim and return the CPUs;
>         modify CPUs description to enforce e.g. cache sharing policy;
>         and possibly to request more CPUs for batching;
>         if (scx_bpf_get_cpus(CPUs description)) {
>                 store extra CPUs;
>                 claim and return some of the CPUs;
>         }
>         return no CPUs available;
>  }
>
> This way, the schedulers at different layers are isolated and each only has
> to express what it wants.

What we've been discussing is something like this:

ops.get_cpus -> bpf prog A -> kfunc

where the kfunc will call one of the struct_ops callbacks,
which may call bpf prog A again, since it's the only one attached
to this get_cpus callback.
So
ops.get_cpus -> bpf prog A -> kfunc -> ops.get_cpus -> bpf prog A.

If the kfunc calls a different struct_ops callback, it will call
a different bpf prog B, which will have its own private stack.

During struct_ops registration, one of the bpf_verifier_ops callbacks,
like bpf_scx_check_member (or a new callback), will indicate
back to the bpf trampoline that limited recursion for a specific
ops.get_cpus is allowed.
Then the bpf trampoline's bpf_trampoline_enter() selector will
pick an entry helper that allows limited recursion.

Currently the bpf trampoline doesn't check recursion for struct_ops progs,
so it needs to be tightened to allow limited recursion
and to let the bpf jit prologue know which part of the priv stack to use.
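
For context, the existing __bpf_prog_enter_recur() rejects any nesting
via a per-cpu counter; a bounded variant could look roughly like the
sketch below (the _limited helper is hypothetical, modeled on the
current one):

  static u64 notrace __bpf_prog_enter_recur_limited(struct bpf_prog *prog,
                                                    struct bpf_tramp_run_ctx *run_ctx)
  {
          rcu_read_lock();
          migrate_disable();

          run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);

          /* allow up to BPF_MAX_PRIV_STACK_NEST_LEVEL activations per cpu;
           * the count also tells the jited prologue which slice of the
           * private stack to use
           */
          if (unlikely(this_cpu_inc_return(*(prog->active)) >
                       BPF_MAX_PRIV_STACK_NEST_LEVEL)) {
                  bpf_prog_inc_misses_counter(prog);
                  return 0;
          }
          return bpf_prog_start_time();
  }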

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f22ddb423fd0..952cb398eb30 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1493,6 +1493,7 @@  struct bpf_prog_aux {
 	bool exception_cb;
 	bool exception_boundary;
 	bool priv_stack_eligible;
+	bool has_prog_call;
 	struct bpf_arena *arena;
 	/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
 	const struct btf_type *attach_func_proto;
@@ -1929,6 +1930,7 @@  struct bpf_array {
 
 #define BPF_COMPLEXITY_LIMIT_INSNS      1000000 /* yes. 1M insns */
 #define MAX_TAIL_CALL_CNT 33
+#define BPF_MAX_PRIV_STACK_NEST_LEVEL	4
 
 /* Maximum number of loops for bpf_loop and bpf_iter_num.
  * It's enum to expose it (and thus make it discoverable) through BTF.
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index f79d951a061f..0d2c97f63ecf 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2426,10 +2426,13 @@  struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 				fp->aux->priv_stack_mode = NO_PRIV_STACK;
 			} else {
 				void __percpu *priv_stack_ptr;
+				int nest_level = 1;
 
+				if (fp->aux->has_prog_call)
+					nest_level = BPF_MAX_PRIV_STACK_NEST_LEVEL;
 				fp->aux->priv_stack_mode = PRIV_STACK_ROOT_PROG;
-				priv_stack_ptr =
-					__alloc_percpu_gfp(fp->aux->stack_depth, 8, GFP_KERNEL);
+				priv_stack_ptr = __alloc_percpu_gfp(
+					fp->aux->stack_depth * nest_level, 8, GFP_KERNEL);
 				if (!priv_stack_ptr) {
 					*err = -ENOMEM;
 					return fp;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 4053f279ed4c..9cc880dc213e 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2749,6 +2749,25 @@  __bpf_kfunc void bpf_rcu_read_unlock(void)
 	rcu_read_unlock();
 }
 
+__bpf_kfunc int bpf_prog_call(void *ctx, struct bpf_map *p__map, u32 index)
+{
+	struct bpf_array *array;
+	struct bpf_prog *prog;
+
+	if (p__map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
+		return -EINVAL;
+
+	array = container_of(p__map, struct bpf_array, map);
+	if (unlikely(index >= array->map.max_entries))
+		return -E2BIG;
+
+	prog = READ_ONCE(array->ptrs[index]);
+	if (!prog)
+		return -ENOENT;
+
+	return bpf_prog_run(prog, ctx);
+}
+
 struct bpf_throw_ctx {
 	struct bpf_prog_aux *aux;
 	u64 sp;
@@ -3035,6 +3054,7 @@  BTF_ID_FLAGS(func, bpf_task_get_cgroup1, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
 #endif
 BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_throw)
+BTF_ID_FLAGS(func, bpf_prog_call)
 BTF_KFUNCS_END(generic_btf_ids)
 
 static const struct btf_kfunc_id_set generic_kfunc_set = {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 46b0c277c6a8..e3d9820618a1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5986,6 +5986,9 @@  static int check_ptr_alignment(struct bpf_verifier_env *env,
 
 static bool bpf_enable_private_stack(struct bpf_prog *prog)
 {
+	if (prog->aux->has_prog_call)
+		return true;
+
 	if (!bpf_jit_supports_private_stack())
 		return false;
 
@@ -6092,7 +6095,9 @@  static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx,
 			return -EACCES;
 		}
 
-		if (!priv_stack_eligible && depth >= BPF_PRIV_STACK_MIN_SUBTREE_SIZE) {
+		if (!priv_stack_eligible &&
+		    (depth >= BPF_PRIV_STACK_MIN_SUBTREE_SIZE ||
+		     env->prog->aux->has_prog_call)) {
 			subprog[orig_idx].priv_stack_eligible = true;
 			env->prog->aux->priv_stack_eligible = priv_stack_eligible = true;
 		}
@@ -6181,8 +6186,13 @@  static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx,
 			}
 			subprog[ret_prog[j]].tail_call_reachable = true;
 		}
-	if (!check_priv_stack && subprog[0].tail_call_reachable)
+	if (!check_priv_stack && subprog[0].tail_call_reachable) {
+		if (env->prog->aux->has_prog_call) {
+			verbose(env, "cannot do prog call and tail call in the same prog\n");
+			return -EINVAL;
+		}
 		env->prog->aux->tail_call_reachable = true;
+	}
 
 	/* end of for() loop means the last insn of the 'subprog'
 	 * was reached. Doesn't matter whether it was JA or EXIT
@@ -11322,6 +11332,7 @@  enum special_kfunc_type {
 	KF_bpf_preempt_enable,
 	KF_bpf_iter_css_task_new,
 	KF_bpf_session_cookie,
+	KF_bpf_prog_call,
 };
 
 BTF_SET_START(special_kfunc_set)
@@ -11387,6 +11398,7 @@  BTF_ID(func, bpf_session_cookie)
 #else
 BTF_ID_UNUSED
 #endif
+BTF_ID(func, bpf_prog_call)
 
 static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
 {
@@ -11433,6 +11445,11 @@  get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
 	if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx])
 		return KF_ARG_PTR_TO_CTX;
 
+	if (meta->func_id == special_kfunc_list[KF_bpf_prog_call] && argno == 0) {
+		env->prog->aux->has_prog_call = true;
+		return KF_ARG_PTR_TO_CTX;
+	}
+
 	/* In this function, we verify the kfunc's BTF as per the argument type,
 	 * leaving the rest of the verification with respect to the register
 	 * type to our caller. When a set of conditions hold in the BTF type of
@@ -20009,6 +20026,7 @@  static int jit_subprogs(struct bpf_verifier_env *env)
 	struct bpf_insn *insn;
 	void *old_bpf_func;
 	int err, num_exentries;
+	int nest_level = 1;
 
 	if (env->subprog_cnt <= 1)
 		return 0;
@@ -20099,9 +20117,13 @@  static int jit_subprogs(struct bpf_verifier_env *env)
 			} else if (!subtree_stack_depth) {
 				func[i]->aux->priv_stack_mode = PRIV_STACK_ROOT_PROG;
 			} else {
+				if (env->prog->aux->has_prog_call) {
+					func[i]->aux->has_prog_call = true;
+					nest_level = BPF_MAX_PRIV_STACK_NEST_LEVEL;
+				}
 				func[i]->aux->priv_stack_mode = PRIV_STACK_ROOT_PROG;
-				priv_stack_ptr =
-					__alloc_percpu_gfp(subtree_stack_depth, 8, GFP_KERNEL);
+				priv_stack_ptr = __alloc_percpu_gfp(
+					subtree_stack_depth * nest_level, 8, GFP_KERNEL);
 				if (!priv_stack_ptr) {
 					err = -ENOMEM;
 					goto out_free;
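
With the nest_level multiplier above, each per-cpu allocation holds
nest_level slices of stack_depth bytes. A hedged sketch of how the
slice for the current nesting depth could be located at runtime (the
helper and the nest_cnt counter are illustrative, not from this
series):

  /* nest_cnt: 0-based per-cpu nesting depth maintained at prog entry */
  static void *priv_stack_for_level(void __percpu *priv_stack_ptr,
                                    u32 stack_depth, u32 nest_cnt)
  {
          void *base = this_cpu_ptr(priv_stack_ptr);

          /* each nesting level owns a stack_depth-sized slice */
          return base + (u64)nest_cnt * stack_depth;
  }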