diff mbox series

[bpf-next,v9,05/10] bpf: Allocate private stack for eligible main prog or subprogs

Message ID 20241104193521.3243984-1-yonghong.song@linux.dev (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series bpf: Support private stack for bpf progs | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 204 this patch: 204
netdev/build_tools success Errors and warnings before: 0 (+0) this patch: 0 (+0)
netdev/cc_maintainers warning 16 maintainers not CCed: mingo@redhat.com x86@kernel.org hpa@zytor.com eddyz87@gmail.com bp@alien8.de dsahern@kernel.org netdev@vger.kernel.org john.fastabend@gmail.com haoluo@google.com dave.hansen@linux.intel.com kpsingh@kernel.org martin.lau@linux.dev tglx@linutronix.de sdf@fomichev.me song@kernel.org jolsa@kernel.org
netdev/build_clang success Errors and warnings before: 253 this patch: 253
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 6965 this patch: 6965
netdev/checkpatch warning WARNING: ENOTSUPP is not a SUSV4 error code, prefer EOPNOTSUPP WARNING: line length of 87 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns WARNING: line length of 98 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 6 this patch: 6
netdev/source_inline success Was 0 now: 0

Commit Message

Yonghong Song Nov. 4, 2024, 7:35 p.m. UTC
For any main prog or subprogs, allocate private stack space if requested
by subprog info or main prog. The alignment for private stack is 16
since maximum stack alignment is 16 for bpf-enabled archs.

If jit failed, the allocated private stack will be freed in the same
function where the allocation happens. If jit succeeded, e.g., for
x86_64 arch, the allocated private stack is freed in arch specific
implementation of bpf_jit_free().

Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
---
 arch/x86/net/bpf_jit_comp.c |  1 +
 include/linux/bpf.h         |  1 +
 kernel/bpf/core.c           | 19 ++++++++++++++++---
 kernel/bpf/verifier.c       | 13 +++++++++++++
 4 files changed, 31 insertions(+), 3 deletions(-)

Comments

Alexei Starovoitov Nov. 5, 2024, 1:38 a.m. UTC | #1
On Mon, Nov 4, 2024 at 11:38 AM Yonghong Song <yonghong.song@linux.dev> wrote:
>
> For any main prog or subprogs, allocate private stack space if requested
> by subprog info or main prog. The alignment for private stack is 16
> since maximum stack alignment is 16 for bpf-enabled archs.
>
> If jit failed, the allocated private stack will be freed in the same
> function where the allocation happens. If jit succeeded, e.g., for
> x86_64 arch, the allocated private stack is freed in arch specific
> implementation of bpf_jit_free().
>
> Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
> ---
>  arch/x86/net/bpf_jit_comp.c |  1 +
>  include/linux/bpf.h         |  1 +
>  kernel/bpf/core.c           | 19 ++++++++++++++++---
>  kernel/bpf/verifier.c       | 13 +++++++++++++
>  4 files changed, 31 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> index 06b080b61aa5..59d294b8dd67 100644
> --- a/arch/x86/net/bpf_jit_comp.c
> +++ b/arch/x86/net/bpf_jit_comp.c
> @@ -3544,6 +3544,7 @@ void bpf_jit_free(struct bpf_prog *prog)
>                 prog->bpf_func = (void *)prog->bpf_func - cfi_get_offset();
>                 hdr = bpf_jit_binary_pack_hdr(prog);
>                 bpf_jit_binary_pack_free(hdr, NULL);
> +               free_percpu(prog->aux->priv_stack_ptr);
>                 WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
>         }
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 8db3c5d7404b..8a3ea7440a4a 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1507,6 +1507,7 @@ struct bpf_prog_aux {
>         u32 max_rdwr_access;
>         struct btf *attach_btf;
>         const struct bpf_ctx_arg_aux *ctx_arg_info;
> +       void __percpu *priv_stack_ptr;
>         struct mutex dst_mutex; /* protects dst_* pointers below, *after* prog becomes visible */
>         struct bpf_prog *dst_prog;
>         struct bpf_trampoline *dst_trampoline;
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index 14d9288441f2..f7a3e93c41e1 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -2396,6 +2396,7 @@ static void bpf_prog_select_func(struct bpf_prog *fp)
>   */
>  struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
>  {
> +       void __percpu *priv_stack_ptr = NULL;
>         /* In case of BPF to BPF calls, verifier did all the prep
>          * work with regards to JITing, etc.
>          */
> @@ -2421,11 +2422,23 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
>                 if (*err)
>                         return fp;
>
> +               if (fp->aux->use_priv_stack && fp->aux->stack_depth) {
> +                       priv_stack_ptr = __alloc_percpu_gfp(fp->aux->stack_depth, 16, GFP_KERNEL);
> +                       if (!priv_stack_ptr) {
> +                               *err = -ENOMEM;
> +                               return fp;
> +                       }
> +                       fp->aux->priv_stack_ptr = priv_stack_ptr;
> +               }
> +
>                 fp = bpf_int_jit_compile(fp);
>                 bpf_prog_jit_attempt_done(fp);
> -               if (!fp->jited && jit_needed) {
> -                       *err = -ENOTSUPP;
> -                       return fp;
> +               if (!fp->jited) {
> +                       free_percpu(priv_stack_ptr);
> +                       if (jit_needed) {
> +                               *err = -ENOTSUPP;
> +                               return fp;
> +                       }
>                 }
>         } else {
>                 *err = bpf_prog_offload_compile(fp);
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index e01b3f0fd314..03ae76d57076 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -20073,6 +20073,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
>  {
>         struct bpf_prog *prog = env->prog, **func, *tmp;
>         int i, j, subprog_start, subprog_end = 0, len, subprog;
> +       void __percpu *priv_stack_ptr;
>         struct bpf_map *map_ptr;
>         struct bpf_insn *insn;
>         void *old_bpf_func;
> @@ -20169,6 +20170,17 @@ static int jit_subprogs(struct bpf_verifier_env *env)
>
>                 func[i]->aux->name[0] = 'F';
>                 func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
> +
> +               if (env->subprog_info[i].use_priv_stack && func[i]->aux->stack_depth) {
> +                       priv_stack_ptr = __alloc_percpu_gfp(func[i]->aux->stack_depth, 16,
> +                                                           GFP_KERNEL);
> +                       if (!priv_stack_ptr) {
> +                               err = -ENOMEM;
> +                               goto out_free;
> +                       }
> +                       func[i]->aux->priv_stack_ptr = priv_stack_ptr;
> +               }
> +
>                 func[i]->jit_requested = 1;
>                 func[i]->blinding_requested = prog->blinding_requested;
>                 func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
> @@ -20201,6 +20213,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
>                         func[i]->aux->exception_boundary = env->seen_exception;
>                 func[i] = bpf_int_jit_compile(func[i]);
>                 if (!func[i]->jited) {
> +                       free_percpu(func[i]->aux->priv_stack_ptr);
>                         err = -ENOTSUPP;
>                         goto out_free;
>                 }

Looks correct from leaks pov, but this is so hard to follow.
I still don't like this imbalanced alloc/free.
Either both need to be done by core or both by JIT.

And JIT is probably better, since in:
__alloc_percpu_gfp(func[i]->aux->stack_depth, 16

16 alignment is x86 specific.
Yonghong Song Nov. 5, 2024, 3:07 a.m. UTC | #2
On 11/4/24 5:38 PM, Alexei Starovoitov wrote:
> On Mon, Nov 4, 2024 at 11:38 AM Yonghong Song <yonghong.song@linux.dev> wrote:
>> For any main prog or subprogs, allocate private stack space if requested
>> by subprog info or main prog. The alignment for private stack is 16
>> since maximum stack alignment is 16 for bpf-enabled archs.
>>
>> If jit failed, the allocated private stack will be freed in the same
>> function where the allocation happens. If jit succeeded, e.g., for
>> x86_64 arch, the allocated private stack is freed in arch specific
>> implementation of bpf_jit_free().
>>
>> Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
>> ---
>>   arch/x86/net/bpf_jit_comp.c |  1 +
>>   include/linux/bpf.h         |  1 +
>>   kernel/bpf/core.c           | 19 ++++++++++++++++---
>>   kernel/bpf/verifier.c       | 13 +++++++++++++
>>   4 files changed, 31 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
>> index 06b080b61aa5..59d294b8dd67 100644
>> --- a/arch/x86/net/bpf_jit_comp.c
>> +++ b/arch/x86/net/bpf_jit_comp.c
>> @@ -3544,6 +3544,7 @@ void bpf_jit_free(struct bpf_prog *prog)
>>                  prog->bpf_func = (void *)prog->bpf_func - cfi_get_offset();
>>                  hdr = bpf_jit_binary_pack_hdr(prog);
>>                  bpf_jit_binary_pack_free(hdr, NULL);
>> +               free_percpu(prog->aux->priv_stack_ptr);
>>                  WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
>>          }
>>
>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>> index 8db3c5d7404b..8a3ea7440a4a 100644
>> --- a/include/linux/bpf.h
>> +++ b/include/linux/bpf.h
>> @@ -1507,6 +1507,7 @@ struct bpf_prog_aux {
>>          u32 max_rdwr_access;
>>          struct btf *attach_btf;
>>          const struct bpf_ctx_arg_aux *ctx_arg_info;
>> +       void __percpu *priv_stack_ptr;
>>          struct mutex dst_mutex; /* protects dst_* pointers below, *after* prog becomes visible */
>>          struct bpf_prog *dst_prog;
>>          struct bpf_trampoline *dst_trampoline;
>> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
>> index 14d9288441f2..f7a3e93c41e1 100644
>> --- a/kernel/bpf/core.c
>> +++ b/kernel/bpf/core.c
>> @@ -2396,6 +2396,7 @@ static void bpf_prog_select_func(struct bpf_prog *fp)
>>    */
>>   struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
>>   {
>> +       void __percpu *priv_stack_ptr = NULL;
>>          /* In case of BPF to BPF calls, verifier did all the prep
>>           * work with regards to JITing, etc.
>>           */
>> @@ -2421,11 +2422,23 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
>>                  if (*err)
>>                          return fp;
>>
>> +               if (fp->aux->use_priv_stack && fp->aux->stack_depth) {
>> +                       priv_stack_ptr = __alloc_percpu_gfp(fp->aux->stack_depth, 16, GFP_KERNEL);
>> +                       if (!priv_stack_ptr) {
>> +                               *err = -ENOMEM;
>> +                               return fp;
>> +                       }
>> +                       fp->aux->priv_stack_ptr = priv_stack_ptr;
>> +               }
>> +
>>                  fp = bpf_int_jit_compile(fp);
>>                  bpf_prog_jit_attempt_done(fp);
>> -               if (!fp->jited && jit_needed) {
>> -                       *err = -ENOTSUPP;
>> -                       return fp;
>> +               if (!fp->jited) {
>> +                       free_percpu(priv_stack_ptr);
>> +                       if (jit_needed) {
>> +                               *err = -ENOTSUPP;
>> +                               return fp;
>> +                       }
>>                  }
>>          } else {
>>                  *err = bpf_prog_offload_compile(fp);
>> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
>> index e01b3f0fd314..03ae76d57076 100644
>> --- a/kernel/bpf/verifier.c
>> +++ b/kernel/bpf/verifier.c
>> @@ -20073,6 +20073,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
>>   {
>>          struct bpf_prog *prog = env->prog, **func, *tmp;
>>          int i, j, subprog_start, subprog_end = 0, len, subprog;
>> +       void __percpu *priv_stack_ptr;
>>          struct bpf_map *map_ptr;
>>          struct bpf_insn *insn;
>>          void *old_bpf_func;
>> @@ -20169,6 +20170,17 @@ static int jit_subprogs(struct bpf_verifier_env *env)
>>
>>                  func[i]->aux->name[0] = 'F';
>>                  func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
>> +
>> +               if (env->subprog_info[i].use_priv_stack && func[i]->aux->stack_depth) {
>> +                       priv_stack_ptr = __alloc_percpu_gfp(func[i]->aux->stack_depth, 16,
>> +                                                           GFP_KERNEL);
>> +                       if (!priv_stack_ptr) {
>> +                               err = -ENOMEM;
>> +                               goto out_free;
>> +                       }
>> +                       func[i]->aux->priv_stack_ptr = priv_stack_ptr;
>> +               }
>> +
>>                  func[i]->jit_requested = 1;
>>                  func[i]->blinding_requested = prog->blinding_requested;
>>                  func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
>> @@ -20201,6 +20213,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
>>                          func[i]->aux->exception_boundary = env->seen_exception;
>>                  func[i] = bpf_int_jit_compile(func[i]);
>>                  if (!func[i]->jited) {
>> +                       free_percpu(func[i]->aux->priv_stack_ptr);
>>                          err = -ENOTSUPP;
>>                          goto out_free;
>>                  }
> Looks correct from leaks pov, but this is so hard to follow.
> I still don't like this imbalanced alloc/free.
> Either both need to be done by core or both by JIT.
>
> And JIT is probably better, since in:
> __alloc_percpu_gfp(func[i]->aux->stack_depth, 16
>
> 16 alignment is x86 specific.

Agree. I use alignment 16 to cover all architectures. for x86_64,
alignment 8 is used. I did some checking in arch/ directory.

[~/work/bpf-next/arch (master)]$ find . -name 'net'
./arm/net
./mips/net
./parisc/net
./powerpc/net
./s390/net
./sparc/net
./x86/net
./arc/net
./arm64/net
./loongarch/net
./riscv/net

[~/work/bpf-next/arch (master)]$ egrep -r bpf_jit_free (excluding not func definition)
powerpc/net/bpf_jit_comp.c:void bpf_jit_free(struct bpf_prog *fp)
sparc/net/bpf_jit_comp_32.c:void bpf_jit_free(struct bpf_prog *fp)
x86/net/bpf_jit_comp.c:void bpf_jit_free(struct bpf_prog *prog)
arm64/net/bpf_jit_comp.c:void bpf_jit_free(struct bpf_prog *prog)
riscv/net/bpf_jit_core.c:void bpf_jit_free(struct bpf_prog *prog)

Looks like all important arch's like x86_64,arm64,riscv having their own
bpf_jit_free(). Some others like s390, etc. do not. I think we can do
allocation in JIT. If s390 starts to implement private stack, then it
can implement arch-specific version of bpf_jit_free() at that time.
Yonghong Song Nov. 5, 2024, 3:44 a.m. UTC | #3
On 11/4/24 7:07 PM, Yonghong Song wrote:
>
> On 11/4/24 5:38 PM, Alexei Starovoitov wrote:
>> On Mon, Nov 4, 2024 at 11:38 AM Yonghong Song 
>> <yonghong.song@linux.dev> wrote:
>>> For any main prog or subprogs, allocate private stack space if 
>>> requested
>>> by subprog info or main prog. The alignment for private stack is 16
>>> since maximum stack alignment is 16 for bpf-enabled archs.
>>>
>>> If jit failed, the allocated private stack will be freed in the same
>>> function where the allocation happens. If jit succeeded, e.g., for
>>> x86_64 arch, the allocated private stack is freed in arch specific
>>> implementation of bpf_jit_free().
>>>
>>> Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
>>> ---
>>>   arch/x86/net/bpf_jit_comp.c |  1 +
>>>   include/linux/bpf.h         |  1 +
>>>   kernel/bpf/core.c           | 19 ++++++++++++++++---
>>>   kernel/bpf/verifier.c       | 13 +++++++++++++
>>>   4 files changed, 31 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
>>> index 06b080b61aa5..59d294b8dd67 100644
>>> --- a/arch/x86/net/bpf_jit_comp.c
>>> +++ b/arch/x86/net/bpf_jit_comp.c
>>> @@ -3544,6 +3544,7 @@ void bpf_jit_free(struct bpf_prog *prog)
>>>                  prog->bpf_func = (void *)prog->bpf_func - 
>>> cfi_get_offset();
>>>                  hdr = bpf_jit_binary_pack_hdr(prog);
>>>                  bpf_jit_binary_pack_free(hdr, NULL);
>>> +               free_percpu(prog->aux->priv_stack_ptr);
>>> WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
>>>          }
>>>
>>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>>> index 8db3c5d7404b..8a3ea7440a4a 100644
>>> --- a/include/linux/bpf.h
>>> +++ b/include/linux/bpf.h
>>> @@ -1507,6 +1507,7 @@ struct bpf_prog_aux {
>>>          u32 max_rdwr_access;
>>>          struct btf *attach_btf;
>>>          const struct bpf_ctx_arg_aux *ctx_arg_info;
>>> +       void __percpu *priv_stack_ptr;
>>>          struct mutex dst_mutex; /* protects dst_* pointers below, 
>>> *after* prog becomes visible */
>>>          struct bpf_prog *dst_prog;
>>>          struct bpf_trampoline *dst_trampoline;
>>> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
>>> index 14d9288441f2..f7a3e93c41e1 100644
>>> --- a/kernel/bpf/core.c
>>> +++ b/kernel/bpf/core.c
>>> @@ -2396,6 +2396,7 @@ static void bpf_prog_select_func(struct 
>>> bpf_prog *fp)
>>>    */
>>>   struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int 
>>> *err)
>>>   {
>>> +       void __percpu *priv_stack_ptr = NULL;
>>>          /* In case of BPF to BPF calls, verifier did all the prep
>>>           * work with regards to JITing, etc.
>>>           */
>>> @@ -2421,11 +2422,23 @@ struct bpf_prog 
>>> *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
>>>                  if (*err)
>>>                          return fp;
>>>
>>> +               if (fp->aux->use_priv_stack && fp->aux->stack_depth) {
>>> +                       priv_stack_ptr = 
>>> __alloc_percpu_gfp(fp->aux->stack_depth, 16, GFP_KERNEL);
>>> +                       if (!priv_stack_ptr) {
>>> +                               *err = -ENOMEM;
>>> +                               return fp;
>>> +                       }
>>> +                       fp->aux->priv_stack_ptr = priv_stack_ptr;
>>> +               }
>>> +
>>>                  fp = bpf_int_jit_compile(fp);
>>>                  bpf_prog_jit_attempt_done(fp);
>>> -               if (!fp->jited && jit_needed) {
>>> -                       *err = -ENOTSUPP;
>>> -                       return fp;
>>> +               if (!fp->jited) {
>>> +                       free_percpu(priv_stack_ptr);
>>> +                       if (jit_needed) {
>>> +                               *err = -ENOTSUPP;
>>> +                               return fp;
>>> +                       }
>>>                  }
>>>          } else {
>>>                  *err = bpf_prog_offload_compile(fp);
>>> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
>>> index e01b3f0fd314..03ae76d57076 100644
>>> --- a/kernel/bpf/verifier.c
>>> +++ b/kernel/bpf/verifier.c
>>> @@ -20073,6 +20073,7 @@ static int jit_subprogs(struct 
>>> bpf_verifier_env *env)
>>>   {
>>>          struct bpf_prog *prog = env->prog, **func, *tmp;
>>>          int i, j, subprog_start, subprog_end = 0, len, subprog;
>>> +       void __percpu *priv_stack_ptr;
>>>          struct bpf_map *map_ptr;
>>>          struct bpf_insn *insn;
>>>          void *old_bpf_func;
>>> @@ -20169,6 +20170,17 @@ static int jit_subprogs(struct 
>>> bpf_verifier_env *env)
>>>
>>>                  func[i]->aux->name[0] = 'F';
>>>                  func[i]->aux->stack_depth = 
>>> env->subprog_info[i].stack_depth;
>>> +
>>> +               if (env->subprog_info[i].use_priv_stack && 
>>> func[i]->aux->stack_depth) {
>>> +                       priv_stack_ptr = 
>>> __alloc_percpu_gfp(func[i]->aux->stack_depth, 16,
>>> + GFP_KERNEL);
>>> +                       if (!priv_stack_ptr) {
>>> +                               err = -ENOMEM;
>>> +                               goto out_free;
>>> +                       }
>>> +                       func[i]->aux->priv_stack_ptr = priv_stack_ptr;
>>> +               }
>>> +
>>>                  func[i]->jit_requested = 1;
>>>                  func[i]->blinding_requested = 
>>> prog->blinding_requested;
>>>                  func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
>>> @@ -20201,6 +20213,7 @@ static int jit_subprogs(struct 
>>> bpf_verifier_env *env)
>>> func[i]->aux->exception_boundary = env->seen_exception;
>>>                  func[i] = bpf_int_jit_compile(func[i]);
>>>                  if (!func[i]->jited) {
>>> + free_percpu(func[i]->aux->priv_stack_ptr);
>>>                          err = -ENOTSUPP;
>>>                          goto out_free;
>>>                  }
>> Looks correct from leaks pov, but this is so hard to follow.
>> I still don't like this imbalanced alloc/free.
>> Either both need to be done by core or both by JIT.
>>
>> And JIT is probably better, since in:
>> __alloc_percpu_gfp(func[i]->aux->stack_depth, 16
>>
>> 16 alignment is x86 specific.
>
Sorry, I need to fix my format. The following is a reformat.

Agree. I use alignment 16 to cover all architectures. for x86_64,
alignment 8 is used. I did some checking in arch/ directory.

[~/work/bpf-next/arch (master)]$ find . -name 'net'
./arm/net
./mips/net
./parisc/net
./powerpc/net
./s390/net
./sparc/net
./x86/net
./arc/net
./arm64/net
./loongarch/net
./riscv/net

[~/work/bpf-next/arch (master)]$ egrep -r bpf_jit_free (excluding not func definition)
powerpc/net/bpf_jit_comp.c:void bpf_jit_free(struct bpf_prog *fp)
sparc/net/bpf_jit_comp_32.c:void bpf_jit_free(struct bpf_prog *fp)
x86/net/bpf_jit_comp.c:void bpf_jit_free(struct bpf_prog *prog)
arm64/net/bpf_jit_comp.c:void bpf_jit_free(struct bpf_prog *prog)
riscv/net/bpf_jit_core.c:void bpf_jit_free(struct bpf_prog *prog)
  
Looks like all important arch's like x86_64,arm64,riscv having their own
bpf_jit_free(). Some others like s390, etc. do not. I think we can do
allocation in JIT. If s390 starts to implement private stack, then it
can implement arch-specific version of bpf_jit_free() at that time.
Alexei Starovoitov Nov. 5, 2024, 5:19 a.m. UTC | #4
On Mon, Nov 4, 2024 at 7:44 PM Yonghong Song <yonghong.song@linux.dev> wrote:
>
>
> Agree. I use alignment 16 to cover all architectures. for x86_64,
> alignment 8 is used. I did some checking in arch/ directory.

hmm. I'm pretty sure x86 psABI requires 16-byte stack alignment,
but I don't know why.
Yonghong Song Nov. 5, 2024, 6:05 a.m. UTC | #5
On 11/4/24 9:19 PM, Alexei Starovoitov wrote:
> On Mon, Nov 4, 2024 at 7:44 PM Yonghong Song <yonghong.song@linux.dev> wrote:
>>
>> Agree. I use alignment 16 to cover all architectures. for x86_64,
>> alignment 8 is used. I did some checking in arch/ directory.
> hmm. I'm pretty sure x86 psABI requires 16-byte stack alignment,
> but I don't know why.

One possible reason is to accommodate values like int128 or
128bit floating point. I will make percpu allocation with 16
byte alignment.
diff mbox series

Patch

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 06b080b61aa5..59d294b8dd67 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -3544,6 +3544,7 @@  void bpf_jit_free(struct bpf_prog *prog)
 		prog->bpf_func = (void *)prog->bpf_func - cfi_get_offset();
 		hdr = bpf_jit_binary_pack_hdr(prog);
 		bpf_jit_binary_pack_free(hdr, NULL);
+		free_percpu(prog->aux->priv_stack_ptr);
 		WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
 	}
 
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 8db3c5d7404b..8a3ea7440a4a 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1507,6 +1507,7 @@  struct bpf_prog_aux {
 	u32 max_rdwr_access;
 	struct btf *attach_btf;
 	const struct bpf_ctx_arg_aux *ctx_arg_info;
+	void __percpu *priv_stack_ptr;
 	struct mutex dst_mutex; /* protects dst_* pointers below, *after* prog becomes visible */
 	struct bpf_prog *dst_prog;
 	struct bpf_trampoline *dst_trampoline;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 14d9288441f2..f7a3e93c41e1 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2396,6 +2396,7 @@  static void bpf_prog_select_func(struct bpf_prog *fp)
  */
 struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 {
+	void __percpu *priv_stack_ptr = NULL;
 	/* In case of BPF to BPF calls, verifier did all the prep
 	 * work with regards to JITing, etc.
 	 */
@@ -2421,11 +2422,23 @@  struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 		if (*err)
 			return fp;
 
+		if (fp->aux->use_priv_stack && fp->aux->stack_depth) {
+			priv_stack_ptr = __alloc_percpu_gfp(fp->aux->stack_depth, 16, GFP_KERNEL);
+			if (!priv_stack_ptr) {
+				*err = -ENOMEM;
+				return fp;
+			}
+			fp->aux->priv_stack_ptr = priv_stack_ptr;
+		}
+
 		fp = bpf_int_jit_compile(fp);
 		bpf_prog_jit_attempt_done(fp);
-		if (!fp->jited && jit_needed) {
-			*err = -ENOTSUPP;
-			return fp;
+		if (!fp->jited) {
+			free_percpu(priv_stack_ptr);
+			if (jit_needed) {
+				*err = -ENOTSUPP;
+				return fp;
+			}
 		}
 	} else {
 		*err = bpf_prog_offload_compile(fp);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e01b3f0fd314..03ae76d57076 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -20073,6 +20073,7 @@  static int jit_subprogs(struct bpf_verifier_env *env)
 {
 	struct bpf_prog *prog = env->prog, **func, *tmp;
 	int i, j, subprog_start, subprog_end = 0, len, subprog;
+	void __percpu *priv_stack_ptr;
 	struct bpf_map *map_ptr;
 	struct bpf_insn *insn;
 	void *old_bpf_func;
@@ -20169,6 +20170,17 @@  static int jit_subprogs(struct bpf_verifier_env *env)
 
 		func[i]->aux->name[0] = 'F';
 		func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
+
+		if (env->subprog_info[i].use_priv_stack && func[i]->aux->stack_depth) {
+			priv_stack_ptr = __alloc_percpu_gfp(func[i]->aux->stack_depth, 16,
+							    GFP_KERNEL);
+			if (!priv_stack_ptr) {
+				err = -ENOMEM;
+				goto out_free;
+			}
+			func[i]->aux->priv_stack_ptr = priv_stack_ptr;
+		}
+
 		func[i]->jit_requested = 1;
 		func[i]->blinding_requested = prog->blinding_requested;
 		func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
@@ -20201,6 +20213,7 @@  static int jit_subprogs(struct bpf_verifier_env *env)
 			func[i]->aux->exception_boundary = env->seen_exception;
 		func[i] = bpf_int_jit_compile(func[i]);
 		if (!func[i]->jited) {
+			free_percpu(func[i]->aux->priv_stack_ptr);
 			err = -ENOTSUPP;
 			goto out_free;
 		}