diff mbox series

[bpf-next,v6,3/9] bpf: Support private stack for struct ops programs

Message ID 20241020191400.2105605-1-yonghong.song@linux.dev (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series bpf: Support private stack for bpf progs | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 206 this patch: 206
netdev/build_tools success Errors and warnings before: 0 (+1) this patch: 0 (+1)
netdev/cc_maintainers warning 9 maintainers not CCed: song@kernel.org haoluo@google.com netdev@vger.kernel.org john.fastabend@gmail.com sdf@fomichev.me martin.lau@linux.dev kpsingh@kernel.org eddyz87@gmail.com jolsa@kernel.org
netdev/build_clang success Errors and warnings before: 257 this patch: 257
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 6962 this patch: 6962
netdev/checkpatch warning WARNING: line length of 83 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 6 this patch: 6
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-17 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-18 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18

Commit Message

Yonghong Song Oct. 20, 2024, 7:13 p.m. UTC
To identify whether a st_ops program requests private stack or not,
the st_ops stub function is checked. If the stub function has the
following name
   <st_ops_name>__<member_name>__priv_stack
then the corresponding st_ops member func requests to use private
stack. The information that the private stack is requested or not
is encoded in struct bpf_struct_ops_func_info which will later be
used by verifier.

Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
---
 include/linux/bpf.h         |  2 ++
 kernel/bpf/bpf_struct_ops.c | 35 +++++++++++++++++++++++++----------
 kernel/bpf/verifier.c       |  8 +++++++-
 3 files changed, 34 insertions(+), 11 deletions(-)

Comments

Alexei Starovoitov Oct. 22, 2024, 1:34 a.m. UTC | #1
On Sun, Oct 20, 2024 at 12:16 PM Yonghong Song <yonghong.song@linux.dev> wrote:
>
> To identify whether a st_ops program requests private stack or not,
> the st_ops stub function is checked. If the stub function has the
> following name
>    <st_ops_name>__<member_name>__priv_stack
> then the corresponding st_ops member func requests to use private
> stack. The information that the private stack is requested or not
> is encoded in struct bpf_struct_ops_func_info which will later be
> used by verifier.
>
> Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
> ---
>  include/linux/bpf.h         |  2 ++
>  kernel/bpf/bpf_struct_ops.c | 35 +++++++++++++++++++++++++----------
>  kernel/bpf/verifier.c       |  8 +++++++-
>  3 files changed, 34 insertions(+), 11 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index f3884ce2603d..376e43fc72b9 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1491,6 +1491,7 @@ struct bpf_prog_aux {
>         bool exception_boundary;
>         bool is_extended; /* true if extended by freplace program */
>         bool priv_stack_eligible;
> +       bool priv_stack_always;
>         u64 prog_array_member_cnt; /* counts how many times as member of prog_array */
>         struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */
>         struct bpf_arena *arena;
> @@ -1776,6 +1777,7 @@ struct bpf_struct_ops {
>  struct bpf_struct_ops_func_info {
>         struct bpf_ctx_arg_aux *info;
>         u32 cnt;
> +       bool priv_stack_always;
>  };
>
>  struct bpf_struct_ops_desc {
> diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
> index 8279b5a57798..2cd4bd086c7a 100644
> --- a/kernel/bpf/bpf_struct_ops.c
> +++ b/kernel/bpf/bpf_struct_ops.c
> @@ -145,33 +145,44 @@ void bpf_struct_ops_image_free(void *image)
>  }
>
>  #define MAYBE_NULL_SUFFIX "__nullable"
> -#define MAX_STUB_NAME 128
> +#define MAX_STUB_NAME 140
>
>  /* Return the type info of a stub function, if it exists.
>   *
> - * The name of a stub function is made up of the name of the struct_ops and
> - * the name of the function pointer member, separated by "__". For example,
> - * if the struct_ops type is named "foo_ops" and the function pointer
> - * member is named "bar", the stub function name would be "foo_ops__bar".
> + * The name of a stub function is made up of the name of the struct_ops,
> + * the name of the function pointer member and optionally "priv_stack"
> + * suffix, separated by "__". For example, if the struct_ops type is named
> + * "foo_ops" and the function pointer  member is named "bar", the stub
> + * function name would be "foo_ops__bar". If a suffix "priv_stack" exists,
> + * the stub function name would be "foo_ops__bar__priv_stack".
>   */
>  static const struct btf_type *
>  find_stub_func_proto(const struct btf *btf, const char *st_op_name,
> -                    const char *member_name)
> +                    const char *member_name, bool *priv_stack_always)
>  {
>         char stub_func_name[MAX_STUB_NAME];
>         const struct btf_type *func_type;
>         s32 btf_id;
>         int cp;
>
> -       cp = snprintf(stub_func_name, MAX_STUB_NAME, "%s__%s",
> +       cp = snprintf(stub_func_name, MAX_STUB_NAME, "%s__%s__priv_stack",
>                       st_op_name, member_name);

I don't think this approach fits.
pw-bot: cr

Also looking at original
commit 1611603537a4 ("bpf: Create argument information for nullable arguments.")
that added this %s__%s notation I'm not sure why we went
with that approach.

Just to avoid adding __nullable suffix in the actual callback
and using cfi stub callback names with such suffixes as
a "proxy" for the real callback?

Did we ever use this functionality for anything other than
bpf_testmod_ops__test_maybe_null selftest ?

Martin ?
Yonghong Song Oct. 22, 2024, 2:59 a.m. UTC | #2
On 10/21/24 6:34 PM, Alexei Starovoitov wrote:
> On Sun, Oct 20, 2024 at 12:16 PM Yonghong Song <yonghong.song@linux.dev> wrote:
>> To identify whether a st_ops program requests private stack or not,
>> the st_ops stub function is checked. If the stub function has the
>> following name
>>     <st_ops_name>__<member_name>__priv_stack
>> then the corresponding st_ops member func requests to use private
>> stack. The information that the private stack is requested or not
>> is encoded in struct bpf_struct_ops_func_info which will later be
>> used by verifier.
>>
>> Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
>> ---
>>   include/linux/bpf.h         |  2 ++
>>   kernel/bpf/bpf_struct_ops.c | 35 +++++++++++++++++++++++++----------
>>   kernel/bpf/verifier.c       |  8 +++++++-
>>   3 files changed, 34 insertions(+), 11 deletions(-)
>>
>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>> index f3884ce2603d..376e43fc72b9 100644
>> --- a/include/linux/bpf.h
>> +++ b/include/linux/bpf.h
>> @@ -1491,6 +1491,7 @@ struct bpf_prog_aux {
>>          bool exception_boundary;
>>          bool is_extended; /* true if extended by freplace program */
>>          bool priv_stack_eligible;
>> +       bool priv_stack_always;
>>          u64 prog_array_member_cnt; /* counts how many times as member of prog_array */
>>          struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */
>>          struct bpf_arena *arena;
>> @@ -1776,6 +1777,7 @@ struct bpf_struct_ops {
>>   struct bpf_struct_ops_func_info {
>>          struct bpf_ctx_arg_aux *info;
>>          u32 cnt;
>> +       bool priv_stack_always;
>>   };
>>
>>   struct bpf_struct_ops_desc {
>> diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
>> index 8279b5a57798..2cd4bd086c7a 100644
>> --- a/kernel/bpf/bpf_struct_ops.c
>> +++ b/kernel/bpf/bpf_struct_ops.c
>> @@ -145,33 +145,44 @@ void bpf_struct_ops_image_free(void *image)
>>   }
>>
>>   #define MAYBE_NULL_SUFFIX "__nullable"
>> -#define MAX_STUB_NAME 128
>> +#define MAX_STUB_NAME 140
>>
>>   /* Return the type info of a stub function, if it exists.
>>    *
>> - * The name of a stub function is made up of the name of the struct_ops and
>> - * the name of the function pointer member, separated by "__". For example,
>> - * if the struct_ops type is named "foo_ops" and the function pointer
>> - * member is named "bar", the stub function name would be "foo_ops__bar".
>> + * The name of a stub function is made up of the name of the struct_ops,
>> + * the name of the function pointer member and optionally "priv_stack"
>> + * suffix, separated by "__". For example, if the struct_ops type is named
>> + * "foo_ops" and the function pointer  member is named "bar", the stub
>> + * function name would be "foo_ops__bar". If a suffix "priv_stack" exists,
>> + * the stub function name would be "foo_ops__bar__priv_stack".
>>    */
>>   static const struct btf_type *
>>   find_stub_func_proto(const struct btf *btf, const char *st_op_name,
>> -                    const char *member_name)
>> +                    const char *member_name, bool *priv_stack_always)
>>   {
>>          char stub_func_name[MAX_STUB_NAME];
>>          const struct btf_type *func_type;
>>          s32 btf_id;
>>          int cp;
>>
>> -       cp = snprintf(stub_func_name, MAX_STUB_NAME, "%s__%s",
>> +       cp = snprintf(stub_func_name, MAX_STUB_NAME, "%s__%s__priv_stack",
>>                        st_op_name, member_name);
> I don't think this approach fits.
> pw-bot: cr

Okay, I will use check_member() callback function then. It should avoid
this hack.

>
> Also looking at original
> commit 1611603537a4 ("bpf: Create argument information for nullable arguments.")
> that added this %s__%s notation I'm not sure why we went
> with that approach.
>
> Just to avoid adding __nullable suffix in the actual callback
> and using cfi stub callback names with such suffixes as
> a "proxy" for the real callback?
>
> Did we ever use this functionality for anything other than
> bpf_testmod_ops__test_maybe_null selftest ?
>
> Martin ?
Martin KaFai Lau Oct. 22, 2024, 5:26 p.m. UTC | #3
On 10/21/24 6:34 PM, Alexei Starovoitov wrote:
> On Sun, Oct 20, 2024 at 12:16 PM Yonghong Song <yonghong.song@linux.dev> wrote:
>>
>> To identify whether a st_ops program requests private stack or not,
>> the st_ops stub function is checked. If the stub function has the
>> following name
>>     <st_ops_name>__<member_name>__priv_stack
>> then the corresponding st_ops member func requests to use private
>> stack. The information that the private stack is requested or not
>> is encoded in struct bpf_struct_ops_func_info which will later be
>> used by verifier.
>>
>> Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
>> ---
>>   include/linux/bpf.h         |  2 ++
>>   kernel/bpf/bpf_struct_ops.c | 35 +++++++++++++++++++++++++----------
>>   kernel/bpf/verifier.c       |  8 +++++++-
>>   3 files changed, 34 insertions(+), 11 deletions(-)
>>
>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>> index f3884ce2603d..376e43fc72b9 100644
>> --- a/include/linux/bpf.h
>> +++ b/include/linux/bpf.h
>> @@ -1491,6 +1491,7 @@ struct bpf_prog_aux {
>>          bool exception_boundary;
>>          bool is_extended; /* true if extended by freplace program */
>>          bool priv_stack_eligible;
>> +       bool priv_stack_always;
>>          u64 prog_array_member_cnt; /* counts how many times as member of prog_array */
>>          struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */
>>          struct bpf_arena *arena;
>> @@ -1776,6 +1777,7 @@ struct bpf_struct_ops {
>>   struct bpf_struct_ops_func_info {
>>          struct bpf_ctx_arg_aux *info;
>>          u32 cnt;
>> +       bool priv_stack_always;
>>   };
>>
>>   struct bpf_struct_ops_desc {
>> diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
>> index 8279b5a57798..2cd4bd086c7a 100644
>> --- a/kernel/bpf/bpf_struct_ops.c
>> +++ b/kernel/bpf/bpf_struct_ops.c
>> @@ -145,33 +145,44 @@ void bpf_struct_ops_image_free(void *image)
>>   }
>>
>>   #define MAYBE_NULL_SUFFIX "__nullable"
>> -#define MAX_STUB_NAME 128
>> +#define MAX_STUB_NAME 140
>>
>>   /* Return the type info of a stub function, if it exists.
>>    *
>> - * The name of a stub function is made up of the name of the struct_ops and
>> - * the name of the function pointer member, separated by "__". For example,
>> - * if the struct_ops type is named "foo_ops" and the function pointer
>> - * member is named "bar", the stub function name would be "foo_ops__bar".
>> + * The name of a stub function is made up of the name of the struct_ops,
>> + * the name of the function pointer member and optionally "priv_stack"
>> + * suffix, separated by "__". For example, if the struct_ops type is named
>> + * "foo_ops" and the function pointer  member is named "bar", the stub
>> + * function name would be "foo_ops__bar". If a suffix "priv_stack" exists,
>> + * the stub function name would be "foo_ops__bar__priv_stack".
>>    */
>>   static const struct btf_type *
>>   find_stub_func_proto(const struct btf *btf, const char *st_op_name,
>> -                    const char *member_name)
>> +                    const char *member_name, bool *priv_stack_always)
>>   {
>>          char stub_func_name[MAX_STUB_NAME];
>>          const struct btf_type *func_type;
>>          s32 btf_id;
>>          int cp;
>>
>> -       cp = snprintf(stub_func_name, MAX_STUB_NAME, "%s__%s",
>> +       cp = snprintf(stub_func_name, MAX_STUB_NAME, "%s__%s__priv_stack",
>>                        st_op_name, member_name);
> 
> I don't think this approach fits.
> pw-bot: cr
> 
> Also looking at original
> commit 1611603537a4 ("bpf: Create argument information for nullable arguments.")
> that added this %s__%s notation I'm not sure why we went
> with that approach.
> 
> Just to avoid adding __nullable suffix in the actual callback
> and using cfi stub callback names with such suffixes as
> a "proxy" for the real callback?
> 
> Did we ever use this functionality for anything other than
> bpf_testmod_ops__test_maybe_null selftest ?
> 
> Martin ?

The __nullable is to tag an argument of an ops. The member in the struct (e.g. 
tcp_congestion_ops) is a pointer to FUNC_PROTO and its argument does not have an 
argument name to tag. Hence, we went with tagging the actual FUNC in the cfi object.

The __nullable argument tagging request was originally from sched_ext but I also 
don't see its usage in-tree for now.

For the priv_stack tagging, I also don't think it is a good way of doing it. It 
is like adding __nullable to flag the ops may return NULL pointer which I also 
tried to avoid in the bpf-qdisc patch set.
Alexei Starovoitov Oct. 22, 2024, 8:19 p.m. UTC | #4
On Tue, Oct 22, 2024 at 10:27 AM Martin KaFai Lau <martin.lau@linux.dev> wrote:
>
> On 10/21/24 6:34 PM, Alexei Starovoitov wrote:
> > On Sun, Oct 20, 2024 at 12:16 PM Yonghong Song <yonghong.song@linux.dev> wrote:
> >>
> >> To identify whether a st_ops program requests private stack or not,
> >> the st_ops stub function is checked. If the stub function has the
> >> following name
> >>     <st_ops_name>__<member_name>__priv_stack
> >> then the corresponding st_ops member func requests to use private
> >> stack. The information that the private stack is requested or not
> >> is encoded in struct bpf_struct_ops_func_info which will later be
> >> used by verifier.
> >>
> >> Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
> >> ---
> >>   include/linux/bpf.h         |  2 ++
> >>   kernel/bpf/bpf_struct_ops.c | 35 +++++++++++++++++++++++++----------
> >>   kernel/bpf/verifier.c       |  8 +++++++-
> >>   3 files changed, 34 insertions(+), 11 deletions(-)
> >>
> >> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> >> index f3884ce2603d..376e43fc72b9 100644
> >> --- a/include/linux/bpf.h
> >> +++ b/include/linux/bpf.h
> >> @@ -1491,6 +1491,7 @@ struct bpf_prog_aux {
> >>          bool exception_boundary;
> >>          bool is_extended; /* true if extended by freplace program */
> >>          bool priv_stack_eligible;
> >> +       bool priv_stack_always;
> >>          u64 prog_array_member_cnt; /* counts how many times as member of prog_array */
> >>          struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */
> >>          struct bpf_arena *arena;
> >> @@ -1776,6 +1777,7 @@ struct bpf_struct_ops {
> >>   struct bpf_struct_ops_func_info {
> >>          struct bpf_ctx_arg_aux *info;
> >>          u32 cnt;
> >> +       bool priv_stack_always;
> >>   };
> >>
> >>   struct bpf_struct_ops_desc {
> >> diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
> >> index 8279b5a57798..2cd4bd086c7a 100644
> >> --- a/kernel/bpf/bpf_struct_ops.c
> >> +++ b/kernel/bpf/bpf_struct_ops.c
> >> @@ -145,33 +145,44 @@ void bpf_struct_ops_image_free(void *image)
> >>   }
> >>
> >>   #define MAYBE_NULL_SUFFIX "__nullable"
> >> -#define MAX_STUB_NAME 128
> >> +#define MAX_STUB_NAME 140
> >>
> >>   /* Return the type info of a stub function, if it exists.
> >>    *
> >> - * The name of a stub function is made up of the name of the struct_ops and
> >> - * the name of the function pointer member, separated by "__". For example,
> >> - * if the struct_ops type is named "foo_ops" and the function pointer
> >> - * member is named "bar", the stub function name would be "foo_ops__bar".
> >> + * The name of a stub function is made up of the name of the struct_ops,
> >> + * the name of the function pointer member and optionally "priv_stack"
> >> + * suffix, separated by "__". For example, if the struct_ops type is named
> >> + * "foo_ops" and the function pointer  member is named "bar", the stub
> >> + * function name would be "foo_ops__bar". If a suffix "priv_stack" exists,
> >> + * the stub function name would be "foo_ops__bar__priv_stack".
> >>    */
> >>   static const struct btf_type *
> >>   find_stub_func_proto(const struct btf *btf, const char *st_op_name,
> >> -                    const char *member_name)
> >> +                    const char *member_name, bool *priv_stack_always)
> >>   {
> >>          char stub_func_name[MAX_STUB_NAME];
> >>          const struct btf_type *func_type;
> >>          s32 btf_id;
> >>          int cp;
> >>
> >> -       cp = snprintf(stub_func_name, MAX_STUB_NAME, "%s__%s",
> >> +       cp = snprintf(stub_func_name, MAX_STUB_NAME, "%s__%s__priv_stack",
> >>                        st_op_name, member_name);
> >
> > I don't think this approach fits.
> > pw-bot: cr
> >
> > Also looking at original
> > commit 1611603537a4 ("bpf: Create argument information for nullable arguments.")
> > that added this %s__%s notation I'm not sure why we went
> > with that approach.
> >
> > Just to avoid adding __nullable suffix in the actual callback
> > and using cfi stub callback names with such suffixes as
> > a "proxy" for the real callback?
> >
> > Did we ever use this functionality for anything other than
> > bpf_testmod_ops__test_maybe_null selftest ?
> >
> > Martin ?
>
> The __nullable is to tag an argument of an ops. The member in the struct (e.g.
> tcp_congestion_ops) is a pointer to FUNC_PROTO and its argument does not have an
> argument name to tag. Hence, we went with tagging the actual FUNC in the cfi object.

Ahh. Right. That makes sense.

> The __nullable argument tagging request was originally from sched_ext but I also
> don't see its usage in-tree for now.

ok. Let's sync up with Tejun whether they have plans to use it.

> For the priv_stack tagging, I also don't think it is a good way of doing it. It
> is like adding __nullable to flag the ops may return NULL pointer which I also
> tried to avoid in the bpf-qdisc patch set.
Tejun Heo Oct. 23, 2024, 9 p.m. UTC | #5
Hello,

On Tue, Oct 22, 2024 at 01:19:58PM -0700, Alexei Starovoitov wrote:
> > The __nullable argument tagging request was originally from sched_ext but I also
> > don't see its usage in-tree for now.
> 
> ok. Let's sync up with Tejun whether they have plans to use it.

Yeah, in sched_ext_ops.dispatch(s32 cpu, struct task_struct *prev), @prev
can be NULL and right now if a BPF scheduler derefs without checking for
NULL, it can trigger kernel crash, I think, so it needs __nullable tagging.

Thanks.
Alexei Starovoitov Oct. 23, 2024, 11:07 p.m. UTC | #6
On Wed, Oct 23, 2024 at 2:00 PM Tejun Heo <tj@kernel.org> wrote:
>
> Hello,
>
> On Tue, Oct 22, 2024 at 01:19:58PM -0700, Alexei Starovoitov wrote:
> > > The __nullable argument tagging request was originally from sched_ext but I also
> > > don't see its usage in-tree for now.
> >
> > ok. Let's sync up with Tejun whether they have plans to use it.
>
> Yeah, in sched_ext_ops.dispatch(s32 cpu, struct task_struct *prev), @prev
> can be NULL and right now if a BPF scheduler derefs without checking for
> NULL, it can trigger kernel crash, I think, so it needs __nullable tagging.

I see. The following should do it:
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 3cd7c50a51c5..82bef41d7eae 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -5492,7 +5492,7 @@ static int bpf_scx_validate(void *kdata)
 static s32 select_cpu_stub(struct task_struct *p, s32 prev_cpu, u64
wake_flags) { return -EINVAL; }
 static void enqueue_stub(struct task_struct *p, u64 enq_flags) {}
 static void dequeue_stub(struct task_struct *p, u64 enq_flags) {}
-static void dispatch_stub(s32 prev_cpu, struct task_struct *p) {}
+static void dispatch_stub(s32 prev_cpu, struct task_struct *p__nullable) {}
Tejun Heo Oct. 24, 2024, 12:56 a.m. UTC | #7
On Wed, Oct 23, 2024 at 04:07:49PM -0700, Alexei Starovoitov wrote:
> On Wed, Oct 23, 2024 at 2:00 PM Tejun Heo <tj@kernel.org> wrote:
> >
> > Hello,
> >
> > On Tue, Oct 22, 2024 at 01:19:58PM -0700, Alexei Starovoitov wrote:
> > > > The __nullable argument tagging request was originally from sched_ext but I also
> > > > don't see its usage in-tree for now.
> > >
> > > ok. Let's sync up with Tejun whether they have plans to use it.
> >
> > Yeah, in sched_ext_ops.dispatch(s32 cpu, struct task_struct *prev), @prev
> > can be NULL and right now if a BPF scheduler derefs without checking for
> > NULL, it can trigger kernel crash, I think, so it needs __nullable tagging.
> 
> I see. The following should do it:
> diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
> index 3cd7c50a51c5..82bef41d7eae 100644
> --- a/kernel/sched/ext.c
> +++ b/kernel/sched/ext.c
> @@ -5492,7 +5492,7 @@ static int bpf_scx_validate(void *kdata)
>  static s32 select_cpu_stub(struct task_struct *p, s32 prev_cpu, u64
> wake_flags) { return -EINVAL; }
>  static void enqueue_stub(struct task_struct *p, u64 enq_flags) {}
>  static void dequeue_stub(struct task_struct *p, u64 enq_flags) {}
> -static void dispatch_stub(s32 prev_cpu, struct task_struct *p) {}
> +static void dispatch_stub(s32 prev_cpu, struct task_struct *p__nullable) {}

This is a lot neater than the existing workaround:

  http://lkml.kernel.org/r/Zxma-ZFPKYZDqCGu@slm.duckdns.org

Thanks!
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f3884ce2603d..376e43fc72b9 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1491,6 +1491,7 @@  struct bpf_prog_aux {
 	bool exception_boundary;
 	bool is_extended; /* true if extended by freplace program */
 	bool priv_stack_eligible;
+	bool priv_stack_always;
 	u64 prog_array_member_cnt; /* counts how many times as member of prog_array */
 	struct mutex ext_mutex; /* mutex for is_extended and prog_array_member_cnt */
 	struct bpf_arena *arena;
@@ -1776,6 +1777,7 @@  struct bpf_struct_ops {
 struct bpf_struct_ops_func_info {
 	struct bpf_ctx_arg_aux *info;
 	u32 cnt;
+	bool priv_stack_always;
 };
 
 struct bpf_struct_ops_desc {
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 8279b5a57798..2cd4bd086c7a 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -145,33 +145,44 @@  void bpf_struct_ops_image_free(void *image)
 }
 
 #define MAYBE_NULL_SUFFIX "__nullable"
-#define MAX_STUB_NAME 128
+#define MAX_STUB_NAME 140
 
 /* Return the type info of a stub function, if it exists.
  *
- * The name of a stub function is made up of the name of the struct_ops and
- * the name of the function pointer member, separated by "__". For example,
- * if the struct_ops type is named "foo_ops" and the function pointer
- * member is named "bar", the stub function name would be "foo_ops__bar".
+ * The name of a stub function is made up of the name of the struct_ops,
+ * the name of the function pointer member and optionally "priv_stack"
+ * suffix, separated by "__". For example, if the struct_ops type is named
+ * "foo_ops" and the function pointer  member is named "bar", the stub
+ * function name would be "foo_ops__bar". If a suffix "priv_stack" exists,
+ * the stub function name would be "foo_ops__bar__priv_stack".
  */
 static const struct btf_type *
 find_stub_func_proto(const struct btf *btf, const char *st_op_name,
-		     const char *member_name)
+		     const char *member_name, bool *priv_stack_always)
 {
 	char stub_func_name[MAX_STUB_NAME];
 	const struct btf_type *func_type;
 	s32 btf_id;
 	int cp;
 
-	cp = snprintf(stub_func_name, MAX_STUB_NAME, "%s__%s",
+	cp = snprintf(stub_func_name, MAX_STUB_NAME, "%s__%s__priv_stack",
 		      st_op_name, member_name);
 	if (cp >= MAX_STUB_NAME) {
 		pr_warn("Stub function name too long\n");
 		return NULL;
 	}
+
 	btf_id = btf_find_by_name_kind(btf, stub_func_name, BTF_KIND_FUNC);
-	if (btf_id < 0)
-		return NULL;
+	if (btf_id >= 0) {
+		*priv_stack_always = true;
+	} else {
+		cp = snprintf(stub_func_name, MAX_STUB_NAME, "%s__%s",
+			      st_op_name, member_name);
+		btf_id = btf_find_by_name_kind(btf, stub_func_name, BTF_KIND_FUNC);
+		if (btf_id < 0)
+			return NULL;
+	}
+
 	func_type = btf_type_by_id(btf, btf_id);
 	if (!func_type)
 		return NULL;
@@ -209,10 +220,12 @@  static int prepare_func_info(struct btf *btf,
 	const struct btf_param *stub_args, *args;
 	struct bpf_ctx_arg_aux *info, *info_buf;
 	u32 nargs, arg_no, info_cnt = 0;
+	bool priv_stack_always = false;
 	u32 arg_btf_id;
 	int offset;
 
-	stub_func_proto = find_stub_func_proto(btf, st_ops_name, member_name);
+	stub_func_proto = find_stub_func_proto(btf, st_ops_name, member_name,
+					       &priv_stack_always);
 	if (!stub_func_proto)
 		return 0;
 
@@ -226,6 +239,8 @@  static int prepare_func_info(struct btf *btf,
 		return -EINVAL;
 	}
 
+	func_info->priv_stack_always = priv_stack_always;
+
 	if (!nargs)
 		return 0;
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index ccfe159cfbde..25283ee6f86f 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5995,6 +5995,8 @@  static bool bpf_enable_private_stack(struct bpf_verifier_env *env)
 	case BPF_PROG_TYPE_PERF_EVENT:
 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
 		return true;
+	case BPF_PROG_TYPE_STRUCT_OPS:
+		return env->prog->aux->priv_stack_always;
 	case BPF_PROG_TYPE_TRACING:
 		if (env->prog->expected_attach_type != BPF_TRACE_ITER)
 			return true;
@@ -6092,7 +6094,9 @@  static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx,
 			return -EACCES;
 		}
 
-		if (!priv_stack_eligible && depth >= BPF_PRIV_STACK_MIN_SUBTREE_SIZE) {
+		if (!priv_stack_eligible &&
+		    (env->prog->aux->priv_stack_always ||
+		     depth >= BPF_PRIV_STACK_MIN_SUBTREE_SIZE)) {
 			subprog[orig_idx].priv_stack_eligible = true;
 			env->prog->aux->priv_stack_eligible = priv_stack_eligible = true;
 		}
@@ -21883,6 +21887,8 @@  static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
 		st_ops_desc->func_info[member_idx].info;
 	prog->aux->ctx_arg_info_size =
 		st_ops_desc->func_info[member_idx].cnt;
+	prog->aux->priv_stack_always =
+		st_ops_desc->func_info[member_idx].priv_stack_always;
 
 	prog->aux->attach_func_proto = func_proto;
 	prog->aux->attach_func_name = mname;