diff mbox series

[RFC,bpf-next,v4,2/7] bpf/verifier: allow kfunc to return an allocated mem

Message ID 20220421140740.459558-3-benjamin.tissoires@redhat.com (mailing list archive)
State RFC
Delegated to: BPF
Headers show
Series Introduce eBPF support for HID devices (new attempt) | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR pending PR summary
bpf/vmtest-bpf-next-VM_Test-1 pending Logs for Kernel LATEST on ubuntu-latest + selftests
bpf/vmtest-bpf-next-VM_Test-2 pending Logs for Kernel LATEST on z15 + selftests
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 129 this patch: 129
netdev/cc_maintainers warning 1 maintainers not CCed: netdev@vger.kernel.org
netdev/build_clang fail Errors and warnings before: 17 this patch: 17
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 129 this patch: 129
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: line length of 82 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 88 exceeds 80 columns WARNING: line length of 91 exceeds 80 columns WARNING: line length of 93 exceeds 80 columns WARNING: line length of 94 exceeds 80 columns WARNING: line length of 95 exceeds 80 columns WARNING: line length of 96 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Benjamin Tissoires April 21, 2022, 2:07 p.m. UTC
When a kfunc is not returning a pointer to a struct but to a plain type,
check if one of the arguments is called __sz and is a const from the
caller, and use this as the size of the allocated memory.

For tracing programs, we consider the provided memory to be read only
unless the program is BPF_MODIFY_RETURN.

Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>

---

new in v4
---
 include/linux/btf.h   |  6 ++++
 kernel/bpf/btf.c      | 31 ++++++++++++++++----
 kernel/bpf/verifier.c | 66 +++++++++++++++++++++++++++++++++----------
 3 files changed, 83 insertions(+), 20 deletions(-)

Comments

Alexei Starovoitov April 26, 2022, 4:08 a.m. UTC | #1
On Thu, Apr 21, 2022 at 04:07:35PM +0200, Benjamin Tissoires wrote:
> When a kfunc is not returning a pointer to a struct but to a plain type,
> check if one of the arguments is called __sz and is a const from the
> caller, and use this as the size of the allocated memory.
> 
> For tracing programs, we consider the provided memory to be read only
> unless the program is BPF_MODIFY_RETURN.
> 
> Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
> 
> ---
> 
> new in v4
> ---
>  include/linux/btf.h   |  6 ++++
>  kernel/bpf/btf.c      | 31 ++++++++++++++++----
>  kernel/bpf/verifier.c | 66 +++++++++++++++++++++++++++++++++----------
>  3 files changed, 83 insertions(+), 20 deletions(-)
> 
> diff --git a/include/linux/btf.h b/include/linux/btf.h
> index 36bc09b8e890..76a3ff48ae2a 100644
> --- a/include/linux/btf.h
> +++ b/include/linux/btf.h
> @@ -332,6 +332,12 @@ static inline struct btf_param *btf_params(const struct btf_type *t)
>  	return (struct btf_param *)(t + 1);
>  }
>  
> +struct bpf_reg_state;
> +
> +bool btf_is_kfunc_arg_mem_size(const struct btf *btf,
> +			       const struct btf_param *arg,
> +			       const struct bpf_reg_state *reg);
> +
>  #ifdef CONFIG_BPF_SYSCALL
>  struct bpf_prog;
>  
> diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> index 76318a4c2d0e..22e6e3cdc7ee 100644
> --- a/kernel/bpf/btf.c
> +++ b/kernel/bpf/btf.c
> @@ -5851,9 +5851,9 @@ static bool __btf_type_is_scalar_struct(struct bpf_verifier_log *log,
>  	return true;
>  }
>  
> -static bool is_kfunc_arg_mem_size(const struct btf *btf,
> -				  const struct btf_param *arg,
> -				  const struct bpf_reg_state *reg)
> +bool btf_is_kfunc_arg_mem_size(const struct btf *btf,
> +			       const struct btf_param *arg,
> +			       const struct bpf_reg_state *reg)
>  {
>  	int len, sfx_len = sizeof("__sz") - 1;
>  	const struct btf_type *t;
> @@ -5976,7 +5976,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
>  				reg_btf = reg->btf;
>  				reg_ref_id = reg->btf_id;
>  				/* Ensure only one argument is referenced
> -				 * PTR_TO_BTF_ID, check_func_arg_reg_off relies
> +				 * PTR_TO_BTF_ID or PTR_TO_MEM, check_func_arg_reg_off relies
>  				 * on only one referenced register being allowed
>  				 * for kfuncs.
>  				 */
> @@ -6012,7 +6012,10 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
>  			u32 type_size;
>  
>  			if (is_kfunc) {
> -				bool arg_mem_size = i + 1 < nargs && is_kfunc_arg_mem_size(btf, &args[i + 1], &regs[regno + 1]);
> +				bool arg_mem_size = i + 1 < nargs &&
> +						    btf_is_kfunc_arg_mem_size(btf,
> +									      &args[i + 1],
> +									      &regs[regno + 1]);

bpf allows ~100 chars. No need to break the line so much.

>  
>  				/* Permit pointer to mem, but only when argument
>  				 * type is pointer to scalar, or struct composed
> @@ -6039,6 +6042,24 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
>  					i++;
>  					continue;
>  				}
> +
> +				if (rel && reg->ref_obj_id) {
> +					/* Ensure only one argument is referenced
> +					 * PTR_TO_BTF_ID or PTR_TO_MEM, check_func_arg_reg_off
> +					 * relies on only one referenced register being allowed
> +					 * for kfuncs.
> +					 */
> +					if (ref_obj_id) {
> +						bpf_log(log,
> +							"verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
> +							regno,
> +							reg->ref_obj_id,
> +							ref_obj_id);
> +						return -EFAULT;
> +					}
> +					ref_regno = regno;
> +					ref_obj_id = reg->ref_obj_id;
> +				}
>  			}
>  
>  			resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 71827d14724a..0f339f9058f3 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -6974,7 +6974,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>  	int err, insn_idx = *insn_idx_p;
>  	const struct btf_param *args;
>  	struct btf *desc_btf;
> +	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
>  	bool acq;
> +	size_t reg_size = 0;
>  
>  	/* skip for now, but return error when we find this in fixup_kfunc_call */
>  	if (!insn->imm)
> @@ -7015,8 +7017,8 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>  		}
>  	}
>  
> -	for (i = 0; i < CALLER_SAVED_REGS; i++)
> -		mark_reg_not_init(env, regs, caller_saved[i]);
> +	/* reset REG_0 */
> +	mark_reg_not_init(env, regs, BPF_REG_0);
>  
>  	/* Check return type */
>  	t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL);
> @@ -7026,6 +7028,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>  		return -EINVAL;
>  	}
>  
> +	nargs = btf_type_vlen(func_proto);
> +	args = btf_params(func_proto);
> +
>  	if (btf_type_is_scalar(t)) {
>  		mark_reg_unknown(env, regs, BPF_REG_0);
>  		mark_btf_func_reg_size(env, BPF_REG_0, t->size);
> @@ -7033,24 +7038,54 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>  		ptr_type = btf_type_skip_modifiers(desc_btf, t->type,
>  						   &ptr_type_id);
>  		if (!btf_type_is_struct(ptr_type)) {
> -			ptr_type_name = btf_name_by_offset(desc_btf,
> -							   ptr_type->name_off);
> -			verbose(env, "kernel function %s returns pointer type %s %s is not supported\n",
> -				func_name, btf_type_str(ptr_type),
> -				ptr_type_name);
> -			return -EINVAL;
> +			/* if we have an array, we must have a const argument named "__sz" */
> +			for (i = 0; i < nargs; i++) {
> +				u32 regno = i + BPF_REG_1;
> +				struct bpf_reg_state *reg = &regs[regno];
> +
> +				/* look for any const scalar parameter of name "__sz" */
> +				if (!check_reg_arg(env, regno, SRC_OP) &&
> +				    tnum_is_const(regs[regno].var_off) &&
> +				    btf_is_kfunc_arg_mem_size(desc_btf, &args[i], reg))
> +					reg_size = regs[regno].var_off.value;
> +			}
> +
> +			if (!reg_size) {
> +				ptr_type_name = btf_name_by_offset(desc_btf,
> +								   ptr_type->name_off);
> +				verbose(env,
> +					"kernel function %s returns pointer type %s %s is not supported\n",
> +					func_name,
> +					btf_type_str(ptr_type),
> +					ptr_type_name);
> +				return -EINVAL;
> +			}
> +
> +			mark_reg_known_zero(env, regs, BPF_REG_0);
> +			regs[BPF_REG_0].type = PTR_TO_MEM;
> +			regs[BPF_REG_0].mem_size = reg_size;
> +
> +			/* in case of tracing, only allow write access to
> +			 * BPF_MODIFY_RETURN programs
> +			 */
> +			if (prog_type == BPF_PROG_TYPE_TRACING &&
> +			    env->prog->expected_attach_type != BPF_MODIFY_RETURN)
> +				regs[BPF_REG_0].type |= MEM_RDONLY;

MOD_RET restriction looks artificial.
We can distinguish readonly vs writeable PTR_TO_MEM based on
another naming convention.
Currently arg_name__sz applies to the previous argument.
Matching suffix made sense there.
Reusing the same suffix matching for a different purpose could be confusing.
For this use case we may reserve a full argument name.
Like "rdonly_buf_size" and "rdwr_buf_size" ?
Benjamin Tissoires April 26, 2022, 7:30 a.m. UTC | #2
On Tue, Apr 26, 2022 at 6:09 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Thu, Apr 21, 2022 at 04:07:35PM +0200, Benjamin Tissoires wrote:
> > When a kfunc is not returning a pointer to a struct but to a plain type,
> > check if one of the arguments is called __sz and is a const from the
> > caller, and use this as the size of the allocated memory.
> >
> > For tracing programs, we consider the provided memory to be read only
> > unless the program is BPF_MODIFY_RETURN.
> >
> > Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
> >
> > ---
> >
> > new in v4
> > ---
> >  include/linux/btf.h   |  6 ++++
> >  kernel/bpf/btf.c      | 31 ++++++++++++++++----
> >  kernel/bpf/verifier.c | 66 +++++++++++++++++++++++++++++++++----------
> >  3 files changed, 83 insertions(+), 20 deletions(-)
> >
> > diff --git a/include/linux/btf.h b/include/linux/btf.h
> > index 36bc09b8e890..76a3ff48ae2a 100644
> > --- a/include/linux/btf.h
> > +++ b/include/linux/btf.h
> > @@ -332,6 +332,12 @@ static inline struct btf_param *btf_params(const struct btf_type *t)
> >       return (struct btf_param *)(t + 1);
> >  }
> >
> > +struct bpf_reg_state;
> > +
> > +bool btf_is_kfunc_arg_mem_size(const struct btf *btf,
> > +                            const struct btf_param *arg,
> > +                            const struct bpf_reg_state *reg);
> > +
> >  #ifdef CONFIG_BPF_SYSCALL
> >  struct bpf_prog;
> >
> > diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> > index 76318a4c2d0e..22e6e3cdc7ee 100644
> > --- a/kernel/bpf/btf.c
> > +++ b/kernel/bpf/btf.c
> > @@ -5851,9 +5851,9 @@ static bool __btf_type_is_scalar_struct(struct bpf_verifier_log *log,
> >       return true;
> >  }
> >
> > -static bool is_kfunc_arg_mem_size(const struct btf *btf,
> > -                               const struct btf_param *arg,
> > -                               const struct bpf_reg_state *reg)
> > +bool btf_is_kfunc_arg_mem_size(const struct btf *btf,
> > +                            const struct btf_param *arg,
> > +                            const struct bpf_reg_state *reg)
> >  {
> >       int len, sfx_len = sizeof("__sz") - 1;
> >       const struct btf_type *t;
> > @@ -5976,7 +5976,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
> >                               reg_btf = reg->btf;
> >                               reg_ref_id = reg->btf_id;
> >                               /* Ensure only one argument is referenced
> > -                              * PTR_TO_BTF_ID, check_func_arg_reg_off relies
> > +                              * PTR_TO_BTF_ID or PTR_TO_MEM, check_func_arg_reg_off relies
> >                                * on only one referenced register being allowed
> >                                * for kfuncs.
> >                                */
> > @@ -6012,7 +6012,10 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
> >                       u32 type_size;
> >
> >                       if (is_kfunc) {
> > -                             bool arg_mem_size = i + 1 < nargs && is_kfunc_arg_mem_size(btf, &args[i + 1], &regs[regno + 1]);
> > +                             bool arg_mem_size = i + 1 < nargs &&
> > +                                                 btf_is_kfunc_arg_mem_size(btf,
> > +                                                                           &args[i + 1],
> > +                                                                           &regs[regno + 1]);
>
> bpf allows ~100 chars. No need to break the line so much.
>
> >
> >                               /* Permit pointer to mem, but only when argument
> >                                * type is pointer to scalar, or struct composed
> > @@ -6039,6 +6042,24 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
> >                                       i++;
> >                                       continue;
> >                               }
> > +
> > +                             if (rel && reg->ref_obj_id) {
> > +                                     /* Ensure only one argument is referenced
> > +                                      * PTR_TO_BTF_ID or PTR_TO_MEM, check_func_arg_reg_off
> > +                                      * relies on only one referenced register being allowed
> > +                                      * for kfuncs.
> > +                                      */
> > +                                     if (ref_obj_id) {
> > +                                             bpf_log(log,
> > +                                                     "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
> > +                                                     regno,
> > +                                                     reg->ref_obj_id,
> > +                                                     ref_obj_id);
> > +                                             return -EFAULT;
> > +                                     }
> > +                                     ref_regno = regno;
> > +                                     ref_obj_id = reg->ref_obj_id;
> > +                             }
> >                       }
> >
> >                       resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
> > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > index 71827d14724a..0f339f9058f3 100644
> > --- a/kernel/bpf/verifier.c
> > +++ b/kernel/bpf/verifier.c
> > @@ -6974,7 +6974,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
> >       int err, insn_idx = *insn_idx_p;
> >       const struct btf_param *args;
> >       struct btf *desc_btf;
> > +     enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
> >       bool acq;
> > +     size_t reg_size = 0;
> >
> >       /* skip for now, but return error when we find this in fixup_kfunc_call */
> >       if (!insn->imm)
> > @@ -7015,8 +7017,8 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
> >               }
> >       }
> >
> > -     for (i = 0; i < CALLER_SAVED_REGS; i++)
> > -             mark_reg_not_init(env, regs, caller_saved[i]);
> > +     /* reset REG_0 */
> > +     mark_reg_not_init(env, regs, BPF_REG_0);
> >
> >       /* Check return type */
> >       t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL);
> > @@ -7026,6 +7028,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
> >               return -EINVAL;
> >       }
> >
> > +     nargs = btf_type_vlen(func_proto);
> > +     args = btf_params(func_proto);
> > +
> >       if (btf_type_is_scalar(t)) {
> >               mark_reg_unknown(env, regs, BPF_REG_0);
> >               mark_btf_func_reg_size(env, BPF_REG_0, t->size);
> > @@ -7033,24 +7038,54 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
> >               ptr_type = btf_type_skip_modifiers(desc_btf, t->type,
> >                                                  &ptr_type_id);
> >               if (!btf_type_is_struct(ptr_type)) {
> > -                     ptr_type_name = btf_name_by_offset(desc_btf,
> > -                                                        ptr_type->name_off);
> > -                     verbose(env, "kernel function %s returns pointer type %s %s is not supported\n",
> > -                             func_name, btf_type_str(ptr_type),
> > -                             ptr_type_name);
> > -                     return -EINVAL;
> > +                     /* if we have an array, we must have a const argument named "__sz" */
> > +                     for (i = 0; i < nargs; i++) {
> > +                             u32 regno = i + BPF_REG_1;
> > +                             struct bpf_reg_state *reg = &regs[regno];
> > +
> > +                             /* look for any const scalar parameter of name "__sz" */
> > +                             if (!check_reg_arg(env, regno, SRC_OP) &&
> > +                                 tnum_is_const(regs[regno].var_off) &&
> > +                                 btf_is_kfunc_arg_mem_size(desc_btf, &args[i], reg))
> > +                                     reg_size = regs[regno].var_off.value;
> > +                     }
> > +
> > +                     if (!reg_size) {
> > +                             ptr_type_name = btf_name_by_offset(desc_btf,
> > +                                                                ptr_type->name_off);
> > +                             verbose(env,
> > +                                     "kernel function %s returns pointer type %s %s is not supported\n",
> > +                                     func_name,
> > +                                     btf_type_str(ptr_type),
> > +                                     ptr_type_name);
> > +                             return -EINVAL;
> > +                     }
> > +
> > +                     mark_reg_known_zero(env, regs, BPF_REG_0);
> > +                     regs[BPF_REG_0].type = PTR_TO_MEM;
> > +                     regs[BPF_REG_0].mem_size = reg_size;
> > +
> > +                     /* in case of tracing, only allow write access to
> > +                      * BPF_MODIFY_RETURN programs
> > +                      */
> > +                     if (prog_type == BPF_PROG_TYPE_TRACING &&
> > +                         env->prog->expected_attach_type != BPF_MODIFY_RETURN)
> > +                             regs[BPF_REG_0].type |= MEM_RDONLY;
>
> MOD_RET restriction looks artificial.
> We can distinguish readonly vs writeable PTR_TO_MEM based on
> another naming convention.
> Currently arg_name__sz applies to the previous argument.
> Matching suffix made sense there.

Oh, I missed the point of the "__sz". I did not realize it was
supposed to be a suffix.

> Reusing the same suffix matching for a different purpose could be confusing.
> For this use case we may reserve a full argument name.
> Like "rdonly_buf_size" and "rdwr_buf_size" ?
>

I like the idea but I have 2 problems here:
1. I do not really want to have 2 separate kfuncs for read only and
write operations
2. How can I restrict the write operation to fmod_ret?

For 1, my guess is that the read-only operation will not be used
unless we solve 2.
For 2, the rationale is that I think tracing functions are not
supposed to change the behavior. This was said on the thread about
priorities for BPF programs. And it somehow makes sense that fentry
should be used for tracing only. OTOH, fmod_ret is clearly affecting
the behavior of the program, so I see it more "natural" that it can
change the context too.

Cheers,
Benjamin
Alexei Starovoitov April 30, 2022, 3:25 a.m. UTC | #3
On Tue, Apr 26, 2022 at 12:30 AM Benjamin Tissoires
<benjamin.tissoires@redhat.com> wrote:
>
> On Tue, Apr 26, 2022 at 6:09 AM Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
> >
> > On Thu, Apr 21, 2022 at 04:07:35PM +0200, Benjamin Tissoires wrote:
> > > When a kfunc is not returning a pointer to a struct but to a plain type,
> > > check if one of the arguments is called __sz and is a const from the
> > > caller, and use this as the size of the allocated memory.
> > >
> > > For tracing programs, we consider the provided memory to be read only
> > > unless the program is BPF_MODIFY_RETURN.
> > >
> > > Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
> > >
> > > ---
> > >
> > > new in v4
> > > ---
> > >  include/linux/btf.h   |  6 ++++
> > >  kernel/bpf/btf.c      | 31 ++++++++++++++++----
> > >  kernel/bpf/verifier.c | 66 +++++++++++++++++++++++++++++++++----------
> > >  3 files changed, 83 insertions(+), 20 deletions(-)
> > >
> > > diff --git a/include/linux/btf.h b/include/linux/btf.h
> > > index 36bc09b8e890..76a3ff48ae2a 100644
> > > --- a/include/linux/btf.h
> > > +++ b/include/linux/btf.h
> > > @@ -332,6 +332,12 @@ static inline struct btf_param *btf_params(const struct btf_type *t)
> > >       return (struct btf_param *)(t + 1);
> > >  }
> > >
> > > +struct bpf_reg_state;
> > > +
> > > +bool btf_is_kfunc_arg_mem_size(const struct btf *btf,
> > > +                            const struct btf_param *arg,
> > > +                            const struct bpf_reg_state *reg);
> > > +
> > >  #ifdef CONFIG_BPF_SYSCALL
> > >  struct bpf_prog;
> > >
> > > diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> > > index 76318a4c2d0e..22e6e3cdc7ee 100644
> > > --- a/kernel/bpf/btf.c
> > > +++ b/kernel/bpf/btf.c
> > > @@ -5851,9 +5851,9 @@ static bool __btf_type_is_scalar_struct(struct bpf_verifier_log *log,
> > >       return true;
> > >  }
> > >
> > > -static bool is_kfunc_arg_mem_size(const struct btf *btf,
> > > -                               const struct btf_param *arg,
> > > -                               const struct bpf_reg_state *reg)
> > > +bool btf_is_kfunc_arg_mem_size(const struct btf *btf,
> > > +                            const struct btf_param *arg,
> > > +                            const struct bpf_reg_state *reg)
> > >  {
> > >       int len, sfx_len = sizeof("__sz") - 1;
> > >       const struct btf_type *t;
> > > @@ -5976,7 +5976,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
> > >                               reg_btf = reg->btf;
> > >                               reg_ref_id = reg->btf_id;
> > >                               /* Ensure only one argument is referenced
> > > -                              * PTR_TO_BTF_ID, check_func_arg_reg_off relies
> > > +                              * PTR_TO_BTF_ID or PTR_TO_MEM, check_func_arg_reg_off relies
> > >                                * on only one referenced register being allowed
> > >                                * for kfuncs.
> > >                                */
> > > @@ -6012,7 +6012,10 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
> > >                       u32 type_size;
> > >
> > >                       if (is_kfunc) {
> > > -                             bool arg_mem_size = i + 1 < nargs && is_kfunc_arg_mem_size(btf, &args[i + 1], &regs[regno + 1]);
> > > +                             bool arg_mem_size = i + 1 < nargs &&
> > > +                                                 btf_is_kfunc_arg_mem_size(btf,
> > > +                                                                           &args[i + 1],
> > > +                                                                           &regs[regno + 1]);
> >
> > bpf allows ~100 chars. No need to break the line so much.
> >
> > >
> > >                               /* Permit pointer to mem, but only when argument
> > >                                * type is pointer to scalar, or struct composed
> > > @@ -6039,6 +6042,24 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
> > >                                       i++;
> > >                                       continue;
> > >                               }
> > > +
> > > +                             if (rel && reg->ref_obj_id) {
> > > +                                     /* Ensure only one argument is referenced
> > > +                                      * PTR_TO_BTF_ID or PTR_TO_MEM, check_func_arg_reg_off
> > > +                                      * relies on only one referenced register being allowed
> > > +                                      * for kfuncs.
> > > +                                      */
> > > +                                     if (ref_obj_id) {
> > > +                                             bpf_log(log,
> > > +                                                     "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
> > > +                                                     regno,
> > > +                                                     reg->ref_obj_id,
> > > +                                                     ref_obj_id);
> > > +                                             return -EFAULT;
> > > +                                     }
> > > +                                     ref_regno = regno;
> > > +                                     ref_obj_id = reg->ref_obj_id;
> > > +                             }
> > >                       }
> > >
> > >                       resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
> > > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > > index 71827d14724a..0f339f9058f3 100644
> > > --- a/kernel/bpf/verifier.c
> > > +++ b/kernel/bpf/verifier.c
> > > @@ -6974,7 +6974,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
> > >       int err, insn_idx = *insn_idx_p;
> > >       const struct btf_param *args;
> > >       struct btf *desc_btf;
> > > +     enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
> > >       bool acq;
> > > +     size_t reg_size = 0;
> > >
> > >       /* skip for now, but return error when we find this in fixup_kfunc_call */
> > >       if (!insn->imm)
> > > @@ -7015,8 +7017,8 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
> > >               }
> > >       }
> > >
> > > -     for (i = 0; i < CALLER_SAVED_REGS; i++)
> > > -             mark_reg_not_init(env, regs, caller_saved[i]);
> > > +     /* reset REG_0 */
> > > +     mark_reg_not_init(env, regs, BPF_REG_0);
> > >
> > >       /* Check return type */
> > >       t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL);
> > > @@ -7026,6 +7028,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
> > >               return -EINVAL;
> > >       }
> > >
> > > +     nargs = btf_type_vlen(func_proto);
> > > +     args = btf_params(func_proto);
> > > +
> > >       if (btf_type_is_scalar(t)) {
> > >               mark_reg_unknown(env, regs, BPF_REG_0);
> > >               mark_btf_func_reg_size(env, BPF_REG_0, t->size);
> > > @@ -7033,24 +7038,54 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
> > >               ptr_type = btf_type_skip_modifiers(desc_btf, t->type,
> > >                                                  &ptr_type_id);
> > >               if (!btf_type_is_struct(ptr_type)) {
> > > -                     ptr_type_name = btf_name_by_offset(desc_btf,
> > > -                                                        ptr_type->name_off);
> > > -                     verbose(env, "kernel function %s returns pointer type %s %s is not supported\n",
> > > -                             func_name, btf_type_str(ptr_type),
> > > -                             ptr_type_name);
> > > -                     return -EINVAL;
> > > +                     /* if we have an array, we must have a const argument named "__sz" */
> > > +                     for (i = 0; i < nargs; i++) {
> > > +                             u32 regno = i + BPF_REG_1;
> > > +                             struct bpf_reg_state *reg = &regs[regno];
> > > +
> > > +                             /* look for any const scalar parameter of name "__sz" */
> > > +                             if (!check_reg_arg(env, regno, SRC_OP) &&
> > > +                                 tnum_is_const(regs[regno].var_off) &&
> > > +                                 btf_is_kfunc_arg_mem_size(desc_btf, &args[i], reg))
> > > +                                     reg_size = regs[regno].var_off.value;
> > > +                     }
> > > +
> > > +                     if (!reg_size) {
> > > +                             ptr_type_name = btf_name_by_offset(desc_btf,
> > > +                                                                ptr_type->name_off);
> > > +                             verbose(env,
> > > +                                     "kernel function %s returns pointer type %s %s is not supported\n",
> > > +                                     func_name,
> > > +                                     btf_type_str(ptr_type),
> > > +                                     ptr_type_name);
> > > +                             return -EINVAL;
> > > +                     }
> > > +
> > > +                     mark_reg_known_zero(env, regs, BPF_REG_0);
> > > +                     regs[BPF_REG_0].type = PTR_TO_MEM;
> > > +                     regs[BPF_REG_0].mem_size = reg_size;
> > > +
> > > +                     /* in case of tracing, only allow write access to
> > > +                      * BPF_MODIFY_RETURN programs
> > > +                      */
> > > +                     if (prog_type == BPF_PROG_TYPE_TRACING &&
> > > +                         env->prog->expected_attach_type != BPF_MODIFY_RETURN)
> > > +                             regs[BPF_REG_0].type |= MEM_RDONLY;
> >
> > MOD_RET restriction looks artificial.
> > We can distinguish readonly vs writeable PTR_TO_MEM based on
> > another naming convention.
> > Currently arg_name__sz applies to the previous argument.
> > Matching suffix made sense there.
>
> Oh, I missed the point of the "__sz". I did not realize it was
> supposed to be a suffix.
>
> > Reusing the same suffix matching for a different purpose could be confusing.
> > For this use case we may reserve a full argument name.
> > Like "rdonly_buf_size" and "rdwr_buf_size" ?
> >
>
> I like the idea but I have 2 problems here:
> 1. I do not really want to have 2 separate kfuncs for read only and
> write operations
> 2. How can I restrict the write operation to fmod_ret?
>
> For 1, my guess is that the read-only operation will not be used
> unless we solve 2.
> For 2, the rationale is that I think tracing functions are not
> supposed to change the behavior. This was said on the thread about
> priorities for BPF programs. And it somehow makes sense that fentry
> should be used for tracing only. OTOH, fmod_ret is clearly affecting
> the behavior of the program, so I see it more "natural" that it can
> change the context too.

Well, if we say that fentry is rdonly and fmod_ret is rdwr
then we probably shouldn't stop at return value.
If bpf prog can access the argument and this argument is an array
it should be writable.
We can allow different kfuncs for fentry and fmod_ret too.
They can be two tiny wrappers with different arg names (to distinguish
rdonly vs rdwr) on top of the single always_inline function
that returns a buffer.
Benjamin Tissoires April 30, 2022, 7:17 a.m. UTC | #4
On Sat, Apr 30, 2022 at 5:26 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Tue, Apr 26, 2022 at 12:30 AM Benjamin Tissoires
> <benjamin.tissoires@redhat.com> wrote:
> >
> > On Tue, Apr 26, 2022 at 6:09 AM Alexei Starovoitov
> > <alexei.starovoitov@gmail.com> wrote:
> > >
> > > On Thu, Apr 21, 2022 at 04:07:35PM +0200, Benjamin Tissoires wrote:
> > > > When a kfunc is not returning a pointer to a struct but to a plain type,
> > > > check if one of the arguments is called __sz and is a const from the
> > > > caller, and use this as the size of the allocated memory.
> > > >
> > > > For tracing programs, we consider the provided memory to be read only
> > > > unless the program is BPF_MODIFY_RETURN.
> > > >
> > > > Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
> > > >
> > > > ---
> > > >
> > > > new in v4
> > > > ---
> > > >  include/linux/btf.h   |  6 ++++
> > > >  kernel/bpf/btf.c      | 31 ++++++++++++++++----
> > > >  kernel/bpf/verifier.c | 66 +++++++++++++++++++++++++++++++++----------
> > > >  3 files changed, 83 insertions(+), 20 deletions(-)
> > > >
> > > > diff --git a/include/linux/btf.h b/include/linux/btf.h
> > > > index 36bc09b8e890..76a3ff48ae2a 100644
> > > > --- a/include/linux/btf.h
> > > > +++ b/include/linux/btf.h
> > > > @@ -332,6 +332,12 @@ static inline struct btf_param *btf_params(const struct btf_type *t)
> > > >       return (struct btf_param *)(t + 1);
> > > >  }
> > > >
> > > > +struct bpf_reg_state;
> > > > +
> > > > +bool btf_is_kfunc_arg_mem_size(const struct btf *btf,
> > > > +                            const struct btf_param *arg,
> > > > +                            const struct bpf_reg_state *reg);
> > > > +
> > > >  #ifdef CONFIG_BPF_SYSCALL
> > > >  struct bpf_prog;
> > > >
> > > > diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> > > > index 76318a4c2d0e..22e6e3cdc7ee 100644
> > > > --- a/kernel/bpf/btf.c
> > > > +++ b/kernel/bpf/btf.c
> > > > @@ -5851,9 +5851,9 @@ static bool __btf_type_is_scalar_struct(struct bpf_verifier_log *log,
> > > >       return true;
> > > >  }
> > > >
> > > > -static bool is_kfunc_arg_mem_size(const struct btf *btf,
> > > > -                               const struct btf_param *arg,
> > > > -                               const struct bpf_reg_state *reg)
> > > > +bool btf_is_kfunc_arg_mem_size(const struct btf *btf,
> > > > +                            const struct btf_param *arg,
> > > > +                            const struct bpf_reg_state *reg)
> > > >  {
> > > >       int len, sfx_len = sizeof("__sz") - 1;
> > > >       const struct btf_type *t;
> > > > @@ -5976,7 +5976,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
> > > >                               reg_btf = reg->btf;
> > > >                               reg_ref_id = reg->btf_id;
> > > >                               /* Ensure only one argument is referenced
> > > > -                              * PTR_TO_BTF_ID, check_func_arg_reg_off relies
> > > > +                              * PTR_TO_BTF_ID or PTR_TO_MEM, check_func_arg_reg_off relies
> > > >                                * on only one referenced register being allowed
> > > >                                * for kfuncs.
> > > >                                */
> > > > @@ -6012,7 +6012,10 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
> > > >                       u32 type_size;
> > > >
> > > >                       if (is_kfunc) {
> > > > -                             bool arg_mem_size = i + 1 < nargs && is_kfunc_arg_mem_size(btf, &args[i + 1], &regs[regno + 1]);
> > > > +                             bool arg_mem_size = i + 1 < nargs &&
> > > > +                                                 btf_is_kfunc_arg_mem_size(btf,
> > > > +                                                                           &args[i + 1],
> > > > +                                                                           &regs[regno + 1]);
> > >
> > > bpf allows ~100 chars. No need to break the line so much.
> > >
> > > >
> > > >                               /* Permit pointer to mem, but only when argument
> > > >                                * type is pointer to scalar, or struct composed
> > > > @@ -6039,6 +6042,24 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,
> > > >                                       i++;
> > > >                                       continue;
> > > >                               }
> > > > +
> > > > +                             if (rel && reg->ref_obj_id) {
> > > > +                                     /* Ensure only one argument is referenced
> > > > +                                      * PTR_TO_BTF_ID or PTR_TO_MEM, check_func_arg_reg_off
> > > > +                                      * relies on only one referenced register being allowed
> > > > +                                      * for kfuncs.
> > > > +                                      */
> > > > +                                     if (ref_obj_id) {
> > > > +                                             bpf_log(log,
> > > > +                                                     "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
> > > > +                                                     regno,
> > > > +                                                     reg->ref_obj_id,
> > > > +                                                     ref_obj_id);
> > > > +                                             return -EFAULT;
> > > > +                                     }
> > > > +                                     ref_regno = regno;
> > > > +                                     ref_obj_id = reg->ref_obj_id;
> > > > +                             }
> > > >                       }
> > > >
> > > >                       resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
> > > > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > > > index 71827d14724a..0f339f9058f3 100644
> > > > --- a/kernel/bpf/verifier.c
> > > > +++ b/kernel/bpf/verifier.c
> > > > @@ -6974,7 +6974,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
> > > >       int err, insn_idx = *insn_idx_p;
> > > >       const struct btf_param *args;
> > > >       struct btf *desc_btf;
> > > > +     enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
> > > >       bool acq;
> > > > +     size_t reg_size = 0;
> > > >
> > > >       /* skip for now, but return error when we find this in fixup_kfunc_call */
> > > >       if (!insn->imm)
> > > > @@ -7015,8 +7017,8 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
> > > >               }
> > > >       }
> > > >
> > > > -     for (i = 0; i < CALLER_SAVED_REGS; i++)
> > > > -             mark_reg_not_init(env, regs, caller_saved[i]);
> > > > +     /* reset REG_0 */
> > > > +     mark_reg_not_init(env, regs, BPF_REG_0);
> > > >
> > > >       /* Check return type */
> > > >       t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL);
> > > > @@ -7026,6 +7028,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
> > > >               return -EINVAL;
> > > >       }
> > > >
> > > > +     nargs = btf_type_vlen(func_proto);
> > > > +     args = btf_params(func_proto);
> > > > +
> > > >       if (btf_type_is_scalar(t)) {
> > > >               mark_reg_unknown(env, regs, BPF_REG_0);
> > > >               mark_btf_func_reg_size(env, BPF_REG_0, t->size);
> > > > @@ -7033,24 +7038,54 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
> > > >               ptr_type = btf_type_skip_modifiers(desc_btf, t->type,
> > > >                                                  &ptr_type_id);
> > > >               if (!btf_type_is_struct(ptr_type)) {
> > > > -                     ptr_type_name = btf_name_by_offset(desc_btf,
> > > > -                                                        ptr_type->name_off);
> > > > -                     verbose(env, "kernel function %s returns pointer type %s %s is not supported\n",
> > > > -                             func_name, btf_type_str(ptr_type),
> > > > -                             ptr_type_name);
> > > > -                     return -EINVAL;
> > > > +                     /* if we have an array, we must have a const argument named "__sz" */
> > > > +                     for (i = 0; i < nargs; i++) {
> > > > +                             u32 regno = i + BPF_REG_1;
> > > > +                             struct bpf_reg_state *reg = &regs[regno];
> > > > +
> > > > +                             /* look for any const scalar parameter of name "__sz" */
> > > > +                             if (!check_reg_arg(env, regno, SRC_OP) &&
> > > > +                                 tnum_is_const(regs[regno].var_off) &&
> > > > +                                 btf_is_kfunc_arg_mem_size(desc_btf, &args[i], reg))
> > > > +                                     reg_size = regs[regno].var_off.value;
> > > > +                     }
> > > > +
> > > > +                     if (!reg_size) {
> > > > +                             ptr_type_name = btf_name_by_offset(desc_btf,
> > > > +                                                                ptr_type->name_off);
> > > > +                             verbose(env,
> > > > +                                     "kernel function %s returns pointer type %s %s is not supported\n",
> > > > +                                     func_name,
> > > > +                                     btf_type_str(ptr_type),
> > > > +                                     ptr_type_name);
> > > > +                             return -EINVAL;
> > > > +                     }
> > > > +
> > > > +                     mark_reg_known_zero(env, regs, BPF_REG_0);
> > > > +                     regs[BPF_REG_0].type = PTR_TO_MEM;
> > > > +                     regs[BPF_REG_0].mem_size = reg_size;
> > > > +
> > > > +                     /* in case of tracing, only allow write access to
> > > > +                      * BPF_MODIFY_RETURN programs
> > > > +                      */
> > > > +                     if (prog_type == BPF_PROG_TYPE_TRACING &&
> > > > +                         env->prog->expected_attach_type != BPF_MODIFY_RETURN)
> > > > +                             regs[BPF_REG_0].type |= MEM_RDONLY;
> > >
> > > MOD_RET restriction looks artificial.
> > > We can distinguish readonly vs writeable PTR_TO_MEM based on
> > > another naming convention.
> > > Currently arg_name__sz applies to the previous argument.
> > > Matching suffix made sense there.
> >
> > Oh, I missed the point of the "__sz". I did not realize it was
> > supposed to be a suffix.
> >
> > > Reusing the same suffix matching for a different purpose could be confusing.
> > > For this use case we may reserve a full argument name.
> > > Like "rdonly_buf_size" and "rdwr_buf_size" ?
> > >
> >
> > I like the idea but I have 2 problems here:
> > 1. I do not really want to have 2 separate kfuncs for read only and
> > write operations
> > 2. How can I restrict the write operation to fmod_ret?
> >
> > For 1, my guess is that the read-only operation will not be used
> > unless we solve 2.
> > For 2, the rationale is that I think tracing functions are not
> > supposed to change the behavior. This was said on the thread about
> > priorities for BPF programs. And it somehow makes sense that fentry
> > should be used for tracing only. OTOH, fmod_ret is clearly affecting
> > the behavior of the program, so I see it more "natural" that it can
> > change the context too.
>
> Well, if we say that fentry is rdonly and fmod_ret is rdwr
> then we probably shouldn't stop at return value.

Yeah, it makes sense, but it will be a slightly bigger effort.

> If bpf prog can access the argument and this argument is an array
> it should be writable.
> We can allow different kfuncs for fentry and fmod_ret too.

Good idea. No idea if this will be easy to implement though :)

But after the prototype I describe in 0/7 I don't really need to
enforce if read or write is based on fentry/fmod_ret.
So I think let's not over engineer this, and stick to your initial
suggestion of the naming system which will be largely enough for my
use case.

> They can be two tiny wrappers with different arg names (to distinguish
> rdonly vs rdwr) on top of the single always_inline function
> that returns a buffer.
>

Cheers,
Benjamin
diff mbox series

Patch

diff --git a/include/linux/btf.h b/include/linux/btf.h
index 36bc09b8e890..76a3ff48ae2a 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -332,6 +332,12 @@  static inline struct btf_param *btf_params(const struct btf_type *t)
 	return (struct btf_param *)(t + 1);
 }
 
+struct bpf_reg_state;
+
+bool btf_is_kfunc_arg_mem_size(const struct btf *btf,
+			       const struct btf_param *arg,
+			       const struct bpf_reg_state *reg);
+
 #ifdef CONFIG_BPF_SYSCALL
 struct bpf_prog;
 
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 76318a4c2d0e..22e6e3cdc7ee 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -5851,9 +5851,9 @@  static bool __btf_type_is_scalar_struct(struct bpf_verifier_log *log,
 	return true;
 }
 
-static bool is_kfunc_arg_mem_size(const struct btf *btf,
-				  const struct btf_param *arg,
-				  const struct bpf_reg_state *reg)
+bool btf_is_kfunc_arg_mem_size(const struct btf *btf,
+			       const struct btf_param *arg,
+			       const struct bpf_reg_state *reg)
 {
 	int len, sfx_len = sizeof("__sz") - 1;
 	const struct btf_type *t;
@@ -5976,7 +5976,7 @@  static int btf_check_func_arg_match(struct bpf_verifier_env *env,
 				reg_btf = reg->btf;
 				reg_ref_id = reg->btf_id;
 				/* Ensure only one argument is referenced
-				 * PTR_TO_BTF_ID, check_func_arg_reg_off relies
+				 * PTR_TO_BTF_ID or PTR_TO_MEM, check_func_arg_reg_off relies
 				 * on only one referenced register being allowed
 				 * for kfuncs.
 				 */
@@ -6012,7 +6012,10 @@  static int btf_check_func_arg_match(struct bpf_verifier_env *env,
 			u32 type_size;
 
 			if (is_kfunc) {
-				bool arg_mem_size = i + 1 < nargs && is_kfunc_arg_mem_size(btf, &args[i + 1], &regs[regno + 1]);
+				bool arg_mem_size = i + 1 < nargs &&
+						    btf_is_kfunc_arg_mem_size(btf,
+									      &args[i + 1],
+									      &regs[regno + 1]);
 
 				/* Permit pointer to mem, but only when argument
 				 * type is pointer to scalar, or struct composed
@@ -6039,6 +6042,24 @@  static int btf_check_func_arg_match(struct bpf_verifier_env *env,
 					i++;
 					continue;
 				}
+
+				if (rel && reg->ref_obj_id) {
+					/* Ensure only one argument is referenced
+					 * PTR_TO_BTF_ID or PTR_TO_MEM, check_func_arg_reg_off
+					 * relies on only one referenced register being allowed
+					 * for kfuncs.
+					 */
+					if (ref_obj_id) {
+						bpf_log(log,
+							"verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
+							regno,
+							reg->ref_obj_id,
+							ref_obj_id);
+						return -EFAULT;
+					}
+					ref_regno = regno;
+					ref_obj_id = reg->ref_obj_id;
+				}
 			}
 
 			resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 71827d14724a..0f339f9058f3 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6974,7 +6974,9 @@  static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	int err, insn_idx = *insn_idx_p;
 	const struct btf_param *args;
 	struct btf *desc_btf;
+	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
 	bool acq;
+	size_t reg_size = 0;
 
 	/* skip for now, but return error when we find this in fixup_kfunc_call */
 	if (!insn->imm)
@@ -7015,8 +7017,8 @@  static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 		}
 	}
 
-	for (i = 0; i < CALLER_SAVED_REGS; i++)
-		mark_reg_not_init(env, regs, caller_saved[i]);
+	/* reset REG_0 */
+	mark_reg_not_init(env, regs, BPF_REG_0);
 
 	/* Check return type */
 	t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL);
@@ -7026,6 +7028,9 @@  static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 		return -EINVAL;
 	}
 
+	nargs = btf_type_vlen(func_proto);
+	args = btf_params(func_proto);
+
 	if (btf_type_is_scalar(t)) {
 		mark_reg_unknown(env, regs, BPF_REG_0);
 		mark_btf_func_reg_size(env, BPF_REG_0, t->size);
@@ -7033,24 +7038,54 @@  static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 		ptr_type = btf_type_skip_modifiers(desc_btf, t->type,
 						   &ptr_type_id);
 		if (!btf_type_is_struct(ptr_type)) {
-			ptr_type_name = btf_name_by_offset(desc_btf,
-							   ptr_type->name_off);
-			verbose(env, "kernel function %s returns pointer type %s %s is not supported\n",
-				func_name, btf_type_str(ptr_type),
-				ptr_type_name);
-			return -EINVAL;
+			/* if we have an array, we must have a const argument named "__sz" */
+			for (i = 0; i < nargs; i++) {
+				u32 regno = i + BPF_REG_1;
+				struct bpf_reg_state *reg = &regs[regno];
+
+				/* look for any const scalar parameter of name "__sz" */
+				if (!check_reg_arg(env, regno, SRC_OP) &&
+				    tnum_is_const(regs[regno].var_off) &&
+				    btf_is_kfunc_arg_mem_size(desc_btf, &args[i], reg))
+					reg_size = regs[regno].var_off.value;
+			}
+
+			if (!reg_size) {
+				ptr_type_name = btf_name_by_offset(desc_btf,
+								   ptr_type->name_off);
+				verbose(env,
+					"kernel function %s returns pointer type %s %s is not supported\n",
+					func_name,
+					btf_type_str(ptr_type),
+					ptr_type_name);
+				return -EINVAL;
+			}
+
+			mark_reg_known_zero(env, regs, BPF_REG_0);
+			regs[BPF_REG_0].type = PTR_TO_MEM;
+			regs[BPF_REG_0].mem_size = reg_size;
+
+			/* in case of tracing, only allow write access to
+			 * BPF_MODIFY_RETURN programs
+			 */
+			if (prog_type == BPF_PROG_TYPE_TRACING &&
+			    env->prog->expected_attach_type != BPF_MODIFY_RETURN)
+				regs[BPF_REG_0].type |= MEM_RDONLY;
+		} else {
+			mark_reg_known_zero(env, regs, BPF_REG_0);
+			regs[BPF_REG_0].type = PTR_TO_BTF_ID;
+			regs[BPF_REG_0].btf = desc_btf;
+			regs[BPF_REG_0].btf_id = ptr_type_id;
+			mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
 		}
-		mark_reg_known_zero(env, regs, BPF_REG_0);
-		regs[BPF_REG_0].btf = desc_btf;
-		regs[BPF_REG_0].type = PTR_TO_BTF_ID;
-		regs[BPF_REG_0].btf_id = ptr_type_id;
+
 		if (btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
 					      BTF_KFUNC_TYPE_RET_NULL, func_id)) {
 			regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
 			/* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
 			regs[BPF_REG_0].id = ++env->id_gen;
 		}
-		mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
+
 		if (acq) {
 			int id = acquire_reference_state(env, insn_idx);
 
@@ -7061,8 +7096,9 @@  static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 		}
 	} /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */
 
-	nargs = btf_type_vlen(func_proto);
-	args = (const struct btf_param *)(func_proto + 1);
+	for (i = 1 ; i < CALLER_SAVED_REGS; i++)
+		mark_reg_not_init(env, regs, caller_saved[i]);
+
 	for (i = 0; i < nargs; i++) {
 		u32 regno = i + 1;