diff mbox series

[v2,bpf-next,3/6] bpf: add fd_array_cnt attribute for prog_load

Message ID 20241119101552.505650-4-aspsk@isovalent.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series Add fd_array_cnt attribute for BPF_PROG_LOAD | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 202 this patch: 202
netdev/build_tools success Errors and warnings before: 1 (+0) this patch: 1 (+0)
netdev/cc_maintainers fail 12 maintainers not CCed: kpsingh@kernel.org andrii@kernel.org jolsa@kernel.org eddyz87@gmail.com ast@kernel.org song@kernel.org haoluo@google.com john.fastabend@gmail.com daniel@iogearbox.net yonghong.song@linux.dev martin.lau@linux.dev sdf@fomichev.me
netdev/build_clang success Errors and warnings before: 252 this patch: 252
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 6969 this patch: 6969
netdev/checkpatch warning WARNING: Missing a blank line after declarations WARNING: line length of 96 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-18 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-17 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-13 fail Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc

Commit Message

Anton Protopopov Nov. 19, 2024, 10:15 a.m. UTC
The fd_array attribute of the BPF_PROG_LOAD syscall may contain a set
of file descriptors: maps or btfs. This field was introduced as a
sparse array. Introduce a new attribute, fd_array_cnt, which, if
present, indicates that the fd_array is a continuous array of the
corresponding length.

If fd_array_cnt is non-zero, then every map in the fd_array will be
bound to the program, as if it was used by the program. This
functionality is similar to the BPF_PROG_BIND_MAP syscall, but such
maps can be used by the verifier during the program load.

Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
---
 include/uapi/linux/bpf.h       |  10 ++++
 kernel/bpf/syscall.c           |   2 +-
 kernel/bpf/verifier.c          | 106 ++++++++++++++++++++++++++++-----
 tools/include/uapi/linux/bpf.h |  10 ++++
 4 files changed, 113 insertions(+), 15 deletions(-)

Comments

Alexei Starovoitov Nov. 26, 2024, 1:38 a.m. UTC | #1
On Tue, Nov 19, 2024 at 2:17 AM Anton Protopopov <aspsk@isovalent.com> wrote:
>
> The fd_array attribute of the BPF_PROG_LOAD syscall may contain a set
> of file descriptors: maps or btfs. This field was introduced as a
> sparse array. Introduce a new attribute, fd_array_cnt, which, if
> present, indicates that the fd_array is a continuous array of the
> corresponding length.
>
> If fd_array_cnt is non-zero, then every map in the fd_array will be
> bound to the program, as if it was used by the program. This
> functionality is similar to the BPF_PROG_BIND_MAP syscall, but such
> maps can be used by the verifier during the program load.
>
> Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
> ---
>  include/uapi/linux/bpf.h       |  10 ++++
>  kernel/bpf/syscall.c           |   2 +-
>  kernel/bpf/verifier.c          | 106 ++++++++++++++++++++++++++++-----
>  tools/include/uapi/linux/bpf.h |  10 ++++
>  4 files changed, 113 insertions(+), 15 deletions(-)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 4162afc6b5d0..2acf9b336371 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -1573,6 +1573,16 @@ union bpf_attr {
>                  * If provided, prog_flags should have BPF_F_TOKEN_FD flag set.
>                  */
>                 __s32           prog_token_fd;
> +               /* The fd_array_cnt can be used to pass the length of the
> +                * fd_array array. In this case all the [map] file descriptors
> +                * passed in this array will be bound to the program, even if
> +                * the maps are not referenced directly. The functionality is
> +                * similar to the BPF_PROG_BIND_MAP syscall, but maps can be
> +                * used by the verifier during the program load. If provided,
> +                * then the fd_array[0,...,fd_array_cnt-1] is expected to be
> +                * continuous.
> +                */
> +               __u32           fd_array_cnt;
>         };
>
>         struct { /* anonymous struct used by BPF_OBJ_* commands */
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 58190ca724a2..7e3fbc23c742 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -2729,7 +2729,7 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
>  }
>
>  /* last field in 'union bpf_attr' used by this command */
> -#define BPF_PROG_LOAD_LAST_FIELD prog_token_fd
> +#define BPF_PROG_LOAD_LAST_FIELD fd_array_cnt
>
>  static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
>  {
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 8e034a22aa2a..a84ba93c0036 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -19181,22 +19181,10 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
>         return 0;
>  }
>
> -/* Add map behind fd to used maps list, if it's not already there, and return
> - * its index.
> - * Returns <0 on error, or >= 0 index, on success.
> - */
> -static int add_used_map_from_fd(struct bpf_verifier_env *env, int fd)
> +static int add_used_map(struct bpf_verifier_env *env, struct bpf_map *map)

_from_fd suffix is imo too verbose.
Let's use __add_used_map() here instead?

>  {
> -       CLASS(fd, f)(fd);
> -       struct bpf_map *map;
>         int i, err;
>
> -       map = __bpf_map_get(f);
> -       if (IS_ERR(map)) {
> -               verbose(env, "fd %d is not pointing to valid bpf_map\n", fd);
> -               return PTR_ERR(map);
> -       }
> -
>         /* check whether we recorded this map already */
>         for (i = 0; i < env->used_map_cnt; i++)
>                 if (env->used_maps[i] == map)
> @@ -19227,6 +19215,24 @@ static int add_used_map_from_fd(struct bpf_verifier_env *env, int fd)
>         return env->used_map_cnt - 1;
>  }
>
> +/* Add map behind fd to used maps list, if it's not already there, and return
> + * its index.
> + * Returns <0 on error, or >= 0 index, on success.
> + */
> +static int add_used_map_from_fd(struct bpf_verifier_env *env, int fd)

and keep this one as add_used_map() ?

> +{
> +       struct bpf_map *map;
> +       CLASS(fd, f)(fd);
> +
> +       map = __bpf_map_get(f);
> +       if (IS_ERR(map)) {
> +               verbose(env, "fd %d is not pointing to valid bpf_map\n", fd);
> +               return PTR_ERR(map);
> +       }
> +
> +       return add_used_map(env, map);
> +}
> +
>  /* find and rewrite pseudo imm in ld_imm64 instructions:
>   *
>   * 1. if it accesses map FD, replace it with actual map pointer.
> @@ -22526,6 +22532,75 @@ struct btf *bpf_get_btf_vmlinux(void)
>         return btf_vmlinux;
>  }
>
> +/*
> + * The add_fd_from_fd_array() is executed only if fd_array_cnt is given.  In
> + * this case expect that every file descriptor in the array is either a map or
> + * a BTF, or a hole (0). Everything else is considered to be trash.
> + */
> +static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd)
> +{
> +       struct bpf_map *map;
> +       CLASS(fd, f)(fd);
> +       int ret;
> +
> +       map = __bpf_map_get(f);
> +       if (!IS_ERR(map)) {
> +               ret = add_used_map(env, map);
> +               if (ret < 0)
> +                       return ret;
> +               return 0;
> +       }
> +
> +       if (!IS_ERR(__btf_get_by_fd(f)))
> +               return 0;
> +
> +       if (!fd)
> +               return 0;

This is not allowed in new apis.
zero cannot be special.

> +
> +       verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd);
> +       return PTR_ERR(map);
> +}
> +
> +static int env_init_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, bpfptr_t uattr)

What an odd name... why is 'env_' there?

> +{
> +       int size = sizeof(int) * attr->fd_array_cnt;

int overflow.

> +       int *copy;
> +       int ret;
> +       int i;
> +
> +       if (attr->fd_array_cnt >= MAX_USED_MAPS)
> +               return -E2BIG;
> +
> +       env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
> +
> +       /*
> +        * The only difference between old (no fd_array_cnt is given) and new
> +        * APIs is that in the latter case the fd_array is expected to be
> +        * continuous and is scanned for map fds right away
> +        */
> +       if (!size)
> +               return 0;
> +
> +       copy = kzalloc(size, GFP_KERNEL);

the slab will WARN with dmesg splat on large sizes.

> +       if (!copy)
> +               return -ENOMEM;
> +
> +       if (copy_from_bpfptr_offset(copy, env->fd_array, 0, size)) {
> +               ret = -EFAULT;
> +               goto free_copy;
> +       }
> +
> +       for (i = 0; i < attr->fd_array_cnt; i++) {
> +               ret = add_fd_from_fd_array(env, copy[i]);
> +               if (ret)
> +                       goto free_copy;
> +       }

I don't get this feature.
Why bother copying and checking for validity?
What does it buy ?

pw-bot: cr
Hou Tao Nov. 26, 2024, 2:11 a.m. UTC | #2
Hi,

On 11/19/2024 6:15 PM, Anton Protopopov wrote:
> The fd_array attribute of the BPF_PROG_LOAD syscall may contain a set
> of file descriptors: maps or btfs. This field was introduced as a
> sparse array. Introduce a new attribute, fd_array_cnt, which, if
> present, indicates that the fd_array is a continuous array of the
> corresponding length.
>
> If fd_array_cnt is non-zero, then every map in the fd_array will be
> bound to the program, as if it was used by the program. This
> functionality is similar to the BPF_PROG_BIND_MAP syscall, but such
> maps can be used by the verifier during the program load.
>
> Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
> ---
>  include/uapi/linux/bpf.h       |  10 ++++
>  kernel/bpf/syscall.c           |   2 +-
>  kernel/bpf/verifier.c          | 106 ++++++++++++++++++++++++++++-----
>  tools/include/uapi/linux/bpf.h |  10 ++++
>  4 files changed, 113 insertions(+), 15 deletions(-)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 4162afc6b5d0..2acf9b336371 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -1573,6 +1573,16 @@ union bpf_attr {
>  		 * If provided, prog_flags should have BPF_F_TOKEN_FD flag set.
>  		 */
>  		__s32		prog_token_fd;
> +		/* The fd_array_cnt can be used to pass the length of the
> +		 * fd_array array. In this case all the [map] file descriptors
> +		 * passed in this array will be bound to the program, even if
> +		 * the maps are not referenced directly. The functionality is
> +		 * similar to the BPF_PROG_BIND_MAP syscall, but maps can be
> +		 * used by the verifier during the program load. If provided,
> +		 * then the fd_array[0,...,fd_array_cnt-1] is expected to be
> +		 * continuous.
> +		 */
> +		__u32		fd_array_cnt;
>  	};
>  
>  	struct { /* anonymous struct used by BPF_OBJ_* commands */

SNIP
> +/*
> + * The add_fd_from_fd_array() is executed only if fd_array_cnt is given.  In
> + * this case expect that every file descriptor in the array is either a map or
> + * a BTF, or a hole (0). Everything else is considered to be trash.
> + */
> +static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd)
> +{
> +	struct bpf_map *map;
> +	CLASS(fd, f)(fd);
> +	int ret;
> +
> +	map = __bpf_map_get(f);
> +	if (!IS_ERR(map)) {
> +		ret = add_used_map(env, map);
> +		if (ret < 0)
> +			return ret;
> +		return 0;
> +	}
> +
> +	if (!IS_ERR(__btf_get_by_fd(f)))
> +		return 0;

For fd_array_cnt > 0 case, does it need to handle BTF fd case ? If it
does, these returned BTFs should be saved in somewhere, otherwise,
these BTFs will be leaked.
> +
> +	if (!fd)
> +		return 0;
> +
> +	verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd);
> +	return PTR_ERR(map);
> +}
> +
> +static int env_init_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, bpfptr_t uattr)
> +{
> +	int size = sizeof(int) * attr->fd_array_cnt;
> +	int *copy;
> +	int ret;
> +	int i;
> +
> +	if (attr->fd_array_cnt >= MAX_USED_MAPS)
> +		return -E2BIG;
> +
> +	env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
> +
> +	/*
> +	 * The only difference between old (no fd_array_cnt is given) and new
> +	 * APIs is that in the latter case the fd_array is expected to be
> +	 * continuous and is scanned for map fds right away
> +	 */
> +	if (!size)
> +		return 0;
> +
> +	copy = kzalloc(size, GFP_KERNEL);
> +	if (!copy)
> +		return -ENOMEM;
> +
> +	if (copy_from_bpfptr_offset(copy, env->fd_array, 0, size)) {
> +		ret = -EFAULT;
> +		goto free_copy;
> +	}

It is better to use kvmemdup_bpfptr() instead.
> +
> +	for (i = 0; i < attr->fd_array_cnt; i++) {
> +		ret = add_fd_from_fd_array(env, copy[i]);
> +		if (ret)
> +			goto free_copy;
> +	}
> +
> +free_copy:
> +	kfree(copy);
> +	return ret;
> +}
> +
>  int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
>  {
>  	u64 start_time = ktime_get_ns();
> @@ -22557,7 +22632,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
>  		env->insn_aux_data[i].orig_idx = i;
>  	env->prog = *prog;
>  	env->ops = bpf_verifier_ops[env->prog->type];
> -	env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
> +	ret = env_init_fd_array(env, attr, uattr);
> +	if (ret)
> +		goto err_free_aux_data;

These maps saved in env->used_map will also be leaked.
>  
>  	env->allow_ptr_leaks = bpf_allow_ptr_leaks(env->prog->aux->token);
>  	env->allow_uninit_stack = bpf_allow_uninit_stack(env->prog->aux->token);
> @@ -22775,6 +22852,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
>  err_unlock:
>  	if (!is_priv)
>  		mutex_unlock(&bpf_verifier_lock);
> +err_free_aux_data:
>  	vfree(env->insn_aux_data);
>  	kvfree(env->insn_hist);
>  err_free_env:
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 4162afc6b5d0..2acf9b336371 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -1573,6 +1573,16 @@ union bpf_attr {
>  		 * If provided, prog_flags should have BPF_F_TOKEN_FD flag set.
>  		 */
>  		__s32		prog_token_fd;
> +		/* The fd_array_cnt can be used to pass the length of the
> +		 * fd_array array. In this case all the [map] file descriptors
> +		 * passed in this array will be bound to the program, even if
> +		 * the maps are not referenced directly. The functionality is
> +		 * similar to the BPF_PROG_BIND_MAP syscall, but maps can be
> +		 * used by the verifier during the program load. If provided,
> +		 * then the fd_array[0,...,fd_array_cnt-1] is expected to be
> +		 * continuous.
> +		 */
> +		__u32		fd_array_cnt;
>  	};
>  
>  	struct { /* anonymous struct used by BPF_OBJ_* commands */
Anton Protopopov Nov. 26, 2024, 5:05 p.m. UTC | #3
On 24/11/25 05:38PM, Alexei Starovoitov wrote:
> On Tue, Nov 19, 2024 at 2:17 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> >
> > The fd_array attribute of the BPF_PROG_LOAD syscall may contain a set
> > of file descriptors: maps or btfs. This field was introduced as a
> > sparse array. Introduce a new attribute, fd_array_cnt, which, if
> > present, indicates that the fd_array is a continuous array of the
> > corresponding length.
> >
> > If fd_array_cnt is non-zero, then every map in the fd_array will be
> > bound to the program, as if it was used by the program. This
> > functionality is similar to the BPF_PROG_BIND_MAP syscall, but such
> > maps can be used by the verifier during the program load.
> >
> > Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
> > ---
> >  include/uapi/linux/bpf.h       |  10 ++++
> >  kernel/bpf/syscall.c           |   2 +-
> >  kernel/bpf/verifier.c          | 106 ++++++++++++++++++++++++++++-----
> >  tools/include/uapi/linux/bpf.h |  10 ++++
> >  4 files changed, 113 insertions(+), 15 deletions(-)
> >
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index 4162afc6b5d0..2acf9b336371 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -1573,6 +1573,16 @@ union bpf_attr {
> >                  * If provided, prog_flags should have BPF_F_TOKEN_FD flag set.
> >                  */
> >                 __s32           prog_token_fd;
> > +               /* The fd_array_cnt can be used to pass the length of the
> > +                * fd_array array. In this case all the [map] file descriptors
> > +                * passed in this array will be bound to the program, even if
> > +                * the maps are not referenced directly. The functionality is
> > +                * similar to the BPF_PROG_BIND_MAP syscall, but maps can be
> > +                * used by the verifier during the program load. If provided,
> > +                * then the fd_array[0,...,fd_array_cnt-1] is expected to be
> > +                * continuous.
> > +                */
> > +               __u32           fd_array_cnt;
> >         };
> >
> >         struct { /* anonymous struct used by BPF_OBJ_* commands */
> > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> > index 58190ca724a2..7e3fbc23c742 100644
> > --- a/kernel/bpf/syscall.c
> > +++ b/kernel/bpf/syscall.c
> > @@ -2729,7 +2729,7 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
> >  }
> >
> >  /* last field in 'union bpf_attr' used by this command */
> > -#define BPF_PROG_LOAD_LAST_FIELD prog_token_fd
> > +#define BPF_PROG_LOAD_LAST_FIELD fd_array_cnt
> >
> >  static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
> >  {
> > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > index 8e034a22aa2a..a84ba93c0036 100644
> > --- a/kernel/bpf/verifier.c
> > +++ b/kernel/bpf/verifier.c
> > @@ -19181,22 +19181,10 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
> >         return 0;
> >  }
> >
> > -/* Add map behind fd to used maps list, if it's not already there, and return
> > - * its index.
> > - * Returns <0 on error, or >= 0 index, on success.
> > - */
> > -static int add_used_map_from_fd(struct bpf_verifier_env *env, int fd)
> > +static int add_used_map(struct bpf_verifier_env *env, struct bpf_map *map)
> 
> _from_fd suffix is imo too verbose.
> Let's use __add_used_map() here instead?

Will do.

> >  {
> > -       CLASS(fd, f)(fd);
> > -       struct bpf_map *map;
> >         int i, err;
> >
> > -       map = __bpf_map_get(f);
> > -       if (IS_ERR(map)) {
> > -               verbose(env, "fd %d is not pointing to valid bpf_map\n", fd);
> > -               return PTR_ERR(map);
> > -       }
> > -
> >         /* check whether we recorded this map already */
> >         for (i = 0; i < env->used_map_cnt; i++)
> >                 if (env->used_maps[i] == map)
> > @@ -19227,6 +19215,24 @@ static int add_used_map_from_fd(struct bpf_verifier_env *env, int fd)
> >         return env->used_map_cnt - 1;
> >  }
> >
> > +/* Add map behind fd to used maps list, if it's not already there, and return
> > + * its index.
> > + * Returns <0 on error, or >= 0 index, on success.
> > + */
> > +static int add_used_map_from_fd(struct bpf_verifier_env *env, int fd)
> 
> and keep this one as add_used_map() ?

Will do.

> > +{
> > +       struct bpf_map *map;
> > +       CLASS(fd, f)(fd);
> > +
> > +       map = __bpf_map_get(f);
> > +       if (IS_ERR(map)) {
> > +               verbose(env, "fd %d is not pointing to valid bpf_map\n", fd);
> > +               return PTR_ERR(map);
> > +       }
> > +
> > +       return add_used_map(env, map);
> > +}
> > +
> >  /* find and rewrite pseudo imm in ld_imm64 instructions:
> >   *
> >   * 1. if it accesses map FD, replace it with actual map pointer.
> > @@ -22526,6 +22532,75 @@ struct btf *bpf_get_btf_vmlinux(void)
> >         return btf_vmlinux;
> >  }
> >
> > +/*
> > + * The add_fd_from_fd_array() is executed only if fd_array_cnt is given.  In
> > + * this case expect that every file descriptor in the array is either a map or
> > + * a BTF, or a hole (0). Everything else is considered to be trash.
> > + */
> > +static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd)
> > +{
> > +       struct bpf_map *map;
> > +       CLASS(fd, f)(fd);
> > +       int ret;
> > +
> > +       map = __bpf_map_get(f);
> > +       if (!IS_ERR(map)) {
> > +               ret = add_used_map(env, map);
> > +               if (ret < 0)
> > +                       return ret;
> > +               return 0;
> > +       }
> > +
> > +       if (!IS_ERR(__btf_get_by_fd(f)))
> > +               return 0;
> > +
> > +       if (!fd)
> > +               return 0;
> 
> This is not allowed in new apis.
> zero cannot be special.

I thought that this is ok since I check for all possible valid FDs by this
point: if fd=0 is a valid map or btf, then we will return it by this point.

Why I wanted to keep 0 as a valid value, even if it is not pointing to any
map/btf is for case when, like in libbpf gen, fd_array is populated with map
fds starting from 0, and with btf fds from some offset, so in between there may
be 0s. It is probably better to disallow this, and, if fd_array_cnt != 0,
then to check that all [0...fd_array_cnt) elements are valid.

> > +
> > +       verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd);
> > +       return PTR_ERR(map);
> > +}
> > +
> > +static int env_init_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, bpfptr_t uattr)
> 
> What an odd name... why is 'env_' there?

will remove

> 
> > +{
> > +       int size = sizeof(int) * attr->fd_array_cnt;
> 
> int overflow.

Thanks, will fix.

> > +       int *copy;
> > +       int ret;
> > +       int i;
> > +
> > +       if (attr->fd_array_cnt >= MAX_USED_MAPS)
> > +               return -E2BIG;

(This actually should be MAX_USED_MAPS + MAX_USED_BTFS)

> > +
> > +       env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
> > +
> > +       /*
> > +        * The only difference between old (no fd_array_cnt is given) and new
> > +        * APIs is that in the latter case the fd_array is expected to be
> > +        * continuous and is scanned for map fds right away
> > +        */
> > +       if (!size)
> > +               return 0;
> > +
> > +       copy = kzalloc(size, GFP_KERNEL);
> 
> the slab will WARN with dmesg splat on large sizes.

The size would be actually smaller than 4*(MAX_USED_MAPS + MAX_USED_BTFS),
as we check this above.

> 
> > +       if (!copy)
> > +               return -ENOMEM;
> > +
> > +       if (copy_from_bpfptr_offset(copy, env->fd_array, 0, size)) {
> > +               ret = -EFAULT;
> > +               goto free_copy;
> > +       }
> > +
> > +       for (i = 0; i < attr->fd_array_cnt; i++) {
> > +               ret = add_fd_from_fd_array(env, copy[i]);
> > +               if (ret)
> > +                       goto free_copy;
> > +       }
> 
> I don't get this feature.
> Why bother copying and checking for validity?
> What does it buy ?

So, the main reason for this whole change is to allow unrelated maps, which
aren't referenced by a program directly, to be noticed and available during the
verification. Thus, I want to go through the array here and add them to
used_maps. (In a subsequent patch, "instruction sets" maps are treated
additionally at this point as well.)

The reason to discard that copy here was that "old api" when fd_array_cnt is 0
is still valid and in this case we can't copy fd_array in full. Later during
the verification fd_array elements are accessed individually by copying from
bpfptr. I thought that freeing this copy here is more readable than to add
a check at every such occasion.

> pw-bot: cr
Andrii Nakryiko Nov. 26, 2024, 6:51 p.m. UTC | #4
On Tue, Nov 26, 2024 at 9:27 AM Anton Protopopov <aspsk@isovalent.com> wrote:
>
> On 24/11/25 05:38PM, Alexei Starovoitov wrote:
> > On Tue, Nov 19, 2024 at 2:17 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> > >
> > > The fd_array attribute of the BPF_PROG_LOAD syscall may contain a set
> > > of file descriptors: maps or btfs. This field was introduced as a
> > > sparse array. Introduce a new attribute, fd_array_cnt, which, if
> > > present, indicates that the fd_array is a continuous array of the
> > > corresponding length.
> > >
> > > If fd_array_cnt is non-zero, then every map in the fd_array will be
> > > bound to the program, as if it was used by the program. This
> > > functionality is similar to the BPF_PROG_BIND_MAP syscall, but such
> > > maps can be used by the verifier during the program load.
> > >
> > > Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
> > > ---
> > >  include/uapi/linux/bpf.h       |  10 ++++
> > >  kernel/bpf/syscall.c           |   2 +-
> > >  kernel/bpf/verifier.c          | 106 ++++++++++++++++++++++++++++-----
> > >  tools/include/uapi/linux/bpf.h |  10 ++++
> > >  4 files changed, 113 insertions(+), 15 deletions(-)
> > >

[...]

> > > +/*
> > > + * The add_fd_from_fd_array() is executed only if fd_array_cnt is given.  In
> > > + * this case expect that every file descriptor in the array is either a map or
> > > + * a BTF, or a hole (0). Everything else is considered to be trash.
> > > + */
> > > +static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd)
> > > +{
> > > +       struct bpf_map *map;
> > > +       CLASS(fd, f)(fd);
> > > +       int ret;
> > > +
> > > +       map = __bpf_map_get(f);
> > > +       if (!IS_ERR(map)) {
> > > +               ret = add_used_map(env, map);
> > > +               if (ret < 0)
> > > +                       return ret;
> > > +               return 0;
> > > +       }
> > > +
> > > +       if (!IS_ERR(__btf_get_by_fd(f)))
> > > +               return 0;
> > > +
> > > +       if (!fd)
> > > +               return 0;
> >
> > This is not allowed in new apis.
> > zero cannot be special.
>
> I thought that this is ok since I check for all possible valid FDs by this
> point: if fd=0 is a valid map or btf, then we will return it by this point.
>
> Why I wanted to keep 0 as a valid value, even if it is not pointing to any
> map/btf is for case when, like in libbpf gen, fd_array is populated with map
> fds starting from 0, and with btf fds from some offset, so in between there may
> be 0s. This is probably better to disallow this, and, if fd_array_cnt != 0,
> then to check if all [0...fd_array_cnt) elements are valid.

If fd_array_cnt != 0 we can define that fd_array isn't sparse anymore
and every entry has to be valid. Let's do that.

>
> > > +
> > > +       verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd);
> > > +       return PTR_ERR(map);
> > > +}
> > > +
> > > +static int env_init_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, bpfptr_t uattr)
> >
> > What an odd name... why is 'env_' there?
>

[...]

> > I don't get this feature.
> > Why bother copying and checking for validity?
> > What does it buy ?
>
> So, the main reason for this whole change is to allow unrelated maps, which
> aren't referenced by a program directly, to be noticed and available during the
> verification. Thus, I want to go through the array here and add them to
> used_maps. (In a subsequent patch, "instruction sets" maps are treated
> additionally at this point as well.)
>
> The reason to discard that copy here was that "old api" when fd_array_cnt is 0
> is still valid and in this case we can't copy fd_array in full. Later during
> the verification fd_array elements are accessed individually by copying from
> bpfptr. I thought that freeing this copy here is more readable than to add
> a check at every such occasion.

I think Alexei meant why you need to make an entire copy of fd_array,
if you can just read one element at a time (still with
copy_from_bpfptr_offset()), validate/add map/btf from that FD, and
move to the next element. No need to make a copy of an array.

>
> > pw-bot: cr
>
Alexei Starovoitov Nov. 26, 2024, 8:40 p.m. UTC | #5
On Tue, Nov 26, 2024 at 10:51 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Tue, Nov 26, 2024 at 9:27 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> >
> > On 24/11/25 05:38PM, Alexei Starovoitov wrote:
> > > On Tue, Nov 19, 2024 at 2:17 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> > > >
> > > > The fd_array attribute of the BPF_PROG_LOAD syscall may contain a set
> > > > of file descriptors: maps or btfs. This field was introduced as a
> > > > sparse array. Introduce a new attribute, fd_array_cnt, which, if
> > > > present, indicates that the fd_array is a continuous array of the
> > > > corresponding length.
> > > >
> > > > If fd_array_cnt is non-zero, then every map in the fd_array will be
> > > > bound to the program, as if it was used by the program. This
> > > > functionality is similar to the BPF_PROG_BIND_MAP syscall, but such
> > > > maps can be used by the verifier during the program load.
> > > >
> > > > Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
> > > > ---
> > > >  include/uapi/linux/bpf.h       |  10 ++++
> > > >  kernel/bpf/syscall.c           |   2 +-
> > > >  kernel/bpf/verifier.c          | 106 ++++++++++++++++++++++++++++-----
> > > >  tools/include/uapi/linux/bpf.h |  10 ++++
> > > >  4 files changed, 113 insertions(+), 15 deletions(-)
> > > >
>
> [...]
>
> > > > +/*
> > > > + * The add_fd_from_fd_array() is executed only if fd_array_cnt is given.  In
> > > > + * this case expect that every file descriptor in the array is either a map or
> > > > + * a BTF, or a hole (0). Everything else is considered to be trash.
> > > > + */
> > > > +static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd)
> > > > +{
> > > > +       struct bpf_map *map;
> > > > +       CLASS(fd, f)(fd);
> > > > +       int ret;
> > > > +
> > > > +       map = __bpf_map_get(f);
> > > > +       if (!IS_ERR(map)) {
> > > > +               ret = add_used_map(env, map);
> > > > +               if (ret < 0)
> > > > +                       return ret;
> > > > +               return 0;
> > > > +       }
> > > > +
> > > > +       if (!IS_ERR(__btf_get_by_fd(f)))
> > > > +               return 0;
> > > > +
> > > > +       if (!fd)
> > > > +               return 0;
> > >
> > > This is not allowed in new apis.
> > > zero cannot be special.
> >
> > I thought that this is ok since I check for all possible valid FDs by this
> > point: if fd=0 is a valid map or btf, then we will return it by this point.
> >
> > Why I wanted to keep 0 as a valid value, even if it is not pointing to any
> > map/btf is for case when, like in libbpf gen, fd_array is populated with map
> > fds starting from 0, and with btf fds from some offset, so in between there may
> > be 0s. This is probably better to disallow this, and, if fd_array_cnt != 0,
> > then to check if all [0...fd_array_cnt) elements are valid.
>
> If fd_array_cnt != 0 we can define that fd_array isn't sparse anymore
> and every entry has to be valid. Let's do that.

Exactly.
libbpf gen_loader has a very simplistic implementation of
add_map_fd() and add_kfunc_btf_fd().
It leaves gaps only to keep implementation simple.
It can and probably should be changed to make it contiguous.
Anton Protopopov Nov. 27, 2024, 6:44 a.m. UTC | #6
On 24/11/26 10:11AM, Hou Tao wrote:
> Hi,
> 
> On 11/19/2024 6:15 PM, Anton Protopopov wrote:
> > The fd_array attribute of the BPF_PROG_LOAD syscall may contain a set
> > of file descriptors: maps or btfs. This field was introduced as a
> > sparse array. Introduce a new attribute, fd_array_cnt, which, if
> > present, indicates that the fd_array is a continuous array of the
> > corresponding length.
> >
> > If fd_array_cnt is non-zero, then every map in the fd_array will be
> > bound to the program, as if it was used by the program. This
> > functionality is similar to the BPF_PROG_BIND_MAP syscall, but such
> > maps can be used by the verifier during the program load.
> >
> > Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
> > ---
> >  include/uapi/linux/bpf.h       |  10 ++++
> >  kernel/bpf/syscall.c           |   2 +-
> >  kernel/bpf/verifier.c          | 106 ++++++++++++++++++++++++++++-----
> >  tools/include/uapi/linux/bpf.h |  10 ++++
> >  4 files changed, 113 insertions(+), 15 deletions(-)
> >
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index 4162afc6b5d0..2acf9b336371 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -1573,6 +1573,16 @@ union bpf_attr {
> >  		 * If provided, prog_flags should have BPF_F_TOKEN_FD flag set.
> >  		 */
> >  		__s32		prog_token_fd;
> > +		/* The fd_array_cnt can be used to pass the length of the
> > +		 * fd_array array. In this case all the [map] file descriptors
> > +		 * passed in this array will be bound to the program, even if
> > +		 * the maps are not referenced directly. The functionality is
> > +		 * similar to the BPF_PROG_BIND_MAP syscall, but maps can be
> > +		 * used by the verifier during the program load. If provided,
> > +		 * then the fd_array[0,...,fd_array_cnt-1] is expected to be
> > +		 * continuous.
> > +		 */
> > +		__u32		fd_array_cnt;
> >  	};
> >  
> >  	struct { /* anonymous struct used by BPF_OBJ_* commands */
> 
> SNIP
> > +/*
> > + * The add_fd_from_fd_array() is executed only if fd_array_cnt is given.  In
> > + * this case expect that every file descriptor in the array is either a map or
> > + * a BTF, or a hole (0). Everything else is considered to be trash.
> > + */
> > +static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd)
> > +{
> > +	struct bpf_map *map;
> > +	CLASS(fd, f)(fd);
> > +	int ret;
> > +
> > +	map = __bpf_map_get(f);
> > +	if (!IS_ERR(map)) {
> > +		ret = add_used_map(env, map);
> > +		if (ret < 0)
> > +			return ret;
> > +		return 0;
> > +	}
> > +
> > +	if (!IS_ERR(__btf_get_by_fd(f)))
> > +		return 0;
> 
> For fd_array_cnt > 0 case, does it need to handle BTF fd case ? If it
> does, these returned BTFs should be saved somewhere, otherwise,
> these BTFs will be leaked.

ATM we don't actually store BTFs here. The __btf_get_by_fd doesn't
increase the refcnt, so no leaks.

> > +	if (!fd)
> > +		return 0;
> > +
> > +	verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd);
> > +	return PTR_ERR(map);
> > +}
> > +
> > +static int env_init_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, bpfptr_t uattr)
> > +{
> > +	int size = sizeof(int) * attr->fd_array_cnt;
> > +	int *copy;
> > +	int ret;
> > +	int i;
> > +
> > +	if (attr->fd_array_cnt >= MAX_USED_MAPS)
> > +		return -E2BIG;
> > +
> > +	env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
> > +
> > +	/*
> > +	 * The only difference between old (no fd_array_cnt is given) and new
> > +	 * APIs is that in the latter case the fd_array is expected to be
> > +	 * continuous and is scanned for map fds right away
> > +	 */
> > +	if (!size)
> > +		return 0;
> > +
> > +	copy = kzalloc(size, GFP_KERNEL);
> > +	if (!copy)
> > +		return -ENOMEM;
> > +
> > +	if (copy_from_bpfptr_offset(copy, env->fd_array, 0, size)) {
> > +		ret = -EFAULT;
> > +		goto free_copy;
> > +	}
> 
> It is better to use kvmemdup_bpfptr() instead.

Thanks for the hint. As suggested by Alexei, I will remove the memory
allocation here altogether.

> > +
> > +	for (i = 0; i < attr->fd_array_cnt; i++) {
> > +		ret = add_fd_from_fd_array(env, copy[i]);
> > +		if (ret)
> > +			goto free_copy;
> > +	}
> > +
> > +free_copy:
> > +	kfree(copy);
> > +	return ret;
> > +}
> > +
> >  int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
> >  {
> >  	u64 start_time = ktime_get_ns();
> > @@ -22557,7 +22632,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
> >  		env->insn_aux_data[i].orig_idx = i;
> >  	env->prog = *prog;
> >  	env->ops = bpf_verifier_ops[env->prog->type];
> > -	env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
> > +	ret = env_init_fd_array(env, attr, uattr);
> > +	if (ret)
> > +		goto err_free_aux_data;
> 
> These maps saved in env->used_map will also be leaked.

Yeah, thanks, actually, env->used_map contents will be leaked (if
error occurs here or until we get to after `goto err_unlock`), so
I will rewrite the init/error path.

> >  
> >  	env->allow_ptr_leaks = bpf_allow_ptr_leaks(env->prog->aux->token);
> >  	env->allow_uninit_stack = bpf_allow_uninit_stack(env->prog->aux->token);
> > @@ -22775,6 +22852,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
> >  err_unlock:
> >  	if (!is_priv)
> >  		mutex_unlock(&bpf_verifier_lock);
> > +err_free_aux_data:
> >  	vfree(env->insn_aux_data);
> >  	kvfree(env->insn_hist);
> >  err_free_env:
> > diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> > index 4162afc6b5d0..2acf9b336371 100644
> > --- a/tools/include/uapi/linux/bpf.h
> > +++ b/tools/include/uapi/linux/bpf.h
> > @@ -1573,6 +1573,16 @@ union bpf_attr {
> >  		 * If provided, prog_flags should have BPF_F_TOKEN_FD flag set.
> >  		 */
> >  		__s32		prog_token_fd;
> > +		/* The fd_array_cnt can be used to pass the length of the
> > +		 * fd_array array. In this case all the [map] file descriptors
> > +		 * passed in this array will be bound to the program, even if
> > +		 * the maps are not referenced directly. The functionality is
> > +		 * similar to the BPF_PROG_BIND_MAP syscall, but maps can be
> > +		 * used by the verifier during the program load. If provided,
> > +		 * then the fd_array[0,...,fd_array_cnt-1] is expected to be
> > +		 * continuous.
> > +		 */
> > +		__u32		fd_array_cnt;
> >  	};
> >  
> >  	struct { /* anonymous struct used by BPF_OBJ_* commands */
>
Anton Protopopov Nov. 27, 2024, 6:49 a.m. UTC | #7
On 24/11/26 10:51AM, Andrii Nakryiko wrote:
> On Tue, Nov 26, 2024 at 9:27 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> >
> > On 24/11/25 05:38PM, Alexei Starovoitov wrote:
> > > On Tue, Nov 19, 2024 at 2:17 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> > > >
> > > > The fd_array attribute of the BPF_PROG_LOAD syscall may contain a set
> > > > of file descriptors: maps or btfs. This field was introduced as a
> > > > sparse array. Introduce a new attribute, fd_array_cnt, which, if
> > > > present, indicates that the fd_array is a continuous array of the
> > > > corresponding length.
> > > >
> > > > If fd_array_cnt is non-zero, then every map in the fd_array will be
> > > > bound to the program, as if it was used by the program. This
> > > > functionality is similar to the BPF_PROG_BIND_MAP syscall, but such
> > > > maps can be used by the verifier during the program load.
> > > >
> > > > Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
> > > > ---
> > > >  include/uapi/linux/bpf.h       |  10 ++++
> > > >  kernel/bpf/syscall.c           |   2 +-
> > > >  kernel/bpf/verifier.c          | 106 ++++++++++++++++++++++++++++-----
> > > >  tools/include/uapi/linux/bpf.h |  10 ++++
> > > >  4 files changed, 113 insertions(+), 15 deletions(-)
> > > >
> 
> [...]
> 
> > > > +/*
> > > > + * The add_fd_from_fd_array() is executed only if fd_array_cnt is given.  In
> > > > + * this case expect that every file descriptor in the array is either a map or
> > > > + * a BTF, or a hole (0). Everything else is considered to be trash.
> > > > + */
> > > > +static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd)
> > > > +{
> > > > +       struct bpf_map *map;
> > > > +       CLASS(fd, f)(fd);
> > > > +       int ret;
> > > > +
> > > > +       map = __bpf_map_get(f);
> > > > +       if (!IS_ERR(map)) {
> > > > +               ret = add_used_map(env, map);
> > > > +               if (ret < 0)
> > > > +                       return ret;
> > > > +               return 0;
> > > > +       }
> > > > +
> > > > +       if (!IS_ERR(__btf_get_by_fd(f)))
> > > > +               return 0;
> > > > +
> > > > +       if (!fd)
> > > > +               return 0;
> > >
> > > This is not allowed in new apis.
> > > zero cannot be special.
> >
> > I thought that this is ok since I check for all possible valid FDs by this
> > point: if fd=0 is a valid map or btf, then we will return it by this point.
> >
> > Why I wanted to keep 0 as a valid value, even if it is not pointing to any
> > map/btf is for case when, like in libbpf gen, fd_array is populated with map
> > fds starting from 0, and with btf fds from some offset, so in between there may
> > be 0s. This is probably better to disallow this, and, if fd_array_cnt != 0,
> > then to check if all [0...fd_array_cnt) elements are valid.
> 
> If fd_array_cnt != 0 we can define that fd_array isn't sparse anymore
> and every entry has to be valid. Let's do that.

Yes, makes sense

> >
> > > > +
> > > > +       verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd);
> > > > +       return PTR_ERR(map);
> > > > +}
> > > > +
> > > > +static int env_init_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, bpfptr_t uattr)
> > >
> > > What an odd name... why is 'env_' there?
> >
> 
> [...]
> 
> > > I don't get this feature.
> > > Why bother copying and checking for validity?
> > > What does it buy ?
> >
> > So, the main reason for this whole change is to allow unrelated maps, which
> > aren't referenced by a program directly, to be noticed and available during the
> > verification. Thus, I want to go through the array here and add them to
> > used_maps. (In a subsequent patch, "instruction sets" maps are treated
> > additionally at this point as well.)
> >
> > The reason to discard that copy here was that "old api" when fd_array_cnt is 0
> > is still valid and in this case we can't copy fd_array in full. Later during
> > the verification fd_array elements are accessed individually by copying from
> > bpfptr. I thought that freeing this copy here is more readable than to add
> > a check at every such occasion.
> 
> I think Alexei meant why you need to make an entire copy of fd_array,
> if you can just read one element at a time (still with
> copy_from_bpfptr_offset()), validate/add map/btf from that FD, and
> move to the next element. No need to make a copy of an array.
> 
> >
> > > pw-bot: cr
> >
Anton Protopopov Nov. 27, 2024, 6:54 a.m. UTC | #8
On 24/11/26 12:40PM, Alexei Starovoitov wrote:
> On Tue, Nov 26, 2024 at 10:51 AM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > On Tue, Nov 26, 2024 at 9:27 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> > >
> > > On 24/11/25 05:38PM, Alexei Starovoitov wrote:
> > > > On Tue, Nov 19, 2024 at 2:17 AM Anton Protopopov <aspsk@isovalent.com> wrote:
> > > > >
> > > > > The fd_array attribute of the BPF_PROG_LOAD syscall may contain a set
> > > > > of file descriptors: maps or btfs. This field was introduced as a
> > > > > sparse array. Introduce a new attribute, fd_array_cnt, which, if
> > > > > present, indicates that the fd_array is a continuous array of the
> > > > > corresponding length.
> > > > >
> > > > > If fd_array_cnt is non-zero, then every map in the fd_array will be
> > > > > bound to the program, as if it was used by the program. This
> > > > > functionality is similar to the BPF_PROG_BIND_MAP syscall, but such
> > > > > maps can be used by the verifier during the program load.
> > > > >
> > > > > Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
> > > > > ---
> > > > >  include/uapi/linux/bpf.h       |  10 ++++
> > > > >  kernel/bpf/syscall.c           |   2 +-
> > > > >  kernel/bpf/verifier.c          | 106 ++++++++++++++++++++++++++++-----
> > > > >  tools/include/uapi/linux/bpf.h |  10 ++++
> > > > >  4 files changed, 113 insertions(+), 15 deletions(-)
> > > > >
> >
> > [...]
> >
> > > > > +/*
> > > > > + * The add_fd_from_fd_array() is executed only if fd_array_cnt is given.  In
> > > > > + * this case expect that every file descriptor in the array is either a map or
> > > > > + * a BTF, or a hole (0). Everything else is considered to be trash.
> > > > > + */
> > > > > +static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd)
> > > > > +{
> > > > > +       struct bpf_map *map;
> > > > > +       CLASS(fd, f)(fd);
> > > > > +       int ret;
> > > > > +
> > > > > +       map = __bpf_map_get(f);
> > > > > +       if (!IS_ERR(map)) {
> > > > > +               ret = add_used_map(env, map);
> > > > > +               if (ret < 0)
> > > > > +                       return ret;
> > > > > +               return 0;
> > > > > +       }
> > > > > +
> > > > > +       if (!IS_ERR(__btf_get_by_fd(f)))
> > > > > +               return 0;
> > > > > +
> > > > > +       if (!fd)
> > > > > +               return 0;
> > > >
> > > > This is not allowed in new apis.
> > > > zero cannot be special.
> > >
> > > I thought that this is ok since I check for all possible valid FDs by this
> > > point: if fd=0 is a valid map or btf, then we will return it by this point.
> > >
> > > Why I wanted to keep 0 as a valid value, even if it is not pointing to any
> > > map/btf is for case when, like in libbpf gen, fd_array is populated with map
> > > fds starting from 0, and with btf fds from some offset, so in between there may
> > > be 0s. This is probably better to disallow this, and, if fd_array_cnt != 0,
> > > then to check if all [0...fd_array_cnt) elements are valid.
> >
> > If fd_array_cnt != 0 we can define that fd_array isn't sparse anymore
> > and every entry has to be valid. Let's do that.
> 
> Exactly.
> libbpf gen_loader has a very simplistic implementation of
> add_map_fd() and add_kfunc_btf_fd().
> It leaves gaps only to keep implementation simple.
> It can and probably should be changed to make it contiguous.

Ok, makes sense. I will send v3 based on all the comments.

For the libbpf gen_loader part, I can address this in one of the follow-ups
(when fd_array_cnt is actually set in libbpf).
Hou Tao Nov. 28, 2024, 4:15 a.m. UTC | #9
Hi,

On 11/27/2024 2:44 PM, Anton Protopopov wrote:
> On 24/11/26 10:11AM, Hou Tao wrote:
>> Hi,
>>
>> On 11/19/2024 6:15 PM, Anton Protopopov wrote:
>>> The fd_array attribute of the BPF_PROG_LOAD syscall may contain a set
>>> of file descriptors: maps or btfs. This field was introduced as a
>>> sparse array. Introduce a new attribute, fd_array_cnt, which, if
>>> present, indicates that the fd_array is a continuous array of the
>>> corresponding length.
>>>
>>> If fd_array_cnt is non-zero, then every map in the fd_array will be
>>> bound to the program, as if it was used by the program. This
>>> functionality is similar to the BPF_PROG_BIND_MAP syscall, but such
>>> maps can be used by the verifier during the program load.
>>>
>>> Signed-off-by: Anton Protopopov <aspsk@isovalent.com>
>>> ---

SNIP
>>> +static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd)
>>> +{
>>> +	struct bpf_map *map;
>>> +	CLASS(fd, f)(fd);
>>> +	int ret;
>>> +
>>> +	map = __bpf_map_get(f);
>>> +	if (!IS_ERR(map)) {
>>> +		ret = add_used_map(env, map);
>>> +		if (ret < 0)
>>> +			return ret;
>>> +		return 0;
>>> +	}
>>> +
>>> +	if (!IS_ERR(__btf_get_by_fd(f)))
>>> +		return 0;
>> For fd_array_cnt > 0 case, does it need to handle BTF fd case ? If it
>> does, these returned BTFs should be saved somewhere, otherwise,
>> these BTFs will be leaked.
> ATM we don't actually store BTFs here. The __btf_get_by_fd doesn't
> increase the refcnt, so no leaks.

Yes. You are right, I just mis-read the implementation of
__btf_get_by_fd().
>
>>> +	if (!fd)
>>> +		return 0;
>>> +
>>> +	verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd);
>>> +	return PTR_ERR(map);
>>> +}
>>> +
>>> +static int env_init_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, bpfptr_t uattr)
>>> +{
>>> +	int size = sizeof(int) * attr->fd_array_cnt;
>>> +	int *copy;
>>> +	int ret;
>>> +	int i;
>>> +
>>> +	if (attr->fd_array_cnt >= MAX_USED_MAPS)
>>> +		return -E2BIG;
>>> +
>>> +	env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
>>> +
>>> +	/*
>>> +	 * The only difference between old (no fd_array_cnt is given) and new
>>> +	 * APIs is that in the latter case the fd_array is expected to be
>>> +	 * continuous and is scanned for map fds right away
>>> +	 */
>>> +	if (!size)
>>> +		return 0;
>>> +
>>> +	copy = kzalloc(size, GFP_KERNEL);
>>> +	if (!copy)
>>> +		return -ENOMEM;
>>> +
>>> +	if (copy_from_bpfptr_offset(copy, env->fd_array, 0, size)) {
>>> +		ret = -EFAULT;
>>> +		goto free_copy;
>>> +	}
>> It is better to use kvmemdup_bpfptr() instead.
> Thanks for the hint. As suggested by Alexei, I will remove the memory
> allocation here altogether.

I see.
>
>>> +
>>> +	for (i = 0; i < attr->fd_array_cnt; i++) {
>>> +		ret = add_fd_from_fd_array(env, copy[i]);
>>> +		if (ret)
>>> +			goto free_copy;
>>> +	}
>>> +
>>> +free_copy:
>>> +	kfree(copy);
>>> +	return ret;
>>> +}
>>> +
>>>  int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
>>>  {
>>>  	u64 start_time = ktime_get_ns();
>>> @@ -22557,7 +22632,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
>>>  		env->insn_aux_data[i].orig_idx = i;
>>>  	env->prog = *prog;
>>>  	env->ops = bpf_verifier_ops[env->prog->type];
>>> -	env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
>>> +	ret = env_init_fd_array(env, attr, uattr);
>>> +	if (ret)
>>> +		goto err_free_aux_data;
>> These maps saved in env->used_map will also be leaked.
> Yeah, thanks, actually, env->used_map contents will be leaked (if
> error occurs here or until we get to after `goto err_unlock`), so
> I will rewrite the init/error path.

Glad to hear that.
diff mbox series

Patch

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4162afc6b5d0..2acf9b336371 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1573,6 +1573,16 @@  union bpf_attr {
 		 * If provided, prog_flags should have BPF_F_TOKEN_FD flag set.
 		 */
 		__s32		prog_token_fd;
+		/* The fd_array_cnt can be used to pass the length of the
+		 * fd_array array. In this case all the [map] file descriptors
+		 * passed in this array will be bound to the program, even if
+		 * the maps are not referenced directly. The functionality is
+		 * similar to the BPF_PROG_BIND_MAP syscall, but maps can be
+		 * used by the verifier during the program load. If provided,
+		 * then the fd_array[0,...,fd_array_cnt-1] is expected to be
+		 * continuous.
+		 */
+		__u32		fd_array_cnt;
 	};
 
 	struct { /* anonymous struct used by BPF_OBJ_* commands */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 58190ca724a2..7e3fbc23c742 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2729,7 +2729,7 @@  static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
 }
 
 /* last field in 'union bpf_attr' used by this command */
-#define BPF_PROG_LOAD_LAST_FIELD prog_token_fd
+#define BPF_PROG_LOAD_LAST_FIELD fd_array_cnt
 
 static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
 {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8e034a22aa2a..a84ba93c0036 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -19181,22 +19181,10 @@  static int check_map_prog_compatibility(struct bpf_verifier_env *env,
 	return 0;
 }
 
-/* Add map behind fd to used maps list, if it's not already there, and return
- * its index.
- * Returns <0 on error, or >= 0 index, on success.
- */
-static int add_used_map_from_fd(struct bpf_verifier_env *env, int fd)
+static int add_used_map(struct bpf_verifier_env *env, struct bpf_map *map)
 {
-	CLASS(fd, f)(fd);
-	struct bpf_map *map;
 	int i, err;
 
-	map = __bpf_map_get(f);
-	if (IS_ERR(map)) {
-		verbose(env, "fd %d is not pointing to valid bpf_map\n", fd);
-		return PTR_ERR(map);
-	}
-
 	/* check whether we recorded this map already */
 	for (i = 0; i < env->used_map_cnt; i++)
 		if (env->used_maps[i] == map)
@@ -19227,6 +19215,24 @@  static int add_used_map_from_fd(struct bpf_verifier_env *env, int fd)
 	return env->used_map_cnt - 1;
 }
 
+/* Add map behind fd to used maps list, if it's not already there, and return
+ * its index.
+ * Returns <0 on error, or >= 0 index, on success.
+ */
+static int add_used_map_from_fd(struct bpf_verifier_env *env, int fd)
+{
+	struct bpf_map *map;
+	CLASS(fd, f)(fd);
+
+	map = __bpf_map_get(f);
+	if (IS_ERR(map)) {
+		verbose(env, "fd %d is not pointing to valid bpf_map\n", fd);
+		return PTR_ERR(map);
+	}
+
+	return add_used_map(env, map);
+}
+
 /* find and rewrite pseudo imm in ld_imm64 instructions:
  *
  * 1. if it accesses map FD, replace it with actual map pointer.
@@ -22526,6 +22532,75 @@  struct btf *bpf_get_btf_vmlinux(void)
 	return btf_vmlinux;
 }
 
+/*
+ * The add_fd_from_fd_array() is executed only if fd_array_cnt is given.  In
+ * this case expect that every file descriptor in the array is either a map or
+ * a BTF, or a hole (0). Everything else is considered to be trash.
+ */
+static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd)
+{
+	struct bpf_map *map;
+	CLASS(fd, f)(fd);
+	int ret;
+
+	map = __bpf_map_get(f);
+	if (!IS_ERR(map)) {
+		ret = add_used_map(env, map);
+		if (ret < 0)
+			return ret;
+		return 0;
+	}
+
+	if (!IS_ERR(__btf_get_by_fd(f)))
+		return 0;
+
+	if (!fd)
+		return 0;
+
+	verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd);
+	return PTR_ERR(map);
+}
+
+static int env_init_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, bpfptr_t uattr)
+{
+	int size = sizeof(int) * attr->fd_array_cnt;
+	int *copy;
+	int ret;
+	int i;
+
+	if (attr->fd_array_cnt >= MAX_USED_MAPS)
+		return -E2BIG;
+
+	env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
+
+	/*
+	 * The only difference between old (no fd_array_cnt is given) and new
+	 * APIs is that in the latter case the fd_array is expected to be
+	 * continuous and is scanned for map fds right away
+	 */
+	if (!size)
+		return 0;
+
+	copy = kzalloc(size, GFP_KERNEL);
+	if (!copy)
+		return -ENOMEM;
+
+	if (copy_from_bpfptr_offset(copy, env->fd_array, 0, size)) {
+		ret = -EFAULT;
+		goto free_copy;
+	}
+
+	for (i = 0; i < attr->fd_array_cnt; i++) {
+		ret = add_fd_from_fd_array(env, copy[i]);
+		if (ret)
+			goto free_copy;
+	}
+
+free_copy:
+	kfree(copy);
+	return ret;
+}
+
 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
 {
 	u64 start_time = ktime_get_ns();
@@ -22557,7 +22632,9 @@  int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
 		env->insn_aux_data[i].orig_idx = i;
 	env->prog = *prog;
 	env->ops = bpf_verifier_ops[env->prog->type];
-	env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
+	ret = env_init_fd_array(env, attr, uattr);
+	if (ret)
+		goto err_free_aux_data;
 
 	env->allow_ptr_leaks = bpf_allow_ptr_leaks(env->prog->aux->token);
 	env->allow_uninit_stack = bpf_allow_uninit_stack(env->prog->aux->token);
@@ -22775,6 +22852,7 @@  int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
 err_unlock:
 	if (!is_priv)
 		mutex_unlock(&bpf_verifier_lock);
+err_free_aux_data:
 	vfree(env->insn_aux_data);
 	kvfree(env->insn_hist);
 err_free_env:
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 4162afc6b5d0..2acf9b336371 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1573,6 +1573,16 @@  union bpf_attr {
 		 * If provided, prog_flags should have BPF_F_TOKEN_FD flag set.
 		 */
 		__s32		prog_token_fd;
+		/* The fd_array_cnt can be used to pass the length of the
+		 * fd_array array. In this case all the [map] file descriptors
+		 * passed in this array will be bound to the program, even if
+		 * the maps are not referenced directly. The functionality is
+		 * similar to the BPF_PROG_BIND_MAP syscall, but maps can be
+		 * used by the verifier during the program load. If provided,
+		 * then the fd_array[0,...,fd_array_cnt-1] is expected to be
+		 * continuous.
+		 */
+		__u32		fd_array_cnt;
 	};
 
 	struct { /* anonymous struct used by BPF_OBJ_* commands */