diff mbox series

[bpf-next,v11,07/13] bpf: pass attached BTF to the bpf_struct_ops subsystem

Message ID 20231106201252.1568931-8-thinker.li@gmail.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series Registering struct_ops types from modules | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-16 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-16 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-16 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-16 / veristat
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 3107 this patch: 3107
netdev/cc_maintainers warning 8 maintainers not CCed: jolsa@kernel.org sdf@google.com john.fastabend@gmail.com kpsingh@kernel.org yonghong.song@linux.dev netdev@vger.kernel.org haoluo@google.com daniel@iogearbox.net
netdev/build_clang success Errors and warnings before: 1532 this patch: 1532
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 3192 this patch: 3192
netdev/checkpatch warning WARNING: ENOTSUPP is not a SUSV4 error code, prefer EOPNOTSUPP WARNING: line length of 86 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-3 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-15 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-16 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-18 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-llvm-16 / build / build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-25 fail Logs for x86_64-llvm-16 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-26 fail Logs for x86_64-llvm-16 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-llvm-16 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-16 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-16 / veristat
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc

Commit Message

Kui-Feng Lee Nov. 6, 2023, 8:12 p.m. UTC
From: Kui-Feng Lee <thinker.li@gmail.com>

Every kernel module has its own BTF, comprising information on the types
defined in the module. This patch adds a BTF fd
(attr->value_type_btf_obj_fd) that userspace passes at map creation. It
helps bpf_struct_ops look up the type information and description of the
struct_ops type, which is necessary for parsing the layout of map element
values and registering maps. The descriptions are looked up by matching a
type id (attr->btf_vmlinux_value_type_id) against the bpf_struct_ops_desc(s)
defined in a BTF. If the struct_ops type is defined in a module,
bpf_struct_ops needs to know the module BTF to look up the
bpf_struct_ops_desc.

The bpf_prog includes attach_btf in aux. Userspace passes the corresponding
BTF fd in the bpf_attr (attr->attach_btf_obj_fd) when loading the program;
this patch reuses that existing field to pass the module BTF when loading a
struct_ops program. The purpose of attach_btf is to determine the btf type
of attach_btf_id. The attach_btf_id is then used to identify the traced
function for a trace program. In the case of struct_ops programs, it is
used to identify the struct_ops type of the struct_ops object that a
program is attached to.

Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
---
 include/uapi/linux/bpf.h       |  5 +++
 kernel/bpf/bpf_struct_ops.c    | 57 ++++++++++++++++++++++++----------
 kernel/bpf/syscall.c           |  2 +-
 kernel/bpf/verifier.c          |  9 ++++--
 tools/include/uapi/linux/bpf.h |  5 +++
 5 files changed, 57 insertions(+), 21 deletions(-)

Comments

Martin KaFai Lau Nov. 10, 2023, 2:04 a.m. UTC | #1
On 11/6/23 12:12 PM, thinker.li@gmail.com wrote:
> From: Kui-Feng Lee <thinker.li@gmail.com>
> 
> Every kernel module has its BTF, comprising information on types defined in
> the module. The BTF fd (attr->value_type_btf_obj_fd) passed from userspace

I would highlight that this patch adds value_type_btf_obj_fd.

> helps the bpf_struct_ops to lookup type information and description of the
> struct_ops type, which is necessary for parsing the layout of map element
> values and registering maps. The descriptions are looked up by matching a
> type id (attr->btf_vmlinux_value_type_id) against bpf_struct_ops_desc(s)
> defined in a BTF. If the struct_ops type is defined in a module, the
> bpf_struct_ops needs to know the module BTF to lookup the
> bpf_struct_ops_desc.
> 
> The bpf_prog includes attach_btf in aux which is passed along with the
> bpf_attr when loading the program. The purpose of attach_btf is to

I read it as "attach_btf" being passed in the bpf_attr. This has been in my head
for a while. I sort of know what the actual uapi is, so I didn't get to it yet.

We have already discussed a bit of this offline. I think it meant 
attr->attach_btf_obj_fd here.

This patch is mainly about how userspace passes the kmod's btf to the kernel
during map creation and prog load, and also which uapi it uses. The commit
message should mention that this patch reuses the existing
attr->attach_btf_obj_fd for userspace to pass the kmod's btf when loading
the struct_ops prog. I needed to go back to the syscall.c code to figure that
out and also leap forward to the later libbpf patch to confirm it.

I depend on the commit message to help the review. It would be much appreciated
if the commit message were clear and accurate on things like: what it wants to
do, how it does it (addition/deletion/changes), and what the major changes are.

> determine the btf type of attach_btf_id. The attach_btf_id is then used to
> identify the traced function for a trace program. In the case of struct_ops
> programs, it is used to identify the struct_ops type of the struct_ops
> object that a program is attached to.
> 
> Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
> ---
>   include/uapi/linux/bpf.h       |  5 +++
>   kernel/bpf/bpf_struct_ops.c    | 57 ++++++++++++++++++++++++----------
>   kernel/bpf/syscall.c           |  2 +-
>   kernel/bpf/verifier.c          |  9 ++++--
>   tools/include/uapi/linux/bpf.h |  5 +++
>   5 files changed, 57 insertions(+), 21 deletions(-)
> 
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 0f6cdf52b1da..fd20c52606b2 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -1398,6 +1398,11 @@ union bpf_attr {
>   		 * to using 5 hash functions).
>   		 */
>   		__u64	map_extra;
> +
> +		__u32   value_type_btf_obj_fd;	/* fd pointing to a BTF
> +						 * type data for
> +						 * btf_vmlinux_value_type_id.
> +						 */
>   	};
>   
>   	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
> diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
> index 4ba6181ed1c4..2fb1b21f989a 100644
> --- a/kernel/bpf/bpf_struct_ops.c
> +++ b/kernel/bpf/bpf_struct_ops.c
> @@ -635,6 +635,7 @@ static void __bpf_struct_ops_map_free(struct bpf_map *map)
>   		bpf_jit_uncharge_modmem(PAGE_SIZE);
>   	}
>   	bpf_map_area_free(st_map->uvalue);
> +	btf_put(st_map->btf);
>   	bpf_map_area_free(st_map);
>   }
>   
> @@ -675,15 +676,30 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
>   	struct bpf_struct_ops_map *st_map;
>   	const struct btf_type *t, *vt;
>   	struct bpf_map *map;
> +	struct btf *btf;
>   	int ret;
>   
> -	st_ops_desc = bpf_struct_ops_find_value(btf_vmlinux, attr->btf_vmlinux_value_type_id);
> -	if (!st_ops_desc)
> -		return ERR_PTR(-ENOTSUPP);
> +	if (attr->value_type_btf_obj_fd) {
> +		/* The map holds btf for its whole life time. */
> +		btf = btf_get_by_fd(attr->value_type_btf_obj_fd);
> +		if (IS_ERR(btf))
> +			return ERR_PTR(PTR_ERR(btf));
> +	} else {
> +		btf = btf_vmlinux;
> +		btf_get(btf);
> +	}
> +
> +	st_ops_desc = bpf_struct_ops_find_value(btf, attr->btf_vmlinux_value_type_id);
> +	if (!st_ops_desc) {
> +		ret = -ENOTSUPP;
> +		goto errout;
> +	}
>   
>   	vt = st_ops_desc->value_type;
> -	if (attr->value_size != vt->size)
> -		return ERR_PTR(-EINVAL);
> +	if (attr->value_size != vt->size) {
> +		ret = -EINVAL;
> +		goto errout;
> +	}
>   
>   	t = st_ops_desc->type;
>   
> @@ -694,17 +710,18 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
>   		(vt->size - sizeof(struct bpf_struct_ops_value));
>   
>   	st_map = bpf_map_area_alloc(st_map_size, NUMA_NO_NODE);
> -	if (!st_map)
> -		return ERR_PTR(-ENOMEM);
> +	if (!st_map) {
> +		ret = -ENOMEM;
> +		goto errout;
> +	}
>   
> +	st_map->btf = btf;

How about doing the "st_map->btf = btf;" assignment in the same place where the
current code does it (a few lines below)? Would that avoid the new "btf = NULL;"
dance in the error case?

nit: if moving a line, I would also move the following "st_map->st_ops_desc =
st_ops_desc;" later, close to where "st_map->btf = btf;" is.

>   	st_map->st_ops_desc = st_ops_desc;
>   	map = &st_map->map;
>   
>   	ret = bpf_jit_charge_modmem(PAGE_SIZE);
> -	if (ret) {
> -		__bpf_struct_ops_map_free(map);
> -		return ERR_PTR(ret);
> -	}
> +	if (ret)
> +		goto errout_free;
>   
>   	st_map->image = bpf_jit_alloc_exec(PAGE_SIZE);
>   	if (!st_map->image) {
> @@ -713,25 +730,31 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
>   		 * here.
>   		 */
>   		bpf_jit_uncharge_modmem(PAGE_SIZE);
> -		__bpf_struct_ops_map_free(map);
> -		return ERR_PTR(-ENOMEM);
> +		ret = -ENOMEM;
> +		goto errout_free;
>   	}
>   	st_map->uvalue = bpf_map_area_alloc(vt->size, NUMA_NO_NODE);
>   	st_map->links =
>   		bpf_map_area_alloc(btf_type_vlen(t) * sizeof(struct bpf_links *),
>   				   NUMA_NO_NODE);
>   	if (!st_map->uvalue || !st_map->links) {
> -		__bpf_struct_ops_map_free(map);
> -		return ERR_PTR(-ENOMEM);
> +		ret = -ENOMEM;
> +		goto errout_free;
>   	}
>   
> -	st_map->btf = btf_vmlinux;

The old code initializes "st_map->btf" here.

> -
>   	mutex_init(&st_map->lock);
>   	set_vm_flush_reset_perms(st_map->image);
>   	bpf_map_init_from_attr(map, attr);
>   
>   	return map;
> +
> +errout_free:
> +	__bpf_struct_ops_map_free(map);
> +	btf = NULL;		/* has been released */
> +errout:
> +	btf_put(btf);
> +
> +	return ERR_PTR(ret);
>   }
>   
>   static u64 bpf_struct_ops_map_mem_usage(const struct bpf_map *map)
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 0ed286b8a0f0..974651fe2bee 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -1096,7 +1096,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
>   	return ret;
>   }
>   
> -#define BPF_MAP_CREATE_LAST_FIELD map_extra
> +#define BPF_MAP_CREATE_LAST_FIELD value_type_btf_obj_fd
>   /* called via syscall */
>   static int map_create(union bpf_attr *attr)
>   {
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index bdd166cab977..3f446f76d4bf 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -20086,6 +20086,7 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
>   	const struct btf_member *member;
>   	struct bpf_prog *prog = env->prog;
>   	u32 btf_id, member_idx;
> +	struct btf *btf;
>   	const char *mname;
>   
>   	if (!prog->gpl_compatible) {
> @@ -20093,8 +20094,10 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
>   		return -EINVAL;
>   	}
>   
> +	btf = prog->aux->attach_btf;
> +
>   	btf_id = prog->aux->attach_btf_id;
> -	st_ops_desc = bpf_struct_ops_find(btf_vmlinux, btf_id);
> +	st_ops_desc = bpf_struct_ops_find(btf, btf_id);
>   	if (!st_ops_desc) {
>   		verbose(env, "attach_btf_id %u is not a supported struct\n",
>   			btf_id);
> @@ -20111,8 +20114,8 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
>   	}
>   
>   	member = &btf_type_member(t)[member_idx];
> -	mname = btf_name_by_offset(btf_vmlinux, member->name_off);
> -	func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type,
> +	mname = btf_name_by_offset(btf, member->name_off);
> +	func_proto = btf_type_resolve_func_ptr(btf, member->type,
>   					       NULL);
>   	if (!func_proto) {
>   		verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 0f6cdf52b1da..fd20c52606b2 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -1398,6 +1398,11 @@ union bpf_attr {
>   		 * to using 5 hash functions).
>   		 */
>   		__u64	map_extra;
> +
> +		__u32   value_type_btf_obj_fd;	/* fd pointing to a BTF
> +						 * type data for
> +						 * btf_vmlinux_value_type_id.
> +						 */
>   	};
>   
>   	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
Kui-Feng Lee Nov. 22, 2023, 10:33 p.m. UTC | #2
On 11/9/23 18:04, Martin KaFai Lau wrote:
> On 11/6/23 12:12 PM, thinker.li@gmail.com wrote:
>> From: Kui-Feng Lee <thinker.li@gmail.com>
>>
>> Every kernel module has its BTF, comprising information on types 
>> defined in
>> the module. The BTF fd (attr->value_type_btf_obj_fd) passed from 
>> userspace
> 
> I would highlight this patch (adds) value_type_btf_obj_fd.
> 
>> helps the bpf_struct_ops to lookup type information and description of 
>> the
>> struct_ops type, which is necessary for parsing the layout of map element
>> values and registering maps. The descriptions are looked up by matching a
>> type id (attr->btf_vmlinux_value_type_id) against bpf_struct_ops_desc(s)
>> defined in a BTF. If the struct_ops type is defined in a module, the
>> bpf_struct_ops needs to know the module BTF to lookup the
>> bpf_struct_ops_desc.
>>
>> The bpf_prog includes attach_btf in aux which is passed along with the
>> bpf_attr when loading the program. The purpose of attach_btf is to
> 
> I read it as "attach_btf" is passed in the bpf_attr. This has been in my 
> head for a while. I sort of know what is the actual uapi, so didn't get 
> to it yet.
> 
> We have already discussed a bit of this offline. I think it meant 
> attr->attach_btf_obj_fd here.
> 
> This patch is mainly about how the userspace passing kmod's btf to the 
> kernel during map creation and prog load and also what uapi does it use. 
> The commit message should mention this patch is reusing the existing 
> attr->attach_btf_obj_fd for the userspace to pass the kmod's btf when 
> loading the struct_ops prog. I need to go back to the syscall.c code to 
> figure out and also leap forward to the later libbpf patch to confirm it.
> 
> I depend on the commit message to help the review. It is much 
> appreciated if the commit message is clear and accurate on things like: 
> what it wants to do, how it does it (addition/deletion/changes), and 
> what are the major changes.
Got it! I will rewrite the commit log to make it easier to read the
patch.

> 
>> determine the btf type of attach_btf_id. The attach_btf_id is then 
>> used to
>> identify the traced function for a trace program. In the case of 
>> struct_ops
>> programs, it is used to identify the struct_ops type of the struct_ops
>> object that a program is attached to.
>>
>> Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
>> ---
>>   include/uapi/linux/bpf.h       |  5 +++
>>   kernel/bpf/bpf_struct_ops.c    | 57 ++++++++++++++++++++++++----------
>>   kernel/bpf/syscall.c           |  2 +-
>>   kernel/bpf/verifier.c          |  9 ++++--
>>   tools/include/uapi/linux/bpf.h |  5 +++
>>   5 files changed, 57 insertions(+), 21 deletions(-)
>>
>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>> index 0f6cdf52b1da..fd20c52606b2 100644
>> --- a/include/uapi/linux/bpf.h
>> +++ b/include/uapi/linux/bpf.h
>> @@ -1398,6 +1398,11 @@ union bpf_attr {
>>            * to using 5 hash functions).
>>            */
>>           __u64    map_extra;
>> +
>> +        __u32   value_type_btf_obj_fd;    /* fd pointing to a BTF
>> +                         * type data for
>> +                         * btf_vmlinux_value_type_id.
>> +                         */
>>       };
>>       struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
>> diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
>> index 4ba6181ed1c4..2fb1b21f989a 100644
>> --- a/kernel/bpf/bpf_struct_ops.c
>> +++ b/kernel/bpf/bpf_struct_ops.c
>> @@ -635,6 +635,7 @@ static void __bpf_struct_ops_map_free(struct 
>> bpf_map *map)
>>           bpf_jit_uncharge_modmem(PAGE_SIZE);
>>       }
>>       bpf_map_area_free(st_map->uvalue);
>> +    btf_put(st_map->btf);
>>       bpf_map_area_free(st_map);
>>   }
>> @@ -675,15 +676,30 @@ static struct bpf_map 
>> *bpf_struct_ops_map_alloc(union bpf_attr *attr)
>>       struct bpf_struct_ops_map *st_map;
>>       const struct btf_type *t, *vt;
>>       struct bpf_map *map;
>> +    struct btf *btf;
>>       int ret;
>> -    st_ops_desc = bpf_struct_ops_find_value(btf_vmlinux, 
>> attr->btf_vmlinux_value_type_id);
>> -    if (!st_ops_desc)
>> -        return ERR_PTR(-ENOTSUPP);
>> +    if (attr->value_type_btf_obj_fd) {
>> +        /* The map holds btf for its whole life time. */
>> +        btf = btf_get_by_fd(attr->value_type_btf_obj_fd);
>> +        if (IS_ERR(btf))
>> +            return ERR_PTR(PTR_ERR(btf));
>> +    } else {
>> +        btf = btf_vmlinux;
>> +        btf_get(btf);
>> +    }
>> +
>> +    st_ops_desc = bpf_struct_ops_find_value(btf, 
>> attr->btf_vmlinux_value_type_id);
>> +    if (!st_ops_desc) {
>> +        ret = -ENOTSUPP;
>> +        goto errout;
>> +    }
>>       vt = st_ops_desc->value_type;
>> -    if (attr->value_size != vt->size)
>> -        return ERR_PTR(-EINVAL);
>> +    if (attr->value_size != vt->size) {
>> +        ret = -EINVAL;
>> +        goto errout;
>> +    }
>>       t = st_ops_desc->type;
>> @@ -694,17 +710,18 @@ static struct bpf_map 
>> *bpf_struct_ops_map_alloc(union bpf_attr *attr)
>>           (vt->size - sizeof(struct bpf_struct_ops_value));
>>       st_map = bpf_map_area_alloc(st_map_size, NUMA_NO_NODE);
>> -    if (!st_map)
>> -        return ERR_PTR(-ENOMEM);
>> +    if (!st_map) {
>> +        ret = -ENOMEM;
>> +        goto errout;
>> +    }
>> +    st_map->btf = btf;
> 
> How about do the "st_map->btf = btf;" assignment the same as where the 
> current code is doing (a few lines below). Would it avoid the new "btf = 
> NULL;" dance during the error case?
> 
> nit, if moving a line, I would move the following "st_map->st_ops_desc = 
> st_ops_desc;" to the later and close to where "st_map->btf = btf;" is.

It would work. But I would also need to initialize st_map->btf to NULL;
otherwise, errout_free may try to free an invalid pointer, if I read it correctly.

> 
>>       st_map->st_ops_desc = st_ops_desc;
>>       map = &st_map->map;
>>       ret = bpf_jit_charge_modmem(PAGE_SIZE);
>> -    if (ret) {
>> -        __bpf_struct_ops_map_free(map);
>> -        return ERR_PTR(ret);
>> -    }
>> +    if (ret)
>> +        goto errout_free;
>>       st_map->image = bpf_jit_alloc_exec(PAGE_SIZE);
>>       if (!st_map->image) {
>> @@ -713,25 +730,31 @@ static struct bpf_map 
>> *bpf_struct_ops_map_alloc(union bpf_attr *attr)
>>            * here.
>>            */
>>           bpf_jit_uncharge_modmem(PAGE_SIZE);
>> -        __bpf_struct_ops_map_free(map);
>> -        return ERR_PTR(-ENOMEM);
>> +        ret = -ENOMEM;
>> +        goto errout_free;
>>       }
>>       st_map->uvalue = bpf_map_area_alloc(vt->size, NUMA_NO_NODE);
>>       st_map->links =
>>           bpf_map_area_alloc(btf_type_vlen(t) * sizeof(struct 
>> bpf_links *),
>>                      NUMA_NO_NODE);
>>       if (!st_map->uvalue || !st_map->links) {
>> -        __bpf_struct_ops_map_free(map);
>> -        return ERR_PTR(-ENOMEM);
>> +        ret = -ENOMEM;
>> +        goto errout_free;
>>       }
>> -    st_map->btf = btf_vmlinux;
> 
> The old code initializes "st_map->btf" here.
> 
>> -
>>       mutex_init(&st_map->lock);
>>       set_vm_flush_reset_perms(st_map->image);
>>       bpf_map_init_from_attr(map, attr);
>>       return map;
>> +
>> +errout_free:
>> +    __bpf_struct_ops_map_free(map);
>> +    btf = NULL;        /* has been released */
>> +errout:
>> +    btf_put(btf);
>> +
>> +    return ERR_PTR(ret);
>>   }
>>   static u64 bpf_struct_ops_map_mem_usage(const struct bpf_map *map)
>> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
>> index 0ed286b8a0f0..974651fe2bee 100644
>> --- a/kernel/bpf/syscall.c
>> +++ b/kernel/bpf/syscall.c
>> @@ -1096,7 +1096,7 @@ static int map_check_btf(struct bpf_map *map, 
>> const struct btf *btf,
>>       return ret;
>>   }
>> -#define BPF_MAP_CREATE_LAST_FIELD map_extra
>> +#define BPF_MAP_CREATE_LAST_FIELD value_type_btf_obj_fd
>>   /* called via syscall */
>>   static int map_create(union bpf_attr *attr)
>>   {
>> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
>> index bdd166cab977..3f446f76d4bf 100644
>> --- a/kernel/bpf/verifier.c
>> +++ b/kernel/bpf/verifier.c
>> @@ -20086,6 +20086,7 @@ static int check_struct_ops_btf_id(struct 
>> bpf_verifier_env *env)
>>       const struct btf_member *member;
>>       struct bpf_prog *prog = env->prog;
>>       u32 btf_id, member_idx;
>> +    struct btf *btf;
>>       const char *mname;
>>       if (!prog->gpl_compatible) {
>> @@ -20093,8 +20094,10 @@ static int check_struct_ops_btf_id(struct 
>> bpf_verifier_env *env)
>>           return -EINVAL;
>>       }
>> +    btf = prog->aux->attach_btf;
>> +
>>       btf_id = prog->aux->attach_btf_id;
>> -    st_ops_desc = bpf_struct_ops_find(btf_vmlinux, btf_id);
>> +    st_ops_desc = bpf_struct_ops_find(btf, btf_id);
>>       if (!st_ops_desc) {
>>           verbose(env, "attach_btf_id %u is not a supported struct\n",
>>               btf_id);
>> @@ -20111,8 +20114,8 @@ static int check_struct_ops_btf_id(struct 
>> bpf_verifier_env *env)
>>       }
>>       member = &btf_type_member(t)[member_idx];
>> -    mname = btf_name_by_offset(btf_vmlinux, member->name_off);
>> -    func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type,
>> +    mname = btf_name_by_offset(btf, member->name_off);
>> +    func_proto = btf_type_resolve_func_ptr(btf, member->type,
>>                              NULL);
>>       if (!func_proto) {
>>           verbose(env, "attach to invalid member %s(@idx %u) of struct 
>> %s\n",
>> diff --git a/tools/include/uapi/linux/bpf.h 
>> b/tools/include/uapi/linux/bpf.h
>> index 0f6cdf52b1da..fd20c52606b2 100644
>> --- a/tools/include/uapi/linux/bpf.h
>> +++ b/tools/include/uapi/linux/bpf.h
>> @@ -1398,6 +1398,11 @@ union bpf_attr {
>>            * to using 5 hash functions).
>>            */
>>           __u64    map_extra;
>> +
>> +        __u32   value_type_btf_obj_fd;    /* fd pointing to a BTF
>> +                         * type data for
>> +                         * btf_vmlinux_value_type_id.
>> +                         */
>>       };
>>       struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
>
Martin KaFai Lau Nov. 27, 2023, 10:08 p.m. UTC | #3
On 11/22/23 2:33 PM, Kui-Feng Lee wrote:
>>> diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
>>> index 4ba6181ed1c4..2fb1b21f989a 100644
>>> --- a/kernel/bpf/bpf_struct_ops.c
>>> +++ b/kernel/bpf/bpf_struct_ops.c
>>> @@ -635,6 +635,7 @@ static void __bpf_struct_ops_map_free(struct bpf_map *map)
>>>           bpf_jit_uncharge_modmem(PAGE_SIZE);
>>>       }
>>>       bpf_map_area_free(st_map->uvalue);
>>> +    btf_put(st_map->btf);
>>>       bpf_map_area_free(st_map);
>>>   }
>>> @@ -675,15 +676,30 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union 
>>> bpf_attr *attr)
>>>       struct bpf_struct_ops_map *st_map;
>>>       const struct btf_type *t, *vt;
>>>       struct bpf_map *map;
>>> +    struct btf *btf;
>>>       int ret;
>>> -    st_ops_desc = bpf_struct_ops_find_value(btf_vmlinux, 
>>> attr->btf_vmlinux_value_type_id);
>>> -    if (!st_ops_desc)
>>> -        return ERR_PTR(-ENOTSUPP);
>>> +    if (attr->value_type_btf_obj_fd) {
>>> +        /* The map holds btf for its whole life time. */
>>> +        btf = btf_get_by_fd(attr->value_type_btf_obj_fd);
>>> +        if (IS_ERR(btf))
>>> +            return ERR_PTR(PTR_ERR(btf));
>>> +    } else {
>>> +        btf = btf_vmlinux;
>>> +        btf_get(btf);
>>> +    }
>>> +
>>> +    st_ops_desc = bpf_struct_ops_find_value(btf, 
>>> attr->btf_vmlinux_value_type_id);
>>> +    if (!st_ops_desc) {
>>> +        ret = -ENOTSUPP;
>>> +        goto errout;
>>> +    }
>>>       vt = st_ops_desc->value_type;
>>> -    if (attr->value_size != vt->size)
>>> -        return ERR_PTR(-EINVAL);
>>> +    if (attr->value_size != vt->size) {
>>> +        ret = -EINVAL;
>>> +        goto errout;
>>> +    }
>>>       t = st_ops_desc->type;
>>> @@ -694,17 +710,18 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union 
>>> bpf_attr *attr)
>>>           (vt->size - sizeof(struct bpf_struct_ops_value));
>>>       st_map = bpf_map_area_alloc(st_map_size, NUMA_NO_NODE);
>>> -    if (!st_map)
>>> -        return ERR_PTR(-ENOMEM);
>>> +    if (!st_map) {
>>> +        ret = -ENOMEM;
>>> +        goto errout;
>>> +    }
>>> +    st_map->btf = btf;
>>
>> How about do the "st_map->btf = btf;" assignment the same as where the current 
>> code is doing (a few lines below). Would it avoid the new "btf = NULL;" dance 
>> during the error case?
>>
>> nit, if moving a line, I would move the following "st_map->st_ops_desc = 
>> st_ops_desc;" to the later and close to where "st_map->btf = btf;" is.
> 
> It would work. But, I also need to init st_map->btf as NULL. Or, it may
> fail at errout_free to free an invalid pointer if I read it correctly.

st_map->btf should already be initialized to NULL, because bpf_map_area_alloc() returns zeroed memory. Please check bpf_map_area_alloc().
diff mbox series

Patch

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0f6cdf52b1da..fd20c52606b2 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1398,6 +1398,11 @@  union bpf_attr {
 		 * to using 5 hash functions).
 		 */
 		__u64	map_extra;
+
+		__u32   value_type_btf_obj_fd;	/* fd pointing to a BTF
+						 * type data for
+						 * btf_vmlinux_value_type_id.
+						 */
 	};
 
 	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 4ba6181ed1c4..2fb1b21f989a 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -635,6 +635,7 @@  static void __bpf_struct_ops_map_free(struct bpf_map *map)
 		bpf_jit_uncharge_modmem(PAGE_SIZE);
 	}
 	bpf_map_area_free(st_map->uvalue);
+	btf_put(st_map->btf);
 	bpf_map_area_free(st_map);
 }
 
@@ -675,15 +676,30 @@  static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
 	struct bpf_struct_ops_map *st_map;
 	const struct btf_type *t, *vt;
 	struct bpf_map *map;
+	struct btf *btf;
 	int ret;
 
-	st_ops_desc = bpf_struct_ops_find_value(btf_vmlinux, attr->btf_vmlinux_value_type_id);
-	if (!st_ops_desc)
-		return ERR_PTR(-ENOTSUPP);
+	if (attr->value_type_btf_obj_fd) {
+		/* The map holds btf for its whole life time. */
+		btf = btf_get_by_fd(attr->value_type_btf_obj_fd);
+		if (IS_ERR(btf))
+			return ERR_PTR(PTR_ERR(btf));
+	} else {
+		btf = btf_vmlinux;
+		btf_get(btf);
+	}
+
+	st_ops_desc = bpf_struct_ops_find_value(btf, attr->btf_vmlinux_value_type_id);
+	if (!st_ops_desc) {
+		ret = -ENOTSUPP;
+		goto errout;
+	}
 
 	vt = st_ops_desc->value_type;
-	if (attr->value_size != vt->size)
-		return ERR_PTR(-EINVAL);
+	if (attr->value_size != vt->size) {
+		ret = -EINVAL;
+		goto errout;
+	}
 
 	t = st_ops_desc->type;
 
@@ -694,17 +710,18 @@  static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
 		(vt->size - sizeof(struct bpf_struct_ops_value));
 
 	st_map = bpf_map_area_alloc(st_map_size, NUMA_NO_NODE);
-	if (!st_map)
-		return ERR_PTR(-ENOMEM);
+	if (!st_map) {
+		ret = -ENOMEM;
+		goto errout;
+	}
 
+	st_map->btf = btf;
 	st_map->st_ops_desc = st_ops_desc;
 	map = &st_map->map;
 
 	ret = bpf_jit_charge_modmem(PAGE_SIZE);
-	if (ret) {
-		__bpf_struct_ops_map_free(map);
-		return ERR_PTR(ret);
-	}
+	if (ret)
+		goto errout_free;
 
 	st_map->image = bpf_jit_alloc_exec(PAGE_SIZE);
 	if (!st_map->image) {
@@ -713,25 +730,31 @@  static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
 		 * here.
 		 */
 		bpf_jit_uncharge_modmem(PAGE_SIZE);
-		__bpf_struct_ops_map_free(map);
-		return ERR_PTR(-ENOMEM);
+		ret = -ENOMEM;
+		goto errout_free;
 	}
 	st_map->uvalue = bpf_map_area_alloc(vt->size, NUMA_NO_NODE);
 	st_map->links =
 		bpf_map_area_alloc(btf_type_vlen(t) * sizeof(struct bpf_links *),
 				   NUMA_NO_NODE);
 	if (!st_map->uvalue || !st_map->links) {
-		__bpf_struct_ops_map_free(map);
-		return ERR_PTR(-ENOMEM);
+		ret = -ENOMEM;
+		goto errout_free;
 	}
 
-	st_map->btf = btf_vmlinux;
-
 	mutex_init(&st_map->lock);
 	set_vm_flush_reset_perms(st_map->image);
 	bpf_map_init_from_attr(map, attr);
 
 	return map;
+
+errout_free:
+	__bpf_struct_ops_map_free(map);
+	btf = NULL;		/* has been released */
+errout:
+	btf_put(btf);
+
+	return ERR_PTR(ret);
 }
 
 static u64 bpf_struct_ops_map_mem_usage(const struct bpf_map *map)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0ed286b8a0f0..974651fe2bee 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1096,7 +1096,7 @@  static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 	return ret;
 }
 
-#define BPF_MAP_CREATE_LAST_FIELD map_extra
+#define BPF_MAP_CREATE_LAST_FIELD value_type_btf_obj_fd
 /* called via syscall */
 static int map_create(union bpf_attr *attr)
 {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index bdd166cab977..3f446f76d4bf 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -20086,6 +20086,7 @@  static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
 	const struct btf_member *member;
 	struct bpf_prog *prog = env->prog;
 	u32 btf_id, member_idx;
+	struct btf *btf;
 	const char *mname;
 
 	if (!prog->gpl_compatible) {
@@ -20093,8 +20094,10 @@  static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
 		return -EINVAL;
 	}
 
+	btf = prog->aux->attach_btf;
+
 	btf_id = prog->aux->attach_btf_id;
-	st_ops_desc = bpf_struct_ops_find(btf_vmlinux, btf_id);
+	st_ops_desc = bpf_struct_ops_find(btf, btf_id);
 	if (!st_ops_desc) {
 		verbose(env, "attach_btf_id %u is not a supported struct\n",
 			btf_id);
@@ -20111,8 +20114,8 @@  static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
 	}
 
 	member = &btf_type_member(t)[member_idx];
-	mname = btf_name_by_offset(btf_vmlinux, member->name_off);
-	func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type,
+	mname = btf_name_by_offset(btf, member->name_off);
+	func_proto = btf_type_resolve_func_ptr(btf, member->type,
 					       NULL);
 	if (!func_proto) {
 		verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 0f6cdf52b1da..fd20c52606b2 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1398,6 +1398,11 @@  union bpf_attr {
 		 * to using 5 hash functions).
 		 */
 		__u64	map_extra;
+
+		__u32   value_type_btf_obj_fd;	/* fd pointing to a BTF
+						 * type data for
+						 * btf_vmlinux_value_type_id.
+						 */
 	};
 
 	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */