
[RFC,bpf-next,5/6] bpf: Allow pro/epilogue to call kfunc

Message ID 20240813184943.3759630-6-martin.lau@linux.dev (mailing list archive)
State Superseded
Delegated to: BPF
Series bpf: Add gen_epilogue and allow kfunc call in pro/epilogue

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 42 this patch: 42
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 7 maintainers not CCed: song@kernel.org sdf@fomichev.me eddyz87@gmail.com haoluo@google.com jolsa@kernel.org kpsingh@kernel.org john.fastabend@gmail.com
netdev/build_clang success Errors and warnings before: 43 this patch: 43
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 53 this patch: 53
netdev/checkpatch warning WARNING: line length of 83 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns WARNING: line length of 96 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Martin KaFai Lau Aug. 13, 2024, 6:49 p.m. UTC
From: Martin KaFai Lau <martin.lau@kernel.org>

The existing prologue can call a bpf helper but not a kfunc.
This patch allows the prologue/epilogue to call kfuncs.

The subsystem that implements .gen_prologue and .gen_epilogue
can add a BPF_PSEUDO_KFUNC_CALL instruction with insn->imm
set to the btf func_id of the kfunc. This part is the same
as a bpf prog loaded through sys_bpf.

Another piece is a way for the subsystem to tell the verifier which
btf object the kfunc func_id belongs to. This patch uses the
"struct module **module" argument added to .gen_prologue and
.gen_epilogue in the previous patch. The verifier then uses
btf_get_module_btf(module) to find the btf object.

A .gen_prologue/.gen_epilogue will usually set "*module = THIS_MODULE".
For now, only kfunc(s) from a single module (or vmlinux) can be used in
the .gen_prologue/.gen_epilogue. In the future, the .gen_prologue/.gen_epilogue
could return an array of modules and use insn->off as an index into
that array.
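
For illustration, a .gen_epilogue could emit the kfunc call roughly like
the sketch below. This is only a sketch, assuming the .gen_epilogue
signature from the previous patch in this series; "example_kfunc_btf_id"
is a hypothetical placeholder for whatever btf func_id the subsystem has
resolved for its kfunc:

static int example_gen_epilogue(struct bpf_insn *insn_buf, const struct bpf_prog *prog,
				s16 ctx_stack_off, struct module **module)
{
	struct bpf_insn *insn = insn_buf;

	/* r1 = the ctx pointer that the verifier saved on the stack,
	 * passed as the kfunc's first argument
	 */
	*insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_FP, ctx_stack_off);

	/* BPF_PSEUDO_KFUNC_CALL with insn->imm set to the kfunc's btf
	 * func_id, the same encoding a prog loaded through sys_bpf uses.
	 * insn->off is left at 0 here; the verifier fixes it up.
	 */
	*insn++ = (struct bpf_insn) {
		.code	 = BPF_JMP | BPF_CALL,
		.src_reg = BPF_PSEUDO_KFUNC_CALL,
		.imm	 = example_kfunc_btf_id,	/* hypothetical */
	};

	/* for this sketch, force the final return value to 0 */
	*insn++ = BPF_MOV64_IMM(BPF_REG_0, 0);
	*insn++ = BPF_EXIT_INSN();

	/* tell the verifier which btf object the func_id belongs to */
	*module = THIS_MODULE;

	return insn - insn_buf;
}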

When the returned module is NULL, the btf is btf_vmlinux and
insn->off stays at 0. This is the same as sys_bpf.

When the btf is from a module, it needs an entry in
prog->aux->kfunc_btf_tab. The kfunc_btf_tab is currently
sorted by insn->off, which is an offset into attr->fd_array.
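
For context, a prog loaded through sys_bpf refers to a module kfunc
roughly as in the hypothetical snippet below; the fd_array index and
func_id values are placeholders, not values from this patch:

#include <linux/bpf.h>

/* insn->off == 0 selects the vmlinux btf; insn->off > 0 indexes into
 * attr->fd_array, where userspace has placed the module BTF fd.
 * insn->imm is the kfunc's func_id inside the selected btf.
 */
static struct bpf_insn make_module_kfunc_call(__s16 fd_array_idx, __u32 func_id)
{
	return (struct bpf_insn) {
		.code	 = BPF_JMP | BPF_CALL,
		.src_reg = BPF_PSEUDO_KFUNC_CALL,
		.off	 = fd_array_idx,
		.imm	 = func_id,
	};
}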

This module btf may or may not already be in the kfunc_btf_tab. A new
function "find_kfunc_desc_btf_offset" is added to search for an existing
entry with the same btf. If one is found, its offset is reused in
insn->off. If none is found, an offset value that is not yet used in the
kfunc_btf_tab is picked (S16_MAX, then S16_MAX - 1, and so on), a new
entry is added to the kfunc_btf_tab, and this new offset is set in insn->off.

Once the insn->off is determined (either an existing one is reused or
an unused one is picked), the existing add_kfunc_call() is called and
everything else falls through.

Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 kernel/bpf/verifier.c | 116 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 113 insertions(+), 3 deletions(-)

Comments

Eduard Zingerman Aug. 14, 2024, 10:17 p.m. UTC | #1
On Tue, 2024-08-13 at 11:49 -0700, Martin KaFai Lau wrote:
> From: Martin KaFai Lau <martin.lau@kernel.org>
> 
> The existing prologue can call a bpf helper but not a kfunc.
> This patch allows the prologue/epilogue to call kfuncs.

[...]

> Once the insn->off is determined (either an existing one is reused or
> an unused one is picked), the existing add_kfunc_call() is called and
> everything else falls through.
> 
> Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
> ---

fwiw, don't see any obvious problems with this patch.

Reviewed-by: Eduard Zingerman <eddyz87@gmail.com>

>  kernel/bpf/verifier.c | 116 ++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 113 insertions(+), 3 deletions(-)
> 
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 5e995b7884fb..2873e1083402 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -2787,6 +2787,61 @@ static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset)
>  	return btf_vmlinux ?: ERR_PTR(-ENOENT);
>  }
>  
> +static int find_kfunc_desc_btf_offset(struct bpf_verifier_env *env, struct btf *btf,
> +				      struct module *module, s16 *offset)
> +{
> +	struct bpf_kfunc_btf_tab *tab;
> +	struct bpf_kfunc_btf *b;
> +	s16 new_offset = S16_MAX;
> +	u32 i;
> +
> +	if (btf_is_vmlinux(btf)) {
> +		*offset = 0;
> +		return 0;
> +	}
> +
> +	tab = env->prog->aux->kfunc_btf_tab;
> +	if (!tab) {
> +		tab = kzalloc(sizeof(*tab), GFP_KERNEL);
> +		if (!tab)
> +			return -ENOMEM;
> +		env->prog->aux->kfunc_btf_tab = tab;
> +	}
> +
> +	b = tab->descs;
> +	for (i = tab->nr_descs; i > 0; i--) {

Question: why iterating in reverse here?

> +		if (b[i - 1].btf == btf) {
> +			*offset = b[i - 1].offset;
> +			return 0;
> +		}
> +		/* Search new_offset from backward S16_MAX, S16_MAX-1, ...
> +		 * tab->nr_descs max out at MAX_KFUNC_BTFS which is
> +		 * smaller than S16_MAX, so it will be able to find
> +		 * a non-zero new_offset to use.
> +		 */
> +		if (new_offset == b[i - 1].offset)
> +			new_offset--;
> +	}
> +
> +	if (tab->nr_descs == MAX_KFUNC_BTFS) {
> +		verbose(env, "too many different module BTFs\n");
> +		return -E2BIG;
> +	}
> +
> +	if (!try_module_get(module))
> +		return -ENXIO;
> +
> +	b = &tab->descs[tab->nr_descs++];
> +	btf_get(btf);
> +	b->btf = btf;
> +	b->module = module;
> +	b->offset = new_offset;
> +	*offset = new_offset;
> +	sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
> +	     kfunc_btf_cmp_by_off, NULL);
> +	return 0;
> +}
> +
>  static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
>  {
>  	const struct btf_type *func, *func_proto;

[...]
Martin KaFai Lau Aug. 15, 2024, 11:47 p.m. UTC | #2
On 8/14/24 3:17 PM, Eduard Zingerman wrote:
> On Tue, 2024-08-13 at 11:49 -0700, Martin KaFai Lau wrote:
>> From: Martin KaFai Lau <martin.lau@kernel.org>
>>
>> The existing prologue can call a bpf helper but not a kfunc.
>> This patch allows the prologue/epilogue to call kfuncs.
> 
> [...]
> 
>> Once the insn->off is determined (either an existing one is reused or
>> an unused one is picked), the existing add_kfunc_call() is called and
>> everything else falls through.
>>
>> Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
>> ---
> 
> fwiw, don't see any obvious problems with this patch.
> 
> Reviewed-by: Eduard Zingerman <eddyz87@gmail.com>
> 
>>   kernel/bpf/verifier.c | 116 ++++++++++++++++++++++++++++++++++++++++--
>>   1 file changed, 113 insertions(+), 3 deletions(-)
>>
>> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
>> index 5e995b7884fb..2873e1083402 100644
>> --- a/kernel/bpf/verifier.c
>> +++ b/kernel/bpf/verifier.c
>> @@ -2787,6 +2787,61 @@ static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset)
>>   	return btf_vmlinux ?: ERR_PTR(-ENOENT);
>>   }
>>   
>> +static int find_kfunc_desc_btf_offset(struct bpf_verifier_env *env, struct btf *btf,
>> +				      struct module *module, s16 *offset)
>> +{
>> +	struct bpf_kfunc_btf_tab *tab;
>> +	struct bpf_kfunc_btf *b;
>> +	s16 new_offset = S16_MAX;
>> +	u32 i;
>> +
>> +	if (btf_is_vmlinux(btf)) {
>> +		*offset = 0;
>> +		return 0;
>> +	}
>> +
>> +	tab = env->prog->aux->kfunc_btf_tab;
>> +	if (!tab) {
>> +		tab = kzalloc(sizeof(*tab), GFP_KERNEL);
>> +		if (!tab)
>> +			return -ENOMEM;
>> +		env->prog->aux->kfunc_btf_tab = tab;
>> +	}
>> +
>> +	b = tab->descs;
>> +	for (i = tab->nr_descs; i > 0; i--) {
> 
> Question: why iterating in reverse here?

Agreed. It is unnecessary. I will change it to iterate forward in the next re-spin.

Thanks for the review!

Patch

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 5e995b7884fb..2873e1083402 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2787,6 +2787,61 @@  static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset)
 	return btf_vmlinux ?: ERR_PTR(-ENOENT);
 }
 
+static int find_kfunc_desc_btf_offset(struct bpf_verifier_env *env, struct btf *btf,
+				      struct module *module, s16 *offset)
+{
+	struct bpf_kfunc_btf_tab *tab;
+	struct bpf_kfunc_btf *b;
+	s16 new_offset = S16_MAX;
+	u32 i;
+
+	if (btf_is_vmlinux(btf)) {
+		*offset = 0;
+		return 0;
+	}
+
+	tab = env->prog->aux->kfunc_btf_tab;
+	if (!tab) {
+		tab = kzalloc(sizeof(*tab), GFP_KERNEL);
+		if (!tab)
+			return -ENOMEM;
+		env->prog->aux->kfunc_btf_tab = tab;
+	}
+
+	b = tab->descs;
+	for (i = tab->nr_descs; i > 0; i--) {
+		if (b[i - 1].btf == btf) {
+			*offset = b[i - 1].offset;
+			return 0;
+		}
+		/* Search new_offset from backward S16_MAX, S16_MAX-1, ...
+		 * tab->nr_descs max out at MAX_KFUNC_BTFS which is
+		 * smaller than S16_MAX, so it will be able to find
+		 * a non-zero new_offset to use.
+		 */
+		if (new_offset == b[i - 1].offset)
+			new_offset--;
+	}
+
+	if (tab->nr_descs == MAX_KFUNC_BTFS) {
+		verbose(env, "too many different module BTFs\n");
+		return -E2BIG;
+	}
+
+	if (!try_module_get(module))
+		return -ENXIO;
+
+	b = &tab->descs[tab->nr_descs++];
+	btf_get(btf);
+	b->btf = btf;
+	b->module = module;
+	b->offset = new_offset;
+	*offset = new_offset;
+	sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
+	     kfunc_btf_cmp_by_off, NULL);
+	return 0;
+}
+
 static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
 {
 	const struct btf_type *func, *func_proto;
@@ -19603,6 +19658,50 @@  static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
 	return 0;
 }
 
+static int fixup_pro_epilogue_kfunc(struct bpf_verifier_env *env, struct bpf_insn *insns,
+				    int cnt, struct module *module)
+{
+	struct btf *btf;
+	u32 func_id;
+	int i, err;
+	s16 offset;
+
+	for (i = 0; i < cnt; i++) {
+		if (!bpf_pseudo_kfunc_call(&insns[i]))
+			continue;
+
+		/* The kernel may not have BTF available, so only
+		 * try to get a btf if the pro/epilogue calls a kfunc.
+		 */
+		btf = btf_get_module_btf(module);
+		if (IS_ERR_OR_NULL(btf)) {
+			verbose(env, "cannot find BTF from %s for kfunc used in pro/epilogue\n",
+				module_name(module));
+			return -EINVAL;
+		}
+
+		func_id = insns[i].imm;
+		if (btf_is_vmlinux(btf) &&
+		    btf_id_set_contains(&special_kfunc_set, func_id)) {
+			verbose(env, "pro/epilogue cannot use special kfunc\n");
+			btf_put(btf);
+			return -EINVAL;
+		}
+
+		err = find_kfunc_desc_btf_offset(env, btf, module, &offset);
+		btf_put(btf);
+		if (err)
+			return err;
+
+		insns[i].off = offset;
+		err = add_kfunc_call(env, func_id, offset);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 /* convert load instructions that access fields of a context type into a
  * sequence of instructions that access fields of the underlying structure:
  *     struct __sk_buff    -> struct sk_buff
@@ -19612,21 +19711,27 @@  static int convert_ctx_accesses(struct bpf_verifier_env *env)
 {
 	struct bpf_subprog_info *subprogs = env->subprog_info;
 	const struct bpf_verifier_ops *ops = env->ops;
-	int i, cnt, size, ctx_field_size, delta = 0, epilogue_cnt = 0;
+	int err, i, cnt, size, ctx_field_size, delta = 0, epilogue_cnt = 0;
 	const int insn_cnt = env->prog->len;
 	struct bpf_insn insn_buf[16], epilogue_buf[16], *insn;
 	u32 target_size, size_default, off;
 	struct bpf_prog *new_prog;
 	enum bpf_access_type type;
 	bool is_narrower_load;
+	struct module *module;
 
 	if (ops->gen_epilogue) {
+		module = NULL;
 		epilogue_cnt = ops->gen_epilogue(epilogue_buf, env->prog,
-						 -(subprogs[0].stack_depth + 8), NULL);
+						 -(subprogs[0].stack_depth + 8), &module);
 		if (epilogue_cnt >= ARRAY_SIZE(epilogue_buf)) {
 			verbose(env, "bpf verifier is misconfigured\n");
 			return -EINVAL;
 		} else if (epilogue_cnt) {
+			err = fixup_pro_epilogue_kfunc(env, epilogue_buf, epilogue_cnt, module);
+			if (err)
+				return err;
+
 			/* Save the ARG_PTR_TO_CTX for the epilogue to use */
 			cnt = 0;
 			subprogs[0].stack_depth += 8;
@@ -19646,12 +19751,17 @@  static int convert_ctx_accesses(struct bpf_verifier_env *env)
 			verbose(env, "bpf verifier is misconfigured\n");
 			return -EINVAL;
 		}
+		module = NULL;
 		cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
-					env->prog, NULL);
+					env->prog, &module);
 		if (cnt >= ARRAY_SIZE(insn_buf)) {
 			verbose(env, "bpf verifier is misconfigured\n");
 			return -EINVAL;
 		} else if (cnt) {
+			err = fixup_pro_epilogue_kfunc(env, insn_buf, cnt, module);
+			if (err)
+				return err;
+
 			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
 			if (!new_prog)
 				return -ENOMEM;