diff mbox series

[v6,bpf-next,5/7] bpf: Add arch_bpf_trampoline_size()

Message ID 20231201190654.1233153-6-song@kernel.org (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series Allocate bpf trampoline on bpf_prog_pack | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success SINGLE THREAD; Generated files up to date; no warnings/errors;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 2657 this patch: 2657
netdev/cc_maintainers warning 28 maintainers not CCed: gor@linux.ibm.com will@kernel.org svens@linux.ibm.com paul.walmsley@sifive.com hca@linux.ibm.com bp@alien8.de borntraeger@linux.ibm.com bjorn@kernel.org dsahern@kernel.org yonghong.song@linux.dev linux-s390@vger.kernel.org aou@eecs.berkeley.edu haoluo@google.com zlim.lnx@gmail.com x86@kernel.org kpsingh@kernel.org dave.hansen@linux.intel.com palmer@dabbelt.com linux-riscv@lists.infradead.org john.fastabend@gmail.com mingo@redhat.com catalin.marinas@arm.com hpa@zytor.com agordeev@linux.ibm.com linux-arm-kernel@lists.infradead.org sdf@google.com tglx@linutronix.de netdev@vger.kernel.org
netdev/build_clang success Errors and warnings before: 1276 this patch: 1276
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 2734 this patch: 2734
netdev/checkpatch warning WARNING: ENOTSUPP is not a SUSV4 error code, prefer EOPNOTSUPP WARNING: line length of 83 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns WARNING: line length of 96 exceeds 80 columns WARNING: line length of 98 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-3 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-15 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-16 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-llvm-16 / build / build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-16 / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-llvm-16 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-llvm-16 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-llvm-16 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-16 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc

Commit Message

Song Liu Dec. 1, 2023, 7:06 p.m. UTC
This helper will be used to calculate the size of the trampoline before
allocating the memory.

arch_prepare_bpf_trampoline() for arm64 and riscv64 can use
arch_bpf_trampoline_size() to check the trampoline fits in the image.

OTOH, arch_prepare_bpf_trampoline() for s390 has to call the JIT process
twice, so it cannot use arch_bpf_trampoline_size().

Signed-off-by: Song Liu <song@kernel.org>
Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
Tested-by: Ilya Leoshkevich <iii@linux.ibm.com>  # on s390x
Acked-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Björn Töpel <bjorn@rivosinc.com>
Tested-by: Björn Töpel <bjorn@rivosinc.com> # on riscv
---
 arch/arm64/net/bpf_jit_comp.c   | 56 ++++++++++++++++++++++++---------
 arch/riscv/net/bpf_jit_comp64.c | 22 ++++++++++---
 arch/s390/net/bpf_jit_comp.c    | 56 ++++++++++++++++++++-------------
 arch/x86/net/bpf_jit_comp.c     | 37 +++++++++++++++++++---
 include/linux/bpf.h             |  2 ++
 kernel/bpf/trampoline.c         |  6 ++++
 6 files changed, 133 insertions(+), 46 deletions(-)

Comments

Alexei Starovoitov Dec. 6, 2023, 7:33 p.m. UTC | #1
On Fri, Dec 01, 2023 at 11:06:52AM -0800, Song Liu wrote:
> +int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
> +                          struct bpf_tramp_links *tlinks, void *func_addr)
> +{
> +     struct bpf_tramp_image im;
> +     void *image;
> +     int ret;
> +
> +     /* Allocate a temporary buffer for __arch_prepare_bpf_trampoline().
> +      * This will NOT cause fragmentation in direct map, as we do not
> +      * call set_memory_*() on this buffer.
> +      */
> +     image = bpf_jit_alloc_exec(PAGE_SIZE);
> +     if (!image)
> +             return -ENOMEM;
> +
> +     ret = __arch_prepare_bpf_trampoline(&im, image, image + PAGE_SIZE, m, flags,
> +                                         tlinks, func_addr);
> +     bpf_jit_free_exec(image);
> +     return ret;
> +}

There is no need to allocate an executable page just to compute the size, right?
Instead of bpf_jit_alloc_exec() it should work with alloc_page() ?

Similar in patch 7:
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void
*image, void *image_end,
                                const struct btf_func_model *m, u32 flags,
                                struct bpf_tramp_links *tlinks,
                                void *func_addr)
 {
-       return __arch_prepare_bpf_trampoline(im, image, image_end, m,
flags, tlinks, func_addr);
+       void *rw_image, *tmp;
+       int ret;
+       u32 size = image_end - image;
+
+       rw_image = bpf_jit_alloc_exec(size);
+       if (!rw_image)
+               return -ENOMEM;
+
+       ret = __arch_prepare_bpf_trampoline(im, rw_image, rw_image +
size, image, m,
+                                           flags, tlinks, func_addr);
+       if (ret < 0)
+               goto out;
+
+       tmp = bpf_arch_text_copy(image, rw_image, size);
+       if (IS_ERR(tmp))
+               ret = PTR_ERR(tmp);
+out:
+       bpf_jit_free_exec(rw_image);
+       return ret;
 }

In the above only 'image' has to be ROX. rw_image can be allocated
with kvmalloc().
Just like it's done in the main loop of JIT via
bpf_jit_binary_pack_alloc() -> kvmalloc() -> rw_header.

pw-bot: cr
Song Liu Dec. 6, 2023, 9:18 p.m. UTC | #2
On Wed, Dec 6, 2023 at 11:34 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Fri, Dec 01, 2023 at 11:06:52AM -0800, Song Liu wrote:
> > +int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
> > +                          struct bpf_tramp_links *tlinks, void *func_addr)
> > +{
> > +     struct bpf_tramp_image im;
> > +     void *image;
> > +     int ret;
> > +
> > +     /* Allocate a temporary buffer for __arch_prepare_bpf_trampoline().
> > +      * This will NOT cause fragmentation in direct map, as we do not
> > +      * call set_memory_*() on this buffer.
> > +      */
> > +     image = bpf_jit_alloc_exec(PAGE_SIZE);
> > +     if (!image)
> > +             return -ENOMEM;
> > +
> > +     ret = __arch_prepare_bpf_trampoline(&im, image, image + PAGE_SIZE, m, flags,
> > +                                         tlinks, func_addr);
> > +     bpf_jit_free_exec(image);
> > +     return ret;
> > +}
>
> There is no need to allocate an executable page just to compute the size, right?
> Instead of bpf_jit_alloc_exec() it should work with alloc_page() ?

We can use kvmalloc in patch 7. But we need bpf_jit_alloc_exec(). The reason is
__arch_prepare_bpf_trampoline() assumes "image" falls in certain memory ranges.
If we use kvmalloc here, we may fail those checks, for example is_simm32() in
emit_patch().

In patch 7, we separate rw_image from image, so rw_image do not need to be in
the range.

Thanks,
Song


>
> Similar in patch 7:
> int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void
> *image, void *image_end,
>                                 const struct btf_func_model *m, u32 flags,
>                                 struct bpf_tramp_links *tlinks,
>                                 void *func_addr)
>  {
> -       return __arch_prepare_bpf_trampoline(im, image, image_end, m,
> flags, tlinks, func_addr);
> +       void *rw_image, *tmp;
> +       int ret;
> +       u32 size = image_end - image;
> +
> +       rw_image = bpf_jit_alloc_exec(size);
> +       if (!rw_image)
> +               return -ENOMEM;
> +
> +       ret = __arch_prepare_bpf_trampoline(im, rw_image, rw_image +
> size, image, m,
> +                                           flags, tlinks, func_addr);
> +       if (ret < 0)
> +               goto out;
> +
> +       tmp = bpf_arch_text_copy(image, rw_image, size);
> +       if (IS_ERR(tmp))
> +               ret = PTR_ERR(tmp);
> +out:
> +       bpf_jit_free_exec(rw_image);
> +       return ret;
>  }
>
> In the above only 'image' has to be ROX. rw_image can be allocated
> with kvmalloc().
> Just like it's done in the main loop of JIT via
> bpf_jit_binary_pack_alloc() -> kvmalloc() -> rw_header.
>
> pw-bot: cr
Alexei Starovoitov Dec. 6, 2023, 9:31 p.m. UTC | #3
On Wed, Dec 6, 2023 at 1:18 PM Song Liu <song@kernel.org> wrote:
>
> On Wed, Dec 6, 2023 at 11:34 AM Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
> >
> > On Fri, Dec 01, 2023 at 11:06:52AM -0800, Song Liu wrote:
> > > +int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
> > > +                          struct bpf_tramp_links *tlinks, void *func_addr)
> > > +{
> > > +     struct bpf_tramp_image im;
> > > +     void *image;
> > > +     int ret;
> > > +
> > > +     /* Allocate a temporary buffer for __arch_prepare_bpf_trampoline().
> > > +      * This will NOT cause fragmentation in direct map, as we do not
> > > +      * call set_memory_*() on this buffer.
> > > +      */
> > > +     image = bpf_jit_alloc_exec(PAGE_SIZE);
> > > +     if (!image)
> > > +             return -ENOMEM;
> > > +
> > > +     ret = __arch_prepare_bpf_trampoline(&im, image, image + PAGE_SIZE, m, flags,
> > > +                                         tlinks, func_addr);
> > > +     bpf_jit_free_exec(image);
> > > +     return ret;
> > > +}
> >
> > There is no need to allocate an executable page just to compute the size, right?
> > Instead of bpf_jit_alloc_exec() it should work with alloc_page() ?
>
> We can use kvmalloc in patch 7. But we need bpf_jit_alloc_exec(). The reason is
> __arch_prepare_bpf_trampoline() assumes "image" falls in certain memory ranges.
> If we use kvmalloc here, we may fail those checks, for example is_simm32() in
> emit_patch().

Ahh. Makes sense. Please add a comment then.
diff mbox series

Patch

diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index d81b886ea4df..a6671253b7ed 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -2026,18 +2026,10 @@  static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
 	return ctx->idx;
 }
 
-int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
-				void *image_end, const struct btf_func_model *m,
-				u32 flags, struct bpf_tramp_links *tlinks,
-				void *func_addr)
+static int btf_func_model_nregs(const struct btf_func_model *m)
 {
-	int i, ret;
 	int nregs = m->nr_args;
-	int max_insns = ((long)image_end - (long)image) / AARCH64_INSN_SIZE;
-	struct jit_ctx ctx = {
-		.image = NULL,
-		.idx = 0,
-	};
+	int i;
 
 	/* extra registers needed for struct argument */
 	for (i = 0; i < MAX_BPF_FUNC_ARGS; i++) {
@@ -2046,19 +2038,53 @@  int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
 			nregs += (m->arg_size[i] + 7) / 8 - 1;
 	}
 
+	return nregs;
+}
+
+int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
+			     struct bpf_tramp_links *tlinks, void *func_addr)
+{
+	struct jit_ctx ctx = {
+		.image = NULL,
+		.idx = 0,
+	};
+	struct bpf_tramp_image im;
+	int nregs, ret;
+
+	nregs = btf_func_model_nregs(m);
 	/* the first 8 registers are used for arguments */
 	if (nregs > 8)
 		return -ENOTSUPP;
 
-	ret = prepare_trampoline(&ctx, im, tlinks, func_addr, nregs, flags);
+	ret = prepare_trampoline(&ctx, &im, tlinks, func_addr, nregs, flags);
 	if (ret < 0)
 		return ret;
 
-	if (ret > max_insns)
-		return -EFBIG;
+	return ret < 0 ? ret : ret * AARCH64_INSN_SIZE;
+}
 
-	ctx.image = image;
-	ctx.idx = 0;
+int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
+				void *image_end, const struct btf_func_model *m,
+				u32 flags, struct bpf_tramp_links *tlinks,
+				void *func_addr)
+{
+	int ret, nregs;
+	struct jit_ctx ctx = {
+		.image = image,
+		.idx = 0,
+	};
+
+	nregs = btf_func_model_nregs(m);
+	/* the first 8 registers are used for arguments */
+	if (nregs > 8)
+		return -ENOTSUPP;
+
+	ret = arch_bpf_trampoline_size(m, flags, tlinks, func_addr);
+	if (ret < 0)
+		return ret;
+
+	if (ret > ((long)image_end - (long)image))
+		return -EFBIG;
 
 	jit_fill_hole(image, (unsigned int)(image_end - image));
 	ret = prepare_trampoline(&ctx, im, tlinks, func_addr, nregs, flags);
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index 8581693e62d3..35747fafde57 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -1029,6 +1029,21 @@  static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
 	return ret;
 }
 
+int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
+			     struct bpf_tramp_links *tlinks, void *func_addr)
+{
+	struct bpf_tramp_image im;
+	struct rv_jit_context ctx;
+	int ret;
+
+	ctx.ninsns = 0;
+	ctx.insns = NULL;
+	ctx.ro_insns = NULL;
+	ret = __arch_prepare_bpf_trampoline(&im, m, tlinks, func_addr, flags, &ctx);
+
+	return ret < 0 ? ret : ninsns_rvoff(ctx.ninsns);
+}
+
 int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
 				void *image_end, const struct btf_func_model *m,
 				u32 flags, struct bpf_tramp_links *tlinks,
@@ -1037,14 +1052,11 @@  int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
 	int ret;
 	struct rv_jit_context ctx;
 
-	ctx.ninsns = 0;
-	ctx.insns = NULL;
-	ctx.ro_insns = NULL;
-	ret = __arch_prepare_bpf_trampoline(im, m, tlinks, func_addr, flags, &ctx);
+	ret = arch_bpf_trampoline_size(im, m, flags, tlinks, func_addr);
 	if (ret < 0)
 		return ret;
 
-	if (ninsns_rvoff(ret) > (long)image_end - (long)image)
+	if (ret > (long)image_end - (long)image)
 		return -EFBIG;
 
 	ctx.ninsns = 0;
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index bf06b7283f0c..cc129617480a 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -2637,6 +2637,21 @@  static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
 	return 0;
 }
 
+int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
+			     struct bpf_tramp_links *tlinks, void *orig_call)
+{
+	struct bpf_tramp_image im;
+	struct bpf_tramp_jit tjit;
+	int ret;
+
+	memset(&tjit, 0, sizeof(tjit));
+
+	ret = __arch_prepare_bpf_trampoline(&im, &tjit, m, flags,
+					    tlinks, orig_call);
+
+	return ret < 0 ? ret : tjit.common.prg;
+}
+
 int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
 				void *image_end, const struct btf_func_model *m,
 				u32 flags, struct bpf_tramp_links *tlinks,
@@ -2644,30 +2659,27 @@  int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
 {
 	struct bpf_tramp_jit tjit;
 	int ret;
-	int i;
 
-	for (i = 0; i < 2; i++) {
-		if (i == 0) {
-			/* Compute offsets, check whether the code fits. */
-			memset(&tjit, 0, sizeof(tjit));
-		} else {
-			/* Generate the code. */
-			tjit.common.prg = 0;
-			tjit.common.prg_buf = image;
-		}
-		ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags,
-						    tlinks, func_addr);
-		if (ret < 0)
-			return ret;
-		if (tjit.common.prg > (char *)image_end - (char *)image)
-			/*
-			 * Use the same error code as for exceeding
-			 * BPF_MAX_TRAMP_LINKS.
-			 */
-			return -E2BIG;
-	}
+	/* Compute offsets, check whether the code fits. */
+	memset(&tjit, 0, sizeof(tjit));
+	ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags,
+					    tlinks, func_addr);
+
+	if (ret < 0)
+		return ret;
+	if (tjit.common.prg > (char *)image_end - (char *)image)
+		/*
+		 * Use the same error code as for exceeding
+		 * BPF_MAX_TRAMP_LINKS.
+		 */
+		return -E2BIG;
+
+	tjit.common.prg = 0;
+	tjit.common.prg_buf = image;
+	ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags,
+					    tlinks, func_addr);
 
-	return tjit.common.prg;
+	return ret < 0 ? ret : tjit.common.prg;
 }
 
 bool bpf_jit_supports_subprog_tailcalls(void)
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 5f7528cac344..561530ef2cdb 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -2422,10 +2422,10 @@  static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
  * add rsp, 8                      // skip eth_type_trans's frame
  * ret                             // return to its caller
  */
-int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
-				const struct btf_func_model *m, u32 flags,
-				struct bpf_tramp_links *tlinks,
-				void *func_addr)
+static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
+					 const struct btf_func_model *m, u32 flags,
+					 struct bpf_tramp_links *tlinks,
+					 void *func_addr)
 {
 	int i, ret, nr_regs = m->nr_args, stack_size = 0;
 	int regs_off, nregs_off, ip_off, run_ctx_off, arg_stack_off, rbx_off;
@@ -2678,6 +2678,35 @@  int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	return ret;
 }
 
+int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
+				const struct btf_func_model *m, u32 flags,
+				struct bpf_tramp_links *tlinks,
+				void *func_addr)
+{
+	return __arch_prepare_bpf_trampoline(im, image, image_end, m, flags, tlinks, func_addr);
+}
+
+int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
+			     struct bpf_tramp_links *tlinks, void *func_addr)
+{
+	struct bpf_tramp_image im;
+	void *image;
+	int ret;
+
+	/* Allocate a temporary buffer for __arch_prepare_bpf_trampoline().
+	 * This will NOT cause fragmentation in direct map, as we do not
+	 * call set_memory_*() on this buffer.
+	 */
+	image = bpf_jit_alloc_exec(PAGE_SIZE);
+	if (!image)
+		return -ENOMEM;
+
+	ret = __arch_prepare_bpf_trampoline(&im, image, image + PAGE_SIZE, m, flags,
+					    tlinks, func_addr);
+	bpf_jit_free_exec(image);
+	return ret;
+}
+
 static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs, u8 *image, u8 *buf)
 {
 	u8 *jg_reloc, *prog = *pprog;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9dc3c6275576..93f6bc382f8e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1091,6 +1091,8 @@  void *arch_alloc_bpf_trampoline(unsigned int size);
 void arch_free_bpf_trampoline(void *image, unsigned int size);
 void arch_protect_bpf_trampoline(void *image, unsigned int size);
 void arch_unprotect_bpf_trampoline(void *image, unsigned int size);
+int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
+			     struct bpf_tramp_links *tlinks, void *func_addr);
 
 u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
 					     struct bpf_tramp_run_ctx *run_ctx);
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index affbcbf7e76e..b553cbd89e55 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -1072,6 +1072,12 @@  void __weak arch_unprotect_bpf_trampoline(void *image, unsigned int size)
 	set_memory_rw((long)image, 1);
 }
 
+int __weak arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
+				    struct bpf_tramp_links *tlinks, void *func_addr)
+{
+	return -ENOTSUPP;
+}
+
 static int __init init_trampolines(void)
 {
 	int i;