[bpf-next,v2,2/2] bpf: Add arm64 JIT support for bpf_addr_space_cast instruction.

Message ID 20240321153102.103832-3-puranjay12@gmail.com (mailing list archive)
State Superseded
Delegated to: BPF
Series bpf,arm64: Add support for BPF Arena

Checks

Context Check Description
bpf/vmtest-bpf-next-PR fail PR summary
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 8 this patch: 8
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 9 maintainers not CCed: linux-kselftest@vger.kernel.org nathan@kernel.org justinstitt@google.com ndesaulniers@google.com shuah@kernel.org morbo@google.com llvm@lists.linux.dev mykolal@fb.com linux-arm-kernel@lists.infradead.org
netdev/build_clang success Errors and warnings before: 8 this patch: 8
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 8 this patch: 8
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 69 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-7 fail Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 fail Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc

Commit Message

Puranjay Mohan March 21, 2024, 3:31 p.m. UTC
LLVM generates the bpf_addr_space_cast instruction when translating
pointers between the native (zero) address space and
__attribute__((address_space(N))). Address space 1 is reserved as the
bpf_arena address space.

rY = addr_space_cast(rX, 0, 1) is processed by the verifier and
converted to normal 32-bit move: wX = wY

rY = addr_space_cast(rX, 1, 0) has to be converted by JIT:

Here I explain using symbolic language what the JIT is supposed to do:
We have:
	src = [src_upper32][src_lower32] // 64 bit src kernel pointer
	uvm = [uvm_upper32][uvm_lower32] // 64 bit user_vm_start

The JIT has to build the dst reg as follows:
	dst = [uvm_upper32][src_lower32] // if src_lower32 != 0
	dst = [00000000000][00000000000] // if src_lower32 == 0

Signed-off-by: Puranjay Mohan <puranjay12@gmail.com>
---
 arch/arm64/net/bpf_jit.h                     |  1 +
 arch/arm64/net/bpf_jit_comp.c                | 35 ++++++++++++++++++++
 tools/testing/selftests/bpf/DENYLIST.aarch64 |  2 --
 3 files changed, 36 insertions(+), 2 deletions(-)
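
In other words, for a 64-bit source pointer the cast keeps the low 32 bits and
substitutes the upper 32 bits of user_vm_start, except that a zero low half
must yield a zero result. A minimal userspace C model of that contract
(illustrative only, not part of the patch; the helper name is made up):

#include <stdint.h>
#include <stdio.h>

/* Contract for rY = addr_space_cast(rX, 1, 0) as described above:
 * result = [uvm_upper32][src_lower32], or 0 when src_lower32 == 0.
 */
static uint64_t addr_space_cast_1_0(uint64_t src, uint64_t user_vm_start)
{
	uint64_t lo = src & 0xffffffffULL;

	return lo ? (user_vm_start & 0xffffffff00000000ULL) | lo : 0;
}

int main(void)
{
	uint64_t uvm = 0x0000aaaa00000000ULL;	/* made-up user_vm_start */

	printf("%#llx\n", (unsigned long long)
	       addr_space_cast_1_0(0xffff800012345678ULL, uvm));	/* 0xaaaa12345678 */
	printf("%#llx\n", (unsigned long long)
	       addr_space_cast_1_0(0xffff800000000000ULL, uvm));	/* 0 */
	return 0;
}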

Comments

Xu Kuohai March 23, 2024, 7:19 a.m. UTC | #1
On 3/21/2024 11:31 PM, Puranjay Mohan wrote:
> LLVM generates bpf_addr_space_cast instruction while translating
> pointers between native (zero) address space and
> __attribute__((address_space(N))). The addr_space=1 is reserved as
> bpf_arena address space.
> 
> rY = addr_space_cast(rX, 0, 1) is processed by the verifier and
> converted to normal 32-bit move: wX = wY
> 
> rY = addr_space_cast(rX, 1, 0) has to be converted by JIT:
> 
> Here I explain using symbolic language what the JIT is supposed to do:
> We have:
> 	src = [src_upper32][src_lower32] // 64 bit src kernel pointer
> 	uvm = [uvm_upper32][uvm_lower32] // 64 bit user_vm_start
> 
> The JIT has to make the dst reg like following
> 	dst = [uvm_upper32][src_lower32] // if src_lower32 != 0
> 	dst = [00000000000][00000000000] // if src_lower32 == 0
> 
> Signed-off-by: Puranjay Mohan <puranjay12@gmail.com>
> ---
>   arch/arm64/net/bpf_jit.h                     |  1 +
>   arch/arm64/net/bpf_jit_comp.c                | 35 ++++++++++++++++++++
>   tools/testing/selftests/bpf/DENYLIST.aarch64 |  2 --
>   3 files changed, 36 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
> index 23b1b34db088..813c3c428fde 100644
> --- a/arch/arm64/net/bpf_jit.h
> +++ b/arch/arm64/net/bpf_jit.h
> @@ -238,6 +238,7 @@
>   #define A64_LSLV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, LSLV)
>   #define A64_LSRV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, LSRV)
>   #define A64_ASRV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, ASRV)
> +#define A64_RORV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, RORV)
>   
>   /* Data-processing (3 source) */
>   /* Rd = Ra + Rn * Rm */
> diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
> index b9b5febe64f0..37c94ebd06b2 100644
> --- a/arch/arm64/net/bpf_jit_comp.c
> +++ b/arch/arm64/net/bpf_jit_comp.c
> @@ -82,6 +82,7 @@ struct jit_ctx {
>   	__le32 *ro_image;
>   	u32 stack_size;
>   	int fpb_offset;
> +	u64 user_vm_start;
>   };
>   
>   struct bpf_plt {
> @@ -868,6 +869,34 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
>   	/* dst = src */
>   	case BPF_ALU | BPF_MOV | BPF_X:

is it legal to encode BPF_ADDR_SPACE_CAST with BPF_ALU?

>   	case BPF_ALU64 | BPF_MOV | BPF_X:
> +		if (insn->off == BPF_ADDR_SPACE_CAST &&
> +		    insn->imm == 1U << 16) {
> +			/* Zero out tmp2 */
> +			emit(A64_EOR(1, tmp2, tmp2, tmp2), ctx);
> +
> +			/* Move lo_32_bits(src) to dst */
> +			if (dst != src)
> +				emit(A64_MOV(0, dst, src), ctx);
> +
> +			/* Logical shift left by 32 bits */
> +			emit(A64_LSL(1, dst, dst, 32), ctx);
> +
> +			/* Get upper 32 bits of user_vm_start in tmp */
> +			emit_a64_mov_i(0, tmp, ctx->user_vm_start >> 32, ctx);
> +
> +			/* dst |= up_32_bits(user_vm_start) */
> +			emit(A64_ORR(1, dst, dst, tmp), ctx);
> +
> +			/* Rotate by 32 bits to get final result */
> +			emit_a64_mov_i(0, tmp, 32, ctx);
> +			emit(A64_RORV(1, dst, dst, tmp), ctx);
> +
> +			/* If lo_32_bits(dst) == 0, set dst = tmp2(0) */
> +			emit(A64_CBZ(0, dst, 2), ctx);
> +			emit(A64_MOV(1, tmp2, dst), ctx);
> +			emit(A64_MOV(1, dst, tmp2), ctx);

seems we could simplify it to:

emit_a64_mov_i(0, dst, ctx->user_vm_start >> 32, ctx);
emit(A64_LSL(1, dst, dst, 32), ctx);
emit(A64_MOV(0, tmp, src), ctx); // 32-bit mov clears the upper 32 bits
emit(A64_CBZ(1, tmp, 2), ctx);
emit(A64_ORR(1, tmp, dst, tmp), ctx);
emit(A64_MOV(1, dst, tmp), ctx);

> +		break;

not aligned

> +		}
>   		switch (insn->off) {
>   		case 0:
>   			emit(A64_MOV(is64, dst, src), ctx);
> @@ -1690,6 +1719,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
>   	}
>   
>   	ctx.fpb_offset = find_fpb_offset(prog);
> +	ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena);
>   
>   	/*
>   	 * 1. Initial fake pass to compute ctx->idx and ctx->offset.
> @@ -2511,6 +2541,11 @@ bool bpf_jit_supports_exceptions(void)
>   	return true;
>   }
>   
> +bool bpf_jit_supports_arena(void)
> +{
> +	return true;
> +}
> +
>   void bpf_jit_free(struct bpf_prog *prog)
>   {
>   	if (prog->jited) {
> diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64
> index d8ade15e2789..0445ac38bc07 100644
> --- a/tools/testing/selftests/bpf/DENYLIST.aarch64
> +++ b/tools/testing/selftests/bpf/DENYLIST.aarch64
> @@ -10,5 +10,3 @@ fill_link_info/kprobe_multi_link_info            # bpf_program__attach_kprobe_mu
>   fill_link_info/kretprobe_multi_link_info         # bpf_program__attach_kprobe_multi_opts unexpected error: -95
>   fill_link_info/kprobe_multi_invalid_ubuff        # bpf_program__attach_kprobe_multi_opts unexpected error: -95
>   missed/kprobe_recursion                          # missed_kprobe_recursion__attach unexpected error: -95 (errno 95)
> -verifier_arena                                   # JIT does not support arena
> -arena_htab                                       # JIT does not support arena
Puranjay Mohan March 23, 2024, 10:21 a.m. UTC | #2
Xu Kuohai <xukuohai@huaweicloud.com> writes:

> On 3/21/2024 11:31 PM, Puranjay Mohan wrote:
>> LLVM generates bpf_addr_space_cast instruction while translating
>> pointers between native (zero) address space and
>> __attribute__((address_space(N))). The addr_space=1 is reserved as
>> bpf_arena address space.
>> 
>> rY = addr_space_cast(rX, 0, 1) is processed by the verifier and
>> converted to normal 32-bit move: wX = wY
>> 
>> rY = addr_space_cast(rX, 1, 0) has to be converted by JIT:
>> 
>> Here I explain using symbolic language what the JIT is supposed to do:
>> We have:
>> 	src = [src_upper32][src_lower32] // 64 bit src kernel pointer
>> 	uvm = [uvm_upper32][uvm_lower32] // 64 bit user_vm_start
>> 
>> The JIT has to make the dst reg like following
>> 	dst = [uvm_upper32][src_lower32] // if src_lower32 != 0
>> 	dst = [00000000000][00000000000] // if src_lower32 == 0
>> 
>> Signed-off-by: Puranjay Mohan <puranjay12@gmail.com>
>> ---
>>   arch/arm64/net/bpf_jit.h                     |  1 +
>>   arch/arm64/net/bpf_jit_comp.c                | 35 ++++++++++++++++++++
>>   tools/testing/selftests/bpf/DENYLIST.aarch64 |  2 --
>>   3 files changed, 36 insertions(+), 2 deletions(-)
>> 
>> diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
>> index 23b1b34db088..813c3c428fde 100644
>> --- a/arch/arm64/net/bpf_jit.h
>> +++ b/arch/arm64/net/bpf_jit.h
>> @@ -238,6 +238,7 @@
>>   #define A64_LSLV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, LSLV)
>>   #define A64_LSRV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, LSRV)
>>   #define A64_ASRV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, ASRV)
>> +#define A64_RORV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, RORV)
>>   
>>   /* Data-processing (3 source) */
>>   /* Rd = Ra + Rn * Rm */
>> diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
>> index b9b5febe64f0..37c94ebd06b2 100644
>> --- a/arch/arm64/net/bpf_jit_comp.c
>> +++ b/arch/arm64/net/bpf_jit_comp.c
>> @@ -82,6 +82,7 @@ struct jit_ctx {
>>   	__le32 *ro_image;
>>   	u32 stack_size;
>>   	int fpb_offset;
>> +	u64 user_vm_start;
>>   };
>>   
>>   struct bpf_plt {
>> @@ -868,6 +869,34 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
>>   	/* dst = src */
>>   	case BPF_ALU | BPF_MOV | BPF_X:
>
> is it legal to encode BPF_ADDR_SPACE_CAST with BPF_ALU?

No, the verifier will reject a BPF_ALU MOV that has off=BPF_ADDR_SPACE_CAST.
So a check is not required, but I will add BPF_CLASS(code) == BPF_ALU64 below
in the next version.
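
For reference, the class distinction is visible directly in the instruction
encoding; a tiny userspace illustration using the uapi macros (not JIT code):

#include <linux/bpf.h>
#include <stdio.h>

int main(void)
{
	/* A 32-bit BPF_ALU MOV could carry the same off/imm bits, so the
	 * extra class check keeps the arena-cast path on the 64-bit variant
	 * only (the verifier rejects the 32-bit combination anyway).
	 */
	__u8 mov32 = BPF_ALU | BPF_MOV | BPF_X;		/* 0xbc */
	__u8 mov64 = BPF_ALU64 | BPF_MOV | BPF_X;	/* 0xbf */

	printf("mov32 is ALU64: %d\n", BPF_CLASS(mov32) == BPF_ALU64);	/* 0 */
	printf("mov64 is ALU64: %d\n", BPF_CLASS(mov64) == BPF_ALU64);	/* 1 */
	return 0;
}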

>>   	case BPF_ALU64 | BPF_MOV | BPF_X:
>> +		if (insn->off == BPF_ADDR_SPACE_CAST &&
>> +		    insn->imm == 1U << 16) {
>> +			/* Zero out tmp2 */
>> +			emit(A64_EOR(1, tmp2, tmp2, tmp2), ctx);
>> +
>> +			/* Move lo_32_bits(src) to dst */
>> +			if (dst != src)
>> +				emit(A64_MOV(0, dst, src), ctx);
>> +
>> +			/* Logical shift left by 32 bits */
>> +			emit(A64_LSL(1, dst, dst, 32), ctx);
>> +
>> +			/* Get upper 32 bits of user_vm_start in tmp */
>> +			emit_a64_mov_i(0, tmp, ctx->user_vm_start >> 32, ctx);
>> +
>> +			/* dst |= up_32_bits(user_vm_start) */
>> +			emit(A64_ORR(1, dst, dst, tmp), ctx);
>> +
>> +			/* Rotate by 32 bits to get final result */
>> +			emit_a64_mov_i(0, tmp, 32, ctx);
>> +			emit(A64_RORV(1, dst, dst, tmp), ctx);
>> +
>> +			/* If lo_32_bits(dst) == 0, set dst = tmp2(0) */
>> +			emit(A64_CBZ(0, dst, 2), ctx);
>> +			emit(A64_MOV(1, tmp2, dst), ctx);
>> +			emit(A64_MOV(1, dst, tmp2), ctx);
>
> seems we could simplify it to:
>
> emit_a64_mov_i(0, dst, ctx->user_vm_start >> 32, ctx);
> emit(A64_LSL(1, dst, dst, 32), ctx);
> emit(A64_MOV(0, tmp, src), ctx); // 32-bit mov clears the upper 32 bits
> emit(A64_CBZ(1, tmp, 2), ctx);
> emit(A64_ORR(1, tmp, dst, tmp), ctx);
> emit(A64_MOV(1, dst, tmp), ctx);

Thanks, I will use this in the next version. I will move the
emit(A64_MOV(0, tmp, src), ctx); to the top so that, if src and dst are the
same register, src is copied to tmp before it is overwritten through dst:

emit(A64_MOV(0, tmp, src), ctx); // 32-bit mov clears the upper 32 bits
emit_a64_mov_i(0, dst, ctx->user_vm_start >> 32, ctx);
emit(A64_LSL(1, dst, dst, 32), ctx);
emit(A64_CBZ(1, tmp, 2), ctx);
emit(A64_ORR(1, tmp, dst, tmp), ctx);
emit(A64_MOV(1, dst, tmp), ctx);

>> +		break;
>
> not aligned

Will fix it in the next version.

Thanks for the feedback.

Puranjay
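
For readers not fluent in the arm64 emit helpers, here is a rough userspace C
model of what this reordered six-instruction sequence computes (illustrative
only; it assumes the CBZ offset of 2 lands on the final MOV, as in the original
sequence's use of CBZ, so only the ORR is skipped when the low 32 bits are
zero):

#include <stdint.h>
#include <assert.h>

static uint64_t arena_cast_seq_model(uint64_t src, uint64_t user_vm_start)
{
	uint64_t tmp, dst;

	tmp = src & 0xffffffffULL;	/* A64_MOV(0, tmp, src): 32-bit mov clears upper bits */
	dst = user_vm_start >> 32;	/* emit_a64_mov_i: upper half of user_vm_start */
	dst <<= 32;			/* A64_LSL(1, dst, dst, 32) */
	if (tmp != 0)			/* A64_CBZ(1, tmp, 2): skip the ORR when tmp == 0 */
		tmp = dst | tmp;	/* A64_ORR(1, tmp, dst, tmp) */
	dst = tmp;			/* A64_MOV(1, dst, tmp) */

	return dst;	/* [uvm_upper32][src_lower32], or 0 if src_lower32 == 0 */
}

int main(void)
{
	uint64_t uvm = 0x0000aaaa00000000ULL;	/* made-up user_vm_start */

	assert(arena_cast_seq_model(0xffff800012345678ULL, uvm) == 0x0000aaaa12345678ULL);
	assert(arena_cast_seq_model(0xffff800000000000ULL, uvm) == 0);
	return 0;
}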

Patch

diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
index 23b1b34db088..813c3c428fde 100644
--- a/arch/arm64/net/bpf_jit.h
+++ b/arch/arm64/net/bpf_jit.h
@@ -238,6 +238,7 @@ 
 #define A64_LSLV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, LSLV)
 #define A64_LSRV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, LSRV)
 #define A64_ASRV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, ASRV)
+#define A64_RORV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, RORV)
 
 /* Data-processing (3 source) */
 /* Rd = Ra + Rn * Rm */
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index b9b5febe64f0..37c94ebd06b2 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -82,6 +82,7 @@  struct jit_ctx {
 	__le32 *ro_image;
 	u32 stack_size;
 	int fpb_offset;
+	u64 user_vm_start;
 };
 
 struct bpf_plt {
@@ -868,6 +869,34 @@  static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 	/* dst = src */
 	case BPF_ALU | BPF_MOV | BPF_X:
 	case BPF_ALU64 | BPF_MOV | BPF_X:
+		if (insn->off == BPF_ADDR_SPACE_CAST &&
+		    insn->imm == 1U << 16) {
+			/* Zero out tmp2 */
+			emit(A64_EOR(1, tmp2, tmp2, tmp2), ctx);
+
+			/* Move lo_32_bits(src) to dst */
+			if (dst != src)
+				emit(A64_MOV(0, dst, src), ctx);
+
+			/* Logical shift left by 32 bits */
+			emit(A64_LSL(1, dst, dst, 32), ctx);
+
+			/* Get upper 32 bits of user_vm_start in tmp */
+			emit_a64_mov_i(0, tmp, ctx->user_vm_start >> 32, ctx);
+
+			/* dst |= up_32_bits(user_vm_start) */
+			emit(A64_ORR(1, dst, dst, tmp), ctx);
+
+			/* Rotate by 32 bits to get final result */
+			emit_a64_mov_i(0, tmp, 32, ctx);
+			emit(A64_RORV(1, dst, dst, tmp), ctx);
+
+			/* If lo_32_bits(dst) == 0, set dst = tmp2(0) */
+			emit(A64_CBZ(0, dst, 2), ctx);
+			emit(A64_MOV(1, tmp2, dst), ctx);
+			emit(A64_MOV(1, dst, tmp2), ctx);
+		break;
+		}
 		switch (insn->off) {
 		case 0:
 			emit(A64_MOV(is64, dst, src), ctx);
@@ -1690,6 +1719,7 @@  struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	}
 
 	ctx.fpb_offset = find_fpb_offset(prog);
+	ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena);
 
 	/*
 	 * 1. Initial fake pass to compute ctx->idx and ctx->offset.
@@ -2511,6 +2541,11 @@  bool bpf_jit_supports_exceptions(void)
 	return true;
 }
 
+bool bpf_jit_supports_arena(void)
+{
+	return true;
+}
+
 void bpf_jit_free(struct bpf_prog *prog)
 {
 	if (prog->jited) {
diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64
index d8ade15e2789..0445ac38bc07 100644
--- a/tools/testing/selftests/bpf/DENYLIST.aarch64
+++ b/tools/testing/selftests/bpf/DENYLIST.aarch64
@@ -10,5 +10,3 @@  fill_link_info/kprobe_multi_link_info            # bpf_program__attach_kprobe_mu
 fill_link_info/kretprobe_multi_link_info         # bpf_program__attach_kprobe_multi_opts unexpected error: -95
 fill_link_info/kprobe_multi_invalid_ubuff        # bpf_program__attach_kprobe_multi_opts unexpected error: -95
 missed/kprobe_recursion                          # missed_kprobe_recursion__attach unexpected error: -95 (errno 95)
-verifier_arena                                   # JIT does not support arena
-arena_htab                                       # JIT does not support arena