diff mbox series

[v2] arm64: bpf: Fix branch offset in JIT

Message ID 20200914160355.19179-1-ilias.apalodimas@linaro.org (mailing list archive)
State New, archived
Headers show
Series [v2] arm64: bpf: Fix branch offset in JIT | expand

Commit Message

Ilias Apalodimas Sept. 14, 2020, 4:03 p.m. UTC
Running the eBPF test_verifier leads to random errors looking like this:

[ 6525.735488] Unexpected kernel BRK exception at EL1
[ 6525.735502] Internal error: ptrace BRK handler: f2000100 [#1] SMP
[ 6525.741609] Modules linked in: nls_utf8 cifs libdes libarc4 dns_resolver fscache binfmt_misc nls_ascii nls_cp437 vfat fat aes_ce_blk crypto_simd cryptd aes_ce_cipher ghash_ce gf128mul efi_pstore sha2_ce sha256_arm64 sha1_ce evdev efivars efivarfs ip_tables x_tables autofs4 btrfs blake2b_generic xor xor_neon zstd_compress raid6_pq libcrc32c crc32c_generic ahci xhci_pci libahci xhci_hcd igb libata i2c_algo_bit nvme realtek usbcore nvme_core scsi_mod t10_pi netsec mdio_devres of_mdio gpio_keys fixed_phy libphy gpio_mb86s7x
[ 6525.787760] CPU: 3 PID: 7881 Comm: test_verifier Tainted: G        W         5.9.0-rc1+ #47
[ 6525.796111] Hardware name: Socionext SynQuacer E-series DeveloperBox, BIOS build #1 Jun  6 2020
[ 6525.804812] pstate: 20000005 (nzCv daif -PAN -UAO BTYPE=--)
[ 6525.810390] pc : bpf_prog_c3d01833289b6311_F+0xc8/0x9f4
[ 6525.815613] lr : bpf_prog_d53bb52e3f4483f9_F+0x38/0xc8c
[ 6525.820832] sp : ffff8000130cbb80
[ 6525.824141] x29: ffff8000130cbbb0 x28: 0000000000000000
[ 6525.829451] x27: 000005ef6fcbf39b x26: 0000000000000000
[ 6525.834759] x25: ffff8000130cbb80 x24: ffff800011dc7038
[ 6525.840067] x23: ffff8000130cbd00 x22: ffff0008f624d080
[ 6525.845375] x21: 0000000000000001 x20: ffff800011dc7000
[ 6525.850682] x19: 0000000000000000 x18: 0000000000000000
[ 6525.855990] x17: 0000000000000000 x16: 0000000000000000
[ 6525.861298] x15: 0000000000000000 x14: 0000000000000000
[ 6525.866606] x13: 0000000000000000 x12: 0000000000000000
[ 6525.871913] x11: 0000000000000001 x10: ffff8000000a660c
[ 6525.877220] x9 : ffff800010951810 x8 : ffff8000130cbc38
[ 6525.882528] x7 : 0000000000000000 x6 : 0000009864cfa881
[ 6525.887836] x5 : 00ffffffffffffff x4 : 002880ba1a0b3e9f
[ 6525.893144] x3 : 0000000000000018 x2 : ffff8000000a4374
[ 6525.898452] x1 : 000000000000000a x0 : 0000000000000009
[ 6525.903760] Call trace:
[ 6525.906202]  bpf_prog_c3d01833289b6311_F+0xc8/0x9f4
[ 6525.911076]  bpf_prog_d53bb52e3f4483f9_F+0x38/0xc8c
[ 6525.915957]  bpf_dispatcher_xdp_func+0x14/0x20
[ 6525.920398]  bpf_test_run+0x70/0x1b0
[ 6525.923969]  bpf_prog_test_run_xdp+0xec/0x190
[ 6525.928326]  __do_sys_bpf+0xc88/0x1b28
[ 6525.932072]  __arm64_sys_bpf+0x24/0x30
[ 6525.935820]  el0_svc_common.constprop.0+0x70/0x168
[ 6525.940607]  do_el0_svc+0x28/0x88
[ 6525.943920]  el0_sync_handler+0x88/0x190
[ 6525.947838]  el0_sync+0x140/0x180
[ 6525.951154] Code: d4202000 d4202000 d4202000 d4202000 (d4202000)
[ 6525.957249] ---[ end trace cecc3f93b14927e2 ]---

The reason is the offset[] creation and later usage while building
the eBPF body. The code currently omits the first instruction, since
build_insn() will increase our ctx->idx before saving it.
That was fine up until bounded eBPF loops were introduced. After that
introduction, offset[0] must be the offset of the end of prologue which
is the start of the 1st insn while, offset[n] holds the
offset of the end of n-th insn.

When "taken loop with back jump to 1st insn" test runs, it will
eventually call bpf2a64_offset(-1, 2, ctx). Since negative indexing is
permitted, the current outcome depends on the value stored in
ctx->offset[-1], which has nothing to do with our array.
If the value happens to be 0 the tests will work. If not this error
triggers.

7c2e988f400e ("bpf: fix x64 JIT code generation for jmp to 1st insn")
fixed an indentical bug on x86 when eBPF bounded loops were introduced.

So let's fix it by creating the ctx->offset[] correctly in the first
place and account for the first instruction while calculating the arm
instruction offsets.

Fixes: 2589726d12a1 ("bpf: introduce bounded loops")
Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Reported-by: Jiri Olsa <jolsa@kernel.org>
Co-developed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Co-developed-by: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Signed-off-by: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Signed-off-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
---
Changes since v1: 
 - Added Co-developed-by, Reported-by and Fixes tags correctly
 - Describe the expected context of ctx->offset[] in comments

 arch/arm64/net/bpf_jit_comp.c | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

Comments

Will Deacon Sept. 15, 2020, 1:11 p.m. UTC | #1
Hi Ilias,

On Mon, Sep 14, 2020 at 07:03:55PM +0300, Ilias Apalodimas wrote:
> Running the eBPF test_verifier leads to random errors looking like this:
> 
> [ 6525.735488] Unexpected kernel BRK exception at EL1
> [ 6525.735502] Internal error: ptrace BRK handler: f2000100 [#1] SMP

Does this happen because we poison the BPF memory with BRK instructions?
Maybe we should look at using a special immediate so we can detect this,
rather than end up in the ptrace handler.

> diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
> index f8912e45be7a..0974effff58c 100644
> --- a/arch/arm64/net/bpf_jit_comp.c
> +++ b/arch/arm64/net/bpf_jit_comp.c
> @@ -143,9 +143,13 @@ static inline void emit_addr_mov_i64(const int reg, const u64 val,
>  	}
>  }
>  
> -static inline int bpf2a64_offset(int bpf_to, int bpf_from,
> +static inline int bpf2a64_offset(int bpf_insn, int off,
>  				 const struct jit_ctx *ctx)
>  {
> +	/* arm64 offset is relative to the branch instruction */
> +	int bpf_from = bpf_insn + 1;
> +	/* BPF JMP offset is relative to the next instruction */
> +	int bpf_to = bpf_insn + off + 1;
>  	int to = ctx->offset[bpf_to];
>  	/* -1 to account for the Branch instruction */
>  	int from = ctx->offset[bpf_from] - 1;

I think this is a bit confusing with all the variables. How about just
doing:

	/* BPF JMP offset is relative to the next BPF instruction */
	bpf_insn++;

	/*
	 * Whereas arm64 branch instructions encode the offset from the
	 * branch itself, so we must subtract 1 from the instruction offset.
	 */
	return ctx->offset[bpf_insn + off] - ctx->offset[bpf_insn] - 1;

> @@ -642,7 +646,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
>  
>  	/* JUMP off */
>  	case BPF_JMP | BPF_JA:
> -		jmp_offset = bpf2a64_offset(i + off, i, ctx);
> +		jmp_offset = bpf2a64_offset(i, off, ctx);
>  		check_imm26(jmp_offset);
>  		emit(A64_B(jmp_offset), ctx);
>  		break;
> @@ -669,7 +673,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
>  	case BPF_JMP32 | BPF_JSLE | BPF_X:
>  		emit(A64_CMP(is64, dst, src), ctx);
>  emit_cond_jmp:
> -		jmp_offset = bpf2a64_offset(i + off, i, ctx);
> +		jmp_offset = bpf2a64_offset(i, off, ctx);
>  		check_imm19(jmp_offset);
>  		switch (BPF_OP(code)) {
>  		case BPF_JEQ:
> @@ -912,18 +916,26 @@ static int build_body(struct jit_ctx *ctx, bool extra_pass)
>  		const struct bpf_insn *insn = &prog->insnsi[i];
>  		int ret;
>  
> +		/*
> +		 * offset[0] offset of the end of prologue, start of the
> +		 * first insn.
> +		 * offset[x] - offset of the end of x insn.

So does offset[1] point at the last arm64 instruction for the first BPF
instruction, or does it point to the first arm64 instruction for the second
BPF instruction?

> +		 */
> +		if (ctx->image == NULL)
> +			ctx->offset[i] = ctx->idx;
> +
>  		ret = build_insn(insn, ctx, extra_pass);
>  		if (ret > 0) {
>  			i++;
>  			if (ctx->image == NULL)
> -				ctx->offset[i] = ctx->idx;
> +				ctx->offset[i] = ctx->offset[i - 1];

Does it matter that we set the offset for both halves of a 16-byte BPF
instruction? I think that's a change in behaviour here.

>  			continue;
>  		}
> -		if (ctx->image == NULL)
> -			ctx->offset[i] = ctx->idx;
>  		if (ret)
>  			return ret;
>  	}
> +	if (ctx->image == NULL)
> +		ctx->offset[i] = ctx->idx;

I think it would be cleared to set ctx->offset[0] before the for loop (with
a comment about what it is) and then change the for loop to iterate from 1
all the way to prog->len.

Will
Ilias Apalodimas Sept. 15, 2020, 1:53 p.m. UTC | #2
On Tue, Sep 15, 2020 at 02:11:03PM +0100, Will Deacon wrote:
> Hi Ilias,
> 
> On Mon, Sep 14, 2020 at 07:03:55PM +0300, Ilias Apalodimas wrote:
> > Running the eBPF test_verifier leads to random errors looking like this:
> > 
> > [ 6525.735488] Unexpected kernel BRK exception at EL1
> > [ 6525.735502] Internal error: ptrace BRK handler: f2000100 [#1] SMP
> 
> Does this happen because we poison the BPF memory with BRK instructions?
> Maybe we should look at using a special immediate so we can detect this,
> rather than end up in the ptrace handler.

As discussed offline this is what aarch64_insn_gen_branch_imm() will return for
offsets > 128M and yes replacing the handler with a more suitable message would 
be good.

> 
> > diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
> > index f8912e45be7a..0974effff58c 100644
> > --- a/arch/arm64/net/bpf_jit_comp.c
> > +++ b/arch/arm64/net/bpf_jit_comp.c
> > @@ -143,9 +143,13 @@ static inline void emit_addr_mov_i64(const int reg, const u64 val,
> >  	}
> >  }
> >  
> > -static inline int bpf2a64_offset(int bpf_to, int bpf_from,
> > +static inline int bpf2a64_offset(int bpf_insn, int off,
> >  				 const struct jit_ctx *ctx)
> >  {
> > +	/* arm64 offset is relative to the branch instruction */
> > +	int bpf_from = bpf_insn + 1;
> > +	/* BPF JMP offset is relative to the next instruction */
> > +	int bpf_to = bpf_insn + off + 1;
> >  	int to = ctx->offset[bpf_to];
> >  	/* -1 to account for the Branch instruction */
> >  	int from = ctx->offset[bpf_from] - 1;
> 
> I think this is a bit confusing with all the variables. How about just
> doing:
> 
> 	/* BPF JMP offset is relative to the next BPF instruction */
> 	bpf_insn++;
> 
> 	/*
> 	 * Whereas arm64 branch instructions encode the offset from the
> 	 * branch itself, so we must subtract 1 from the instruction offset.
> 	 */
> 	return ctx->offset[bpf_insn + off] - ctx->offset[bpf_insn] - 1;
> 

Sure

> > @@ -642,7 +646,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
> >  
> >  	/* JUMP off */
> >  	case BPF_JMP | BPF_JA:
> > -		jmp_offset = bpf2a64_offset(i + off, i, ctx);
> > +		jmp_offset = bpf2a64_offset(i, off, ctx);
> >  		check_imm26(jmp_offset);
> >  		emit(A64_B(jmp_offset), ctx);
> >  		break;
> > @@ -669,7 +673,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
> >  	case BPF_JMP32 | BPF_JSLE | BPF_X:
> >  		emit(A64_CMP(is64, dst, src), ctx);
> >  emit_cond_jmp:
> > -		jmp_offset = bpf2a64_offset(i + off, i, ctx);
> > +		jmp_offset = bpf2a64_offset(i, off, ctx);
> >  		check_imm19(jmp_offset);
> >  		switch (BPF_OP(code)) {
> >  		case BPF_JEQ:
> > @@ -912,18 +916,26 @@ static int build_body(struct jit_ctx *ctx, bool extra_pass)
> >  		const struct bpf_insn *insn = &prog->insnsi[i];
> >  		int ret;
> >  
> > +		/*
> > +		 * offset[0] offset of the end of prologue, start of the
> > +		 * first insn.
> > +		 * offset[x] - offset of the end of x insn.
> 
> So does offset[1] point at the last arm64 instruction for the first BPF
> instruction, or does it point to the first arm64 instruction for the second
> BPF instruction?
> 

Right this isn't exactly a good comment. 
I'll change it to something like:

offset[0] - offset of the end of prologue, start of the 1st insn.
offset[1] - offset of the end of 1st insn.

> > +		 */
> > +		if (ctx->image == NULL)
> > +			ctx->offset[i] = ctx->idx;
> > +
> >  		ret = build_insn(insn, ctx, extra_pass);
> >  		if (ret > 0) {
> >  			i++;
> >  			if (ctx->image == NULL)
> > -				ctx->offset[i] = ctx->idx;
> > +				ctx->offset[i] = ctx->offset[i - 1];
> 
> Does it matter that we set the offset for both halves of a 16-byte BPF
> instruction? I think that's a change in behaviour here.

Yes it is, but from reading around that's what I understood.
for 16-byte eBPF instructions both should point to the start of 
the corresponding jited arm64 instruction.
If I am horribly wrong about this, please shout.

> 
> >  			continue;
> >  		}
> > -		if (ctx->image == NULL)
> > -			ctx->offset[i] = ctx->idx;
> >  		if (ret)
> >  			return ret;
> >  	}
> > +	if (ctx->image == NULL)
> > +		ctx->offset[i] = ctx->idx;
> 
> I think it would be cleared to set ctx->offset[0] before the for loop (with
> a comment about what it is) and then change the for loop to iterate from 1
> all the way to prog->len.
> 

Sure

> Will

Thanks
/Ilias
Jean-Philippe Brucker Sept. 15, 2020, 1:54 p.m. UTC | #3
On Tue, Sep 15, 2020 at 02:11:03PM +0100, Will Deacon wrote:
> >  		ret = build_insn(insn, ctx, extra_pass);
> >  		if (ret > 0) {
> >  			i++;
> >  			if (ctx->image == NULL)
> > -				ctx->offset[i] = ctx->idx;
> > +				ctx->offset[i] = ctx->offset[i - 1];
> 
> Does it matter that we set the offset for both halves of a 16-byte BPF
> instruction? I think that's a change in behaviour here.

After testing this patch a bit, I think setting only the first slot should
be sufficient, and we can drop these two lines. It does make a minor
difference, because although the BPF verifier normally rejects a program
that jumps into the middle of a 16-byte instruction, it can validate it in
some cases: 

        BPF_LD_IMM64(BPF_REG_0, 2)		// 16-byte immediate load
        BPF_JMP_IMM(BPF_JLE, BPF_REG_0, 1, -2)	// If r0 <= 1, branch to
        BPF_EXIT_INSN()				//  the middle of the insn

The verifier detects that the condition is always false and doesn't follow
the branch, hands the program to the JIT. So if we don't set the second
slot, then we generate an invalid branch offset. But that doesn't really
matter as the branch is never taken.

Thanks,
Jean
Will Deacon Sept. 15, 2020, 2:17 p.m. UTC | #4
On Tue, Sep 15, 2020 at 04:53:44PM +0300, Ilias Apalodimas wrote:
> On Tue, Sep 15, 2020 at 02:11:03PM +0100, Will Deacon wrote:
> > Hi Ilias,
> > 
> > On Mon, Sep 14, 2020 at 07:03:55PM +0300, Ilias Apalodimas wrote:
> > > Running the eBPF test_verifier leads to random errors looking like this:
> > > 
> > > [ 6525.735488] Unexpected kernel BRK exception at EL1
> > > [ 6525.735502] Internal error: ptrace BRK handler: f2000100 [#1] SMP
> > 
> > Does this happen because we poison the BPF memory with BRK instructions?
> > Maybe we should look at using a special immediate so we can detect this,
> > rather than end up in the ptrace handler.
> 
> As discussed offline this is what aarch64_insn_gen_branch_imm() will return for
> offsets > 128M and yes replacing the handler with a more suitable message would 
> be good.

Can you give the diff below a shot, please? Hopefully printing a more useful
message will mean these things get triaged/debugged better in future.

Will

--->8

diff --git a/arch/arm64/include/asm/extable.h b/arch/arm64/include/asm/extable.h
index 840a35ed92ec..b15eb4a3e6b2 100644
--- a/arch/arm64/include/asm/extable.h
+++ b/arch/arm64/include/asm/extable.h
@@ -22,6 +22,15 @@ struct exception_table_entry
 
 #define ARCH_HAS_RELATIVE_EXTABLE
 
+static inline bool in_bpf_jit(struct pt_regs *regs)
+{
+	if (!IS_ENABLED(CONFIG_BPF_JIT))
+		return false;
+
+	return regs->pc >= BPF_JIT_REGION_START &&
+	       regs->pc < BPF_JIT_REGION_END;
+}
+
 #ifdef CONFIG_BPF_JIT
 int arm64_bpf_fixup_exception(const struct exception_table_entry *ex,
 			      struct pt_regs *regs);
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 7310a4f7f993..fa76151de6ff 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -384,7 +384,7 @@ void __init debug_traps_init(void)
 	hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP,
 			      TRAP_TRACE, "single-step handler");
 	hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP,
-			      TRAP_BRKPT, "ptrace BRK handler");
+			      TRAP_BRKPT, "BRK handler");
 }
 
 /* Re-enable single step for syscall restarting. */
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 13ebd5ca2070..9f7fde99eda2 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -34,6 +34,7 @@
 #include <asm/daifflags.h>
 #include <asm/debug-monitors.h>
 #include <asm/esr.h>
+#include <asm/extable.h>
 #include <asm/insn.h>
 #include <asm/kprobes.h>
 #include <asm/traps.h>
@@ -994,6 +995,21 @@ static struct break_hook bug_break_hook = {
 	.imm = BUG_BRK_IMM,
 };
 
+static int reserved_fault_handler(struct pt_regs *regs, unsigned int esr)
+{
+	pr_err("%s generated an invalid instruction at %pS!\n",
+		in_bpf_jit(regs) ? "BPF JIT" : "Kernel runtime patching",
+		instruction_pointer(regs));
+
+	/* We cannot handle this */
+	return DBG_HOOK_ERROR;
+}
+
+static struct break_hook fault_break_hook = {
+	.fn = reserved_fault_handler,
+	.imm = FAULT_BRK_IMM,
+};
+
 #ifdef CONFIG_KASAN_SW_TAGS
 
 #define KASAN_ESR_RECOVER	0x20
@@ -1059,6 +1075,7 @@ int __init early_brk64(unsigned long addr, unsigned int esr,
 void __init trap_init(void)
 {
 	register_kernel_break_hook(&bug_break_hook);
+	register_kernel_break_hook(&fault_break_hook);
 #ifdef CONFIG_KASAN_SW_TAGS
 	register_kernel_break_hook(&kasan_break_hook);
 #endif
diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c
index eee1732ab6cd..aa0060178343 100644
--- a/arch/arm64/mm/extable.c
+++ b/arch/arm64/mm/extable.c
@@ -14,9 +14,7 @@ int fixup_exception(struct pt_regs *regs)
 	if (!fixup)
 		return 0;
 
-	if (IS_ENABLED(CONFIG_BPF_JIT) &&
-	    regs->pc >= BPF_JIT_REGION_START &&
-	    regs->pc < BPF_JIT_REGION_END)
+	if (in_bpf_jit(regs))
 		return arm64_bpf_fixup_exception(fixup, regs);
 
 	regs->pc = (unsigned long)&fixup->fixup + fixup->fixup;
Ilias Apalodimas Sept. 15, 2020, 7:23 p.m. UTC | #5
Hi Will, 

On Tue, Sep 15, 2020 at 03:17:08PM +0100, Will Deacon wrote:
> On Tue, Sep 15, 2020 at 04:53:44PM +0300, Ilias Apalodimas wrote:
> > On Tue, Sep 15, 2020 at 02:11:03PM +0100, Will Deacon wrote:
> > > Hi Ilias,
> > > 
> > > On Mon, Sep 14, 2020 at 07:03:55PM +0300, Ilias Apalodimas wrote:
> > > > Running the eBPF test_verifier leads to random errors looking like this:
> > > > 
> > > > [ 6525.735488] Unexpected kernel BRK exception at EL1
> > > > [ 6525.735502] Internal error: ptrace BRK handler: f2000100 [#1] SMP
> > > 
> > > Does this happen because we poison the BPF memory with BRK instructions?
> > > Maybe we should look at using a special immediate so we can detect this,
> > > rather than end up in the ptrace handler.
> > 
> > As discussed offline this is what aarch64_insn_gen_branch_imm() will return for
> > offsets > 128M and yes replacing the handler with a more suitable message would 
> > be good.
> 
> Can you give the diff below a shot, please? Hopefully printing a more useful
> message will mean these things get triaged/debugged better in future.

[...]

The error print is going to be helpful imho. At least it will help
people notice something is wrong a lot faster than the previous one.


[  575.273203] BPF JIT generated an invalid instruction at bpf_prog_64e6f4ba80861823_F+0x2e4/0x9a4!
[  575.281996] Unexpected kernel BRK exception at EL1
[  575.286786] Internal error: BRK handler: f2000100 [#5] PREEMPT SMP
[  575.292965] Modules linked in: crct10dif_ce drm ip_tables x_tables ipv6 btrfs blake2b_generic libcrc32c xor xor_neon zstd_compress raid6_pq nvme nvme_core realtek
[  575.307516] CPU: 21 PID: 11760 Comm: test_verifier Tainted: G      D W         5.9.0-rc3-01410-ged6d9b022813-dirty #1
[  575.318125] Hardware name: Socionext SynQuacer E-series DeveloperBox, BIOS build #1 Jun  6 2020
[  575.326825] pstate: 20000005 (nzCv daif -PAN -UAO BTYPE=--)
[  575.332396] pc : bpf_prog_64e6f4ba80861823_F+0x2e4/0x9a4
[  575.337705] lr : bpf_prog_d3e125b76c96daac+0x40/0xdec
[  575.342752] sp : ffff8000144e3ba0
[  575.346061] x29: ffff8000144e3bd0 x28: 0000000000000000
[  575.351371] x27: 00000085f19dc08d x26: 0000000000000000
[  575.356681] x25: ffff8000144e3ba0 x24: ffff800011fdf038
[  575.361991] x23: ffff8000144e3d20 x22: 0000000000000001
[  575.367301] x21: ffff800011fdf000 x20: ffff0009609d4740
[  575.372611] x19: 0000000000000000 x18: 0000000000000000
[  575.377921] x17: 0000000000000000 x16: 0000000000000000
[  575.383231] x15: 0000000000000000 x14: 0000000000000000
[  575.388540] x13: 0000000000000000 x12: 0000000000000000
[  575.393850] x11: 0000000000000000 x10: ffff8000000bc65c
[  575.399160] x9 : 0000000000000000 x8 : ffff8000144e3c58
[  575.404469] x7 : 0000000000000000 x6 : 0000000dd7ae967a
[  575.409779] x5 : 00ffffffffffffff x4 : 0007fabd6992cf96
[  575.415088] x3 : 0000000000000018 x2 : ffff8000000ba214
[  575.420398] x1 : 000000000000000a x0 : 0000000000000009
[  575.425708] Call trace:
[  575.428152]  bpf_prog_64e6f4ba80861823_F+0x2e4/0x9a4
[  575.433114]  bpf_prog_d3e125b76c96daac+0x40/0xdec
[  575.437822]  bpf_dispatcher_xdp_func+0x10/0x1c
[  575.442265]  bpf_test_run+0x80/0x240
[  575.445838]  bpf_prog_test_run_xdp+0xe8/0x190
[  575.450196]  __do_sys_bpf+0x8e8/0x1b00
[  575.453943]  __arm64_sys_bpf+0x24/0x510
[  575.457780]  el0_svc_common.constprop.0+0x6c/0x170
[  575.462570]  do_el0_svc+0x24/0x90
[  575.465883]  el0_sync_handler+0x90/0x19c
[  575.469802]  el0_sync+0x158/0x180
[  575.473118] Code: d4202000 d4202000 d4202000 d4202000 (d4202000)
[  575.479211] ---[ end trace 8cd54c7d5c0ffda4 ]---

Cheers
/Ilias
Yauheni Kaliuta Sept. 16, 2020, 12:39 p.m. UTC | #6
Hi, Ilias!

>>>>> On Tue, 15 Sep 2020 22:23:11 +0300, Ilias Apalodimas  wrote:

 > Hi Will, 
 > On Tue, Sep 15, 2020 at 03:17:08PM +0100, Will Deacon wrote:
 >> On Tue, Sep 15, 2020 at 04:53:44PM +0300, Ilias Apalodimas wrote:
 >> > On Tue, Sep 15, 2020 at 02:11:03PM +0100, Will Deacon wrote:
 >> > > Hi Ilias,
 >> > > 
 >> > > On Mon, Sep 14, 2020 at 07:03:55PM +0300, Ilias Apalodimas wrote:
 >> > > > Running the eBPF test_verifier leads to random errors looking like this:
 >> > > > 
 >> > > > [ 6525.735488] Unexpected kernel BRK exception at EL1
 >> > > > [ 6525.735502] Internal error: ptrace BRK handler: f2000100 [#1] SMP
 >> > > 
 >> > > Does this happen because we poison the BPF memory with BRK instructions?
 >> > > Maybe we should look at using a special immediate so we can detect this,
 >> > > rather than end up in the ptrace handler.
 >> > 
 >> > As discussed offline this is what aarch64_insn_gen_branch_imm() will return for
 >> > offsets > 128M and yes replacing the handler with a more suitable message would 
 >> > be good.
 >> 
 >> Can you give the diff below a shot, please? Hopefully printing a more useful
 >> message will mean these things get triaged/debugged better in future.

 > [...]

 > The error print is going to be helpful imho. At least it will help
 > people notice something is wrong a lot faster than the previous one.


If you start to amend extables, could you consider a change like

05a68e892e89 ("s390/kernel: expand exception table logic to allow new handling options")

and implementation of BPF_PROBE_MEM then?

 > [ 575.273203] BPF JIT generated an invalid instruction at
 > bpf_prog_64e6f4ba80861823_F+0x2e4/0x9a4!
 > [  575.281996] Unexpected kernel BRK exception at EL1
 > [  575.286786] Internal error: BRK handler: f2000100 [#5] PREEMPT SMP
 > [ 575.292965] Modules linked in: crct10dif_ce drm ip_tables x_tables
 > ipv6 btrfs blake2b_generic libcrc32c xor xor_neon zstd_compress
 > raid6_pq nvme nvme_core realtek
 > [ 575.307516] CPU: 21 PID: 11760 Comm: test_verifier Tainted: G D W
 > 5.9.0-rc3-01410-ged6d9b022813-dirty #1
 > [ 575.318125] Hardware name: Socionext SynQuacer E-series
 > DeveloperBox, BIOS build #1 Jun 6 2020
 > [  575.326825] pstate: 20000005 (nzCv daif -PAN -UAO BTYPE=--)
 > [  575.332396] pc : bpf_prog_64e6f4ba80861823_F+0x2e4/0x9a4
 > [  575.337705] lr : bpf_prog_d3e125b76c96daac+0x40/0xdec
 > [  575.342752] sp : ffff8000144e3ba0
 > [  575.346061] x29: ffff8000144e3bd0 x28: 0000000000000000
 > [  575.351371] x27: 00000085f19dc08d x26: 0000000000000000
 > [  575.356681] x25: ffff8000144e3ba0 x24: ffff800011fdf038
 > [  575.361991] x23: ffff8000144e3d20 x22: 0000000000000001
 > [  575.367301] x21: ffff800011fdf000 x20: ffff0009609d4740
 > [  575.372611] x19: 0000000000000000 x18: 0000000000000000
 > [  575.377921] x17: 0000000000000000 x16: 0000000000000000
 > [  575.383231] x15: 0000000000000000 x14: 0000000000000000
 > [  575.388540] x13: 0000000000000000 x12: 0000000000000000
 > [  575.393850] x11: 0000000000000000 x10: ffff8000000bc65c
 > [  575.399160] x9 : 0000000000000000 x8 : ffff8000144e3c58
 > [  575.404469] x7 : 0000000000000000 x6 : 0000000dd7ae967a
 > [  575.409779] x5 : 00ffffffffffffff x4 : 0007fabd6992cf96
 > [  575.415088] x3 : 0000000000000018 x2 : ffff8000000ba214
 > [  575.420398] x1 : 000000000000000a x0 : 0000000000000009
 > [  575.425708] Call trace:
 > [  575.428152]  bpf_prog_64e6f4ba80861823_F+0x2e4/0x9a4
 > [  575.433114]  bpf_prog_d3e125b76c96daac+0x40/0xdec
 > [  575.437822]  bpf_dispatcher_xdp_func+0x10/0x1c
 > [  575.442265]  bpf_test_run+0x80/0x240
 > [  575.445838]  bpf_prog_test_run_xdp+0xe8/0x190
 > [  575.450196]  __do_sys_bpf+0x8e8/0x1b00
 > [  575.453943]  __arm64_sys_bpf+0x24/0x510
 > [  575.457780]  el0_svc_common.constprop.0+0x6c/0x170
 > [  575.462570]  do_el0_svc+0x24/0x90
 > [  575.465883]  el0_sync_handler+0x90/0x19c
 > [  575.469802]  el0_sync+0x158/0x180
 > [  575.473118] Code: d4202000 d4202000 d4202000 d4202000 (d4202000)
 > [  575.479211] ---[ end trace 8cd54c7d5c0ffda4 ]---

 > Cheers
 > /Ilias
Jean-Philippe Brucker Sept. 16, 2020, 1:17 p.m. UTC | #7
On Wed, Sep 16, 2020 at 03:39:37PM +0300, Yauheni Kaliuta wrote:
> If you start to amend extables, could you consider a change like
> 
> 05a68e892e89 ("s390/kernel: expand exception table logic to allow new handling options")
> 
> and implementation of BPF_PROBE_MEM then?

Commit 800834285361 ("bpf, arm64: Add BPF exception tables") should have
taken care of that, no?

Thanks,
Jean
Yauheni Kaliuta Sept. 16, 2020, 1:45 p.m. UTC | #8
Hi, Jean-Philippe!

>>>>> On Wed, 16 Sep 2020 15:17:02 +0200, Jean-Philippe Brucker  wrote:

 > On Wed, Sep 16, 2020 at 03:39:37PM +0300, Yauheni Kaliuta wrote:
 >> If you start to amend extables, could you consider a change like
 >> 
 >> 05a68e892e89 ("s390/kernel: expand exception table logic to allow
 >> new handling options")
 >> 
 >> and implementation of BPF_PROBE_MEM then?

 > Commit 800834285361 ("bpf, arm64: Add BPF exception tables") should have
 > taken care of that, no?

Ah, missed that. Thanks!
Ilias Apalodimas Sept. 16, 2020, 4:04 p.m. UTC | #9
Hi Will, 

On Tue, Sep 15, 2020 at 02:11:03PM +0100, Will Deacon wrote:
[...]
> >  			continue;
> >  		}
> > -		if (ctx->image == NULL)
> > -			ctx->offset[i] = ctx->idx;
> >  		if (ret)
> >  			return ret;
> >  	}
> > +	if (ctx->image == NULL)
> > +		ctx->offset[i] = ctx->idx;
> 
> I think it would be cleared to set ctx->offset[0] before the for loop (with
> a comment about what it is) and then change the for loop to iterate from 1
> all the way to prog->len.

On a second thought while trying to code this, I'd prefer leaving it as is. 
First of all we'll have to increase ctx->idx while adding ctx->offset[0] and 
more importantly, I don't think that's a 'special' case. 
It's still the same thing i.e the start of the 1st instruction (which happens 
to be the end of prologue), the next one will be the start of the second 
instruction etc etc. 

I don't mind changing if you feel strongly about it, but I think it makese sense
as-is.

Thanks
/Ilias
> 
> Will
diff mbox series

Patch

diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index f8912e45be7a..0974effff58c 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -143,9 +143,13 @@  static inline void emit_addr_mov_i64(const int reg, const u64 val,
 	}
 }
 
-static inline int bpf2a64_offset(int bpf_to, int bpf_from,
+static inline int bpf2a64_offset(int bpf_insn, int off,
 				 const struct jit_ctx *ctx)
 {
+	/* arm64 offset is relative to the branch instruction */
+	int bpf_from = bpf_insn + 1;
+	/* BPF JMP offset is relative to the next instruction */
+	int bpf_to = bpf_insn + off + 1;
 	int to = ctx->offset[bpf_to];
 	/* -1 to account for the Branch instruction */
 	int from = ctx->offset[bpf_from] - 1;
@@ -642,7 +646,7 @@  static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 
 	/* JUMP off */
 	case BPF_JMP | BPF_JA:
-		jmp_offset = bpf2a64_offset(i + off, i, ctx);
+		jmp_offset = bpf2a64_offset(i, off, ctx);
 		check_imm26(jmp_offset);
 		emit(A64_B(jmp_offset), ctx);
 		break;
@@ -669,7 +673,7 @@  static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 	case BPF_JMP32 | BPF_JSLE | BPF_X:
 		emit(A64_CMP(is64, dst, src), ctx);
 emit_cond_jmp:
-		jmp_offset = bpf2a64_offset(i + off, i, ctx);
+		jmp_offset = bpf2a64_offset(i, off, ctx);
 		check_imm19(jmp_offset);
 		switch (BPF_OP(code)) {
 		case BPF_JEQ:
@@ -912,18 +916,26 @@  static int build_body(struct jit_ctx *ctx, bool extra_pass)
 		const struct bpf_insn *insn = &prog->insnsi[i];
 		int ret;
 
+		/*
+		 * offset[0] offset of the end of prologue, start of the
+		 * first insn.
+		 * offset[x] - offset of the end of x insn.
+		 */
+		if (ctx->image == NULL)
+			ctx->offset[i] = ctx->idx;
+
 		ret = build_insn(insn, ctx, extra_pass);
 		if (ret > 0) {
 			i++;
 			if (ctx->image == NULL)
-				ctx->offset[i] = ctx->idx;
+				ctx->offset[i] = ctx->offset[i - 1];
 			continue;
 		}
-		if (ctx->image == NULL)
-			ctx->offset[i] = ctx->idx;
 		if (ret)
 			return ret;
 	}
+	if (ctx->image == NULL)
+		ctx->offset[i] = ctx->idx;
 
 	return 0;
 }
@@ -1002,7 +1014,7 @@  struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	memset(&ctx, 0, sizeof(ctx));
 	ctx.prog = prog;
 
-	ctx.offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL);
+	ctx.offset = kcalloc(prog->len + 1, sizeof(int), GFP_KERNEL);
 	if (ctx.offset == NULL) {
 		prog = orig_prog;
 		goto out_off;
@@ -1089,7 +1101,7 @@  struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	prog->jited_len = prog_size;
 
 	if (!prog->is_func || extra_pass) {
-		bpf_prog_fill_jited_linfo(prog, ctx.offset);
+		bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
 out_off:
 		kfree(ctx.offset);
 		kfree(jit_data);