diff mbox series

[RFC,bpf-next,v2,12/16] MIPS: eBPF: refactor common 32/64-bit functions and headers

Message ID 56f0d08169bed47303a482a5c6bff67fe1e4bdee.1633392335.git.Tony.Ambardar@gmail.com (mailing list archive)
State RFC
Delegated to: BPF
Headers show
Series MIPS: eBPF: refactor code, add MIPS32 JIT | expand

Checks

Context Check Description
bpf/vmtest-bpf-next pending VM_Test
bpf/vmtest-bpf-next-PR pending PR summary
netdev/cover_letter success Series has a cover letter
netdev/fixes_present success Fixes tag not required for -next series
netdev/patch_count fail Series longer than 15 patches (and no cover letter)
netdev/tree_selection success Clearly marked for bpf-next
netdev/subject_prefix success Link
netdev/cc_maintainers success CCed 13 of 13 maintainers
netdev/source_inline success Was 5 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/kdoc fail Errors and warnings before: 3 this patch: 5
netdev/verify_fixes success No Fixes tag
netdev/checkpatch warning CHECK: Alignment should match open parenthesis CHECK: Comparison to NULL could be written "!ctx->target" CHECK: Comparison to NULL could be written "target" CHECK: From:/Signed-off-by: email comments mismatch: 'From: Tony Ambardar <tony.ambardar@gmail.com>' != 'Signed-off-by: Tony Ambardar <Tony.Ambardar@gmail.com>' CHECK: Please don't use multiple blank lines CHECK: Please use a blank line after function/struct/union/enum declarations CHECK: Unnecessary parentheses around 'insn->src_reg != BPF_PSEUDO_CALL' CHECK: Unnecessary parentheses around 'insn->src_reg == BPF_PSEUDO_CALL' CHECK: extern prototypes should be avoided in .h files CHECK: spaces preferred around that '|' (ctx:VxV) WARNING: 'regster' may be misspelled - perhaps 'register'? WARNING: Prefer 'fallthrough;' over fallthrough comment WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: line length of 101 exceeds 80 columns WARNING: line length of 102 exceeds 80 columns WARNING: line length of 106 exceeds 80 columns WARNING: line length of 108 exceeds 80 columns WARNING: line length of 81 exceeds 80 columns WARNING: line length of 82 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 88 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns WARNING: line length of 92 exceeds 80 columns WARNING: line length of 94 exceeds 80 columns WARNING: line length of 95 exceeds 80 columns WARNING: line length of 98 exceeds 80 columns WARNING: line length of 99 exceeds 80 columns
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/header_inline success No static functions without inline keyword in header files

Commit Message

Tony Ambardar Oct. 5, 2021, 8:26 a.m. UTC
Move core functions and headers from ebpf_jit.c to ebpf_jit_core.c and
ebpf_jit.h, and relocate the MIPS64-specific build_one_insn() to
ebpf_jit_comp64.c.

Signed-off-by: Tony Ambardar <Tony.Ambardar@gmail.com>
---
 arch/mips/net/Makefile          |    2 +-
 arch/mips/net/ebpf_jit.c        | 2424 -------------------------------
 arch/mips/net/ebpf_jit.h        |  297 ++++
 arch/mips/net/ebpf_jit_comp64.c |  990 +++++++++++++
 arch/mips/net/ebpf_jit_core.c   | 1189 +++++++++++++++
 5 files changed, 2477 insertions(+), 2425 deletions(-)
 delete mode 100644 arch/mips/net/ebpf_jit.c
 create mode 100644 arch/mips/net/ebpf_jit.h
 create mode 100644 arch/mips/net/ebpf_jit_comp64.c
 create mode 100644 arch/mips/net/ebpf_jit_core.c
diff mbox series

Patch

diff --git a/arch/mips/net/Makefile b/arch/mips/net/Makefile
index d55912349039..de42f4a4db56 100644
--- a/arch/mips/net/Makefile
+++ b/arch/mips/net/Makefile
@@ -2,4 +2,4 @@ 
 # MIPS networking code
 
 obj-$(CONFIG_MIPS_CBPF_JIT) += bpf_jit.o bpf_jit_asm.o
-obj-$(CONFIG_MIPS_EBPF_JIT) += ebpf_jit.o
+obj-$(CONFIG_MIPS_EBPF_JIT) += ebpf_jit_core.o ebpf_jit_comp64.o
diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c
deleted file mode 100644
index 501c1d532be6..000000000000
--- a/arch/mips/net/ebpf_jit.c
+++ /dev/null
@@ -1,2424 +0,0 @@ 
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Just-In-Time compiler for eBPF filters on MIPS32/MIPS64
- * Copyright (c) 2021 Tony Ambardar <Tony.Ambardar@gmail.com>
- *
- * Based on code from:
- *
- * Copyright (c) 2017 Cavium, Inc.
- * Author: David Daney <david.daney@cavium.com>
- *
- * Copyright (c) 2014 Imagination Technologies Ltd.
- * Author: Markos Chandras <markos.chandras@imgtec.com>
- */
-
-#include <linux/bitops.h>
-#include <linux/errno.h>
-#include <linux/filter.h>
-#include <linux/bpf.h>
-#include <linux/slab.h>
-#include <asm/bitops.h>
-#include <asm/byteorder.h>
-#include <asm/cacheflush.h>
-#include <asm/cpu-features.h>
-#include <asm/isa-rev.h>
-#include <asm/uasm.h>
-
-/* Registers used by JIT:	  (MIPS32)	(MIPS64) */
-#define MIPS_R_ZERO	0
-#define MIPS_R_AT	1
-#define MIPS_R_V0	2	/* BPF_R0	BPF_R0 */
-#define MIPS_R_V1	3	/* BPF_R0	BPF_TCC */
-#define MIPS_R_A0	4	/* BPF_R1	BPF_R1 */
-#define MIPS_R_A1	5	/* BPF_R1	BPF_R2 */
-#define MIPS_R_A2	6	/* BPF_R2	BPF_R3 */
-#define MIPS_R_A3	7	/* BPF_R2	BPF_R4 */
-
-/* MIPS64 replaces T0-T3 scratch regs with extra arguments A4-A7. */
-#ifdef CONFIG_64BIT
-#  define MIPS_R_A4	8	/* (n/a)	BPF_R5 */
-#else
-#  define MIPS_R_T0	8	/* BPF_R3	(n/a)  */
-#  define MIPS_R_T1	9	/* BPF_R3	(n/a)  */
-#  define MIPS_R_T2	10	/* BPF_R4	(n/a)  */
-#  define MIPS_R_T3	11	/* BPF_R4	(n/a)  */
-#endif
-
-#define MIPS_R_T4	12	/* BPF_R5	BPF_AX */
-#define MIPS_R_T5	13	/* BPF_R5	(free) */
-#define MIPS_R_T6	14	/* BPF_AX	(used) */
-#define MIPS_R_T7	15	/* BPF_AX	(free) */
-#define MIPS_R_S0	16	/* BPF_R6	BPF_R6 */
-#define MIPS_R_S1	17	/* BPF_R6	BPF_R7 */
-#define MIPS_R_S2	18	/* BPF_R7	BPF_R8 */
-#define MIPS_R_S3	19	/* BPF_R7	BPF_R9 */
-#define MIPS_R_S4	20	/* BPF_R8	BPF_TCC */
-#define MIPS_R_S5	21	/* BPF_R8	(free) */
-#define MIPS_R_S6	22	/* BPF_R9	(free) */
-#define MIPS_R_S7	23	/* BPF_R9	(free) */
-#define MIPS_R_T8	24	/* (used)	(used) */
-#define MIPS_R_T9	25	/* (used)	(used) */
-#define MIPS_R_SP	29
-#define MIPS_R_S8	30	/* BPF_R10	BPF_R10 */
-#define MIPS_R_RA	31
-
-/* eBPF flags */
-#define EBPF_SAVE_S0	BIT(0)
-#define EBPF_SAVE_S1	BIT(1)
-#define EBPF_SAVE_S2	BIT(2)
-#define EBPF_SAVE_S3	BIT(3)
-#define EBPF_SAVE_S4	BIT(4)
-#define EBPF_SAVE_S5	BIT(5)
-#define EBPF_SAVE_S6	BIT(6)
-#define EBPF_SAVE_S7	BIT(7)
-#define EBPF_SAVE_S8	BIT(8)
-#define EBPF_SAVE_RA	BIT(9)
-#define EBPF_SEEN_FP	BIT(10)
-#define EBPF_SEEN_TC	BIT(11)
-#define EBPF_TCC_IN_RUN	BIT(12)
-
-/*
- * Extra JIT registers dedicated to holding TCC during runtime or saving
- * across calls.
- */
-enum {
-	JIT_RUN_TCC = MAX_BPF_JIT_REG,
-	JIT_SAV_TCC
-};
-/* Temporary register for passing TCC if nothing dedicated. */
-#define TEMP_PASS_TCC MIPS_R_T8
-
-/*
- * Word-size and endianness-aware helpers for building MIPS32 vs MIPS64
- * tables and selecting 32-bit subregisters from a register pair base.
- * Simplify use by emulating MIPS_R_SP and MIPS_R_ZERO as register pairs
- * and adding HI/LO word memory offsets.
- */
-#ifdef CONFIG_64BIT
-#  define HI(reg) (reg)
-#  define LO(reg) (reg)
-#  define OFFHI(mem) (mem)
-#  define OFFLO(mem) (mem)
-#else	/* CONFIG_32BIT */
-#  ifdef __BIG_ENDIAN
-#    define HI(reg) ((reg) == MIPS_R_SP ? MIPS_R_ZERO : \
-		     (reg) == MIPS_R_S8 ? MIPS_R_ZERO : \
-		     (reg))
-#    define LO(reg) ((reg) == MIPS_R_ZERO ? (reg) : \
-		     (reg) == MIPS_R_SP ? (reg) : \
-		     (reg) == MIPS_R_S8 ? (reg) : \
-		     (reg) + 1)
-#    define OFFHI(mem) (mem)
-#    define OFFLO(mem) ((mem) + sizeof(long))
-#  else	/* __LITTLE_ENDIAN */
-#    define HI(reg) ((reg) == MIPS_R_ZERO ? (reg) : \
-		     (reg) == MIPS_R_SP ? MIPS_R_ZERO : \
-		     (reg) == MIPS_R_S8 ? MIPS_R_ZERO : \
-		     (reg) + 1)
-#    define LO(reg) (reg)
-#    define OFFHI(mem) ((mem) + sizeof(long))
-#    define OFFLO(mem) (mem)
-#  endif
-#endif
-
-#ifdef CONFIG_64BIT
-#  define M(expr32, expr64) (expr64)
-#else
-#  define M(expr32, expr64) (expr32)
-#endif
-const struct {
-	/* Register or pair base */
-	int reg;
-	/* Register flags */
-	u32 flags;
-	/* Usage table:   (MIPS32)			 (MIPS64) */
-} bpf2mips[] = {
-	/* Return value from in-kernel function, and exit value from eBPF. */
-	[BPF_REG_0] =  {M(MIPS_R_V0,			MIPS_R_V0)},
-	/* Arguments from eBPF program to in-kernel/BPF functions. */
-	[BPF_REG_1] =  {M(MIPS_R_A0,			MIPS_R_A0)},
-	[BPF_REG_2] =  {M(MIPS_R_A2,			MIPS_R_A1)},
-	[BPF_REG_3] =  {M(MIPS_R_T0,			MIPS_R_A2)},
-	[BPF_REG_4] =  {M(MIPS_R_T2,			MIPS_R_A3)},
-	[BPF_REG_5] =  {M(MIPS_R_T4,			MIPS_R_A4)},
-	/* Callee-saved registers preserved by in-kernel/BPF functions. */
-	[BPF_REG_6] =  {M(MIPS_R_S0,			MIPS_R_S0),
-			M(EBPF_SAVE_S0|EBPF_SAVE_S1,	EBPF_SAVE_S0)},
-	[BPF_REG_7] =  {M(MIPS_R_S2,			MIPS_R_S1),
-			M(EBPF_SAVE_S2|EBPF_SAVE_S3,	EBPF_SAVE_S1)},
-	[BPF_REG_8] =  {M(MIPS_R_S4,			MIPS_R_S2),
-			M(EBPF_SAVE_S4|EBPF_SAVE_S5,	EBPF_SAVE_S2)},
-	[BPF_REG_9] =  {M(MIPS_R_S6,			MIPS_R_S3),
-			M(EBPF_SAVE_S6|EBPF_SAVE_S7,	EBPF_SAVE_S3)},
-	[BPF_REG_10] = {M(MIPS_R_S8,			MIPS_R_S8),
-			M(EBPF_SAVE_S8|EBPF_SEEN_FP,	EBPF_SAVE_S8|EBPF_SEEN_FP)},
-	/* Internal register for rewriting insns during JIT blinding. */
-	[BPF_REG_AX] = {M(MIPS_R_T6,			MIPS_R_T4)},
-	/*
-	 * Internal registers for TCC runtime holding and saving during
-	 * calls. A zero save register indicates using scratch space on
-	 * the stack for storage during calls. A zero hold register means
-	 * no dedicated register holds TCC during runtime (but a temp reg
-	 * still passes TCC to tailcall or bpf2bpf call).
-	 */
-	[JIT_RUN_TCC] =	{M(0,				MIPS_R_V1)},
-	[JIT_SAV_TCC] =	{M(0,				MIPS_R_S4),
-			 M(0,				EBPF_SAVE_S4)}
-};
-#undef M
-
-static inline bool is64bit(void)
-{
-	return IS_ENABLED(CONFIG_64BIT);
-}
-
-static inline bool isbigend(void)
-{
-	return IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
-}
-
-/* Stack region alignment under N64 and O32 ABIs */
-#define STACK_ALIGN (2 * sizeof(long))
-
-/*
- * For the mips64 ISA, we need to track the value range or type for
- * each JIT register.  The BPF machine requires zero extended 32-bit
- * values, but the mips64 ISA requires sign extended 32-bit values.
- * At each point in the BPF program we track the state of every
- * register so that we can zero extend or sign extend as the BPF
- * semantics require.
- */
-enum reg_val_type {
-	/* uninitialized */
-	REG_UNKNOWN,
-	/* not known to be 32-bit compatible. */
-	REG_64BIT,
-	/* 32-bit compatible, no truncation needed for 64-bit ops. */
-	REG_64BIT_32BIT,
-	/* 32-bit compatible, need truncation for 64-bit ops. */
-	REG_32BIT,
-	/* 32-bit no sign/zero extension needed. */
-	REG_32BIT_POS
-};
-
-/*
- * high bit of offsets indicates if long branch conversion done at
- * this insn.
- */
-#define OFFSETS_B_CONV	BIT(31)
-
-/**
- * struct jit_ctx - JIT context
- * @prog:		The program
- * @stack_size:		eBPF stack size
- * @bpf_stack_off:	eBPF FP offset
- * @prolog_skip:	Prologue insns to skip by BPF caller
- * @idx:		Instruction index
- * @flags:		JIT flags
- * @offsets:		Instruction offsets
- * @target:		Memory location for compiled instructions
- * @reg_val_types:	Packed enum reg_val_type for each register
- */
-struct jit_ctx {
-	const struct bpf_prog *prog;
-	int stack_size;
-	int bpf_stack_off;
-	int prolog_skip;
-	u32 idx;
-	u32 flags;
-	u32 *offsets;
-	u32 *target;
-	u64 *reg_val_types;
-	unsigned int long_b_conversion:1;
-	unsigned int gen_b_offsets:1;
-	unsigned int use_bbit_insns:1;
-};
-
-static void set_reg_val_type(u64 *rvt, int reg, enum reg_val_type type)
-{
-	*rvt &= ~(7ull << (reg * 3));
-	*rvt |= ((u64)type << (reg * 3));
-}
-
-static enum reg_val_type get_reg_val_type(const struct jit_ctx *ctx,
-					  int index, int reg)
-{
-	return (ctx->reg_val_types[index] >> (reg * 3)) & 7;
-}
-
-/* Simply emit the instruction if the JIT memory space has been allocated */
-#define emit_instr_long(ctx, func64, func32, ...)		\
-do {								\
-	if ((ctx)->target != NULL) {				\
-		u32 *p = &(ctx)->target[ctx->idx];		\
-		if (IS_ENABLED(CONFIG_64BIT))			\
-			uasm_i_##func64(&p, ##__VA_ARGS__);	\
-		else						\
-			uasm_i_##func32(&p, ##__VA_ARGS__);	\
-	}							\
-	(ctx)->idx++;						\
-} while (0)
-
-#define emit_instr(ctx, func, ...)				\
-	emit_instr_long(ctx, func, func, ##__VA_ARGS__)
-
-static unsigned int j_target(struct jit_ctx *ctx, int target_idx)
-{
-	unsigned long target_va, base_va;
-	unsigned int r;
-
-	if (!ctx->target)
-		return 0;
-
-	base_va = (unsigned long)ctx->target;
-	target_va = base_va + (ctx->offsets[target_idx] & ~OFFSETS_B_CONV);
-
-	if ((base_va & ~0x0ffffffful) != (target_va & ~0x0ffffffful))
-		return (unsigned int)-1;
-	r = target_va & 0x0ffffffful;
-	return r;
-}
-
-/* Compute the immediate value for PC-relative branches. */
-static u32 b_imm(unsigned int tgt, struct jit_ctx *ctx)
-{
-	if (!ctx->gen_b_offsets)
-		return 0;
-
-	/*
-	 * We want a pc-relative branch.  tgt is the instruction offset
-	 * we want to jump to.
-
-	 * Branch on MIPS:
-	 * I: target_offset <- sign_extend(offset)
-	 * I+1: PC += target_offset (delay slot)
-	 *
-	 * ctx->idx currently points to the branch instruction
-	 * but the offset is added to the delay slot so we need
-	 * to subtract 4.
-	 */
-	return (ctx->offsets[tgt] & ~OFFSETS_B_CONV) -
-		(ctx->idx * 4) - 4;
-}
-
-/* Sign-extend dst register or HI 32-bit reg of pair. */
-static inline void gen_sext_insn(int dst, struct jit_ctx *ctx)
-{
-	if (is64bit())
-		emit_instr(ctx, sll, dst, dst, 0);
-	else
-		emit_instr(ctx, sra, HI(dst), LO(dst), 31);
-}
-
-/*
- * Zero-extend dst register or HI 32-bit reg of pair, if either forced
- * or the BPF verifier does not insert its own zext insns.
- */
-static inline void gen_zext_insn(int dst, bool force, struct jit_ctx *ctx)
-{
-	if (!ctx->prog->aux->verifier_zext || force) {
-		if (is64bit())
-			emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
-		else
-			emit_instr(ctx, and, HI(dst), MIPS_R_ZERO, MIPS_R_ZERO);
-	}
-}
-
-static inline bool tail_call_present(struct jit_ctx *ctx)
-{
-	return ctx->flags & EBPF_SEEN_TC || ctx->prog->aux->tail_call_reachable;
-}
-
-enum reg_usage {
-	REG_SRC_FP_OK,
-	REG_SRC_NO_FP,
-	REG_DST_FP_OK,
-	REG_DST_NO_FP
-};
-
-/*
- * For eBPF, the register mapping naturally falls out of the
- * requirements of eBPF and the MIPS N64/O32 ABIs. We also maintain
- * a separate frame pointer, setting BPF_REG_10 relative to $sp.
- */
-static int ebpf_to_mips_reg(struct jit_ctx *ctx,
-			    const struct bpf_insn *insn,
-			    enum reg_usage u)
-{
-	int ebpf_reg = (u == REG_SRC_FP_OK || u == REG_SRC_NO_FP) ?
-		insn->src_reg : insn->dst_reg;
-
-	switch (ebpf_reg) {
-	case BPF_REG_0:
-	case BPF_REG_1:
-	case BPF_REG_2:
-	case BPF_REG_3:
-	case BPF_REG_4:
-	case BPF_REG_5:
-	case BPF_REG_6:
-	case BPF_REG_7:
-	case BPF_REG_8:
-	case BPF_REG_9:
-	case BPF_REG_AX:
-		ctx->flags |= bpf2mips[ebpf_reg].flags;
-		return bpf2mips[ebpf_reg].reg;
-	case BPF_REG_10:
-		if (u == REG_DST_NO_FP || u == REG_SRC_NO_FP)
-			goto bad_reg;
-		ctx->flags |= bpf2mips[ebpf_reg].flags;
-		return bpf2mips[ebpf_reg].reg;
-	default:
-bad_reg:
-		WARN(1, "Illegal bpf reg: %d\n", ebpf_reg);
-		return -EINVAL;
-	}
-}
-
-/*
- * eBPF stack frame will be something like:
- *
- *  Entry $sp ------>   +--------------------------------+
- *                      |   $ra  (optional)              |
- *                      +--------------------------------+
- *                      |   $s8  (optional)              |
- *                      +--------------------------------+
- *                      |   $s7  (optional)              |
- *                      +--------------------------------+
- *                      |   $s6  (optional)              |
- *                      +--------------------------------+
- *                      |   $s5  (optional)              |
- *                      +--------------------------------+
- *                      |   $s4  (optional)              |
- *                      +--------------------------------+
- *                      |   $s3  (optional)              |
- *                      +--------------------------------+
- *                      |   $s2  (optional)              |
- *                      +--------------------------------+
- *                      |   $s1  (optional)              |
- *                      +--------------------------------+
- *                      |   $s0  (optional)              |
- *                      +--------------------------------+
- *                      |   tmp-storage  (optional)      |
- * $sp + bpf_stack_off->+--------------------------------+ <--BPF_REG_10
- *                      |   BPF_REG_10 relative storage  |
- *                      |    MAX_BPF_STACK (optional)    |
- *                      |      .                         |
- *                      |      .                         |
- *                      |      .                         |
- *        $sp ------>   +--------------------------------+
- *
- * If BPF_REG_10 is never referenced, then the MAX_BPF_STACK sized
- * area is not allocated.
- */
-static int build_int_prologue(struct jit_ctx *ctx)
-{
-	int tcc_run = bpf2mips[JIT_RUN_TCC].reg ?
-		      bpf2mips[JIT_RUN_TCC].reg :
-		      TEMP_PASS_TCC;
-	int tcc_sav = bpf2mips[JIT_SAV_TCC].reg;
-	const struct bpf_prog *prog = ctx->prog;
-	int r10 = bpf2mips[BPF_REG_10].reg;
-	int r1 = bpf2mips[BPF_REG_1].reg;
-	int stack_adjust = 0;
-	int store_offset;
-	int locals_size;
-	int start_idx;
-
-	if (ctx->flags & EBPF_SAVE_RA)
-		stack_adjust += sizeof(long);
-	if (ctx->flags & EBPF_SAVE_S8)
-		stack_adjust += sizeof(long);
-	if (ctx->flags & EBPF_SAVE_S7)
-		stack_adjust += sizeof(long);
-	if (ctx->flags & EBPF_SAVE_S6)
-		stack_adjust += sizeof(long);
-	if (ctx->flags & EBPF_SAVE_S5)
-		stack_adjust += sizeof(long);
-	if (ctx->flags & EBPF_SAVE_S4)
-		stack_adjust += sizeof(long);
-	if (ctx->flags & EBPF_SAVE_S3)
-		stack_adjust += sizeof(long);
-	if (ctx->flags & EBPF_SAVE_S2)
-		stack_adjust += sizeof(long);
-	if (ctx->flags & EBPF_SAVE_S1)
-		stack_adjust += sizeof(long);
-	if (ctx->flags & EBPF_SAVE_S0)
-		stack_adjust += sizeof(long);
-	if (tail_call_present(ctx) &&
-	    !(ctx->flags & EBPF_TCC_IN_RUN) && !tcc_sav)
-		/* Allocate scratch space for holding TCC if needed. */
-		stack_adjust += sizeof(long);
-
-	stack_adjust = ALIGN(stack_adjust, STACK_ALIGN);
-
-	locals_size = (ctx->flags & EBPF_SEEN_FP) ? prog->aux->stack_depth : 0;
-	locals_size = ALIGN(locals_size, STACK_ALIGN);
-
-	stack_adjust += locals_size;
-
-	ctx->stack_size = stack_adjust;
-	ctx->bpf_stack_off = locals_size;
-
-	/*
-	 * First instruction initializes the tail call count (TCC) and
-	 * assumes a call from kernel using the native ABI. Calls made
-	 * using the BPF ABI (bpf2bpf or tail call) will skip this insn
-	 * and pass the TCC via register.
-	 */
-	start_idx = ctx->idx;
-	emit_instr(ctx, addiu, tcc_run, MIPS_R_ZERO, MAX_TAIL_CALL_CNT);
-
-	/*
-	 * When called from kernel under O32 ABI we must set up BPF R1
-	 * context, since BPF R1 is an endian-order regster pair ($a0:$a1
-	 * or $a1:$a0) but context is always passed in $a0 as a 32-bit
-	 * pointer. As above, bpf2bpf and tail calls will skip these insns
-	 * since all registers are correctly set up already.
-	 */
-	if (!is64bit()) {
-		if (isbigend())
-			emit_instr(ctx, move, LO(r1), MIPS_R_A0);
-		/* Sanitize upper 32-bit reg */
-		gen_zext_insn(r1, true, ctx);
-	}
-	/*
-	 * Calls using BPF ABI (bpf2bpf and tail calls) will skip TCC
-	 * initialization and R1 context fixup needed by kernel calls.
-	 */
-	ctx->prolog_skip = (ctx->idx - start_idx) * 4;
-
-	if (stack_adjust)
-		emit_instr_long(ctx, daddiu, addiu,
-					MIPS_R_SP, MIPS_R_SP, -stack_adjust);
-	else
-		return 0;
-
-	store_offset = stack_adjust - sizeof(long);
-
-	if (ctx->flags & EBPF_SAVE_RA) {
-		emit_instr_long(ctx, sd, sw,
-					MIPS_R_RA, store_offset, MIPS_R_SP);
-		store_offset -= sizeof(long);
-	}
-	if (ctx->flags & EBPF_SAVE_S8) {
-		emit_instr_long(ctx, sd, sw,
-					MIPS_R_S8, store_offset, MIPS_R_SP);
-		store_offset -= sizeof(long);
-	}
-	if (ctx->flags & EBPF_SAVE_S7) {
-		emit_instr_long(ctx, sd, sw,
-					MIPS_R_S7, store_offset, MIPS_R_SP);
-		store_offset -= sizeof(long);
-	}
-	if (ctx->flags & EBPF_SAVE_S6) {
-		emit_instr_long(ctx, sd, sw,
-					MIPS_R_S6, store_offset, MIPS_R_SP);
-		store_offset -= sizeof(long);
-	}
-	if (ctx->flags & EBPF_SAVE_S5) {
-		emit_instr_long(ctx, sd, sw,
-					MIPS_R_S5, store_offset, MIPS_R_SP);
-		store_offset -= sizeof(long);
-	}
-	if (ctx->flags & EBPF_SAVE_S4) {
-		emit_instr_long(ctx, sd, sw,
-					MIPS_R_S4, store_offset, MIPS_R_SP);
-		store_offset -= sizeof(long);
-	}
-	if (ctx->flags & EBPF_SAVE_S3) {
-		emit_instr_long(ctx, sd, sw,
-					MIPS_R_S3, store_offset, MIPS_R_SP);
-		store_offset -= sizeof(long);
-	}
-	if (ctx->flags & EBPF_SAVE_S2) {
-		emit_instr_long(ctx, sd, sw,
-					MIPS_R_S2, store_offset, MIPS_R_SP);
-		store_offset -= sizeof(long);
-	}
-	if (ctx->flags & EBPF_SAVE_S1) {
-		emit_instr_long(ctx, sd, sw,
-					MIPS_R_S1, store_offset, MIPS_R_SP);
-		store_offset -= sizeof(long);
-	}
-	if (ctx->flags & EBPF_SAVE_S0) {
-		emit_instr_long(ctx, sd, sw,
-					MIPS_R_S0, store_offset, MIPS_R_SP);
-		store_offset -= sizeof(long);
-	}
-
-	/* Store TCC in backup register or stack scratch space if indicated. */
-	if (tail_call_present(ctx) && !(ctx->flags & EBPF_TCC_IN_RUN)) {
-		if (tcc_sav)
-			emit_instr(ctx, move, tcc_sav, tcc_run);
-		else
-			emit_instr_long(ctx, sd, sw,
-					tcc_run, ctx->bpf_stack_off, MIPS_R_SP);
-	}
-
-	/* Prepare BPF FP as single-reg ptr, emulate upper 32-bits as needed.*/
-	if (ctx->flags & EBPF_SEEN_FP)
-		emit_instr_long(ctx, daddiu, addiu, r10,
-						MIPS_R_SP, ctx->bpf_stack_off);
-
-	return 0;
-}
-
-static int build_int_epilogue(struct jit_ctx *ctx, int dest_reg)
-{
-	const struct bpf_prog *prog = ctx->prog;
-	int stack_adjust = ctx->stack_size;
-	int store_offset = stack_adjust - sizeof(long);
-	int ax = bpf2mips[BPF_REG_AX].reg;
-	int r0 = bpf2mips[BPF_REG_0].reg;
-	enum reg_val_type td;
-
-	/*
-	 * As in prologue code, we default to assuming exit to the kernel.
-	 * Returns to the kernel follow the N64 or O32 ABI. For N64, the
-	 * BPF R0 return value may need to be sign-extended, while O32 may
-	 * need fixup of BPF R0 to place the 32-bit return value in MIPS V0.
-	 *
-	 * Returns to BPF2BPF callers consistently use the BPF 64-bit ABI,
-	 * so register usage and mapping between JIT and OS is unchanged.
-	 * Accommodate by saving unmodified R0 register data to allow a
-	 * BPF caller to restore R0 after we return.
-	 */
-	if (dest_reg == MIPS_R_RA) { /* kernel or bpf2bpf function return */
-		if (is64bit()) {
-			/*
-			 * Backup BPF R0 to AX, allowing the caller to
-			 * restore it in case this is a BPF2BPF rather
-			 * than a kernel return.
-			 */
-			emit_instr(ctx, move, ax, r0);
-			/*
-			 * Don't let zero-extended R0 value escape to
-			 * kernel on return, so sign-extend if needed.
-			 */
-			td = get_reg_val_type(ctx, prog->len, BPF_REG_0);
-			if (td == REG_64BIT)
-				gen_sext_insn(r0, ctx);
-		} else if (isbigend()) { /* and 32-bit */
-			/*
-			 * Backup high 32-bit register of BPF R0 to AX,
-			 * since it occupies MIPS_R_V0 which needs to be
-			 * clobbered for a kernel return.
-			 */
-			emit_instr(ctx, move, HI(ax), HI(r0));
-			/*
-			 * O32 ABI specifies 32-bit return value always
-			 * placed in MIPS_R_V0 regardless of the native
-			 * endianness. This would be in the wrong position
-			 * in a BPF R0 reg pair on big-endian systems, so
-			 * we must relocate.
-			 */
-			emit_instr(ctx, move, MIPS_R_V0, LO(r0));
-		}
-	}
-
-	if (ctx->flags & EBPF_SAVE_RA) {
-		emit_instr_long(ctx, ld, lw,
-					MIPS_R_RA, store_offset, MIPS_R_SP);
-		store_offset -= sizeof(long);
-	}
-	if (ctx->flags & EBPF_SAVE_S8) {
-		emit_instr_long(ctx, ld, lw,
-					MIPS_R_S8, store_offset, MIPS_R_SP);
-		store_offset -= sizeof(long);
-	}
-	if (ctx->flags & EBPF_SAVE_S7) {
-		emit_instr_long(ctx, ld, lw,
-					MIPS_R_S7, store_offset, MIPS_R_SP);
-		store_offset -= sizeof(long);
-	}
-	if (ctx->flags & EBPF_SAVE_S6) {
-		emit_instr_long(ctx, ld, lw,
-					MIPS_R_S6, store_offset, MIPS_R_SP);
-		store_offset -= sizeof(long);
-	}
-	if (ctx->flags & EBPF_SAVE_S5) {
-		emit_instr_long(ctx, ld, lw,
-					MIPS_R_S5, store_offset, MIPS_R_SP);
-		store_offset -= sizeof(long);
-	}
-	if (ctx->flags & EBPF_SAVE_S4) {
-		emit_instr_long(ctx, ld, lw,
-					MIPS_R_S4, store_offset, MIPS_R_SP);
-		store_offset -= sizeof(long);
-	}
-	if (ctx->flags & EBPF_SAVE_S3) {
-		emit_instr_long(ctx, ld, lw,
-					MIPS_R_S3, store_offset, MIPS_R_SP);
-		store_offset -= sizeof(long);
-	}
-	if (ctx->flags & EBPF_SAVE_S2) {
-		emit_instr_long(ctx, ld, lw,
-					MIPS_R_S2, store_offset, MIPS_R_SP);
-		store_offset -= sizeof(long);
-	}
-	if (ctx->flags & EBPF_SAVE_S1) {
-		emit_instr_long(ctx, ld, lw,
-					MIPS_R_S1, store_offset, MIPS_R_SP);
-		store_offset -= sizeof(long);
-	}
-	if (ctx->flags & EBPF_SAVE_S0) {
-		emit_instr_long(ctx, ld, lw,
-					MIPS_R_S0, store_offset, MIPS_R_SP);
-		store_offset -= sizeof(long);
-	}
-	emit_instr(ctx, jr, dest_reg);
-
-	/* Delay slot */
-	if (stack_adjust)
-		emit_instr_long(ctx, daddiu, addiu,
-					MIPS_R_SP, MIPS_R_SP, stack_adjust);
-	else
-		emit_instr(ctx, nop);
-
-	return 0;
-}
-
-static void gen_imm_to_reg(const struct bpf_insn *insn, int reg,
-			   struct jit_ctx *ctx)
-{
-	if (insn->imm >= S16_MIN && insn->imm <= S16_MAX) {
-		emit_instr(ctx, addiu, reg, MIPS_R_ZERO, insn->imm);
-	} else {
-		int lower = (s16)(insn->imm & 0xffff);
-		int upper = insn->imm - lower;
-
-		emit_instr(ctx, lui, reg, upper >> 16);
-		/* lui already clears lower halfword */
-		if (lower)
-			emit_instr(ctx, addiu, reg, reg, lower);
-	}
-}
-
-static int gen_imm_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
-			int idx)
-{
-	int upper_bound, lower_bound;
-	int dst = ebpf_to_mips_reg(ctx, insn, REG_DST_NO_FP);
-
-	if (dst < 0)
-		return dst;
-
-	switch (BPF_OP(insn->code)) {
-	case BPF_MOV:
-	case BPF_ADD:
-		upper_bound = S16_MAX;
-		lower_bound = S16_MIN;
-		break;
-	case BPF_SUB:
-		upper_bound = -(int)S16_MIN;
-		lower_bound = -(int)S16_MAX;
-		break;
-	case BPF_AND:
-	case BPF_OR:
-	case BPF_XOR:
-		upper_bound = 0xffff;
-		lower_bound = 0;
-		break;
-	case BPF_RSH:
-	case BPF_LSH:
-	case BPF_ARSH:
-		/* Shift amounts are truncated, no need for bounds */
-		upper_bound = S32_MAX;
-		lower_bound = S32_MIN;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	/*
-	 * Immediate move clobbers the register, so no sign/zero
-	 * extension needed.
-	 */
-	if (BPF_CLASS(insn->code) == BPF_ALU64 &&
-	    BPF_OP(insn->code) != BPF_MOV &&
-	    get_reg_val_type(ctx, idx, insn->dst_reg) == REG_32BIT)
-		emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
-	/* BPF_ALU | BPF_LSH doesn't need separate sign extension */
-	if (BPF_CLASS(insn->code) == BPF_ALU &&
-	    BPF_OP(insn->code) != BPF_LSH &&
-	    BPF_OP(insn->code) != BPF_MOV &&
-	    get_reg_val_type(ctx, idx, insn->dst_reg) != REG_32BIT)
-		emit_instr(ctx, sll, dst, dst, 0);
-
-	if (insn->imm >= lower_bound && insn->imm <= upper_bound) {
-		/* single insn immediate case */
-		switch (BPF_OP(insn->code) | BPF_CLASS(insn->code)) {
-		case BPF_ALU64 | BPF_MOV:
-			emit_instr(ctx, daddiu, dst, MIPS_R_ZERO, insn->imm);
-			break;
-		case BPF_ALU64 | BPF_AND:
-		case BPF_ALU | BPF_AND:
-			emit_instr(ctx, andi, dst, dst, insn->imm);
-			break;
-		case BPF_ALU64 | BPF_OR:
-		case BPF_ALU | BPF_OR:
-			emit_instr(ctx, ori, dst, dst, insn->imm);
-			break;
-		case BPF_ALU64 | BPF_XOR:
-		case BPF_ALU | BPF_XOR:
-			emit_instr(ctx, xori, dst, dst, insn->imm);
-			break;
-		case BPF_ALU64 | BPF_ADD:
-			emit_instr(ctx, daddiu, dst, dst, insn->imm);
-			break;
-		case BPF_ALU64 | BPF_SUB:
-			emit_instr(ctx, daddiu, dst, dst, -insn->imm);
-			break;
-		case BPF_ALU64 | BPF_RSH:
-			emit_instr(ctx, dsrl_safe, dst, dst, insn->imm & 0x3f);
-			break;
-		case BPF_ALU | BPF_RSH:
-			emit_instr(ctx, srl, dst, dst, insn->imm & 0x1f);
-			break;
-		case BPF_ALU64 | BPF_LSH:
-			emit_instr(ctx, dsll_safe, dst, dst, insn->imm & 0x3f);
-			break;
-		case BPF_ALU | BPF_LSH:
-			emit_instr(ctx, sll, dst, dst, insn->imm & 0x1f);
-			break;
-		case BPF_ALU64 | BPF_ARSH:
-			emit_instr(ctx, dsra_safe, dst, dst, insn->imm & 0x3f);
-			break;
-		case BPF_ALU | BPF_ARSH:
-			emit_instr(ctx, sra, dst, dst, insn->imm & 0x1f);
-			break;
-		case BPF_ALU | BPF_MOV:
-			emit_instr(ctx, addiu, dst, MIPS_R_ZERO, insn->imm);
-			break;
-		case BPF_ALU | BPF_ADD:
-			emit_instr(ctx, addiu, dst, dst, insn->imm);
-			break;
-		case BPF_ALU | BPF_SUB:
-			emit_instr(ctx, addiu, dst, dst, -insn->imm);
-			break;
-		default:
-			return -EINVAL;
-		}
-	} else {
-		/* multi insn immediate case */
-		if (BPF_OP(insn->code) == BPF_MOV) {
-			gen_imm_to_reg(insn, dst, ctx);
-		} else {
-			gen_imm_to_reg(insn, MIPS_R_AT, ctx);
-			switch (BPF_OP(insn->code) | BPF_CLASS(insn->code)) {
-			case BPF_ALU64 | BPF_AND:
-			case BPF_ALU | BPF_AND:
-				emit_instr(ctx, and, dst, dst, MIPS_R_AT);
-				break;
-			case BPF_ALU64 | BPF_OR:
-			case BPF_ALU | BPF_OR:
-				emit_instr(ctx, or, dst, dst, MIPS_R_AT);
-				break;
-			case BPF_ALU64 | BPF_XOR:
-			case BPF_ALU | BPF_XOR:
-				emit_instr(ctx, xor, dst, dst, MIPS_R_AT);
-				break;
-			case BPF_ALU64 | BPF_ADD:
-				emit_instr(ctx, daddu, dst, dst, MIPS_R_AT);
-				break;
-			case BPF_ALU64 | BPF_SUB:
-				emit_instr(ctx, dsubu, dst, dst, MIPS_R_AT);
-				break;
-			case BPF_ALU | BPF_ADD:
-				emit_instr(ctx, addu, dst, dst, MIPS_R_AT);
-				break;
-			case BPF_ALU | BPF_SUB:
-				emit_instr(ctx, subu, dst, dst, MIPS_R_AT);
-				break;
-			default:
-				return -EINVAL;
-			}
-		}
-	}
-
-	return 0;
-}
-
-static void emit_const_to_reg(struct jit_ctx *ctx, int dst, unsigned long value)
-{
-	if (value >= S16_MIN || value <= S16_MAX) {
-		emit_instr_long(ctx, daddiu, addiu, dst, MIPS_R_ZERO, (int)value);
-	} else if (value >= S32_MIN ||
-		   (value <= S32_MAX && value > U16_MAX)) {
-		emit_instr(ctx, lui, dst, (s32)(s16)(value >> 16));
-		emit_instr(ctx, ori, dst, dst, (unsigned int)(value & 0xffff));
-	} else {
-		int i;
-		bool seen_part = false;
-		int needed_shift = 0;
-
-		for (i = 0; i < 4; i++) {
-			u64 part = (value >> (16 * (3 - i))) & 0xffff;
-
-			if (seen_part && needed_shift > 0 && (part || i == 3)) {
-				emit_instr(ctx, dsll_safe, dst, dst, needed_shift);
-				needed_shift = 0;
-			}
-			if (part) {
-				if (i == 0 || (!seen_part && i < 3 && part < 0x8000)) {
-					emit_instr(ctx, lui, dst, (s32)(s16)part);
-					needed_shift = -16;
-				} else {
-					emit_instr(ctx, ori, dst,
-						   seen_part ? dst : MIPS_R_ZERO,
-						   (unsigned int)part);
-				}
-				seen_part = true;
-			}
-			if (seen_part)
-				needed_shift += 16;
-		}
-	}
-}
-
-/*
- * Push BPF regs R3-R5 to the stack, skipping BPF regs R1-R2 which are
- * passed via MIPS register pairs in $a0-$a3. Register order within pairs
- * and the memory storage order are identical i.e. endian native.
- */
-static void emit_push_args(struct jit_ctx *ctx)
-{
-	int store_offset = 2 * sizeof(u64); /* Skip R1-R2 in $a0-$a3 */
-	int bpf, reg;
-
-	for (bpf = BPF_REG_3; bpf <= BPF_REG_5; bpf++) {
-		reg = bpf2mips[bpf].reg;
-
-		emit_instr(ctx, sw, LO(reg), OFFLO(store_offset), MIPS_R_SP);
-		emit_instr(ctx, sw, HI(reg), OFFHI(store_offset), MIPS_R_SP);
-		store_offset += sizeof(u64);
-	}
-}
-
-/*
- * Shared emitter for the BPF_CALL insn, covering both kernel helper calls
- * and BPF2BPF (BPF_PSEUDO_CALL) calls, including the TCC and ABI handling
- * each one needs.
- *
- * A BPF2BPF call must hand the TCC to the callee in a register, as the
- * subprog's prologue expects. A kernel call under the O32 ABI must pass
- * arguments on the stack: u64 BPF regs R1-R2 travel in the register pairs
- * $a0-$a3 while BPF regs R3-R5 are stored to the stack. Stack space is
- * still reserved for $a0-$a3, and the whole area is aligned.
- */
-#define ARGS_SIZE (5 * sizeof(u64))
-
-void emit_bpf_call(struct jit_ctx *ctx, const struct bpf_insn *insn)
-{
-	const int args_area = ALIGN(ARGS_SIZE, STACK_ALIGN);
-	const int saved_tcc = bpf2mips[JIT_SAV_TCC].reg;
-	const int ax_reg = bpf2mips[BPF_REG_AX].reg;
-	const int r0_reg = bpf2mips[BPF_REG_0].reg;
-	int run_tcc = bpf2mips[JIT_RUN_TCC].reg;
-	long target_addr;
-
-	/* No dedicated run-time TCC register: use the temporary one. */
-	if (!run_tcc)
-		run_tcc = TEMP_PASS_TCC;
-
-	ctx->flags |= EBPF_SAVE_RA;
-
-	/* A BPF subprog expects the TCC in a register on entry. */
-	if (insn->src_reg == BPF_PSEUDO_CALL &&
-	    tail_call_present(ctx) && !(ctx->flags & EBPF_TCC_IN_RUN)) {
-		if (saved_tcc) {
-			/* TCC lives in a saved register */
-			emit_instr(ctx, move, run_tcc, saved_tcc);
-		} else {
-			/* TCC lives on the stack */
-			emit_instr_long(ctx, ld, lw, run_tcc,
-					ctx->bpf_stack_off, MIPS_R_SP);
-		}
-	}
-
-	/* Kernel call on 32-bit: reserve the O32 area and push R3-R5. */
-	if (!is64bit() && insn->src_reg != BPF_PSEUDO_CALL) {
-		emit_instr(ctx, addiu, MIPS_R_SP, MIPS_R_SP, -args_area);
-		emit_push_args(ctx);
-	}
-
-	target_addr = (long)__bpf_call_base + insn->imm;
-
-	/* With the BPF ABI, skip the TCC init and R1 register fixup. */
-	if (insn->src_reg == BPF_PSEUDO_CALL)
-		target_addr += ctx->prolog_skip;
-
-	emit_const_to_reg(ctx, MIPS_R_T9, target_addr);
-	emit_instr(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
-	/* Delay slot */
-	emit_instr(ctx, nop);
-
-	/* Release the O32 argument area again. */
-	if (!is64bit() && insn->src_reg != BPF_PSEUDO_CALL)
-		emit_instr(ctx, addiu, MIPS_R_SP, MIPS_R_SP, args_area);
-
-	/*
-	 * The callee's epilogue assumes it returns to the kernel: on MIPS64
-	 * it may sign-extend R0, and on MIPS32BE it mangles the R0 register
-	 * pair. Undo both when returning from a bpf2bpf call.
-	 */
-	if (insn->src_reg != BPF_PSEUDO_CALL)
-		return;
-
-	/* Restore BPF R0 from AX */
-	if (is64bit()) {
-		emit_instr(ctx, move, r0_reg, ax_reg);
-	} else if (isbigend()) { /* and 32-bit */
-		emit_instr(ctx, move, LO(r0_reg), MIPS_R_V0);
-		emit_instr(ctx, move, HI(r0_reg), HI(ax_reg));
-	}
-}
-
-/*
- * Emit a BPF tail call. The helper arguments arrive via the BPF ABI as u64
- * parameters: native registers on MIPS64 N64 ABI systems, register pairs
- * on MIPS32 O32 ABI systems.
- *
- * R1 -> &ctx
- * R2 -> &array
- * R3 -> index
- *
- * Every failed check branches to the insn following this one. Returns the
- * result of build_int_epilogue().
- */
-static int emit_bpf_tail_call(struct jit_ctx *ctx, int this_idx)
-{
-	const int saved_tcc = bpf2mips[JIT_SAV_TCC].reg;
-	const int array_reg = bpf2mips[BPF_REG_2].reg;
-	const int index_reg = bpf2mips[BPF_REG_3].reg;
-	int run_tcc = bpf2mips[JIT_RUN_TCC].reg;
-	int field_off, out_off;
-	int tcc_src;
-
-	/* No dedicated run-time TCC register: use the temporary one. */
-	if (!run_tcc)
-		run_tcc = TEMP_PASS_TCC;
-
-	ctx->flags |= EBPF_SEEN_TC;
-
-	/*
-	 * if (index >= array->map.max_entries)
-	 *     goto out;
-	 */
-	if (is64bit()) {
-		/* Mask index as 32-bit */
-		gen_zext_insn(index_reg, true, ctx);
-	}
-	field_off = offsetof(struct bpf_array, map.max_entries);
-	emit_instr_long(ctx, lwu, lw, MIPS_R_AT, field_off, LO(array_reg));
-	emit_instr(ctx, sltu, MIPS_R_AT, MIPS_R_AT, LO(index_reg));
-	out_off = b_imm(this_idx + 1, ctx);
-	emit_instr(ctx, bnez, MIPS_R_AT, out_off);
-
-	/*
-	 * if (TCC-- < 0)
-	 *     goto out;
-	 *
-	 * The TCC fetch below sits in the delay slot of the branch above.
-	 */
-	if (ctx->flags & EBPF_TCC_IN_RUN)
-		tcc_src = run_tcc;
-	else
-		tcc_src = saved_tcc;
-
-	if (tcc_src) {
-		/* TCC is held in a register */
-		emit_instr(ctx, move, MIPS_R_T8, tcc_src);
-	} else {
-		/* TCC is held on the stack */
-		emit_instr_long(ctx, ld, lw, MIPS_R_T8,
-				ctx->bpf_stack_off, MIPS_R_SP);
-	}
-	out_off = b_imm(this_idx + 1, ctx);
-	emit_instr(ctx, bltz, MIPS_R_T8, out_off);
-
-	/*
-	 * prog = array->ptrs[index];
-	 * if (prog == NULL)
-	 *     goto out;
-	 *
-	 * The index scaling below sits in the delay slot of the branch above.
-	 */
-	emit_instr_long(ctx, dsll, sll, MIPS_R_AT, LO(index_reg), ilog2(sizeof(long)));
-	emit_instr_long(ctx, daddu, addu, MIPS_R_AT, MIPS_R_AT, LO(array_reg));
-	field_off = offsetof(struct bpf_array, ptrs);
-	emit_instr_long(ctx, ld, lw, MIPS_R_AT, field_off, MIPS_R_AT);
-	out_off = b_imm(this_idx + 1, ctx);
-	emit_instr(ctx, beqz, MIPS_R_AT, out_off);
-	/* Delay slot */
-	emit_instr(ctx, nop);
-
-	/* goto *(prog->bpf_func + skip); */
-	field_off = offsetof(struct bpf_prog, bpf_func);
-	emit_instr_long(ctx, ld, lw, MIPS_R_T9, field_off, MIPS_R_AT);
-	/* All checks passed: decrement the TCC and hand it on. */
-	emit_instr_long(ctx, daddiu, addiu, run_tcc, MIPS_R_T8, -1);
-	/* Skip first instructions (TCC init and R1 fixup) */
-	emit_instr_long(ctx, daddiu, addiu, MIPS_R_T9, MIPS_R_T9, ctx->prolog_skip);
-
-	return build_int_epilogue(ctx, MIPS_R_T9);
-}
-
-/* Report whether branch offset @b_off lies outside [-0x20000, 0x1ffff]. */
-static bool is_bad_offset(int b_off)
-{
-	if (b_off < -0x20000)
-		return true;
-
-	return b_off > 0x1ffff;
-}
-
-/*
- * Translate one eBPF instruction into MIPS code.
- *
- * @insn:     the instruction to translate
- * @ctx:      JIT context receiving the emitted instructions
- * @this_idx: index of @insn in the program
- * @exit_idx: index used as the branch target for program exit
- *
- * Returns the number of insn slots consumed: 1 for most instructions, 2
- * when a following EXIT was folded into a conditional branch or when the
- * insn is a double-slot 64-bit immediate load. Returns a negative errno
- * (-EINVAL, -E2BIG, or the error from ebpf_to_mips_reg()) on failure.
- */
-static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
-			  int this_idx, int exit_idx)
-{
-	int src, dst, r, td, ts, mem_off, b_off;
-	bool need_swap, did_move, cmp_eq;
-	unsigned int target = 0;
-	u64 t64;
-	s64 t64s;
-	int bpf_op = BPF_OP(insn->code);
-
-	/*
-	 * NOTE(review): bpf_op holds the BPF_OP() field yet is compared with
-	 * the size constant BPF_DW; this looks like it can never match and
-	 * BPF_SIZE() on memory-class insns may have been intended -- confirm.
-	 */
-	if (IS_ENABLED(CONFIG_32BIT) && ((BPF_CLASS(insn->code) == BPF_ALU64)
-						|| (bpf_op == BPF_DW)))
-		return -EINVAL;
-
-	switch (insn->code) {
-	case BPF_ALU64 | BPF_ADD | BPF_K: /* ALU64_IMM */
-	case BPF_ALU64 | BPF_SUB | BPF_K: /* ALU64_IMM */
-	case BPF_ALU64 | BPF_OR | BPF_K: /* ALU64_IMM */
-	case BPF_ALU64 | BPF_AND | BPF_K: /* ALU64_IMM */
-	case BPF_ALU64 | BPF_LSH | BPF_K: /* ALU64_IMM */
-	case BPF_ALU64 | BPF_RSH | BPF_K: /* ALU64_IMM */
-	case BPF_ALU64 | BPF_XOR | BPF_K: /* ALU64_IMM */
-	case BPF_ALU64 | BPF_ARSH | BPF_K: /* ALU64_IMM */
-	case BPF_ALU64 | BPF_MOV | BPF_K: /* ALU64_IMM */
-	case BPF_ALU | BPF_MOV | BPF_K: /* ALU32_IMM */
-	case BPF_ALU | BPF_ADD | BPF_K: /* ALU32_IMM */
-	case BPF_ALU | BPF_SUB | BPF_K: /* ALU32_IMM */
-	case BPF_ALU | BPF_OR | BPF_K: /* ALU32_IMM */
-	case BPF_ALU | BPF_AND | BPF_K: /* ALU32_IMM */
-	case BPF_ALU | BPF_LSH | BPF_K: /* ALU32_IMM */
-	case BPF_ALU | BPF_RSH | BPF_K: /* ALU32_IMM */
-	case BPF_ALU | BPF_XOR | BPF_K: /* ALU32_IMM */
-	case BPF_ALU | BPF_ARSH | BPF_K: /* ALU32_IMM */
-		r = gen_imm_insn(insn, ctx, this_idx);
-		if (r < 0)
-			return r;
-		break;
-	case BPF_ALU64 | BPF_MUL | BPF_K: /* ALU64_IMM */
-		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_NO_FP);
-		if (dst < 0)
-			return dst;
-		if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT)
-			emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
-		if (insn->imm == 1) /* Mult by 1 is a nop */
-			break;
-		gen_imm_to_reg(insn, MIPS_R_AT, ctx);
-		if (MIPS_ISA_REV >= 6) {
-			emit_instr(ctx, dmulu, dst, dst, MIPS_R_AT);
-		} else {
-			emit_instr(ctx, dmultu, MIPS_R_AT, dst);
-			emit_instr(ctx, mflo, dst);
-		}
-		break;
-	case BPF_ALU64 | BPF_NEG | BPF_K: /* ALU64_IMM */
-		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_NO_FP);
-		if (dst < 0)
-			return dst;
-		if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT)
-			emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
-		emit_instr(ctx, dsubu, dst, MIPS_R_ZERO, dst);
-		break;
-	case BPF_ALU | BPF_MUL | BPF_K: /* ALU_IMM */
-		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_NO_FP);
-		if (dst < 0)
-			return dst;
-		td = get_reg_val_type(ctx, this_idx, insn->dst_reg);
-		if (td == REG_64BIT) {
-			/* sign extend */
-			emit_instr(ctx, sll, dst, dst, 0);
-		}
-		if (insn->imm == 1) /* Mult by 1 is a nop */
-			break;
-		gen_imm_to_reg(insn, MIPS_R_AT, ctx);
-		if (MIPS_ISA_REV >= 6) {
-			emit_instr(ctx, mulu, dst, dst, MIPS_R_AT);
-		} else {
-			emit_instr(ctx, multu, dst, MIPS_R_AT);
-			emit_instr(ctx, mflo, dst);
-		}
-		break;
-	case BPF_ALU | BPF_NEG | BPF_K: /* ALU_IMM */
-		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_NO_FP);
-		if (dst < 0)
-			return dst;
-		td = get_reg_val_type(ctx, this_idx, insn->dst_reg);
-		if (td == REG_64BIT) {
-			/* sign extend */
-			emit_instr(ctx, sll, dst, dst, 0);
-		}
-		emit_instr(ctx, subu, dst, MIPS_R_ZERO, dst);
-		break;
-	case BPF_ALU | BPF_DIV | BPF_K: /* ALU_IMM */
-	case BPF_ALU | BPF_MOD | BPF_K: /* ALU_IMM */
-		if (insn->imm == 0)
-			return -EINVAL;
-		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_NO_FP);
-		if (dst < 0)
-			return dst;
-		td = get_reg_val_type(ctx, this_idx, insn->dst_reg);
-		if (td == REG_64BIT)
-			/* sign extend */
-			emit_instr(ctx, sll, dst, dst, 0);
-		if (insn->imm == 1) {
-			/* div by 1 is a nop, mod by 1 is zero */
-			if (bpf_op == BPF_MOD)
-				emit_instr(ctx, addu, dst, MIPS_R_ZERO, MIPS_R_ZERO);
-			break;
-		}
-		gen_imm_to_reg(insn, MIPS_R_AT, ctx);
-		if (MIPS_ISA_REV >= 6) {
-			if (bpf_op == BPF_DIV)
-				emit_instr(ctx, divu_r6, dst, dst, MIPS_R_AT);
-			else
-				emit_instr(ctx, modu, dst, dst, MIPS_R_AT);
-			break;
-		}
-		emit_instr(ctx, divu, dst, MIPS_R_AT);
-		if (bpf_op == BPF_DIV)
-			emit_instr(ctx, mflo, dst);
-		else
-			emit_instr(ctx, mfhi, dst);
-		break;
-	case BPF_ALU64 | BPF_DIV | BPF_K: /* ALU64_IMM */
-	case BPF_ALU64 | BPF_MOD | BPF_K: /* ALU64_IMM */
-		if (insn->imm == 0)
-			return -EINVAL;
-		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_NO_FP);
-		if (dst < 0)
-			return dst;
-		if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT)
-			emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
-		if (insn->imm == 1) {
-			/* div by 1 is a nop, mod by 1 is zero */
-			if (bpf_op == BPF_MOD)
-				emit_instr(ctx, addu, dst, MIPS_R_ZERO, MIPS_R_ZERO);
-			break;
-		}
-		gen_imm_to_reg(insn, MIPS_R_AT, ctx);
-		if (MIPS_ISA_REV >= 6) {
-			if (bpf_op == BPF_DIV)
-				emit_instr(ctx, ddivu_r6, dst, dst, MIPS_R_AT);
-			else
-				emit_instr(ctx, dmodu, dst, dst, MIPS_R_AT);
-			break;
-		}
-		emit_instr(ctx, ddivu, dst, MIPS_R_AT);
-		if (bpf_op == BPF_DIV)
-			emit_instr(ctx, mflo, dst);
-		else
-			emit_instr(ctx, mfhi, dst);
-		break;
-	case BPF_ALU64 | BPF_MOV | BPF_X: /* ALU64_REG */
-	case BPF_ALU64 | BPF_ADD | BPF_X: /* ALU64_REG */
-	case BPF_ALU64 | BPF_SUB | BPF_X: /* ALU64_REG */
-	case BPF_ALU64 | BPF_XOR | BPF_X: /* ALU64_REG */
-	case BPF_ALU64 | BPF_OR | BPF_X: /* ALU64_REG */
-	case BPF_ALU64 | BPF_AND | BPF_X: /* ALU64_REG */
-	case BPF_ALU64 | BPF_MUL | BPF_X: /* ALU64_REG */
-	case BPF_ALU64 | BPF_DIV | BPF_X: /* ALU64_REG */
-	case BPF_ALU64 | BPF_MOD | BPF_X: /* ALU64_REG */
-	case BPF_ALU64 | BPF_LSH | BPF_X: /* ALU64_REG */
-	case BPF_ALU64 | BPF_RSH | BPF_X: /* ALU64_REG */
-	case BPF_ALU64 | BPF_ARSH | BPF_X: /* ALU64_REG */
-		src = ebpf_to_mips_reg(ctx, insn, REG_SRC_FP_OK);
-		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_NO_FP);
-		if (src < 0 || dst < 0)
-			return -EINVAL;
-		if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT)
-			emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
-		did_move = false;
-		if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) {
-			int tmp_reg = MIPS_R_AT;
-
-			if (bpf_op == BPF_MOV) {
-				tmp_reg = dst;
-				did_move = true;
-			}
-			emit_instr(ctx, daddu, tmp_reg, src, MIPS_R_ZERO);
-			emit_instr(ctx, dinsu, tmp_reg, MIPS_R_ZERO, 32, 32);
-			src = MIPS_R_AT;
-		}
-		switch (bpf_op) {
-		case BPF_MOV:
-			if (!did_move)
-				emit_instr(ctx, daddu, dst, src, MIPS_R_ZERO);
-			break;
-		case BPF_ADD:
-			emit_instr(ctx, daddu, dst, dst, src);
-			break;
-		case BPF_SUB:
-			emit_instr(ctx, dsubu, dst, dst, src);
-			break;
-		case BPF_XOR:
-			emit_instr(ctx, xor, dst, dst, src);
-			break;
-		case BPF_OR:
-			emit_instr(ctx, or, dst, dst, src);
-			break;
-		case BPF_AND:
-			emit_instr(ctx, and, dst, dst, src);
-			break;
-		case BPF_MUL:
-			if (MIPS_ISA_REV >= 6) {
-				emit_instr(ctx, dmulu, dst, dst, src);
-			} else {
-				emit_instr(ctx, dmultu, dst, src);
-				emit_instr(ctx, mflo, dst);
-			}
-			break;
-		case BPF_DIV:
-		case BPF_MOD:
-			if (MIPS_ISA_REV >= 6) {
-				if (bpf_op == BPF_DIV)
-					emit_instr(ctx, ddivu_r6,
-							dst, dst, src);
-				else
-					emit_instr(ctx, dmodu, dst, dst, src);
-				break;
-			}
-			emit_instr(ctx, ddivu, dst, src);
-			if (bpf_op == BPF_DIV)
-				emit_instr(ctx, mflo, dst);
-			else
-				emit_instr(ctx, mfhi, dst);
-			break;
-		case BPF_LSH:
-			emit_instr(ctx, dsllv, dst, dst, src);
-			break;
-		case BPF_RSH:
-			emit_instr(ctx, dsrlv, dst, dst, src);
-			break;
-		case BPF_ARSH:
-			emit_instr(ctx, dsrav, dst, dst, src);
-			break;
-		default:
-			pr_err("ALU64_REG NOT HANDLED\n");
-			return -EINVAL;
-		}
-		break;
-	case BPF_ALU | BPF_MOV | BPF_X: /* ALU_REG */
-	case BPF_ALU | BPF_ADD | BPF_X: /* ALU_REG */
-	case BPF_ALU | BPF_SUB | BPF_X: /* ALU_REG */
-	case BPF_ALU | BPF_XOR | BPF_X: /* ALU_REG */
-	case BPF_ALU | BPF_OR | BPF_X: /* ALU_REG */
-	case BPF_ALU | BPF_AND | BPF_X: /* ALU_REG */
-	case BPF_ALU | BPF_MUL | BPF_X: /* ALU_REG */
-	case BPF_ALU | BPF_DIV | BPF_X: /* ALU_REG */
-	case BPF_ALU | BPF_MOD | BPF_X: /* ALU_REG */
-	case BPF_ALU | BPF_LSH | BPF_X: /* ALU_REG */
-	case BPF_ALU | BPF_RSH | BPF_X: /* ALU_REG */
-	case BPF_ALU | BPF_ARSH | BPF_X: /* ALU_REG */
-		src = ebpf_to_mips_reg(ctx, insn, REG_SRC_FP_OK);
-		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_NO_FP);
-		if (src < 0 || dst < 0)
-			return -EINVAL;
-		td = get_reg_val_type(ctx, this_idx, insn->dst_reg);
-		if (td == REG_64BIT) {
-			/* sign extend */
-			emit_instr(ctx, sll, dst, dst, 0);
-		}
-		did_move = false;
-		ts = get_reg_val_type(ctx, this_idx, insn->src_reg);
-		if (ts == REG_64BIT) {
-			int tmp_reg = MIPS_R_AT;
-
-			if (bpf_op == BPF_MOV) {
-				tmp_reg = dst;
-				did_move = true;
-			}
-			/* sign extend */
-			emit_instr(ctx, sll, tmp_reg, src, 0);
-			src = MIPS_R_AT;
-		}
-		switch (bpf_op) {
-		case BPF_MOV:
-			if (!did_move)
-				emit_instr(ctx, addu, dst, src, MIPS_R_ZERO);
-			break;
-		case BPF_ADD:
-			emit_instr(ctx, addu, dst, dst, src);
-			break;
-		case BPF_SUB:
-			emit_instr(ctx, subu, dst, dst, src);
-			break;
-		case BPF_XOR:
-			emit_instr(ctx, xor, dst, dst, src);
-			break;
-		case BPF_OR:
-			emit_instr(ctx, or, dst, dst, src);
-			break;
-		case BPF_AND:
-			emit_instr(ctx, and, dst, dst, src);
-			break;
-		case BPF_MUL:
-			emit_instr(ctx, mul, dst, dst, src);
-			break;
-		case BPF_DIV:
-		case BPF_MOD:
-			if (MIPS_ISA_REV >= 6) {
-				if (bpf_op == BPF_DIV)
-					emit_instr(ctx, divu_r6, dst, dst, src);
-				else
-					emit_instr(ctx, modu, dst, dst, src);
-				break;
-			}
-			emit_instr(ctx, divu, dst, src);
-			if (bpf_op == BPF_DIV)
-				emit_instr(ctx, mflo, dst);
-			else
-				emit_instr(ctx, mfhi, dst);
-			break;
-		case BPF_LSH:
-			emit_instr(ctx, sllv, dst, dst, src);
-			break;
-		case BPF_RSH:
-			emit_instr(ctx, srlv, dst, dst, src);
-			break;
-		case BPF_ARSH:
-			emit_instr(ctx, srav, dst, dst, src);
-			break;
-		default:
-			pr_err("ALU_REG NOT HANDLED\n");
-			return -EINVAL;
-		}
-		break;
-	case BPF_JMP | BPF_EXIT:
-		/* An EXIT in the last slot simply falls through. */
-		if (this_idx + 1 < exit_idx) {
-			b_off = b_imm(exit_idx, ctx);
-			if (is_bad_offset(b_off)) {
-				target = j_target(ctx, exit_idx);
-				if (target == (unsigned int)-1)
-					return -E2BIG;
-				emit_instr(ctx, j, target);
-			} else {
-				emit_instr(ctx, b, b_off);
-			}
-			emit_instr(ctx, nop);
-		}
-		break;
-	case BPF_JMP | BPF_JEQ | BPF_K: /* JMP_IMM */
-	case BPF_JMP | BPF_JNE | BPF_K: /* JMP_IMM */
-		cmp_eq = (bpf_op == BPF_JEQ);
-		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_FP_OK);
-		if (dst < 0)
-			return dst;
-		if (insn->imm == 0) {
-			src = MIPS_R_ZERO;
-		} else {
-			gen_imm_to_reg(insn, MIPS_R_AT, ctx);
-			src = MIPS_R_AT;
-		}
-		goto jeq_common;
-	case BPF_JMP | BPF_JEQ | BPF_X: /* JMP_REG */
-	case BPF_JMP | BPF_JNE | BPF_X:
-	case BPF_JMP | BPF_JSLT | BPF_X:
-	case BPF_JMP | BPF_JSLE | BPF_X:
-	case BPF_JMP | BPF_JSGT | BPF_X:
-	case BPF_JMP | BPF_JSGE | BPF_X:
-	case BPF_JMP | BPF_JLT | BPF_X:
-	case BPF_JMP | BPF_JLE | BPF_X:
-	case BPF_JMP | BPF_JGT | BPF_X:
-	case BPF_JMP | BPF_JGE | BPF_X:
-	case BPF_JMP | BPF_JSET | BPF_X:
-		src = ebpf_to_mips_reg(ctx, insn, REG_SRC_FP_OK);
-		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_FP_OK);
-		if (src < 0 || dst < 0)
-			return -EINVAL;
-		td = get_reg_val_type(ctx, this_idx, insn->dst_reg);
-		ts = get_reg_val_type(ctx, this_idx, insn->src_reg);
-		if (td == REG_32BIT && ts != REG_32BIT) {
-			emit_instr(ctx, sll, MIPS_R_AT, src, 0);
-			src = MIPS_R_AT;
-		} else if (ts == REG_32BIT && td != REG_32BIT) {
-			emit_instr(ctx, sll, MIPS_R_AT, dst, 0);
-			dst = MIPS_R_AT;
-		}
-		if (bpf_op == BPF_JSET) {
-			emit_instr(ctx, and, MIPS_R_AT, dst, src);
-			cmp_eq = false;
-			dst = MIPS_R_AT;
-			src = MIPS_R_ZERO;
-		} else if (bpf_op == BPF_JSGT || bpf_op == BPF_JSLE) {
-			emit_instr(ctx, dsubu, MIPS_R_AT, dst, src);
-			if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) {
-				b_off = b_imm(exit_idx, ctx);
-				if (is_bad_offset(b_off))
-					return -E2BIG;
-				if (bpf_op == BPF_JSGT)
-					emit_instr(ctx, blez, MIPS_R_AT, b_off);
-				else
-					emit_instr(ctx, bgtz, MIPS_R_AT, b_off);
-				emit_instr(ctx, nop);
-				return 2; /* We consumed the exit. */
-			}
-			b_off = b_imm(this_idx + insn->off + 1, ctx);
-			if (is_bad_offset(b_off))
-				return -E2BIG;
-			if (bpf_op == BPF_JSGT)
-				emit_instr(ctx, bgtz, MIPS_R_AT, b_off);
-			else
-				emit_instr(ctx, blez, MIPS_R_AT, b_off);
-			emit_instr(ctx, nop);
-			break;
-		} else if (bpf_op == BPF_JSGE || bpf_op == BPF_JSLT) {
-			emit_instr(ctx, slt, MIPS_R_AT, dst, src);
-			cmp_eq = bpf_op == BPF_JSGE;
-			dst = MIPS_R_AT;
-			src = MIPS_R_ZERO;
-		} else if (bpf_op == BPF_JGT || bpf_op == BPF_JLE) {
-			/* dst or src could be AT */
-			emit_instr(ctx, dsubu, MIPS_R_T8, dst, src);
-			emit_instr(ctx, sltu, MIPS_R_AT, dst, src);
-			/* SP known to be non-zero, movz becomes boolean not */
-			if (MIPS_ISA_REV >= 6) {
-				emit_instr(ctx, seleqz, MIPS_R_T9,
-						MIPS_R_SP, MIPS_R_T8);
-			} else {
-				emit_instr(ctx, movz, MIPS_R_T9,
-						MIPS_R_SP, MIPS_R_T8);
-				emit_instr(ctx, movn, MIPS_R_T9,
-						MIPS_R_ZERO, MIPS_R_T8);
-			}
-			emit_instr(ctx, or, MIPS_R_AT, MIPS_R_T9, MIPS_R_AT);
-			cmp_eq = bpf_op == BPF_JGT;
-			dst = MIPS_R_AT;
-			src = MIPS_R_ZERO;
-		} else if (bpf_op == BPF_JGE || bpf_op == BPF_JLT) {
-			emit_instr(ctx, sltu, MIPS_R_AT, dst, src);
-			cmp_eq = bpf_op == BPF_JGE;
-			dst = MIPS_R_AT;
-			src = MIPS_R_ZERO;
-		} else { /* JNE/JEQ case */
-			cmp_eq = (bpf_op == BPF_JEQ);
-		}
-jeq_common:
-		/*
-		 * If the next insn is EXIT and we are jumping around
-		 * only it, invert the sense of the compare and
-		 * conditionally jump to the exit.  Poor man's branch
-		 * chaining.
-		 */
-		if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) {
-			b_off = b_imm(exit_idx, ctx);
-			if (is_bad_offset(b_off)) {
-				target = j_target(ctx, exit_idx);
-				if (target == (unsigned int)-1)
-					return -E2BIG;
-				/* Branch over a long jump instead. */
-				cmp_eq = !cmp_eq;
-				b_off = 4 * 3;
-				if (!(ctx->offsets[this_idx] & OFFSETS_B_CONV)) {
-					ctx->offsets[this_idx] |= OFFSETS_B_CONV;
-					ctx->long_b_conversion = 1;
-				}
-			}
-
-			if (cmp_eq)
-				emit_instr(ctx, bne, dst, src, b_off);
-			else
-				emit_instr(ctx, beq, dst, src, b_off);
-			emit_instr(ctx, nop);
-			if (ctx->offsets[this_idx] & OFFSETS_B_CONV) {
-				emit_instr(ctx, j, target);
-				emit_instr(ctx, nop);
-			}
-			return 2; /* We consumed the exit. */
-		}
-		b_off = b_imm(this_idx + insn->off + 1, ctx);
-		if (is_bad_offset(b_off)) {
-			target = j_target(ctx, this_idx + insn->off + 1);
-			if (target == (unsigned int)-1)
-				return -E2BIG;
-			/* Branch over a long jump instead. */
-			cmp_eq = !cmp_eq;
-			b_off = 4 * 3;
-			if (!(ctx->offsets[this_idx] & OFFSETS_B_CONV)) {
-				ctx->offsets[this_idx] |= OFFSETS_B_CONV;
-				ctx->long_b_conversion = 1;
-			}
-		}
-
-		if (cmp_eq)
-			emit_instr(ctx, beq, dst, src, b_off);
-		else
-			emit_instr(ctx, bne, dst, src, b_off);
-		emit_instr(ctx, nop);
-		if (ctx->offsets[this_idx] & OFFSETS_B_CONV) {
-			emit_instr(ctx, j, target);
-			emit_instr(ctx, nop);
-		}
-		break;
-	case BPF_JMP | BPF_JSGT | BPF_K: /* JMP_IMM */
-	case BPF_JMP | BPF_JSGE | BPF_K: /* JMP_IMM */
-	case BPF_JMP | BPF_JSLT | BPF_K: /* JMP_IMM */
-	case BPF_JMP | BPF_JSLE | BPF_K: /* JMP_IMM */
-		cmp_eq = (bpf_op == BPF_JSGE);
-		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_FP_OK);
-		if (dst < 0)
-			return dst;
-
-		if (insn->imm == 0) {
-			if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) {
-				b_off = b_imm(exit_idx, ctx);
-				if (is_bad_offset(b_off))
-					return -E2BIG;
-				switch (bpf_op) {
-				case BPF_JSGT:
-					emit_instr(ctx, blez, dst, b_off);
-					break;
-				case BPF_JSGE:
-					emit_instr(ctx, bltz, dst, b_off);
-					break;
-				case BPF_JSLT:
-					emit_instr(ctx, bgez, dst, b_off);
-					break;
-				case BPF_JSLE:
-					emit_instr(ctx, bgtz, dst, b_off);
-					break;
-				}
-				emit_instr(ctx, nop);
-				return 2; /* We consumed the exit. */
-			}
-			b_off = b_imm(this_idx + insn->off + 1, ctx);
-			if (is_bad_offset(b_off))
-				return -E2BIG;
-			switch (bpf_op) {
-			case BPF_JSGT:
-				emit_instr(ctx, bgtz, dst, b_off);
-				break;
-			case BPF_JSGE:
-				emit_instr(ctx, bgez, dst, b_off);
-				break;
-			case BPF_JSLT:
-				emit_instr(ctx, bltz, dst, b_off);
-				break;
-			case BPF_JSLE:
-				emit_instr(ctx, blez, dst, b_off);
-				break;
-			}
-			emit_instr(ctx, nop);
-			break;
-		}
-		/*
-		 * Only a "less than" compare is available, so GT and LE are
-		 * both tested against imm + 1: dst > imm is !(dst < imm + 1)
-		 * and dst <= imm is (dst < imm + 1). Widen to 64 bits before
-		 * adding so that imm == S32_MAX does not overflow.
-		 */
-		if (bpf_op == BPF_JSGT || bpf_op == BPF_JSLE)
-			t64s = (s64)insn->imm + 1;
-		else
-			t64s = insn->imm;
-
-		cmp_eq = bpf_op == BPF_JSGT || bpf_op == BPF_JSGE;
-		if (t64s >= S16_MIN && t64s <= S16_MAX) {
-			emit_instr(ctx, slti, MIPS_R_AT, dst, (int)t64s);
-			src = MIPS_R_AT;
-			dst = MIPS_R_ZERO;
-			goto jeq_common;
-		}
-		emit_const_to_reg(ctx, MIPS_R_AT, (u64)t64s);
-		emit_instr(ctx, slt, MIPS_R_AT, dst, MIPS_R_AT);
-		src = MIPS_R_AT;
-		dst = MIPS_R_ZERO;
-		goto jeq_common;
-
-	case BPF_JMP | BPF_JGT | BPF_K:
-	case BPF_JMP | BPF_JGE | BPF_K:
-	case BPF_JMP | BPF_JLT | BPF_K:
-	case BPF_JMP | BPF_JLE | BPF_K:
-		cmp_eq = (bpf_op == BPF_JGE);
-		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_FP_OK);
-		if (dst < 0)
-			return dst;
-		/*
-		 * Only a "less than" compare is available, so GT and LE are
-		 * both tested against imm + 1 (computed in 64 bits).
-		 */
-		if (bpf_op == BPF_JGT || bpf_op == BPF_JLE)
-			t64s = (u64)(u32)(insn->imm) + 1;
-		else
-			t64s = (u64)(u32)(insn->imm);
-
-		cmp_eq = bpf_op == BPF_JGT || bpf_op == BPF_JGE;
-
-		emit_const_to_reg(ctx, MIPS_R_AT, (u64)t64s);
-		emit_instr(ctx, sltu, MIPS_R_AT, dst, MIPS_R_AT);
-		src = MIPS_R_AT;
-		dst = MIPS_R_ZERO;
-		goto jeq_common;
-
-	case BPF_JMP | BPF_JSET | BPF_K: /* JMP_IMM */
-		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_FP_OK);
-		if (dst < 0)
-			return dst;
-
-		/* A single-bit mask can use the bit-test branches. */
-		if (ctx->use_bbit_insns && hweight32((u32)insn->imm) == 1) {
-			if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) {
-				b_off = b_imm(exit_idx, ctx);
-				if (is_bad_offset(b_off))
-					return -E2BIG;
-				emit_instr(ctx, bbit0, dst, ffs((u32)insn->imm) - 1, b_off);
-				emit_instr(ctx, nop);
-				return 2; /* We consumed the exit. */
-			}
-			b_off = b_imm(this_idx + insn->off + 1, ctx);
-			if (is_bad_offset(b_off))
-				return -E2BIG;
-			emit_instr(ctx, bbit1, dst, ffs((u32)insn->imm) - 1, b_off);
-			emit_instr(ctx, nop);
-			break;
-		}
-		t64 = (u32)insn->imm;
-		emit_const_to_reg(ctx, MIPS_R_AT, t64);
-		emit_instr(ctx, and, MIPS_R_AT, dst, MIPS_R_AT);
-		src = MIPS_R_AT;
-		dst = MIPS_R_ZERO;
-		cmp_eq = false;
-		goto jeq_common;
-
-	case BPF_JMP | BPF_JA:
-		/*
-		 * Prefer relative branch for easier debugging, but
-		 * fall back if needed.
-		 */
-		b_off = b_imm(this_idx + insn->off + 1, ctx);
-		if (is_bad_offset(b_off)) {
-			target = j_target(ctx, this_idx + insn->off + 1);
-			if (target == (unsigned int)-1)
-				return -E2BIG;
-			emit_instr(ctx, j, target);
-		} else {
-			emit_instr(ctx, b, b_off);
-		}
-		emit_instr(ctx, nop);
-		break;
-	case BPF_LD | BPF_DW | BPF_IMM:
-		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_NO_FP);
-		if (dst < 0)
-			return dst;
-		/* Low word from this slot, high word from the next one. */
-		t64 = ((u64)(u32)insn->imm) | ((u64)(insn + 1)->imm << 32);
-		emit_const_to_reg(ctx, dst, t64);
-		return 2; /* Double slot insn */
-
-	case BPF_JMP | BPF_CALL:
-		emit_bpf_call(ctx, insn);
-		break;
-
-	case BPF_JMP | BPF_TAIL_CALL:
-		if (emit_bpf_tail_call(ctx, this_idx))
-			return -EINVAL;
-		break;
-
-	case BPF_ALU | BPF_END | BPF_FROM_BE:
-	case BPF_ALU | BPF_END | BPF_FROM_LE:
-		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_NO_FP);
-		if (dst < 0)
-			return dst;
-		td = get_reg_val_type(ctx, this_idx, insn->dst_reg);
-		if (insn->imm == 64 && td == REG_32BIT)
-			emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
-
-		if (insn->imm != 64 && td == REG_64BIT) {
-			/* sign extend */
-			emit_instr(ctx, sll, dst, dst, 0);
-		}
-
-#ifdef __BIG_ENDIAN
-		need_swap = (BPF_SRC(insn->code) == BPF_FROM_LE);
-#else
-		need_swap = (BPF_SRC(insn->code) == BPF_FROM_BE);
-#endif
-		if (insn->imm == 16) {
-			if (need_swap)
-				emit_instr(ctx, wsbh, dst, dst);
-			emit_instr(ctx, andi, dst, dst, 0xffff);
-		} else if (insn->imm == 32) {
-			if (need_swap) {
-				emit_instr(ctx, wsbh, dst, dst);
-				emit_instr(ctx, rotr, dst, dst, 16);
-			}
-		} else { /* 64-bit */
-			if (need_swap) {
-				emit_instr(ctx, dsbh, dst, dst);
-				emit_instr(ctx, dshd, dst, dst);
-			}
-		}
-		break;
-
-	case BPF_ST | BPF_NOSPEC: /* speculation barrier */
-		break;
-
-	case BPF_ST | BPF_B | BPF_MEM:
-	case BPF_ST | BPF_H | BPF_MEM:
-	case BPF_ST | BPF_W | BPF_MEM:
-	case BPF_ST | BPF_DW | BPF_MEM:
-		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_FP_OK);
-		if (dst < 0)
-			return dst;
-		mem_off = insn->off;
-		gen_imm_to_reg(insn, MIPS_R_AT, ctx);
-		switch (BPF_SIZE(insn->code)) {
-		case BPF_B:
-			emit_instr(ctx, sb, MIPS_R_AT, mem_off, dst);
-			break;
-		case BPF_H:
-			emit_instr(ctx, sh, MIPS_R_AT, mem_off, dst);
-			break;
-		case BPF_W:
-			emit_instr(ctx, sw, MIPS_R_AT, mem_off, dst);
-			break;
-		case BPF_DW:
-			emit_instr(ctx, sd, MIPS_R_AT, mem_off, dst);
-			break;
-		}
-		break;
-
-	case BPF_LDX | BPF_B | BPF_MEM:
-	case BPF_LDX | BPF_H | BPF_MEM:
-	case BPF_LDX | BPF_W | BPF_MEM:
-	case BPF_LDX | BPF_DW | BPF_MEM:
-		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_NO_FP);
-		src = ebpf_to_mips_reg(ctx, insn, REG_SRC_FP_OK);
-		if (dst < 0 || src < 0)
-			return -EINVAL;
-		mem_off = insn->off;
-		switch (BPF_SIZE(insn->code)) {
-		case BPF_B:
-			emit_instr(ctx, lbu, dst, mem_off, src);
-			break;
-		case BPF_H:
-			emit_instr(ctx, lhu, dst, mem_off, src);
-			break;
-		case BPF_W:
-			emit_instr(ctx, lw, dst, mem_off, src);
-			break;
-		case BPF_DW:
-			emit_instr(ctx, ld, dst, mem_off, src);
-			break;
-		}
-		break;
-
-	case BPF_STX | BPF_B | BPF_MEM:
-	case BPF_STX | BPF_H | BPF_MEM:
-	case BPF_STX | BPF_W | BPF_MEM:
-	case BPF_STX | BPF_DW | BPF_MEM:
-	case BPF_STX | BPF_W | BPF_ATOMIC:
-	case BPF_STX | BPF_DW | BPF_ATOMIC:
-		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_FP_OK);
-		src = ebpf_to_mips_reg(ctx, insn, REG_SRC_FP_OK);
-		if (src < 0 || dst < 0)
-			return -EINVAL;
-		mem_off = insn->off;
-		if (BPF_MODE(insn->code) == BPF_ATOMIC) {
-			/* Only atomic add is implemented. */
-			if (insn->imm != BPF_ADD) {
-				pr_err("ATOMIC OP %02x NOT HANDLED\n", insn->imm);
-				return -EINVAL;
-			}
-			/*
-			 * If mem_off does not fit within the 9 bit ll/sc
-			 * instruction immediate field, use a temp reg.
-			 */
-			if (MIPS_ISA_REV >= 6 &&
-			    (mem_off >= BIT(8) || mem_off < -BIT(8))) {
-				emit_instr(ctx, daddiu, MIPS_R_T6,
-						dst, mem_off);
-				mem_off = 0;
-				dst = MIPS_R_T6;
-			}
-			switch (BPF_SIZE(insn->code)) {
-			case BPF_W:
-				if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) {
-					emit_instr(ctx, sll, MIPS_R_AT, src, 0);
-					src = MIPS_R_AT;
-				}
-				emit_instr(ctx, ll, MIPS_R_T8, mem_off, dst);
-				emit_instr(ctx, addu, MIPS_R_T8, MIPS_R_T8, src);
-				emit_instr(ctx, sc, MIPS_R_T8, mem_off, dst);
-				/*
-				 * On failure back up to LL (-4
-				 * instructions of 4 bytes each)
-				 */
-				emit_instr(ctx, beq, MIPS_R_T8, MIPS_R_ZERO, -4 * 4);
-				emit_instr(ctx, nop);
-				break;
-			case BPF_DW:
-				if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) {
-					emit_instr(ctx, daddu, MIPS_R_AT, src, MIPS_R_ZERO);
-					emit_instr(ctx, dinsu, MIPS_R_AT, MIPS_R_ZERO, 32, 32);
-					src = MIPS_R_AT;
-				}
-				emit_instr(ctx, lld, MIPS_R_T8, mem_off, dst);
-				emit_instr(ctx, daddu, MIPS_R_T8, MIPS_R_T8, src);
-				emit_instr(ctx, scd, MIPS_R_T8, mem_off, dst);
-				/* On failure back up to LLD, as above */
-				emit_instr(ctx, beq, MIPS_R_T8, MIPS_R_ZERO, -4 * 4);
-				emit_instr(ctx, nop);
-				break;
-			}
-		} else { /* BPF_MEM */
-			switch (BPF_SIZE(insn->code)) {
-			case BPF_B:
-				emit_instr(ctx, sb, src, mem_off, dst);
-				break;
-			case BPF_H:
-				emit_instr(ctx, sh, src, mem_off, dst);
-				break;
-			case BPF_W:
-				emit_instr(ctx, sw, src, mem_off, dst);
-				break;
-			case BPF_DW:
-				if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) {
-					emit_instr(ctx, daddu, MIPS_R_AT, src, MIPS_R_ZERO);
-					emit_instr(ctx, dinsu, MIPS_R_AT, MIPS_R_ZERO, 32, 32);
-					src = MIPS_R_AT;
-				}
-				emit_instr(ctx, sd, src, mem_off, dst);
-				break;
-			}
-		}
-		break;
-
-	default:
-		pr_err("NOT HANDLED %d - (%02x)\n",
-		       this_idx, (unsigned int)insn->code);
-		return -EINVAL;
-	}
-	return 1;
-}
-
-/*
- * The two top bits of each reg_val_types[] entry record how an insn was
- * reached. An entry with neither bit set is treated as a dead instruction
- * by build_int_body().
- */
-#define RVT_VISITED_MASK 0xc000000000000000ull
-#define RVT_FALL_THROUGH 0x4000000000000000ull
-#define RVT_BRANCH_TAKEN 0x8000000000000000ull
-#define RVT_DONE (RVT_FALL_THROUGH | RVT_BRANCH_TAKEN)
-
-/*
- * Emit code for every reachable insn of the program. While no output
- * buffer is attached (ctx->target is NULL), also record each insn's byte
- * offset in ctx->offsets[]. Returns 0 on success or the negative error
- * reported by build_one_insn().
- */
-static int build_int_body(struct jit_ctx *ctx)
-{
-	const struct bpf_prog *prog = ctx->prog;
-	int i = 0;
-
-	while (i < prog->len) {
-		const struct bpf_insn *insn = prog->insnsi + i;
-		int consumed;
-
-		/* dead instruction, don't emit it. */
-		if (!(ctx->reg_val_types[i] & RVT_VISITED_MASK)) {
-			i++;
-			continue;
-		}
-
-		/* Keep the OFFSETS_B_CONV flag, refresh the byte offset. */
-		if (!ctx->target)
-			ctx->offsets[i] = (ctx->offsets[i] & OFFSETS_B_CONV) | (ctx->idx * 4);
-
-		consumed = build_one_insn(insn, ctx, i, prog->len);
-		if (consumed < 0)
-			return consumed;
-		i += consumed;
-	}
-
-	if (!ctx->target) {
-		/* epilogue offset */
-		ctx->offsets[i] = ctx->idx * 4;
-
-		/*
-		 * All exits have an offset of the epilogue, some offsets
-		 * may not have been set due to branch-around threading, so
-		 * set them now.
-		 */
-		for (i = 0; i < prog->len; i++) {
-			if (prog->insnsi[i].code == (BPF_JMP | BPF_EXIT))
-				ctx->offsets[i] = ctx->idx * 4;
-		}
-	}
-
-	return 0;
-}
-
-/* return the last idx processed, or negative for error */
-static int reg_val_propagate_range(struct jit_ctx *ctx, u64 initial_rvt,
-				   int start_idx, bool follow_taken)
-{
-	const struct bpf_prog *prog = ctx->prog;
-	const struct bpf_insn *insn;
-	u64 exit_rvt = initial_rvt;
-	u64 *rvt = ctx->reg_val_types;
-	int idx;
-	int reg;
-
-	for (idx = start_idx; idx < prog->len; idx++) {
-		rvt[idx] = (rvt[idx] & RVT_VISITED_MASK) | exit_rvt;
-		insn = prog->insnsi + idx;
-		switch (BPF_CLASS(insn->code)) {
-		case BPF_ALU:
-			switch (BPF_OP(insn->code)) {
-			case BPF_ADD:
-			case BPF_SUB:
-			case BPF_MUL:
-			case BPF_DIV:
-			case BPF_OR:
-			case BPF_AND:
-			case BPF_LSH:
-			case BPF_RSH:
-			case BPF_ARSH:
-			case BPF_NEG:
-			case BPF_MOD:
-			case BPF_XOR:
-				set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT);
-				break;
-			case BPF_MOV:
-				if (BPF_SRC(insn->code)) {
-					set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT);
-				} else {
-					/* IMM to REG move*/
-					if (insn->imm >= 0)
-						set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS);
-					else
-						set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT);
-				}
-				break;
-			case BPF_END:
-				if (insn->imm == 64)
-					set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT);
-				else if (insn->imm == 32)
-					set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT);
-				else /* insn->imm == 16 */
-					set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS);
-				break;
-			}
-			rvt[idx] |= RVT_DONE;
-			break;
-		case BPF_ALU64:
-			switch (BPF_OP(insn->code)) {
-			case BPF_MOV:
-				if (BPF_SRC(insn->code)) {
-					/* REG to REG move*/
-					set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT);
-				} else {
-					/* IMM to REG move*/
-					if (insn->imm >= 0)
-						set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS);
-					else
-						set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT_32BIT);
-				}
-				break;
-			default:
-				set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT);
-			}
-			rvt[idx] |= RVT_DONE;
-			break;
-		case BPF_LD:
-			switch (BPF_SIZE(insn->code)) {
-			case BPF_DW:
-				if (BPF_MODE(insn->code) == BPF_IMM) {
-					s64 val;
-
-					val = (s64)((u32)insn->imm | ((u64)(insn + 1)->imm << 32));
-					if (val > 0 && val <= S32_MAX)
-						set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS);
-					else if (val >= S32_MIN && val <= S32_MAX)
-						set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT_32BIT);
-					else
-						set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT);
-					rvt[idx] |= RVT_DONE;
-					idx++;
-				} else {
-					set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT);
-				}
-				break;
-			case BPF_B:
-			case BPF_H:
-				set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS);
-				break;
-			case BPF_W:
-				if (BPF_MODE(insn->code) == BPF_IMM)
-					set_reg_val_type(&exit_rvt, insn->dst_reg,
-							 insn->imm >= 0 ? REG_32BIT_POS : REG_32BIT);
-				else
-					set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT);
-				break;
-			}
-			rvt[idx] |= RVT_DONE;
-			break;
-		case BPF_LDX:
-			switch (BPF_SIZE(insn->code)) {
-			case BPF_DW:
-				set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT);
-				break;
-			case BPF_B:
-			case BPF_H:
-				set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS);
-				break;
-			case BPF_W:
-				set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT);
-				break;
-			}
-			rvt[idx] |= RVT_DONE;
-			break;
-		case BPF_JMP:
-		case BPF_JMP32:
-			switch (BPF_OP(insn->code)) {
-			case BPF_EXIT:
-				rvt[idx] = RVT_DONE | exit_rvt;
-				rvt[prog->len] = exit_rvt;
-				return idx;
-			case BPF_JA:
-			{
-				int tgt = idx + 1 + insn->off;
-				bool visited = (rvt[tgt] & RVT_FALL_THROUGH);
-
-				rvt[idx] |= RVT_DONE;
-				/*
-				 * Verifier dead code patching can use
-				 * infinite-loop traps, causing hangs and
-				 * RCU stalls here. Treat traps as nops
-				 * if detected and fall through.
-				 */
-				if (insn->off == -1)
-					break;
-				/*
-				 * Bounded loops cause the same issues in
-				 * fallthrough mode; follow only if jump
-				 * target is unvisited to mitigate.
-				 */
-				if (insn->off < 0 && !follow_taken && visited)
-					break;
-				idx += insn->off;
-				break;
-			}
-			case BPF_JEQ:
-			case BPF_JGT:
-			case BPF_JGE:
-			case BPF_JLT:
-			case BPF_JLE:
-			case BPF_JSET:
-			case BPF_JNE:
-			case BPF_JSGT:
-			case BPF_JSGE:
-			case BPF_JSLT:
-			case BPF_JSLE:
-				if (follow_taken) {
-					rvt[idx] |= RVT_BRANCH_TAKEN;
-					idx += insn->off;
-					follow_taken = false;
-				} else {
-					rvt[idx] |= RVT_FALL_THROUGH;
-				}
-				break;
-			case BPF_CALL:
-				set_reg_val_type(&exit_rvt, BPF_REG_0, REG_64BIT);
-				/* Upon call return, argument registers are clobbered. */
-				for (reg = BPF_REG_0; reg <= BPF_REG_5; reg++)
-					set_reg_val_type(&exit_rvt, reg, REG_64BIT);
-
-				rvt[idx] |= RVT_DONE;
-				break;
-			case BPF_TAIL_CALL:
-				rvt[idx] |= RVT_DONE;
-				break;
-			default:
-				WARN(1, "Unhandled BPF_JMP case.\n");
-				rvt[idx] |= RVT_DONE;
-				break;
-			}
-			break;
-		default:
-			rvt[idx] |= RVT_DONE;
-			break;
-		}
-	}
-	return idx;
-}
-
-/*
- * Track the value range (i.e. 32-bit vs. 64-bit) of each register at
- * each eBPF insn.  This allows unneeded sign and zero extension
- * operations to be omitted.
- *
- * Doesn't handle yet confluence of control paths with conflicting
- * ranges, but it is good enough for most sane code.
- */
-static int reg_val_propagate(struct jit_ctx *ctx)
-{
-	const struct bpf_prog *prog = ctx->prog;
-	u64 exit_rvt;
-	int reg;
-	int i;
-
-	/*
-	 * 11 registers * 3 bits/reg leaves top bits free for other
-	 * uses.  Bit-62..63 used to see if we have visited an insn.
-	 */
-	exit_rvt = 0;
-
-	/* Upon entry, argument registers are 64-bit. */
-	for (reg = BPF_REG_1; reg <= BPF_REG_5; reg++)
-		set_reg_val_type(&exit_rvt, reg, REG_64BIT);
-
-	/*
-	 * First follow all conditional branches on the fall-through
-	 * edge of control flow..
-	 */
-	reg_val_propagate_range(ctx, exit_rvt, 0, false);
-restart_search:
-	/*
-	 * Then repeatedly find the first conditional branch where
-	 * both edges of control flow have not been taken, and follow
-	 * the branch taken edge.  We will end up restarting the
-	 * search once per conditional branch insn.
-	 */
-	for (i = 0; i < prog->len; i++) {
-		u64 rvt = ctx->reg_val_types[i];
-
-		if ((rvt & RVT_VISITED_MASK) == RVT_DONE ||
-		    (rvt & RVT_VISITED_MASK) == 0)
-			continue;
-		if ((rvt & RVT_VISITED_MASK) == RVT_FALL_THROUGH) {
-			reg_val_propagate_range(ctx, rvt & ~RVT_VISITED_MASK, i, true);
-		} else { /* RVT_BRANCH_TAKEN */
-			WARN(1, "Unexpected RVT_BRANCH_TAKEN case.\n");
-			reg_val_propagate_range(ctx, rvt & ~RVT_VISITED_MASK, i, false);
-		}
-		goto restart_search;
-	}
-	/*
-	 * Eventually all conditional branches have been followed on
-	 * both branches and we are done.  Any insn that has not been
-	 * visited at this point is dead.
-	 */
-
-	return 0;
-}
-
-static void jit_fill_hole(void *area, unsigned int size)
-{
-	u32 *p;
-
-	/* We are guaranteed to have aligned memory. */
-	for (p = area; size >= sizeof(u32); size -= sizeof(u32))
-		uasm_i_break(&p, BRK_BUG); /* Increments p */
-}
-
-/*
- * Save and restore the BPF VM state across a direct kernel call. This
- * includes the caller-saved registers used for BPF_REG_0 .. BPF_REG_5
- * and BPF_REG_AX used by the verifier for blinding and other dark arts.
- * Restore avoids clobbering bpf_ret, which holds the call return value.
- * BPF_REG_6 .. BPF_REG_10 and TCC are already callee-saved or on stack.
- */
-static const int bpf_caller_save[] = {
-	BPF_REG_0,
-	BPF_REG_1,
-	BPF_REG_2,
-	BPF_REG_3,
-	BPF_REG_4,
-	BPF_REG_5,
-	BPF_REG_AX,
-};
-
-#define CALLER_ENV_SIZE (ARRAY_SIZE(bpf_caller_save) * sizeof(u64))
-
-void emit_caller_save(struct jit_ctx *ctx)
-{
-	int stack_adj = ALIGN(CALLER_ENV_SIZE, STACK_ALIGN);
-	int i, bpf, reg, store_offset;
-
-	emit_instr_long(ctx, daddiu, addiu, MIPS_R_SP, MIPS_R_SP, -stack_adj);
-
-	for (i = 0; i < ARRAY_SIZE(bpf_caller_save); i++) {
-		bpf = bpf_caller_save[i];
-		reg = bpf2mips[bpf].reg;
-		store_offset = i * sizeof(u64);
-
-		if (is64bit()) {
-			emit_instr(ctx, sd, reg, store_offset, MIPS_R_SP);
-		} else {
-			emit_instr(ctx, sw, LO(reg),
-						OFFLO(store_offset), MIPS_R_SP);
-			emit_instr(ctx, sw, HI(reg),
-						OFFHI(store_offset), MIPS_R_SP);
-		}
-	}
-}
-
-void emit_caller_restore(struct jit_ctx *ctx, int bpf_ret)
-{
-	int stack_adj = ALIGN(CALLER_ENV_SIZE, STACK_ALIGN);
-	int i, bpf, reg, store_offset;
-
-	for (i = 0; i < ARRAY_SIZE(bpf_caller_save); i++) {
-		bpf = bpf_caller_save[i];
-		reg = bpf2mips[bpf].reg;
-		store_offset = i * sizeof(u64);
-		if (bpf == bpf_ret)
-			continue;
-
-		if (is64bit()) {
-			emit_instr(ctx, ld, reg, store_offset, MIPS_R_SP);
-		} else {
-			emit_instr(ctx, lw, LO(reg),
-						OFFLO(store_offset), MIPS_R_SP);
-			emit_instr(ctx, lw, HI(reg),
-						OFFHI(store_offset), MIPS_R_SP);
-		}
-	}
-
-	emit_instr_long(ctx, daddiu, addiu, MIPS_R_SP, MIPS_R_SP, stack_adj);
-}
-
-struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
-{
-	bool tmp_blinded = false, extra_pass = false;
-	struct bpf_prog *tmp, *orig_prog = prog;
-	struct bpf_binary_header *header = NULL;
-	unsigned int image_size, pass = 3;
-	struct jit_ctx *ctx;
-
-	if (!prog->jit_requested)
-		return orig_prog;
-
-	/* Attempt blinding but fall back to the interpreter on failure. */
-	tmp = bpf_jit_blind_constants(prog);
-	if (IS_ERR(tmp))
-		return orig_prog;
-	if (tmp != prog) {
-		tmp_blinded = true;
-		prog = tmp;
-	}
-
-	ctx = prog->aux->jit_data;
-	if (!ctx) {
-		ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
-		if (!ctx) {
-			prog = orig_prog;
-			goto out;
-		}
-	}
-
-	/*
-	 * Assume extra pass needed for patching addresses if previous
-	 * ctx exists in saved jit_data, so skip to code generation.
-	 */
-	if (ctx->offsets) {
-		extra_pass = true;
-		pass++;
-		image_size = 4 * ctx->idx;
-		header = bpf_jit_binary_hdr(ctx->prog);
-		goto skip_init_ctx;
-	}
-
-	ctx->prog = prog;
-	ctx->offsets = kcalloc(prog->len + 1,
-			       sizeof(*ctx->offsets),
-			       GFP_KERNEL);
-	if (!ctx->offsets)
-		goto out_err;
-
-	/* Check Octeon bbit ops only for MIPS64. */
-	if (is64bit()) {
-		preempt_disable();
-		switch (current_cpu_type()) {
-		case CPU_CAVIUM_OCTEON:
-		case CPU_CAVIUM_OCTEON_PLUS:
-		case CPU_CAVIUM_OCTEON2:
-		case CPU_CAVIUM_OCTEON3:
-			ctx->use_bbit_insns = 1;
-			break;
-		default:
-			ctx->use_bbit_insns = 0;
-		}
-		preempt_enable();
-	}
-
-	ctx->reg_val_types = kcalloc(prog->len + 1,
-				     sizeof(*ctx->reg_val_types),
-				     GFP_KERNEL);
-	if (!ctx->reg_val_types)
-		goto out_err;
-
-	if (reg_val_propagate(ctx))
-		goto out_err;
-
-	/*
-	 * First pass discovers used resources and instruction offsets
-	 * assuming short branches are used.
-	 */
-	if (build_int_body(ctx))
-		goto out_err;
-
-	/*
-	 * If no calls are made (EBPF_SAVE_RA), then tailcall count located
-	 * in runtime reg if defined, else we backup to save reg or stack.
-	 */
-	if (tail_call_present(ctx)) {
-		if (ctx->flags & EBPF_SAVE_RA)
-			ctx->flags |= bpf2mips[JIT_SAV_TCC].flags;
-		else if (bpf2mips[JIT_RUN_TCC].reg)
-			ctx->flags |= EBPF_TCC_IN_RUN;
-	}
-
-	/*
-	 * Second pass generates offsets, if any branches are out of
-	 * range a jump-around long sequence is generated, and we have
-	 * to try again from the beginning to generate the new
-	 * offsets.  This is done until no additional conversions are
-	 * necessary.
-	 */
-	do {
-		ctx->idx = 0;
-		ctx->gen_b_offsets = 1;
-		ctx->long_b_conversion = 0;
-		if (build_int_prologue(ctx))
-			goto out_err;
-		if (build_int_body(ctx))
-			goto out_err;
-		if (build_int_epilogue(ctx, MIPS_R_RA))
-			goto out_err;
-	} while (ctx->long_b_conversion);
-
-	image_size = 4 * ctx->idx;
-
-	header = bpf_jit_binary_alloc(image_size, (void *)&ctx->target,
-				      sizeof(u32), jit_fill_hole);
-	if (!header)
-		goto out_err;
-
-skip_init_ctx:
-
-	/* Third pass generates the code (fourth patches call addresses) */
-	ctx->idx = 0;
-	if (build_int_prologue(ctx))
-		goto out_err;
-	if (build_int_body(ctx))
-		goto out_err;
-	if (build_int_epilogue(ctx, MIPS_R_RA))
-		goto out_err;
-
-	if (bpf_jit_enable > 1)
-		/* Dump JIT code */
-		bpf_jit_dump(prog->len, image_size, pass, ctx->target);
-
-	/* Update the icache */
-	flush_icache_range((unsigned long)ctx->target,
-			   (unsigned long)&ctx->target[ctx->idx]);
-
-	if (!prog->is_func || extra_pass)
-		bpf_jit_binary_lock_ro(header);
-	else
-		prog->aux->jit_data = ctx;
-
-	prog->bpf_func = (void *)ctx->target;
-	prog->jited = 1;
-	prog->jited_len = image_size;
-
-	if (!prog->is_func || extra_pass) {
-		bpf_prog_fill_jited_linfo(prog, ctx->offsets + 1);
-out_ctx:
-		kfree(ctx->offsets);
-		kfree(ctx->reg_val_types);
-		kfree(ctx);
-		prog->aux->jit_data = NULL;
-	}
-out:
-	if (tmp_blinded)
-		bpf_jit_prog_release_other(prog, prog == orig_prog ?
-					   tmp : orig_prog);
-	return prog;
-
-out_err:
-	prog = orig_prog;
-	if (header)
-		bpf_jit_binary_free(header);
-	goto out_ctx;
-}
-
-/* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */
-bool bpf_jit_supports_subprog_tailcalls(void)
-{
-	return true;
-}
diff --git a/arch/mips/net/ebpf_jit.h b/arch/mips/net/ebpf_jit.h
new file mode 100644
index 000000000000..82227e16e503
--- /dev/null
+++ b/arch/mips/net/ebpf_jit.h
@@ -0,0 +1,297 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Just-In-Time compiler for eBPF filters on MIPS32/MIPS64
+ * Copyright (c) 2021 Tony Ambardar <Tony.Ambardar@gmail.com>
+ *
+ * Based on code from:
+ *
+ * Copyright (c) 2017 Cavium, Inc.
+ * Author: David Daney <david.daney@cavium.com>
+ *
+ * Copyright (c) 2014 Imagination Technologies Ltd.
+ * Author: Markos Chandras <markos.chandras@imgtec.com>
+ */
+
+#ifndef _EBPF_JIT_H
+#define _EBPF_JIT_H
+
+#include <linux/filter.h>
+#include <linux/bpf.h>
+#include <asm/byteorder.h>
+#include <asm/uasm.h>
+
+/* Registers used by JIT:	  (MIPS32)	(MIPS64) */
+#define MIPS_R_ZERO	0
+#define MIPS_R_AT	1
+#define MIPS_R_V0	2	/* BPF_R0	BPF_R0 */
+#define MIPS_R_V1	3	/* BPF_R0	BPF_TCC */
+#define MIPS_R_A0	4	/* BPF_R1	BPF_R1 */
+#define MIPS_R_A1	5	/* BPF_R1	BPF_R2 */
+#define MIPS_R_A2	6	/* BPF_R2	BPF_R3 */
+#define MIPS_R_A3	7	/* BPF_R2	BPF_R4 */
+/* MIPS64 swaps T0-T3 regs for extra args A4-A7. */
+#ifdef CONFIG_64BIT
+#  define MIPS_R_A4	8	/* (n/a)	BPF_R5 */
+#else /* CONFIG_32BIT */
+#  define MIPS_R_T0	8	/* BPF_R3	(n/a)  */
+#  define MIPS_R_T1	9	/* BPF_R3	(n/a)  */
+#  define MIPS_R_T2	10	/* BPF_R4	(n/a)  */
+#  define MIPS_R_T3	11	/* BPF_R4	(n/a)  */
+#endif
+#define MIPS_R_T4	12	/* BPF_R5	BPF_AX */
+#define MIPS_R_T5	13	/* BPF_R5	(free) */
+#define MIPS_R_T6	14	/* BPF_AX	(used) */
+#define MIPS_R_T7	15	/* BPF_AX	(free) */
+#define MIPS_R_S0	16	/* BPF_R6	BPF_R6 */
+#define MIPS_R_S1	17	/* BPF_R6	BPF_R7 */
+#define MIPS_R_S2	18	/* BPF_R7	BPF_R8 */
+#define MIPS_R_S3	19	/* BPF_R7	BPF_R9 */
+#define MIPS_R_S4	20	/* BPF_R8	BPF_TCC */
+#define MIPS_R_S5	21	/* BPF_R8	(free) */
+#define MIPS_R_S6	22	/* BPF_R9	(free) */
+#define MIPS_R_S7	23	/* BPF_R9	(free) */
+#define MIPS_R_T8	24	/* (used)	(used) */
+#define MIPS_R_T9	25	/* (used)	(used) */
+#define MIPS_R_SP	29
+#define MIPS_R_S8	30	/* BPF_R10	BPF_R10 */
+#define MIPS_R_RA	31
+
+/* eBPF flags */
+#define EBPF_SAVE_S0	BIT(0)
+#define EBPF_SAVE_S1	BIT(1)
+#define EBPF_SAVE_S2	BIT(2)
+#define EBPF_SAVE_S3	BIT(3)
+#define EBPF_SAVE_S4	BIT(4)
+#define EBPF_SAVE_S5	BIT(5)
+#define EBPF_SAVE_S6	BIT(6)
+#define EBPF_SAVE_S7	BIT(7)
+#define EBPF_SAVE_S8	BIT(8)
+#define EBPF_SAVE_RA	BIT(9)
+#define EBPF_SEEN_FP	BIT(10)
+#define EBPF_SEEN_TC	BIT(11)
+#define EBPF_TCC_IN_RUN	BIT(12)
+
+/*
+ * Word-size and endianness-aware helpers for building MIPS32 vs MIPS64
+ * tables and selecting 32-bit subregisters from a register pair base.
+ * Simplify use by emulating MIPS_R_SP and MIPS_R_ZERO as register pairs
+ * and adding HI/LO word memory offsets.
+ */
+#ifdef CONFIG_64BIT
+#  define HI(reg) (reg)
+#  define LO(reg) (reg)
+#  define OFFHI(mem) (mem)
+#  define OFFLO(mem) (mem)
+#else /* CONFIG_32BIT */
+#  ifdef __BIG_ENDIAN
+#    define HI(reg) ((reg) == MIPS_R_SP ? MIPS_R_ZERO : \
+		     (reg) == MIPS_R_S8 ? MIPS_R_ZERO : \
+		     (reg))
+#    define LO(reg) ((reg) == MIPS_R_ZERO ? (reg) : \
+		     (reg) == MIPS_R_SP ? (reg) : \
+		     (reg) == MIPS_R_S8 ? (reg) : \
+		     (reg) + 1)
+#    define OFFHI(mem) (mem)
+#    define OFFLO(mem) ((mem) + sizeof(long))
+#  else	/* __LITTLE_ENDIAN */
+#    define HI(reg) ((reg) == MIPS_R_ZERO ? (reg) : \
+		     (reg) == MIPS_R_SP ? MIPS_R_ZERO : \
+		     (reg) == MIPS_R_S8 ? MIPS_R_ZERO : \
+		     (reg) + 1)
+#    define LO(reg) (reg)
+#    define OFFHI(mem) ((mem) + sizeof(long))
+#    define OFFLO(mem) (mem)
+#  endif
+#endif
+
+static inline bool is64bit(void)
+{
+	return IS_ENABLED(CONFIG_64BIT);
+}
+
+static inline bool isbigend(void)
+{
+	return IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
+}
+
+/*
+ * For the mips64 ISA, we need to track the value range or type for
+ * each JIT register.  The BPF machine requires zero extended 32-bit
+ * values, but the mips64 ISA requires sign extended 32-bit values.
+ * At each point in the BPF program we track the state of every
+ * register so that we can zero extend or sign extend as the BPF
+ * semantics require.
+ */
+enum reg_val_type {
+	/* uninitialized */
+	REG_UNKNOWN,
+	/* not known to be 32-bit compatible. */
+	REG_64BIT,
+	/* 32-bit compatible, no truncation needed for 64-bit ops. */
+	REG_64BIT_32BIT,
+	/* 32-bit compatible, need truncation for 64-bit ops. */
+	REG_32BIT,
+	/* 32-bit no sign/zero extension needed. */
+	REG_32BIT_POS
+};
+
+/**
+ * struct jit_ctx - JIT context
+ * @prog:		The program
+ * @stack_size:		eBPF stack size
+ * @bpf_stack_off:	eBPF FP offset
+ * @idx:		Instruction index
+ * @flags:		JIT flags
+ * @offsets:		Instruction offsets
+ * @target:		Memory location for compiled instructions
+ * @reg_val_types:	Packed enum reg_val_type for each register
+ */
+struct jit_ctx {
+	const struct bpf_prog *prog;
+	int stack_size;
+	int bpf_stack_off;
+	int prolog_skip;
+	u32 idx;
+	u32 flags;
+	u32 *offsets;
+	u32 *target;
+	u64 *reg_val_types;
+	unsigned int long_b_conversion:1;
+	unsigned int gen_b_offsets:1;
+	unsigned int use_bbit_insns:1;
+};
+
+static inline void set_reg_val_type(u64 *rvt, int reg, enum reg_val_type type)
+{
+	*rvt &= ~(7ull << (reg * 3));
+	*rvt |= ((u64)type << (reg * 3));
+}
+
+static inline enum reg_val_type get_reg_val_type(const struct jit_ctx *ctx,
+					  int index, int reg)
+{
+	return (ctx->reg_val_types[index] >> (reg * 3)) & 7;
+}
+
+/* Emit the instruction only if JIT memory is allocated; always advance idx */
+#define emit_instr_long(ctx, func64, func32, ...)		\
+do {								\
+	if ((ctx)->target != NULL) {				\
+		u32 *p = &(ctx)->target[(ctx)->idx];		\
+		if (IS_ENABLED(CONFIG_64BIT))			\
+			uasm_i_##func64(&p, ##__VA_ARGS__);	\
+		else						\
+			uasm_i_##func32(&p, ##__VA_ARGS__);	\
+	}							\
+	(ctx)->idx++;						\
+} while (0)
+
+#define emit_instr(ctx, func, ...)				\
+	emit_instr_long(ctx, func, func, ##__VA_ARGS__)
+
+/*
+ * High bit of offsets indicates if long branch conversion done at
+ * this insn.
+ */
+#define OFFSETS_B_CONV	BIT(31)
+
+static inline unsigned int j_target(struct jit_ctx *ctx, int target_idx)
+{
+	unsigned long target_va, base_va;
+	unsigned int r;
+
+	if (!ctx->target)
+		return 0;
+
+	base_va = (unsigned long)ctx->target;
+	target_va = base_va + (ctx->offsets[target_idx] & ~OFFSETS_B_CONV);
+
+	if ((base_va & ~0x0ffffffful) != (target_va & ~0x0ffffffful))
+		return (unsigned int)-1;
+	r = target_va & 0x0ffffffful;
+	return r;
+}
+
+/* Compute the immediate value for PC-relative branches. */
+static inline u32 b_imm(unsigned int tgt, struct jit_ctx *ctx)
+{
+	if (!ctx->gen_b_offsets)
+		return 0;
+
+	/*
+	 * We want a pc-relative branch.  tgt is the instruction offset
+	 * we want to jump to.
+	 *
+	 * Branch on MIPS:
+	 * I: target_offset <- sign_extend(offset)
+	 * I+1: PC += target_offset (delay slot)
+	 *
+	 * ctx->idx currently points to the branch instruction
+	 * but the offset is added to the delay slot so we need
+	 * to subtract 4.
+	 */
+	return (ctx->offsets[tgt] & ~OFFSETS_B_CONV) -
+		(ctx->idx * 4) - 4;
+}
+
+static inline bool tail_call_present(struct jit_ctx *ctx)
+{
+	return ctx->flags & EBPF_SEEN_TC || ctx->prog->aux->tail_call_reachable;
+}
+
+static inline bool is_bad_offset(int b_off)
+{
+	return b_off > 0x1ffff || b_off < -0x20000;
+}
+
+/* Sign-extend dst register or HI 32-bit reg of pair. */
+static inline void gen_sext_insn(int dst, struct jit_ctx *ctx)
+{
+	if (is64bit())
+		emit_instr(ctx, sll, dst, dst, 0);
+	else
+		emit_instr(ctx, sra, HI(dst), LO(dst), 31);
+}
+
+/*
+ * Zero-extend dst register or HI 32-bit reg of pair, if either forced
+ * or the BPF verifier does not insert its own zext insns.
+ */
+static inline void gen_zext_insn(int dst, bool force, struct jit_ctx *ctx)
+{
+	if (!ctx->prog->aux->verifier_zext || force) {
+		if (is64bit())
+			emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
+		else
+			emit_instr(ctx, and, HI(dst), MIPS_R_ZERO, MIPS_R_ZERO);
+	}
+}
+
+enum reg_usage {
+	REG_SRC_FP_OK,
+	REG_SRC_NO_FP,
+	REG_DST_FP_OK,
+	REG_DST_NO_FP
+};
+
+int ebpf_to_mips_reg(struct jit_ctx *ctx,
+		     const struct bpf_insn *insn,
+		     enum reg_usage u);
+
+void gen_imm_to_reg(const struct bpf_insn *insn, int reg,
+		    struct jit_ctx *ctx);
+
+void emit_const_to_reg(struct jit_ctx *ctx, int dst, unsigned long value);
+
+void emit_bpf_call(struct jit_ctx *ctx, const struct bpf_insn *insn);
+
+int emit_bpf_tail_call(struct jit_ctx *ctx, int this_idx);
+
+void emit_caller_save(struct jit_ctx *ctx);
+
+void emit_caller_restore(struct jit_ctx *ctx, int bpf_ret);
+
+int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
+		   int this_idx, int exit_idx);
+
+#endif /* _EBPF_JIT_H */
diff --git a/arch/mips/net/ebpf_jit_comp64.c b/arch/mips/net/ebpf_jit_comp64.c
new file mode 100644
index 000000000000..c38d93d37ce3
--- /dev/null
+++ b/arch/mips/net/ebpf_jit_comp64.c
@@ -0,0 +1,990 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Just-In-Time compiler for eBPF filters on MIPS32/MIPS64
+ * Copyright (c) 2021 Tony Ambardar <Tony.Ambardar@gmail.com>
+ *
+ * Based on code from:
+ *
+ * Copyright (c) 2017 Cavium, Inc.
+ * Author: David Daney <david.daney@cavium.com>
+ *
+ * Copyright (c) 2014 Imagination Technologies Ltd.
+ * Author: Markos Chandras <markos.chandras@imgtec.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/filter.h>
+#include <asm/uasm.h>
+
+#include "ebpf_jit.h"
+
+static int gen_imm_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
+			int idx)
+{
+	int upper_bound, lower_bound;
+	int dst = ebpf_to_mips_reg(ctx, insn, REG_DST_NO_FP);
+
+	if (dst < 0)
+		return dst;
+
+	switch (BPF_OP(insn->code)) {
+	case BPF_MOV:
+	case BPF_ADD:
+		upper_bound = S16_MAX;
+		lower_bound = S16_MIN;
+		break;
+	case BPF_SUB:
+		upper_bound = -(int)S16_MIN;
+		lower_bound = -(int)S16_MAX;
+		break;
+	case BPF_AND:
+	case BPF_OR:
+	case BPF_XOR:
+		upper_bound = 0xffff;
+		lower_bound = 0;
+		break;
+	case BPF_RSH:
+	case BPF_LSH:
+	case BPF_ARSH:
+		/* Shift amounts are truncated, no need for bounds */
+		upper_bound = S32_MAX;
+		lower_bound = S32_MIN;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/*
+	 * Immediate move clobbers the register, so no sign/zero
+	 * extension needed.
+	 */
+	if (BPF_CLASS(insn->code) == BPF_ALU64 &&
+	    BPF_OP(insn->code) != BPF_MOV &&
+	    get_reg_val_type(ctx, idx, insn->dst_reg) == REG_32BIT)
+		emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
+	/* BPF_ALU | BPF_LSH doesn't need separate sign extension */
+	if (BPF_CLASS(insn->code) == BPF_ALU &&
+	    BPF_OP(insn->code) != BPF_LSH &&
+	    BPF_OP(insn->code) != BPF_MOV &&
+	    get_reg_val_type(ctx, idx, insn->dst_reg) != REG_32BIT)
+		emit_instr(ctx, sll, dst, dst, 0);
+
+	if (insn->imm >= lower_bound && insn->imm <= upper_bound) {
+		/* single insn immediate case */
+		switch (BPF_OP(insn->code) | BPF_CLASS(insn->code)) {
+		case BPF_ALU64 | BPF_MOV:
+			emit_instr(ctx, daddiu, dst, MIPS_R_ZERO, insn->imm);
+			break;
+		case BPF_ALU64 | BPF_AND:
+		case BPF_ALU | BPF_AND:
+			emit_instr(ctx, andi, dst, dst, insn->imm);
+			break;
+		case BPF_ALU64 | BPF_OR:
+		case BPF_ALU | BPF_OR:
+			emit_instr(ctx, ori, dst, dst, insn->imm);
+			break;
+		case BPF_ALU64 | BPF_XOR:
+		case BPF_ALU | BPF_XOR:
+			emit_instr(ctx, xori, dst, dst, insn->imm);
+			break;
+		case BPF_ALU64 | BPF_ADD:
+			emit_instr(ctx, daddiu, dst, dst, insn->imm);
+			break;
+		case BPF_ALU64 | BPF_SUB:
+			emit_instr(ctx, daddiu, dst, dst, -insn->imm);
+			break;
+		case BPF_ALU64 | BPF_RSH:
+			emit_instr(ctx, dsrl_safe, dst, dst, insn->imm & 0x3f);
+			break;
+		case BPF_ALU | BPF_RSH:
+			emit_instr(ctx, srl, dst, dst, insn->imm & 0x1f);
+			break;
+		case BPF_ALU64 | BPF_LSH:
+			emit_instr(ctx, dsll_safe, dst, dst, insn->imm & 0x3f);
+			break;
+		case BPF_ALU | BPF_LSH:
+			emit_instr(ctx, sll, dst, dst, insn->imm & 0x1f);
+			break;
+		case BPF_ALU64 | BPF_ARSH:
+			emit_instr(ctx, dsra_safe, dst, dst, insn->imm & 0x3f);
+			break;
+		case BPF_ALU | BPF_ARSH:
+			emit_instr(ctx, sra, dst, dst, insn->imm & 0x1f);
+			break;
+		case BPF_ALU | BPF_MOV:
+			emit_instr(ctx, addiu, dst, MIPS_R_ZERO, insn->imm);
+			break;
+		case BPF_ALU | BPF_ADD:
+			emit_instr(ctx, addiu, dst, dst, insn->imm);
+			break;
+		case BPF_ALU | BPF_SUB:
+			emit_instr(ctx, addiu, dst, dst, -insn->imm);
+			break;
+		default:
+			return -EINVAL;
+		}
+	} else {
+		/* multi insn immediate case */
+		if (BPF_OP(insn->code) == BPF_MOV) {
+			gen_imm_to_reg(insn, dst, ctx);
+		} else {
+			gen_imm_to_reg(insn, MIPS_R_AT, ctx);
+			switch (BPF_OP(insn->code) | BPF_CLASS(insn->code)) {
+			case BPF_ALU64 | BPF_AND:
+			case BPF_ALU | BPF_AND:
+				emit_instr(ctx, and, dst, dst, MIPS_R_AT);
+				break;
+			case BPF_ALU64 | BPF_OR:
+			case BPF_ALU | BPF_OR:
+				emit_instr(ctx, or, dst, dst, MIPS_R_AT);
+				break;
+			case BPF_ALU64 | BPF_XOR:
+			case BPF_ALU | BPF_XOR:
+				emit_instr(ctx, xor, dst, dst, MIPS_R_AT);
+				break;
+			case BPF_ALU64 | BPF_ADD:
+				emit_instr(ctx, daddu, dst, dst, MIPS_R_AT);
+				break;
+			case BPF_ALU64 | BPF_SUB:
+				emit_instr(ctx, dsubu, dst, dst, MIPS_R_AT);
+				break;
+			case BPF_ALU | BPF_ADD:
+				emit_instr(ctx, addu, dst, dst, MIPS_R_AT);
+				break;
+			case BPF_ALU | BPF_SUB:
+				emit_instr(ctx, subu, dst, dst, MIPS_R_AT);
+				break;
+			default:
+				return -EINVAL;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/* Returns the number of insn slots consumed. */
+int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
+		   int this_idx, int exit_idx)
+{
+	int src, dst, r, td, ts, mem_off, b_off;
+	bool need_swap, did_move, cmp_eq;
+	unsigned int target = 0;
+	u64 t64;
+	s64 t64s;
+	int bpf_op = BPF_OP(insn->code);
+
+	switch (insn->code) {
+	case BPF_ALU64 | BPF_ADD | BPF_K: /* ALU64_IMM */
+	case BPF_ALU64 | BPF_SUB | BPF_K: /* ALU64_IMM */
+	case BPF_ALU64 | BPF_OR | BPF_K: /* ALU64_IMM */
+	case BPF_ALU64 | BPF_AND | BPF_K: /* ALU64_IMM */
+	case BPF_ALU64 | BPF_LSH | BPF_K: /* ALU64_IMM */
+	case BPF_ALU64 | BPF_RSH | BPF_K: /* ALU64_IMM */
+	case BPF_ALU64 | BPF_XOR | BPF_K: /* ALU64_IMM */
+	case BPF_ALU64 | BPF_ARSH | BPF_K: /* ALU64_IMM */
+	case BPF_ALU64 | BPF_MOV | BPF_K: /* ALU64_IMM */
+	case BPF_ALU | BPF_MOV | BPF_K: /* ALU32_IMM */
+	case BPF_ALU | BPF_ADD | BPF_K: /* ALU32_IMM */
+	case BPF_ALU | BPF_SUB | BPF_K: /* ALU32_IMM */
+	case BPF_ALU | BPF_OR | BPF_K: /* ALU64_IMM */
+	case BPF_ALU | BPF_AND | BPF_K: /* ALU64_IMM */
+	case BPF_ALU | BPF_LSH | BPF_K: /* ALU64_IMM */
+	case BPF_ALU | BPF_RSH | BPF_K: /* ALU64_IMM */
+	case BPF_ALU | BPF_XOR | BPF_K: /* ALU64_IMM */
+	case BPF_ALU | BPF_ARSH | BPF_K: /* ALU64_IMM */
+		r = gen_imm_insn(insn, ctx, this_idx);
+		if (r < 0)
+			return r;
+		break;
+	case BPF_ALU64 | BPF_MUL | BPF_K: /* ALU64_IMM */
+		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_NO_FP);
+		if (dst < 0)
+			return dst;
+		if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT)
+			emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
+		if (insn->imm == 1) /* Mult by 1 is a nop */
+			break;
+		gen_imm_to_reg(insn, MIPS_R_AT, ctx);
+		if (MIPS_ISA_REV >= 6) {
+			emit_instr(ctx, dmulu, dst, dst, MIPS_R_AT);
+		} else {
+			emit_instr(ctx, dmultu, MIPS_R_AT, dst);
+			emit_instr(ctx, mflo, dst);
+		}
+		break;
+	case BPF_ALU64 | BPF_NEG | BPF_K: /* ALU64_IMM */
+		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_NO_FP);
+		if (dst < 0)
+			return dst;
+		if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT)
+			emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
+		emit_instr(ctx, dsubu, dst, MIPS_R_ZERO, dst);
+		break;
+	case BPF_ALU | BPF_MUL | BPF_K: /* ALU_IMM */
+		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_NO_FP);
+		if (dst < 0)
+			return dst;
+		td = get_reg_val_type(ctx, this_idx, insn->dst_reg);
+		if (td == REG_64BIT) {
+			/* sign extend */
+			emit_instr(ctx, sll, dst, dst, 0);
+		}
+		if (insn->imm == 1) /* Mult by 1 is a nop */
+			break;
+		gen_imm_to_reg(insn, MIPS_R_AT, ctx);
+		if (MIPS_ISA_REV >= 6) {
+			emit_instr(ctx, mulu, dst, dst, MIPS_R_AT);
+		} else {
+			emit_instr(ctx, multu, dst, MIPS_R_AT);
+			emit_instr(ctx, mflo, dst);
+		}
+		break;
+	case BPF_ALU | BPF_NEG | BPF_K: /* ALU_IMM */
+		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_NO_FP);
+		if (dst < 0)
+			return dst;
+		td = get_reg_val_type(ctx, this_idx, insn->dst_reg);
+		if (td == REG_64BIT) {
+			/* sign extend */
+			emit_instr(ctx, sll, dst, dst, 0);
+		}
+		emit_instr(ctx, subu, dst, MIPS_R_ZERO, dst);
+		break;
+	case BPF_ALU | BPF_DIV | BPF_K: /* ALU_IMM */
+	case BPF_ALU | BPF_MOD | BPF_K: /* ALU_IMM */
+		if (insn->imm == 0)
+			return -EINVAL;
+		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_NO_FP);
+		if (dst < 0)
+			return dst;
+		td = get_reg_val_type(ctx, this_idx, insn->dst_reg);
+		if (td == REG_64BIT)
+			/* sign extend */
+			emit_instr(ctx, sll, dst, dst, 0);
+		if (insn->imm == 1) {
+			/* div by 1 is a nop, mod by 1 is zero */
+			if (bpf_op == BPF_MOD)
+				emit_instr(ctx, addu, dst, MIPS_R_ZERO, MIPS_R_ZERO);
+			break;
+		}
+		gen_imm_to_reg(insn, MIPS_R_AT, ctx);
+		if (MIPS_ISA_REV >= 6) {
+			if (bpf_op == BPF_DIV)
+				emit_instr(ctx, divu_r6, dst, dst, MIPS_R_AT);
+			else
+				emit_instr(ctx, modu, dst, dst, MIPS_R_AT);
+			break;
+		}
+		emit_instr(ctx, divu, dst, MIPS_R_AT);
+		if (bpf_op == BPF_DIV)
+			emit_instr(ctx, mflo, dst);
+		else
+			emit_instr(ctx, mfhi, dst);
+		break;
+	case BPF_ALU64 | BPF_DIV | BPF_K: /* ALU_IMM */
+	case BPF_ALU64 | BPF_MOD | BPF_K: /* ALU_IMM */
+		if (insn->imm == 0)
+			return -EINVAL;
+		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_NO_FP);
+		if (dst < 0)
+			return dst;
+		if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT)
+			emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
+		if (insn->imm == 1) {
+			/* div by 1 is a nop, mod by 1 is zero */
+			if (bpf_op == BPF_MOD)
+				emit_instr(ctx, addu, dst, MIPS_R_ZERO, MIPS_R_ZERO);
+			break;
+		}
+		gen_imm_to_reg(insn, MIPS_R_AT, ctx);
+		if (MIPS_ISA_REV >= 6) {
+			if (bpf_op == BPF_DIV)
+				emit_instr(ctx, ddivu_r6, dst, dst, MIPS_R_AT);
+			else
+				emit_instr(ctx, dmodu, dst, dst, MIPS_R_AT);
+			break;
+		}
+		emit_instr(ctx, ddivu, dst, MIPS_R_AT);
+		if (bpf_op == BPF_DIV)
+			emit_instr(ctx, mflo, dst);
+		else
+			emit_instr(ctx, mfhi, dst);
+		break;
+	case BPF_ALU64 | BPF_MOV | BPF_X: /* ALU64_REG */
+	case BPF_ALU64 | BPF_ADD | BPF_X: /* ALU64_REG */
+	case BPF_ALU64 | BPF_SUB | BPF_X: /* ALU64_REG */
+	case BPF_ALU64 | BPF_XOR | BPF_X: /* ALU64_REG */
+	case BPF_ALU64 | BPF_OR | BPF_X: /* ALU64_REG */
+	case BPF_ALU64 | BPF_AND | BPF_X: /* ALU64_REG */
+	case BPF_ALU64 | BPF_MUL | BPF_X: /* ALU64_REG */
+	case BPF_ALU64 | BPF_DIV | BPF_X: /* ALU64_REG */
+	case BPF_ALU64 | BPF_MOD | BPF_X: /* ALU64_REG */
+	case BPF_ALU64 | BPF_LSH | BPF_X: /* ALU64_REG */
+	case BPF_ALU64 | BPF_RSH | BPF_X: /* ALU64_REG */
+	case BPF_ALU64 | BPF_ARSH | BPF_X: /* ALU64_REG */
+		src = ebpf_to_mips_reg(ctx, insn, REG_SRC_FP_OK);
+		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_NO_FP);
+		if (src < 0 || dst < 0)
+			return -EINVAL;
+		if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT)
+			emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
+		did_move = false;
+		if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) {
+			int tmp_reg = MIPS_R_AT;
+
+			if (bpf_op == BPF_MOV) {
+				tmp_reg = dst;
+				did_move = true;
+			}
+			emit_instr(ctx, daddu, tmp_reg, src, MIPS_R_ZERO);
+			emit_instr(ctx, dinsu, tmp_reg, MIPS_R_ZERO, 32, 32);
+			src = MIPS_R_AT;
+		}
+		switch (bpf_op) {
+		case BPF_MOV:
+			if (!did_move)
+				emit_instr(ctx, daddu, dst, src, MIPS_R_ZERO);
+			break;
+		case BPF_ADD:
+			emit_instr(ctx, daddu, dst, dst, src);
+			break;
+		case BPF_SUB:
+			emit_instr(ctx, dsubu, dst, dst, src);
+			break;
+		case BPF_XOR:
+			emit_instr(ctx, xor, dst, dst, src);
+			break;
+		case BPF_OR:
+			emit_instr(ctx, or, dst, dst, src);
+			break;
+		case BPF_AND:
+			emit_instr(ctx, and, dst, dst, src);
+			break;
+		case BPF_MUL:
+			if (MIPS_ISA_REV >= 6) {
+				emit_instr(ctx, dmulu, dst, dst, src);
+			} else {
+				emit_instr(ctx, dmultu, dst, src);
+				emit_instr(ctx, mflo, dst);
+			}
+			break;
+		case BPF_DIV:
+		case BPF_MOD:
+			if (MIPS_ISA_REV >= 6) {
+				if (bpf_op == BPF_DIV)
+					emit_instr(ctx, ddivu_r6,
+							dst, dst, src);
+				else
+					emit_instr(ctx, dmodu, dst, dst, src);
+				break;
+			}
+			emit_instr(ctx, ddivu, dst, src);
+			if (bpf_op == BPF_DIV)
+				emit_instr(ctx, mflo, dst);
+			else
+				emit_instr(ctx, mfhi, dst);
+			break;
+		case BPF_LSH:
+			emit_instr(ctx, dsllv, dst, dst, src);
+			break;
+		case BPF_RSH:
+			emit_instr(ctx, dsrlv, dst, dst, src);
+			break;
+		case BPF_ARSH:
+			emit_instr(ctx, dsrav, dst, dst, src);
+			break;
+		default:
+			pr_err("ALU64_REG NOT HANDLED\n");
+			return -EINVAL;
+		}
+		break;
+	case BPF_ALU | BPF_MOV | BPF_X: /* ALU_REG */
+	case BPF_ALU | BPF_ADD | BPF_X: /* ALU_REG */
+	case BPF_ALU | BPF_SUB | BPF_X: /* ALU_REG */
+	case BPF_ALU | BPF_XOR | BPF_X: /* ALU_REG */
+	case BPF_ALU | BPF_OR | BPF_X: /* ALU_REG */
+	case BPF_ALU | BPF_AND | BPF_X: /* ALU_REG */
+	case BPF_ALU | BPF_MUL | BPF_X: /* ALU_REG */
+	case BPF_ALU | BPF_DIV | BPF_X: /* ALU_REG */
+	case BPF_ALU | BPF_MOD | BPF_X: /* ALU_REG */
+	case BPF_ALU | BPF_LSH | BPF_X: /* ALU_REG */
+	case BPF_ALU | BPF_RSH | BPF_X: /* ALU_REG */
+	case BPF_ALU | BPF_ARSH | BPF_X: /* ALU_REG */
+		src = ebpf_to_mips_reg(ctx, insn, REG_SRC_FP_OK);
+		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_NO_FP);
+		if (src < 0 || dst < 0)
+			return -EINVAL;
+		td = get_reg_val_type(ctx, this_idx, insn->dst_reg);
+		if (td == REG_64BIT) {
+			/* sign extend */
+			emit_instr(ctx, sll, dst, dst, 0);
+		}
+		did_move = false;
+		ts = get_reg_val_type(ctx, this_idx, insn->src_reg);
+		if (ts == REG_64BIT) {
+			int tmp_reg = MIPS_R_AT;
+
+			if (bpf_op == BPF_MOV) {
+				tmp_reg = dst;
+				did_move = true;
+			}
+			/* sign extend */
+			emit_instr(ctx, sll, tmp_reg, src, 0);
+			src = MIPS_R_AT;
+		}
+		switch (bpf_op) {
+		case BPF_MOV:
+			if (!did_move)
+				emit_instr(ctx, addu, dst, src, MIPS_R_ZERO);
+			break;
+		case BPF_ADD:
+			emit_instr(ctx, addu, dst, dst, src);
+			break;
+		case BPF_SUB:
+			emit_instr(ctx, subu, dst, dst, src);
+			break;
+		case BPF_XOR:
+			emit_instr(ctx, xor, dst, dst, src);
+			break;
+		case BPF_OR:
+			emit_instr(ctx, or, dst, dst, src);
+			break;
+		case BPF_AND:
+			emit_instr(ctx, and, dst, dst, src);
+			break;
+		case BPF_MUL:
+			emit_instr(ctx, mul, dst, dst, src);
+			break;
+		case BPF_DIV:
+		case BPF_MOD:
+			if (MIPS_ISA_REV >= 6) {
+				if (bpf_op == BPF_DIV)
+					emit_instr(ctx, divu_r6, dst, dst, src);
+				else
+					emit_instr(ctx, modu, dst, dst, src);
+				break;
+			}
+			emit_instr(ctx, divu, dst, src);
+			if (bpf_op == BPF_DIV)
+				emit_instr(ctx, mflo, dst);
+			else
+				emit_instr(ctx, mfhi, dst);
+			break;
+		case BPF_LSH:
+			emit_instr(ctx, sllv, dst, dst, src);
+			break;
+		case BPF_RSH:
+			emit_instr(ctx, srlv, dst, dst, src);
+			break;
+		case BPF_ARSH:
+			emit_instr(ctx, srav, dst, dst, src);
+			break;
+		default:
+			pr_err("ALU_REG NOT HANDLED\n");
+			return -EINVAL;
+		}
+		break;
+	case BPF_JMP | BPF_EXIT:
+		if (this_idx + 1 < exit_idx) {
+			b_off = b_imm(exit_idx, ctx);
+			if (is_bad_offset(b_off)) {
+				target = j_target(ctx, exit_idx);
+				if (target == (unsigned int)-1)
+					return -E2BIG;
+				emit_instr(ctx, j, target);
+			} else {
+				emit_instr(ctx, b, b_off);
+			}
+			emit_instr(ctx, nop);
+		}
+		break;
+	case BPF_JMP | BPF_JEQ | BPF_K: /* JMP_IMM */
+	case BPF_JMP | BPF_JNE | BPF_K: /* JMP_IMM */
+		cmp_eq = (bpf_op == BPF_JEQ);
+		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_FP_OK);
+		if (dst < 0)
+			return dst;
+		if (insn->imm == 0) {
+			src = MIPS_R_ZERO;
+		} else {
+			gen_imm_to_reg(insn, MIPS_R_AT, ctx);
+			src = MIPS_R_AT;
+		}
+		goto jeq_common;
+	case BPF_JMP | BPF_JEQ | BPF_X: /* JMP_REG */
+	case BPF_JMP | BPF_JNE | BPF_X:
+	case BPF_JMP | BPF_JSLT | BPF_X:
+	case BPF_JMP | BPF_JSLE | BPF_X:
+	case BPF_JMP | BPF_JSGT | BPF_X:
+	case BPF_JMP | BPF_JSGE | BPF_X:
+	case BPF_JMP | BPF_JLT | BPF_X:
+	case BPF_JMP | BPF_JLE | BPF_X:
+	case BPF_JMP | BPF_JGT | BPF_X:
+	case BPF_JMP | BPF_JGE | BPF_X:
+	case BPF_JMP | BPF_JSET | BPF_X:
+		src = ebpf_to_mips_reg(ctx, insn, REG_SRC_FP_OK);
+		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_FP_OK);
+		if (src < 0 || dst < 0)
+			return -EINVAL;
+		td = get_reg_val_type(ctx, this_idx, insn->dst_reg);
+		ts = get_reg_val_type(ctx, this_idx, insn->src_reg);
+		if (td == REG_32BIT && ts != REG_32BIT) {
+			emit_instr(ctx, sll, MIPS_R_AT, src, 0);
+			src = MIPS_R_AT;
+		} else if (ts == REG_32BIT && td != REG_32BIT) {
+			emit_instr(ctx, sll, MIPS_R_AT, dst, 0);
+			dst = MIPS_R_AT;
+		}
+		if (bpf_op == BPF_JSET) {
+			emit_instr(ctx, and, MIPS_R_AT, dst, src);
+			cmp_eq = false;
+			dst = MIPS_R_AT;
+			src = MIPS_R_ZERO;
+		} else if (bpf_op == BPF_JSGT || bpf_op == BPF_JSLE) {
+			emit_instr(ctx, dsubu, MIPS_R_AT, dst, src);
+			if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) {
+				b_off = b_imm(exit_idx, ctx);
+				if (is_bad_offset(b_off))
+					return -E2BIG;
+				if (bpf_op == BPF_JSGT)
+					emit_instr(ctx, blez, MIPS_R_AT, b_off);
+				else
+					emit_instr(ctx, bgtz, MIPS_R_AT, b_off);
+				emit_instr(ctx, nop);
+				return 2; /* We consumed the exit. */
+			}
+			b_off = b_imm(this_idx + insn->off + 1, ctx);
+			if (is_bad_offset(b_off))
+				return -E2BIG;
+			if (bpf_op == BPF_JSGT)
+				emit_instr(ctx, bgtz, MIPS_R_AT, b_off);
+			else
+				emit_instr(ctx, blez, MIPS_R_AT, b_off);
+			emit_instr(ctx, nop);
+			break;
+		} else if (bpf_op == BPF_JSGE || bpf_op == BPF_JSLT) {
+			emit_instr(ctx, slt, MIPS_R_AT, dst, src);
+			cmp_eq = bpf_op == BPF_JSGE;
+			dst = MIPS_R_AT;
+			src = MIPS_R_ZERO;
+		} else if (bpf_op == BPF_JGT || bpf_op == BPF_JLE) {
+			/* dst or src could be AT */
+			emit_instr(ctx, dsubu, MIPS_R_T8, dst, src);
+			emit_instr(ctx, sltu, MIPS_R_AT, dst, src);
+			/* SP known to be non-zero, movz becomes boolean not */
+			if (MIPS_ISA_REV >= 6) {
+				emit_instr(ctx, seleqz, MIPS_R_T9,
+						MIPS_R_SP, MIPS_R_T8);
+			} else {
+				emit_instr(ctx, movz, MIPS_R_T9,
+						MIPS_R_SP, MIPS_R_T8);
+				emit_instr(ctx, movn, MIPS_R_T9,
+						MIPS_R_ZERO, MIPS_R_T8);
+			}
+			emit_instr(ctx, or, MIPS_R_AT, MIPS_R_T9, MIPS_R_AT);
+			cmp_eq = bpf_op == BPF_JGT;
+			dst = MIPS_R_AT;
+			src = MIPS_R_ZERO;
+		} else if (bpf_op == BPF_JGE || bpf_op == BPF_JLT) {
+			emit_instr(ctx, sltu, MIPS_R_AT, dst, src);
+			cmp_eq = bpf_op == BPF_JGE;
+			dst = MIPS_R_AT;
+			src = MIPS_R_ZERO;
+		} else { /* JNE/JEQ case */
+			cmp_eq = (bpf_op == BPF_JEQ);
+		}
+jeq_common:
+		/*
+		 * If the next insn is EXIT and we are jumping around
+		 * only it, invert the sense of the compare and
+		 * conditionally jump to the exit.  Poor man's branch
+		 * chaining.
+		 */
+		if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) {
+			b_off = b_imm(exit_idx, ctx);
+			if (is_bad_offset(b_off)) {
+				target = j_target(ctx, exit_idx);
+				if (target == (unsigned int)-1)
+					return -E2BIG;
+				cmp_eq = !cmp_eq;
+				b_off = 4 * 3;
+				if (!(ctx->offsets[this_idx] & OFFSETS_B_CONV)) {
+					ctx->offsets[this_idx] |= OFFSETS_B_CONV;
+					ctx->long_b_conversion = 1;
+				}
+			}
+
+			if (cmp_eq)
+				emit_instr(ctx, bne, dst, src, b_off);
+			else
+				emit_instr(ctx, beq, dst, src, b_off);
+			emit_instr(ctx, nop);
+			if (ctx->offsets[this_idx] & OFFSETS_B_CONV) {
+				emit_instr(ctx, j, target);
+				emit_instr(ctx, nop);
+			}
+			return 2; /* We consumed the exit. */
+		}
+		b_off = b_imm(this_idx + insn->off + 1, ctx);
+		if (is_bad_offset(b_off)) {
+			target = j_target(ctx, this_idx + insn->off + 1);
+			if (target == (unsigned int)-1)
+				return -E2BIG;
+			cmp_eq = !cmp_eq;
+			b_off = 4 * 3;
+			if (!(ctx->offsets[this_idx] & OFFSETS_B_CONV)) {
+				ctx->offsets[this_idx] |= OFFSETS_B_CONV;
+				ctx->long_b_conversion = 1;
+			}
+		}
+
+		if (cmp_eq)
+			emit_instr(ctx, beq, dst, src, b_off);
+		else
+			emit_instr(ctx, bne, dst, src, b_off);
+		emit_instr(ctx, nop);
+		if (ctx->offsets[this_idx] & OFFSETS_B_CONV) {
+			emit_instr(ctx, j, target);
+			emit_instr(ctx, nop);
+		}
+		break;
+	case BPF_JMP | BPF_JSGT | BPF_K: /* JMP_IMM */
+	case BPF_JMP | BPF_JSGE | BPF_K: /* JMP_IMM */
+	case BPF_JMP | BPF_JSLT | BPF_K: /* JMP_IMM */
+	case BPF_JMP | BPF_JSLE | BPF_K: /* JMP_IMM */
+		cmp_eq = (bpf_op == BPF_JSGE);
+		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_FP_OK);
+		if (dst < 0)
+			return dst;
+
+		if (insn->imm == 0) {
+			if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) {
+				b_off = b_imm(exit_idx, ctx);
+				if (is_bad_offset(b_off))
+					return -E2BIG;
+				switch (bpf_op) {
+				case BPF_JSGT:
+					emit_instr(ctx, blez, dst, b_off);
+					break;
+				case BPF_JSGE:
+					emit_instr(ctx, bltz, dst, b_off);
+					break;
+				case BPF_JSLT:
+					emit_instr(ctx, bgez, dst, b_off);
+					break;
+				case BPF_JSLE:
+					emit_instr(ctx, bgtz, dst, b_off);
+					break;
+				}
+				emit_instr(ctx, nop);
+				return 2; /* We consumed the exit. */
+			}
+			b_off = b_imm(this_idx + insn->off + 1, ctx);
+			if (is_bad_offset(b_off))
+				return -E2BIG;
+			switch (bpf_op) {
+			case BPF_JSGT:
+				emit_instr(ctx, bgtz, dst, b_off);
+				break;
+			case BPF_JSGE:
+				emit_instr(ctx, bgez, dst, b_off);
+				break;
+			case BPF_JSLT:
+				emit_instr(ctx, bltz, dst, b_off);
+				break;
+			case BPF_JSLE:
+				emit_instr(ctx, blez, dst, b_off);
+				break;
+			}
+			emit_instr(ctx, nop);
+			break;
+		}
+		/*
+		 * only "LT" compare available, so we must use imm + 1
+		 * to generate both "GT" and "LE"
+		 */
+		if (bpf_op == BPF_JSGT)
+			t64s = insn->imm + 1;
+		else if (bpf_op == BPF_JSLE)
+			t64s = insn->imm + 1;
+		else
+			t64s = insn->imm;
+
+		cmp_eq = bpf_op == BPF_JSGT || bpf_op == BPF_JSGE;
+		if (t64s >= S16_MIN && t64s <= S16_MAX) {
+			emit_instr(ctx, slti, MIPS_R_AT, dst, (int)t64s);
+			src = MIPS_R_AT;
+			dst = MIPS_R_ZERO;
+			goto jeq_common;
+		}
+		emit_const_to_reg(ctx, MIPS_R_AT, (u64)t64s);
+		emit_instr(ctx, slt, MIPS_R_AT, dst, MIPS_R_AT);
+		src = MIPS_R_AT;
+		dst = MIPS_R_ZERO;
+		goto jeq_common;
+
+	case BPF_JMP | BPF_JGT | BPF_K:
+	case BPF_JMP | BPF_JGE | BPF_K:
+	case BPF_JMP | BPF_JLT | BPF_K:
+	case BPF_JMP | BPF_JLE | BPF_K:
+		cmp_eq = (bpf_op == BPF_JGE);
+		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_FP_OK);
+		if (dst < 0)
+			return dst;
+		/*
+		 * only "LT" compare available, so we must use imm + 1
+		 * to generate both "GT" and "LE"
+		 */
+		if (bpf_op == BPF_JGT)
+			t64s = (u64)(u32)(insn->imm) + 1;
+		else if (bpf_op == BPF_JLE)
+			t64s = (u64)(u32)(insn->imm) + 1;
+		else
+			t64s = (u64)(u32)(insn->imm);
+
+		cmp_eq = bpf_op == BPF_JGT || bpf_op == BPF_JGE;
+
+		emit_const_to_reg(ctx, MIPS_R_AT, (u64)t64s);
+		emit_instr(ctx, sltu, MIPS_R_AT, dst, MIPS_R_AT);
+		src = MIPS_R_AT;
+		dst = MIPS_R_ZERO;
+		goto jeq_common;
+
+	case BPF_JMP | BPF_JSET | BPF_K: /* JMP_IMM */
+		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_FP_OK);
+		if (dst < 0)
+			return dst;
+
+		if (ctx->use_bbit_insns && hweight32((u32)insn->imm) == 1) {
+			if ((insn + 1)->code == (BPF_JMP | BPF_EXIT) && insn->off == 1) {
+				b_off = b_imm(exit_idx, ctx);
+				if (is_bad_offset(b_off))
+					return -E2BIG;
+				emit_instr(ctx, bbit0, dst, ffs((u32)insn->imm) - 1, b_off);
+				emit_instr(ctx, nop);
+				return 2; /* We consumed the exit. */
+			}
+			b_off = b_imm(this_idx + insn->off + 1, ctx);
+			if (is_bad_offset(b_off))
+				return -E2BIG;
+			emit_instr(ctx, bbit1, dst, ffs((u32)insn->imm) - 1, b_off);
+			emit_instr(ctx, nop);
+			break;
+		}
+		t64 = (u32)insn->imm;
+		emit_const_to_reg(ctx, MIPS_R_AT, t64);
+		emit_instr(ctx, and, MIPS_R_AT, dst, MIPS_R_AT);
+		src = MIPS_R_AT;
+		dst = MIPS_R_ZERO;
+		cmp_eq = false;
+		goto jeq_common;
+
+	case BPF_JMP | BPF_JA:
+		/*
+		 * Prefer relative branch for easier debugging, but
+		 * fall back if needed.
+		 */
+		b_off = b_imm(this_idx + insn->off + 1, ctx);
+		if (is_bad_offset(b_off)) {
+			target = j_target(ctx, this_idx + insn->off + 1);
+			if (target == (unsigned int)-1)
+				return -E2BIG;
+			emit_instr(ctx, j, target);
+		} else {
+			emit_instr(ctx, b, b_off);
+		}
+		emit_instr(ctx, nop);
+		break;
+	case BPF_LD | BPF_DW | BPF_IMM:
+		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_NO_FP);
+		if (dst < 0)
+			return dst;
+		t64 = ((u64)(u32)insn->imm) | ((u64)(insn + 1)->imm << 32);
+		emit_const_to_reg(ctx, dst, t64);
+		return 2; /* Double slot insn */
+
+	case BPF_JMP | BPF_CALL:
+		emit_bpf_call(ctx, insn);
+		break;
+
+	case BPF_JMP | BPF_TAIL_CALL:
+		if (emit_bpf_tail_call(ctx, this_idx))
+			return -EINVAL;
+		break;
+
+	case BPF_ALU | BPF_END | BPF_FROM_BE:
+	case BPF_ALU | BPF_END | BPF_FROM_LE:
+		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_NO_FP);
+		if (dst < 0)
+			return dst;
+		td = get_reg_val_type(ctx, this_idx, insn->dst_reg);
+		if (insn->imm == 64 && td == REG_32BIT)
+			emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
+
+		if (insn->imm != 64 && td == REG_64BIT) {
+			/* sign extend */
+			emit_instr(ctx, sll, dst, dst, 0);
+		}
+
+#ifdef __BIG_ENDIAN
+		need_swap = (BPF_SRC(insn->code) == BPF_FROM_LE);
+#else
+		need_swap = (BPF_SRC(insn->code) == BPF_FROM_BE);
+#endif
+		if (insn->imm == 16) {
+			if (need_swap)
+				emit_instr(ctx, wsbh, dst, dst);
+			emit_instr(ctx, andi, dst, dst, 0xffff);
+		} else if (insn->imm == 32) {
+			if (need_swap) {
+				emit_instr(ctx, wsbh, dst, dst);
+				emit_instr(ctx, rotr, dst, dst, 16);
+			}
+		} else { /* 64-bit*/
+			if (need_swap) {
+				emit_instr(ctx, dsbh, dst, dst);
+				emit_instr(ctx, dshd, dst, dst);
+			}
+		}
+		break;
+
+	case BPF_ST | BPF_NOSPEC: /* speculation barrier */
+		break;
+
+	case BPF_ST | BPF_B | BPF_MEM:
+	case BPF_ST | BPF_H | BPF_MEM:
+	case BPF_ST | BPF_W | BPF_MEM:
+	case BPF_ST | BPF_DW | BPF_MEM:
+		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_FP_OK);
+		if (dst < 0)
+			return dst;
+		mem_off = insn->off;
+		gen_imm_to_reg(insn, MIPS_R_AT, ctx);
+		switch (BPF_SIZE(insn->code)) {
+		case BPF_B:
+			emit_instr(ctx, sb, MIPS_R_AT, mem_off, dst);
+			break;
+		case BPF_H:
+			emit_instr(ctx, sh, MIPS_R_AT, mem_off, dst);
+			break;
+		case BPF_W:
+			emit_instr(ctx, sw, MIPS_R_AT, mem_off, dst);
+			break;
+		case BPF_DW:
+			emit_instr(ctx, sd, MIPS_R_AT, mem_off, dst);
+			break;
+		}
+		break;
+
+	case BPF_LDX | BPF_B | BPF_MEM:
+	case BPF_LDX | BPF_H | BPF_MEM:
+	case BPF_LDX | BPF_W | BPF_MEM:
+	case BPF_LDX | BPF_DW | BPF_MEM:
+		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_NO_FP);
+		src = ebpf_to_mips_reg(ctx, insn, REG_SRC_FP_OK);
+		if (dst < 0 || src < 0)
+			return -EINVAL;
+		mem_off = insn->off;
+		switch (BPF_SIZE(insn->code)) {
+		case BPF_B:
+			emit_instr(ctx, lbu, dst, mem_off, src);
+			break;
+		case BPF_H:
+			emit_instr(ctx, lhu, dst, mem_off, src);
+			break;
+		case BPF_W:
+			emit_instr(ctx, lw, dst, mem_off, src);
+			break;
+		case BPF_DW:
+			emit_instr(ctx, ld, dst, mem_off, src);
+			break;
+		}
+		break;
+
+	case BPF_STX | BPF_B | BPF_MEM:
+	case BPF_STX | BPF_H | BPF_MEM:
+	case BPF_STX | BPF_W | BPF_MEM:
+	case BPF_STX | BPF_DW | BPF_MEM:
+	case BPF_STX | BPF_W | BPF_ATOMIC:
+	case BPF_STX | BPF_DW | BPF_ATOMIC:
+		dst = ebpf_to_mips_reg(ctx, insn, REG_DST_FP_OK);
+		src = ebpf_to_mips_reg(ctx, insn, REG_SRC_FP_OK);
+		if (src < 0 || dst < 0)
+			return -EINVAL;
+		mem_off = insn->off;
+		if (BPF_MODE(insn->code) == BPF_ATOMIC) {
+			if (insn->imm != BPF_ADD) {
+				pr_err("ATOMIC OP %02x NOT HANDLED\n", insn->imm);
+				return -EINVAL;
+			}
+			/*
+			 * If mem_off does not fit within the 9 bit ll/sc
+			 * instruction immediate field, use a temp reg.
+			 */
+			if (MIPS_ISA_REV >= 6 &&
+			    (mem_off >= BIT(8) || mem_off < -BIT(8))) {
+				emit_instr(ctx, daddiu, MIPS_R_T6,
+						dst, mem_off);
+				mem_off = 0;
+				dst = MIPS_R_T6;
+			}
+			switch (BPF_SIZE(insn->code)) {
+			case BPF_W:
+				if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) {
+					emit_instr(ctx, sll, MIPS_R_AT, src, 0);
+					src = MIPS_R_AT;
+				}
+				emit_instr(ctx, ll, MIPS_R_T8, mem_off, dst);
+				emit_instr(ctx, addu, MIPS_R_T8, MIPS_R_T8, src);
+				emit_instr(ctx, sc, MIPS_R_T8, mem_off, dst);
+				/*
+				 * On failure back up to LL (-4
+				 * instructions of 4 bytes each)
+				 */
+				emit_instr(ctx, beq, MIPS_R_T8, MIPS_R_ZERO, -4 * 4);
+				emit_instr(ctx, nop);
+				break;
+			case BPF_DW:
+				if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) {
+					emit_instr(ctx, daddu, MIPS_R_AT, src, MIPS_R_ZERO);
+					emit_instr(ctx, dinsu, MIPS_R_AT, MIPS_R_ZERO, 32, 32);
+					src = MIPS_R_AT;
+				}
+				emit_instr(ctx, lld, MIPS_R_T8, mem_off, dst);
+				emit_instr(ctx, daddu, MIPS_R_T8, MIPS_R_T8, src);
+				emit_instr(ctx, scd, MIPS_R_T8, mem_off, dst);
+				emit_instr(ctx, beq, MIPS_R_T8, MIPS_R_ZERO, -4 * 4);
+				emit_instr(ctx, nop);
+				break;
+			}
+		} else { /* BPF_MEM */
+			switch (BPF_SIZE(insn->code)) {
+			case BPF_B:
+				emit_instr(ctx, sb, src, mem_off, dst);
+				break;
+			case BPF_H:
+				emit_instr(ctx, sh, src, mem_off, dst);
+				break;
+			case BPF_W:
+				emit_instr(ctx, sw, src, mem_off, dst);
+				break;
+			case BPF_DW:
+				if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) {
+					emit_instr(ctx, daddu, MIPS_R_AT, src, MIPS_R_ZERO);
+					emit_instr(ctx, dinsu, MIPS_R_AT, MIPS_R_ZERO, 32, 32);
+					src = MIPS_R_AT;
+				}
+				emit_instr(ctx, sd, src, mem_off, dst);
+				break;
+			}
+		}
+		break;
+
+	default:
+		pr_err("NOT HANDLED %d - (%02x)\n",
+		       this_idx, (unsigned int)insn->code);
+		return -EINVAL;
+	}
+	return 1;
+}
diff --git a/arch/mips/net/ebpf_jit_core.c b/arch/mips/net/ebpf_jit_core.c
new file mode 100644
index 000000000000..37b496f47ddb
--- /dev/null
+++ b/arch/mips/net/ebpf_jit_core.c
@@ -0,0 +1,1189 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Just-In-Time compiler for eBPF filters on MIPS32/MIPS64
+ * Copyright (c) 2021 Tony Ambardar <Tony.Ambardar@gmail.com>
+ *
+ * Based on code from:
+ *
+ * Copyright (c) 2017 Cavium, Inc.
+ * Author: David Daney <david.daney@cavium.com>
+ *
+ * Copyright (c) 2014 Imagination Technologies Ltd.
+ * Author: Markos Chandras <markos.chandras@imgtec.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/filter.h>
+#include <asm/cacheflush.h>
+#include <asm/cpu-features.h>
+#include <asm/uasm.h>
+
+#include "ebpf_jit.h"
+
+/*
+ * Extra JIT registers dedicated to holding TCC (the tail call count)
+ * during runtime or saving it across calls.  Their numbering starts at
+ * MAX_BPF_JIT_REG and the values are used as indices into the
+ * bpf2mips[] mapping table.
+ */
+enum {
+	JIT_RUN_TCC = MAX_BPF_JIT_REG,
+	JIT_SAV_TCC
+};
+/*
+ * Temporary register for passing TCC if nothing dedicated, i.e. when
+ * the JIT_RUN_TCC entry of the mapping table holds register 0.
+ */
+#define TEMP_PASS_TCC MIPS_R_T8
+
+#ifdef CONFIG_64BIT /* M(): keep only the expression for the configured word size */
+#  define M(expr32, expr64) (expr64)
+#else
+#  define M(expr32, expr64) (expr32)
+#endif
+static const struct {
+	/*
+	 * Register or pair base: the MIPS register returned by
+	 * ebpf_to_mips_reg() for this eBPF register.
+	 * NOTE(review): on MIPS32 this presumably names the first register
+	 * of a pair holding the 64-bit value - confirm.
+	 */
+	int reg;
+	/*
+	 * Register flags: EBPF_SAVE_xx / EBPF_SEEN_xx bits that
+	 * ebpf_to_mips_reg() ORs into ctx->flags when the register is
+	 * referenced.  Entries without a second initializer have no flags.
+	 */
+	u32 flags;
+	/* Usage table:   (MIPS32)			 (MIPS64) */
+} bpf2mips[] = {
+	/* Return value from in-kernel function, and exit value from eBPF. */
+	[BPF_REG_0] =  {M(MIPS_R_V0,			MIPS_R_V0)},
+	/* Arguments from eBPF program to in-kernel/BPF functions. */
+	[BPF_REG_1] =  {M(MIPS_R_A0,			MIPS_R_A0)},
+	[BPF_REG_2] =  {M(MIPS_R_A2,			MIPS_R_A1)},
+	[BPF_REG_3] =  {M(MIPS_R_T0,			MIPS_R_A2)},
+	[BPF_REG_4] =  {M(MIPS_R_T2,			MIPS_R_A3)},
+	[BPF_REG_5] =  {M(MIPS_R_T4,			MIPS_R_A4)},
+	/* Callee-saved registers preserved by in-kernel/BPF functions. */
+	[BPF_REG_6] =  {M(MIPS_R_S0,			MIPS_R_S0),
+			M(EBPF_SAVE_S0|EBPF_SAVE_S1,	EBPF_SAVE_S0)},
+	[BPF_REG_7] =  {M(MIPS_R_S2,			MIPS_R_S1),
+			M(EBPF_SAVE_S2|EBPF_SAVE_S3,	EBPF_SAVE_S1)},
+	[BPF_REG_8] =  {M(MIPS_R_S4,			MIPS_R_S2),
+			M(EBPF_SAVE_S4|EBPF_SAVE_S5,	EBPF_SAVE_S2)},
+	[BPF_REG_9] =  {M(MIPS_R_S6,			MIPS_R_S3),
+			M(EBPF_SAVE_S6|EBPF_SAVE_S7,	EBPF_SAVE_S3)},
+	[BPF_REG_10] = {M(MIPS_R_S8,			MIPS_R_S8),
+			M(EBPF_SAVE_S8|EBPF_SEEN_FP,	EBPF_SAVE_S8|EBPF_SEEN_FP)},
+	/* Internal register for rewriting insns during JIT blinding. */
+	[BPF_REG_AX] = {M(MIPS_R_T6,			MIPS_R_T4)},
+	/*
+	 * Internal registers for TCC runtime holding and saving during
+	 * calls. A zero save register indicates using scratch space on
+	 * the stack for storage during calls. A zero hold register means
+	 * no dedicated register holds TCC during runtime (but a temp reg
+	 * still passes TCC to tailcall or bpf2bpf call).
+	 */
+	[JIT_RUN_TCC] =	{M(0,				MIPS_R_V1)},
+	[JIT_SAV_TCC] =	{M(0,				MIPS_R_S4),
+			 M(0,				EBPF_SAVE_S4)}
+};
+#undef M /* helper is local to the table above */
+
+/*
+ * Translate the eBPF register referenced by an instruction into the
+ * MIPS register backing it.  The mapping naturally falls out of the
+ * requirements of eBPF and the MIPS N64/O32 ABIs.  A separate frame
+ * pointer is also maintained, with BPF_REG_10 set relative to $sp.
+ *
+ * @ctx:  JIT context; the flags attached to the register in the
+ *        mapping table are ORed into ctx->flags.
+ * @insn: instruction whose src_reg or dst_reg field is translated.
+ * @u:    selects the source or destination field, and whether
+ *        BPF_REG_10 is acceptable in that position.
+ *
+ * Returns the MIPS register number, or -EINVAL (after a WARN) when the
+ * register is unknown or BPF_REG_10 is used where it is not allowed.
+ */
+int ebpf_to_mips_reg(struct jit_ctx *ctx,
+		     const struct bpf_insn *insn,
+		     enum reg_usage u)
+{
+	const bool want_src = (u == REG_SRC_FP_OK || u == REG_SRC_NO_FP);
+	const bool fp_banned = (u == REG_DST_NO_FP || u == REG_SRC_NO_FP);
+	int ebpf_reg;
+
+	if (want_src)
+		ebpf_reg = insn->src_reg;
+	else
+		ebpf_reg = insn->dst_reg;
+
+	switch (ebpf_reg) {
+	case BPF_REG_10:
+		/* The frame pointer is only valid where the caller says so. */
+		if (fp_banned)
+			break;
+		ctx->flags |= bpf2mips[ebpf_reg].flags;
+		return bpf2mips[ebpf_reg].reg;
+	case BPF_REG_0:
+	case BPF_REG_1:
+	case BPF_REG_2:
+	case BPF_REG_3:
+	case BPF_REG_4:
+	case BPF_REG_5:
+	case BPF_REG_6:
+	case BPF_REG_7:
+	case BPF_REG_8:
+	case BPF_REG_9:
+	case BPF_REG_AX:
+		ctx->flags |= bpf2mips[ebpf_reg].flags;
+		return bpf2mips[ebpf_reg].reg;
+	default:
+		break;
+	}
+
+	/* Unknown register, or frame pointer in a forbidden position. */
+	WARN(1, "Illegal bpf reg: %d\n", ebpf_reg);
+	return -EINVAL;
+}
+
+/*
+ * Emit code loading the immediate field of @insn into MIPS register
+ * @reg.
+ *
+ * An immediate within the signed 16-bit range takes a single addiu
+ * from $zero.  Anything else is split into a lui of the upper part
+ * followed, when the lower halfword is non-zero, by an addiu of the
+ * lower part.
+ */
+void gen_imm_to_reg(const struct bpf_insn *insn, int reg,
+			   struct jit_ctx *ctx)
+{
+	int lo16;
+	int hi_part;
+
+	if (insn->imm >= S16_MIN && insn->imm <= S16_MAX) {
+		emit_instr(ctx, addiu, reg, MIPS_R_ZERO, insn->imm);
+		return;
+	}
+
+	/*
+	 * The low halfword is taken as a signed quantity because it is
+	 * added back with addiu; subtracting it from the immediate makes
+	 * the upper part compensate for a negative low halfword.
+	 */
+	lo16 = (s16)(insn->imm & 0xffff);
+	hi_part = insn->imm - lo16;
+
+	emit_instr(ctx, lui, reg, hi_part >> 16);
+	/* lui already clears lower halfword */
+	if (lo16 != 0)
+		emit_instr(ctx, addiu, reg, reg, lo16);
+}
+
+/*
+ * Emit instructions that load the constant @value into MIPS register
+ * @dst, choosing among three forms:
+ *
+ *  - a single add-immediate from $zero when @value fits a signed
+ *    16-bit immediate;
+ *  - lui + ori when @value fits a sign-extended 32-bit quantity
+ *    above U16_MAX;
+ *  - otherwise a sequence built from the four 16-bit parts of @value,
+ *    most significant first, skipping zero parts and merging the
+ *    shifts between the parts that are emitted.
+ *
+ * S16_MIN and S32_MIN are negative; comparing them with the unsigned
+ * @value converts them to large unsigned numbers, so for example
+ * "value >= S16_MIN" matches values that are small negative numbers
+ * when read as signed.
+ */
+void emit_const_to_reg(struct jit_ctx *ctx, int dst, unsigned long value)
+{
+	if (value >= S16_MIN || value <= S16_MAX) {
+		emit_instr_long(ctx, daddiu, addiu, dst, MIPS_R_ZERO, (int)value);
+	} else if (value >= S32_MIN ||
+		   (value <= S32_MAX && value > U16_MAX)) {
+		emit_instr(ctx, lui, dst, (s32)(s16)(value >> 16));
+		emit_instr(ctx, ori, dst, dst, (unsigned int)(value & 0xffff));
+	} else {
+		/*
+		 * Split a 64-bit copy of the value: shifting an unsigned
+		 * long right by 32 or 48 bits is undefined where long is
+		 * only 32 bits wide.
+		 */
+		const u64 wide = value;
+		int i;
+		bool seen_part = false;
+		/* Left shift still owed to dst before the next part lands. */
+		int needed_shift = 0;
+
+		for (i = 0; i < 4; i++) {
+			u64 part = (wide >> (16 * (3 - i))) & 0xffff;
+
+			/*
+			 * Apply the accumulated shift only when another
+			 * part is about to be merged in, or at the end.
+			 */
+			if (seen_part && needed_shift > 0 && (part || i == 3)) {
+				emit_instr(ctx, dsll_safe, dst, dst, needed_shift);
+				needed_shift = 0;
+			}
+			if (part) {
+				if (i == 0 || (!seen_part && i < 3 && part < 0x8000)) {
+					/*
+					 * lui places the part 16 bits up, so
+					 * that much shifting is already done.
+					 */
+					emit_instr(ctx, lui, dst, (s32)(s16)part);
+					needed_shift = -16;
+				} else {
+					emit_instr(ctx, ori, dst,
+						   seen_part ? dst : MIPS_R_ZERO,
+						   (unsigned int)part);
+				}
+				seen_part = true;
+			}
+			if (seen_part)
+				needed_shift += 16;
+		}
+	}
+}
+
+#define RVT_VISITED_MASK 0xc000000000000000ull /* bits 62..63: visit state of an insn */
+#define RVT_FALL_THROUGH 0x4000000000000000ull /* fall-through edge has been followed */
+#define RVT_BRANCH_TAKEN 0x8000000000000000ull /* branch-taken edge has been followed */
+#define RVT_DONE (RVT_FALL_THROUGH | RVT_BRANCH_TAKEN) /* no edge left to follow */
+
+/*
+ * Propagate register value types through the insns starting at
+ * @start_idx.  On reaching each insn, the packed types accumulated so
+ * far are stored in ctx->reg_val_types[idx] (keeping that entry's visit
+ * bits) and are then updated for the register the insn writes.
+ *
+ * @initial_rvt:  packed register value types in effect at @start_idx.
+ * @follow_taken: when true, the first conditional jump met is followed
+ *                along its branch-taken edge; every later one falls
+ *                through.
+ *
+ * The walk stops at BPF_EXIT or when idx runs past the last insn.
+ *
+ * Return the last idx processed, or negative for error.
+ * NOTE(review): no return statement in this function yields a negative
+ * value - confirm whether the error convention is still needed.
+ */
+static int reg_val_propagate_range(struct jit_ctx *ctx, u64 initial_rvt,
+				   int start_idx, bool follow_taken)
+{
+	const struct bpf_prog *prog = ctx->prog;
+	const struct bpf_insn *insn;
+	u64 exit_rvt = initial_rvt; /* types in effect after the current insn */
+	u64 *rvt = ctx->reg_val_types;
+	int idx;
+	int reg;
+
+	for (idx = start_idx; idx < prog->len; idx++) {
+		rvt[idx] = (rvt[idx] & RVT_VISITED_MASK) | exit_rvt; /* keep visit bits, record types on entry */
+		insn = prog->insnsi + idx;
+		switch (BPF_CLASS(insn->code)) {
+		case BPF_ALU:
+			switch (BPF_OP(insn->code)) {
+			case BPF_ADD:
+			case BPF_SUB:
+			case BPF_MUL:
+			case BPF_DIV:
+			case BPF_OR:
+			case BPF_AND:
+			case BPF_LSH:
+			case BPF_RSH:
+			case BPF_ARSH:
+			case BPF_NEG:
+			case BPF_MOD:
+			case BPF_XOR:
+				set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT);
+				break;
+			case BPF_MOV:
+				if (BPF_SRC(insn->code)) { /* REG to REG move */
+					set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT);
+				} else {
+					/* IMM to REG move*/
+					if (insn->imm >= 0)
+						set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS);
+					else
+						set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT);
+				}
+				break;
+			case BPF_END:
+				if (insn->imm == 64)
+					set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT);
+				else if (insn->imm == 32)
+					set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT);
+				else /* insn->imm == 16 */
+					set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS);
+				break;
+			}
+			rvt[idx] |= RVT_DONE;
+			break;
+		case BPF_ALU64:
+			switch (BPF_OP(insn->code)) {
+			case BPF_MOV:
+				if (BPF_SRC(insn->code)) {
+					/* REG to REG move*/
+					set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT);
+				} else {
+					/* IMM to REG move*/
+					if (insn->imm >= 0)
+						set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS);
+					else
+						set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT_32BIT);
+				}
+				break;
+			default:
+				set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT);
+			}
+			rvt[idx] |= RVT_DONE;
+			break;
+		case BPF_LD:
+			switch (BPF_SIZE(insn->code)) {
+			case BPF_DW:
+				if (BPF_MODE(insn->code) == BPF_IMM) {
+					s64 val;
+
+					val = (s64)((u32)insn->imm | ((u64)(insn + 1)->imm << 32)); /* immediate spans this insn and the next */
+					if (val > 0 && val <= S32_MAX)
+						set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS);
+					else if (val >= S32_MIN && val <= S32_MAX)
+						set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT_32BIT);
+					else
+						set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT);
+					rvt[idx] |= RVT_DONE;
+					idx++; /* step over the second slot of the load */
+				} else {
+					set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT);
+				}
+				break;
+			case BPF_B:
+			case BPF_H:
+				set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS);
+				break;
+			case BPF_W:
+				if (BPF_MODE(insn->code) == BPF_IMM)
+					set_reg_val_type(&exit_rvt, insn->dst_reg,
+							 insn->imm >= 0 ? REG_32BIT_POS : REG_32BIT);
+				else
+					set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT);
+				break;
+			}
+			rvt[idx] |= RVT_DONE;
+			break;
+		case BPF_LDX:
+			switch (BPF_SIZE(insn->code)) {
+			case BPF_DW:
+				set_reg_val_type(&exit_rvt, insn->dst_reg, REG_64BIT);
+				break;
+			case BPF_B:
+			case BPF_H:
+				set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT_POS);
+				break;
+			case BPF_W:
+				set_reg_val_type(&exit_rvt, insn->dst_reg, REG_32BIT);
+				break;
+			}
+			rvt[idx] |= RVT_DONE;
+			break;
+		case BPF_JMP:
+		case BPF_JMP32:
+			switch (BPF_OP(insn->code)) {
+			case BPF_EXIT:
+				rvt[idx] = RVT_DONE | exit_rvt;
+				rvt[prog->len] = exit_rvt; /* slot at index prog->len records the types at exit */
+				return idx;
+			case BPF_JA:
+			{
+				int tgt = idx + 1 + insn->off;
+				bool visited = (rvt[tgt] & RVT_FALL_THROUGH);
+
+				rvt[idx] |= RVT_DONE;
+				/*
+				 * Verifier dead code patching can use
+				 * infinite-loop traps, causing hangs and
+				 * RCU stalls here. Treat traps as nops
+				 * if detected and fall through.
+				 */
+				if (insn->off == -1)
+					break;
+				/*
+				 * Bounded loops cause the same issues in
+				 * fallthrough mode; follow only if jump
+				 * target is unvisited to mitigate.
+				 */
+				if (insn->off < 0 && !follow_taken && visited)
+					break;
+				idx += insn->off; /* the loop's idx++ supplies the remaining +1 */
+				break;
+			}
+			case BPF_JEQ:
+			case BPF_JGT:
+			case BPF_JGE:
+			case BPF_JLT:
+			case BPF_JLE:
+			case BPF_JSET:
+			case BPF_JNE:
+			case BPF_JSGT:
+			case BPF_JSGE:
+			case BPF_JSLT:
+			case BPF_JSLE:
+				if (follow_taken) {
+					rvt[idx] |= RVT_BRANCH_TAKEN;
+					idx += insn->off; /* the loop's idx++ supplies the remaining +1 */
+					follow_taken = false; /* only the first conditional jump is taken */
+				} else {
+					rvt[idx] |= RVT_FALL_THROUGH;
+				}
+				break;
+			case BPF_CALL:
+				set_reg_val_type(&exit_rvt, BPF_REG_0, REG_64BIT); /* also covered by the loop below */
+				/* Upon call return, argument registers are clobbered. */
+				for (reg = BPF_REG_0; reg <= BPF_REG_5; reg++)
+					set_reg_val_type(&exit_rvt, reg, REG_64BIT);
+
+				rvt[idx] |= RVT_DONE;
+				break;
+			case BPF_TAIL_CALL:
+				rvt[idx] |= RVT_DONE;
+				break;
+			default:
+				WARN(1, "Unhandled BPF_JMP case.\n");
+				rvt[idx] |= RVT_DONE;
+				break;
+			}
+			break;
+		default:
+			rvt[idx] |= RVT_DONE; /* other classes leave the tracked types unchanged */
+			break;
+		}
+	}
+	return idx;
+}
+
+/*
+ * Track the value range (i.e. 32-bit vs. 64-bit) of each register at
+ * each eBPF insn, so that unneeded sign and zero extension operations
+ * can be omitted.
+ *
+ * Confluence of control paths with conflicting ranges is not handled
+ * yet, but the result is good enough for most sane code.
+ *
+ * Always returns 0.
+ */
+static int reg_val_propagate(struct jit_ctx *ctx)
+{
+	const struct bpf_prog *prog = ctx->prog;
+	/*
+	 * 11 registers * 3 bits/reg leaves top bits free for other
+	 * uses.  Bit-62..63 used to see if we have visited an insn.
+	 */
+	u64 entry_rvt = 0;
+	int arg_reg;
+	int pos;
+
+	/* Upon entry, argument registers are 64-bit. */
+	for (arg_reg = BPF_REG_1; arg_reg <= BPF_REG_5; arg_reg++)
+		set_reg_val_type(&entry_rvt, arg_reg, REG_64BIT);
+
+	/*
+	 * First follow all conditional branches on the fall-through
+	 * edge of control flow.
+	 */
+	reg_val_propagate_range(ctx, entry_rvt, 0, false);
+
+	/*
+	 * Then repeatedly find the first conditional branch where
+	 * both edges of control flow have not been taken, and follow
+	 * the branch taken edge.  The scan restarts from the first insn
+	 * after every propagation, i.e. once per conditional branch insn.
+	 */
+	pos = 0;
+	while (pos < prog->len) {
+		const u64 state = ctx->reg_val_types[pos];
+		const u64 visited = state & RVT_VISITED_MASK;
+		const u64 types = state & ~RVT_VISITED_MASK;
+
+		/* Fully handled or never reached: nothing to follow here. */
+		if (visited == RVT_DONE || visited == 0) {
+			pos++;
+			continue;
+		}
+
+		if (visited != RVT_FALL_THROUGH) {
+			/* RVT_BRANCH_TAKEN */
+			WARN(1, "Unexpected RVT_BRANCH_TAKEN case.\n");
+			reg_val_propagate_range(ctx, types, pos, false);
+		} else {
+			reg_val_propagate_range(ctx, types, pos, true);
+		}
+		pos = 0;
+	}
+
+	/*
+	 * Eventually all conditional branches have been followed on
+	 * both branches and we are done.  Any insn that has not been
+	 * visited at this point is dead.
+	 */
+	return 0;
+}
+
+/*
+ * Fill @size bytes at @area with break instructions carrying the
+ * BRK_BUG code, one per whole 32-bit word.  Trailing bytes that do
+ * not make up a full word are left untouched.
+ */
+static void jit_fill_hole(void *area, unsigned int size)
+{
+	/* We are guaranteed to have aligned memory. */
+	u32 *word = area;
+	unsigned int remaining = size;
+
+	while (remaining >= sizeof(u32)) {
+		uasm_i_break(&word, BRK_BUG); /* Increments word */
+		remaining -= sizeof(u32);
+	}
+}
+
+/*
+ * Stack region alignment under N64 and O32 ABIs, in bytes: twice the
+ * size of a long.  Used with ALIGN() when sizing the stack frame.
+ */
+#define STACK_ALIGN (2 * sizeof(long))
+
+/*
+ * Emit the program prologue and record the resulting frame layout in @ctx.
+ *
+ * Sets ctx->stack_size (total frame size in bytes), ctx->bpf_stack_off
+ * (offset of BPF_REG_10 from the adjusted $sp) and ctx->prolog_skip (byte
+ * length of the leading instructions that only a call from the kernel
+ * needs). Always returns 0.
+ *
+ * eBPF stack frame will be something like:
+ *
+ *  Entry $sp ------>   +--------------------------------+
+ *                      |   $ra  (optional)              |
+ *                      +--------------------------------+
+ *                      |   $s8  (optional)              |
+ *                      +--------------------------------+
+ *                      |   $s7  (optional)              |
+ *                      +--------------------------------+
+ *                      |   $s6  (optional)              |
+ *                      +--------------------------------+
+ *                      |   $s5  (optional)              |
+ *                      +--------------------------------+
+ *                      |   $s4  (optional)              |
+ *                      +--------------------------------+
+ *                      |   $s3  (optional)              |
+ *                      +--------------------------------+
+ *                      |   $s2  (optional)              |
+ *                      +--------------------------------+
+ *                      |   $s1  (optional)              |
+ *                      +--------------------------------+
+ *                      |   $s0  (optional)              |
+ *                      +--------------------------------+
+ *                      |   tmp-storage  (optional)      |
+ * $sp + bpf_stack_off->+--------------------------------+ <--BPF_REG_10
+ *                      |   BPF_REG_10 relative storage  |
+ *                      |    MAX_BPF_STACK (optional)    |
+ *                      |      .                         |
+ *                      |      .                         |
+ *                      |      .                         |
+ *        $sp ------>   +--------------------------------+
+ *
+ * If BPF_REG_10 is never referenced, then the MAX_BPF_STACK sized
+ * area is not allocated.
+ *
+ * The register save area and the BPF_REG_10 relative area are each
+ * rounded up to STACK_ALIGN separately before being added together.
+ */
+static int build_int_prologue(struct jit_ctx *ctx)
+{
+	int tcc_run = bpf2mips[JIT_RUN_TCC].reg ?
+		      bpf2mips[JIT_RUN_TCC].reg :
+		      TEMP_PASS_TCC;
+	int tcc_sav = bpf2mips[JIT_SAV_TCC].reg;
+	const struct bpf_prog *prog = ctx->prog;
+	int r10 = bpf2mips[BPF_REG_10].reg;
+	int r1 = bpf2mips[BPF_REG_1].reg;
+	int stack_adjust = 0;
+	int store_offset;
+	int locals_size;
+	int start_idx;
+
+	/* Reserve one long-sized slot for each register flagged for saving. */
+	if (ctx->flags & EBPF_SAVE_RA)
+		stack_adjust += sizeof(long);
+	if (ctx->flags & EBPF_SAVE_S8)
+		stack_adjust += sizeof(long);
+	if (ctx->flags & EBPF_SAVE_S7)
+		stack_adjust += sizeof(long);
+	if (ctx->flags & EBPF_SAVE_S6)
+		stack_adjust += sizeof(long);
+	if (ctx->flags & EBPF_SAVE_S5)
+		stack_adjust += sizeof(long);
+	if (ctx->flags & EBPF_SAVE_S4)
+		stack_adjust += sizeof(long);
+	if (ctx->flags & EBPF_SAVE_S3)
+		stack_adjust += sizeof(long);
+	if (ctx->flags & EBPF_SAVE_S2)
+		stack_adjust += sizeof(long);
+	if (ctx->flags & EBPF_SAVE_S1)
+		stack_adjust += sizeof(long);
+	if (ctx->flags & EBPF_SAVE_S0)
+		stack_adjust += sizeof(long);
+	if (tail_call_present(ctx) &&
+	    !(ctx->flags & EBPF_TCC_IN_RUN) && !tcc_sav)
+		/* Allocate scratch space for holding TCC if needed. */
+		stack_adjust += sizeof(long);
+
+	stack_adjust = ALIGN(stack_adjust, STACK_ALIGN);
+
+	/* BPF_REG_10 relative storage is only allocated if FP was seen. */
+	locals_size = (ctx->flags & EBPF_SEEN_FP) ? prog->aux->stack_depth : 0;
+	locals_size = ALIGN(locals_size, STACK_ALIGN);
+
+	stack_adjust += locals_size;
+
+	ctx->stack_size = stack_adjust;
+	ctx->bpf_stack_off = locals_size;
+
+	/*
+	 * First instruction initializes the tail call count (TCC) and
+	 * assumes a call from kernel using the native ABI. Calls made
+	 * using the BPF ABI (bpf2bpf or tail call) will skip this insn
+	 * and pass the TCC via register.
+	 */
+	start_idx = ctx->idx;
+	emit_instr(ctx, addiu, tcc_run, MIPS_R_ZERO, MAX_TAIL_CALL_CNT);
+
+	/*
+	 * When called from kernel under O32 ABI we must set up BPF R1
+	 * context, since BPF R1 is an endian-order register pair ($a0:$a1
+	 * or $a1:$a0) but context is always passed in $a0 as a 32-bit
+	 * pointer. As above, bpf2bpf and tail calls will skip these insns
+	 * since all registers are correctly set up already.
+	 */
+	if (!is64bit()) {
+		if (isbigend())
+			emit_instr(ctx, move, LO(r1), MIPS_R_A0);
+		/* Sanitize upper 32-bit reg */
+		gen_zext_insn(r1, true, ctx);
+	}
+	/*
+	 * Calls using BPF ABI (bpf2bpf and tail calls) will skip TCC
+	 * initialization and R1 context fixup needed by kernel calls.
+	 * The skip distance is the number of instructions emitted since
+	 * start_idx, times 4 bytes each.
+	 */
+	ctx->prolog_skip = (ctx->idx - start_idx) * 4;
+
+	/* Allocate the frame; an empty frame leaves nothing more to emit. */
+	if (stack_adjust)
+		emit_instr_long(ctx, daddiu, addiu,
+					MIPS_R_SP, MIPS_R_SP, -stack_adjust);
+	else
+		return 0;
+
+	/* Save registers from the top slot downwards, in diagram order. */
+	store_offset = stack_adjust - sizeof(long);
+
+	if (ctx->flags & EBPF_SAVE_RA) {
+		emit_instr_long(ctx, sd, sw,
+					MIPS_R_RA, store_offset, MIPS_R_SP);
+		store_offset -= sizeof(long);
+	}
+	if (ctx->flags & EBPF_SAVE_S8) {
+		emit_instr_long(ctx, sd, sw,
+					MIPS_R_S8, store_offset, MIPS_R_SP);
+		store_offset -= sizeof(long);
+	}
+	if (ctx->flags & EBPF_SAVE_S7) {
+		emit_instr_long(ctx, sd, sw,
+					MIPS_R_S7, store_offset, MIPS_R_SP);
+		store_offset -= sizeof(long);
+	}
+	if (ctx->flags & EBPF_SAVE_S6) {
+		emit_instr_long(ctx, sd, sw,
+					MIPS_R_S6, store_offset, MIPS_R_SP);
+		store_offset -= sizeof(long);
+	}
+	if (ctx->flags & EBPF_SAVE_S5) {
+		emit_instr_long(ctx, sd, sw,
+					MIPS_R_S5, store_offset, MIPS_R_SP);
+		store_offset -= sizeof(long);
+	}
+	if (ctx->flags & EBPF_SAVE_S4) {
+		emit_instr_long(ctx, sd, sw,
+					MIPS_R_S4, store_offset, MIPS_R_SP);
+		store_offset -= sizeof(long);
+	}
+	if (ctx->flags & EBPF_SAVE_S3) {
+		emit_instr_long(ctx, sd, sw,
+					MIPS_R_S3, store_offset, MIPS_R_SP);
+		store_offset -= sizeof(long);
+	}
+	if (ctx->flags & EBPF_SAVE_S2) {
+		emit_instr_long(ctx, sd, sw,
+					MIPS_R_S2, store_offset, MIPS_R_SP);
+		store_offset -= sizeof(long);
+	}
+	if (ctx->flags & EBPF_SAVE_S1) {
+		emit_instr_long(ctx, sd, sw,
+					MIPS_R_S1, store_offset, MIPS_R_SP);
+		store_offset -= sizeof(long);
+	}
+	if (ctx->flags & EBPF_SAVE_S0) {
+		emit_instr_long(ctx, sd, sw,
+					MIPS_R_S0, store_offset, MIPS_R_SP);
+		store_offset -= sizeof(long);
+	}
+
+	/*
+	 * Store TCC in backup register or stack scratch space if indicated.
+	 * The stack scratch slot sits at $sp + bpf_stack_off, i.e. directly
+	 * above the BPF_REG_10 relative storage.
+	 */
+	if (tail_call_present(ctx) && !(ctx->flags & EBPF_TCC_IN_RUN)) {
+		if (tcc_sav)
+			emit_instr(ctx, move, tcc_sav, tcc_run);
+		else
+			emit_instr_long(ctx, sd, sw,
+					tcc_run, ctx->bpf_stack_off, MIPS_R_SP);
+	}
+
+	/* Prepare BPF FP as single-reg ptr, emulate upper 32-bits as needed.*/
+	if (ctx->flags & EBPF_SEEN_FP)
+		emit_instr_long(ctx, daddiu, addiu, r10,
+						MIPS_R_SP, ctx->bpf_stack_off);
+
+	return 0;
+}
+
+/*
+ * Walk the BPF program and emit code for every instruction that was
+ * marked as visited, skipping dead ones.
+ *
+ * While no image is attached (ctx->target is NULL), the byte offset of each
+ * emitted instruction and of the epilogue is recorded in ctx->offsets[].
+ *
+ * Returns 0 on success or the negative value returned by build_one_insn().
+ */
+static int build_int_body(struct jit_ctx *ctx)
+{
+	const struct bpf_prog *prog = ctx->prog;
+	int idx = 0;
+
+	while (idx < prog->len) {
+		const struct bpf_insn *insn = &prog->insnsi[idx];
+		int consumed;
+
+		/* dead instruction, don't emit it. */
+		if (!(ctx->reg_val_types[idx] & RVT_VISITED_MASK)) {
+			idx++;
+			continue;
+		}
+
+		/* Keep the OFFSETS_B_CONV bits, refresh the byte offset. */
+		if (!ctx->target)
+			ctx->offsets[idx] =
+				(ctx->offsets[idx] & OFFSETS_B_CONV) |
+				(ctx->idx * 4);
+
+		/* build_one_insn() reports how many BPF insns it consumed. */
+		consumed = build_one_insn(insn, ctx, idx, prog->len);
+		if (consumed < 0)
+			return consumed;
+		idx += consumed;
+	}
+
+	/* Offsets are only recorded while no image is attached. */
+	if (ctx->target)
+		return 0;
+
+	/* epilogue offset */
+	ctx->offsets[idx] = ctx->idx * 4;
+
+	/*
+	 * All exits have an offset of the epilogue, some offsets may
+	 * not have been set due to branch-around threading, so set
+	 * them now.
+	 */
+	for (idx = 0; idx < prog->len; idx++) {
+		if (prog->insnsi[idx].code == (BPF_JMP | BPF_EXIT))
+			ctx->offsets[idx] = ctx->idx * 4;
+	}
+
+	return 0;
+}
+
+/*
+ * Emit the epilogue: optional BPF R0 fixups, reload of the saved registers,
+ * and a jump through @dest_reg with the stack frame release (or a nop when
+ * there is no frame) in the branch delay slot.
+ *
+ * The R0 fixups are only emitted when @dest_reg is MIPS_R_RA, i.e. for a
+ * function return; emit_bpf_tail_call() passes MIPS_R_T9 instead.
+ * Always returns 0.
+ */
+static int build_int_epilogue(struct jit_ctx *ctx, int dest_reg)
+{
+	const struct bpf_prog *prog = ctx->prog;
+	int stack_adjust = ctx->stack_size;
+	int store_offset = stack_adjust - sizeof(long);
+	int ax = bpf2mips[BPF_REG_AX].reg;
+	int r0 = bpf2mips[BPF_REG_0].reg;
+	enum reg_val_type td;
+
+	/*
+	 * As in prologue code, we default to assuming exit to the kernel.
+	 * Returns to the kernel follow the N64 or O32 ABI. For N64, the
+	 * BPF R0 return value may need to be sign-extended, while O32 may
+	 * need fixup of BPF R0 to place the 32-bit return value in MIPS V0.
+	 *
+	 * Returns to BPF2BPF callers consistently use the BPF 64-bit ABI,
+	 * so register usage and mapping between JIT and OS is unchanged.
+	 * Accommodate by saving unmodified R0 register data to allow a
+	 * BPF caller to restore R0 after we return.
+	 */
+	if (dest_reg == MIPS_R_RA) { /* kernel or bpf2bpf function return */
+		if (is64bit()) {
+			/*
+			 * Backup BPF R0 to AX, allowing the caller to
+			 * restore it in case this is a BPF2BPF rather
+			 * than a kernel return.
+			 */
+			emit_instr(ctx, move, ax, r0);
+			/*
+			 * Don't let zero-extended R0 value escape to
+			 * kernel on return, so sign-extend if needed.
+			 */
+			td = get_reg_val_type(ctx, prog->len, BPF_REG_0);
+			if (td == REG_64BIT)
+				gen_sext_insn(r0, ctx);
+		} else if (isbigend()) { /* and 32-bit */
+			/*
+			 * Backup high 32-bit register of BPF R0 to AX,
+			 * since it occupies MIPS_R_V0 which needs to be
+			 * clobbered for a kernel return.
+			 */
+			emit_instr(ctx, move, HI(ax), HI(r0));
+			/*
+			 * O32 ABI specifies 32-bit return value always
+			 * placed in MIPS_R_V0 regardless of the native
+			 * endianness. This would be in the wrong position
+			 * in a BPF R0 reg pair on big-endian systems, so
+			 * we must relocate.
+			 */
+			emit_instr(ctx, move, MIPS_R_V0, LO(r0));
+		}
+	}
+
+
+	/*
+	 * Reload the saved registers from the top slot downwards, in the
+	 * same flag order build_int_prologue() used to store them.
+	 */
+	if (ctx->flags & EBPF_SAVE_RA) {
+		emit_instr_long(ctx, ld, lw,
+					MIPS_R_RA, store_offset, MIPS_R_SP);
+		store_offset -= sizeof(long);
+	}
+	if (ctx->flags & EBPF_SAVE_S8) {
+		emit_instr_long(ctx, ld, lw,
+					MIPS_R_S8, store_offset, MIPS_R_SP);
+		store_offset -= sizeof(long);
+	}
+	if (ctx->flags & EBPF_SAVE_S7) {
+		emit_instr_long(ctx, ld, lw,
+					MIPS_R_S7, store_offset, MIPS_R_SP);
+		store_offset -= sizeof(long);
+	}
+	if (ctx->flags & EBPF_SAVE_S6) {
+		emit_instr_long(ctx, ld, lw,
+					MIPS_R_S6, store_offset, MIPS_R_SP);
+		store_offset -= sizeof(long);
+	}
+	if (ctx->flags & EBPF_SAVE_S5) {
+		emit_instr_long(ctx, ld, lw,
+					MIPS_R_S5, store_offset, MIPS_R_SP);
+		store_offset -= sizeof(long);
+	}
+	if (ctx->flags & EBPF_SAVE_S4) {
+		emit_instr_long(ctx, ld, lw,
+					MIPS_R_S4, store_offset, MIPS_R_SP);
+		store_offset -= sizeof(long);
+	}
+	if (ctx->flags & EBPF_SAVE_S3) {
+		emit_instr_long(ctx, ld, lw,
+					MIPS_R_S3, store_offset, MIPS_R_SP);
+		store_offset -= sizeof(long);
+	}
+	if (ctx->flags & EBPF_SAVE_S2) {
+		emit_instr_long(ctx, ld, lw,
+					MIPS_R_S2, store_offset, MIPS_R_SP);
+		store_offset -= sizeof(long);
+	}
+	if (ctx->flags & EBPF_SAVE_S1) {
+		emit_instr_long(ctx, ld, lw,
+					MIPS_R_S1, store_offset, MIPS_R_SP);
+		store_offset -= sizeof(long);
+	}
+	if (ctx->flags & EBPF_SAVE_S0) {
+		emit_instr_long(ctx, ld, lw,
+					MIPS_R_S0, store_offset, MIPS_R_SP);
+		store_offset -= sizeof(long);
+	}
+	/* Leave through dest_reg; the next insn fills the jump delay slot. */
+	emit_instr(ctx, jr, dest_reg);
+
+	/* Delay slot */
+	if (stack_adjust)
+		emit_instr_long(ctx, daddiu, addiu,
+					MIPS_R_SP, MIPS_R_SP, stack_adjust);
+	else
+		emit_instr(ctx, nop);
+
+	return 0;
+}
+
+/*
+ * Store BPF regs R3-R5 to the stack, skipping BPF regs R1-R2 which are
+ * passed via MIPS register pairs in $a0-$a3. Every BPF register takes one
+ * u64 slot, and the first two slots are left for R1-R2. Register order
+ * within pairs and the memory storage order are identical i.e. endian
+ * native.
+ */
+static void emit_push_args(struct jit_ctx *ctx)
+{
+	int bpf_reg;
+
+	for (bpf_reg = BPF_REG_3; bpf_reg <= BPF_REG_5; bpf_reg++) {
+		/* Slots 0-1 are skipped: R1-R2 travel in $a0-$a3. */
+		const int slot = 2 + (bpf_reg - BPF_REG_3);
+		const int off = slot * sizeof(u64);
+		const int pair = bpf2mips[bpf_reg].reg;
+
+		emit_instr(ctx, sw, LO(pair), OFFLO(off), MIPS_R_SP);
+		emit_instr(ctx, sw, HI(pair), OFFHI(off), MIPS_R_SP);
+	}
+}
+
+/*
+ * Emit a BPF_CALL insn: either a kernel call or a bpf2bpf subprog call
+ * (insn->src_reg == BPF_PSEUDO_CALL), handling TCC and ABI variations.
+ *
+ * A bpf2bpf call passes the TCC via register as expected by the subprog's
+ * prologue, enters the callee ctx->prolog_skip bytes past its start, and
+ * restores BPF R0 after the return.
+ *
+ * A kernel call under the MIPS32 O32 ABI passes u64 BPF regs R1-R2 via reg
+ * pairs in $a0-$a3, while BPF regs R3-R5 are passed via the stack. Stack
+ * space is still reserved for $a0-$a3, and the whole area is aligned to
+ * STACK_ALIGN.
+ *
+ * Always sets EBPF_SAVE_RA in ctx->flags.
+ */
+#define ARGS_SIZE (5 * sizeof(u64))
+
+void emit_bpf_call(struct jit_ctx *ctx, const struct bpf_insn *insn)
+{
+	const bool bpf2bpf = insn->src_reg == BPF_PSEUDO_CALL;
+	const bool o32_kernel_call = !is64bit() && !bpf2bpf;
+	const int args_adjust = ALIGN(ARGS_SIZE, STACK_ALIGN);
+	const int tcc_sav = bpf2mips[JIT_SAV_TCC].reg;
+	const int ax = bpf2mips[BPF_REG_AX].reg;
+	const int r0 = bpf2mips[BPF_REG_0].reg;
+	int tcc_run = bpf2mips[JIT_RUN_TCC].reg;
+	long target;
+
+	/* Fall back to the temporary pass register if none is mapped. */
+	if (!tcc_run)
+		tcc_run = TEMP_PASS_TCC;
+
+	ctx->flags |= EBPF_SAVE_RA;
+
+	/* Ensure TCC passed into BPF subprog: set it from reg or stack. */
+	if (bpf2bpf && tail_call_present(ctx) &&
+	    !(ctx->flags & EBPF_TCC_IN_RUN)) {
+		if (tcc_sav)
+			emit_instr(ctx, move, tcc_run, tcc_sav);
+		else
+			emit_instr_long(ctx, ld, lw, tcc_run,
+					ctx->bpf_stack_off, MIPS_R_SP);
+	}
+
+	/* Push O32 stack args for kernel call */
+	if (o32_kernel_call) {
+		emit_instr(ctx, addiu, MIPS_R_SP, MIPS_R_SP, -args_adjust);
+		emit_push_args(ctx);
+	}
+
+	/* Skip TCC init and R1 register fixup with BPF ABI. */
+	target = (long)__bpf_call_base + insn->imm;
+	if (bpf2bpf)
+		target += ctx->prolog_skip;
+
+	emit_const_to_reg(ctx, MIPS_R_T9, target);
+	emit_instr(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
+	/* Delay slot */
+	emit_instr(ctx, nop);
+
+	/* Restore stack */
+	if (o32_kernel_call)
+		emit_instr(ctx, addiu, MIPS_R_SP, MIPS_R_SP, args_adjust);
+
+	/* Nothing to undo after a kernel call. */
+	if (!bpf2bpf)
+		return;
+
+	/*
+	 * Assuming a kernel return, a MIPS64 function epilogue may
+	 * sign-extend R0, while MIPS32BE mangles the R0 register pair.
+	 * Undo both for a bpf2bpf call return by restoring BPF R0 from AX.
+	 */
+	if (is64bit()) {
+		emit_instr(ctx, move, r0, ax);
+	} else if (isbigend()) { /* and 32-bit */
+		emit_instr(ctx, move, LO(r0), MIPS_R_V0);
+		emit_instr(ctx, move, HI(r0), HI(ax));
+	}
+}
+
+/*
+ * Emit a BPF tail call and mark the program with EBPF_SEEN_TC.
+ *
+ * Tail call helper arguments passed via BPF ABI as u64 parameters. On
+ * MIPS64 N64 ABI systems these are native regs, while on MIPS32 O32 ABI
+ * systems these are reg pairs:
+ *
+ * R1 -> &ctx
+ * R2 -> &array
+ * R3 -> index
+ *
+ * Three checks are emitted (index bound, TCC, NULL prog); each one branches
+ * to the instruction following this one (@this_idx + 1) when the tail call
+ * must not be taken. Otherwise the decremented TCC is placed in the TCC
+ * run register and control leaves through build_int_epilogue() with a jump
+ * to prog->bpf_func + ctx->prolog_skip held in MIPS_R_T9.
+ *
+ * Returns the result of build_int_epilogue().
+ */
+int emit_bpf_tail_call(struct jit_ctx *ctx, int this_idx)
+{
+	int tcc_run = bpf2mips[JIT_RUN_TCC].reg ?
+		      bpf2mips[JIT_RUN_TCC].reg :
+		      TEMP_PASS_TCC;
+	int tcc_sav = bpf2mips[JIT_SAV_TCC].reg;
+	int r2 = bpf2mips[BPF_REG_2].reg;
+	int r3 = bpf2mips[BPF_REG_3].reg;
+	int off, b_off;
+	int tcc;
+
+	ctx->flags |= EBPF_SEEN_TC;
+	/*
+	 * if (index >= array->map.max_entries)
+	 *     goto out;
+	 */
+	if (is64bit())
+		/* Mask index as 32-bit */
+		gen_zext_insn(r3, true, ctx);
+	off = offsetof(struct bpf_array, map.max_entries);
+	emit_instr_long(ctx, lwu, lw, MIPS_R_AT, off, LO(r2));
+	/* AT = (max_entries < index); the bnez below covers the == case. */
+	emit_instr(ctx, sltu, MIPS_R_AT, MIPS_R_AT, LO(r3));
+	b_off = b_imm(this_idx + 1, ctx);
+	emit_instr(ctx, bnez, MIPS_R_AT, b_off);
+	/*
+	 * if (TCC-- < 0)
+	 *     goto out;
+	 */
+	/* Delay slot */
+	tcc = (ctx->flags & EBPF_TCC_IN_RUN) ? tcc_run : tcc_sav;
+	/* Get TCC from reg or stack */
+	if (tcc)
+		emit_instr(ctx, move, MIPS_R_T8, tcc);
+	else
+		emit_instr_long(ctx, ld, lw, MIPS_R_T8,
+						ctx->bpf_stack_off, MIPS_R_SP);
+	b_off = b_imm(this_idx + 1, ctx);
+	emit_instr(ctx, bltz, MIPS_R_T8, b_off);
+	/*
+	 * prog = array->ptrs[index];
+	 * if (prog == NULL)
+	 *     goto out;
+	 */
+	/* Delay slot */
+	emit_instr_long(ctx, dsll, sll, MIPS_R_AT, LO(r3), ilog2(sizeof(long)));
+	emit_instr_long(ctx, daddu, addu, MIPS_R_AT, MIPS_R_AT, LO(r2));
+	off = offsetof(struct bpf_array, ptrs);
+	emit_instr_long(ctx, ld, lw, MIPS_R_AT, off, MIPS_R_AT);
+	b_off = b_imm(this_idx + 1, ctx);
+	emit_instr(ctx, beqz, MIPS_R_AT, b_off);
+	/* Delay slot */
+	emit_instr(ctx, nop);
+
+	/* goto *(prog->bpf_func + skip); */
+	off = offsetof(struct bpf_prog, bpf_func);
+	emit_instr_long(ctx, ld, lw, MIPS_R_T9, off, MIPS_R_AT);
+	/* All systems are go... decrement and propagate TCC */
+	emit_instr_long(ctx, daddiu, addiu, tcc_run, MIPS_R_T8, -1);
+	/* Skip first instructions (TCC init and R1 fixup) */
+	emit_instr_long(ctx, daddiu, addiu, MIPS_R_T9, MIPS_R_T9, ctx->prolog_skip);
+	return build_int_epilogue(ctx, MIPS_R_T9);
+}
+
+/*
+ * Save and restore the BPF VM state across a direct kernel call. This
+ * includes the caller-saved registers used for BPF_REG_0 .. BPF_REG_5
+ * and BPF_REG_AX used by the verifier for blinding and other dark arts.
+ * Restore avoids clobbering bpf_ret, which holds the call return value.
+ * BPF_REG_6 .. BPF_REG_10 and TCC are already callee-saved or on stack.
+ *
+ * The position of a register in this table selects its stack slot:
+ * emit_caller_save() and emit_caller_restore() place entry i at offset
+ * i * sizeof(u64). CALLER_ENV_SIZE is the unaligned size of that area,
+ * one u64 per table entry.
+ */
+static const int bpf_caller_save[] = {
+	BPF_REG_0,
+	BPF_REG_1,
+	BPF_REG_2,
+	BPF_REG_3,
+	BPF_REG_4,
+	BPF_REG_5,
+	BPF_REG_AX,
+};
+
+#define CALLER_ENV_SIZE (ARRAY_SIZE(bpf_caller_save) * sizeof(u64))
+
+/*
+ * Open a stack area of ALIGN(CALLER_ENV_SIZE, STACK_ALIGN) bytes and store
+ * every register listed in bpf_caller_save[] into it. Entry i goes to the
+ * u64 slot at offset i * sizeof(u64) from the lowered $sp.
+ */
+void emit_caller_save(struct jit_ctx *ctx)
+{
+	const int env_size = ALIGN(CALLER_ENV_SIZE, STACK_ALIGN);
+	int slot;
+
+	emit_instr_long(ctx, daddiu, addiu, MIPS_R_SP, MIPS_R_SP, -env_size);
+
+	for (slot = 0; slot < ARRAY_SIZE(bpf_caller_save); slot++) {
+		const int mreg = bpf2mips[bpf_caller_save[slot]].reg;
+		const int off = slot * sizeof(u64);
+
+		if (!is64bit()) {
+			/* 32-bit: store both halves of the register pair. */
+			emit_instr(ctx, sw, LO(mreg), OFFLO(off), MIPS_R_SP);
+			emit_instr(ctx, sw, HI(mreg), OFFHI(off), MIPS_R_SP);
+		} else {
+			emit_instr(ctx, sd, mreg, off, MIPS_R_SP);
+		}
+	}
+}
+
+/*
+ * Reload the registers listed in bpf_caller_save[] from their u64 stack
+ * slots (entry i at offset i * sizeof(u64)), except for the BPF register
+ * @bpf_ret, then release the ALIGN(CALLER_ENV_SIZE, STACK_ALIGN) byte area.
+ */
+void emit_caller_restore(struct jit_ctx *ctx, int bpf_ret)
+{
+	const int env_size = ALIGN(CALLER_ENV_SIZE, STACK_ALIGN);
+	int slot;
+
+	for (slot = 0; slot < ARRAY_SIZE(bpf_caller_save); slot++) {
+		const int bpf_reg = bpf_caller_save[slot];
+		const int off = slot * sizeof(u64);
+		int mreg;
+
+		/* Do not overwrite the register chosen to keep the result. */
+		if (bpf_reg == bpf_ret)
+			continue;
+
+		mreg = bpf2mips[bpf_reg].reg;
+		if (!is64bit()) {
+			/* 32-bit: load both halves of the register pair. */
+			emit_instr(ctx, lw, LO(mreg), OFFLO(off), MIPS_R_SP);
+			emit_instr(ctx, lw, HI(mreg), OFFHI(off), MIPS_R_SP);
+		} else {
+			emit_instr(ctx, ld, mreg, off, MIPS_R_SP);
+		}
+	}
+
+	emit_instr_long(ctx, daddiu, addiu, MIPS_R_SP, MIPS_R_SP, env_size);
+}
+
+/*
+ * JIT-compile @prog into native code.
+ *
+ * Returns the program to use: on success the (possibly constant-blinded)
+ * program with bpf_func, jited and jited_len filled in; otherwise the
+ * original @prog, when JIT was not requested or any step failed.
+ *
+ * For a prog with is_func set, the first call keeps the jit_ctx in
+ * prog->aux->jit_data. A later call finds it there and runs one extra
+ * code generation pass into the existing image before freeing the ctx.
+ */
+struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
+{
+	bool tmp_blinded = false, extra_pass = false;
+	struct bpf_prog *tmp, *orig_prog = prog;
+	struct bpf_binary_header *header = NULL;
+	unsigned int image_size, pass = 3;
+	struct jit_ctx *ctx;
+
+	if (!prog->jit_requested)
+		return orig_prog;
+
+	/* Attempt blinding but fall back to the interpreter on failure. */
+	tmp = bpf_jit_blind_constants(prog);
+	if (IS_ERR(tmp))
+		return orig_prog;
+	if (tmp != prog) {
+		tmp_blinded = true;
+		prog = tmp;
+	}
+
+	/* Reuse the ctx left by an earlier call, else start a fresh one. */
+	ctx = prog->aux->jit_data;
+	if (!ctx) {
+		ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+		if (!ctx) {
+			prog = orig_prog;
+			goto out;
+		}
+	}
+
+	/*
+	 * Assume extra pass needed for patching addresses if previous
+	 * ctx exists in saved jit_data, so skip to code generation.
+	 */
+	if (ctx->offsets) {
+		extra_pass = true;
+		pass++;
+		image_size = 4 * ctx->idx;
+		header = bpf_jit_binary_hdr(ctx->prog);
+		goto skip_init_ctx;
+	}
+
+	ctx->prog = prog;
+	/* One entry per insn plus one for the epilogue offset. */
+	ctx->offsets = kcalloc(prog->len + 1,
+			       sizeof(*ctx->offsets),
+			       GFP_KERNEL);
+	if (!ctx->offsets)
+		goto out_err;
+
+	/* Check Octeon bbit ops only for MIPS64. */
+	if (is64bit()) {
+		preempt_disable();
+		switch (current_cpu_type()) {
+		case CPU_CAVIUM_OCTEON:
+		case CPU_CAVIUM_OCTEON_PLUS:
+		case CPU_CAVIUM_OCTEON2:
+		case CPU_CAVIUM_OCTEON3:
+			ctx->use_bbit_insns = 1;
+			break;
+		default:
+			ctx->use_bbit_insns = 0;
+		}
+		preempt_enable();
+	}
+
+	ctx->reg_val_types = kcalloc(prog->len + 1,
+				     sizeof(*ctx->reg_val_types),
+				     GFP_KERNEL);
+	if (!ctx->reg_val_types)
+		goto out_err;
+
+	if (reg_val_propagate(ctx))
+		goto out_err;
+
+	/*
+	 * First pass discovers used resources and instruction offsets
+	 * assuming short branches are used.
+	 */
+	if (build_int_body(ctx))
+		goto out_err;
+
+	/*
+	 * If no calls are made (EBPF_SAVE_RA), then tailcall count located
+	 * in runtime reg if defined, else we backup to save reg or stack.
+	 */
+	if (tail_call_present(ctx)) {
+		if (ctx->flags & EBPF_SAVE_RA)
+			ctx->flags |= bpf2mips[JIT_SAV_TCC].flags;
+		else if (bpf2mips[JIT_RUN_TCC].reg)
+			ctx->flags |= EBPF_TCC_IN_RUN;
+	}
+
+	/*
+	 * Second pass generates offsets, if any branches are out of
+	 * range a jump-around long sequence is generated, and we have
+	 * to try again from the beginning to generate the new
+	 * offsets.  This is done until no additional conversions are
+	 * necessary.
+	 */
+	do {
+		ctx->idx = 0;
+		ctx->gen_b_offsets = 1;
+		ctx->long_b_conversion = 0;
+		if (build_int_prologue(ctx))
+			goto out_err;
+		if (build_int_body(ctx))
+			goto out_err;
+		if (build_int_epilogue(ctx, MIPS_R_RA))
+			goto out_err;
+	} while (ctx->long_b_conversion);
+
+	/* ctx->idx counts emitted instructions, 4 bytes each. */
+	image_size = 4 * ctx->idx;
+
+	/* &ctx->target is handed over so the image pointer can be set. */
+	header = bpf_jit_binary_alloc(image_size, (void *)&ctx->target,
+				      sizeof(u32), jit_fill_hole);
+	if (!header)
+		goto out_err;
+
+skip_init_ctx:
+
+	/* Third pass generates the code (fourth patches call addresses) */
+	ctx->idx = 0;
+	if (build_int_prologue(ctx))
+		goto out_err;
+	if (build_int_body(ctx))
+		goto out_err;
+	if (build_int_epilogue(ctx, MIPS_R_RA))
+		goto out_err;
+
+	if (bpf_jit_enable > 1)
+		/* Dump JIT code */
+		bpf_jit_dump(prog->len, image_size, pass, ctx->target);
+
+	/* Update the icache */
+	flush_icache_range((unsigned long)ctx->target,
+			   (unsigned long)&ctx->target[ctx->idx]);
+
+	/*
+	 * The image is final unless this is the first call for a prog with
+	 * is_func set; in that case keep the ctx for the extra pass.
+	 */
+	if (!prog->is_func || extra_pass)
+		bpf_jit_binary_lock_ro(header);
+	else
+		prog->aux->jit_data = ctx;
+
+	prog->bpf_func = (void *)ctx->target;
+	prog->jited = 1;
+	prog->jited_len = image_size;
+
+	if (!prog->is_func || extra_pass) {
+		bpf_prog_fill_jited_linfo(prog, ctx->offsets + 1);
+		/* out_err also jumps here to free the ctx. */
+out_ctx:
+		kfree(ctx->offsets);
+		kfree(ctx->reg_val_types);
+		kfree(ctx);
+		prog->aux->jit_data = NULL;
+	}
+out:
+	/* Release whichever of the two programs is not being returned. */
+	if (tmp_blinded)
+		bpf_jit_prog_release_other(prog, prog == orig_prog ?
+					   tmp : orig_prog);
+	return prog;
+
+out_err:
+	/* Fall back to the original program and drop any allocated image. */
+	prog = orig_prog;
+	if (header)
+		bpf_jit_binary_free(header);
+	goto out_ctx;
+}
+
+/*
+ * Indicate the JIT backend supports mixing bpf2bpf and tailcalls.
+ * Unconditionally returns true.
+ */
+bool bpf_jit_supports_subprog_tailcalls(void)
+{
+	return true;
+}