diff mbox series

[bpf-next,v6,7/9] bpf, x86: Add jit support for private stack

Message ID 20241020191425.2107971-1-yonghong.song@linux.dev (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series bpf: Support private stack for bpf progs | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 6 this patch: 6
netdev/build_tools success Errors and warnings before: 0 (+1) this patch: 0 (+1)
netdev/cc_maintainers warning 16 maintainers not CCed: dave.hansen@linux.intel.com john.fastabend@gmail.com dsahern@kernel.org jolsa@kernel.org x86@kernel.org kpsingh@kernel.org song@kernel.org haoluo@google.com bp@alien8.de netdev@vger.kernel.org sdf@fomichev.me martin.lau@linux.dev hpa@zytor.com tglx@linutronix.de eddyz87@gmail.com mingo@redhat.com
netdev/build_clang success Errors and warnings before: 6 this patch: 6
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 67 this patch: 69
netdev/checkpatch warning CHECK: No space is necessary after a cast
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-17 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-18 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18

Commit Message

Yonghong Song Oct. 20, 2024, 7:14 p.m. UTC
Add jit support for private stack. For a particular subtree, e.g.,
  subtree_root <== stack depth 120
   subprog1    <== stack depth 80
    subprog2   <== stack depth 40
   subprog3    <== stack depth 160

Let us say that priv_stack_ptr is the memory address allocated for
private stack. The frame pointer for each above is calculated like below:
  subtree_root  <== subtree_root_fp = private_stack_ptr + 120
   subprog1     <== subtree_subprog1_fp = subtree_root_fp + 80
    subprog2    <== subtree_subprog2_fp = subtree_subprog1_fp + 40
   subprog3     <== subtree_subprog1_fp = subtree_root_fp + 160

For any function call to helper/kfunc, push/pop prog frame pointer
is needed in order to preserve frame pointer value.

To deal with exception handling, push/pop frame pointer is also used
surrounding call to subsequent subprog. For example,
  subtree_root
   subprog1
     ...
     insn: call bpf_throw
     ...

After jit, we will have
  subtree_root
   insn: push r9
   subprog1
     ...
     insn: push r9
     insn: call bpf_throw
     insn: pop r9
     ...
   insn: pop r9

  exception_handler
     pop r9
     ...
where r9 represents the fp for each subprog.

Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
---
 arch/x86/net/bpf_jit_comp.c  | 88 +++++++++++++++++++++++++++++++++++-
 include/linux/bpf_verifier.h |  1 +
 2 files changed, 87 insertions(+), 2 deletions(-)
diff mbox series

Patch

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 6be8c739c3c2..86ebca32befc 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -325,6 +325,22 @@  struct jit_context {
 /* Number of bytes that will be skipped on tailcall */
 #define X86_TAIL_CALL_OFFSET	(12 + ENDBR_INSN_SIZE)
 
+static void push_r9(u8 **pprog)
+{
+	u8 *prog = *pprog;
+
+	EMIT2(0x41, 0x51);   /* push r9 */
+	*pprog = prog;
+}
+
+static void pop_r9(u8 **pprog)
+{
+	u8 *prog = *pprog;
+
+	EMIT2(0x41, 0x59);   /* pop r9 */
+	*pprog = prog;
+}
+
 static void push_r12(u8 **pprog)
 {
 	u8 *prog = *pprog;
@@ -484,13 +500,17 @@  static void emit_prologue_tail_call(u8 **pprog, bool is_subprog)
 	*pprog = prog;
 }
 
+static void emit_priv_frame_ptr(u8 **pprog, struct bpf_prog *bpf_prog,
+				enum bpf_priv_stack_mode priv_stack_mode);
+
 /*
  * Emit x86-64 prologue code for BPF program.
  * bpf_tail_call helper will skip the first X86_TAIL_CALL_OFFSET bytes
  * while jumping to another program
  */
 static void emit_prologue(u8 **pprog, u32 stack_depth, struct bpf_prog *bpf_prog,
-			  bool tail_call_reachable)
+			  bool tail_call_reachable,
+			  enum bpf_priv_stack_mode priv_stack_mode)
 {
 	bool ebpf_from_cbpf = bpf_prog_was_classic(bpf_prog);
 	bool is_exception_cb = bpf_prog->aux->exception_cb;
@@ -520,6 +540,8 @@  static void emit_prologue(u8 **pprog, u32 stack_depth, struct bpf_prog *bpf_prog
 		 * first restore those callee-saved regs from stack, before
 		 * reusing the stack frame.
 		 */
+		if (priv_stack_mode != NO_PRIV_STACK)
+			pop_r9(&prog);
 		pop_callee_regs(&prog, all_callee_regs_used);
 		pop_r12(&prog);
 		/* Reset the stack frame. */
@@ -532,6 +554,8 @@  static void emit_prologue(u8 **pprog, u32 stack_depth, struct bpf_prog *bpf_prog
 	/* X86_TAIL_CALL_OFFSET is here */
 	EMIT_ENDBR();
 
+	emit_priv_frame_ptr(&prog, bpf_prog, priv_stack_mode);
+
 	/* sub rsp, rounded_stack_depth */
 	if (stack_depth)
 		EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8));
@@ -1451,6 +1475,42 @@  static void emit_alu_imm(u8 **pprog, u8 insn_code, u32 dst_reg, s32 imm32)
 	*pprog = prog;
 }
 
+static void emit_root_priv_frame_ptr(u8 **pprog, struct bpf_prog *bpf_prog,
+				     u32 orig_stack_depth)
+{
+	void __percpu *priv_frame_ptr;
+	u8 *prog = *pprog;
+
+	priv_frame_ptr = bpf_prog->aux->priv_stack_ptr + orig_stack_depth;
+
+	/* movabs r9, priv_frame_ptr */
+	emit_mov_imm64(&prog, X86_REG_R9, (long) priv_frame_ptr >> 32,
+		       (u32) (long) priv_frame_ptr);
+#ifdef CONFIG_SMP
+	/* add <r9>, gs:[<off>] */
+	EMIT2(0x65, 0x4c);
+	EMIT3(0x03, 0x0c, 0x25);
+	EMIT((u32)(unsigned long)&this_cpu_off, 4);
+#endif
+	*pprog = prog;
+}
+
+static void emit_priv_frame_ptr(u8 **pprog, struct bpf_prog *bpf_prog,
+				enum bpf_priv_stack_mode priv_stack_mode)
+{
+	u32 orig_stack_depth = round_up(bpf_prog->aux->stack_depth, 8);
+	u8 *prog = *pprog;
+
+	if (priv_stack_mode == PRIV_STACK_ROOT_PROG)
+		emit_root_priv_frame_ptr(&prog, bpf_prog, orig_stack_depth);
+	else if (priv_stack_mode == PRIV_STACK_SUB_PROG && orig_stack_depth)
+		/* r9 += orig_stack_depth */
+		emit_alu_imm(&prog, BPF_ALU64 | BPF_ADD | BPF_K, X86_REG_R9,
+			     orig_stack_depth);
+
+	*pprog = prog;
+}
+
 #define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp)))
 
 #define __LOAD_TCC_PTR(off)			\
@@ -1464,6 +1524,7 @@  static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 {
 	bool tail_call_reachable = bpf_prog->aux->tail_call_reachable;
 	struct bpf_insn *insn = bpf_prog->insnsi;
+	enum bpf_priv_stack_mode priv_stack_mode;
 	bool callee_regs_used[4] = {};
 	int insn_cnt = bpf_prog->len;
 	bool seen_exit = false;
@@ -1476,13 +1537,17 @@  static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 	int err;
 
 	stack_depth = bpf_prog->aux->stack_depth;
+	priv_stack_mode = bpf_prog->aux->priv_stack_mode;
+	if (priv_stack_mode != NO_PRIV_STACK)
+		stack_depth = 0;
 
 	arena_vm_start = bpf_arena_get_kern_vm_start(bpf_prog->aux->arena);
 	user_vm_start = bpf_arena_get_user_vm_start(bpf_prog->aux->arena);
 
 	detect_reg_usage(insn, insn_cnt, callee_regs_used);
 
-	emit_prologue(&prog, stack_depth, bpf_prog, tail_call_reachable);
+	emit_prologue(&prog, stack_depth, bpf_prog, tail_call_reachable,
+		      priv_stack_mode);
 	/* Exception callback will clobber callee regs for its own use, and
 	 * restore the original callee regs from main prog's stack frame.
 	 */
@@ -1521,6 +1586,14 @@  static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 		u8 *func;
 		int nops;
 
+		if (priv_stack_mode != NO_PRIV_STACK) {
+			if (src_reg == BPF_REG_FP)
+				src_reg = X86_REG_R9;
+
+			if (dst_reg == BPF_REG_FP)
+				dst_reg = X86_REG_R9;
+		}
+
 		switch (insn->code) {
 			/* ALU */
 		case BPF_ALU | BPF_ADD | BPF_X:
@@ -2146,9 +2219,15 @@  st:			if (is_imm8(insn->off))
 			}
 			if (!imm32)
 				return -EINVAL;
+			if (priv_stack_mode != NO_PRIV_STACK) {
+				push_r9(&prog);
+				ip += 2;
+			}
 			ip += x86_call_depth_emit_accounting(&prog, func, ip);
 			if (emit_call(&prog, func, ip))
 				return -EINVAL;
+			if (priv_stack_mode != NO_PRIV_STACK)
+				pop_r9(&prog);
 			break;
 		}
 
@@ -3572,6 +3651,11 @@  bool bpf_jit_supports_exceptions(void)
 	return IS_ENABLED(CONFIG_UNWINDER_ORC);
 }
 
+bool bpf_jit_supports_private_stack(void)
+{
+	return true;
+}
+
 void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie)
 {
 #if defined(CONFIG_UNWINDER_ORC)
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index bcfe868e3801..dd28b05bcff0 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -891,6 +891,7 @@  static inline bool bpf_prog_check_recur(const struct bpf_prog *prog)
 	case BPF_PROG_TYPE_TRACING:
 		return prog->expected_attach_type != BPF_TRACE_ITER;
 	case BPF_PROG_TYPE_STRUCT_OPS:
+		return prog->aux->priv_stack_eligible;
 	case BPF_PROG_TYPE_LSM:
 		return false;
 	default: