Try to add a 3rd argument to bpf programs, where the 3rd argument is the
frame pointer into the bpf program stack. There are a few issues here:

- Currently, only the main bpf program uses the shadow stack; other
  subprograms (static or global) still use the kernel stack. It is
  POSSIBLE to use a hidden register to pass the frame pointer (derived
  from the main program) to those static or global functions.

- But tail call and ext programs are not working now, since we do not
  know at jit time which programs will be used in tail call and ext. It
  is possible to do some jit at text_poke time, but that would need
  additional stack allocation at jit time, which would be complicated.

- For xdp programs, the jit for the xdp dispatcher needs to be patched
  as well.

Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
---
 arch/x86/net/bpf_jit_comp.c | 105 +++++++++++++++++++++++++++++-------
 include/linux/bpf-cgroup.h  |   9 ++--
 include/linux/bpf.h         |  29 ++++++----
 include/linux/filter.h      |  25 +++++++--
 kernel/bpf/cgroup.c         |   9 ++--
 kernel/bpf/core.c           |  36 +++++++++++--
 kernel/bpf/offload.c        |   3 +-
 7 files changed, 173 insertions(+), 43 deletions(-)

--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -275,6 +275,14 @@ struct jit_context {
/* Number of bytes that will be skipped on tailcall */
#define X86_TAIL_CALL_OFFSET (11 + ENDBR_INSN_SIZE)
+static void push_r9(u8 **pprog)
+{
+ u8 *prog = *pprog;
+
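+ /* 0x41 0x51: REX.B prefix + push opcode, i.e. "push %r9" */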
+ EMIT2(0x41, 0x51);
+ *pprog = prog;
+}
+
static void push_r12(u8 **pprog)
{
u8 *prog = *pprog;
@@ -298,6 +306,14 @@ static void push_callee_regs(u8 **pprog, bool *callee_regs_used)
*pprog = prog;
}
+static void pop_r9(u8 **pprog)
+{
+ u8 *prog = *pprog;
+
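+ /* 0x41 0x59: REX.B prefix + pop opcode, i.e. "pop %r9" */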
+ EMIT2(0x41, 0x59);
+ *pprog = prog;
+}
+
static void pop_r12(u8 **pprog)
{
u8 *prog = *pprog;
@@ -605,7 +621,7 @@ static void emit_return(u8 **pprog, u8 *ip)
*/
static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog,
u8 **pprog, bool *callee_regs_used,
- u32 stack_depth, u8 *ip,
+ bool tail_call_reachable, u32 stack_depth, u8 *ip,
struct jit_context *ctx)
{
int tcc_off = -4 - round_up(stack_depth, 8);
@@ -658,6 +674,8 @@ static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog,
pop_callee_regs(&prog, all_callee_regs_used);
pop_r12(&prog);
} else {
+ if (!tail_call_reachable && !bpf_prog->is_func)
+ pop_r9(&prog);
pop_callee_regs(&prog, callee_regs_used);
if (bpf_arena_get_kern_vm_start(bpf_prog->aux->arena))
pop_r12(&prog);
@@ -688,7 +706,7 @@ static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog,
static void emit_bpf_tail_call_direct(struct bpf_prog *bpf_prog,
struct bpf_jit_poke_descriptor *poke,
u8 **pprog, u8 *ip,
- bool *callee_regs_used, u32 stack_depth,
+ bool *callee_regs_used, bool tail_call_reachable, u32 stack_depth,
struct jit_context *ctx)
{
int tcc_off = -4 - round_up(stack_depth, 8);
@@ -719,6 +737,8 @@ static void emit_bpf_tail_call_direct(struct bpf_prog *bpf_prog,
pop_callee_regs(&prog, all_callee_regs_used);
pop_r12(&prog);
} else {
+ if (!tail_call_reachable && !bpf_prog->is_func)
+ pop_r9(&prog);
pop_callee_regs(&prog, callee_regs_used);
if (bpf_arena_get_kern_vm_start(bpf_prog->aux->arena))
pop_r12(&prog);
@@ -1321,6 +1341,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
int oldproglen, struct jit_context *ctx, bool jmp_padding)
{
bool tail_call_reachable = bpf_prog->aux->tail_call_reachable;
+ u32 stack_depth = bpf_prog->aux->stack_depth;
struct bpf_insn *insn = bpf_prog->insnsi;
bool callee_regs_used[4] = {};
int insn_cnt = bpf_prog->len;
@@ -1333,6 +1354,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
u8 *prog = temp;
int err;
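+ /* with a shadow stack, the bpf frame is not carved out of the kernel stack */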
+ if (!tail_call_reachable && !bpf_prog->is_func)
+ stack_depth = 0;
+
arena_vm_start = bpf_arena_get_kern_vm_start(bpf_prog->aux->arena);
user_vm_start = bpf_arena_get_user_vm_start(bpf_prog->aux->arena);
@@ -1342,7 +1366,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
/* tail call's presence in current prog implies it is reachable */
tail_call_reachable |= tail_call_seen;
- emit_prologue(&prog, bpf_prog->aux->stack_depth,
+ emit_prologue(&prog, stack_depth,
bpf_prog_was_classic(bpf_prog), tail_call_reachable,
bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb);
/* Exception callback will clobber callee regs for its own use, and
@@ -1359,6 +1383,10 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
if (arena_vm_start)
push_r12(&prog);
push_callee_regs(&prog, callee_regs_used);
+ if (!tail_call_reachable && !bpf_prog->is_func) {
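+ /* the 3rd bpf arg (rdx) carries the shadow frame pointer; stash it in r9 and keep it saved across the prog body */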
+ emit_mov_reg(&prog, true, X86_REG_R9, BPF_REG_3);
+ push_r9(&prog);
+ }
}
if (arena_vm_start)
emit_mov_imm64(&prog, X86_REG_R12,
@@ -1383,6 +1411,20 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
u8 *func;
int nops;
+ if (!bpf_prog->aux->exception_boundary && !tail_call_reachable && !bpf_prog->is_func) {
+ if (src_reg == BPF_REG_FP) {
+ pop_r9(&prog);
+ push_r9(&prog);
+ src_reg = X86_REG_R9;
+ }
+
+ if (dst_reg == BPF_REG_FP) {
+ pop_r9(&prog);
+ push_r9(&prog);
+ dst_reg = X86_REG_R9;
+ }
+ }
+
switch (insn->code) {
/* ALU */
case BPF_ALU | BPF_ADD | BPF_X:
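
Note: whenever an instruction references the bpf frame pointer r10, the
hunk above reloads the saved shadow frame pointer into %r9 and
substitutes it for %rbp. For illustration (a sketch, not literal JIT
output; assumes the standard r1 -> %rdi mapping), a stack-slot address
computation such as "r1 = r10; r1 -= 8" is emitted roughly as:

	pop  %r9		/* reload shadow frame pointer saved by the prologue */
	push %r9		/* keep it saved for the next r10 reference */
	mov  %r9, %rdi		/* r1 = shadow frame pointer, not %rbp */
	sub  $8, %rdi		/* r1 -= 8 */
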
@@ -2045,7 +2087,7 @@ st: if (is_imm8(insn->off))
func = (u8 *) __bpf_call_base + imm32;
if (tail_call_reachable) {
- RESTORE_TAIL_CALL_CNT(bpf_prog->aux->stack_depth);
+ RESTORE_TAIL_CALL_CNT(stack_depth);
if (!imm32)
return -EINVAL;
offs = 7 + x86_call_depth_emit_accounting(&prog, func);
@@ -2065,13 +2107,15 @@ st: if (is_imm8(insn->off))
&bpf_prog->aux->poke_tab[imm32 - 1],
&prog, image + addrs[i - 1],
callee_regs_used,
- bpf_prog->aux->stack_depth,
+ tail_call_reachable,
+ stack_depth,
ctx);
else
emit_bpf_tail_call_indirect(bpf_prog,
&prog,
callee_regs_used,
- bpf_prog->aux->stack_depth,
+ tail_call_reachable,
+ stack_depth,
image + addrs[i - 1],
ctx);
break;
@@ -2326,6 +2370,8 @@ st: if (is_imm8(insn->off))
pop_callee_regs(&prog, all_callee_regs_used);
pop_r12(&prog);
} else {
+ if (!tail_call_reachable && !bpf_prog->is_func)
+ pop_r9(&prog);
pop_callee_regs(&prog, callee_regs_used);
if (arena_vm_start)
pop_r12(&prog);
@@ -2555,7 +2601,7 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog,
static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
struct bpf_tramp_link *l, int stack_size,
- int run_ctx_off, bool save_ret,
+ int run_ctx_off, int shadow_stack_off, bool save_ret,
void *image, void *rw_image)
{
u8 *prog = *pprog;
@@ -2597,6 +2643,13 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
jmp_insn = prog;
emit_nops(&prog, 2);
+ /* call bpf_shadow_stack_alloc */
+ /* arg1: mov rdi, prog */
+ emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
+ if (emit_rsb_call(&prog, bpf_shadow_stack_alloc, image + (prog - (u8 *)rw_image)))
+ return -EINVAL;
+ emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -shadow_stack_off);
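+ /* remember the shadow frame pointer so the free call after the prog returns can find it */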
+
/* arg1: lea rdi, [rbp - stack_size] */
if (!is_imm8(-stack_size))
EMIT3_off32(0x48, 0x8D, 0xBD, -stack_size);
@@ -2607,6 +2660,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
emit_mov_imm64(&prog, BPF_REG_2,
(long) p->insnsi >> 32,
(u32) (long) p->insnsi);
+ /* arg3: shadow_stack for jit */
+ if (p->jited)
+ emit_mov_reg(&prog, true, BPF_REG_3, BPF_REG_0);
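+ /* interpreted progs ignore the 3rd arg and keep using their on-stack frame */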
+
/* call JITed bpf program or interpreter */
if (emit_rsb_call(&prog, p->bpf_func, image + (prog - (u8 *)rw_image)))
return -EINVAL;
@@ -2622,6 +2679,12 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
if (save_ret)
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
+ /* call bpf_shadow_stack_free */
+ emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
+ emit_ldx(&prog, BPF_DW, BPF_REG_2, BPF_REG_FP, -shadow_stack_off);
+ if (emit_rsb_call(&prog, bpf_shadow_stack_free, image + (prog - (u8 *)rw_image)))
+ return -EINVAL;
+
/* replace 2 nops with JE insn, since jmp target is known */
jmp_insn[0] = X86_JE;
jmp_insn[1] = prog - jmp_insn - 2;
@@ -2670,7 +2733,7 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
struct bpf_tramp_links *tl, int stack_size,
- int run_ctx_off, bool save_ret,
+ int run_ctx_off, int shadow_stack_off, bool save_ret,
void *image, void *rw_image)
{
int i;
@@ -2678,7 +2741,7 @@ static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
for (i = 0; i < tl->nr_links; i++) {
if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size,
- run_ctx_off, save_ret, image, rw_image))
+ run_ctx_off, shadow_stack_off, save_ret, image, rw_image))
return -EINVAL;
}
*pprog = prog;
@@ -2687,7 +2750,7 @@ static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
struct bpf_tramp_links *tl, int stack_size,
- int run_ctx_off, u8 **branches,
+ int run_ctx_off, int shadow_stack_off, u8 **branches,
void *image, void *rw_image)
{
u8 *prog = *pprog;
@@ -2699,7 +2762,7 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
emit_mov_imm32(&prog, false, BPF_REG_0, 0);
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
for (i = 0; i < tl->nr_links; i++) {
- if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, true,
+ if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, shadow_stack_off, true,
image, rw_image))
return -EINVAL;
@@ -2790,7 +2853,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
void *func_addr)
{
int i, ret, nr_regs = m->nr_args, stack_size = 0;
- int regs_off, nregs_off, ip_off, run_ctx_off, arg_stack_off, rbx_off;
+ int regs_off, nregs_off, ip_off, run_ctx_off, shadow_stack_off, arg_stack_off, rbx_off;
struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
@@ -2839,6 +2902,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
*
* RBP - run_ctx_off [ bpf_tramp_run_ctx ]
*
+ * RBP - shadow_stack_off [ shadow_stack ]
+ *
* [ stack_argN ] BPF_TRAMP_F_CALL_ORIG
* [ ... ]
* [ stack_arg2 ]
@@ -2869,6 +2934,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
stack_size += (sizeof(struct bpf_tramp_run_ctx) + 7) & ~0x7;
run_ctx_off = stack_size;
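+ /* one u64 slot to remember the shadow frame pointer */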
+ stack_size += 8;
+ shadow_stack_off = stack_size;
+
if (nr_regs > 6 && (flags & BPF_TRAMP_F_CALL_ORIG)) {
/* the space that used to pass arguments on-stack */
stack_size += (nr_regs - get_nr_used_regs(m)) * 8;
@@ -2949,7 +3017,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
}
if (fentry->nr_links) {
- if (invoke_bpf(m, &prog, fentry, regs_off, run_ctx_off,
+ if (invoke_bpf(m, &prog, fentry, regs_off, run_ctx_off, shadow_stack_off,
flags & BPF_TRAMP_F_RET_FENTRY_RET, image, rw_image))
return -EINVAL;
}
@@ -2961,7 +3029,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
return -ENOMEM;
if (invoke_bpf_mod_ret(m, &prog, fmod_ret, regs_off,
- run_ctx_off, branches, image, rw_image)) {
+ run_ctx_off, shadow_stack_off, branches, image, rw_image)) {
ret = -EINVAL;
goto cleanup;
}
@@ -3011,7 +3079,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
}
if (fexit->nr_links) {
- if (invoke_bpf(m, &prog, fexit, regs_off, run_ctx_off,
+ if (invoke_bpf(m, &prog, fexit, regs_off, run_ctx_off, shadow_stack_off,
false, image, rw_image)) {
ret = -EINVAL;
goto cleanup;
@@ -3121,11 +3189,11 @@ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
* We cannot use kvmalloc here, because we need image to be in
* module memory range.
*/
- image = bpf_jit_alloc_exec(PAGE_SIZE);
+ image = bpf_jit_alloc_exec(PAGE_SIZE * 2);
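+ /* two pages: the shadow-stack alloc/free calls grow each link's stub */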
if (!image)
return -ENOMEM;
- ret = __arch_prepare_bpf_trampoline(&im, image, image + PAGE_SIZE, image,
+ ret = __arch_prepare_bpf_trampoline(&im, image, image + PAGE_SIZE * 2, image,
m, flags, tlinks, func_addr);
bpf_jit_free_exec(image);
return ret;
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -24,11 +24,14 @@ struct ctl_table_header;
struct task_struct;
unsigned int __cgroup_bpf_run_lsm_sock(const void *ctx,
- const struct bpf_insn *insn);
+ const struct bpf_insn *insn,
+ void *shadow_stack);
unsigned int __cgroup_bpf_run_lsm_socket(const void *ctx,
- const struct bpf_insn *insn);
+ const struct bpf_insn *insn,
+ void *shadow_stack);
unsigned int __cgroup_bpf_run_lsm_current(const void *ctx,
- const struct bpf_insn *insn);
+ const struct bpf_insn *insn,
+ void *shadow_stack);
#ifdef CONFIG_CGROUP_BPF
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -69,7 +69,8 @@ typedef int (*bpf_iter_init_seq_priv_t)(void *private_data,
struct bpf_iter_aux_info *aux);
typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data);
typedef unsigned int (*bpf_func_t)(const void *,
- const struct bpf_insn *);
+ const struct bpf_insn *,
+ void *);
struct bpf_iter_seq_info {
const struct seq_operations *seq_ops;
bpf_iter_init_seq_priv_t init_seq_private;
@@ -1076,9 +1077,9 @@ struct btf_func_model {
*/
enum {
#if defined(__s390x__)
- BPF_MAX_TRAMP_LINKS = 27,
+ BPF_MAX_TRAMP_LINKS = 24,
#else
- BPF_MAX_TRAMP_LINKS = 38,
+ BPF_MAX_TRAMP_LINKS = 35,
#endif
};
@@ -1133,6 +1134,9 @@ typedef void (*bpf_trampoline_exit_t)(struct bpf_prog *prog, u64 start,
bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog);
bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog);
+void * notrace bpf_shadow_stack_alloc(struct bpf_prog *prog);
+void notrace bpf_shadow_stack_free(struct bpf_prog *prog, void *shadow_frame);
+
struct bpf_ksym {
unsigned long start;
unsigned long end;
@@ -1228,9 +1232,11 @@ struct bpf_dispatcher {
static __always_inline __bpfcall unsigned int bpf_dispatcher_nop_func(
const void *ctx,
const struct bpf_insn *insnsi,
- bpf_func_t bpf_func)
+ bpf_func_t bpf_func,
+ void *shadow_stack)
{
- return bpf_func(ctx, insnsi);
+ // printk("%s: shadow_stack = %px\n", __func__, shadow_stack);
+ return bpf_func(ctx, insnsi, shadow_stack);
}
/* the implementation of the opaque uapi struct bpf_dynptr */
@@ -1289,7 +1295,7 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func
DEFINE_STATIC_CALL(bpf_dispatcher_##name##_call, bpf_dispatcher_nop_func)
#define __BPF_DISPATCHER_CALL(name) \
- static_call(bpf_dispatcher_##name##_call)(ctx, insnsi, bpf_func)
+ static_call(bpf_dispatcher_##name##_call)(ctx, insnsi, bpf_func, shadow_stack)
#define __BPF_DISPATCHER_UPDATE(_d, _new) \
__static_call_update((_d)->sc_key, (_d)->sc_tramp, (_new))
@@ -1297,7 +1303,7 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func
#else
#define __BPF_DISPATCHER_SC_INIT(name)
#define __BPF_DISPATCHER_SC(name)
-#define __BPF_DISPATCHER_CALL(name) bpf_func(ctx, insnsi)
+#define __BPF_DISPATCHER_CALL(name) bpf_func(ctx, insnsi, shadow_stack)
#define __BPF_DISPATCHER_UPDATE(_d, _new)
#endif
@@ -1320,7 +1326,8 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func
noinline __bpfcall unsigned int bpf_dispatcher_##name##_func( \
const void *ctx, \
const struct bpf_insn *insnsi, \
- bpf_func_t bpf_func) \
+ bpf_func_t bpf_func, \
+ void *shadow_stack) \
{ \
return __BPF_DISPATCHER_CALL(name); \
} \
@@ -1332,7 +1339,8 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func
unsigned int bpf_dispatcher_##name##_func( \
const void *ctx, \
const struct bpf_insn *insnsi, \
- bpf_func_t bpf_func); \
+ bpf_func_t bpf_func, \
+ void *shadow_stack); \
extern struct bpf_dispatcher bpf_dispatcher_##name;
#define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_##name##_func
@@ -1549,7 +1557,8 @@ struct bpf_prog {
struct bpf_prog_stats __percpu *stats;
int __percpu *active;
unsigned int (*bpf_func)(const void *ctx,
- const struct bpf_insn *insn);
+ const struct bpf_insn *insn,
+ void *shadow_stack);
struct bpf_prog_aux *aux; /* Auxiliary fields */
struct sock_fprog_kern *orig_prog; /* Original BPF program */
/* Instructions for interpreter */
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -665,13 +665,25 @@ extern int (*nfct_btf_struct_access)(struct bpf_verifier_log *log,
typedef unsigned int (*bpf_dispatcher_fn)(const void *ctx,
const struct bpf_insn *insnsi,
unsigned int (*bpf_func)(const void *,
- const struct bpf_insn *));
+ const struct bpf_insn *,
+ void *),
+ void *shadow_stack);
static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog,
const void *ctx,
bpf_dispatcher_fn dfunc)
{
- u32 ret;
+ void *shadow_stack = NULL, *shadow_frame = NULL;
+ u32 ret, roundup_stack_size;
+
+ if (prog->aux->stack_depth) {
+ roundup_stack_size = round_up(prog->aux->stack_depth, 16);
+ shadow_stack = kmalloc(roundup_stack_size, __GFP_NORETRY);
+ if (shadow_stack)
+ shadow_frame = shadow_stack + roundup_stack_size;
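+ /* bpf's frame pointer addresses the high end; stack slots sit at negative offsets */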
+ }
+
+ // printk("shadow_stack = %px, shadow_frame = %px\n", shadow_stack, shadow_frame);
cant_migrate();
if (static_branch_unlikely(&bpf_stats_enabled_key)) {
@@ -679,7 +691,7 @@ static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog,
u64 duration, start = sched_clock();
unsigned long flags;
- ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
+ ret = dfunc(ctx, prog->insnsi, prog->bpf_func, shadow_frame);
duration = sched_clock() - start;
stats = this_cpu_ptr(prog->stats);
@@ -688,8 +700,13 @@ static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog,
u64_stats_add(&stats->nsecs, duration);
u64_stats_update_end_irqrestore(&stats->syncp, flags);
} else {
- ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
+ ret = dfunc(ctx, prog->insnsi, prog->bpf_func, shadow_frame);
}
+
+ // printk("shadow_frame = %px\n", shadow_frame);
+
+ if (prog->aux->stack_depth)
+ kfree(shadow_stack);
return ret;
}
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -64,7 +64,8 @@ bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp,
}
unsigned int __cgroup_bpf_run_lsm_sock(const void *ctx,
- const struct bpf_insn *insn)
+ const struct bpf_insn *insn,
+ void *shadow_stack)
{
const struct bpf_prog *shim_prog;
struct sock *sk;
@@ -86,7 +87,8 @@ unsigned int __cgroup_bpf_run_lsm_sock(const void *ctx,
}
unsigned int __cgroup_bpf_run_lsm_socket(const void *ctx,
- const struct bpf_insn *insn)
+ const struct bpf_insn *insn,
+ void *shadow_stack)
{
const struct bpf_prog *shim_prog;
struct socket *sock;
@@ -108,7 +110,8 @@ unsigned int __cgroup_bpf_run_lsm_socket(const void *ctx,
}
unsigned int __cgroup_bpf_run_lsm_current(const void *ctx,
- const struct bpf_insn *insn)
+ const struct bpf_insn *insn,
+ void *shadow_stack)
{
const struct bpf_prog *shim_prog;
struct cgroup *cgrp;
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -563,7 +563,7 @@ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp)
#ifdef CONFIG_BPF_JIT
/* All BPF JIT sysctl knobs here. */
-int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON);
+int bpf_jit_enable __read_mostly = 1;
int bpf_jit_kallsyms __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON);
int bpf_jit_harden __read_mostly;
long bpf_jit_limit __read_mostly;
@@ -2213,7 +2213,7 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
#define PROG_NAME(stack_size) __bpf_prog_run##stack_size
#define DEFINE_BPF_PROG_RUN(stack_size) \
-static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn *insn) \
+static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn *insn, void *shadow_stack) \
{ \
u64 stack[stack_size / sizeof(u64)]; \
u64 regs[MAX_BPF_EXT_REG] = {}; \
@@ -2260,7 +2260,8 @@ EVAL4(DEFINE_BPF_PROG_RUN_ARGS, 416, 448, 480, 512);
#define PROG_NAME_LIST(stack_size) PROG_NAME(stack_size),
static unsigned int (*interpreters[])(const void *ctx,
- const struct bpf_insn *insn) = {
+ const struct bpf_insn *insn,
+ void *shadow_stack) = {
EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192)
EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
@@ -2434,8 +2435,35 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
}
EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
+void * notrace bpf_shadow_stack_alloc(struct bpf_prog *prog)
+{
+ void *shadow_stack = NULL, *shadow_frame = NULL;
+ u32 roundup_stack_size;
+
+ if (!prog->aux->stack_depth)
+ return NULL;
+
+ roundup_stack_size = round_up(prog->aux->stack_depth, 16);
+ shadow_stack = kmalloc(roundup_stack_size, __GFP_NORETRY);
+ if (shadow_stack)
+ shadow_frame = shadow_stack + roundup_stack_size;
+ return shadow_frame;
+}
+
+void notrace bpf_shadow_stack_free(struct bpf_prog *prog, void *shadow_frame)
+{
+ u32 roundup_stack_size;
+
+ if (!shadow_frame)
+ return;
+
+ roundup_stack_size = round_up(prog->aux->stack_depth, 16);
+ kfree(shadow_frame - roundup_stack_size);
+}
+
static unsigned int __bpf_prog_ret1(const void *ctx,
- const struct bpf_insn *insn)
+ const struct bpf_insn *insn,
+ void *shadow_stack)
{
return 1;
}
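
Note: around each link invocation the trampoline now emits machine code
that behaves like the following C sketch (names from this patch; the
ctx/arg setup in invoke_bpf_prog() is elided):

	void *frame;

	frame = bpf_shadow_stack_alloc(p);	  /* kept at rbp - shadow_stack_off */
	ret = p->bpf_func(ctx, p->insnsi, frame); /* arg3 used only when p->jited */
	bpf_shadow_stack_free(p, frame);	  /* kfree()s frame minus rounded-up stack_depth */
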
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -414,7 +414,8 @@ static int bpf_prog_offload_translate(struct bpf_prog *prog)
}
static unsigned int bpf_prog_warn_on_exec(const void *ctx,
- const struct bpf_insn *insn)
+ const struct bpf_insn *insn,
+ void *shadow_stack)
{
WARN(1, "attempt to execute device eBPF program on the host!");
return 0;