From patchwork Wed Oct 5 14:13:06 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Florian Westphal X-Patchwork-Id: 12999279 X-Patchwork-Delegate: kuba@kernel.org Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 7AD7CC4332F for ; Wed, 5 Oct 2022 14:13:55 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229864AbiJEONx (ORCPT ); Wed, 5 Oct 2022 10:13:53 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:41598 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230019AbiJEONw (ORCPT ); Wed, 5 Oct 2022 10:13:52 -0400 Received: from Chamillionaire.breakpoint.cc (Chamillionaire.breakpoint.cc [IPv6:2a0a:51c0:0:12e:520::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 9FD31786CA for ; Wed, 5 Oct 2022 07:13:50 -0700 (PDT) Received: from fw by Chamillionaire.breakpoint.cc with local (Exim 4.92) (envelope-from ) id 1og59h-0001fO-37; Wed, 05 Oct 2022 16:13:49 +0200 From: Florian Westphal To: bpf@vger.kernel.org Cc: Florian Westphal Subject: [RFC v2 6/9] netfilter: add bpf base hook program generator Date: Wed, 5 Oct 2022 16:13:06 +0200 Message-Id: <20221005141309.31758-7-fw@strlen.de> X-Mailer: git-send-email 2.35.1 In-Reply-To: <20221005141309.31758-1-fw@strlen.de> References: <20221005141309.31758-1-fw@strlen.de> MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: bpf@vger.kernel.org X-Patchwork-Delegate: kuba@kernel.org X-Patchwork-State: RFC Add a kernel bpf program generator for netfilter base hooks. Currently netfilter hooks are invoked by nf_hook_slow: for i in hooks; do verdict = hooks[i]->indirect_func(hooks->[i].hook_arg, skb, state); switch (verdict) { .... 
The autogenerator unrolls the loop, so we get: state->priv = hooks->[0].hook_arg; v = first_hook_function(state); if (v != ACCEPT) goto done; state->priv = hooks->[1].hook_arg; v = second_hook_function(state); ... Indirections are replaced by direct calls. Invocation of the autogenerated programs is done via bpf dispatcher from nf_hook(). The autogenerated program has the same return value scheme as nf_hook_slow(). NF_HOOK() points are converted to call the autogenerated bpf program instead of nf_hook_slow(). The purpose of this is to eventually add a 'netfilter prog type' to bpf and permit attachment of (userspace generated) bpf programs to the netfilter machinery, e.g. 'attach bpf prog id 1234 to ipv6 PREROUTING at prio -300'. This will require exposing the context structure (the program argument, '__nf_hook_state'), with accesses rewritten to match the nf_hook_state layout. NAT hooks are still handled via indirect calls, but they are only called once per connection. Signed-off-by: Florian Westphal --- include/linux/netfilter.h | 66 ++++- include/net/netfilter/nf_hook_bpf.h | 21 ++ net/netfilter/Kconfig | 10 + net/netfilter/Makefile | 1 + net/netfilter/core.c | 92 +++++- net/netfilter/nf_hook_bpf.c | 424 ++++++++++++++++++++++++++++ 6 files changed, 605 insertions(+), 9 deletions(-) create mode 100644 include/net/netfilter/nf_hook_bpf.h create mode 100644 net/netfilter/nf_hook_bpf.c diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 7c604ef8e8cb..b7874b772dd1 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -2,6 +2,7 @@ #ifndef __LINUX_NETFILTER_H #define __LINUX_NETFILTER_H +#include #include #include #include @@ -106,6 +107,9 @@ struct nf_hook_entries_rcu_head { }; struct nf_hook_entries { +#if IS_ENABLED(CONFIG_NF_HOOK_BPF) + struct bpf_prog *hook_prog; +#endif u16 num_hook_entries; /* padding */ struct nf_hook_entry hooks[]; @@ -205,6 +209,17 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, void 
nf_hook_slow_list(struct list_head *head, struct nf_hook_state *state, const struct nf_hook_entries *e); + +#if IS_ENABLED(CONFIG_NF_HOOK_BPF) +DECLARE_BPF_DISPATCHER(nf_hook_base); + +static __always_inline int bpf_prog_run_nf(const struct bpf_prog *prog, + struct nf_hook_state *state) +{ + return __bpf_prog_run(prog, state, BPF_DISPATCHER_FUNC(nf_hook_base)); +} +#endif + /** * nf_hook - call a netfilter hook * @@ -213,17 +228,17 @@ void nf_hook_slow_list(struct list_head *head, struct nf_hook_state *state, * value indicates the packet has been consumed by the hook. */ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, - struct sock *sk, struct sk_buff *skb, - struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct net *, struct sock *, struct sk_buff *)) + struct sock *sk, struct sk_buff *skb, + struct net_device *indev, struct net_device *outdev, + int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { struct nf_hook_entries *hook_head = NULL; int ret = 1; #ifdef CONFIG_JUMP_LABEL if (__builtin_constant_p(pf) && - __builtin_constant_p(hook) && - !static_key_false(&nf_hooks_needed[pf][hook])) + __builtin_constant_p(hook) && + !static_key_false(&nf_hooks_needed[pf][hook])) return 1; #endif @@ -254,11 +269,24 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, if (hook_head) { struct nf_hook_state state; +#if IS_ENABLED(CONFIG_NF_HOOK_BPF) + const struct bpf_prog *p = READ_ONCE(hook_head->hook_prog); + + nf_hook_state_init(&state, hook, pf, indev, outdev, + sk, net, okfn); + + state.priv = (void *)hook_head; + state.skb = skb; + migrate_disable(); + ret = bpf_prog_run_nf(p, &state); + migrate_enable(); +#else nf_hook_state_init(&state, hook, pf, indev, outdev, sk, net, okfn); ret = nf_hook_slow(skb, &state, hook_head); +#endif } rcu_read_unlock(); @@ -336,10 +364,38 @@ NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, if (hook_head) { struct nf_hook_state state; 
+#if IS_ENABLED(CONFIG_NF_HOOK_BPF) + const struct bpf_prog *p = hook_head->hook_prog; + struct sk_buff *skb, *next; + struct list_head sublist; + int ret; + + nf_hook_state_init(&state, hook, pf, in, out, sk, net, okfn); + + INIT_LIST_HEAD(&sublist); + migrate_disable(); + + list_for_each_entry_safe(skb, next, head, list) { + skb_list_del_init(skb); + + state.priv = (void *)hook_head; + state.skb = skb; + + ret = bpf_prog_run_nf(p, &state); + if (ret == 1) + list_add_tail(&skb->list, &sublist); + } + + migrate_enable(); + + /* Put passed packets back on main list */ + list_splice(&sublist, head); +#else nf_hook_state_init(&state, hook, pf, in, out, sk, net, okfn); nf_hook_slow_list(head, &state, hook_head); +#endif } rcu_read_unlock(); } diff --git a/include/net/netfilter/nf_hook_bpf.h b/include/net/netfilter/nf_hook_bpf.h new file mode 100644 index 000000000000..1792f97a806d --- /dev/null +++ b/include/net/netfilter/nf_hook_bpf.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +struct bpf_dispatcher; +struct bpf_prog; + +struct bpf_prog *nf_hook_bpf_create_fb(void); + +#if IS_ENABLED(CONFIG_NF_HOOK_BPF) +struct bpf_prog *nf_hook_bpf_create(const struct nf_hook_entries *n); + +void nf_hook_bpf_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, struct bpf_prog *to); +#else +static inline void +nf_hook_bpf_change_prog(struct bpf_dispatcher *d, struct bpf_prog *f, struct bpf_prog *t) +{ +} + +static inline struct bpf_prog *nf_hook_bpf_create(const struct nf_hook_entries *n) +{ + return NULL; +} +#endif diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 4b8d04640ff3..2610786b6ad8 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -30,6 +30,16 @@ config NETFILTER_FAMILY_BRIDGE config NETFILTER_FAMILY_ARP bool +config HAVE_NF_HOOK_BPF + bool + +config NF_HOOK_BPF + bool "netfilter base hook bpf translator" + depends on BPF_JIT + help + This unrolls the nf_hook_slow interpreter loop with + auto-generated BPF program. 
+ config NETFILTER_NETLINK_HOOK tristate "Netfilter base hook dump support" depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 06df49ea6329..e465659e87ad 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -21,6 +21,7 @@ nf_conntrack-$(CONFIG_DEBUG_INFO_BTF) += nf_conntrack_bpf.o endif obj-$(CONFIG_NETFILTER) = netfilter.o +obj-$(CONFIG_NF_HOOK_BPF) += nf_hook_bpf.o obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o obj-$(CONFIG_NETFILTER_NETLINK_ACCT) += nfnetlink_acct.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 17165f9cf4a1..6888c7fd5aeb 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include "nf_internals.h" @@ -47,6 +48,33 @@ static DEFINE_MUTEX(nf_hook_mutex); #define nf_entry_dereference(e) \ rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex)) +#if IS_ENABLED(CONFIG_NF_HOOK_BPF) +DEFINE_BPF_DISPATCHER(nf_hook_base); + +#define NF_DISPATCHER_PTR BPF_DISPATCHER_PTR(nf_hook_base) +#else +#define NF_DISPATCHER_PTR NULL +#endif + +static struct bpf_prog *fallback_nf_hook_slow; + +static void nf_hook_bpf_prog_set(struct nf_hook_entries *e, + struct bpf_prog *p) +{ +#if IS_ENABLED(CONFIG_NF_HOOK_BPF) + WRITE_ONCE(e->hook_prog, p); +#endif +} + +static struct bpf_prog *nf_hook_bpf_prog_get(struct nf_hook_entries *e) +{ +#if IS_ENABLED(CONFIG_NF_HOOK_BPF) + if (e) + return e->hook_prog; +#endif + return NULL; +} + static struct nf_hook_entries *allocate_hook_entries_size(u16 num) { struct nf_hook_entries *e; @@ -58,9 +86,23 @@ static struct nf_hook_entries *allocate_hook_entries_size(u16 num) if (num == 0) return NULL; - e = kvzalloc(alloc, GFP_KERNEL_ACCOUNT); - if (e) - e->num_hook_entries = num; +#if IS_ENABLED(CONFIG_NF_HOOK_BPF) + if (!fallback_nf_hook_slow) { + /* never free'd */ + fallback_nf_hook_slow = nf_hook_bpf_create_fb(); + + if (!fallback_nf_hook_slow) + return NULL; + } +#endif + + e = 
kvzalloc(alloc, GFP_KERNEL); + if (!e) + return NULL; + + e->num_hook_entries = num; + nf_hook_bpf_prog_set(e, fallback_nf_hook_slow); + return e; } @@ -98,6 +140,29 @@ static const struct nf_hook_ops dummy_ops = { .priority = INT_MIN, }; +static void nf_hook_entries_grow_bpf(const struct nf_hook_entries *old, + struct nf_hook_entries *new) +{ +#if IS_ENABLED(CONFIG_NF_HOOK_BPF) + struct bpf_prog *hook_bpf_prog = nf_hook_bpf_create(new); + + /* allocate_hook_entries_size() pre-inits new->hook_prog + * to a fallback program that calls nf_hook_slow(). + */ + if (hook_bpf_prog) { + struct bpf_prog *old_prog = NULL; + + new->hook_prog = hook_bpf_prog; + + if (old) + old_prog = old->hook_prog; + + nf_hook_bpf_change_prog(BPF_DISPATCHER_PTR(nf_hook_base), + old_prog, hook_bpf_prog); + } +#endif +} + static struct nf_hook_entries * nf_hook_entries_grow(const struct nf_hook_entries *old, const struct nf_hook_ops *reg) @@ -156,6 +221,7 @@ nf_hook_entries_grow(const struct nf_hook_entries *old, new->hooks[nhooks].priv = reg->priv; } + nf_hook_entries_grow_bpf(old, new); return new; } @@ -221,6 +287,7 @@ static void *__nf_hook_entries_try_shrink(struct nf_hook_entries *old, struct nf_hook_entries __rcu **pp) { unsigned int i, j, skip = 0, hook_entries; + struct bpf_prog *hook_bpf_prog = NULL; struct nf_hook_entries *new = NULL; struct nf_hook_ops **orig_ops; struct nf_hook_ops **new_ops; @@ -244,8 +311,13 @@ static void *__nf_hook_entries_try_shrink(struct nf_hook_entries *old, hook_entries -= skip; new = allocate_hook_entries_size(hook_entries); - if (!new) + if (!new) { + struct bpf_prog *old_prog = nf_hook_bpf_prog_get(old); + + nf_hook_bpf_prog_set(old, fallback_nf_hook_slow); + nf_hook_bpf_change_prog(NF_DISPATCHER_PTR, old_prog, NULL); return NULL; + } new_ops = nf_hook_entries_get_hook_ops(new); for (i = 0, j = 0; i < old->num_hook_entries; i++) { @@ -256,7 +328,13 @@ static void *__nf_hook_entries_try_shrink(struct nf_hook_entries *old, j++; } hooks_validate(new); + + 
/* if this fails fallback prog calls nf_hook_slow. */ + hook_bpf_prog = nf_hook_bpf_create(new); + if (hook_bpf_prog) + nf_hook_bpf_prog_set(new, hook_bpf_prog); out_assign: + nf_hook_bpf_change_prog(NF_DISPATCHER_PTR, nf_hook_bpf_prog_get(old), hook_bpf_prog); rcu_assign_pointer(*pp, new); return old; } @@ -609,6 +687,7 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, int ret; state->skb = skb; + for (; s < e->num_hook_entries; s++) { verdict = nf_hook_entry_hookfn(&e->hooks[s], skb, state); switch (verdict & NF_VERDICT_MASK) { @@ -783,6 +862,11 @@ int __init netfilter_init(void) if (ret < 0) goto err_pernet; +#if IS_ENABLED(CONFIG_NF_HOOK_BPF) + fallback_nf_hook_slow = nf_hook_bpf_create_fb(); + WARN_ON_ONCE(!fallback_nf_hook_slow); +#endif + return 0; err_pernet: unregister_pernet_subsys(&netfilter_net_ops); diff --git a/net/netfilter/nf_hook_bpf.c b/net/netfilter/nf_hook_bpf.c new file mode 100644 index 000000000000..dab13b803801 --- /dev/null +++ b/net/netfilter/nf_hook_bpf.c @@ -0,0 +1,424 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include + +#include +#include + +#define JMP_INVALID 0 +#define JIT_SIZE_MAX 0xffff + +/* BPF translator for netfilter hooks. + * + * Create a bpf program that can be called *instead* of nf_hook_slow(). + * This program thus has same return value as nf_hook_slow and + * handles nfqueue and packet drops internally. + * Call nf_hook_bpf_create(struct nf_hook_entries *e, NF_HOOK_BPF_TYPE_BASE) + * to unroll the functions described by nf_hook_entries into such + * a bpf program. + * + * These bpf programs are called/run from nf_hook() inline function. + * + * Register usage is: + * + * BPF_REG_0: verdict. 
+ * BPF_REG_1: struct nf_hook_state *
+ * BPF_REG_2: reserved as arg to nf_queue()
+ * BPF_REG_3: reserved as arg to nf_queue()
+ *
+ * Prologue storage:
+ * BPF_REG_6: copy of REG_1 (original struct nf_hook_state *)
+ * BPF_REG_7: copy of original state->priv value
+ * BPF_REG_8: copy of state->hook_index
+ */
+
+/* Instruction buffer used while a program is being generated.
+ *
+ * @insns: array the instructions are written to.
+ * @pos:   index of the next free slot, i.e. the number of instructions
+ *         emitted so far.
+ */
+struct nf_hook_prog {
+	struct bpf_insn *insns;
+	unsigned int pos;
+};
+
+/* Append one instruction to @p.
+ *
+ * Returns false (and warns once) if BPF_MAXINSNS instructions have already
+ * been emitted, true otherwise.
+ */
+static bool emit(struct nf_hook_prog *p, struct bpf_insn insn)
+{
+	if (WARN_ON_ONCE(p->pos >= BPF_MAXINSNS))
+		return false;
+
+	p->insns[p->pos] = insn;
+	p->pos++;
+	return true;
+}
+
+/* Emit the instructions for a single hook @h of the hook blob @e:
+ * optionally load the hook's priv pointer into state->priv, call the hook
+ * function, and emit a conditional jump that is taken when the verdict in
+ * BPF_REG_0 is not NF_ACCEPT.
+ *
+ * The jump is emitted with the placeholder offset JMP_INVALID and is fixed
+ * up later by patch_hook_jumps().
+ *
+ * Returns false if an instruction could not be emitted.
+ */
+static bool xlate_one_hook(struct nf_hook_prog *p, const struct nf_hook_entries *e,
+			   const struct nf_hook_entry *h)
+{
+	int width = bytes_to_bpf_size(sizeof(h->priv));
+
+	/* if priv is NULL, the called hookfn does not use the priv member. */
+	if (!h->priv)
+		goto emit_hook_call;
+
+	if (WARN_ON_ONCE(width < 0))
+		return false;
+
+	/* x = entries[s]->priv;
+	 *
+	 * BPF_REG_7 holds the original state->priv (see do_prologue()); the
+	 * load offset is the distance of this hook's priv member from the
+	 * start of @e.
+	 */
+	if (!emit(p, BPF_LDX_MEM(width, BPF_REG_2, BPF_REG_7,
+				 (unsigned long)&h->priv - (unsigned long)e)))
+		return false;
+
+	/* state->priv = x */
+	if (!emit(p, BPF_STX_MEM(width, BPF_REG_6, BPF_REG_2,
+				 offsetof(struct nf_hook_state, priv))))
+		return false;
+
+emit_hook_call:
+	if (!emit(p, BPF_EMIT_CALL(h->hook)))
+		return false;
+
+	/* Only advance to next hook on ACCEPT verdict.
+	 * Else, skip rest and move to tail.
+	 *
+	 * Postprocessing patches the jump offset to the
+	 * correct position, after last hook.
+	 */
+	if (!emit(p, BPF_JMP_IMM(BPF_JNE, BPF_REG_0, NF_ACCEPT, JMP_INVALID)))
+		return false;
+
+	return true;
+}
+
+/* Emit a register-to-register move sized for a pointer: a 64-bit move when
+ * pointers are 64 bits wide, a 32-bit move when they are 32 bits wide.
+ *
+ * Returns false for any other pointer size or if emit() fails.
+ */
+static bool emit_mov_ptr_reg(struct nf_hook_prog *p, u8 dreg, u8 sreg)
+{
+	if (sizeof(void *) == sizeof(u64))
+		return emit(p, BPF_MOV64_REG(dreg, sreg));
+	if (sizeof(void *) == sizeof(u32))
+		return emit(p, BPF_MOV32_REG(dreg, sreg));
+
+	return false;
+}
+
+/* Emit the program prologue: save the program argument in BPF_REG_6, load
+ * the original state->priv into BPF_REG_7 and set BPF_REG_8 to 0.
+ *
+ * Returns false if an instruction could not be emitted.
+ */
+static bool do_prologue(struct nf_hook_prog *p)
+{
+	int width = bytes_to_bpf_size(sizeof(void *));
+
+	if (WARN_ON_ONCE(width < 0))
+		return false;
+
+	/* argument to program is a pointer to struct nf_hook_state, in BPF_REG_1. */
+	if (!emit_mov_ptr_reg(p, BPF_REG_6, BPF_REG_1))
+		return false;
+
+	if (!emit(p, BPF_LDX_MEM(width, BPF_REG_7, BPF_REG_1,
+				 offsetof(struct nf_hook_state, priv))))
+		return false;
+
+	/* could load state->hook_index, but we don't support index > 0 for bpf call. */
+	if (!emit(p, BPF_MOV32_IMM(BPF_REG_8, 0)))
+		return false;
+
+	return true;
+}
+
+/* Fix up every jump emitted so far whose offset is still the JMP_INVALID
+ * placeholder so that it targets the current end of the program, i.e. the
+ * instruction that will be emitted next.
+ *
+ * EXIT and CALL instructions share the BPF_JMP class and are skipped.
+ */
+static void patch_hook_jumps(struct nf_hook_prog *p)
+{
+	unsigned int i;
+
+	if (!p->insns)
+		return;
+
+	for (i = 0; i < p->pos; i++) {
+		if (BPF_CLASS(p->insns[i].code) != BPF_JMP)
+			continue;
+
+		if (p->insns[i].code == (BPF_EXIT | BPF_JMP))
+			continue;
+		if (p->insns[i].code == (BPF_CALL | BPF_JMP))
+			continue;
+
+		/* already has a real offset, leave it alone */
+		if (p->insns[i].off != JMP_INVALID)
+			continue;
+		/* offset is relative to the instruction following the jump */
+		p->insns[i].off = p->pos - i - 1;
+	}
+}
+
+/* Emit "BPF_REG_0 = @retval; exit". Two instructions. */
+static bool emit_retval(struct nf_hook_prog *p, int retval)
+{
+	if (!emit(p, BPF_MOV32_IMM(BPF_REG_0, retval)))
+		return false;
+
+	return emit(p, BPF_EXIT_INSN());
+}
+
+/* Emit a call to nf_hook_slow(state->skb, state, original state->priv)
+ * followed by an exit, so the program returns whatever nf_hook_slow()
+ * returned.
+ *
+ * NOTE(review): unlike do_prologue() and emit_nf_queue(), 'width' is not
+ * checked for a negative value here.
+ */
+static bool emit_nf_hook_slow(struct nf_hook_prog *p)
+{
+	int width = bytes_to_bpf_size(sizeof(void *));
+
+	/* restore the original state->priv. */
+	if (!emit(p, BPF_STX_MEM(width, BPF_REG_6, BPF_REG_7,
+				 offsetof(struct nf_hook_state, priv))))
+		return false;
+
+	/* arg1 is state->skb */
+	if (!emit(p, BPF_LDX_MEM(width, BPF_REG_1, BPF_REG_6,
+				 offsetof(struct nf_hook_state, skb))))
+		return false;
+
+	/* arg2 is "struct nf_hook_state *" */
+	if (!emit(p, BPF_MOV64_REG(BPF_REG_2, BPF_REG_6)))
+		return false;
+
+	/* arg3 is nf_hook_entries (original state->priv) */
+	if (!emit(p, BPF_MOV64_REG(BPF_REG_3, BPF_REG_7)))
+		return false;
+
+	if (!emit(p, BPF_EMIT_CALL(nf_hook_slow)))
+		return false;
+
+	/* No further action needed, return retval provided by nf_hook_slow */
+	return emit(p, BPF_EXIT_INSN());
+}
+
+/* Emit a call to nf_queue() with the verdict currently held in BPF_REG_0.
+ *
+ * state->hook_index is written from BPF_REG_8 before the call.  If
+ * nf_queue() returns 0 the program returns 0; otherwise execution
+ * continues with whatever the caller emits after this sequence (the
+ * conditional jump skips the two instructions of emit_retval()).
+ */
+static bool emit_nf_queue(struct nf_hook_prog *p)
+{
+	int width = bytes_to_bpf_size(sizeof(void *));
+
+	if (width < 0) {
+		WARN_ON_ONCE(1);
+		return false;
+	}
+
+	/* int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, unsigned int verdict) */
+	if (!emit(p, BPF_LDX_MEM(width, BPF_REG_1, BPF_REG_6,
+				 offsetof(struct nf_hook_state, skb))))
+		return false;
+	if (!emit(p, BPF_STX_MEM(BPF_H, BPF_REG_6, BPF_REG_8,
+				 offsetof(struct nf_hook_state, hook_index))))
+		return false;
+	/* arg2: struct nf_hook_state * */
+	if (!emit(p, BPF_MOV64_REG(BPF_REG_2, BPF_REG_6)))
+		return false;
+	/* arg3: original hook return value: (NUM << NF_VERDICT_QBITS | NF_QUEUE) */
+	if (!emit(p, BPF_MOV32_REG(BPF_REG_3, BPF_REG_0)))
+		return false;
+	if (!emit(p, BPF_EMIT_CALL(nf_queue)))
+		return false;
+
+	/* Check nf_queue return value. Abnormal case: nf_queue returned != 0.
+	 *
+	 * Fall back to nf_hook_slow().
+	 */
+	if (!emit(p, BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2)))
+		return false;
+
+	/* Normal case: skb was stolen. Return 0. */
+	return emit_retval(p, 0);
+}
+
+/* Emit the tail of a base-hook program.
+ *
+ * Layout of the emitted code, in order:
+ *  - "return 1", reached by fall-through when the last hook returned ACCEPT;
+ *  - the target of all per-hook non-ACCEPT jumps (patched here), which
+ *    dispatches on the low 8 bits of the verdict: STOLEN returns 0, DROP
+ *    frees the skb and returns a negative errno, anything else is passed
+ *    to nf_queue();
+ *  - the nf_hook_slow() fallback used when nf_queue() returned non-zero.
+ *
+ * The relative jump offsets below are hand-counted against the number of
+ * instructions that follow; keep them in sync when changing the sequence.
+ *
+ * Returns false if an instruction could not be emitted.
+ */
+static bool do_epilogue_base_hooks(struct nf_hook_prog *p)
+{
+	int width = bytes_to_bpf_size(sizeof(void *));
+
+	if (WARN_ON_ONCE(width < 0))
+		return false;
+
+	/* last 'hook'. We arrive here if previous hook returned ACCEPT,
+	 * i.e. all hooks passed -- we are done.
+	 *
+	 * Return 1, skb can continue traversing network stack.
+	 */
+	if (!emit_retval(p, 1))
+		return false;
+
+	/* Patch all hook jumps, in case any of these are taken
+	 * we need to jump to this location.
+	 *
+	 * This happens when verdict is != ACCEPT.
+	 */
+	patch_hook_jumps(p);
+
+	/* need to ignore upper 24 bits, might contain errno or queue number */
+	if (!emit(p, BPF_MOV32_REG(BPF_REG_3, BPF_REG_0)))
+		return false;
+	if (!emit(p, BPF_ALU32_IMM(BPF_AND, BPF_REG_3, 0xff)))
+		return false;
+
+	/* ACCEPT handled, check STOLEN. */
+	if (!emit(p, BPF_JMP_IMM(BPF_JNE, BPF_REG_3, NF_STOLEN, 2)))
+		return false;
+
+	if (!emit_retval(p, 0))
+		return false;
+
+	/* ACCEPT and STOLEN handled. Check DROP next */
+	if (!emit(p, BPF_JMP_IMM(BPF_JNE, BPF_REG_3, NF_DROP, 1 + 2 + 2 + 2 + 2)))
+		return false;
+
+	/* First step. Extract the errno number. 1 insn. */
+	if (!emit(p, BPF_ALU32_IMM(BPF_RSH, BPF_REG_0, NF_VERDICT_QBITS)))
+		return false;
+
+	/* Second step: replace errno with EPERM if it was 0. 2 insns. */
+	if (!emit(p, BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1)))
+		return false;
+	if (!emit(p, BPF_MOV32_IMM(BPF_REG_0, EPERM)))
+		return false;
+
+	/* Third step: negate reg0: Caller expects -EFOO and stash the result. 2 insns. */
+	if (!emit(p, BPF_ALU32_IMM(BPF_NEG, BPF_REG_0, 0)))
+		return false;
+	if (!emit(p, BPF_MOV32_REG(BPF_REG_8, BPF_REG_0)))
+		return false;
+
+	/* Fourth step: free the skb. 2 insns. */
+	if (!emit(p, BPF_LDX_MEM(width, BPF_REG_1, BPF_REG_6,
+				 offsetof(struct nf_hook_state, skb))))
+		return false;
+	if (!emit(p, BPF_EMIT_CALL(kfree_skb)))
+		return false;
+
+	/* Last step: return. 2 insns. */
+	if (!emit(p, BPF_MOV32_REG(BPF_REG_0, BPF_REG_8)))
+		return false;
+	if (!emit(p, BPF_EXIT_INSN()))
+		return false;
+
+	/* ACCEPT, STOLEN and DROP have been handled.
+	 * REPEAT and STOP are not allowed anymore for individual hook functions.
+	 * This leaves NFQUEUE as only remaining return value.
+	 *
+	 * In this case BPF_REG_0 still contains the original verdict of
+	 * '(NUM << NF_VERDICT_QBITS | NF_QUEUE)', so pass it to nf_queue() as-is.
+	 */
+	if (!emit_nf_queue(p))
+		return false;
+
+	/* Increment hook index and store it in nf_hook_state so nf_hook_slow will
+	 * start at the next hook, if any.
+	 */
+	if (!emit(p, BPF_ALU32_IMM(BPF_ADD, BPF_REG_8, 1)))
+		return false;
+	if (!emit(p, BPF_STX_MEM(BPF_H, BPF_REG_6, BPF_REG_8,
+				 offsetof(struct nf_hook_state, hook_index))))
+		return false;
+
+	return emit_nf_hook_slow(p);
+}
+
+/* Zero @p and allocate a zeroed instruction buffer of BPF_MAXINSNS entries.
+ *
+ * Returns 0 on success or -ENOMEM if the allocation failed.
+ */
+static int nf_hook_prog_init(struct nf_hook_prog *p)
+{
+	memset(p, 0, sizeof(*p));
+
+	p->insns = kcalloc(BPF_MAXINSNS, sizeof(*p->insns), GFP_KERNEL);
+	if (!p->insns)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/* Release the instruction buffer allocated by nf_hook_prog_init(). */
+static void nf_hook_prog_free(struct nf_hook_prog *p)
+{
+	kfree(p->insns);
+}
+
+/* Emit a complete program for the hooks in @e: prologue, one call sequence
+ * per hook, then the epilogue.
+ *
+ * Between two hooks BPF_REG_1 is reloaded from BPF_REG_6 (the argument for
+ * the next hook call) and the hook counter in BPF_REG_8 is incremented.
+ *
+ * Returns 0 on success or -EINVAL if any instruction could not be emitted.
+ */
+static int xlate_base_hooks(struct nf_hook_prog *p, const struct nf_hook_entries *e)
+{
+	unsigned int i, len;
+
+	len = e->num_hook_entries;
+
+	if (!do_prologue(p))
+		goto out;
+
+	for (i = 0; i < len; i++) {
+		if (!xlate_one_hook(p, e, &e->hooks[i]))
+			goto out;
+
+		/* not needed after the last hook: the epilogue follows */
+		if (i + 1 < len) {
+			if (!emit(p, BPF_MOV64_REG(BPF_REG_1, BPF_REG_6)))
+				goto out;
+
+			if (!emit(p, BPF_ALU32_IMM(BPF_ADD, BPF_REG_8, 1)))
+				goto out;
+		}
+	}
+
+	if (!do_epilogue_base_hooks(p))
+		goto out;
+
+	return 0;
+out:
+	return -EINVAL;
+}
+
+/* Copy @len instructions from @insns into a newly allocated bpf_prog and
+ * pass it to bpf_prog_select_runtime().
+ *
+ * Returns the resulting program, or NULL if allocation failed or
+ * bpf_prog_select_runtime() reported an error.
+ */
+static struct bpf_prog *nf_hook_jit_compile(struct bpf_insn *insns, unsigned int len)
+{
+	struct bpf_prog *prog;
+	int err = 0;
+
+	prog = bpf_prog_alloc(bpf_prog_size(len), 0);
+	if (!prog)
+		return NULL;
+
+	prog->len = len;
+	prog->type = BPF_PROG_TYPE_SOCKET_FILTER;
+	memcpy(prog->insnsi, insns, prog->len * sizeof(struct bpf_insn));
+
+	prog = bpf_prog_select_runtime(prog, &err);
+	if (err) {
+		bpf_prog_free(prog);
+		return NULL;
+	}
+
+	return prog;
+}
+
+/* fallback program, invokes nf_hook_slow interpreter.
+ *
+ * Used when a hook is unregistered and new/replacement program cannot
+ * be compiled for some reason.
+ *
+ * Returns the compiled program, or NULL if the instruction buffer could
+ * not be allocated, an instruction could not be emitted, or compilation
+ * failed.
+ */
+struct bpf_prog *nf_hook_bpf_create_fb(void)
+{
+	/* Must start out NULL: the failure paths below jump to 'err' before
+	 * nf_hook_jit_compile() has assigned a value, and 'prog' is what
+	 * gets returned from there.
+	 */
+	struct bpf_prog *prog = NULL;
+	struct nf_hook_prog p;
+	int err;
+
+	err = nf_hook_prog_init(&p);
+	if (err)
+		return NULL;
+
+	if (!do_prologue(&p))
+		goto err;
+
+	if (!emit_nf_hook_slow(&p))
+		goto err;
+
+	prog = nf_hook_jit_compile(p.insns, p.pos);
+err:
+	/* the instruction buffer is only needed while generating */
+	nf_hook_prog_free(&p);
+	return prog;
+}
+
+/* Build a program that runs the hooks described by @new.
+ *
+ * Returns the compiled program, or NULL if the instruction buffer could
+ * not be allocated, the hooks could not be translated, or compilation
+ * failed.
+ */
+struct bpf_prog *nf_hook_bpf_create(const struct nf_hook_entries *new)
+{
+	/* Must start out NULL: a translation failure jumps to 'err' before
+	 * nf_hook_jit_compile() has assigned a value.
+	 */
+	struct bpf_prog *prog = NULL;
+	struct nf_hook_prog p;
+	int err;
+
+	err = nf_hook_prog_init(&p);
+	if (err)
+		return NULL;
+
+	err = xlate_base_hooks(&p, new);
+	if (err)
+		goto err;
+
+	prog = nf_hook_jit_compile(p.insns, p.pos);
+err:
+	/* the instruction buffer is only needed while generating */
+	nf_hook_prog_free(&p);
+	return prog;
+}
+
+/* Replace program @from with @to in dispatcher @d; forwards directly to
+ * bpf_dispatcher_change_prog().
+ */
+void nf_hook_bpf_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, struct bpf_prog *to)
+{
+	bpf_dispatcher_change_prog(d, from, to);
+}