From patchwork Wed Oct  5 14:13:06 2022
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Florian Westphal <fw@strlen.de>
X-Patchwork-Id: 12999279
X-Patchwork-Delegate: kuba@kernel.org
Return-Path: <bpf-owner@kernel.org>
X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on
	aws-us-west-2-korg-lkml-1.web.codeaurora.org
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18])
	by smtp.lore.kernel.org (Postfix) with ESMTP id 7AD7CC4332F
	for <bpf@archiver.kernel.org>; Wed,  5 Oct 2022 14:13:55 +0000 (UTC)
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
        id S229864AbiJEONx (ORCPT <rfc822;bpf@archiver.kernel.org>);
        Wed, 5 Oct 2022 10:13:53 -0400
Received: from lindbergh.monkeyblade.net ([23.128.96.19]:41598 "EHLO
        lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
        with ESMTP id S230019AbiJEONw (ORCPT <rfc822;bpf@vger.kernel.org>);
        Wed, 5 Oct 2022 10:13:52 -0400
Received: from Chamillionaire.breakpoint.cc (Chamillionaire.breakpoint.cc
 [IPv6:2a0a:51c0:0:12e:520::1])
        by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 9FD31786CA
        for <bpf@vger.kernel.org>; Wed,  5 Oct 2022 07:13:50 -0700 (PDT)
Received: from fw by Chamillionaire.breakpoint.cc with local (Exim 4.92)
        (envelope-from <fw@breakpoint.cc>)
        id 1og59h-0001fO-37; Wed, 05 Oct 2022 16:13:49 +0200
From: Florian Westphal <fw@strlen.de>
To: bpf@vger.kernel.org
Cc: Florian Westphal <fw@strlen.de>
Subject: [RFC v2 6/9] netfilter: add bpf base hook program generator
Date: Wed,  5 Oct 2022 16:13:06 +0200
Message-Id: <20221005141309.31758-7-fw@strlen.de>
X-Mailer: git-send-email 2.35.1
In-Reply-To: <20221005141309.31758-1-fw@strlen.de>
References: <20221005141309.31758-1-fw@strlen.de>
MIME-Version: 1.0
Precedence: bulk
List-ID: <bpf.vger.kernel.org>
X-Mailing-List: bpf@vger.kernel.org
X-Patchwork-Delegate: kuba@kernel.org
X-Patchwork-State: RFC

Add a kernel bpf program generator for netfilter base hooks.

Currently netfilter hooks are invoked by nf_hook_slow:

for i in hooks; do
  verdict = hooks[i].indirect_func(hooks[i].hook_arg, skb, state);

  switch (verdict) { ....

The autogenerator unrolls the loop, so we get:

state->priv = hooks[0].hook_arg;
v = first_hook_function(state);
if (v != ACCEPT) goto done;
state->priv = hooks[1].hook_arg;
v = second_hook_function(state); ...

Indirections are replaced by direct calls. Invocation of the
autogenerated programs is done via bpf dispatcher from nf_hook().

The autogenerated program has the same return value scheme as
nf_hook_slow(). NF_HOOK() points are converted to call the
autogenerated bpf program instead of nf_hook_slow().

Purpose of this is to eventually add a 'netfilter prog type' to bpf and
permit attachment of (userspace generated) bpf programs to the netfilter
machinery, e.g.  'attach bpf prog id 1234 to ipv6 PREROUTING at prio -300'.

This will require exposing the context structure (the program argument,
'__nf_hook_state'), with accesses rewritten to match the nf_hook_state layout.

Nat hooks are still handled via indirect calls, but they are only called
once per connection.

Signed-off-by: Florian Westphal <fw@strlen.de>
---
 include/linux/netfilter.h           |  66 ++++-
 include/net/netfilter/nf_hook_bpf.h |  21 ++
 net/netfilter/Kconfig               |  10 +
 net/netfilter/Makefile              |   1 +
 net/netfilter/core.c                |  92 +++++-
 net/netfilter/nf_hook_bpf.c         | 424 ++++++++++++++++++++++++++++
 6 files changed, 605 insertions(+), 9 deletions(-)
 create mode 100644 include/net/netfilter/nf_hook_bpf.h
 create mode 100644 net/netfilter/nf_hook_bpf.c

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 7c604ef8e8cb..b7874b772dd1 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -2,6 +2,7 @@
 #ifndef __LINUX_NETFILTER_H
 #define __LINUX_NETFILTER_H
 
+#include <linux/filter.h>
 #include <linux/init.h>
 #include <linux/skbuff.h>
 #include <linux/net.h>
@@ -106,6 +107,9 @@ struct nf_hook_entries_rcu_head {
 };
 
 struct nf_hook_entries {
+#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
+	struct bpf_prog			*hook_prog;
+#endif
 	u16				num_hook_entries;
 	/* padding */
 	struct nf_hook_entry		hooks[];
@@ -205,6 +209,17 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
 
 void nf_hook_slow_list(struct list_head *head, struct nf_hook_state *state,
 		       const struct nf_hook_entries *e);
+
+#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
+DECLARE_BPF_DISPATCHER(nf_hook_base);
+
+static __always_inline int bpf_prog_run_nf(const struct bpf_prog *prog,
+					   struct nf_hook_state *state)
+{
+	return __bpf_prog_run(prog, state, BPF_DISPATCHER_FUNC(nf_hook_base));
+}
+#endif
+
 /**
  *	nf_hook - call a netfilter hook
  *
@@ -213,17 +228,17 @@ void nf_hook_slow_list(struct list_head *head, struct nf_hook_state *state,
  *	value indicates the packet has been consumed by the hook.
  */
 static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
-			  struct sock *sk, struct sk_buff *skb,
-			  struct net_device *indev, struct net_device *outdev,
-			  int (*okfn)(struct net *, struct sock *, struct sk_buff *))
+		struct sock *sk, struct sk_buff *skb,
+		struct net_device *indev, struct net_device *outdev,
+		int (*okfn)(struct net *, struct sock *, struct sk_buff *))
 {
 	struct nf_hook_entries *hook_head = NULL;
 	int ret = 1;
 
 #ifdef CONFIG_JUMP_LABEL
 	if (__builtin_constant_p(pf) &&
-	    __builtin_constant_p(hook) &&
-	    !static_key_false(&nf_hooks_needed[pf][hook]))
+			__builtin_constant_p(hook) &&
+			!static_key_false(&nf_hooks_needed[pf][hook]))
 		return 1;
 #endif
 
@@ -254,11 +269,24 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
 
 	if (hook_head) {
 		struct nf_hook_state state;
+#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
+		const struct bpf_prog *p = READ_ONCE(hook_head->hook_prog);
+
+		nf_hook_state_init(&state, hook, pf, indev, outdev,
+				   sk, net, okfn);
+
+		state.priv = (void *)hook_head;
+		state.skb = skb;
 
+		migrate_disable();
+		ret = bpf_prog_run_nf(p, &state);
+		migrate_enable();
+#else
 		nf_hook_state_init(&state, hook, pf, indev, outdev,
 				   sk, net, okfn);
 
 		ret = nf_hook_slow(skb, &state, hook_head);
+#endif
 	}
 	rcu_read_unlock();
 
@@ -336,10 +364,38 @@ NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
 
 	if (hook_head) {
 		struct nf_hook_state state;
+#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
+		const struct bpf_prog *p = hook_head->hook_prog;
+		struct sk_buff *skb, *next;
+		struct list_head sublist;
+		int ret;
+
+		nf_hook_state_init(&state, hook, pf, in, out, sk, net, okfn);
+
+		INIT_LIST_HEAD(&sublist);
 
+		migrate_disable();
+
+		list_for_each_entry_safe(skb, next, head, list) {
+			skb_list_del_init(skb);
+
+			state.priv = (void *)hook_head;
+			state.skb = skb;
+
+			ret = bpf_prog_run_nf(p, &state);
+			if (ret == 1)
+				list_add_tail(&skb->list, &sublist);
+		}
+
+		migrate_enable();
+
+		/* Put passed packets back on main list */
+		list_splice(&sublist, head);
+#else
 		nf_hook_state_init(&state, hook, pf, in, out, sk, net, okfn);
 
 		nf_hook_slow_list(head, &state, hook_head);
+#endif
 	}
 	rcu_read_unlock();
 }
diff --git a/include/net/netfilter/nf_hook_bpf.h b/include/net/netfilter/nf_hook_bpf.h
new file mode 100644
index 000000000000..1792f97a806d
--- /dev/null
+++ b/include/net/netfilter/nf_hook_bpf.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+struct bpf_dispatcher;
+struct bpf_prog;
+
+struct bpf_prog *nf_hook_bpf_create_fb(void);
+
+#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
+struct bpf_prog *nf_hook_bpf_create(const struct nf_hook_entries *n);
+
+void nf_hook_bpf_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, struct bpf_prog *to);
+#else
+static inline void
+nf_hook_bpf_change_prog(struct bpf_dispatcher *d, struct bpf_prog *f, struct bpf_prog *t)
+{
+}
+
+static inline struct bpf_prog *nf_hook_bpf_create(const struct nf_hook_entries *n)
+{
+	return NULL;
+}
+#endif
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 4b8d04640ff3..2610786b6ad8 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -30,6 +30,16 @@ config NETFILTER_FAMILY_BRIDGE
 config NETFILTER_FAMILY_ARP
 	bool
 
+config HAVE_NF_HOOK_BPF
+	bool
+
+config NF_HOOK_BPF
+	bool "netfilter base hook bpf translator"
+	depends on BPF_JIT
+	help
+	  This replaces the nf_hook_slow interpreter loop with an
+	  auto-generated, unrolled BPF program.
+
 config NETFILTER_NETLINK_HOOK
 	tristate "Netfilter base hook dump support"
 	depends on NETFILTER_ADVANCED
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 06df49ea6329..e465659e87ad 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -21,6 +21,7 @@ nf_conntrack-$(CONFIG_DEBUG_INFO_BTF) += nf_conntrack_bpf.o
 endif
 
 obj-$(CONFIG_NETFILTER) = netfilter.o
+obj-$(CONFIG_NF_HOOK_BPF) += nf_hook_bpf.o
 
 obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o
 obj-$(CONFIG_NETFILTER_NETLINK_ACCT) += nfnetlink_acct.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 17165f9cf4a1..6888c7fd5aeb 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -24,6 +24,7 @@
 #include <linux/rcupdate.h>
 #include <net/net_namespace.h>
 #include <net/netfilter/nf_queue.h>
+#include <net/netfilter/nf_hook_bpf.h>
 #include <net/sock.h>
 
 #include "nf_internals.h"
@@ -47,6 +48,33 @@ static DEFINE_MUTEX(nf_hook_mutex);
 #define nf_entry_dereference(e) \
 	rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex))
 
+#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
+DEFINE_BPF_DISPATCHER(nf_hook_base);
+
+#define NF_DISPATCHER_PTR	BPF_DISPATCHER_PTR(nf_hook_base)
+#else
+#define NF_DISPATCHER_PTR	NULL
+#endif
+
+static struct bpf_prog *fallback_nf_hook_slow;
+
+static void nf_hook_bpf_prog_set(struct nf_hook_entries *e,
+				 struct bpf_prog *p)
+{
+#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
+	WRITE_ONCE(e->hook_prog, p);
+#endif
+}
+
+static struct bpf_prog *nf_hook_bpf_prog_get(struct nf_hook_entries *e)
+{
+#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
+	if (e)
+		return e->hook_prog;
+#endif
+	return NULL;
+}
+
 static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
 {
 	struct nf_hook_entries *e;
@@ -58,9 +86,23 @@ static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
 	if (num == 0)
 		return NULL;
 
-	e = kvzalloc(alloc, GFP_KERNEL_ACCOUNT);
-	if (e)
-		e->num_hook_entries = num;
+#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
+	if (!fallback_nf_hook_slow) {
+		/* never free'd */
+		fallback_nf_hook_slow = nf_hook_bpf_create_fb();
+
+		if (!fallback_nf_hook_slow)
+			return NULL;
+	}
+#endif
+
+	e = kvzalloc(alloc, GFP_KERNEL);
+	if (!e)
+		return NULL;
+
+	e->num_hook_entries = num;
+	nf_hook_bpf_prog_set(e, fallback_nf_hook_slow);
+
 	return e;
 }
 
@@ -98,6 +140,29 @@ static const struct nf_hook_ops dummy_ops = {
 	.priority = INT_MIN,
 };
 
+static void nf_hook_entries_grow_bpf(const struct nf_hook_entries *old,
+				     struct nf_hook_entries *new)
+{
+#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
+	struct bpf_prog *hook_bpf_prog = nf_hook_bpf_create(new);
+
+	/* allocate_hook_entries_size() pre-inits new->hook_prog
+	 * to a fallback program that calls nf_hook_slow().
+	 */
+	if (hook_bpf_prog) {
+		struct bpf_prog *old_prog = NULL;
+
+		new->hook_prog = hook_bpf_prog;
+
+		if (old)
+			old_prog = old->hook_prog;
+
+		nf_hook_bpf_change_prog(BPF_DISPATCHER_PTR(nf_hook_base),
+					old_prog, hook_bpf_prog);
+	}
+#endif
+}
+
 static struct nf_hook_entries *
 nf_hook_entries_grow(const struct nf_hook_entries *old,
 		     const struct nf_hook_ops *reg)
@@ -156,6 +221,7 @@ nf_hook_entries_grow(const struct nf_hook_entries *old,
 		new->hooks[nhooks].priv = reg->priv;
 	}
 
+	nf_hook_entries_grow_bpf(old, new);
 	return new;
 }
 
@@ -221,6 +287,7 @@ static void *__nf_hook_entries_try_shrink(struct nf_hook_entries *old,
 					  struct nf_hook_entries __rcu **pp)
 {
 	unsigned int i, j, skip = 0, hook_entries;
+	struct bpf_prog *hook_bpf_prog = NULL;
 	struct nf_hook_entries *new = NULL;
 	struct nf_hook_ops **orig_ops;
 	struct nf_hook_ops **new_ops;
@@ -244,8 +311,13 @@ static void *__nf_hook_entries_try_shrink(struct nf_hook_entries *old,
 
 	hook_entries -= skip;
 	new = allocate_hook_entries_size(hook_entries);
-	if (!new)
+	if (!new) {
+		struct bpf_prog *old_prog = nf_hook_bpf_prog_get(old);
+
+		nf_hook_bpf_prog_set(old, fallback_nf_hook_slow);
+		nf_hook_bpf_change_prog(NF_DISPATCHER_PTR, old_prog, NULL);
 		return NULL;
+	}
 
 	new_ops = nf_hook_entries_get_hook_ops(new);
 	for (i = 0, j = 0; i < old->num_hook_entries; i++) {
@@ -256,7 +328,13 @@ static void *__nf_hook_entries_try_shrink(struct nf_hook_entries *old,
 		j++;
 	}
 	hooks_validate(new);
+
+	/* if this fails fallback prog calls nf_hook_slow. */
+	hook_bpf_prog = nf_hook_bpf_create(new);
+	if (hook_bpf_prog)
+		nf_hook_bpf_prog_set(new, hook_bpf_prog);
 out_assign:
+	nf_hook_bpf_change_prog(NF_DISPATCHER_PTR, nf_hook_bpf_prog_get(old), hook_bpf_prog);
 	rcu_assign_pointer(*pp, new);
 	return old;
 }
@@ -609,6 +687,7 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
 	int ret;
 
 	state->skb = skb;
+
 	for (; s < e->num_hook_entries; s++) {
 		verdict = nf_hook_entry_hookfn(&e->hooks[s], skb, state);
 		switch (verdict & NF_VERDICT_MASK) {
@@ -783,6 +862,11 @@ int __init netfilter_init(void)
 	if (ret < 0)
 		goto err_pernet;
 
+#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
+	fallback_nf_hook_slow = nf_hook_bpf_create_fb();
+	WARN_ON_ONCE(!fallback_nf_hook_slow);
+#endif
+
 	return 0;
 err_pernet:
 	unregister_pernet_subsys(&netfilter_net_ops);
diff --git a/net/netfilter/nf_hook_bpf.c b/net/netfilter/nf_hook_bpf.c
new file mode 100644
index 000000000000..dab13b803801
--- /dev/null
+++ b/net/netfilter/nf_hook_bpf.c
@@ -0,0 +1,424 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/string.h>
+#include <linux/hashtable.h>
+#include <linux/jhash.h>
+#include <linux/netfilter.h>
+
+#include <net/netfilter/nf_hook_bpf.h>
+#include <net/netfilter/nf_queue.h>
+
+#define JMP_INVALID 0
+#define JIT_SIZE_MAX 0xffff
+
+/* BPF translator for netfilter hooks.
+ *
+ * Create a bpf program that can be called *instead* of nf_hook_slow().
+ * This program thus has same return value as nf_hook_slow and
+ * handles nfqueue and packet drops internally.
+ * Call nf_hook_bpf_create(const struct nf_hook_entries *e)
+ * to unroll the functions described by nf_hook_entries into such
+ * a bpf program.
+ *
+ * These bpf programs are called/run from nf_hook() inline function.
+ *
+ * Register usage is:
+ *
+ * BPF_REG_0: verdict.
+ * BPF_REG_1: struct nf_hook_state *
+ * BPF_REG_2: reserved as arg to nf_queue()
+ * BPF_REG_3: reserved as arg to nf_queue()
+ *
+ * Prologue storage:
+ * BPF_REG_6: copy of REG_1 (original struct nf_hook_state *)
+ * BPF_REG_7: copy of original state->priv value
+ * BPF_REG_8: copy of state->hook_index
+ */
+struct nf_hook_prog {
+	struct bpf_insn *insns;
+	unsigned int pos;
+};
+
+static bool emit(struct nf_hook_prog *p, struct bpf_insn insn)
+{
+	if (WARN_ON_ONCE(p->pos >= BPF_MAXINSNS))
+		return false;
+
+	p->insns[p->pos] = insn;
+	p->pos++;
+	return true;
+}
+
+static bool xlate_one_hook(struct nf_hook_prog *p, const struct nf_hook_entries *e,
+			   const struct nf_hook_entry *h)
+{
+	int width = bytes_to_bpf_size(sizeof(h->priv));
+
+	/* if priv is NULL, the called hookfn does not use the priv member. */
+	if (!h->priv)
+		goto emit_hook_call;
+
+	if (WARN_ON_ONCE(width < 0))
+		return false;
+
+	/* x = entries[s]->priv; */
+	if (!emit(p, BPF_LDX_MEM(width, BPF_REG_2, BPF_REG_7,
+				 (unsigned long)&h->priv - (unsigned long)e)))
+		return false;
+
+	/* state->priv = x */
+	if (!emit(p, BPF_STX_MEM(width, BPF_REG_6, BPF_REG_2,
+				 offsetof(struct nf_hook_state, priv))))
+		return false;
+
+emit_hook_call:
+	if (!emit(p, BPF_EMIT_CALL(h->hook)))
+		return false;
+
+	/* Only advance to next hook on ACCEPT verdict.
+	 * Else, skip rest and move to tail.
+	 *
+	 * Postprocessing patches the jump offset to the
+	 * correct position, after last hook.
+	 */
+	if (!emit(p, BPF_JMP_IMM(BPF_JNE, BPF_REG_0, NF_ACCEPT, JMP_INVALID)))
+		return false;
+
+	return true;
+}
+
+static bool emit_mov_ptr_reg(struct nf_hook_prog *p, u8 dreg, u8 sreg)
+{
+	if (sizeof(void *) == sizeof(u64))
+		return emit(p, BPF_MOV64_REG(dreg, sreg));
+	if (sizeof(void *) == sizeof(u32))
+		return emit(p, BPF_MOV32_REG(dreg, sreg));
+
+	return false;
+}
+
+static bool do_prologue(struct nf_hook_prog *p)
+{
+	int width = bytes_to_bpf_size(sizeof(void *));
+
+	if (WARN_ON_ONCE(width < 0))
+		return false;
+
+	/* argument to program is a pointer to struct nf_hook_state, in BPF_REG_1. */
+	if (!emit_mov_ptr_reg(p, BPF_REG_6, BPF_REG_1))
+		return false;
+
+	if (!emit(p, BPF_LDX_MEM(width, BPF_REG_7, BPF_REG_1,
+				 offsetof(struct nf_hook_state, priv))))
+		return false;
+
+	/* could load state->hook_index, but we don't support index > 0 for bpf call. */
+	if (!emit(p, BPF_MOV32_IMM(BPF_REG_8, 0)))
+		return false;
+
+	return true;
+}
+
+/* Point every jump still carrying the JMP_INVALID placeholder offset at
+ * the current end of the program (p->pos).  Calls and exits are skipped.
+ */
+static void patch_hook_jumps(struct nf_hook_prog *p)
+{
+	unsigned int idx;
+
+	if (!p->insns)
+		return;
+
+	for (idx = 0; idx < p->pos; idx++) {
+		struct bpf_insn *insn = &p->insns[idx];
+
+		if (BPF_CLASS(insn->code) != BPF_JMP ||
+		    insn->code == (BPF_EXIT | BPF_JMP) ||
+		    insn->code == (BPF_CALL | BPF_JMP))
+			continue;
+		if (insn->off == JMP_INVALID)
+			insn->off = p->pos - idx - 1;
+	}
+}
+
+/* Emit "r0 = retval; exit".  Returns false if either emit() fails. */
+static bool emit_retval(struct nf_hook_prog *p, int retval)
+{
+	bool ok = emit(p, BPF_MOV32_IMM(BPF_REG_0, retval));
+
+	return ok && emit(p, BPF_EXIT_INSN());
+}
+
+static bool emit_nf_hook_slow(struct nf_hook_prog *p)
+{
+	int width = bytes_to_bpf_size(sizeof(void *));
+
+	/* restore the original state->priv. */
+	if (!emit(p, BPF_STX_MEM(width, BPF_REG_6, BPF_REG_7,
+				 offsetof(struct nf_hook_state, priv))))
+		return false;
+
+	/* arg1 is state->skb */
+	if (!emit(p, BPF_LDX_MEM(width, BPF_REG_1, BPF_REG_6,
+				 offsetof(struct nf_hook_state, skb))))
+		return false;
+
+	/* arg2 is "struct nf_hook_state *" */
+	if (!emit(p, BPF_MOV64_REG(BPF_REG_2, BPF_REG_6)))
+		return false;
+
+	/* arg3 is nf_hook_entries (original state->priv) */
+	if (!emit(p, BPF_MOV64_REG(BPF_REG_3, BPF_REG_7)))
+		return false;
+
+	if (!emit(p, BPF_EMIT_CALL(nf_hook_slow)))
+		return false;
+
+	/* No further action needed, return retval provided by nf_hook_slow */
+	return emit(p, BPF_EXIT_INSN());
+}
+
+static bool emit_nf_queue(struct nf_hook_prog *p)
+{
+	int width = bytes_to_bpf_size(sizeof(void *));
+
+	if (width < 0) {
+		WARN_ON_ONCE(1);
+		return false;
+	}
+
+	/* int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, unsigned int verdict) */
+	if (!emit(p, BPF_LDX_MEM(width, BPF_REG_1, BPF_REG_6,
+				 offsetof(struct nf_hook_state, skb))))
+		return false;
+	if (!emit(p, BPF_STX_MEM(BPF_H, BPF_REG_6, BPF_REG_8,
+				 offsetof(struct nf_hook_state, hook_index))))
+		return false;
+	/* arg2: struct nf_hook_state * */
+	if (!emit(p, BPF_MOV64_REG(BPF_REG_2, BPF_REG_6)))
+		return false;
+	/* arg3: original hook return value: (NUM << NF_VERDICT_QBITS | NF_QUEUE) */
+	if (!emit(p, BPF_MOV32_REG(BPF_REG_3, BPF_REG_0)))
+		return false;
+	if (!emit(p, BPF_EMIT_CALL(nf_queue)))
+		return false;
+
+	/* Check nf_queue return value.  Abnormal case: nf_queue returned != 0.
+	 *
+	 * Fall back to nf_hook_slow().
+	 */
+	if (!emit(p, BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2)))
+		return false;
+
+	/* Normal case: skb was stolen. Return 0. */
+	return emit_retval(p, 0);
+}
+
+static bool do_epilogue_base_hooks(struct nf_hook_prog *p)
+{
+	int width = bytes_to_bpf_size(sizeof(void *));
+
+	if (WARN_ON_ONCE(width < 0))
+		return false;
+
+	/* last 'hook'. We arrive here if previous hook returned ACCEPT,
+	 * i.e. all hooks passed -- we are done.
+	 *
+	 * Return 1, skb can continue traversing network stack.
+	 */
+	if (!emit_retval(p, 1))
+		return false;
+
+	/* Patch all hook jumps, in case any of these are taken
+	 * we need to jump to this location.
+	 *
+	 * This happens when verdict is != ACCEPT.
+	 */
+	patch_hook_jumps(p);
+
+	/* need to ignore upper 24 bits, might contain errno or queue number */
+	if (!emit(p, BPF_MOV32_REG(BPF_REG_3, BPF_REG_0)))
+		return false;
+	if (!emit(p, BPF_ALU32_IMM(BPF_AND, BPF_REG_3, 0xff)))
+		return false;
+
+	/* ACCEPT handled, check STOLEN. */
+	if (!emit(p, BPF_JMP_IMM(BPF_JNE, BPF_REG_3, NF_STOLEN, 2)))
+		return false;
+
+	if (!emit_retval(p, 0))
+		return false;
+
+	/* ACCEPT and STOLEN handled.  Check DROP next */
+	if (!emit(p, BPF_JMP_IMM(BPF_JNE, BPF_REG_3, NF_DROP, 1 + 2 + 2 + 2 + 2)))
+		return false;
+
+	/* First step. Extract the errno number. 1 insn. */
+	if (!emit(p, BPF_ALU32_IMM(BPF_RSH, BPF_REG_0, NF_VERDICT_QBITS)))
+		return false;
+
+	/* Second step: replace errno with EPERM if it was 0. 2 insns. */
+	if (!emit(p, BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1)))
+		return false;
+	if (!emit(p, BPF_MOV32_IMM(BPF_REG_0, EPERM)))
+		return false;
+
+	/* Third step: negate reg0 (caller expects -EFOO) and stash the result.  2 insns. */
+	if (!emit(p, BPF_ALU32_IMM(BPF_NEG, BPF_REG_0, 0)))
+		return false;
+	if (!emit(p, BPF_MOV32_REG(BPF_REG_8, BPF_REG_0)))
+		return false;
+
+	/* Fourth step: free the skb. 2 insns. */
+	if (!emit(p, BPF_LDX_MEM(width, BPF_REG_1, BPF_REG_6,
+				 offsetof(struct nf_hook_state, skb))))
+		return false;
+	if (!emit(p, BPF_EMIT_CALL(kfree_skb)))
+		return false;
+
+	/* Last step: return. 2 insns. */
+	if (!emit(p, BPF_MOV32_REG(BPF_REG_0, BPF_REG_8)))
+		return false;
+	if (!emit(p, BPF_EXIT_INSN()))
+		return false;
+
+	/* ACCEPT, STOLEN and DROP have been handled.
+	 * REPEAT and STOP are not allowed anymore for individual hook functions.
+	 * This leaves NFQUEUE as the only remaining return value.
+	 *
+	 * In this case BPF_REG_0 still contains the original verdict of
+	 * '(NUM << NF_VERDICT_QBITS | NF_QUEUE)', so pass it to nf_queue() as-is.
+	 */
+	if (!emit_nf_queue(p))
+		return false;
+
+	/* Increment hook index and store it in nf_hook_state so nf_hook_slow will
+	 * start at the next hook, if any.
+	 */
+	if (!emit(p, BPF_ALU32_IMM(BPF_ADD, BPF_REG_8, 1)))
+		return false;
+	if (!emit(p, BPF_STX_MEM(BPF_H, BPF_REG_6, BPF_REG_8,
+				 offsetof(struct nf_hook_state, hook_index))))
+		return false;
+
+	return emit_nf_hook_slow(p);
+}
+
+static int nf_hook_prog_init(struct nf_hook_prog *p)
+{
+	memset(p, 0, sizeof(*p));
+
+	p->insns = kcalloc(BPF_MAXINSNS, sizeof(*p->insns), GFP_KERNEL);
+	if (!p->insns)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void nf_hook_prog_free(struct nf_hook_prog *p)
+{
+	kfree(p->insns);
+}
+
+/* Emit prologue, one call sequence per hook in @e, then the epilogue.
+ * Returns 0 on success, -EINVAL as soon as any step fails.
+ */
+static int xlate_base_hooks(struct nf_hook_prog *p, const struct nf_hook_entries *e)
+{
+	const unsigned int nhooks = e->num_hook_entries;
+	unsigned int idx;
+
+	if (!do_prologue(p))
+		return -EINVAL;
+
+	for (idx = 0; idx < nhooks; idx++) {
+		if (!xlate_one_hook(p, e, &e->hooks[idx]))
+			return -EINVAL;
+
+		/* nothing to set up after the last hook */
+		if (idx + 1 == nhooks)
+			break;
+		/* next hook: reload arg1 (state), bump the hook index copy */
+		if (!emit(p, BPF_MOV64_REG(BPF_REG_1, BPF_REG_6)))
+			return -EINVAL;
+		if (!emit(p, BPF_ALU32_IMM(BPF_ADD, BPF_REG_8, 1)))
+			return -EINVAL;
+	}
+
+	if (!do_epilogue_base_hooks(p))
+		return -EINVAL;
+	return 0;
+}
+
+static struct bpf_prog *nf_hook_jit_compile(struct bpf_insn *insns, unsigned int len)
+{
+	struct bpf_prog *prog;
+	int err = 0;
+
+	prog = bpf_prog_alloc(bpf_prog_size(len), 0);
+	if (!prog)
+		return NULL;
+
+	prog->len = len;
+	prog->type = BPF_PROG_TYPE_SOCKET_FILTER;
+	memcpy(prog->insnsi, insns, prog->len * sizeof(struct bpf_insn));
+
+	prog = bpf_prog_select_runtime(prog, &err);
+	if (err) {
+		bpf_prog_free(prog);
+		return NULL;
+	}
+
+	return prog;
+}
+
+/* Build the fallback program, which just invokes the nf_hook_slow
+ * interpreter.
+ *
+ * Used when a hook is unregistered and new/replacement program cannot
+ * be compiled for some reason.  Returns NULL on any failure.
+ */
+struct bpf_prog *nf_hook_bpf_create_fb(void)
+{
+	/* must start out NULL: the 'goto err' paths return it unmodified */
+	struct bpf_prog *prog = NULL;
+	struct nf_hook_prog p;
+
+	if (nf_hook_prog_init(&p))
+		return NULL;
+
+	if (!do_prologue(&p))
+		goto err;
+
+	if (!emit_nf_hook_slow(&p))
+		goto err;
+
+	prog = nf_hook_jit_compile(p.insns, p.pos);
+err:
+	nf_hook_prog_free(&p);
+	return prog;
+}
+
+/* Translate the hooks described by @new into a bpf program.
+ * Returns NULL if allocation, translation or compilation fails.
+ */
+struct bpf_prog *nf_hook_bpf_create(const struct nf_hook_entries *new)
+{
+	struct bpf_prog *prog = NULL;	/* returned unmodified via 'goto err' */
+	struct nf_hook_prog p;
+
+	if (nf_hook_prog_init(&p))
+		return NULL;
+
+	if (xlate_base_hooks(&p, new))
+		goto err;
+
+	prog = nf_hook_jit_compile(p.insns, p.pos);
+err:
+	nf_hook_prog_free(&p);
+	return prog;
+}
+
+void nf_hook_bpf_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, struct bpf_prog *to)
+{
+	bpf_dispatcher_change_prog(d, from, to);
+}