@@ -80,6 +80,7 @@ typedef unsigned int nf_hookfn(void *priv,
enum nf_hook_ops_type {
NF_HOOK_OP_UNDEFINED,
NF_HOOK_OP_NF_TABLES,
+ NF_HOOK_OP_BPF,
};
struct nf_hook_ops {
new file mode 100644
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#if IS_ENABLED(CONFIG_NETFILTER_BPF_LINK)
+int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
+#else
+static inline int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+ return -EOPNOTSUPP;
+}
+#endif
@@ -986,6 +986,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_LSM,
BPF_PROG_TYPE_SK_LOOKUP,
BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
+ BPF_PROG_TYPE_NETFILTER,
};
enum bpf_attach_type {
@@ -1050,6 +1051,7 @@ enum bpf_link_type {
BPF_LINK_TYPE_PERF_EVENT = 7,
BPF_LINK_TYPE_KPROBE_MULTI = 8,
BPF_LINK_TYPE_STRUCT_OPS = 9,
+ BPF_LINK_TYPE_NETFILTER = 10,
MAX_BPF_LINK_TYPE,
};
@@ -1560,6 +1562,12 @@ union bpf_attr {
*/
__u64 cookie;
} tracing;
+ struct {
+ __u32 pf;
+ __u32 hooknum;
+ __s32 priority;
+ __u32 flags;
+ } netfilter;
};
} link_create;
@@ -6410,6 +6418,12 @@ struct bpf_link_info {
struct {
__u32 map_id;
} struct_ops;
+ struct {
+ __u32 pf;
+ __u32 hooknum;
+ __s32 priority;
+ __u32 flags;
+ } netfilter;
};
} __attribute__((aligned(8)));
@@ -35,6 +35,7 @@
#include <linux/rcupdate_trace.h>
#include <linux/memcontrol.h>
#include <linux/trace_events.h>
+#include <net/netfilter/nf_bpf_link.h>
#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
(map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
@@ -2462,6 +2463,7 @@ static bool is_net_admin_prog_type(enum bpf_prog_type prog_type)
case BPF_PROG_TYPE_CGROUP_SYSCTL:
case BPF_PROG_TYPE_SOCK_OPS:
case BPF_PROG_TYPE_EXT: /* extends any prog */
+ case BPF_PROG_TYPE_NETFILTER:
return true;
case BPF_PROG_TYPE_CGROUP_SKB:
/* always unpriv */
@@ -4588,6 +4590,7 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
switch (prog->type) {
case BPF_PROG_TYPE_EXT:
+ case BPF_PROG_TYPE_NETFILTER:
break;
case BPF_PROG_TYPE_PERF_EVENT:
case BPF_PROG_TYPE_TRACEPOINT:
@@ -4654,6 +4657,9 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr)
case BPF_PROG_TYPE_XDP:
ret = bpf_xdp_link_attach(attr, prog);
break;
+ case BPF_PROG_TYPE_NETFILTER:
+ ret = bpf_nf_link_attach(attr, prog);
+ break;
#endif
case BPF_PROG_TYPE_PERF_EVENT:
case BPF_PROG_TYPE_TRACEPOINT:
@@ -30,6 +30,9 @@ config NETFILTER_FAMILY_BRIDGE
config NETFILTER_FAMILY_ARP
bool
+config NETFILTER_BPF_LINK
+ def_bool BPF_SYSCALL
+
config NETFILTER_NETLINK_HOOK
tristate "Netfilter base hook dump support"
depends on NETFILTER_ADVANCED
@@ -22,6 +22,7 @@ nf_conntrack-$(CONFIG_DEBUG_INFO_BTF) += nf_conntrack_bpf.o
endif
obj-$(CONFIG_NETFILTER) = netfilter.o
+obj-$(CONFIG_NETFILTER_BPF_LINK) += nf_bpf_link.o
obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o
obj-$(CONFIG_NETFILTER_NETLINK_ACCT) += nfnetlink_acct.o
new file mode 100644
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <linux/netfilter.h>
+
+#include <net/netfilter/nf_bpf_link.h>
+#include <uapi/linux/netfilter_ipv4.h>
+
+static unsigned int nf_hook_run_bpf(void *bpf_prog, struct sk_buff *skb,
+ const struct nf_hook_state *s)
+{
+ return NF_ACCEPT;
+}
+
+struct bpf_nf_link {
+ struct bpf_link link;
+ struct nf_hook_ops hook_ops;
+ struct net *net;
+ u32 dead;
+};
+
+static void bpf_nf_link_release(struct bpf_link *link)
+{
+ struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
+
+ if (nf_link->dead)
+ return;
+
+ /* prevent hook-not-found warning splat from netfilter core when
+ * .detach was already called
+ */
+ if (!cmpxchg(&nf_link->dead, 0, 1))
+ nf_unregister_net_hook(nf_link->net, &nf_link->hook_ops);
+}
+
+static void bpf_nf_link_dealloc(struct bpf_link *link)
+{
+ struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
+
+ kfree(nf_link);
+}
+
+static int bpf_nf_link_detach(struct bpf_link *link)
+{
+ bpf_nf_link_release(link);
+ return 0;
+}
+
+static void bpf_nf_link_show_info(const struct bpf_link *link,
+ struct seq_file *seq)
+{
+ struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
+
+ seq_printf(seq, "pf:\t%u\thooknum:\t%u\tprio:\t%d\n",
+ nf_link->hook_ops.pf, nf_link->hook_ops.hooknum,
+ nf_link->hook_ops.priority);
+}
+
+static int bpf_nf_link_fill_link_info(const struct bpf_link *link,
+ struct bpf_link_info *info)
+{
+ struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
+
+ info->netfilter.pf = nf_link->hook_ops.pf;
+ info->netfilter.hooknum = nf_link->hook_ops.hooknum;
+ info->netfilter.priority = nf_link->hook_ops.priority;
+ info->netfilter.flags = 0;
+
+ return 0;
+}
+
+static int bpf_nf_link_update(struct bpf_link *link, struct bpf_prog *new_prog,
+ struct bpf_prog *old_prog)
+{
+ return -EOPNOTSUPP;
+}
+
+static const struct bpf_link_ops bpf_nf_link_lops = {
+ .release = bpf_nf_link_release,
+ .dealloc = bpf_nf_link_dealloc,
+ .detach = bpf_nf_link_detach,
+ .show_fdinfo = bpf_nf_link_show_info,
+ .fill_link_info = bpf_nf_link_fill_link_info,
+ .update_prog = bpf_nf_link_update,
+};
+
+static int bpf_nf_check_pf_and_hooks(const union bpf_attr *attr)
+{
+ switch (attr->link_create.netfilter.pf) {
+ case NFPROTO_IPV4:
+ case NFPROTO_IPV6:
+ if (attr->link_create.netfilter.hooknum >= NF_INET_NUMHOOKS)
+ return -EPROTO;
+ break;
+ default:
+ return -EAFNOSUPPORT;
+ }
+
+ if (attr->link_create.netfilter.flags)
+ return -EOPNOTSUPP;
+
+ /* make sure conntrack confirm is always last.
+ *
+ * In the future, if userspace can e.g. request defrag, then
+ * "defrag_requested && prio before NF_IP_PRI_CONNTRACK_DEFRAG"
+ * should fail.
+ */
+ switch (attr->link_create.netfilter.priority) {
+ case NF_IP_PRI_FIRST: return -ERANGE; /* sabotage_in and other warts */
+ case NF_IP_PRI_LAST: return -ERANGE; /* e.g. conntrack confirm */
+ }
+
+ return 0;
+}
+
+int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+ struct net *net = current->nsproxy->net_ns;
+ struct bpf_link_primer link_primer;
+ struct bpf_nf_link *link;
+ int err;
+
+ if (attr->link_create.flags)
+ return -EINVAL;
+
+ err = bpf_nf_check_pf_and_hooks(attr);
+ if (err)
+ return err;
+
+ link = kzalloc(sizeof(*link), GFP_USER);
+ if (!link)
+ return -ENOMEM;
+
+ bpf_link_init(&link->link, BPF_LINK_TYPE_NETFILTER, &bpf_nf_link_lops, prog);
+
+ link->hook_ops.hook = nf_hook_run_bpf;
+ link->hook_ops.hook_ops_type = NF_HOOK_OP_BPF;
+ link->hook_ops.priv = prog;
+
+ link->hook_ops.pf = attr->link_create.netfilter.pf;
+ link->hook_ops.priority = attr->link_create.netfilter.priority;
+ link->hook_ops.hooknum = attr->link_create.netfilter.hooknum;
+
+ link->net = net;
+ link->dead = false;
+
+ err = bpf_link_prime(&link->link, &link_primer);
+ if (err) {
+ kfree(link);
+ return err;
+ }
+
+ err = nf_register_net_hook(net, &link->hook_ops);
+ if (err) {
+ bpf_link_cleanup(&link_primer);
+ return err;
+ }
+
+ return bpf_link_settle(&link_primer);
+}
Add bpf_link support skeleton. To keep this reviewable, no bpf program can be invoked yet, if a program is attached only a c-stub is called and not the actual bpf program. Defaults to 'y' if both netfilter and bpf syscall are enabled in kconfig. Uapi example usage: union bpf_attr attr = { }; attr.link_create.prog_fd = progfd; attr.link_create.attach_type = 0; /* unused */ attr.link_create.netfilter.pf = PF_INET; attr.link_create.netfilter.hooknum = NF_INET_LOCAL_IN; attr.link_create.netfilter.priority = -128; err = bpf(BPF_LINK_CREATE, &attr, sizeof(attr)); ... this would attach progfd to ipv4:input hook. Such hook gets removed automatically if the calling program exits. BPF_NETFILTER program invocation is added in followup change. NF_HOOK_OP_BPF enum will eventually be read from nfnetlink_hook, it allows to tell userspace which program is attached at the given hook when user runs 'nft hook list' command rather than just the priority and not-very-helpful 'this hook runs a bpf prog but I can't tell which one'. Will also be used to disallow registration of two bpf programs with same priority in a followup patch. v4: arm32 cmpxchg only supports 32bit operand s/prio/priority/ v3: restrict prog attachment to ip/ip6 for now, lets lift restrictions if more use cases pop up (arptables, ebtables, netdev ingress/egress etc). Signed-off-by: Florian Westphal <fw@strlen.de> --- include/linux/netfilter.h | 1 + include/net/netfilter/nf_bpf_link.h | 10 ++ include/uapi/linux/bpf.h | 14 +++ kernel/bpf/syscall.c | 6 ++ net/netfilter/Kconfig | 3 + net/netfilter/Makefile | 1 + net/netfilter/nf_bpf_link.c | 159 ++++++++++++++++++++++++++++ 7 files changed, 194 insertions(+) create mode 100644 include/net/netfilter/nf_bpf_link.h create mode 100644 net/netfilter/nf_bpf_link.c