diff mbox series

[bpf-next,v3,03/10] af_unix: implement ->psock_update_sk_prot()

Message ID 20210426025001.7899-4-xiyou.wangcong@gmail.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series sockmap: add sockmap support to Unix datagram socket | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for bpf-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 13 maintainers not CCed: gustavoars@kernel.org yhs@fb.com kpsingh@kernel.org andrii@kernel.org jingxiangfeng@huawei.com kafai@fb.com orcohen2006@gmail.com ast@kernel.org jamorris@linux.microsoft.com songliubraving@fb.com christian.brauner@ubuntu.com davem@davemloft.net kuba@kernel.org
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 73 this patch: 73
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: memory barrier without comment
netdev/build_allmodconfig_warn success Errors and warnings before: 72 this patch: 72
netdev/header_inline success Link

Commit Message

Cong Wang April 26, 2021, 2:49 a.m. UTC
From: Cong Wang <cong.wang@bytedance.com>

unix_proto is special: unlike the INET protos, it does not even
have a ->close(). We have to add a dummy one to satisfy
sockmap.

And now we can implement unix_bpf_update_proto() to update
sk_prot.

Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jakub Sitnicki <jakub@cloudflare.com>
Cc: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
---
 MAINTAINERS           |  1 +
 include/net/af_unix.h | 10 +++++++++
 net/core/sock_map.c   |  1 +
 net/unix/Makefile     |  1 +
 net/unix/af_unix.c    | 12 ++++++++++-
 net/unix/unix_bpf.c   | 47 +++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 71 insertions(+), 1 deletion(-)
 create mode 100644 net/unix/unix_bpf.c

Comments

Jakub Sitnicki May 6, 2021, 1:04 p.m. UTC | #1
On Mon, Apr 26, 2021 at 04:49 AM CEST, Cong Wang wrote:
> From: Cong Wang <cong.wang@bytedance.com>
>
> unix_proto is special, it is very different from INET proto,
> which even does not have a ->close(). We have to add a dummy
> one to satisfy sockmap.
>
> And now we can implement unix_bpf_update_proto() to update
> sk_prot.
>
> Cc: John Fastabend <john.fastabend@gmail.com>
> Cc: Daniel Borkmann <daniel@iogearbox.net>
> Cc: Jakub Sitnicki <jakub@cloudflare.com>
> Cc: Lorenz Bauer <lmb@cloudflare.com>
> Signed-off-by: Cong Wang <cong.wang@bytedance.com>
> ---

[...]

> diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c
> new file mode 100644
> index 000000000000..b1582a659427
> --- /dev/null
> +++ b/net/unix/unix_bpf.c
> @@ -0,0 +1,47 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2021 Cong Wang <cong.wang@bytedance.com> */
> +
> +#include <linux/skmsg.h>
> +#include <linux/bpf.h>
> +#include <net/sock.h>
> +#include <net/af_unix.h>
> +
> +static struct proto *unix_prot_saved __read_mostly;
> +static DEFINE_SPINLOCK(unix_prot_lock);
> +static struct proto unix_bpf_prot;
> +
> +static void unix_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
> +{
> +	*prot        = *base;
> +	prot->close  = sock_map_close;
> +}

I think we also need an unhash handler so that the socket gets removed from
the sockmap on disconnect, that is, on connect(fd, {sa_family=AF_UNSPEC, ...}, ...).
Cong Wang May 7, 2021, 12:55 a.m. UTC | #2
On Thu, May 6, 2021 at 6:04 AM Jakub Sitnicki <jakub@cloudflare.com> wrote:
> I think we also need unhash so that socket gets removed from sockmap
> on disconnect, that is connect(fd, {sa_family=AF_UNSPEC, ...}, ...).

Excellent catch! I thought disconnect was not supported for AF_UNIX,
as there is no ->disconnect() in af_unix.c, but after reading
unix_dgram_connect() again, I see it is actually supported. Let me think
about how to handle this properly here.

Thanks.
diff mbox series

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index 217c7470bfa9..02532e11da5b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10000,6 +10000,7 @@  F:	net/core/skmsg.c
 F:	net/core/sock_map.c
 F:	net/ipv4/tcp_bpf.c
 F:	net/ipv4/udp_bpf.c
+F:	net/unix/unix_bpf.c
 
 LANTIQ / INTEL Ethernet drivers
 M:	Hauke Mehrtens <hauke@hauke-m.de>
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index f42fdddecd41..cca645846af1 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -89,4 +89,14 @@  void unix_sysctl_unregister(struct net *net);
 static inline int unix_sysctl_register(struct net *net) { return 0; }
 static inline void unix_sysctl_unregister(struct net *net) {}
 #endif
+
+#ifdef CONFIG_BPF_SYSCALL
+extern struct proto unix_proto;
+
+int unix_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
+void __init unix_bpf_build_proto(void);
+#else
+static inline void __init unix_bpf_build_proto(void)
+{}
+#endif
 #endif
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 6f1b82b8ad49..1107c9dcc969 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1536,6 +1536,7 @@  void sock_map_close(struct sock *sk, long timeout)
 	release_sock(sk);
 	saved_close(sk, timeout);
 }
+EXPORT_SYMBOL_GPL(sock_map_close);
 
 static int sock_map_iter_attach_target(struct bpf_prog *prog,
 				       union bpf_iter_link_info *linfo,
diff --git a/net/unix/Makefile b/net/unix/Makefile
index 54e58cc4f945..20491825b4d0 100644
--- a/net/unix/Makefile
+++ b/net/unix/Makefile
@@ -7,6 +7,7 @@  obj-$(CONFIG_UNIX)	+= unix.o
 
 unix-y			:= af_unix.o garbage.o
 unix-$(CONFIG_SYSCTL)	+= sysctl_net_unix.o
+unix-$(CONFIG_BPF_SYSCALL) += unix_bpf.o
 
 obj-$(CONFIG_UNIX_DIAG)	+= unix_diag.o
 unix_diag-y		:= diag.o
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index f4dc22db371d..8968ed44a89f 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -772,10 +772,18 @@  static const struct proto_ops unix_seqpacket_ops = {
 	.show_fdinfo =	unix_show_fdinfo,
 };
 
-static struct proto unix_proto = {
+static void unix_close(struct sock *sk, long timeout)
+{
+}
+
+struct proto unix_proto = {
 	.name			= "UNIX",
 	.owner			= THIS_MODULE,
 	.obj_size		= sizeof(struct unix_sock),
+	.close			= unix_close,
+#ifdef CONFIG_BPF_SYSCALL
+	.psock_update_sk_prot	= unix_bpf_update_proto,
+#endif
 };
 
 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
@@ -859,6 +867,7 @@  static int unix_release(struct socket *sock)
 	if (!sk)
 		return 0;
 
+	sk->sk_prot->close(sk, 0);
 	unix_release_sock(sk, 0);
 	sock->sk = NULL;
 
@@ -2958,6 +2967,7 @@  static int __init af_unix_init(void)
 
 	sock_register(&unix_family_ops);
 	register_pernet_subsys(&unix_net_ops);
+	unix_bpf_build_proto();
 out:
 	return rc;
 }
diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c
new file mode 100644
index 000000000000..b1582a659427
--- /dev/null
+++ b/net/unix/unix_bpf.c
@@ -0,0 +1,47 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Cong Wang <cong.wang@bytedance.com> */
+
+#include <linux/skmsg.h>
+#include <linux/bpf.h>
+#include <net/sock.h>
+#include <net/af_unix.h>
+
+static struct proto *unix_prot_saved __read_mostly;
+static DEFINE_SPINLOCK(unix_prot_lock);
+static struct proto unix_bpf_prot;
+
+static void unix_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
+{
+	*prot        = *base;
+	prot->close  = sock_map_close;
+}
+
+static void unix_bpf_check_needs_rebuild(struct proto *ops)
+{
+	if (unlikely(ops != smp_load_acquire(&unix_prot_saved))) {
+		spin_lock_bh(&unix_prot_lock);
+		if (likely(ops != unix_prot_saved)) {
+			unix_bpf_rebuild_protos(&unix_bpf_prot, ops);
+			smp_store_release(&unix_prot_saved, ops);
+		}
+		spin_unlock_bh(&unix_prot_lock);
+	}
+}
+
+int unix_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
+{
+	if (restore) {
+		sk->sk_write_space = psock->saved_write_space;
+		WRITE_ONCE(sk->sk_prot, psock->sk_proto);
+		return 0;
+	}
+
+	unix_bpf_check_needs_rebuild(psock->sk_proto);
+	WRITE_ONCE(sk->sk_prot, &unix_bpf_prot);
+	return 0;
+}
+
+void __init unix_bpf_build_proto(void)
+{
+	unix_bpf_rebuild_protos(&unix_bpf_prot, &unix_proto);
+}