diff mbox series

[RFC,net-next,17/20] net/smc: add dummy implementation for inet smc sock

Message ID 1708412505-34470-18-git-send-email-alibuda@linux.alibaba.com (mailing list archive)
State Superseded
Headers show
Series Introduce IPPROTO_SMC | expand

Commit Message

D. Wythe Feb. 20, 2024, 7:01 a.m. UTC
From: "D. Wythe" <alibuda@linux.alibaba.com>

This patch implements a dummy version of the inet smc sock
and registers it with the inet protocols, which allows
us to create an inet smc sock.

Note that the ops are forked from the tcp ops. The vast majority of fields
are consistent with TCP; those that cannot be consistent are mainly:

1. obj_size
2. twsk_prot and rsk_prot
3. functions that need to be overridden, which are explicitly set to NULL.

Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
---
 net/smc/Makefile   |   1 +
 net/smc/af_smc.c   |  46 +++++++-
 net/smc/smc_inet.c | 315 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/smc/smc_inet.h |  86 +++++++++++++++
 4 files changed, 447 insertions(+), 1 deletion(-)
 create mode 100644 net/smc/smc_inet.c
 create mode 100644 net/smc/smc_inet.h
diff mbox series

Patch

diff --git a/net/smc/Makefile b/net/smc/Makefile
index 875efcd..4f10c3b 100644
--- a/net/smc/Makefile
+++ b/net/smc/Makefile
@@ -5,4 +5,5 @@  obj-$(CONFIG_SMC_DIAG)	+= smc_diag.o
 smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
 smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o
 smc-y += smc_tracepoint.o
+smc-y += smc_inet.o
 smc-$(CONFIG_SYSCTL) += smc_sysctl.o
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 97e3951..390fe6c 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -36,6 +36,9 @@ 
 
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
+#include <net/protocol.h>
+#include <net/inet_common.h>
+#include <net/transp_v6.h>
 #include "smc_netns.h"
 
 #include "smc.h"
@@ -53,6 +56,7 @@ 
 #include "smc_stats.h"
 #include "smc_tracepoint.h"
 #include "smc_sysctl.h"
+#include "smc_inet.h"
 
 static DEFINE_MUTEX(smc_server_lgr_pending);	/* serialize link group
 						 * creation on server
@@ -3658,9 +3662,36 @@  static int __init smc_init(void)
 		goto out_ib;
 	}
 
+	/* init smc inet sock related proto and proto_ops */
+	rc = smc_inet_sock_init();
+	if (!rc) {
+		/* register smc inet proto */
+		rc = proto_register(&smc_inet_prot, 1);
+		if (rc) {
+			pr_err("%s: proto_register smc_inet_prot fails with %d\n", __func__, rc);
+			goto out_ulp;
+		}
+		/* no return value */
+		inet_register_protosw(&smc_inet_protosw);
+#if IS_ENABLED(CONFIG_IPV6)
+		/* register smc inet6 proto */
+		rc = proto_register(&smc_inet6_prot, 1);
+		if (rc) {
+			pr_err("%s: proto_register smc_inet6_prot fails with %d\n", __func__, rc);
+			goto out_proto_register;
+		}
+		/* no return value */
+		inet6_register_protosw(&smc_inet6_protosw);
+#endif
+	}
+
 	static_branch_enable(&tcp_have_smc);
 	return 0;
-
+out_proto_register:
+	inet_unregister_protosw(&smc_inet_protosw);
+	proto_unregister(&smc_inet_prot);
+out_ulp:
+	tcp_unregister_ulp(&smc_ulp_ops);
 out_ib:
 	smc_ib_unregister_client();
 out_sock:
@@ -3695,6 +3726,10 @@  static int __init smc_init(void)
 static void __exit smc_exit(void)
 {
 	static_branch_disable(&tcp_have_smc);
+	inet_unregister_protosw(&smc_inet_protosw);
+#if IS_ENABLED(CONFIG_IPV6)
+	inet6_unregister_protosw(&smc_inet6_protosw);
+#endif
 	tcp_unregister_ulp(&smc_ulp_ops);
 	sock_unregister(PF_SMC);
 	smc_core_exit();
@@ -3705,6 +3740,10 @@  static void __exit smc_exit(void)
 	destroy_workqueue(smc_hs_wq);
 	proto_unregister(&smc_proto6);
 	proto_unregister(&smc_proto);
+	proto_unregister(&smc_inet_prot);
+#if IS_ENABLED(CONFIG_IPV6)
+	proto_unregister(&smc_inet6_prot);
+#endif
 	smc_pnet_exit();
 	smc_nl_exit();
 	smc_clc_exit();
@@ -3720,5 +3759,10 @@  static void __exit smc_exit(void)
 MODULE_DESCRIPTION("smc socket address family");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_NETPROTO(PF_SMC);
+/* It seems that this macro treats enum constants (IPPROTO_SMC, SOCK_STREAM)
+ * differently, so literal numbers (presumably their values) are passed instead.
+ */
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 263, 1);
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 263, 1);
 MODULE_ALIAS_TCP_ULP("smc");
 MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME);
diff --git a/net/smc/smc_inet.c b/net/smc/smc_inet.c
new file mode 100644
index 00000000..d35b567
--- /dev/null
+++ b/net/smc/smc_inet.c
@@ -0,0 +1,315 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *  Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ *  AF_SMC protocol family socket handler keeping the AF_INET sock address type
+ *  applies to SOCK_STREAM sockets only
+ *  offers an alternative communication option for TCP-protocol sockets
+ *  applicable with RoCE-cards only
+ *
+ *  Initial restrictions:
+ *    - support for alternate links postponed
+ *
+ *  Copyright IBM Corp. 2016, 2018
+ *
+ */
+
+#include <net/sock.h>
+#include <net/inet_common.h>
+
+#include "smc_inet.h"
+#include "smc.h"
+
+static struct timewait_sock_ops smc_timewait_sock_ops = {
+	.twsk_obj_size		= sizeof(struct tcp_timewait_sock),
+	.twsk_unique		= tcp_twsk_unique,
+	.twsk_destructor	= tcp_twsk_destructor,
+};
+
+static struct timewait_sock_ops smc6_timewait_sock_ops = {
+	.twsk_obj_size		= sizeof(struct tcp6_timewait_sock),
+	.twsk_unique		= tcp_twsk_unique,
+	.twsk_destructor	= tcp_twsk_destructor,
+};
+
+struct proto smc_inet_prot = {
+	.name			= "SMC",
+	.owner			= THIS_MODULE,
+	.close			= tcp_close,
+	.pre_connect		= NULL,
+	.connect		= tcp_v4_connect,
+	.disconnect		= tcp_disconnect,
+	.accept			= smc_inet_csk_accept,
+	.ioctl			= tcp_ioctl,
+	.init			= smc_inet_init_sock,
+	.destroy		= tcp_v4_destroy_sock,
+	.shutdown		= tcp_shutdown,
+	.setsockopt		= tcp_setsockopt,
+	.getsockopt		= tcp_getsockopt,
+	.keepalive		= tcp_set_keepalive,
+	.recvmsg		= tcp_recvmsg,
+	.sendmsg		= tcp_sendmsg,
+	.backlog_rcv		= tcp_v4_do_rcv,
+	.release_cb		= smc_inet_sock_proto_release_cb,
+	.hash			= inet_hash,
+	.unhash			= inet_unhash,
+	.get_port		= inet_csk_get_port,
+	.enter_memory_pressure	= tcp_enter_memory_pressure,
+	.per_cpu_fw_alloc       = &tcp_memory_per_cpu_fw_alloc,
+	.leave_memory_pressure	= tcp_leave_memory_pressure,
+	.stream_memory_free	= tcp_stream_memory_free,
+	.sockets_allocated	= &tcp_sockets_allocated,
+	.orphan_count		= &tcp_orphan_count,
+	.memory_allocated	= &tcp_memory_allocated,
+	.memory_pressure	= &tcp_memory_pressure,
+	.sysctl_mem		= sysctl_tcp_mem,
+	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
+	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
+	.max_header		= MAX_TCP_HEADER,
+	.obj_size		= sizeof(struct smc_sock),
+	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
+	.twsk_prot		= &smc_timewait_sock_ops,
+	/* tcp_conn_request will use tcp_request_sock_ops */
+	.rsk_prot		= NULL,
+	.h.hashinfo		= &tcp_hashinfo,
+	.no_autobind		= true,
+	.diag_destroy		= tcp_abort,
+};
+EXPORT_SYMBOL_GPL(smc_inet_prot);
+
+const struct proto_ops smc_inet_stream_ops = {
+	.family		   = PF_INET,
+	.owner		   = THIS_MODULE,
+	.release	   = smc_inet_release,
+	.bind		   = inet_bind,
+	.connect	   = smc_inet_connect,
+	.socketpair	   = sock_no_socketpair,
+	.accept		   = inet_accept,
+	.getname	   = inet_getname,
+	.poll		   = smc_inet_poll,
+	.ioctl		   = smc_inet_ioctl,
+	.gettstamp	   = sock_gettstamp,
+	.listen		   = smc_inet_listen,
+	.shutdown	   = smc_inet_shutdown,
+	.setsockopt	   = smc_inet_setsockopt,
+	.getsockopt	   = smc_inet_getsockopt,
+	.sendmsg	   = smc_inet_sendmsg,
+	.recvmsg	   = smc_inet_recvmsg,
+#ifdef CONFIG_MMU
+	.mmap		   = tcp_mmap,
+#endif
+	.splice_read	   = smc_inet_splice_read,
+	.read_sock	   = tcp_read_sock,
+	.sendmsg_locked    = tcp_sendmsg_locked,
+	.peek_len	   = tcp_peek_len,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	   = inet_compat_ioctl,
+#endif
+	.set_rcvlowat	   = tcp_set_rcvlowat,
+};
+
+struct inet_protosw smc_inet_protosw = {
+	.type       = SOCK_STREAM,
+	.protocol   = IPPROTO_SMC,
+	.prot   = &smc_inet_prot,
+	.ops    = &smc_inet_stream_ops,
+	.flags  = INET_PROTOSW_ICSK,
+};
+
+#if IS_ENABLED(CONFIG_IPV6)
+struct proto smc_inet6_prot = {
+	.name			= "SMCv6",
+	.owner			= THIS_MODULE,
+	.close			= tcp_close,
+	.pre_connect		= NULL,
+	.connect		= NULL,
+	.disconnect		= tcp_disconnect,
+	.accept			= smc_inet_csk_accept,
+	.ioctl			= tcp_ioctl,
+	.init			= smc_inet_init_sock,
+	.destroy		= NULL,
+	.shutdown		= tcp_shutdown,
+	.setsockopt		= tcp_setsockopt,
+	.getsockopt		= tcp_getsockopt,
+	.keepalive		= tcp_set_keepalive,
+	.recvmsg		= tcp_recvmsg,
+	.sendmsg		= tcp_sendmsg,
+	.backlog_rcv		= NULL,
+	.release_cb		= smc_inet_sock_proto_release_cb,
+	.hash			= NULL,
+	.unhash			= inet_unhash,
+	.get_port		= inet_csk_get_port,
+	.enter_memory_pressure	= tcp_enter_memory_pressure,
+	.per_cpu_fw_alloc       = &tcp_memory_per_cpu_fw_alloc,
+	.leave_memory_pressure	= tcp_leave_memory_pressure,
+	.stream_memory_free	= tcp_stream_memory_free,
+	.sockets_allocated	= &tcp_sockets_allocated,
+	.memory_allocated	= &tcp_memory_allocated,
+	.memory_pressure	= &tcp_memory_pressure,
+	.orphan_count		= &tcp_orphan_count,
+	.sysctl_mem		= sysctl_tcp_mem,
+	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
+	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
+	.max_header		= MAX_TCP_HEADER,
+	.obj_size		= sizeof(struct smc_sock),
+	.ipv6_pinfo_offset	= offsetof(struct tcp6_sock, inet6),
+	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
+	.twsk_prot		= &smc6_timewait_sock_ops,
+	/* tcp_conn_request will use tcp_request_sock_ops */
+	.rsk_prot		= NULL,
+	.h.hashinfo		= &tcp_hashinfo,
+	.no_autobind		= true,
+	.diag_destroy		= tcp_abort,
+};
+EXPORT_SYMBOL_GPL(smc_inet6_prot);
+
+const struct proto_ops smc_inet6_stream_ops = {
+	.family		   = PF_INET6,
+	.owner		   = THIS_MODULE,
+	.release	   = smc_inet_release,
+	.bind		   = inet6_bind,
+	.connect	   = smc_inet_connect,	/* ok		*/
+	.socketpair	   = sock_no_socketpair,	/* a do nothing	*/
+	.accept		   = inet_accept,		/* ok		*/
+	.getname	   = inet6_getname,
+	.poll		   = smc_inet_poll,			/* ok		*/
+	.ioctl		   = smc_inet_ioctl,		/* must change  */
+	.gettstamp	   = sock_gettstamp,
+	.listen		   = smc_inet_listen,		/* ok		*/
+	.shutdown	   = smc_inet_shutdown,		/* ok		*/
+	.setsockopt	   = smc_inet_setsockopt,	/* ok		*/
+	.getsockopt	   = smc_inet_getsockopt,	/* ok		*/
+	.sendmsg	   = smc_inet_sendmsg,		/* retpoline's sake */
+	.recvmsg	   = smc_inet_recvmsg,		/* retpoline's sake */
+#ifdef CONFIG_MMU
+	.mmap		   = tcp_mmap,
+#endif
+	.sendmsg_locked    = tcp_sendmsg_locked,
+	.splice_read	   = smc_inet_splice_read,
+	.read_sock	   = tcp_read_sock,
+	.peek_len	   = tcp_peek_len,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	   = inet6_compat_ioctl,
+#endif
+	.set_rcvlowat	   = tcp_set_rcvlowat,
+};
+
+struct inet_protosw smc_inet6_protosw = {
+	.type       = SOCK_STREAM,
+	.protocol   = IPPROTO_SMC,
+	.prot   = &smc_inet6_prot,
+	.ops    = &smc_inet6_stream_ops,
+	.flags  = INET_PROTOSW_ICSK,
+};
+#endif
+
+int smc_inet_sock_init(void)
+{
+	struct proto *tcp_v4prot;
+#if IS_ENABLED(CONFIG_IPV6)
+	struct proto *tcp_v6prot;
+#endif
+
+	tcp_v4prot = smc_inet_get_tcp_prot(PF_INET);
+	if (unlikely(!tcp_v4prot))
+		return -EINVAL;
+
+#if IS_ENABLED(CONFIG_IPV6)
+	tcp_v6prot = smc_inet_get_tcp_prot(PF_INET6);
+	if (unlikely(!tcp_v6prot))
+		return -EINVAL;
+#endif
+
+	/* The INET sock has an issue here: a twsk holds a reference to this module,
+	 * so the SMC module may turn out not to be removable right after a test program ends.
+	 * Eventually, however, the twsk will release its reference to the module.
+	 * This may affect some old test cases if they try to remove the module immediately after
+	 * completing their test.
+	 */
+
+	/* Complete the prot and proto_ops so that they are
+	 * consistent with TCP. Some of the symbols have not been exported,
+	 * so we have to assign them here.
+	 */
+	smc_inet_prot.pre_connect = tcp_v4prot->pre_connect;
+
+#if IS_ENABLED(CONFIG_IPV6)
+	smc_inet6_prot.pre_connect = tcp_v6prot->pre_connect;
+	smc_inet6_prot.connect = tcp_v6prot->connect;
+	smc_inet6_prot.destroy = tcp_v6prot->destroy;
+	smc_inet6_prot.backlog_rcv = tcp_v6prot->backlog_rcv;
+	smc_inet6_prot.hash = tcp_v6prot->hash;
+#endif
+	return 0;
+}
+
+int smc_inet_init_sock(struct sock *sk) { return  0; }
+
+void smc_inet_sock_proto_release_cb(struct sock *sk) {}
+
+int smc_inet_connect(struct socket *sock, struct sockaddr *addr,
+		     int alen, int flags)
+{
+	return -EOPNOTSUPP;
+}
+
+int smc_inet_setsockopt(struct socket *sock, int level, int optname,
+			sockptr_t optval, unsigned int optlen)
+{
+	return -EOPNOTSUPP;
+}
+
+int smc_inet_getsockopt(struct socket *sock, int level, int optname,
+			char __user *optval, int __user *optlen)
+{
+	return -EOPNOTSUPP;
+}
+
+int smc_inet_ioctl(struct socket *sock, unsigned int cmd,
+		   unsigned long arg)
+{
+	return -EOPNOTSUPP;
+}
+
+int smc_inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
+{
+	return -EOPNOTSUPP;
+}
+
+int smc_inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+		     int flags)
+{
+	return -EOPNOTSUPP;
+}
+
+ssize_t smc_inet_splice_read(struct socket *sock, loff_t *ppos,
+			     struct pipe_inode_info *pipe, size_t len,
+			     unsigned int flags)
+{
+	return -EOPNOTSUPP;
+}
+
+__poll_t smc_inet_poll(struct file *file, struct socket *sock, poll_table *wait)
+{
+	return 0;
+}
+
+struct sock *smc_inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
+{
+	return NULL;
+}
+
+int smc_inet_listen(struct socket *sock, int backlog)
+{
+	return -EOPNOTSUPP;
+}
+
+int smc_inet_shutdown(struct socket *sock, int how)
+{
+	return -EOPNOTSUPP;
+}
+
+int smc_inet_release(struct socket *sock)
+{
+	return -EOPNOTSUPP;
+}
diff --git a/net/smc/smc_inet.h b/net/smc/smc_inet.h
new file mode 100644
index 00000000..68ecfa0
--- /dev/null
+++ b/net/smc/smc_inet.h
@@ -0,0 +1,86 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ *  Definitions for the SMC module (socket related)
+ *
+ *  Copyright IBM Corp. 2016
+ *
+ */
+
+#ifndef __SMC_INET
+#define __SMC_INET
+
+#include <net/protocol.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <net/ipv6.h>
+/* MUST be included after net/tcp.h, otherwise a warning is raised */
+#include <net/transp_v6.h>
+
+extern struct proto smc_inet_prot;
+extern struct proto smc_inet6_prot;
+
+extern const struct proto_ops smc_inet_stream_ops;
+extern const struct proto_ops smc_inet6_stream_ops;
+
+extern struct inet_protosw smc_inet_protosw;
+extern struct inet_protosw smc_inet6_protosw;
+
+/* obtain TCP proto via sock family */
+static __always_inline struct proto *smc_inet_get_tcp_prot(int family)
+{
+	switch (family) {
+	case AF_INET:
+		return &tcp_prot;
+	case AF_INET6:
+		return &tcpv6_prot;
+	default:
+		pr_warn_once("smc: %s(unknown family %d)\n", __func__, family);
+		break;
+	}
+	return NULL;
+}
+
+/* This function initializes the inet related structures.
+ * If initialization is successful, it returns 0;
+ * otherwise, it returns a non-zero value.
+ */
+int smc_inet_sock_init(void);
+
+int smc_inet_init_sock(struct sock *sk);
+void smc_inet_sock_proto_release_cb(struct sock *sk);
+
+int smc_inet_connect(struct socket *sock, struct sockaddr *addr,
+		     int alen, int flags);
+
+int smc_inet_setsockopt(struct socket *sock, int level, int optname,
+			sockptr_t optval, unsigned int optlen);
+
+int smc_inet_getsockopt(struct socket *sock, int level, int optname,
+			char __user *optval, int __user *optlen);
+
+int smc_inet_ioctl(struct socket *sock, unsigned int cmd,
+		   unsigned long arg);
+
+int smc_inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t len);
+
+int smc_inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+		     int flags);
+
+ssize_t smc_inet_sendpage(struct socket *sock, struct page *page,
+			  int offset, size_t size, int flags);
+
+ssize_t smc_inet_splice_read(struct socket *sock, loff_t *ppos,
+			     struct pipe_inode_info *pipe, size_t len,
+			     unsigned int flags);
+
+__poll_t smc_inet_poll(struct file *file, struct socket *sock, poll_table *wait);
+
+struct sock *smc_inet_csk_accept(struct sock *sk, int flags, int *err, bool kern);
+int smc_inet_listen(struct socket *sock, int backlog);
+
+int smc_inet_shutdown(struct socket *sock, int how);
+int smc_inet_release(struct socket *sock);
+
+#endif // __SMC_INET