Message ID | 1716955147-88923-4-git-send-email-alibuda@linux.alibaba.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | Introduce IPPROTO_SMC | expand |
On 2024-05-29 11:59:07, D. Wythe wrote: >From: "D. Wythe" <alibuda@linux.alibaba.com> > >This patch allows to create smc socket via AF_INET, >similar to the following code, > >/* create v4 smc sock */ >v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC); > >/* create v6 smc sock */ >v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC); > >There are several reasons why we believe it is appropriate here: > >1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6) >address. There is no AF_SMC address at all. > >2. Create smc socket in the AF_INET(6) path, which allows us to reuse >the infrastructure of AF_INET(6) path, such as common ebpf hooks. >Otherwise, smc have to implement it again in AF_SMC path. > >Signed-off-by: D. Wythe <alibuda@linux.alibaba.com> >--- > include/uapi/linux/in.h | 2 + > net/smc/Makefile | 2 +- > net/smc/af_smc.c | 36 ++++++++++++++++ > net/smc/inet_smc.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++++ > net/smc/inet_smc.h | 34 +++++++++++++++ > 5 files changed, 181 insertions(+), 1 deletion(-) > create mode 100644 net/smc/inet_smc.c > create mode 100644 net/smc/inet_smc.h > >diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h >index e682ab6..0c6322b 100644 >--- a/include/uapi/linux/in.h >+++ b/include/uapi/linux/in.h >@@ -83,6 +83,8 @@ enum { > #define IPPROTO_RAW IPPROTO_RAW > IPPROTO_MPTCP = 262, /* Multipath TCP connection */ > #define IPPROTO_MPTCP IPPROTO_MPTCP >+ IPPROTO_SMC = 263, /* Shared Memory Communications */ >+#define IPPROTO_SMC IPPROTO_SMC > IPPROTO_MAX > }; > #endif >diff --git a/net/smc/Makefile b/net/smc/Makefile >index 2c510d54..472b9ee 100644 >--- a/net/smc/Makefile >+++ b/net/smc/Makefile >@@ -4,6 +4,6 @@ obj-$(CONFIG_SMC) += smc.o > obj-$(CONFIG_SMC_DIAG) += smc_diag.o > smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o > smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o >-smc-y += smc_tracepoint.o >+smc-y += smc_tracepoint.o inet_smc.o 
> smc-$(CONFIG_SYSCTL) += smc_sysctl.o > smc-$(CONFIG_SMC_LO) += smc_loopback.o >diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c >index 8e3ce76..320624c 100644 >--- a/net/smc/af_smc.c >+++ b/net/smc/af_smc.c >@@ -54,6 +54,7 @@ > #include "smc_tracepoint.h" > #include "smc_sysctl.h" > #include "smc_loopback.h" >+#include "inet_smc.h" > > static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group > * creation on server >@@ -3594,9 +3595,31 @@ static int __init smc_init(void) > goto out_lo; > } > >+ rc = proto_register(&smc_inet_prot, 1); >+ if (rc) { >+ pr_err("%s: proto_register smc_inet_prot fails with %d\n", __func__, rc); >+ goto out_ulp; >+ } >+ inet_register_protosw(&smc_inet_protosw); >+#if IS_ENABLED(CONFIG_IPV6) >+ rc = proto_register(&smc_inet6_prot, 1); >+ if (rc) { >+ pr_err("%s: proto_register smc_inet6_prot fails with %d\n", __func__, rc); >+ goto out_inet_prot; >+ } >+ inet6_register_protosw(&smc_inet6_protosw); >+#endif >+ What do you think of moving all of this inet initialization code into a function such as smc_inet_init() and placing it in smc_inet.c?
> static_branch_enable(&tcp_have_smc); > return 0; > >+#if IS_ENABLED(CONFIG_IPV6) >+out_inet_prot: >+ inet_unregister_protosw(&smc_inet_protosw); >+ proto_unregister(&smc_inet_prot); >+#endif >+out_ulp: >+ tcp_unregister_ulp(&smc_ulp_ops); > out_lo: > smc_loopback_exit(); > out_ib: >@@ -3633,6 +3656,10 @@ static int __init smc_init(void) > static void __exit smc_exit(void) > { > static_branch_disable(&tcp_have_smc); >+ inet_unregister_protosw(&smc_inet_protosw); >+#if IS_ENABLED(CONFIG_IPV6) >+ inet6_unregister_protosw(&smc_inet6_protosw); >+#endif > tcp_unregister_ulp(&smc_ulp_ops); > sock_unregister(PF_SMC); > smc_core_exit(); >@@ -3644,6 +3671,10 @@ static void __exit smc_exit(void) > destroy_workqueue(smc_hs_wq); > proto_unregister(&smc_proto6); > proto_unregister(&smc_proto); >+ proto_unregister(&smc_inet_prot); >+#if IS_ENABLED(CONFIG_IPV6) >+ proto_unregister(&smc_inet6_prot); >+#endif > smc_pnet_exit(); > smc_nl_exit(); > smc_clc_exit(); >@@ -3660,4 +3691,9 @@ static void __exit smc_exit(void) > MODULE_LICENSE("GPL"); > MODULE_ALIAS_NETPROTO(PF_SMC); > MODULE_ALIAS_TCP_ULP("smc"); >+/* 263 for IPPROTO_SMC and 1 for SOCK_STREAM */ >+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 263, 1); >+#if IS_ENABLED(CONFIG_IPV6) >+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 263, 1); >+#endif > MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME); >diff --git a/net/smc/inet_smc.c b/net/smc/inet_smc.c >new file mode 100644 >index 00000000..1ba73d7 >--- /dev/null >+++ b/net/smc/inet_smc.c >@@ -0,0 +1,108 @@ >+// SPDX-License-Identifier: GPL-2.0-only >+/* >+ * Shared Memory Communications over RDMA (SMC-R) and RoCE >+ * >+ * Definitions for the IPPROTO_SMC (socket related) >+ * >+ * Copyright IBM Corp. 2016, 2018 >+ * Copyright (c) 2024, Alibaba Inc. >+ * >+ * Author: D. 
Wythe <alibuda@linux.alibaba.com> >+ */ >+ >+#include "inet_smc.h" >+#include "smc.h" >+ >+struct proto smc_inet_prot = { >+ .name = "INET_SMC", >+ .owner = THIS_MODULE, >+ .init = smc_inet_init_sock, >+ .hash = smc_hash_sk, >+ .unhash = smc_unhash_sk, >+ .release_cb = smc_release_cb, >+ .obj_size = sizeof(struct smc_sock), >+ .h.smc_hash = &smc_v4_hashinfo, >+ .slab_flags = SLAB_TYPESAFE_BY_RCU, >+}; >+ >+const struct proto_ops smc_inet_stream_ops = { >+ .family = PF_INET, >+ .owner = THIS_MODULE, >+ .release = smc_release, >+ .bind = smc_bind, >+ .connect = smc_connect, >+ .socketpair = sock_no_socketpair, >+ .accept = smc_accept, >+ .getname = smc_getname, >+ .poll = smc_poll, >+ .ioctl = smc_ioctl, >+ .listen = smc_listen, >+ .shutdown = smc_shutdown, >+ .setsockopt = smc_setsockopt, >+ .getsockopt = smc_getsockopt, >+ .sendmsg = smc_sendmsg, >+ .recvmsg = smc_recvmsg, >+ .mmap = sock_no_mmap, >+ .splice_read = smc_splice_read, >+}; >+ >+struct inet_protosw smc_inet_protosw = { >+ .type = SOCK_STREAM, >+ .protocol = IPPROTO_SMC, >+ .prot = &smc_inet_prot, >+ .ops = &smc_inet_stream_ops, >+ .flags = INET_PROTOSW_ICSK, >+}; >+ >+#if IS_ENABLED(CONFIG_IPV6) >+struct proto smc_inet6_prot = { >+ .name = "INET6_SMC", >+ .owner = THIS_MODULE, >+ .init = smc_inet_init_sock, >+ .hash = smc_hash_sk, >+ .unhash = smc_unhash_sk, >+ .release_cb = smc_release_cb, >+ .obj_size = sizeof(struct smc_sock), >+ .h.smc_hash = &smc_v6_hashinfo, >+ .slab_flags = SLAB_TYPESAFE_BY_RCU, >+}; >+ >+const struct proto_ops smc_inet6_stream_ops = { >+ .family = PF_INET6, >+ .owner = THIS_MODULE, >+ .release = smc_release, >+ .bind = smc_bind, >+ .connect = smc_connect, >+ .socketpair = sock_no_socketpair, >+ .accept = smc_accept, >+ .getname = smc_getname, >+ .poll = smc_poll, >+ .ioctl = smc_ioctl, >+ .listen = smc_listen, >+ .shutdown = smc_shutdown, >+ .setsockopt = smc_setsockopt, >+ .getsockopt = smc_getsockopt, >+ .sendmsg = smc_sendmsg, >+ .recvmsg = smc_recvmsg, >+ .mmap = 
sock_no_mmap, >+ .splice_read = smc_splice_read, >+}; >+ >+struct inet_protosw smc_inet6_protosw = { >+ .type = SOCK_STREAM, >+ .protocol = IPPROTO_SMC, >+ .prot = &smc_inet6_prot, >+ .ops = &smc_inet6_stream_ops, >+ .flags = INET_PROTOSW_ICSK, >+}; >+#endif >+ >+int smc_inet_init_sock(struct sock *sk) >+{ >+ struct net *net = sock_net(sk); >+ >+ /* init common smc sock */ >+ smc_sk_init(net, sk, IPPROTO_SMC); >+ /* create clcsock */ >+ return smc_create_clcsk(net, sk, sk->sk_family); >+} >diff --git a/net/smc/inet_smc.h b/net/smc/inet_smc.h >new file mode 100644 >index 00000000..c55345d >--- /dev/null >+++ b/net/smc/inet_smc.h >@@ -0,0 +1,34 @@ >+/* SPDX-License-Identifier: GPL-2.0 */ >+/* >+ * Shared Memory Communications over RDMA (SMC-R) and RoCE >+ * >+ * Definitions for the IPPROTO_SMC (socket related) >+ >+ * Copyright IBM Corp. 2016 >+ * Copyright (c) 2024, Alibaba Inc. >+ * >+ * Author: D. Wythe <alibuda@linux.alibaba.com> >+ */ >+#ifndef __INET_SMC >+#define __INET_SMC >+ >+#include <net/protocol.h> >+#include <net/sock.h> >+#include <net/tcp.h> Why not put those 'include's in the .c file ? >+ >+extern struct proto smc_inet_prot; >+extern const struct proto_ops smc_inet_stream_ops; >+extern struct inet_protosw smc_inet_protosw; >+ >+#if IS_ENABLED(CONFIG_IPV6) >+#include <net/ipv6.h> >+/* MUST after net/tcp.h or warning */ >+#include <net/transp_v6.h> >+extern struct proto smc_inet6_prot; >+extern const struct proto_ops smc_inet6_stream_ops; >+extern struct inet_protosw smc_inet6_protosw; >+#endif >+ >+int smc_inet_init_sock(struct sock *sk); Seems smc_inet_init_sock() is only used in smc_inet.c, why not defined it as a static function ? Best regards, Dust >+ >+#endif /* __INET_SMC */ >-- >1.8.3.1 >
On 29.05.24 05:59, D. Wythe wrote: > From: "D. Wythe" <alibuda@linux.alibaba.com> > > This patch allows to create smc socket via AF_INET, > similar to the following code, > > /* create v4 smc sock */ > v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC); > > /* create v6 smc sock */ > v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC); > > There are several reasons why we believe it is appropriate here: > > 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6) > address. There is no AF_SMC address at all. > > 2. Create smc socket in the AF_INET(6) path, which allows us to reuse > the infrastructure of AF_INET(6) path, such as common ebpf hooks. > Otherwise, smc have to implement it again in AF_SMC path. > > Signed-off-by: D. Wythe <alibuda@linux.alibaba.com> > --- > include/uapi/linux/in.h | 2 + > net/smc/Makefile | 2 +- > net/smc/af_smc.c | 36 ++++++++++++++++ > net/smc/inet_smc.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++++ > net/smc/inet_smc.h | 34 +++++++++++++++ > 5 files changed, 181 insertions(+), 1 deletion(-) > create mode 100644 net/smc/inet_smc.c > create mode 100644 net/smc/inet_smc.h > > diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h > index e682ab6..0c6322b 100644 > --- a/include/uapi/linux/in.h > +++ b/include/uapi/linux/in.h > @@ -83,6 +83,8 @@ enum { > #define IPPROTO_RAW IPPROTO_RAW > IPPROTO_MPTCP = 262, /* Multipath TCP connection */ > #define IPPROTO_MPTCP IPPROTO_MPTCP > + IPPROTO_SMC = 263, /* Shared Memory Communications */ > +#define IPPROTO_SMC IPPROTO_SMC > IPPROTO_MAX > }; > #endif > diff --git a/net/smc/Makefile b/net/smc/Makefile > index 2c510d54..472b9ee 100644 > --- a/net/smc/Makefile > +++ b/net/smc/Makefile > @@ -4,6 +4,6 @@ obj-$(CONFIG_SMC) += smc.o > obj-$(CONFIG_SMC_DIAG) += smc_diag.o > smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o > smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o > -smc-y += smc_tracepoint.o > +smc-y += 
smc_tracepoint.o inet_smc.o > smc-$(CONFIG_SYSCTL) += smc_sysctl.o > smc-$(CONFIG_SMC_LO) += smc_loopback.o > diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c > index 8e3ce76..320624c 100644 > --- a/net/smc/af_smc.c > +++ b/net/smc/af_smc.c > @@ -54,6 +54,7 @@ > #include "smc_tracepoint.h" > #include "smc_sysctl.h" > #include "smc_loopback.h" > +#include "inet_smc.h" > > static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group > * creation on server > @@ -3594,9 +3595,31 @@ static int __init smc_init(void) > goto out_lo; > } > > + rc = proto_register(&smc_inet_prot, 1); > + if (rc) { > + pr_err("%s: proto_register smc_inet_prot fails with %d\n", __func__, rc); > + goto out_ulp; > + } > + inet_register_protosw(&smc_inet_protosw); > +#if IS_ENABLED(CONFIG_IPV6) > + rc = proto_register(&smc_inet6_prot, 1); > + if (rc) { > + pr_err("%s: proto_register smc_inet6_prot fails with %d\n", __func__, rc); > + goto out_inet_prot; > + } > + inet6_register_protosw(&smc_inet6_protosw); Unlike inet_register_protosw(), inet6_register_protosw() returns an integer. Thus, checking its return value and doing the corresponding cleanup directly here would IMO be much cleaner.
> +#endif > + > static_branch_enable(&tcp_have_smc); > return 0; > > +#if IS_ENABLED(CONFIG_IPV6) > +out_inet_prot: > + inet_unregister_protosw(&smc_inet_protosw); > + proto_unregister(&smc_inet_prot); > +#endif > +out_ulp: > + tcp_unregister_ulp(&smc_ulp_ops); > out_lo: > smc_loopback_exit(); > out_ib: > @@ -3633,6 +3656,10 @@ static int __init smc_init(void) > static void __exit smc_exit(void) > { > static_branch_disable(&tcp_have_smc); > + inet_unregister_protosw(&smc_inet_protosw); > +#if IS_ENABLED(CONFIG_IPV6) > + inet6_unregister_protosw(&smc_inet6_protosw); > +#endif > tcp_unregister_ulp(&smc_ulp_ops); > sock_unregister(PF_SMC); > smc_core_exit(); > @@ -3644,6 +3671,10 @@ static void __exit smc_exit(void) > destroy_workqueue(smc_hs_wq); > proto_unregister(&smc_proto6); > proto_unregister(&smc_proto); > + proto_unregister(&smc_inet_prot); > +#if IS_ENABLED(CONFIG_IPV6) > + proto_unregister(&smc_inet6_prot); > +#end Since there is already inet_smc.c, I'd recommend grouping the registration and unregistration code into dedicated functions, e.g. 
smc_inet_init() and smc_inet_exit() in inet_smc.c > smc_pnet_exit(); > smc_nl_exit(); > smc_clc_exit(); > @@ -3660,4 +3691,9 @@ static void __exit smc_exit(void) > MODULE_LICENSE("GPL"); > MODULE_ALIAS_NETPROTO(PF_SMC); > MODULE_ALIAS_TCP_ULP("smc"); > +/* 263 for IPPROTO_SMC and 1 for SOCK_STREAM */ > +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 263, 1); > +#if IS_ENABLED(CONFIG_IPV6) > +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 263, 1); > +#endif > MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME); > diff --git a/net/smc/inet_smc.c b/net/smc/inet_smc.c > new file mode 100644 > index 00000000..1ba73d7 > --- /dev/null > +++ b/net/smc/inet_smc.c In order to keep the consistency with the structure and function names in the files, I'm wondering why not to use smc_inet.h and smc_inet.c instead of inet_smc.h and inet_smc.c respectively > @@ -0,0 +1,108 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/* > + * Shared Memory Communications over RDMA (SMC-R) and RoCE > + * > + * Definitions for the IPPROTO_SMC (socket related) > + * > + * Copyright IBM Corp. 2016, 2018 > + * Copyright (c) 2024, Alibaba Inc. > + * > + * Author: D. 
Wythe <alibuda@linux.alibaba.com> > + */ > + > +#include "inet_smc.h" > +#include "smc.h" > + > +struct proto smc_inet_prot = { > + .name = "INET_SMC", > + .owner = THIS_MODULE, > + .init = smc_inet_init_sock, > + .hash = smc_hash_sk, > + .unhash = smc_unhash_sk, > + .release_cb = smc_release_cb, > + .obj_size = sizeof(struct smc_sock), > + .h.smc_hash = &smc_v4_hashinfo, > + .slab_flags = SLAB_TYPESAFE_BY_RCU, > +}; > + > +const struct proto_ops smc_inet_stream_ops = { > + .family = PF_INET, > + .owner = THIS_MODULE, > + .release = smc_release, > + .bind = smc_bind, > + .connect = smc_connect, > + .socketpair = sock_no_socketpair, > + .accept = smc_accept, > + .getname = smc_getname, > + .poll = smc_poll, > + .ioctl = smc_ioctl, > + .listen = smc_listen, > + .shutdown = smc_shutdown, > + .setsockopt = smc_setsockopt, > + .getsockopt = smc_getsockopt, > + .sendmsg = smc_sendmsg, > + .recvmsg = smc_recvmsg, > + .mmap = sock_no_mmap, > + .splice_read = smc_splice_read, > +}; > + > +struct inet_protosw smc_inet_protosw = { > + .type = SOCK_STREAM, > + .protocol = IPPROTO_SMC, > + .prot = &smc_inet_prot, > + .ops = &smc_inet_stream_ops, > + .flags = INET_PROTOSW_ICSK, > +}; > + > +#if IS_ENABLED(CONFIG_IPV6) > +struct proto smc_inet6_prot = { > + .name = "INET6_SMC", > + .owner = THIS_MODULE, > + .init = smc_inet_init_sock, > + .hash = smc_hash_sk, > + .unhash = smc_unhash_sk, > + .release_cb = smc_release_cb, > + .obj_size = sizeof(struct smc_sock), > + .h.smc_hash = &smc_v6_hashinfo, > + .slab_flags = SLAB_TYPESAFE_BY_RCU, > +}; > + > +const struct proto_ops smc_inet6_stream_ops = { > + .family = PF_INET6, > + .owner = THIS_MODULE, > + .release = smc_release, > + .bind = smc_bind, > + .connect = smc_connect, > + .socketpair = sock_no_socketpair, > + .accept = smc_accept, > + .getname = smc_getname, > + .poll = smc_poll, > + .ioctl = smc_ioctl, > + .listen = smc_listen, > + .shutdown = smc_shutdown, > + .setsockopt = smc_setsockopt, > + .getsockopt = smc_getsockopt, > 
+ .sendmsg = smc_sendmsg, > + .recvmsg = smc_recvmsg, > + .mmap = sock_no_mmap, > + .splice_read = smc_splice_read, > +}; > + > +struct inet_protosw smc_inet6_protosw = { > + .type = SOCK_STREAM, > + .protocol = IPPROTO_SMC, > + .prot = &smc_inet6_prot, > + .ops = &smc_inet6_stream_ops, > + .flags = INET_PROTOSW_ICSK, > +}; > +#endif > + > +int smc_inet_init_sock(struct sock *sk) > +{ > + struct net *net = sock_net(sk); > + > + /* init common smc sock */ > + smc_sk_init(net, sk, IPPROTO_SMC); > + /* create clcsock */ > + return smc_create_clcsk(net, sk, sk->sk_family); > +} > diff --git a/net/smc/inet_smc.h b/net/smc/inet_smc.h > new file mode 100644 > index 00000000..c55345d > --- /dev/null > +++ b/net/smc/inet_smc.h > @@ -0,0 +1,34 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +/* > + * Shared Memory Communications over RDMA (SMC-R) and RoCE > + * > + * Definitions for the IPPROTO_SMC (socket related) > + > + * Copyright IBM Corp. 2016 > + * Copyright (c) 2024, Alibaba Inc. > + * > + * Author: D. Wythe <alibuda@linux.alibaba.com> > + */ > +#ifndef __INET_SMC > +#define __INET_SMC > + > +#include <net/protocol.h> > +#include <net/sock.h> > +#include <net/tcp.h> > + > +extern struct proto smc_inet_prot; > +extern const struct proto_ops smc_inet_stream_ops; > +extern struct inet_protosw smc_inet_protosw; > + > +#if IS_ENABLED(CONFIG_IPV6) > +#include <net/ipv6.h> > +/* MUST after net/tcp.h or warning */ > +#include <net/transp_v6.h> > +extern struct proto smc_inet6_prot; > +extern const struct proto_ops smc_inet6_stream_ops; > +extern struct inet_protosw smc_inet6_protosw; > +#endif > + > +int smc_inet_init_sock(struct sock *sk); > + > +#endif /* __INET_SMC */
在 2024/5/29 5:59, D. Wythe 写道: > From: "D. Wythe" <alibuda@linux.alibaba.com> > > This patch allows to create smc socket via AF_INET, > similar to the following code, > > /* create v4 smc sock */ > v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC); > > /* create v6 smc sock */ > v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC); > > There are several reasons why we believe it is appropriate here: > > 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6) > address. There is no AF_SMC address at all. > > 2. Create smc socket in the AF_INET(6) path, which allows us to reuse > the infrastructure of AF_INET(6) path, such as common ebpf hooks. > Otherwise, smc have to implement it again in AF_SMC path. > > Signed-off-by: D. Wythe <alibuda@linux.alibaba.com> > --- > include/uapi/linux/in.h | 2 + > net/smc/Makefile | 2 +- > net/smc/af_smc.c | 36 ++++++++++++++++ > net/smc/inet_smc.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++++ > net/smc/inet_smc.h | 34 +++++++++++++++ > 5 files changed, 181 insertions(+), 1 deletion(-) > create mode 100644 net/smc/inet_smc.c > create mode 100644 net/smc/inet_smc.h > > diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h > index e682ab6..0c6322b 100644 > --- a/include/uapi/linux/in.h > +++ b/include/uapi/linux/in.h > @@ -83,6 +83,8 @@ enum { > #define IPPROTO_RAW IPPROTO_RAW > IPPROTO_MPTCP = 262, /* Multipath TCP connection */ > #define IPPROTO_MPTCP IPPROTO_MPTCP > + IPPROTO_SMC = 263, /* Shared Memory Communications */ > +#define IPPROTO_SMC IPPROTO_SMC > IPPROTO_MAX > }; > #endif > diff --git a/net/smc/Makefile b/net/smc/Makefile > index 2c510d54..472b9ee 100644 > --- a/net/smc/Makefile > +++ b/net/smc/Makefile > @@ -4,6 +4,6 @@ obj-$(CONFIG_SMC) += smc.o > obj-$(CONFIG_SMC_DIAG) += smc_diag.o > smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o > smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o > -smc-y += smc_tracepoint.o > +smc-y += 
smc_tracepoint.o inet_smc.o > smc-$(CONFIG_SYSCTL) += smc_sysctl.o > smc-$(CONFIG_SMC_LO) += smc_loopback.o > diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c > index 8e3ce76..320624c 100644 > --- a/net/smc/af_smc.c > +++ b/net/smc/af_smc.c > @@ -54,6 +54,7 @@ > #include "smc_tracepoint.h" > #include "smc_sysctl.h" > #include "smc_loopback.h" > +#include "inet_smc.h" > > static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group > * creation on server > @@ -3594,9 +3595,31 @@ static int __init smc_init(void) > goto out_lo; > } > > + rc = proto_register(&smc_inet_prot, 1); > + if (rc) { > + pr_err("%s: proto_register smc_inet_prot fails with %d\n", __func__, rc); > + goto out_ulp; > + } > + inet_register_protosw(&smc_inet_protosw); > +#if IS_ENABLED(CONFIG_IPV6) > + rc = proto_register(&smc_inet6_prot, 1); > + if (rc) { > + pr_err("%s: proto_register smc_inet6_prot fails with %d\n", __func__, rc); > + goto out_inet_prot; > + } > + inet6_register_protosw(&smc_inet6_protosw); > +#endif > + > static_branch_enable(&tcp_have_smc); > return 0; > > +#if IS_ENABLED(CONFIG_IPV6) > +out_inet_prot: > + inet_unregister_protosw(&smc_inet_protosw); > + proto_unregister(&smc_inet_prot); > +#endif > +out_ulp: > + tcp_unregister_ulp(&smc_ulp_ops); > out_lo: > smc_loopback_exit(); > out_ib: > @@ -3633,6 +3656,10 @@ static int __init smc_init(void) > static void __exit smc_exit(void) > { > static_branch_disable(&tcp_have_smc); > + inet_unregister_protosw(&smc_inet_protosw); > +#if IS_ENABLED(CONFIG_IPV6) > + inet6_unregister_protosw(&smc_inet6_protosw); > +#endif > tcp_unregister_ulp(&smc_ulp_ops); > sock_unregister(PF_SMC); > smc_core_exit(); > @@ -3644,6 +3671,10 @@ static void __exit smc_exit(void) > destroy_workqueue(smc_hs_wq); > proto_unregister(&smc_proto6); > proto_unregister(&smc_proto); > + proto_unregister(&smc_inet_prot); > +#if IS_ENABLED(CONFIG_IPV6) > + proto_unregister(&smc_inet6_prot); > +#endif > smc_pnet_exit(); > smc_nl_exit(); > smc_clc_exit(); > @@ 
-3660,4 +3691,9 @@ static void __exit smc_exit(void) > MODULE_LICENSE("GPL"); > MODULE_ALIAS_NETPROTO(PF_SMC); > MODULE_ALIAS_TCP_ULP("smc"); > +/* 263 for IPPROTO_SMC and 1 for SOCK_STREAM */ > +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 263, 1); > +#if IS_ENABLED(CONFIG_IPV6) > +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 263, 1); > +#endif > MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME); > diff --git a/net/smc/inet_smc.c b/net/smc/inet_smc.c > new file mode 100644 > index 00000000..1ba73d7 > --- /dev/null > +++ b/net/smc/inet_smc.c > @@ -0,0 +1,108 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/* > + * Shared Memory Communications over RDMA (SMC-R) and RoCE > + * > + * Definitions for the IPPROTO_SMC (socket related) > + * > + * Copyright IBM Corp. 2016, 2018 > + * Copyright (c) 2024, Alibaba Inc. > + * > + * Author: D. Wythe <alibuda@linux.alibaba.com> > + */ > + > +#include "inet_smc.h" > +#include "smc.h" > + > +struct proto smc_inet_prot = { > + .name = "INET_SMC", > + .owner = THIS_MODULE, > + .init = smc_inet_init_sock, > + .hash = smc_hash_sk, > + .unhash = smc_unhash_sk, > + .release_cb = smc_release_cb, > + .obj_size = sizeof(struct smc_sock), > + .h.smc_hash = &smc_v4_hashinfo, > + .slab_flags = SLAB_TYPESAFE_BY_RCU, > +}; > + > +const struct proto_ops smc_inet_stream_ops = { > + .family = PF_INET, > + .owner = THIS_MODULE, > + .release = smc_release, > + .bind = smc_bind, > + .connect = smc_connect, > + .socketpair = sock_no_socketpair, > + .accept = smc_accept, > + .getname = smc_getname, > + .poll = smc_poll, > + .ioctl = smc_ioctl, > + .listen = smc_listen, > + .shutdown = smc_shutdown, > + .setsockopt = smc_setsockopt, > + .getsockopt = smc_getsockopt, > + .sendmsg = smc_sendmsg, > + .recvmsg = smc_recvmsg, > + .mmap = sock_no_mmap, > + .splice_read = smc_splice_read, > +}; > + > +struct inet_protosw smc_inet_protosw = { > + .type = SOCK_STREAM, > + .protocol = IPPROTO_SMC, > + .prot = &smc_inet_prot, > + .ops = &smc_inet_stream_ops, > + .flags = 
INET_PROTOSW_ICSK, > +}; > + > +#if IS_ENABLED(CONFIG_IPV6) > +struct proto smc_inet6_prot = { > + .name = "INET6_SMC", > + .owner = THIS_MODULE, > + .init = smc_inet_init_sock, > + .hash = smc_hash_sk, > + .unhash = smc_unhash_sk, > + .release_cb = smc_release_cb, > + .obj_size = sizeof(struct smc_sock), > + .h.smc_hash = &smc_v6_hashinfo, > + .slab_flags = SLAB_TYPESAFE_BY_RCU, > +}; > + > +const struct proto_ops smc_inet6_stream_ops = { > + .family = PF_INET6, > + .owner = THIS_MODULE, > + .release = smc_release, > + .bind = smc_bind, > + .connect = smc_connect, > + .socketpair = sock_no_socketpair, > + .accept = smc_accept, > + .getname = smc_getname, > + .poll = smc_poll, > + .ioctl = smc_ioctl, > + .listen = smc_listen, > + .shutdown = smc_shutdown, > + .setsockopt = smc_setsockopt, > + .getsockopt = smc_getsockopt, > + .sendmsg = smc_sendmsg, > + .recvmsg = smc_recvmsg, > + .mmap = sock_no_mmap, > + .splice_read = smc_splice_read, > +}; > + > +struct inet_protosw smc_inet6_protosw = { > + .type = SOCK_STREAM, > + .protocol = IPPROTO_SMC, > + .prot = &smc_inet6_prot, > + .ops = &smc_inet6_stream_ops, > + .flags = INET_PROTOSW_ICSK, > +}; > +#endif > + > +int smc_inet_init_sock(struct sock *sk) > +{ > + struct net *net = sock_net(sk); > + > + /* init common smc sock */ > + smc_sk_init(net, sk, IPPROTO_SMC); > + /* create clcsock */ > + return smc_create_clcsk(net, sk, sk->sk_family); > +} > diff --git a/net/smc/inet_smc.h b/net/smc/inet_smc.h > new file mode 100644 > index 00000000..c55345d > --- /dev/null > +++ b/net/smc/inet_smc.h > @@ -0,0 +1,34 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +/* > + * Shared Memory Communications over RDMA (SMC-R) and RoCE > + * > + * Definitions for the IPPROTO_SMC (socket related) > + > + * Copyright IBM Corp. 2016 > + * Copyright (c) 2024, Alibaba Inc. > + * > + * Author: D. 
Wythe <alibuda@linux.alibaba.com> > + */ > +#ifndef __INET_SMC > +#define __INET_SMC > + > +#include <net/protocol.h> > +#include <net/sock.h> > +#include <net/tcp.h> > + > +extern struct proto smc_inet_prot; > +extern const struct proto_ops smc_inet_stream_ops; > +extern struct inet_protosw smc_inet_protosw; > + > +#if IS_ENABLED(CONFIG_IPV6) > +#include <net/ipv6.h> > +/* MUST after net/tcp.h or warning */ > +#include <net/transp_v6.h> > +extern struct proto smc_inet6_prot; > +extern const struct proto_ops smc_inet6_stream_ops; > +extern struct inet_protosw smc_inet6_protosw; > +#endif If we append /* CONFIG_IPV6 */ to #endif to indicate that it is the end of CONFIG_IPV6, it is a good habit. When browsing the source code, it is easy for us to know that it is the end of CONFIG_IPV6. Just my 2 cent suggestions. It is a trivial problem. You can ignore it. But if you fix it, it can make the source code more readable. Zhu Yanjun > + > +int smc_inet_init_sock(struct sock *sk); > + > +#endif /* __INET_SMC */
On 5/30/24 3:55 AM, Zhu Yanjun wrote: > 在 2024/5/29 5:59, D. Wythe 写道: >> From: "D. Wythe" <alibuda@linux.alibaba.com> >> >> This patch allows to create smc socket via AF_INET, >> similar to the following code, >> >> /* create v4 smc sock */ >> v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC); >> >> /* create v6 smc sock */ >> v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC); >> >> There are several reasons why we believe it is appropriate here: >> >> 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6) >> address. There is no AF_SMC address at all. >> >> 2. Create smc socket in the AF_INET(6) path, which allows us to reuse >> the infrastructure of AF_INET(6) path, such as common ebpf hooks. >> Otherwise, smc have to implement it again in AF_SMC path. >> >> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com> >> --- >> include/uapi/linux/in.h | 2 + >> net/smc/Makefile | 2 +- >> net/smc/af_smc.c | 36 ++++++++++++++++ >> net/smc/inet_smc.c | 108 >> ++++++++++++++++++++++++++++++++++++++++++++++++ >> net/smc/inet_smc.h | 34 +++++++++++++++ >> 5 files changed, 181 insertions(+), 1 deletion(-) >> create mode 100644 net/smc/inet_smc.c >> create mode 100644 net/smc/inet_smc.h >> >> diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h >> index e682ab6..0c6322b 100644 >> --- a/include/uapi/linux/in.h >> +++ b/include/uapi/linux/in.h >> @@ -83,6 +83,8 @@ enum { >> #define IPPROTO_RAW IPPROTO_RAW >> IPPROTO_MPTCP = 262, /* Multipath TCP connection */ >> #define IPPROTO_MPTCP IPPROTO_MPTCP >> + IPPROTO_SMC = 263, /* Shared Memory Communications */ >> +#define IPPROTO_SMC IPPROTO_SMC >> IPPROTO_MAX >> }; >> #endif >> diff --git a/net/smc/Makefile b/net/smc/Makefile >> index 2c510d54..472b9ee 100644 >> --- a/net/smc/Makefile >> +++ b/net/smc/Makefile >> @@ -4,6 +4,6 @@ obj-$(CONFIG_SMC) += smc.o >> obj-$(CONFIG_SMC_DIAG) += smc_diag.o >> smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o >> smc_llc.o >> smc-y += smc_cdc.o smc_tx.o smc_rx.o 
smc_close.o smc_ism.o >> smc_netlink.o smc_stats.o >> -smc-y += smc_tracepoint.o >> +smc-y += smc_tracepoint.o inet_smc.o >> smc-$(CONFIG_SYSCTL) += smc_sysctl.o >> smc-$(CONFIG_SMC_LO) += smc_loopback.o >> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c >> index 8e3ce76..320624c 100644 >> --- a/net/smc/af_smc.c >> +++ b/net/smc/af_smc.c >> @@ -54,6 +54,7 @@ >> #include "smc_tracepoint.h" >> #include "smc_sysctl.h" >> #include "smc_loopback.h" >> +#include "inet_smc.h" >> static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link >> group >> * creation on server >> @@ -3594,9 +3595,31 @@ static int __init smc_init(void) >> goto out_lo; >> } >> + rc = proto_register(&smc_inet_prot, 1); >> + if (rc) { >> + pr_err("%s: proto_register smc_inet_prot fails with %d\n", >> __func__, rc); >> + goto out_ulp; >> + } >> + inet_register_protosw(&smc_inet_protosw); >> +#if IS_ENABLED(CONFIG_IPV6) >> + rc = proto_register(&smc_inet6_prot, 1); >> + if (rc) { >> + pr_err("%s: proto_register smc_inet6_prot fails with %d\n", >> __func__, rc); >> + goto out_inet_prot; >> + } >> + inet6_register_protosw(&smc_inet6_protosw); >> +#endif >> + >> static_branch_enable(&tcp_have_smc); >> return 0; >> +#if IS_ENABLED(CONFIG_IPV6) >> +out_inet_prot: >> + inet_unregister_protosw(&smc_inet_protosw); >> + proto_unregister(&smc_inet_prot); >> +#endif >> +out_ulp: >> + tcp_unregister_ulp(&smc_ulp_ops); >> out_lo: >> smc_loopback_exit(); >> out_ib: >> @@ -3633,6 +3656,10 @@ static int __init smc_init(void) >> static void __exit smc_exit(void) >> { >> static_branch_disable(&tcp_have_smc); >> + inet_unregister_protosw(&smc_inet_protosw); >> +#if IS_ENABLED(CONFIG_IPV6) >> + inet6_unregister_protosw(&smc_inet6_protosw); >> +#endif >> tcp_unregister_ulp(&smc_ulp_ops); >> sock_unregister(PF_SMC); >> smc_core_exit(); >> @@ -3644,6 +3671,10 @@ static void __exit smc_exit(void) >> destroy_workqueue(smc_hs_wq); >> proto_unregister(&smc_proto6); >> proto_unregister(&smc_proto); >> + 
proto_unregister(&smc_inet_prot); >> +#if IS_ENABLED(CONFIG_IPV6) >> + proto_unregister(&smc_inet6_prot); >> +#endif >> smc_pnet_exit(); >> smc_nl_exit(); >> smc_clc_exit(); >> @@ -3660,4 +3691,9 @@ static void __exit smc_exit(void) >> MODULE_LICENSE("GPL"); >> MODULE_ALIAS_NETPROTO(PF_SMC); >> MODULE_ALIAS_TCP_ULP("smc"); >> +/* 263 for IPPROTO_SMC and 1 for SOCK_STREAM */ >> +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 263, 1); >> +#if IS_ENABLED(CONFIG_IPV6) >> +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 263, 1); >> +#endif >> MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME); >> diff --git a/net/smc/inet_smc.c b/net/smc/inet_smc.c >> new file mode 100644 >> index 00000000..1ba73d7 >> --- /dev/null >> +++ b/net/smc/inet_smc.c >> @@ -0,0 +1,108 @@ >> +// SPDX-License-Identifier: GPL-2.0-only >> +/* >> + * Shared Memory Communications over RDMA (SMC-R) and RoCE >> + * >> + * Definitions for the IPPROTO_SMC (socket related) >> + * >> + * Copyright IBM Corp. 2016, 2018 >> + * Copyright (c) 2024, Alibaba Inc. >> + * >> + * Author: D. 
Wythe <alibuda@linux.alibaba.com> >> + */ >> + >> +#include "inet_smc.h" >> +#include "smc.h" >> + >> +struct proto smc_inet_prot = { >> + .name = "INET_SMC", >> + .owner = THIS_MODULE, >> + .init = smc_inet_init_sock, >> + .hash = smc_hash_sk, >> + .unhash = smc_unhash_sk, >> + .release_cb = smc_release_cb, >> + .obj_size = sizeof(struct smc_sock), >> + .h.smc_hash = &smc_v4_hashinfo, >> + .slab_flags = SLAB_TYPESAFE_BY_RCU, >> +}; >> + >> +const struct proto_ops smc_inet_stream_ops = { >> + .family = PF_INET, >> + .owner = THIS_MODULE, >> + .release = smc_release, >> + .bind = smc_bind, >> + .connect = smc_connect, >> + .socketpair = sock_no_socketpair, >> + .accept = smc_accept, >> + .getname = smc_getname, >> + .poll = smc_poll, >> + .ioctl = smc_ioctl, >> + .listen = smc_listen, >> + .shutdown = smc_shutdown, >> + .setsockopt = smc_setsockopt, >> + .getsockopt = smc_getsockopt, >> + .sendmsg = smc_sendmsg, >> + .recvmsg = smc_recvmsg, >> + .mmap = sock_no_mmap, >> + .splice_read = smc_splice_read, >> +}; >> + >> +struct inet_protosw smc_inet_protosw = { >> + .type = SOCK_STREAM, >> + .protocol = IPPROTO_SMC, >> + .prot = &smc_inet_prot, >> + .ops = &smc_inet_stream_ops, >> + .flags = INET_PROTOSW_ICSK, >> +}; >> + >> +#if IS_ENABLED(CONFIG_IPV6) >> +struct proto smc_inet6_prot = { >> + .name = "INET6_SMC", >> + .owner = THIS_MODULE, >> + .init = smc_inet_init_sock, >> + .hash = smc_hash_sk, >> + .unhash = smc_unhash_sk, >> + .release_cb = smc_release_cb, >> + .obj_size = sizeof(struct smc_sock), >> + .h.smc_hash = &smc_v6_hashinfo, >> + .slab_flags = SLAB_TYPESAFE_BY_RCU, >> +}; >> + >> +const struct proto_ops smc_inet6_stream_ops = { >> + .family = PF_INET6, >> + .owner = THIS_MODULE, >> + .release = smc_release, >> + .bind = smc_bind, >> + .connect = smc_connect, >> + .socketpair = sock_no_socketpair, >> + .accept = smc_accept, >> + .getname = smc_getname, >> + .poll = smc_poll, >> + .ioctl = smc_ioctl, >> + .listen = smc_listen, >> + .shutdown = 
smc_shutdown, >> + .setsockopt = smc_setsockopt, >> + .getsockopt = smc_getsockopt, >> + .sendmsg = smc_sendmsg, >> + .recvmsg = smc_recvmsg, >> + .mmap = sock_no_mmap, >> + .splice_read = smc_splice_read, >> +}; >> + >> +struct inet_protosw smc_inet6_protosw = { >> + .type = SOCK_STREAM, >> + .protocol = IPPROTO_SMC, >> + .prot = &smc_inet6_prot, >> + .ops = &smc_inet6_stream_ops, >> + .flags = INET_PROTOSW_ICSK, >> +}; >> +#endif >> + >> +int smc_inet_init_sock(struct sock *sk) >> +{ >> + struct net *net = sock_net(sk); >> + >> + /* init common smc sock */ >> + smc_sk_init(net, sk, IPPROTO_SMC); >> + /* create clcsock */ >> + return smc_create_clcsk(net, sk, sk->sk_family); >> +} >> diff --git a/net/smc/inet_smc.h b/net/smc/inet_smc.h >> new file mode 100644 >> index 00000000..c55345d >> --- /dev/null >> +++ b/net/smc/inet_smc.h >> @@ -0,0 +1,34 @@ >> +/* SPDX-License-Identifier: GPL-2.0 */ >> +/* >> + * Shared Memory Communications over RDMA (SMC-R) and RoCE >> + * >> + * Definitions for the IPPROTO_SMC (socket related) >> + >> + * Copyright IBM Corp. 2016 >> + * Copyright (c) 2024, Alibaba Inc. >> + * >> + * Author: D. Wythe <alibuda@linux.alibaba.com> >> + */ >> +#ifndef __INET_SMC >> +#define __INET_SMC >> + >> +#include <net/protocol.h> >> +#include <net/sock.h> >> +#include <net/tcp.h> >> + >> +extern struct proto smc_inet_prot; >> +extern const struct proto_ops smc_inet_stream_ops; >> +extern struct inet_protosw smc_inet_protosw; >> + >> +#if IS_ENABLED(CONFIG_IPV6) >> +#include <net/ipv6.h> >> +/* MUST after net/tcp.h or warning */ >> +#include <net/transp_v6.h> >> +extern struct proto smc_inet6_prot; >> +extern const struct proto_ops smc_inet6_stream_ops; >> +extern struct inet_protosw smc_inet6_protosw; >> +#endif > > If we append /* CONFIG_IPV6 */ to #endif to indicate that it is the > end of CONFIG_IPV6, it is a good habit. When browsing the source code, > it is easy for us to know that it is the end of CONFIG_IPV6. > Just my 2 cent suggestions. 
It is a trivial problem. You can ignore it. > But if you fix it, it can make the source code more readable. > > Zhu Yanjun I really like the style you suggested; I will use it in the next version. Best wishes, D. Wythe > >> + >> +int smc_inet_init_sock(struct sock *sk); >> + >> +#endif /* __INET_SMC */
On 5/29/24 7:58 PM, Wenjia Zhang wrote: > > > On 29.05.24 05:59, D. Wythe wrote: >> From: "D. Wythe" <alibuda@linux.alibaba.com> >> >> This patch allows to create smc socket via AF_INET, >> similar to the following code, >> >> /* create v4 smc sock */ >> v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC); >> >> /* create v6 smc sock */ >> v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC); >> >> There are several reasons why we believe it is appropriate here: >> >> 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6) >> address. There is no AF_SMC address at all. >> >> 2. Create smc socket in the AF_INET(6) path, which allows us to reuse >> the infrastructure of AF_INET(6) path, such as common ebpf hooks. >> Otherwise, smc have to implement it again in AF_SMC path. >> >> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com> >> --- >> include/uapi/linux/in.h | 2 + >> net/smc/Makefile | 2 +- >> net/smc/af_smc.c | 36 ++++++++++++++++ >> net/smc/inet_smc.c | 108 >> ++++++++++++++++++++++++++++++++++++++++++++++++ >> net/smc/inet_smc.h | 34 +++++++++++++++ >> 5 files changed, 181 insertions(+), 1 deletion(-) >> create mode 100644 net/smc/inet_smc.c >> create mode 100644 net/smc/inet_smc.h >> >> diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h >> index e682ab6..0c6322b 100644 >> --- a/include/uapi/linux/in.h >> +++ b/include/uapi/linux/in.h >> @@ -83,6 +83,8 @@ enum { >> #define IPPROTO_RAW IPPROTO_RAW >> IPPROTO_MPTCP = 262, /* Multipath TCP connection */ >> #define IPPROTO_MPTCP IPPROTO_MPTCP >> + IPPROTO_SMC = 263, /* Shared Memory Communications */ >> +#define IPPROTO_SMC IPPROTO_SMC >> IPPROTO_MAX >> }; >> #endif >> diff --git a/net/smc/Makefile b/net/smc/Makefile >> index 2c510d54..472b9ee 100644 >> --- a/net/smc/Makefile >> +++ b/net/smc/Makefile >> @@ -4,6 +4,6 @@ obj-$(CONFIG_SMC) += smc.o >> obj-$(CONFIG_SMC_DIAG) += smc_diag.o >> smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o >> smc_llc.o >> smc-y += smc_cdc.o 
smc_tx.o smc_rx.o smc_close.o smc_ism.o >> smc_netlink.o smc_stats.o >> -smc-y += smc_tracepoint.o >> +smc-y += smc_tracepoint.o inet_smc.o >> smc-$(CONFIG_SYSCTL) += smc_sysctl.o >> smc-$(CONFIG_SMC_LO) += smc_loopback.o >> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c >> index 8e3ce76..320624c 100644 >> --- a/net/smc/af_smc.c >> +++ b/net/smc/af_smc.c >> @@ -54,6 +54,7 @@ >> #include "smc_tracepoint.h" >> #include "smc_sysctl.h" >> #include "smc_loopback.h" >> +#include "inet_smc.h" >> static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link >> group >> * creation on server >> @@ -3594,9 +3595,31 @@ static int __init smc_init(void) >> goto out_lo; >> } >> + rc = proto_register(&smc_inet_prot, 1); >> + if (rc) { >> + pr_err("%s: proto_register smc_inet_prot fails with %d\n", >> __func__, rc); >> + goto out_ulp; >> + } >> + inet_register_protosw(&smc_inet_protosw); >> +#if IS_ENABLED(CONFIG_IPV6) >> + rc = proto_register(&smc_inet6_prot, 1); >> + if (rc) { >> + pr_err("%s: proto_register smc_inet6_prot fails with %d\n", >> __func__, rc); >> + goto out_inet_prot; >> + } >> + inet6_register_protosw(&smc_inet6_protosw); > > Comparing to inet_register_protosw(), the inet6_register_protosw() > returns an integer. Thus, making error check and direct corresponding > housekeeping here looks IMO much cleaner. > Oops... I was under the impression that it had no return. In the prior RFC, I even commented that it had no return. Quite the oversight on my part. 
>> +#endif >> + >> static_branch_enable(&tcp_have_smc); >> return 0; >> +#if IS_ENABLED(CONFIG_IPV6) >> +out_inet_prot: >> + inet_unregister_protosw(&smc_inet_protosw); >> + proto_unregister(&smc_inet_prot); >> +#endif >> +out_ulp: >> + tcp_unregister_ulp(&smc_ulp_ops); >> out_lo: >> smc_loopback_exit(); >> out_ib: >> @@ -3633,6 +3656,10 @@ static int __init smc_init(void) >> static void __exit smc_exit(void) >> { >> static_branch_disable(&tcp_have_smc); >> + inet_unregister_protosw(&smc_inet_protosw); >> +#if IS_ENABLED(CONFIG_IPV6) >> + inet6_unregister_protosw(&smc_inet6_protosw); >> +#endif >> tcp_unregister_ulp(&smc_ulp_ops); >> sock_unregister(PF_SMC); >> smc_core_exit(); >> @@ -3644,6 +3671,10 @@ static void __exit smc_exit(void) >> destroy_workqueue(smc_hs_wq); >> proto_unregister(&smc_proto6); >> proto_unregister(&smc_proto); >> + proto_unregister(&smc_inet_prot); >> +#if IS_ENABLED(CONFIG_IPV6) >> + proto_unregister(&smc_inet6_prot); >> +#end > > Since there is already inet_smc.c, I'd recommend to group these > register and unregister stuff respectively in functions like e.g. > smc_inet_init() and smc_inet_exit() in inet_smc.c > Agreed, I also see similar opinions from the community, and I will improve it in the next version. 
>> smc_pnet_exit(); >> smc_nl_exit(); >> smc_clc_exit(); >> @@ -3660,4 +3691,9 @@ static void __exit smc_exit(void) >> MODULE_LICENSE("GPL"); >> MODULE_ALIAS_NETPROTO(PF_SMC); >> MODULE_ALIAS_TCP_ULP("smc"); >> +/* 263 for IPPROTO_SMC and 1 for SOCK_STREAM */ >> +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 263, 1); >> +#if IS_ENABLED(CONFIG_IPV6) >> +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 263, 1); >> +#endif >> MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME); >> diff --git a/net/smc/inet_smc.c b/net/smc/inet_smc.c >> new file mode 100644 >> index 00000000..1ba73d7 >> --- /dev/null >> +++ b/net/smc/inet_smc.c > > In order to keep the consistency with the structure and function names > in the files, I'm wondering why not to use smc_inet.h and smc_inet.c > instead of inet_smc.h and inet_smc.c respectively That's because I am trying to keep the name style to be consistent with af_smc.c. But I don't insist on this, using smc_inet is also good for me. Thanks, D. Wythe > >> @@ -0,0 +1,108 @@ >> +// SPDX-License-Identifier: GPL-2.0-only >> +/* >> + * Shared Memory Communications over RDMA (SMC-R) and RoCE >> + * >> + * Definitions for the IPPROTO_SMC (socket related) >> + * >> + * Copyright IBM Corp. 2016, 2018 >> + * Copyright (c) 2024, Alibaba Inc. >> + * >> + * Author: D. 
Wythe <alibuda@linux.alibaba.com> >> + */ >> + >> +#include "inet_smc.h" >> +#include "smc.h" >> + >> +struct proto smc_inet_prot = { >> + .name = "INET_SMC", >> + .owner = THIS_MODULE, >> + .init = smc_inet_init_sock, >> + .hash = smc_hash_sk, >> + .unhash = smc_unhash_sk, >> + .release_cb = smc_release_cb, >> + .obj_size = sizeof(struct smc_sock), >> + .h.smc_hash = &smc_v4_hashinfo, >> + .slab_flags = SLAB_TYPESAFE_BY_RCU, >> +}; >> + >> +const struct proto_ops smc_inet_stream_ops = { >> + .family = PF_INET, >> + .owner = THIS_MODULE, >> + .release = smc_release, >> + .bind = smc_bind, >> + .connect = smc_connect, >> + .socketpair = sock_no_socketpair, >> + .accept = smc_accept, >> + .getname = smc_getname, >> + .poll = smc_poll, >> + .ioctl = smc_ioctl, >> + .listen = smc_listen, >> + .shutdown = smc_shutdown, >> + .setsockopt = smc_setsockopt, >> + .getsockopt = smc_getsockopt, >> + .sendmsg = smc_sendmsg, >> + .recvmsg = smc_recvmsg, >> + .mmap = sock_no_mmap, >> + .splice_read = smc_splice_read, >> +}; >> + >> +struct inet_protosw smc_inet_protosw = { >> + .type = SOCK_STREAM, >> + .protocol = IPPROTO_SMC, >> + .prot = &smc_inet_prot, >> + .ops = &smc_inet_stream_ops, >> + .flags = INET_PROTOSW_ICSK, >> +}; >> + >> +#if IS_ENABLED(CONFIG_IPV6) >> +struct proto smc_inet6_prot = { >> + .name = "INET6_SMC", >> + .owner = THIS_MODULE, >> + .init = smc_inet_init_sock, >> + .hash = smc_hash_sk, >> + .unhash = smc_unhash_sk, >> + .release_cb = smc_release_cb, >> + .obj_size = sizeof(struct smc_sock), >> + .h.smc_hash = &smc_v6_hashinfo, >> + .slab_flags = SLAB_TYPESAFE_BY_RCU, >> +}; >> + >> +const struct proto_ops smc_inet6_stream_ops = { >> + .family = PF_INET6, >> + .owner = THIS_MODULE, >> + .release = smc_release, >> + .bind = smc_bind, >> + .connect = smc_connect, >> + .socketpair = sock_no_socketpair, >> + .accept = smc_accept, >> + .getname = smc_getname, >> + .poll = smc_poll, >> + .ioctl = smc_ioctl, >> + .listen = smc_listen, >> + .shutdown = 
smc_shutdown, >> + .setsockopt = smc_setsockopt, >> + .getsockopt = smc_getsockopt, >> + .sendmsg = smc_sendmsg, >> + .recvmsg = smc_recvmsg, >> + .mmap = sock_no_mmap, >> + .splice_read = smc_splice_read, >> +}; >> + >> +struct inet_protosw smc_inet6_protosw = { >> + .type = SOCK_STREAM, >> + .protocol = IPPROTO_SMC, >> + .prot = &smc_inet6_prot, >> + .ops = &smc_inet6_stream_ops, >> + .flags = INET_PROTOSW_ICSK, >> +}; >> +#endif >> + >> +int smc_inet_init_sock(struct sock *sk) >> +{ >> + struct net *net = sock_net(sk); >> + >> + /* init common smc sock */ >> + smc_sk_init(net, sk, IPPROTO_SMC); >> + /* create clcsock */ >> + return smc_create_clcsk(net, sk, sk->sk_family); >> +} >> diff --git a/net/smc/inet_smc.h b/net/smc/inet_smc.h >> new file mode 100644 >> index 00000000..c55345d >> --- /dev/null >> +++ b/net/smc/inet_smc.h >> @@ -0,0 +1,34 @@ >> +/* SPDX-License-Identifier: GPL-2.0 */ >> +/* >> + * Shared Memory Communications over RDMA (SMC-R) and RoCE >> + * >> + * Definitions for the IPPROTO_SMC (socket related) >> + >> + * Copyright IBM Corp. 2016 >> + * Copyright (c) 2024, Alibaba Inc. >> + * >> + * Author: D. Wythe <alibuda@linux.alibaba.com> >> + */ >> +#ifndef __INET_SMC >> +#define __INET_SMC >> + >> +#include <net/protocol.h> >> +#include <net/sock.h> >> +#include <net/tcp.h> >> + >> +extern struct proto smc_inet_prot; >> +extern const struct proto_ops smc_inet_stream_ops; >> +extern struct inet_protosw smc_inet_protosw; >> + >> +#if IS_ENABLED(CONFIG_IPV6) >> +#include <net/ipv6.h> >> +/* MUST after net/tcp.h or warning */ >> +#include <net/transp_v6.h> >> +extern struct proto smc_inet6_prot; >> +extern const struct proto_ops smc_inet6_stream_ops; >> +extern struct inet_protosw smc_inet6_protosw; >> +#endif >> + >> +int smc_inet_init_sock(struct sock *sk); >> + >> +#endif /* __INET_SMC */
On 5/29/24 7:12 PM, Dust Li wrote: > On 2024-05-29 11:59:07, D. Wythe wrote: >> From: "D. Wythe" <alibuda@linux.alibaba.com> >> >> This patch allows to create smc socket via AF_INET, >> similar to the following code, >> >> /* create v4 smc sock */ >> v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC); >> >> /* create v6 smc sock */ >> v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC); >> >> There are several reasons why we believe it is appropriate here: >> >> 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6) >> address. There is no AF_SMC address at all. >> >> 2. Create smc socket in the AF_INET(6) path, which allows us to reuse >> the infrastructure of AF_INET(6) path, such as common ebpf hooks. >> Otherwise, smc have to implement it again in AF_SMC path. >> >> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com> >> --- >> include/uapi/linux/in.h | 2 + >> net/smc/Makefile | 2 +- >> net/smc/af_smc.c | 36 ++++++++++++++++ >> net/smc/inet_smc.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++++ >> net/smc/inet_smc.h | 34 +++++++++++++++ >> 5 files changed, 181 insertions(+), 1 deletion(-) >> create mode 100644 net/smc/inet_smc.c >> create mode 100644 net/smc/inet_smc.h >> >> diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h >> index e682ab6..0c6322b 100644 >> --- a/include/uapi/linux/in.h >> +++ b/include/uapi/linux/in.h >> @@ -83,6 +83,8 @@ enum { >> #define IPPROTO_RAW IPPROTO_RAW >> IPPROTO_MPTCP = 262, /* Multipath TCP connection */ >> #define IPPROTO_MPTCP IPPROTO_MPTCP >> + IPPROTO_SMC = 263, /* Shared Memory Communications */ >> +#define IPPROTO_SMC IPPROTO_SMC >> IPPROTO_MAX >> }; >> #endif >> diff --git a/net/smc/Makefile b/net/smc/Makefile >> index 2c510d54..472b9ee 100644 >> --- a/net/smc/Makefile >> +++ b/net/smc/Makefile >> @@ -4,6 +4,6 @@ obj-$(CONFIG_SMC) += smc.o >> obj-$(CONFIG_SMC_DIAG) += smc_diag.o >> smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o >> smc-y += smc_cdc.o smc_tx.o smc_rx.o 
smc_close.o smc_ism.o smc_netlink.o smc_stats.o >> -smc-y += smc_tracepoint.o >> +smc-y += smc_tracepoint.o inet_smc.o >> smc-$(CONFIG_SYSCTL) += smc_sysctl.o >> smc-$(CONFIG_SMC_LO) += smc_loopback.o >> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c >> index 8e3ce76..320624c 100644 >> --- a/net/smc/af_smc.c >> +++ b/net/smc/af_smc.c >> @@ -54,6 +54,7 @@ >> #include "smc_tracepoint.h" >> #include "smc_sysctl.h" >> #include "smc_loopback.h" >> +#include "inet_smc.h" >> >> static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group >> * creation on server >> @@ -3594,9 +3595,31 @@ static int __init smc_init(void) >> goto out_lo; >> } >> >> + rc = proto_register(&smc_inet_prot, 1); >> + if (rc) { >> + pr_err("%s: proto_register smc_inet_prot fails with %d\n", __func__, rc); >> + goto out_ulp; >> + } >> + inet_register_protosw(&smc_inet_protosw); >> +#if IS_ENABLED(CONFIG_IPV6) >> + rc = proto_register(&smc_inet6_prot, 1); >> + if (rc) { >> + pr_err("%s: proto_register smc_inet6_prot fails with %d\n", __func__, rc); >> + goto out_inet_prot; >> + } >> + inet6_register_protosw(&smc_inet6_protosw); >> +#endif >> + > What do you think of moving all those inet initialization code into > something like smc_inet_init() and move it to smc_inet.c ? > Agreed. 
>> static_branch_enable(&tcp_have_smc); >> return 0; >> >> +#if IS_ENABLED(CONFIG_IPV6) >> +out_inet_prot: >> + inet_unregister_protosw(&smc_inet_protosw); >> + proto_unregister(&smc_inet_prot); >> +#endif >> +out_ulp: >> + tcp_unregister_ulp(&smc_ulp_ops); >> out_lo: >> smc_loopback_exit(); >> out_ib: >> @@ -3633,6 +3656,10 @@ static int __init smc_init(void) >> static void __exit smc_exit(void) >> { >> static_branch_disable(&tcp_have_smc); >> + inet_unregister_protosw(&smc_inet_protosw); >> +#if IS_ENABLED(CONFIG_IPV6) >> + inet6_unregister_protosw(&smc_inet6_protosw); >> +#endif >> tcp_unregister_ulp(&smc_ulp_ops); >> sock_unregister(PF_SMC); >> smc_core_exit(); >> @@ -3644,6 +3671,10 @@ static void __exit smc_exit(void) >> destroy_workqueue(smc_hs_wq); >> proto_unregister(&smc_proto6); >> proto_unregister(&smc_proto); >> + proto_unregister(&smc_inet_prot); >> +#if IS_ENABLED(CONFIG_IPV6) >> + proto_unregister(&smc_inet6_prot); >> +#endif >> smc_pnet_exit(); >> smc_nl_exit(); >> smc_clc_exit(); >> @@ -3660,4 +3691,9 @@ static void __exit smc_exit(void) >> MODULE_LICENSE("GPL"); >> MODULE_ALIAS_NETPROTO(PF_SMC); >> MODULE_ALIAS_TCP_ULP("smc"); >> +/* 263 for IPPROTO_SMC and 1 for SOCK_STREAM */ >> +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 263, 1); >> +#if IS_ENABLED(CONFIG_IPV6) >> +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 263, 1); >> +#endif >> MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME); >> diff --git a/net/smc/inet_smc.c b/net/smc/inet_smc.c >> new file mode 100644 >> index 00000000..1ba73d7 >> --- /dev/null >> +++ b/net/smc/inet_smc.c >> @@ -0,0 +1,108 @@ >> +// SPDX-License-Identifier: GPL-2.0-only >> +/* >> + * Shared Memory Communications over RDMA (SMC-R) and RoCE >> + * >> + * Definitions for the IPPROTO_SMC (socket related) >> + * >> + * Copyright IBM Corp. 2016, 2018 >> + * Copyright (c) 2024, Alibaba Inc. >> + * >> + * Author: D. 
Wythe <alibuda@linux.alibaba.com> >> + */ >> + >> +#include "inet_smc.h" >> +#include "smc.h" >> + >> +struct proto smc_inet_prot = { >> + .name = "INET_SMC", >> + .owner = THIS_MODULE, >> + .init = smc_inet_init_sock, >> + .hash = smc_hash_sk, >> + .unhash = smc_unhash_sk, >> + .release_cb = smc_release_cb, >> + .obj_size = sizeof(struct smc_sock), >> + .h.smc_hash = &smc_v4_hashinfo, >> + .slab_flags = SLAB_TYPESAFE_BY_RCU, >> +}; >> + >> +const struct proto_ops smc_inet_stream_ops = { >> + .family = PF_INET, >> + .owner = THIS_MODULE, >> + .release = smc_release, >> + .bind = smc_bind, >> + .connect = smc_connect, >> + .socketpair = sock_no_socketpair, >> + .accept = smc_accept, >> + .getname = smc_getname, >> + .poll = smc_poll, >> + .ioctl = smc_ioctl, >> + .listen = smc_listen, >> + .shutdown = smc_shutdown, >> + .setsockopt = smc_setsockopt, >> + .getsockopt = smc_getsockopt, >> + .sendmsg = smc_sendmsg, >> + .recvmsg = smc_recvmsg, >> + .mmap = sock_no_mmap, >> + .splice_read = smc_splice_read, >> +}; >> + >> +struct inet_protosw smc_inet_protosw = { >> + .type = SOCK_STREAM, >> + .protocol = IPPROTO_SMC, >> + .prot = &smc_inet_prot, >> + .ops = &smc_inet_stream_ops, >> + .flags = INET_PROTOSW_ICSK, >> +}; >> + >> +#if IS_ENABLED(CONFIG_IPV6) >> +struct proto smc_inet6_prot = { >> + .name = "INET6_SMC", >> + .owner = THIS_MODULE, >> + .init = smc_inet_init_sock, >> + .hash = smc_hash_sk, >> + .unhash = smc_unhash_sk, >> + .release_cb = smc_release_cb, >> + .obj_size = sizeof(struct smc_sock), >> + .h.smc_hash = &smc_v6_hashinfo, >> + .slab_flags = SLAB_TYPESAFE_BY_RCU, >> +}; >> + >> +const struct proto_ops smc_inet6_stream_ops = { >> + .family = PF_INET6, >> + .owner = THIS_MODULE, >> + .release = smc_release, >> + .bind = smc_bind, >> + .connect = smc_connect, >> + .socketpair = sock_no_socketpair, >> + .accept = smc_accept, >> + .getname = smc_getname, >> + .poll = smc_poll, >> + .ioctl = smc_ioctl, >> + .listen = smc_listen, >> + .shutdown = 
smc_shutdown, >> + .setsockopt = smc_setsockopt, >> + .getsockopt = smc_getsockopt, >> + .sendmsg = smc_sendmsg, >> + .recvmsg = smc_recvmsg, >> + .mmap = sock_no_mmap, >> + .splice_read = smc_splice_read, >> +}; >> + >> +struct inet_protosw smc_inet6_protosw = { >> + .type = SOCK_STREAM, >> + .protocol = IPPROTO_SMC, >> + .prot = &smc_inet6_prot, >> + .ops = &smc_inet6_stream_ops, >> + .flags = INET_PROTOSW_ICSK, >> +}; >> +#endif >> + >> +int smc_inet_init_sock(struct sock *sk) >> +{ >> + struct net *net = sock_net(sk); >> + >> + /* init common smc sock */ >> + smc_sk_init(net, sk, IPPROTO_SMC); >> + /* create clcsock */ >> + return smc_create_clcsk(net, sk, sk->sk_family); >> +} >> diff --git a/net/smc/inet_smc.h b/net/smc/inet_smc.h >> new file mode 100644 >> index 00000000..c55345d >> --- /dev/null >> +++ b/net/smc/inet_smc.h >> @@ -0,0 +1,34 @@ >> +/* SPDX-License-Identifier: GPL-2.0 */ >> +/* >> + * Shared Memory Communications over RDMA (SMC-R) and RoCE >> + * >> + * Definitions for the IPPROTO_SMC (socket related) >> + >> + * Copyright IBM Corp. 2016 >> + * Copyright (c) 2024, Alibaba Inc. >> + * >> + * Author: D. Wythe <alibuda@linux.alibaba.com> >> + */ >> +#ifndef __INET_SMC >> +#define __INET_SMC >> + >> +#include <net/protocol.h> >> +#include <net/sock.h> >> +#include <net/tcp.h> > Why not put those 'include's in the .c file ? Agreed. But I think that <net/protocol.h> is needed to ensure that the header file itself is complete.
>> + >> +extern struct proto smc_inet_prot; >> +extern const struct proto_ops smc_inet_stream_ops; >> +extern struct inet_protosw smc_inet_protosw; >> + >> +#if IS_ENABLED(CONFIG_IPV6) >> +#include <net/ipv6.h> >> +/* MUST after net/tcp.h or warning */ >> +#include <net/transp_v6.h> >> +extern struct proto smc_inet6_prot; >> +extern const struct proto_ops smc_inet6_stream_ops; >> +extern struct inet_protosw smc_inet6_protosw; >> +#endif >> + >> +int smc_inet_init_sock(struct sock *sk); > Seems smc_inet_init_sock() is only used in smc_inet.c, > why not define it as a static function? > > Best regards, > Dust That's true, I will fix it. Best wishes, D. Wythe >> + >> +#endif /* __INET_SMC */ >> -- >> 1.8.3.1 >>
On Wed, May 29, 2024 at 11:59:07AM +0800, D. Wythe wrote: > From: "D. Wythe" <alibuda@linux.alibaba.com> > > This patch allows to create smc socket via AF_INET, > similar to the following code, > > /* create v4 smc sock */ > v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC); > > /* create v6 smc sock */ > v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC); > > There are several reasons why we believe it is appropriate here: > > 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6) > address. There is no AF_SMC address at all. > > 2. Create smc socket in the AF_INET(6) path, which allows us to reuse > the infrastructure of AF_INET(6) path, such as common ebpf hooks. > Otherwise, smc have to implement it again in AF_SMC path. > > Signed-off-by: D. Wythe <alibuda@linux.alibaba.com> ... > diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c ... > @@ -3594,9 +3595,31 @@ static int __init smc_init(void) > goto out_lo; > } > > + rc = proto_register(&smc_inet_prot, 1); > + if (rc) { > + pr_err("%s: proto_register smc_inet_prot fails with %d\n", __func__, rc); Hi, FWIW, my feeling is that if a log message includes __func__ then it should be a debug level message, and even then I'm dubious about the value of __func__: we do have many tools including dynamic tracing for pinpointing problems. So I would suggest rephrasing this message and dropping __func__. Or maybe removing it entirely. Or if not, lowering the priority of this message to debug. If for some reason __func__ remains, please do consider wrapping the line to 80 columns or less, which can be trivially done here (please don't split the format string in any case). Flagged by checkpatch.pl --max-line-length=80 > + goto out_ulp; > + } > + inet_register_protosw(&smc_inet_protosw); > +#if IS_ENABLED(CONFIG_IPV6) > + rc = proto_register(&smc_inet6_prot, 1); > + if (rc) { > + pr_err("%s: proto_register smc_inet6_prot fails with %d\n", __func__, rc); Here too.
> + goto out_inet_prot; > + } > + inet6_register_protosw(&smc_inet6_protosw); > +#endif ...
On 6/1/24 9:06 PM, Simon Horman wrote: > On Wed, May 29, 2024 at 11:59:07AM +0800, D. Wythe wrote: >> From: "D. Wythe" <alibuda@linux.alibaba.com> >> >> This patch allows to create smc socket via AF_INET, >> similar to the following code, >> >> /* create v4 smc sock */ >> v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC); >> >> /* create v6 smc sock */ >> v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC); >> >> There are several reasons why we believe it is appropriate here: >> >> 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6) >> address. There is no AF_SMC address at all. >> >> 2. Create smc socket in the AF_INET(6) path, which allows us to reuse >> the infrastructure of AF_INET(6) path, such as common ebpf hooks. >> Otherwise, smc have to implement it again in AF_SMC path. >> >> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com> > ... > >> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c > ... > >> @@ -3594,9 +3595,31 @@ static int __init smc_init(void) >> goto out_lo; >> } >> >> + rc = proto_register(&smc_inet_prot, 1); >> + if (rc) { >> + pr_err("%s: proto_register smc_inet_prot fails with %d\n", __func__, rc); > Hi, > > FWIIW, my feeling is that if a log message includes __func__ then it should > be a debug level message, and even then I'm dubious about the value of > __func__: we do have many tools including dynamic tracing or pinpointing > problems. > > So I would suggest rephrasing this message and dropping __func__. > Or maybe removing it entirely. > Or if not, lowering the priority of this message to debug. > > If for some reason __func__ remains, please do consider wrapping > the line to 80c columns or less, which can be trivially done here > (please don't split the format string in any case). > > Flagged by checkpatch.pl --max-line-length=80 Hi Simon, Thank you very much for your feedback. Allow me to briefly explain the reasons for using pr_err and __func__ here. 
Regarding pr_err, the failure here leads to the failure of the module loading, which is definitely an error-level message rather than a debug-level one. As for __func__, I must admit that the purpose here is simply to align with the format of other error messages in smc_init(). In fact, I also feel that the presence of __func__ doesn't hold significant value because this error will only occur within this function. It's meaningless information for both users and kernel developers. Perhaps a more suitable format would be “smc: xxx: %d”. However, if changes are needed, I think they should be made across the board in order to maintain a consistent style. Maybe this can be addressed by submitting a new patch after this patch. @Wenjia, what do you think? Therefore, for now, I would like to wrap this line to not exceed 80 characters, to ensure it can pass the checkpatch.pl. What do you think? Best wishes, D. Wythe > >> + goto out_ulp; >> + } >> + inet_register_protosw(&smc_inet_protosw); >> +#if IS_ENABLED(CONFIG_IPV6) >> + rc = proto_register(&smc_inet6_prot, 1); >> + if (rc) { >> + pr_err("%s: proto_register smc_inet6_prot fails with %d\n", __func__, rc); > Here too. > >> + goto out_inet_prot; >> + } >> + inet6_register_protosw(&smc_inet6_protosw); >> +#endif > ...
On Mon, Jun 03, 2024 at 10:57:55AM +0800, D. Wythe wrote: > > > On 6/1/24 9:06 PM, Simon Horman wrote: > > On Wed, May 29, 2024 at 11:59:07AM +0800, D. Wythe wrote: > > > From: "D. Wythe" <alibuda@linux.alibaba.com> > > > > > > This patch allows to create smc socket via AF_INET, > > > similar to the following code, > > > > > > /* create v4 smc sock */ > > > v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC); > > > > > > /* create v6 smc sock */ > > > v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC); > > > > > > There are several reasons why we believe it is appropriate here: > > > > > > 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6) > > > address. There is no AF_SMC address at all. > > > > > > 2. Create smc socket in the AF_INET(6) path, which allows us to reuse > > > the infrastructure of AF_INET(6) path, such as common ebpf hooks. > > > Otherwise, smc have to implement it again in AF_SMC path. > > > > > > Signed-off-by: D. Wythe <alibuda@linux.alibaba.com> > > ... > > > > > diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c > > ... > > > > > @@ -3594,9 +3595,31 @@ static int __init smc_init(void) > > > goto out_lo; > > > } > > > + rc = proto_register(&smc_inet_prot, 1); > > > + if (rc) { > > > + pr_err("%s: proto_register smc_inet_prot fails with %d\n", __func__, rc); > > Hi, > > > > FWIIW, my feeling is that if a log message includes __func__ then it should > > be a debug level message, and even then I'm dubious about the value of > > __func__: we do have many tools including dynamic tracing or pinpointing > > problems. > > > > So I would suggest rephrasing this message and dropping __func__. > > Or maybe removing it entirely. > > Or if not, lowering the priority of this message to debug. > > > > If for some reason __func__ remains, please do consider wrapping > > the line to 80c columns or less, which can be trivially done here > > (please don't split the format string in any case). 
> > > > Flagged by checkpatch.pl --max-line-length=80 > > > Hi Simon, > > Thank you very much for your feedback. > > Allow me to briefly explain the reasons for using pr_err and __func__ here. > > Regarding pr_err, the failure here leads to the failure of the module > loading, which is definitely an error-level message rather than a > debug-level one. > > As for __func__, I must admit that the purpose here is simply to align with > the format of other error messages in smc_init(). In fact, I also feel that > the presence of > __func__ doesn't hold significant value because this error will only occur > within this function. It's meaningless information for both users and kernel > developers. > Perhaps a more suitable format would be “smc: xxx: %d”. > > However, if changes are needed, I think they should be made across the board > in order to maintain a consistent style. Maybe this can be addressed by > submitting a new patch after this patch. @Wenjia, what do you think? > > Therefore, for now, I would like to wrap this line to not exceed 80 > characters, to ensure it can pass the checkpatch.pl. > What do you think? Thanks, I agree with your reasoning. And I think this is a good approach for this patch. > > Best wishes, > D. Wythe > > > > > > + goto out_ulp; > > > + } > > > + inet_register_protosw(&smc_inet_protosw); > > > +#if IS_ENABLED(CONFIG_IPV6) > > > + rc = proto_register(&smc_inet6_prot, 1); > > > + if (rc) { > > > + pr_err("%s: proto_register smc_inet6_prot fails with %d\n", __func__, rc); > > Here too. > > > > > + goto out_inet_prot; > > > + } > > > + inet6_register_protosw(&smc_inet6_protosw); > > > +#endif > > ... >
diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index e682ab6..0c6322b 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -83,6 +83,8 @@ enum { #define IPPROTO_RAW IPPROTO_RAW IPPROTO_MPTCP = 262, /* Multipath TCP connection */ #define IPPROTO_MPTCP IPPROTO_MPTCP + IPPROTO_SMC = 263, /* Shared Memory Communications */ +#define IPPROTO_SMC IPPROTO_SMC IPPROTO_MAX }; #endif diff --git a/net/smc/Makefile b/net/smc/Makefile index 2c510d54..472b9ee 100644 --- a/net/smc/Makefile +++ b/net/smc/Makefile @@ -4,6 +4,6 @@ obj-$(CONFIG_SMC) += smc.o obj-$(CONFIG_SMC_DIAG) += smc_diag.o smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o -smc-y += smc_tracepoint.o +smc-y += smc_tracepoint.o inet_smc.o smc-$(CONFIG_SYSCTL) += smc_sysctl.o smc-$(CONFIG_SMC_LO) += smc_loopback.o diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 8e3ce76..320624c 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -54,6 +54,7 @@ #include "smc_tracepoint.h" #include "smc_sysctl.h" #include "smc_loopback.h" +#include "inet_smc.h" static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group * creation on server @@ -3594,9 +3595,31 @@ static int __init smc_init(void) goto out_lo; } + rc = proto_register(&smc_inet_prot, 1); + if (rc) { + pr_err("%s: proto_register smc_inet_prot fails with %d\n", __func__, rc); + goto out_ulp; + } + inet_register_protosw(&smc_inet_protosw); +#if IS_ENABLED(CONFIG_IPV6) + rc = proto_register(&smc_inet6_prot, 1); + if (rc) { + pr_err("%s: proto_register smc_inet6_prot fails with %d\n", __func__, rc); + goto out_inet_prot; + } + inet6_register_protosw(&smc_inet6_protosw); +#endif + static_branch_enable(&tcp_have_smc); return 0; +#if IS_ENABLED(CONFIG_IPV6) +out_inet_prot: + inet_unregister_protosw(&smc_inet_protosw); + proto_unregister(&smc_inet_prot); +#endif +out_ulp: + tcp_unregister_ulp(&smc_ulp_ops); out_lo: 
smc_loopback_exit(); out_ib: @@ -3633,6 +3656,10 @@ static int __init smc_init(void) static void __exit smc_exit(void) { static_branch_disable(&tcp_have_smc); + inet_unregister_protosw(&smc_inet_protosw); +#if IS_ENABLED(CONFIG_IPV6) + inet6_unregister_protosw(&smc_inet6_protosw); +#endif tcp_unregister_ulp(&smc_ulp_ops); sock_unregister(PF_SMC); smc_core_exit(); @@ -3644,6 +3671,10 @@ static void __exit smc_exit(void) destroy_workqueue(smc_hs_wq); proto_unregister(&smc_proto6); proto_unregister(&smc_proto); + proto_unregister(&smc_inet_prot); +#if IS_ENABLED(CONFIG_IPV6) + proto_unregister(&smc_inet6_prot); +#endif smc_pnet_exit(); smc_nl_exit(); smc_clc_exit(); @@ -3660,4 +3691,9 @@ static void __exit smc_exit(void) MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_SMC); MODULE_ALIAS_TCP_ULP("smc"); +/* 263 for IPPROTO_SMC and 1 for SOCK_STREAM */ +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 263, 1); +#if IS_ENABLED(CONFIG_IPV6) +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 263, 1); +#endif MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME); diff --git a/net/smc/inet_smc.c b/net/smc/inet_smc.c new file mode 100644 index 00000000..1ba73d7 --- /dev/null +++ b/net/smc/inet_smc.c @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Shared Memory Communications over RDMA (SMC-R) and RoCE + * + * Definitions for the IPPROTO_SMC (socket related) + * + * Copyright IBM Corp. 2016, 2018 + * Copyright (c) 2024, Alibaba Inc. + * + * Author: D. 
Wythe <alibuda@linux.alibaba.com> + */ + +#include "inet_smc.h" +#include "smc.h" + +struct proto smc_inet_prot = { + .name = "INET_SMC", + .owner = THIS_MODULE, + .init = smc_inet_init_sock, + .hash = smc_hash_sk, + .unhash = smc_unhash_sk, + .release_cb = smc_release_cb, + .obj_size = sizeof(struct smc_sock), + .h.smc_hash = &smc_v4_hashinfo, + .slab_flags = SLAB_TYPESAFE_BY_RCU, +}; + +const struct proto_ops smc_inet_stream_ops = { + .family = PF_INET, + .owner = THIS_MODULE, + .release = smc_release, + .bind = smc_bind, + .connect = smc_connect, + .socketpair = sock_no_socketpair, + .accept = smc_accept, + .getname = smc_getname, + .poll = smc_poll, + .ioctl = smc_ioctl, + .listen = smc_listen, + .shutdown = smc_shutdown, + .setsockopt = smc_setsockopt, + .getsockopt = smc_getsockopt, + .sendmsg = smc_sendmsg, + .recvmsg = smc_recvmsg, + .mmap = sock_no_mmap, + .splice_read = smc_splice_read, +}; + +struct inet_protosw smc_inet_protosw = { + .type = SOCK_STREAM, + .protocol = IPPROTO_SMC, + .prot = &smc_inet_prot, + .ops = &smc_inet_stream_ops, + .flags = INET_PROTOSW_ICSK, +}; + +#if IS_ENABLED(CONFIG_IPV6) +struct proto smc_inet6_prot = { + .name = "INET6_SMC", + .owner = THIS_MODULE, + .init = smc_inet_init_sock, + .hash = smc_hash_sk, + .unhash = smc_unhash_sk, + .release_cb = smc_release_cb, + .obj_size = sizeof(struct smc_sock), + .h.smc_hash = &smc_v6_hashinfo, + .slab_flags = SLAB_TYPESAFE_BY_RCU, +}; + +const struct proto_ops smc_inet6_stream_ops = { + .family = PF_INET6, + .owner = THIS_MODULE, + .release = smc_release, + .bind = smc_bind, + .connect = smc_connect, + .socketpair = sock_no_socketpair, + .accept = smc_accept, + .getname = smc_getname, + .poll = smc_poll, + .ioctl = smc_ioctl, + .listen = smc_listen, + .shutdown = smc_shutdown, + .setsockopt = smc_setsockopt, + .getsockopt = smc_getsockopt, + .sendmsg = smc_sendmsg, + .recvmsg = smc_recvmsg, + .mmap = sock_no_mmap, + .splice_read = smc_splice_read, +}; + +struct inet_protosw 
smc_inet6_protosw = { + .type = SOCK_STREAM, + .protocol = IPPROTO_SMC, + .prot = &smc_inet6_prot, + .ops = &smc_inet6_stream_ops, + .flags = INET_PROTOSW_ICSK, +}; +#endif + +int smc_inet_init_sock(struct sock *sk) +{ + struct net *net = sock_net(sk); + + /* init common smc sock */ + smc_sk_init(net, sk, IPPROTO_SMC); + /* create clcsock */ + return smc_create_clcsk(net, sk, sk->sk_family); +} diff --git a/net/smc/inet_smc.h b/net/smc/inet_smc.h new file mode 100644 index 00000000..c55345d --- /dev/null +++ b/net/smc/inet_smc.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Shared Memory Communications over RDMA (SMC-R) and RoCE + * + * Definitions for the IPPROTO_SMC (socket related) + * + * Copyright IBM Corp. 2016 + * Copyright (c) 2024, Alibaba Inc. + * + * Author: D. Wythe <alibuda@linux.alibaba.com> + */ +#ifndef __INET_SMC +#define __INET_SMC + +#include <net/protocol.h> +#include <net/sock.h> +#include <net/tcp.h> + +extern struct proto smc_inet_prot; +extern const struct proto_ops smc_inet_stream_ops; +extern struct inet_protosw smc_inet_protosw; + +#if IS_ENABLED(CONFIG_IPV6) +#include <net/ipv6.h> +/* MUST be included after net/tcp.h, otherwise a warning is raised */ +#include <net/transp_v6.h> +extern struct proto smc_inet6_prot; +extern const struct proto_ops smc_inet6_stream_ops; +extern struct inet_protosw smc_inet6_protosw; +#endif + +int smc_inet_init_sock(struct sock *sk); + +#endif /* __INET_SMC */