Message ID | 1718301630-63692-4-git-send-email-alibuda@linux.alibaba.com (mailing list archive) |
---|---|
State | Not Applicable |
Headers | show |
Series | Introduce IPPROTO_SMC | expand |
On Thu, Jun 13, 2024 at 8:00 PM D. Wythe <alibuda@linux.alibaba.com> wrote: > > From: "D. Wythe" <alibuda@linux.alibaba.com> > > This patch allows to create smc socket via AF_INET, > similar to the following code, > > /* create v4 smc sock */ > v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC); > > /* create v6 smc sock */ > v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC); > > There are several reasons why we believe it is appropriate here: > > 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6) > address. There is no AF_SMC address at all. > > 2. Create smc socket in the AF_INET(6) path, which allows us to reuse > the infrastructure of AF_INET(6) path, such as common ebpf hooks. > Otherwise, smc have to implement it again in AF_SMC path. > > Signed-off-by: D. Wythe <alibuda@linux.alibaba.com> > Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com> > Reviewed-by: Dust Li <dust.li@linux.alibaba.com> > Tested-by: Niklas Schnelle <schnelle@linux.ibm.com> > Tested-by: Wenjia Zhang <wenjia@linux.ibm.com> > --- > include/uapi/linux/in.h | 2 + > net/smc/Makefile | 2 +- > net/smc/af_smc.c | 16 ++++- > net/smc/smc_inet.c | 159 ++++++++++++++++++++++++++++++++++++++++++++++++ > net/smc/smc_inet.h | 22 +++++++ > 5 files changed, 198 insertions(+), 3 deletions(-) > create mode 100644 net/smc/smc_inet.c > create mode 100644 net/smc/smc_inet.h > > diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h > index e682ab6..d358add 100644 > --- a/include/uapi/linux/in.h > +++ b/include/uapi/linux/in.h > @@ -81,6 +81,8 @@ enum { > #define IPPROTO_ETHERNET IPPROTO_ETHERNET > IPPROTO_RAW = 255, /* Raw IP packets */ > #define IPPROTO_RAW IPPROTO_RAW > + IPPROTO_SMC = 256, /* Shared Memory Communications */ > +#define IPPROTO_SMC IPPROTO_SMC > IPPROTO_MPTCP = 262, /* Multipath TCP connection */ > #define IPPROTO_MPTCP IPPROTO_MPTCP > IPPROTO_MAX > diff --git a/net/smc/Makefile b/net/smc/Makefile > index 2c510d54..60f1c87 100644 > --- a/net/smc/Makefile > +++ 
b/net/smc/Makefile > @@ -4,6 +4,6 @@ obj-$(CONFIG_SMC) += smc.o > obj-$(CONFIG_SMC_DIAG) += smc_diag.o > smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o > smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o > -smc-y += smc_tracepoint.o > +smc-y += smc_tracepoint.o smc_inet.o > smc-$(CONFIG_SYSCTL) += smc_sysctl.o > smc-$(CONFIG_SMC_LO) += smc_loopback.o > diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c > index 8e3ce76..435f38b 100644 > --- a/net/smc/af_smc.c > +++ b/net/smc/af_smc.c > @@ -54,6 +54,7 @@ > #include "smc_tracepoint.h" > #include "smc_sysctl.h" > #include "smc_loopback.h" > +#include "smc_inet.h" > > static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group > * creation on server > @@ -3593,10 +3594,15 @@ static int __init smc_init(void) > pr_err("%s: tcp_ulp_register fails with %d\n", __func__, rc); > goto out_lo; > } > - > + rc = smc_inet_init(); > + if (rc) { > + pr_err("%s: smc_inet_init fails with %d\n", __func__, rc); > + goto out_ulp; > + } > static_branch_enable(&tcp_have_smc); > return 0; > - > +out_ulp: > + tcp_unregister_ulp(&smc_ulp_ops); > out_lo: > smc_loopback_exit(); > out_ib: > @@ -3633,6 +3639,7 @@ static int __init smc_init(void) > static void __exit smc_exit(void) > { > static_branch_disable(&tcp_have_smc); > + smc_inet_exit(); > tcp_unregister_ulp(&smc_ulp_ops); > sock_unregister(PF_SMC); > smc_core_exit(); > @@ -3660,4 +3667,9 @@ static void __exit smc_exit(void) > MODULE_LICENSE("GPL"); > MODULE_ALIAS_NETPROTO(PF_SMC); > MODULE_ALIAS_TCP_ULP("smc"); > +/* 256 for IPPROTO_SMC and 1 for SOCK_STREAM */ > +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 256, 1); > +#if IS_ENABLED(CONFIG_IPV6) > +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 256, 1); > +#endif /* CONFIG_IPV6 */ > MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME); > diff --git a/net/smc/smc_inet.c b/net/smc/smc_inet.c > new file mode 100644 > index 00000000..bece346 > --- /dev/null > +++ b/net/smc/smc_inet.c > 
@@ -0,0 +1,159 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Shared Memory Communications over RDMA (SMC-R) and RoCE
> + *
> + * Definitions for the IPPROTO_SMC (socket related)
> + *
> + * Copyright IBM Corp. 2016, 2018
> + * Copyright (c) 2024, Alibaba Inc.
> + *
> + * Author: D. Wythe <alibuda@linux.alibaba.com>
> + */
> +
> +#include <net/protocol.h>
> +#include <net/sock.h>
> +
> +#include "smc_inet.h"
> +#include "smc.h"
> +
> +static int smc_inet_init_sock(struct sock *sk);
> +
> +static struct proto smc_inet_prot = {
> + .name = "INET_SMC",
> + .owner = THIS_MODULE,
> + .init = smc_inet_init_sock,
> + .hash = smc_hash_sk,
> + .unhash = smc_unhash_sk,
> + .release_cb = smc_release_cb,
> + .obj_size = sizeof(struct smc_sock),
> + .h.smc_hash = &smc_v4_hashinfo,
> + .slab_flags = SLAB_TYPESAFE_BY_RCU,
> +};
> +
> +static const struct proto_ops smc_inet_stream_ops = {
> + .family = PF_INET,
> + .owner = THIS_MODULE,
> + .release = smc_release,
> + .bind = smc_bind,
> + .connect = smc_connect,
> + .socketpair = sock_no_socketpair,
> + .accept = smc_accept,
> + .getname = smc_getname,
> + .poll = smc_poll,
> + .ioctl = smc_ioctl,
> + .listen = smc_listen,
> + .shutdown = smc_shutdown,
> + .setsockopt = smc_setsockopt,
> + .getsockopt = smc_getsockopt,
> + .sendmsg = smc_sendmsg,
> + .recvmsg = smc_recvmsg,
> + .mmap = sock_no_mmap,
> + .splice_read = smc_splice_read,
> +};
> +
> +static struct inet_protosw smc_inet_protosw = {
> + .type = SOCK_STREAM,
> + .protocol = IPPROTO_SMC,
> + .prot = &smc_inet_prot,
> + .ops = &smc_inet_stream_ops,
> + .flags = INET_PROTOSW_ICSK,

When this flag is set, icsk->icsk_sync_mss must be set.
Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
Mem abort info:
  ESR = 0x0000000086000005
  EC = 0x21: IABT (current EL), IL = 32 bits
  SET = 0, FnV = 0
  EA = 0, S1PTW = 0
  FSC = 0x05: level 1 translation fault
user pgtable: 4k pages, 48-bit VAs, pgdp=00000001195d1000
[0000000000000000] pgd=0800000109c46003, p4d=0800000109c46003, pud=0000000000000000
Internal error: Oops: 0000000086000005 [#1] PREEMPT SMP
Modules linked in:
CPU: 1 UID: 0 PID: 8037 Comm: syz.3.265 Not tainted 6.11.0-rc7-syzkaller-g5f5673607153 #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/06/2024
pstate: 80400005 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : 0x0
lr : cipso_v4_sock_setattr+0x2a8/0x3c0 net/ipv4/cipso_ipv4.c:1910
sp : ffff80009b887a90
x29: ffff80009b887aa0 x28: ffff80008db94050 x27: 0000000000000000
x26: 1fffe0001aa6f5b3 x25: dfff800000000000 x24: ffff0000db75da00
x23: 0000000000000000 x22: ffff0000d8b78518 x21: 0000000000000000
x20: ffff0000d537ad80 x19: ffff0000d8b78000 x18: 1fffe000366d79ee
x17: ffff8000800614a8 x16: ffff800080569b84 x15: 0000000000000001
x14: 000000008b336894 x13: 00000000cd96feaa x12: 0000000000000003
x11: 0000000000040000 x10: 00000000000020a3 x9 : 1fffe0001b16f0f1
x8 : 0000000000000000 x7 : 0000000000000000 x6 : 000000000000003f
x5 : 0000000000000040 x4 : 0000000000000001 x3 : 0000000000000000
x2 : 0000000000000002 x1 : 0000000000000000 x0 : ffff0000d8b78000
Call trace:
 0x0
 netlbl_sock_setattr+0x2e4/0x338 net/netlabel/netlabel_kapi.c:1000
 smack_netlbl_add+0xa4/0x154 security/smack/smack_lsm.c:2593
 smack_socket_post_create+0xa8/0x14c security/smack/smack_lsm.c:2973
 security_socket_post_create+0x94/0xd4 security/security.c:4425
 __sock_create+0x4c8/0x884 net/socket.c:1587
 sock_create net/socket.c:1622 [inline]
 __sys_socket_create net/socket.c:1659 [inline]
 __sys_socket+0x134/0x340 net/socket.c:1706
 __do_sys_socket net/socket.c:1720 [inline]
 __se_sys_socket net/socket.c:1718 [inline]
 __arm64_sys_socket+0x7c/0x94 net/socket.c:1718
 __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline]
 invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:49
 el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:132
 do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:151
 el0_svc+0x54/0x168 arch/arm64/kernel/entry-common.c:712
 el0t_64_sync_handler+0x84/0xfc arch/arm64/kernel/entry-common.c:730
 el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:598
Code: ???????? ???????? ???????? ???????? (????????)
---[ end trace 0000000000000000 ]---
On 9/27/24 10:56 PM, Eric Dumazet wrote: > On Thu, Jun 13, 2024 at 8:00 PM D. Wythe <alibuda@linux.alibaba.com> wrote: >> >> From: "D. Wythe" <alibuda@linux.alibaba.com> >> >> This patch allows to create smc socket via AF_INET, >> similar to the following code, >> >> /* create v4 smc sock */ >> v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC); >> >> /* create v6 smc sock */ >> v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC); >> >> There are several reasons why we believe it is appropriate here: >> >> 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6) >> address. There is no AF_SMC address at all. >> >> 2. Create smc socket in the AF_INET(6) path, which allows us to reuse >> the infrastructure of AF_INET(6) path, such as common ebpf hooks. >> Otherwise, smc have to implement it again in AF_SMC path. >> >> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com> >> Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com> >> Reviewed-by: Dust Li <dust.li@linux.alibaba.com> >> Tested-by: Niklas Schnelle <schnelle@linux.ibm.com> >> Tested-by: Wenjia Zhang <wenjia@linux.ibm.com> >> --- >> include/uapi/linux/in.h | 2 + >> net/smc/Makefile | 2 +- >> net/smc/af_smc.c | 16 ++++- >> net/smc/smc_inet.c | 159 ++++++++++++++++++++++++++++++++++++++++++++++++ >> net/smc/smc_inet.h | 22 +++++++ >> 5 files changed, 198 insertions(+), 3 deletions(-) >> create mode 100644 net/smc/smc_inet.c >> create mode 100644 net/smc/smc_inet.h >> >> diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h >> index e682ab6..d358add 100644 >> --- a/include/uapi/linux/in.h >> +++ b/include/uapi/linux/in.h >> @@ -81,6 +81,8 @@ enum { >> #define IPPROTO_ETHERNET IPPROTO_ETHERNET >> IPPROTO_RAW = 255, /* Raw IP packets */ >> #define IPPROTO_RAW IPPROTO_RAW >> + IPPROTO_SMC = 256, /* Shared Memory Communications */ >> +#define IPPROTO_SMC IPPROTO_SMC >> IPPROTO_MPTCP = 262, /* Multipath TCP connection */ >> #define IPPROTO_MPTCP IPPROTO_MPTCP >> IPPROTO_MAX >> diff --git a/net/smc/Makefile 
b/net/smc/Makefile >> index 2c510d54..60f1c87 100644 >> --- a/net/smc/Makefile >> +++ b/net/smc/Makefile >> @@ -4,6 +4,6 @@ obj-$(CONFIG_SMC) += smc.o >> obj-$(CONFIG_SMC_DIAG) += smc_diag.o >> smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o >> smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o >> -smc-y += smc_tracepoint.o >> +smc-y += smc_tracepoint.o smc_inet.o >> smc-$(CONFIG_SYSCTL) += smc_sysctl.o >> smc-$(CONFIG_SMC_LO) += smc_loopback.o >> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c >> index 8e3ce76..435f38b 100644 >> --- a/net/smc/af_smc.c >> +++ b/net/smc/af_smc.c >> @@ -54,6 +54,7 @@ >> #include "smc_tracepoint.h" >> #include "smc_sysctl.h" >> #include "smc_loopback.h" >> +#include "smc_inet.h" >> >> static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group >> * creation on server >> @@ -3593,10 +3594,15 @@ static int __init smc_init(void) >> pr_err("%s: tcp_ulp_register fails with %d\n", __func__, rc); >> goto out_lo; >> } >> - >> + rc = smc_inet_init(); >> + if (rc) { >> + pr_err("%s: smc_inet_init fails with %d\n", __func__, rc); >> + goto out_ulp; >> + } >> static_branch_enable(&tcp_have_smc); >> return 0; >> - >> +out_ulp: >> + tcp_unregister_ulp(&smc_ulp_ops); >> out_lo: >> smc_loopback_exit(); >> out_ib: >> @@ -3633,6 +3639,7 @@ static int __init smc_init(void) >> static void __exit smc_exit(void) >> { >> static_branch_disable(&tcp_have_smc); >> + smc_inet_exit(); >> tcp_unregister_ulp(&smc_ulp_ops); >> sock_unregister(PF_SMC); >> smc_core_exit(); >> @@ -3660,4 +3667,9 @@ static void __exit smc_exit(void) >> MODULE_LICENSE("GPL"); >> MODULE_ALIAS_NETPROTO(PF_SMC); >> MODULE_ALIAS_TCP_ULP("smc"); >> +/* 256 for IPPROTO_SMC and 1 for SOCK_STREAM */ >> +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 256, 1); >> +#if IS_ENABLED(CONFIG_IPV6) >> +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 256, 1); >> +#endif /* CONFIG_IPV6 */ >> MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME); >> diff 
--git a/net/smc/smc_inet.c b/net/smc/smc_inet.c >> new file mode 100644 >> index 00000000..bece346 >> --- /dev/null >> +++ b/net/smc/smc_inet.c >> @@ -0,0 +1,159 @@ >> +// SPDX-License-Identifier: GPL-2.0-only >> +/* >> + * Shared Memory Communications over RDMA (SMC-R) and RoCE >> + * >> + * Definitions for the IPPROTO_SMC (socket related) >> + * >> + * Copyright IBM Corp. 2016, 2018 >> + * Copyright (c) 2024, Alibaba Inc. >> + * >> + * Author: D. Wythe <alibuda@linux.alibaba.com> >> + */ >> + >> +#include <net/protocol.h> >> +#include <net/sock.h> >> + >> +#include "smc_inet.h" >> +#include "smc.h" >> + >> +static int smc_inet_init_sock(struct sock *sk); >> + >> +static struct proto smc_inet_prot = { >> + .name = "INET_SMC", >> + .owner = THIS_MODULE, >> + .init = smc_inet_init_sock, >> + .hash = smc_hash_sk, >> + .unhash = smc_unhash_sk, >> + .release_cb = smc_release_cb, >> + .obj_size = sizeof(struct smc_sock), >> + .h.smc_hash = &smc_v4_hashinfo, >> + .slab_flags = SLAB_TYPESAFE_BY_RCU, >> +}; >> + >> +static const struct proto_ops smc_inet_stream_ops = { >> + .family = PF_INET, >> + .owner = THIS_MODULE, >> + .release = smc_release, >> + .bind = smc_bind, >> + .connect = smc_connect, >> + .socketpair = sock_no_socketpair, >> + .accept = smc_accept, >> + .getname = smc_getname, >> + .poll = smc_poll, >> + .ioctl = smc_ioctl, >> + .listen = smc_listen, >> + .shutdown = smc_shutdown, >> + .setsockopt = smc_setsockopt, >> + .getsockopt = smc_getsockopt, >> + .sendmsg = smc_sendmsg, >> + .recvmsg = smc_recvmsg, >> + .mmap = sock_no_mmap, >> + .splice_read = smc_splice_read, >> +}; >> + >> +static struct inet_protosw smc_inet_protosw = { >> + .type = SOCK_STREAM, >> + .protocol = IPPROTO_SMC, >> + .prot = &smc_inet_prot, >> + .ops = &smc_inet_stream_ops, >> + .flags = INET_PROTOSW_ICSK, > > When this flag is set, icsk->icsk_sync_mss must be set. > Hi Eric, Thanks for your report. I will fix this issue ASAP. Best wishes, D. 
Wythe > Unable to handle kernel NULL pointer dereference at virtual address > 0000000000000000 > Mem abort info: > ESR = 0x0000000086000005 > EC = 0x21: IABT (current EL), IL = 32 bits > SET = 0, FnV = 0 > EA = 0, S1PTW = 0 > FSC = 0x05: level 1 translation fault > user pgtable: 4k pages, 48-bit VAs, pgdp=00000001195d1000 > [0000000000000000] pgd=0800000109c46003, p4d=0800000109c46003, > pud=0000000000000000 > Internal error: Oops: 0000000086000005 [#1] PREEMPT SMP > Modules linked in: > CPU: 1 UID: 0 PID: 8037 Comm: syz.3.265 Not tainted > 6.11.0-rc7-syzkaller-g5f5673607153 #0 > Hardware name: Google Google Compute Engine/Google Compute Engine, > BIOS Google 08/06/2024 > pstate: 80400005 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) > pc : 0x0 > lr : cipso_v4_sock_setattr+0x2a8/0x3c0 net/ipv4/cipso_ipv4.c:1910 > sp : ffff80009b887a90 > x29: ffff80009b887aa0 x28: ffff80008db94050 x27: 0000000000000000 > x26: 1fffe0001aa6f5b3 x25: dfff800000000000 x24: ffff0000db75da00 > x23: 0000000000000000 x22: ffff0000d8b78518 x21: 0000000000000000 > x20: ffff0000d537ad80 x19: ffff0000d8b78000 x18: 1fffe000366d79ee > x17: ffff8000800614a8 x16: ffff800080569b84 x15: 0000000000000001 > x14: 000000008b336894 x13: 00000000cd96feaa x12: 0000000000000003 > x11: 0000000000040000 x10: 00000000000020a3 x9 : 1fffe0001b16f0f1 > x8 : 0000000000000000 x7 : 0000000000000000 x6 : 000000000000003f > x5 : 0000000000000040 x4 : 0000000000000001 x3 : 0000000000000000 > x2 : 0000000000000002 x1 : 0000000000000000 x0 : ffff0000d8b78000 > Call trace: > 0x0 > netlbl_sock_setattr+0x2e4/0x338 net/netlabel/netlabel_kapi.c:1000 > smack_netlbl_add+0xa4/0x154 security/smack/smack_lsm.c:2593 > smack_socket_post_create+0xa8/0x14c security/smack/smack_lsm.c:2973 > security_socket_post_create+0x94/0xd4 security/security.c:4425 > __sock_create+0x4c8/0x884 net/socket.c:1587 > sock_create net/socket.c:1622 [inline] > __sys_socket_create net/socket.c:1659 [inline] > __sys_socket+0x134/0x340 net/socket.c:1706 > 
__do_sys_socket net/socket.c:1720 [inline] > __se_sys_socket net/socket.c:1718 [inline] > __arm64_sys_socket+0x7c/0x94 net/socket.c:1718 > __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline] > invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:49 > el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:132 > do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:151 > el0_svc+0x54/0x168 arch/arm64/kernel/entry-common.c:712 > el0t_64_sync_handler+0x84/0xfc arch/arm64/kernel/entry-common.c:730 > el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:598 > Code: ???????? ???????? ???????? ???????? (????????) > ---[ end trace 0000000000000000 ]---
diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index e682ab6..d358add 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -81,6 +81,8 @@ enum { #define IPPROTO_ETHERNET IPPROTO_ETHERNET IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW + IPPROTO_SMC = 256, /* Shared Memory Communications */ +#define IPPROTO_SMC IPPROTO_SMC IPPROTO_MPTCP = 262, /* Multipath TCP connection */ #define IPPROTO_MPTCP IPPROTO_MPTCP IPPROTO_MAX diff --git a/net/smc/Makefile b/net/smc/Makefile index 2c510d54..60f1c87 100644 --- a/net/smc/Makefile +++ b/net/smc/Makefile @@ -4,6 +4,6 @@ obj-$(CONFIG_SMC) += smc.o obj-$(CONFIG_SMC_DIAG) += smc_diag.o smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o -smc-y += smc_tracepoint.o +smc-y += smc_tracepoint.o smc_inet.o smc-$(CONFIG_SYSCTL) += smc_sysctl.o smc-$(CONFIG_SMC_LO) += smc_loopback.o diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 8e3ce76..435f38b 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -54,6 +54,7 @@ #include "smc_tracepoint.h" #include "smc_sysctl.h" #include "smc_loopback.h" +#include "smc_inet.h" static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group * creation on server @@ -3593,10 +3594,15 @@ static int __init smc_init(void) pr_err("%s: tcp_ulp_register fails with %d\n", __func__, rc); goto out_lo; } - + rc = smc_inet_init(); + if (rc) { + pr_err("%s: smc_inet_init fails with %d\n", __func__, rc); + goto out_ulp; + } static_branch_enable(&tcp_have_smc); return 0; - +out_ulp: + tcp_unregister_ulp(&smc_ulp_ops); out_lo: smc_loopback_exit(); out_ib: @@ -3633,6 +3639,7 @@ static int __init smc_init(void) static void __exit smc_exit(void) { static_branch_disable(&tcp_have_smc); + smc_inet_exit(); tcp_unregister_ulp(&smc_ulp_ops); sock_unregister(PF_SMC); smc_core_exit(); @@ -3660,4 +3667,9 @@ static void __exit smc_exit(void) 
MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_SMC); MODULE_ALIAS_TCP_ULP("smc"); +/* 256 for IPPROTO_SMC and 1 for SOCK_STREAM */ +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 256, 1); +#if IS_ENABLED(CONFIG_IPV6) +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 256, 1); +#endif /* CONFIG_IPV6 */ MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME); diff --git a/net/smc/smc_inet.c b/net/smc/smc_inet.c new file mode 100644 index 00000000..bece346 --- /dev/null +++ b/net/smc/smc_inet.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Shared Memory Communications over RDMA (SMC-R) and RoCE + * + * Definitions for the IPPROTO_SMC (socket related) + * + * Copyright IBM Corp. 2016, 2018 + * Copyright (c) 2024, Alibaba Inc. + * + * Author: D. Wythe <alibuda@linux.alibaba.com> + */ + +#include <net/protocol.h> +#include <net/sock.h> + +#include "smc_inet.h" +#include "smc.h" + +static int smc_inet_init_sock(struct sock *sk); + +static struct proto smc_inet_prot = { + .name = "INET_SMC", + .owner = THIS_MODULE, + .init = smc_inet_init_sock, + .hash = smc_hash_sk, + .unhash = smc_unhash_sk, + .release_cb = smc_release_cb, + .obj_size = sizeof(struct smc_sock), + .h.smc_hash = &smc_v4_hashinfo, + .slab_flags = SLAB_TYPESAFE_BY_RCU, +}; + +static const struct proto_ops smc_inet_stream_ops = { + .family = PF_INET, + .owner = THIS_MODULE, + .release = smc_release, + .bind = smc_bind, + .connect = smc_connect, + .socketpair = sock_no_socketpair, + .accept = smc_accept, + .getname = smc_getname, + .poll = smc_poll, + .ioctl = smc_ioctl, + .listen = smc_listen, + .shutdown = smc_shutdown, + .setsockopt = smc_setsockopt, + .getsockopt = smc_getsockopt, + .sendmsg = smc_sendmsg, + .recvmsg = smc_recvmsg, + .mmap = sock_no_mmap, + .splice_read = smc_splice_read, +}; + +static struct inet_protosw smc_inet_protosw = { + .type = SOCK_STREAM, + .protocol = IPPROTO_SMC, + .prot = &smc_inet_prot, + .ops = &smc_inet_stream_ops, + .flags = INET_PROTOSW_ICSK, +}; + +#if 
IS_ENABLED(CONFIG_IPV6) +static struct proto smc_inet6_prot = { + .name = "INET6_SMC", + .owner = THIS_MODULE, + .init = smc_inet_init_sock, + .hash = smc_hash_sk, + .unhash = smc_unhash_sk, + .release_cb = smc_release_cb, + .obj_size = sizeof(struct smc_sock), + .h.smc_hash = &smc_v6_hashinfo, + .slab_flags = SLAB_TYPESAFE_BY_RCU, +}; + +static const struct proto_ops smc_inet6_stream_ops = { + .family = PF_INET6, + .owner = THIS_MODULE, + .release = smc_release, + .bind = smc_bind, + .connect = smc_connect, + .socketpair = sock_no_socketpair, + .accept = smc_accept, + .getname = smc_getname, + .poll = smc_poll, + .ioctl = smc_ioctl, + .listen = smc_listen, + .shutdown = smc_shutdown, + .setsockopt = smc_setsockopt, + .getsockopt = smc_getsockopt, + .sendmsg = smc_sendmsg, + .recvmsg = smc_recvmsg, + .mmap = sock_no_mmap, + .splice_read = smc_splice_read, +}; + +static struct inet_protosw smc_inet6_protosw = { + .type = SOCK_STREAM, + .protocol = IPPROTO_SMC, + .prot = &smc_inet6_prot, + .ops = &smc_inet6_stream_ops, + .flags = INET_PROTOSW_ICSK, +}; +#endif /* CONFIG_IPV6 */ + +static int smc_inet_init_sock(struct sock *sk) +{ + struct net *net = sock_net(sk); + + /* init common smc sock */ + smc_sk_init(net, sk, IPPROTO_SMC); + /* create clcsock */ + return smc_create_clcsk(net, sk, sk->sk_family); +} + +int __init smc_inet_init(void) +{ + int rc; + + rc = proto_register(&smc_inet_prot, 1); + if (rc) { + pr_err("%s: proto_register smc_inet_prot fails with %d\n", + __func__, rc); + return rc; + } + /* no return value */ + inet_register_protosw(&smc_inet_protosw); + +#if IS_ENABLED(CONFIG_IPV6) + rc = proto_register(&smc_inet6_prot, 1); + if (rc) { + pr_err("%s: proto_register smc_inet6_prot fails with %d\n", + __func__, rc); + goto out_inet6_prot; + } + rc = inet6_register_protosw(&smc_inet6_protosw); + if (rc) { + pr_err("%s: inet6_register_protosw smc_inet6_protosw fails with %d\n", + __func__, rc); + goto out_inet6_protosw; + } + return rc; +out_inet6_protosw: 
+ proto_unregister(&smc_inet6_prot); +out_inet6_prot: + inet_unregister_protosw(&smc_inet_protosw); + proto_unregister(&smc_inet_prot); +#endif /* CONFIG_IPV6 */ + return rc; +} + +void smc_inet_exit(void) +{ +#if IS_ENABLED(CONFIG_IPV6) + inet6_unregister_protosw(&smc_inet6_protosw); + proto_unregister(&smc_inet6_prot); +#endif /* CONFIG_IPV6 */ + inet_unregister_protosw(&smc_inet_protosw); + proto_unregister(&smc_inet_prot); +} diff --git a/net/smc/smc_inet.h b/net/smc/smc_inet.h new file mode 100644 index 00000000..a489c8a --- /dev/null +++ b/net/smc/smc_inet.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Shared Memory Communications over RDMA (SMC-R) and RoCE + * + * Definitions for the IPPROTO_SMC (socket related) + + * Copyright IBM Corp. 2016 + * Copyright (c) 2024, Alibaba Inc. + * + * Author: D. Wythe <alibuda@linux.alibaba.com> + */ +#ifndef __INET_SMC +#define __INET_SMC + +/* Initialize protocol registration on IPPROTO_SMC, + * @return 0 on success + */ +int smc_inet_init(void); + +void smc_inet_exit(void); + +#endif /* __INET_SMC */