diff mbox series

[RFC,net-next,v2,1/5] net/smc: introduce SMC-D loopback device

Message ID 1671506505-104676-2-git-send-email-guwen@linux.alibaba.com (mailing list archive)
State RFC
Delegated to: Netdev Maintainers
Headers show
Series net/smc:Introduce SMC-D based loopback acceleration | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers success CCed 9 of 9 maintainers
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch warning WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Wen Gu Dec. 20, 2022, 3:21 a.m. UTC
This patch introduces a kind of loopback device for SMC-D, thus
enabling the SMC communication between two local sockets in one
kernel.

The loopback device supports basic capabilities defined by SMC-D,
including registering DMB, unregistering DMB and moving data.

Considering that there is no ism device on other servers expect
IBM z13, the loopback device can be used as a dummy device to
test SMC-D logic for the broad community.

Signed-off-by: Wen Gu <guwen@linux.alibaba.com>
---
 include/net/smc.h      |   1 +
 net/smc/Makefile       |   2 +-
 net/smc/af_smc.c       |  12 ++-
 net/smc/smc_cdc.c      |   6 ++
 net/smc/smc_cdc.h      |   1 +
 net/smc/smc_loopback.c | 282 +++++++++++++++++++++++++++++++++++++++++++++++++
 net/smc/smc_loopback.h |  59 +++++++++++
 7 files changed, 361 insertions(+), 2 deletions(-)
 create mode 100644 net/smc/smc_loopback.c
 create mode 100644 net/smc/smc_loopback.h

Comments

Alexandra Winter Jan. 19, 2023, 4:25 p.m. UTC | #1
On 20.12.22 04:21, Wen Gu wrote:
> This patch introduces a kind of loopback device for SMC-D, thus
> enabling the SMC communication between two local sockets in one
> kernel.
> 
> The loopback device supports basic capabilities defined by SMC-D,
> including registering DMB, unregistering DMB and moving data.
> 
> Considering that there is no ism device on other servers expect
> IBM z13, 

Please use the wording 'on other architectures except s390'.
That is how IBM Z is referred to in the Linux kernel.


> the loopback device can be used as a dummy device to
> test SMC-D logic for the broad community.
> 
> Signed-off-by: Wen Gu <guwen@linux.alibaba.com>
> ---

Hello Wen Gu,

as the general design discussions are ongoing, I didn't
do a thorough review. But here are some general remarks
that you may want to consider for future versions.

I would propose to add a module parameter (default off) to enable
SMC-D loopback.

>  include/net/smc.h      |   1 +
>  net/smc/Makefile       |   2 +-
>  net/smc/af_smc.c       |  12 ++-
>  net/smc/smc_cdc.c      |   6 ++
>  net/smc/smc_cdc.h      |   1 +
>  net/smc/smc_loopback.c | 282 +++++++++++++++++++++++++++++++++++++++++++++++++
>  net/smc/smc_loopback.h |  59 +++++++++++
>  7 files changed, 361 insertions(+), 2 deletions(-)
>  create mode 100644 net/smc/smc_loopback.c
>  create mode 100644 net/smc/smc_loopback.h
> 

I am not convinced that this warrants a separate file.

[...]
>
> +}
> +
> +static int lo_add_vlan_id(struct smcd_dev *smcd, u64 vlan_id)
> +{
> +	return 0;
> +}
> +
> +static int lo_del_vlan_id(struct smcd_dev *smcd, u64 vlan_id)
> +{
> +	return 0;
> +}
> +
> +static int lo_set_vlan_required(struct smcd_dev *smcd)
> +{
> +	return 0;
> +}
> +
> +static int lo_reset_vlan_required(struct smcd_dev *smcd)
> +{
> +	return 0;
> +}

The VLAN functions are only required for SMC-Dv1
Seems you want to provide v1 support for loopback?
May be nice for testing v1 VLAN support.
But then you need proper VLAN support.

[...]
> +
> +static u8 *lo_get_system_eid(void)
> +{
> +	return &LO_SYSTEM_EID.seid_string[0];
> +}
SEID is for the whole system not per device.
We probably need to register a different function
for each architecture.

> +
> +static u16 lo_get_chid(struct smcd_dev *smcd)
> +{
> +	return 0;
> +}
> +

Shouldn't this return 0xFFFF in your current concept?
Wen Gu Jan. 30, 2023, 4:30 p.m. UTC | #2
On 2023/1/20 00:25, Alexandra Winter wrote:
> 
> 
> On 20.12.22 04:21, Wen Gu wrote:
>> This patch introduces a kind of loopback device for SMC-D, thus
>> enabling the SMC communication between two local sockets in one
>> kernel.
>>
>> The loopback device supports basic capabilities defined by SMC-D,
>> including registering DMB, unregistering DMB and moving data.
>>
>> Considering that there is no ism device on other servers expect
>> IBM z13,
> 
> Please use the wording 'on other architectures except s390'.
> That is how IBM Z is referred to in the Linux kernel.
> 

Thanks, will use words consistent with this.

> 
>> the loopback device can be used as a dummy device to
>> test SMC-D logic for the broad community.
>>
>> Signed-off-by: Wen Gu <guwen@linux.alibaba.com>
>> ---
> 
> Hello Wen Gu,
> 
> as the general design discussions are ongoing, I didn't
> do a thorough review. But here are some general remarks
> that you may want to consider for future versions.
> I would propose to add a module parameter (default off) to enable
> SMC-D loopback.
> 

OK, will add a module parameter in the future version.

>>   include/net/smc.h      |   1 +
>>   net/smc/Makefile       |   2 +-
>>   net/smc/af_smc.c       |  12 ++-
>>   net/smc/smc_cdc.c      |   6 ++
>>   net/smc/smc_cdc.h      |   1 +
>>   net/smc/smc_loopback.c | 282 +++++++++++++++++++++++++++++++++++++++++++++++++
>>   net/smc/smc_loopback.h |  59 +++++++++++
>>   7 files changed, 361 insertions(+), 2 deletions(-)
>>   create mode 100644 net/smc/smc_loopback.c
>>   create mode 100644 net/smc/smc_loopback.h
>>
> 
> I am not convinced that this warrants a separate file.

IMHO, the dummy device used by smcd loopback is corresponding to ISM device.
So I put the dummy device implementation in smc_loopback.c alone, imitating
drivers/s390/net/ism_drv.c. I think it maybe clearer to do so.

> 
> [...]
>>
>> +}
>> +
>> +static int lo_add_vlan_id(struct smcd_dev *smcd, u64 vlan_id)
>> +{
>> +	return 0;
>> +}
>> +
>> +static int lo_del_vlan_id(struct smcd_dev *smcd, u64 vlan_id)
>> +{
>> +	return 0;
>> +}
>> +
>> +static int lo_set_vlan_required(struct smcd_dev *smcd)
>> +{
>> +	return 0;
>> +}
>> +
>> +static int lo_reset_vlan_required(struct smcd_dev *smcd)
>> +{
>> +	return 0;
>> +}
> 
> The VLAN functions are only required for SMC-Dv1
> Seems you want to provide v1 support for loopback?
> May be nice for testing v1 VLAN support.
> But then you need proper VLAN support.
> 
Based on the current discussion, I tend to only provide v2 support for loopback
since v2 is the general trend. So I will fix this in the future version.

> [...]
>> +
>> +static u8 *lo_get_system_eid(void)
>> +{
>> +	return &LO_SYSTEM_EID.seid_string[0];
>> +}
> SEID is for the whole system not per device.
> We probably need to register a different function
> for each architecture.
> 

Yes, I agree.


>> +
>> +static u16 lo_get_chid(struct smcd_dev *smcd)
>> +{
>> +	return 0;
>> +}
>> +
> 
> Shouldn't this return 0xFFFF in your current concept?
> 
> 

Yes, this should return 0xFFFF.
I supplemented a patch as attachment in this earlier reply:
https://lore.kernel.org/netdev/b25f56de-7913-2a56-950f-dfe0defd6936@linux.alibaba.com/
and have amended this.
diff mbox series

Patch

diff --git a/include/net/smc.h b/include/net/smc.h
index c926d33..7699f97 100644
--- a/include/net/smc.h
+++ b/include/net/smc.h
@@ -93,6 +93,7 @@  struct smcd_dev {
 	atomic_t lgr_cnt;
 	wait_queue_head_t lgrs_deleted;
 	u8 going_away : 1;
+	u8 is_loopback : 1;
 };
 
 struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
diff --git a/net/smc/Makefile b/net/smc/Makefile
index 875efcd..a8c3711 100644
--- a/net/smc/Makefile
+++ b/net/smc/Makefile
@@ -4,5 +4,5 @@  obj-$(CONFIG_SMC)	+= smc.o
 obj-$(CONFIG_SMC_DIAG)	+= smc_diag.o
 smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
 smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o
-smc-y += smc_tracepoint.o
+smc-y += smc_tracepoint.o smc_loopback.o
 smc-$(CONFIG_SYSCTL) += smc_sysctl.o
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index e12d4fa..9546c02 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -52,6 +52,7 @@ 
 #include "smc_stats.h"
 #include "smc_tracepoint.h"
 #include "smc_sysctl.h"
+#include "smc_loopback.h"
 
 static DEFINE_MUTEX(smc_server_lgr_pending);	/* serialize link group
 						 * creation on server
@@ -3451,15 +3452,23 @@  static int __init smc_init(void)
 		goto out_sock;
 	}
 
+	rc = smc_loopback_init();
+	if (rc) {
+		pr_err("%s: smc_loopback_init fails with %d\n", __func__, rc);
+		goto out_ib;
+	}
+
 	rc = tcp_register_ulp(&smc_ulp_ops);
 	if (rc) {
 		pr_err("%s: tcp_ulp_register fails with %d\n", __func__, rc);
-		goto out_ib;
+		goto out_lo;
 	}
 
 	static_branch_enable(&tcp_have_smc);
 	return 0;
 
+out_lo:
+	smc_loopback_exit();
 out_ib:
 	smc_ib_unregister_client();
 out_sock:
@@ -3494,6 +3503,7 @@  static void __exit smc_exit(void)
 	tcp_unregister_ulp(&smc_ulp_ops);
 	sock_unregister(PF_SMC);
 	smc_core_exit();
+	smc_loopback_exit();
 	smc_ib_unregister_client();
 	destroy_workqueue(smc_close_wq);
 	destroy_workqueue(smc_tcp_ls_wq);
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index 53f63bf..61f5ff7 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -408,6 +408,12 @@  static void smc_cdc_msg_recv(struct smc_sock *smc, struct smc_cdc_msg *cdc)
 static void smcd_cdc_rx_tsklet(struct tasklet_struct *t)
 {
 	struct smc_connection *conn = from_tasklet(conn, t, rx_tsklet);
+
+	smcd_cdc_rx_handler(conn);
+}
+
+void smcd_cdc_rx_handler(struct smc_connection *conn)
+{
 	struct smcd_cdc_msg *data_cdc;
 	struct smcd_cdc_msg cdc;
 	struct smc_sock *smc;
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
index 696cc11..11559d4 100644
--- a/net/smc/smc_cdc.h
+++ b/net/smc/smc_cdc.h
@@ -301,5 +301,6 @@  int smcr_cdc_msg_send_validation(struct smc_connection *conn,
 				 struct smc_wr_buf *wr_buf);
 int smc_cdc_init(void) __init;
 void smcd_cdc_rx_init(struct smc_connection *conn);
+void smcd_cdc_rx_handler(struct smc_connection *conn);
 
 #endif /* SMC_CDC_H */
diff --git a/net/smc/smc_loopback.c b/net/smc/smc_loopback.c
new file mode 100644
index 0000000..973382a
--- /dev/null
+++ b/net/smc/smc_loopback.c
@@ -0,0 +1,282 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Shared Memory Communications Direct over loopback device.
+ *
+ *  Provide a SMC-D loopback dummy device.
+ *
+ *  Copyright (c) 2022, Alibaba Inc.
+ *
+ *  Author: Wen Gu <guwen@linux.alibaba.com>
+ *          Tony Lu <tonylu@linux.alibaba.com>
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/types.h>
+#include <net/smc.h>
+
+#include "smc_cdc.h"
+#include "smc_loopback.h"
+
+#define DRV_NAME "smc_lodev"
+
+struct lo_dev *lo_dev;
+
+static struct lo_systemeid LO_SYSTEM_EID = {
+	.seid_string = "SMC-SYSZ-LOSEID000000000",
+	.serial_number = "0000",
+	.type = "0000",
+};
+
+static int lo_query_rgid(struct smcd_dev *smcd, u64 rgid, u32 vid_valid,
+			 u32 vid)
+{
+	struct lo_dev *ldev = smcd->priv;
+
+	/* return local gid */
+	if (!ldev || rgid != ldev->lgid)
+		return -ENETUNREACH;
+	return 0;
+}
+
+static int lo_register_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb)
+{
+	struct lo_dev *ldev = smcd->priv;
+	struct lo_dmb_node *dmb_node;
+	int sba_idx, rc;
+
+	/* check space for new dmb */
+	for_each_clear_bit(sba_idx, ldev->sba_idx_mask, LODEV_MAX_DMBS) {
+		if (!test_and_set_bit(sba_idx, ldev->sba_idx_mask))
+			break;
+	}
+	if (sba_idx == LODEV_MAX_DMBS)
+		return -ENOSPC;
+
+	dmb_node = kzalloc(sizeof(*dmb_node), GFP_KERNEL);
+	if (!dmb_node) {
+		rc = -ENOMEM;
+		goto err_bit;
+	}
+
+	dmb_node->sba_idx = sba_idx;
+	dmb_node->cpu_addr = kzalloc(dmb->dmb_len, GFP_KERNEL |
+			     __GFP_NOWARN | __GFP_NORETRY |
+			     __GFP_NOMEMALLOC);
+	if (!dmb_node->cpu_addr) {
+		rc = -ENOMEM;
+		goto err_node;
+	}
+	dmb_node->len = dmb->dmb_len;
+
+	/* TODO: token is random but not exclusive !
+	 * suppose to find token in dmb hask table, if has this token
+	 * already, then generate another one.
+	 */
+	/* add new dmb into hash table */
+	get_random_bytes(&dmb_node->token, sizeof(dmb_node->token));
+	write_lock(&ldev->dmb_ht_lock);
+	hash_add(ldev->dmb_ht, &dmb_node->list, dmb_node->token);
+	write_unlock(&ldev->dmb_ht_lock);
+
+	dmb->sba_idx = dmb_node->sba_idx;
+	dmb->dmb_tok = dmb_node->token;
+	dmb->cpu_addr = dmb_node->cpu_addr;
+	dmb->dma_addr = dmb_node->dma_addr;
+	dmb->dmb_len = dmb_node->len;
+
+	return 0;
+
+err_node:
+	kfree(dmb_node);
+err_bit:
+	clear_bit(sba_idx, ldev->sba_idx_mask);
+	return rc;
+}
+
+static int lo_unregister_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb)
+{
+	struct lo_dmb_node *dmb_node = NULL, *tmp_node;
+	struct lo_dev *ldev = smcd->priv;
+
+	/* remove dmb from hash table */
+	write_lock(&ldev->dmb_ht_lock);
+	hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb->dmb_tok) {
+		if (tmp_node->token == dmb->dmb_tok) {
+			dmb_node = tmp_node;
+			break;
+		}
+	}
+	if (!dmb_node) {
+		write_unlock(&ldev->dmb_ht_lock);
+		return -EINVAL;
+	}
+	hash_del(&dmb_node->list);
+	write_unlock(&ldev->dmb_ht_lock);
+
+	clear_bit(dmb_node->sba_idx, ldev->sba_idx_mask);
+	kfree(dmb_node->cpu_addr);
+	kfree(dmb_node);
+
+	return 0;
+}
+
+static int lo_add_vlan_id(struct smcd_dev *smcd, u64 vlan_id)
+{
+	return 0;
+}
+
+static int lo_del_vlan_id(struct smcd_dev *smcd, u64 vlan_id)
+{
+	return 0;
+}
+
+static int lo_set_vlan_required(struct smcd_dev *smcd)
+{
+	return 0;
+}
+
+static int lo_reset_vlan_required(struct smcd_dev *smcd)
+{
+	return 0;
+}
+
+static int lo_signal_ieq(struct smcd_dev *smcd, u64 rgid, u32 trigger_irq,
+			 u32 event_code, u64 info)
+{
+	return 0;
+}
+
+static int lo_move_data(struct smcd_dev *smcd, u64 dmb_tok, unsigned int idx,
+			bool sf, unsigned int offset, void *data,
+			unsigned int size)
+{
+	struct lo_dmb_node *rmb_node = NULL, *tmp_node;
+	struct lo_dev *ldev = smcd->priv;
+
+	read_lock(&ldev->dmb_ht_lock);
+	hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb_tok) {
+		if (tmp_node->token == dmb_tok) {
+			rmb_node = tmp_node;
+			break;
+		}
+	}
+	if (!rmb_node) {
+		read_unlock(&ldev->dmb_ht_lock);
+		return -EINVAL;
+	}
+	read_unlock(&ldev->dmb_ht_lock);
+
+	memcpy((char *)rmb_node->cpu_addr + offset, data, size);
+
+	if (sf) {
+		struct smc_connection *conn =
+			smcd->conn[rmb_node->sba_idx];
+
+		if (conn && !conn->killed)
+			smcd_cdc_rx_handler(conn);
+	}
+	return 0;
+}
+
+static u8 *lo_get_system_eid(void)
+{
+	return &LO_SYSTEM_EID.seid_string[0];
+}
+
+static u16 lo_get_chid(struct smcd_dev *smcd)
+{
+	return 0;
+}
+
+static const struct smcd_ops lo_ops = {
+	.query_remote_gid = lo_query_rgid,
+	.register_dmb = lo_register_dmb,
+	.unregister_dmb = lo_unregister_dmb,
+	.add_vlan_id = lo_add_vlan_id,
+	.del_vlan_id = lo_del_vlan_id,
+	.set_vlan_required = lo_set_vlan_required,
+	.reset_vlan_required = lo_reset_vlan_required,
+	.signal_event = lo_signal_ieq,
+	.move_data = lo_move_data,
+	.get_system_eid = lo_get_system_eid,
+	.get_chid = lo_get_chid,
+};
+
+static int lo_dev_init(struct lo_dev *ldev)
+{
+	struct smcd_dev *smcd = ldev->smcd;
+
+	/* smcd related */
+	smcd->is_loopback = 1;
+	smcd->priv = ldev;
+	get_random_bytes(&smcd->local_gid, sizeof(smcd->local_gid));
+
+	/* ldev related */
+	/* TODO: lgid is random but not exclusive !
+	 */
+	ldev->lgid = smcd->local_gid;
+	rwlock_init(&ldev->dmb_ht_lock);
+	hash_init(ldev->dmb_ht);
+
+	return smcd_register_dev(smcd);
+}
+
+static int lo_dev_probe(void)
+{
+	struct lo_dev *ldev;
+	int ret;
+
+	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
+	if (!ldev)
+		return -ENOMEM;
+
+	ldev->smcd = smcd_alloc_dev(NULL, "smcd-loopback-dev",
+				    &lo_ops, LODEV_MAX_DMBS);
+	if (!ldev->smcd) {
+		ret = -ENOMEM;
+		goto err_ldev;
+	}
+
+	ret = lo_dev_init(ldev);
+	if (ret)
+		goto err_smcd;
+
+	lo_dev = ldev;
+	return 0;
+
+err_smcd:
+	smcd_free_dev(ldev->smcd);
+err_ldev:
+	kfree(ldev);
+	return ret;
+}
+
+static void lo_dev_exit(struct lo_dev *ldev)
+{
+	smcd_unregister_dev(ldev->smcd);
+}
+
+static void lo_dev_remove(void)
+{
+	if (!lo_dev)
+		return;
+
+	lo_dev_exit(lo_dev);
+	smcd_free_dev(lo_dev->smcd);
+	kfree(lo_dev);
+}
+
+int smc_loopback_init(void)
+{
+	/* TODO: now lo_dev is a global device shared by
+	 * the whole kernel, and can't be referred to by
+	 * smc-tools command 'smcd dev'. Need to be improved.
+	 */
+	return lo_dev_probe();
+}
+
+void smc_loopback_exit(void)
+{
+	lo_dev_remove();
+}
diff --git a/net/smc/smc_loopback.h b/net/smc/smc_loopback.h
new file mode 100644
index 0000000..d7f7815
--- /dev/null
+++ b/net/smc/smc_loopback.h
@@ -0,0 +1,59 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  Shared Memory Communications Direct over loopback device.
+ *
+ *  Provide a SMC-D loopback dummy device.
+ *
+ *  Copyright (c) 2022, Alibaba Inc.
+ *
+ *  Author: Wen Gu <guwen@linux.alibaba.com>
+ *          Tony Lu <tonylu@linux.alibaba.com>
+ *
+ */
+
+#ifndef _SMC_LOOPBACK_H
+#define _SMC_LOOPBACK_H
+
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <net/smc.h>
+
+#include "smc_core.h"
+
+#define LODEV_MAX_DMBS 5000
+#define LODEV_MAX_DMBS_BUCKETS 16
+
+struct lo_dmb_node {
+	struct hlist_node list;
+	u64 token;
+	u32 len;
+	u32 sba_idx;
+	void *cpu_addr;
+	dma_addr_t dma_addr;
+};
+
+struct lo_dev {
+	struct smcd_dev *smcd;
+	/* priv data */
+	u64 lgid;
+	DECLARE_BITMAP(sba_idx_mask, LODEV_MAX_DMBS);
+	rwlock_t dmb_ht_lock;
+	DECLARE_HASHTABLE(dmb_ht, LODEV_MAX_DMBS_BUCKETS);
+};
+
+struct lo_systemeid {
+	u8 seid_string[24];
+	u8 serial_number[4];
+	u8 type[4];
+};
+
+/* smcd loopback dev*/
+extern struct lo_dev *lo_dev;
+
+int smc_loopback_init(void);
+void smc_loopback_exit(void);
+
+#endif /* _SMC_LOOPBACK_H */
+