diff mbox series

[net-next,14/15] net/smc: introduce loopback-ism DMB data copy control

Message ID 20240111120036.109903-15-guwen@linux.alibaba.com (mailing list archive)
State Deferred
Delegated to: Netdev Maintainers
Headers show
Series net/smc: implement loopback-ism used by SMC-D | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1090 this patch: 1090
netdev/cc_maintainers success CCed 0 of 0 maintainers
netdev/build_clang success Errors and warnings before: 1108 this patch: 1108
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1114 this patch: 1114
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 93 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Wen Gu Jan. 11, 2024, noon UTC
This provides a way to {get|set} whether loopback-ism device supports
merging sndbuf with peer DMB to eliminate data copies between them.

echo 0 > /sys/devices/virtual/smc/loopback-ism/dmb_copy # support
echo 1 > /sys/devices/virtual/smc/loopback-ism/dmb_copy # not support

The settings take effect after re-activating loopback-ism by:

echo 0 > /sys/devices/virtual/smc/loopback-ism/active
echo 1 > /sys/devices/virtual/smc/loopback-ism/active

After this, the link group related to loopback-ism will be flushed and
the sndbufs of subsequent connections will be merged or not merged with
peer DMB.

The motivation of this control is that the bandwidth will be highly
improved when sndbuf and DMB are merged, but when virtually contiguous
DMB is provided and merged with sndbuf, it will be concurrently accessed
on Tx and Rx, then there will be a bottleneck caused by lock contention
of find_vmap_area when there are many CPUs and CONFIG_HARDENED_USERCOPY
is set (see link below). So an option is provided.

Link: https://lore.kernel.org/all/238e63cd-e0e8-4fbf-852f-bc4d5bc35d5a@linux.alibaba.com/
Signed-off-by: Wen Gu <guwen@linux.alibaba.com>
---
 net/smc/smc_loopback.c | 46 ++++++++++++++++++++++++++++++++++++++++++
 net/smc/smc_loopback.h |  8 +++++++-
 2 files changed, 53 insertions(+), 1 deletion(-)

Comments

Niklas Schnelle Jan. 12, 2024, 4:24 p.m. UTC | #1
On Thu, 2024-01-11 at 20:00 +0800, Wen Gu wrote:
> This provides a way to {get|set} whether loopback-ism device supports
> merging sndbuf with peer DMB to eliminate data copies between them.
> 
> echo 0 > /sys/devices/virtual/smc/loopback-ism/dmb_copy # support
> echo 1 > /sys/devices/virtual/smc/loopback-ism/dmb_copy # not support

The two support/no support remarks are a bit confusing because support
here seems to mean "support no-copy mode" while the attribute is more
like "force copy mode". How about:

echo 0 > /sys/devices/virtual/smc/loopback-ism/dmb_copy # one DMB mode
echo 1 > /sys/devices/virtual/smc/loopback-ism/dmb_copy # copy mode

> 
> The settings take effect after re-activating loopback-ism by:
> 
> echo 0 > /sys/devices/virtual/smc/loopback-ism/active
> echo 1 > /sys/devices/virtual/smc/loopback-ism/active
> 
> After this, the link group related to loopback-ism will be flushed and
> the sndbufs of subsequent connections will be merged or not merged with
> peer DMB.
> 
> The motivation of this control is that the bandwidth will be highly
> improved when sndbuf and DMB are merged, but when virtually contiguous
> DMB is provided and merged with sndbuf, it will be concurrently accessed
> on Tx and Rx, then there will be a bottleneck caused by lock contention
> of find_vmap_area when there are many CPUs and CONFIG_HARDENED_USERCOPY
> is set (see link below). So an option is provided.
> 
> Link: https://lore.kernel.org/all/238e63cd-e0e8-4fbf-852f-bc4d5bc35d5a@linux.alibaba.com/
> Signed-off-by: Wen Gu <guwen@linux.alibaba.com>
---8<---
Wen Gu Jan. 13, 2024, 7:12 a.m. UTC | #2
On 2024/1/13 00:24, Niklas Schnelle wrote:
> On Thu, 2024-01-11 at 20:00 +0800, Wen Gu wrote:
>> This provides a way to {get|set} whether loopback-ism device supports
>> merging sndbuf with peer DMB to eliminate data copies between them.
>>
>> echo 0 > /sys/devices/virtual/smc/loopback-ism/dmb_copy # support
>> echo 1 > /sys/devices/virtual/smc/loopback-ism/dmb_copy # not support
> 
> The two support/no support remarks are a bit confusing because support
> here seems to mean "support no-copy mode" while the attribute is more
> like "force copy mode". How about:
> 
> echo 0 > /sys/devices/virtual/smc/loopback-ism/dmb_copy # one DMB mode
> echo 1 > /sys/devices/virtual/smc/loopback-ism/dmb_copy # copy mode
> 

Thank you! Niklas.
That makes it much clearer. It will be improved in next version.

>>
>> The settings take effect after re-activating loopback-ism by:
>>
>> echo 0 > /sys/devices/virtual/smc/loopback-ism/active
>> echo 1 > /sys/devices/virtual/smc/loopback-ism/active
>>
>> After this, the link group related to loopback-ism will be flushed and
>> the sndbufs of subsequent connections will be merged or not merged with
>> peer DMB.
>>
>> The motivation of this control is that the bandwidth will be highly
>> improved when sndbuf and DMB are merged, but when virtually contiguous
>> DMB is provided and merged with sndbuf, it will be concurrently accessed
>> on Tx and Rx, then there will be a bottleneck caused by lock contention
>> of find_vmap_area when there are many CPUs and CONFIG_HARDENED_USERCOPY
>> is set (see link below). So an option is provided.
>>
>> Link: https://lore.kernel.org/all/238e63cd-e0e8-4fbf-852f-bc4d5bc35d5a@linux.alibaba.com/
>> Signed-off-by: Wen Gu <guwen@linux.alibaba.com>
> ---8<---
Wenjia Zhang Feb. 16, 2024, 2:25 p.m. UTC | #3
On 11.01.24 13:00, Wen Gu wrote:
> This provides a way to {get|set} whether loopback-ism device supports
> merging sndbuf with peer DMB to eliminate data copies between them.
> 
> echo 0 > /sys/devices/virtual/smc/loopback-ism/dmb_copy # support
> echo 1 > /sys/devices/virtual/smc/loopback-ism/dmb_copy # not support
> 
Besides the same confusing as Niklas already mentioned, the name of the 
option looks not clear enough to what it means. What about:
echo 1 > /sys/devices/virtual/smc/loopback-ism/nocopy_support # merge mode
echo 0 > /sys/devices/virtual/smc/loopback-ism/nocopy_support # copy mode

> The settings take effect after re-activating loopback-ism by:
> 
> echo 0 > /sys/devices/virtual/smc/loopback-ism/active
> echo 1 > /sys/devices/virtual/smc/loopback-ism/active
> 
> After this, the link group related to loopback-ism will be flushed and
> the sndbufs of subsequent connections will be merged or not merged with
> peer DMB.
> 
> The motivation of this control is that the bandwidth will be highly
> improved when sndbuf and DMB are merged, but when virtually contiguous
> DMB is provided and merged with sndbuf, it will be concurrently accessed
> on Tx and Rx, then there will be a bottleneck caused by lock contention
> of find_vmap_area when there are many CPUs and CONFIG_HARDENED_USERCOPY
> is set (see link below). So an option is provided.
> 
> Link: https://lore.kernel.org/all/238e63cd-e0e8-4fbf-852f-bc4d5bc35d5a@linux.alibaba.com/
> Signed-off-by: Wen Gu <guwen@linux.alibaba.com>
> ---
We tried some simple workloads, and the performance of the no-copy case 
was remarkable. Thus, we're wondering if it is necessary to have the 
tunable setting in this loopback case? Or rather, why do we need the 
copy option? Is that because of the bottleneck caused by using the 
combination of the no-copy and virtually contiguours DMA? Or at least 
let no-copy as the default one.
Wen Gu Feb. 20, 2024, 3:36 a.m. UTC | #4
On 2024/2/16 22:25, Wenjia Zhang wrote:
> 
> 
> On 11.01.24 13:00, Wen Gu wrote:
>> This provides a way to {get|set} whether loopback-ism device supports
>> merging sndbuf with peer DMB to eliminate data copies between them.
>>
>> echo 0 > /sys/devices/virtual/smc/loopback-ism/dmb_copy # support
>> echo 1 > /sys/devices/virtual/smc/loopback-ism/dmb_copy # not support
>>
> Besides the same confusing as Niklas already mentioned, the name of the option looks not clear enough to what it means. 
> What about:
> echo 1 > /sys/devices/virtual/smc/loopback-ism/nocopy_support # merge mode
> echo 0 > /sys/devices/virtual/smc/loopback-ism/nocopy_support # copy mode
>

OK, if we decide to keep the knobs, I will improve the name. Thanks!

>> The settings take effect after re-activating loopback-ism by:
>>
>> echo 0 > /sys/devices/virtual/smc/loopback-ism/active
>> echo 1 > /sys/devices/virtual/smc/loopback-ism/active
>>
>> After this, the link group related to loopback-ism will be flushed and
>> the sndbufs of subsequent connections will be merged or not merged with
>> peer DMB.
>>
>> The motivation of this control is that the bandwidth will be highly
>> improved when sndbuf and DMB are merged, but when virtually contiguous
>> DMB is provided and merged with sndbuf, it will be concurrently accessed
>> on Tx and Rx, then there will be a bottleneck caused by lock contention
>> of find_vmap_area when there are many CPUs and CONFIG_HARDENED_USERCOPY
>> is set (see link below). So an option is provided.
>>
>> Link: https://lore.kernel.org/all/238e63cd-e0e8-4fbf-852f-bc4d5bc35d5a@linux.alibaba.com/
>> Signed-off-by: Wen Gu <guwen@linux.alibaba.com>
>> ---
> We tried some simple workloads, and the performance of the no-copy case was remarkable. Thus, we're wondering if it is 
> necessary to have the tunable setting in this loopback case? Or rather, why do we need the copy option? Is that because 
> of the bottleneck caused by using the combination of the no-copy and virtually contiguours DMA? Or at least let no-copy 
> as the default one.

Yes, it is because the bottleneck caused by using the combination of the no-copy
and virtual-DMB. If we have to use virtual-DMB and CONFIG_HARDENED_USERCOPY is
set, then we may be forced to use copy mode in many CPUs environment, to get the
good latency performance (the bandwidth performance still drop because of copy mode).

But if we agree that physical-DMB is acceptable (it costs 1 physical buffer per conn side
in loopback-ism no-copy mode, same as what sndbuf costs when using s390 ISM), then
there is no such performance issue and the two knobs can be removed. (see also the reply
for 13/15 patch [1]).

[1] https://lore.kernel.org/netdev/442061eb-107a-421d-bc2e-13c8defb0f7b@linux.alibaba.com/

Thanks!
Wenjia Zhang Feb. 23, 2024, 2:42 p.m. UTC | #5
On 20.02.24 04:36, Wen Gu wrote:
> 
> 
> On 2024/2/16 22:25, Wenjia Zhang wrote:
>>
>>
>> On 11.01.24 13:00, Wen Gu wrote:
>>> This provides a way to {get|set} whether loopback-ism device supports
>>> merging sndbuf with peer DMB to eliminate data copies between them.
>>>
>>> echo 0 > /sys/devices/virtual/smc/loopback-ism/dmb_copy # support
>>> echo 1 > /sys/devices/virtual/smc/loopback-ism/dmb_copy # not support
>>>
>> Besides the same confusing as Niklas already mentioned, the name of 
>> the option looks not clear enough to what it means. What about:
>> echo 1 > /sys/devices/virtual/smc/loopback-ism/nocopy_support # merge 
>> mode
>> echo 0 > /sys/devices/virtual/smc/loopback-ism/nocopy_support # copy mode
>>
> 
> OK, if we decide to keep the knobs, I will improve the name. Thanks!
> 
>>> The settings take effect after re-activating loopback-ism by:
>>>
>>> echo 0 > /sys/devices/virtual/smc/loopback-ism/active
>>> echo 1 > /sys/devices/virtual/smc/loopback-ism/active
>>>
>>> After this, the link group related to loopback-ism will be flushed and
>>> the sndbufs of subsequent connections will be merged or not merged with
>>> peer DMB.
>>>
>>> The motivation of this control is that the bandwidth will be highly
>>> improved when sndbuf and DMB are merged, but when virtually contiguous
>>> DMB is provided and merged with sndbuf, it will be concurrently accessed
>>> on Tx and Rx, then there will be a bottleneck caused by lock contention
>>> of find_vmap_area when there are many CPUs and CONFIG_HARDENED_USERCOPY
>>> is set (see link below). So an option is provided.
>>>
>>> Link: 
>>> https://lore.kernel.org/all/238e63cd-e0e8-4fbf-852f-bc4d5bc35d5a@linux.alibaba.com/
>>> Signed-off-by: Wen Gu <guwen@linux.alibaba.com>
>>> ---
>> We tried some simple workloads, and the performance of the no-copy 
>> case was remarkable. Thus, we're wondering if it is necessary to have 
>> the tunable setting in this loopback case? Or rather, why do we need 
>> the copy option? Is that because of the bottleneck caused by using the 
>> combination of the no-copy and virtually contiguours DMA? Or at least 
>> let no-copy as the default one.
> 
> Yes, it is because the bottleneck caused by using the combination of the 
> no-copy
> and virtual-DMB. If we have to use virtual-DMB and 
> CONFIG_HARDENED_USERCOPY is
> set, then we may be forced to use copy mode in many CPUs environment, to 
> get the
> good latency performance (the bandwidth performance still drop because 
> of copy mode).
> 
> But if we agree that physical-DMB is acceptable (it costs 1 physical 
> buffer per conn side
> in loopback-ism no-copy mode, same as what sndbuf costs when using s390 
> ISM), then
> there is no such performance issue and the two knobs can be removed. 
> (see also the reply
> for 13/15 patch [1]).
> 
> [1] 
> https://lore.kernel.org/netdev/442061eb-107a-421d-bc2e-13c8defb0f7b@linux.alibaba.com/
> 
> Thanks!
Thank you, Wen, for the elaboration! As I said, though we did see some 
better performance on using the virtually contiguous memory with a 
simple test, the improvement was not really significant. Additionally, 
our environment ist very different as your 48 CPUs qemu environment, and 
it also depends on the workload. I think I can understand why you see 
better performance by using physically contiguous memory. Anyway, I 
don't have any objection on using physical-DMB only. But I still want to 
see if there is any other opinion.
diff mbox series

Patch

diff --git a/net/smc/smc_loopback.c b/net/smc/smc_loopback.c
index 2e734f8e08f5..bfbb346ef01a 100644
--- a/net/smc/smc_loopback.c
+++ b/net/smc/smc_loopback.c
@@ -26,6 +26,7 @@ 
 
 static const char smc_lo_dev_name[] = "loopback-ism";
 static unsigned int smc_lo_dmb_type = SMC_LO_DMB_PHYS;
+static unsigned int smc_lo_dmb_copy = SMC_LO_DMB_NOCOPY;
 static struct smc_lo_dev *lo_dev;
 static struct class *smc_class;
 
@@ -167,9 +168,52 @@  static ssize_t dmb_type_store(struct device *dev,
 	return count;
 }
 static DEVICE_ATTR_RW(dmb_type);
+
+static ssize_t dmb_copy_show(struct device *dev,
+			     struct device_attribute *attr, char *buf)
+{
+	struct smc_lo_dev *ldev =
+		container_of(dev, struct smc_lo_dev, dev);
+	const char *copy;
+
+	switch (ldev->dmb_copy) {
+	case SMC_LO_DMB_NOCOPY:
+		copy = "sndbuf and DMB merged and no data copied";
+		break;
+	case SMC_LO_DMB_COPY:
+		copy = "sndbuf and DMB separated and data copied";
+		break;
+	default:
+		copy = "Unknown setting";
+	}
+
+	return sysfs_emit(buf, "%d: %s\n", ldev->dmb_copy, copy);
+}
+
+static ssize_t dmb_copy_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t count)
+{
+	unsigned int dmb_copy;
+	int ret;
+
+	ret = kstrtouint(buf, 0, &dmb_copy);
+	if (ret)
+		return ret;
+
+	if (dmb_copy != SMC_LO_DMB_NOCOPY &&
+	    dmb_copy != SMC_LO_DMB_COPY)
+		return -EINVAL;
+
+	smc_lo_dmb_copy = dmb_copy; /* re-activate to take effect */
+	return count;
+}
+static DEVICE_ATTR_RW(dmb_copy);
+
 static struct attribute *smc_lo_attrs[] = {
 	&dev_attr_active.attr,
 	&dev_attr_dmb_type.attr,
+	&dev_attr_dmb_copy.attr,
 	&dev_attr_xfer_bytes.attr,
 	&dev_attr_dmbs_cnt.attr,
 	NULL,
@@ -451,6 +495,7 @@  static int smcd_lo_register_dev(struct smc_lo_dev *ldev)
 	smcd->priv = ldev;
 	smc_ism_set_v2_capable();
 	ldev->dmb_type = smc_lo_dmb_type;
+	ldev->dmb_copy = smc_lo_dmb_copy;
 	mutex_lock(&smcd_dev_list.mutex);
 	list_add(&smcd->list, &smcd_dev_list.list);
 	mutex_unlock(&smcd_dev_list.mutex);
@@ -475,6 +520,7 @@  static void smcd_lo_unregister_dev(struct smc_lo_dev *ldev)
 	kfree(smcd->conn);
 	kfree(smcd);
 	ldev->dmb_type = smc_lo_dmb_type;
+	ldev->dmb_copy = smc_lo_dmb_copy;
 	smc_lo_clear_stats(ldev);
 }
 
diff --git a/net/smc/smc_loopback.h b/net/smc/smc_loopback.h
index 8ee5c6805fc4..7ecb4a35eb36 100644
--- a/net/smc/smc_loopback.h
+++ b/net/smc/smc_loopback.h
@@ -28,6 +28,11 @@  enum {
 	SMC_LO_DMB_VIRT,
 };
 
+enum {
+	SMC_LO_DMB_NOCOPY,
+	SMC_LO_DMB_COPY,
+};
+
 struct smc_lo_dmb_node {
 	struct hlist_node list;
 	u64 token;
@@ -45,7 +50,8 @@  struct smc_lo_dev_stats64 {
 struct smc_lo_dev {
 	struct smcd_dev *smcd;
 	struct device dev;
-	u8 active;
+	u8 active : 1;
+	u8 dmb_copy : 1;
 	u8 dmb_type;
 	u16 chid;
 	struct smcd_gid local_gid;