[net-next,2/6] net/smc: optimize for smc_sndbuf_sync_sg_for_device and smc_rmb_sync_sg_for_cpu

Message ID 1657626690-60367-3-git-send-email-guwen@linux.alibaba.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Series net/smc: Introduce virtually contiguous buffers for SMC-R

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers success CCed 8 of 8 maintainers
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 94 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Wen Gu July 12, 2022, 11:51 a.m. UTC
From: Guangguan Wang <guangguan.wang@linux.alibaba.com>

Some CPUs, such as Xeon, guarantee DMA cache coherency, so there is
no need to call the DMA sync APIs to flush the cache on them. To
avoid calling the DMA sync APIs on the I/O path, use dma_need_sync()
to check whether a buffer needs DMA sync when the smc_buf_desc is
created.

Signed-off-by: Guangguan Wang <guangguan.wang@linux.alibaba.com>
---
 net/smc/smc_core.c |  8 ++++++++
 net/smc/smc_core.h |  1 +
 net/smc/smc_ib.c   | 29 +++++++++++++++++++++++++++++
 net/smc/smc_ib.h   |  2 ++
 4 files changed, 40 insertions(+)
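
For background: dma_need_sync() (declared in <linux/dma-mapping.h>)
reports whether streaming DMA to a given address on a given device
actually requires cache maintenance; on cache-coherent setups it
returns false, so the dma_sync_* calls can be skipped entirely. The
sketch below shows the pattern the patch applies: probe once at map
time, then reduce the per-I/O check to a single flag test. It is not
the patch itself: my_buf, my_buf_map() and my_buf_sync_for_cpu() are
hypothetical stand-ins for smc_buf_desc, smcr_buf_map_link() and
smc_ib_sync_sg_for_cpu(), and it syncs the whole sg table at once
where the real code syncs entry by entry.

#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>

/* Hypothetical stand-in for smc_buf_desc: one "needs sync" bit per link. */
struct my_buf {
	struct sg_table	sgt;
	u8		need_sync;
};

/* Map time: probe each DMA address once and cache the verdict. */
static void my_buf_map(struct device *dev, struct my_buf *buf, u8 link_idx)
{
	struct scatterlist *sg;
	unsigned int i;

	for_each_sg(buf->sgt.sgl, sg, buf->sgt.nents, i) {
		if (!sg_dma_len(sg))
			break;
		if (dma_need_sync(dev, sg_dma_address(sg))) {
			buf->need_sync |= 1U << link_idx;
			break;
		}
	}
}

/* I/O path: on coherent devices this is now a single branch. */
static void my_buf_sync_for_cpu(struct device *dev, struct my_buf *buf,
				u8 link_idx)
{
	if (!(buf->need_sync & (1U << link_idx)))
		return;
	dma_sync_sg_for_cpu(dev, buf->sgt.sgl, buf->sgt.nents,
			    DMA_FROM_DEVICE);
}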

Patch

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 1faa0cb..fa3a7a8 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -2016,6 +2016,9 @@  static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
 		goto free_table;
 	}
 
+	buf_desc->is_dma_need_sync |=
+		smc_ib_is_sg_need_sync(lnk, buf_desc) << lnk->link_idx;
+
 	/* create a new memory region for the RMB */
 	if (is_rmb) {
 		rc = smc_ib_get_memory_region(lnk->roce_pd,
@@ -2234,6 +2237,7 @@  static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
 		/* check for reusable slot in the link group */
 		buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
 		if (buf_desc) {
+			buf_desc->is_dma_need_sync = 0;
 			SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
 			SMC_STAT_BUF_REUSE(smc, is_smcd, is_rmb);
 			break; /* found reusable slot */
@@ -2292,6 +2296,8 @@  static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
 
 void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
 {
+	if (!conn->sndbuf_desc->is_dma_need_sync)
+		return;
 	if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd ||
 	    !smc_link_active(conn->lnk))
 		return;
@@ -2302,6 +2308,8 @@  void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
 {
 	int i;
 
+	if (!conn->rmb_desc->is_dma_need_sync)
+		return;
 	if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd)
 		return;
 	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index c441dfe..46ddec5 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -180,6 +180,7 @@  struct smc_buf_desc {
 					/* mem region registered */
 			u8		is_map_ib[SMC_LINKS_PER_LGR_MAX];
 					/* mem region mapped to lnk */
+			u8		is_dma_need_sync;
 			u8		is_reg_err;
 					/* buffer registration err */
 		};
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index dcda416..60e5095 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -729,6 +729,29 @@  int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
 	return 0;
 }
 
+bool smc_ib_is_sg_need_sync(struct smc_link *lnk,
+			    struct smc_buf_desc *buf_slot)
+{
+	struct scatterlist *sg;
+	unsigned int i;
+	bool ret = false;
+
+	/* for now there is just one DMA address */
+	for_each_sg(buf_slot->sgt[lnk->link_idx].sgl, sg,
+		    buf_slot->sgt[lnk->link_idx].nents, i) {
+		if (!sg_dma_len(sg))
+			break;
+		if (dma_need_sync(lnk->smcibdev->ibdev->dma_device,
+				  sg_dma_address(sg))) {
+			ret = true;
+			goto out;
+		}
+	}
+
+out:
+	return ret;
+}
+
 /* synchronize buffer usage for cpu access */
 void smc_ib_sync_sg_for_cpu(struct smc_link *lnk,
 			    struct smc_buf_desc *buf_slot,
@@ -737,6 +760,9 @@  void smc_ib_sync_sg_for_cpu(struct smc_link *lnk,
 	struct scatterlist *sg;
 	unsigned int i;
 
+	if (!(buf_slot->is_dma_need_sync & (1U << lnk->link_idx)))
+		return;
+
 	/* for now there is just one DMA address */
 	for_each_sg(buf_slot->sgt[lnk->link_idx].sgl, sg,
 		    buf_slot->sgt[lnk->link_idx].nents, i) {
@@ -757,6 +783,9 @@  void smc_ib_sync_sg_for_device(struct smc_link *lnk,
 	struct scatterlist *sg;
 	unsigned int i;
 
+	if (!(buf_slot->is_dma_need_sync & (1U << lnk->link_idx)))
+		return;
+
 	/* for now there is just one DMA address */
 	for_each_sg(buf_slot->sgt[lnk->link_idx].sgl, sg,
 		    buf_slot->sgt[lnk->link_idx].nents, i) {
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index 5d8b49c..03429567 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -102,6 +102,8 @@  void smc_ib_buf_unmap_sg(struct smc_link *lnk,
 int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
 			     struct smc_buf_desc *buf_slot, u8 link_idx);
 void smc_ib_put_memory_region(struct ib_mr *mr);
+bool smc_ib_is_sg_need_sync(struct smc_link *lnk,
+			    struct smc_buf_desc *buf_slot);
 void smc_ib_sync_sg_for_cpu(struct smc_link *lnk,
 			    struct smc_buf_desc *buf_slot,
 			    enum dma_data_direction data_direction);
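
A note on the new field's semantics: is_dma_need_sync is a per-link
bitmask, not a boolean. smcr_buf_map_link() ORs in bit link_idx for
each mapped link, smc_ib_sync_sg_for_cpu()/_for_device() test only
their own link's bit, the connection-level checks treat any non-zero
mask as "at least one link may need syncing", and the mask is reset
to zero when a buffer slot is reused. The self-contained toy program
below illustrates that convention; the bit values are made up for the
example.

#include <stdio.h>

#define SMC_LINKS_PER_LGR_MAX	3	/* as in net/smc/smc_core.h */

int main(void)
{
	unsigned char is_dma_need_sync = 0;

	/* Mapping: suppose probing found that only link 1 needs syncing. */
	is_dma_need_sync |= 1U << 1;

	/* Connection-level fast path: all-zero mask means nothing to do. */
	if (!is_dma_need_sync)
		return 0;

	/* Per-link helpers: each link tests only its own bit. */
	for (unsigned int idx = 0; idx < SMC_LINKS_PER_LGR_MAX; idx++)
		printf("link %u: %s\n", idx,
		       is_dma_need_sync & (1U << idx) ? "sync" : "skip");
	return 0;
}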