Message ID | 20220513071551.22065-3-guangguan.wang@linux.alibaba.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Series | net/smc: send and write inline optimization for smc |
Context | Check | Description |
---|---|---|
netdev/tree_selection | success | Clearly marked for net-next |
netdev/fixes_present | success | Fixes tag not required for -next series |
netdev/subject_prefix | success | Link |
netdev/cover_letter | success | Series has a cover letter |
netdev/patch_count | success | Link |
netdev/header_inline | success | No static functions without inline keyword in header files |
netdev/build_32bit | fail | Errors and warnings before: 0 this patch: 1 |
netdev/cc_maintainers | success | CCed 7 of 7 maintainers |
netdev/build_clang | success | Errors and warnings before: 0 this patch: 0 |
netdev/module_param | success | Was 0 now: 0 |
netdev/verify_signedoff | success | Signed-off-by tag matches author and committer |
netdev/verify_fixes | success | No Fixes tag |
netdev/build_allmodconfig_warn | success | Errors and warnings before: 0 this patch: 0 |
netdev/checkpatch | success | total: 0 errors, 0 warnings, 0 checks, 32 lines checked |
netdev/kdoc | success | Errors and warnings before: 0 this patch: 0 |
netdev/source_inline | success | Was 0 now: 0 |
Hi Guangguan,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on net-next/master]

url:    https://github.com/intel-lab-lkp/linux/commits/Guangguan-Wang/net-smc-send-and-write-inline-optimization-for-smc/20220513-151715
base:   https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git b67fd3d9d94223b424674f45eeadeff58b4b03ef
config: nios2-allyesconfig (https://download.01.org/0day-ci/archive/20220513/202205131912.bHaVZP7f-lkp@intel.com/config)
compiler: nios2-linux-gcc (GCC) 11.3.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/1e1003898ecdb92b0339075c7501e486bda2d8e8
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Guangguan-Wang/net-smc-send-and-write-inline-optimization-for-smc/20220513-151715
        git checkout 1e1003898ecdb92b0339075c7501e486bda2d8e8
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.3.0 make.cross W=1 O=build_dir ARCH=nios2 SHELL=/bin/bash net/smc/

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   net/smc/smc_tx.c: In function 'smcr_tx_rdma_writes':
>> net/smc/smc_tx.c:399:37: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
     399 |                 base_addr = (u64)conn->sndbuf_desc->cpu_addr;
         |                             ^

vim +399 net/smc/smc_tx.c

   376
   377  /* SMC-R helper for smc_tx_rdma_writes() */
   378  static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,
   379                                 size_t src_off, size_t src_len,
   380                                 size_t dst_off, size_t dst_len,
   381                                 struct smc_rdma_wr *wr_rdma_buf)
   382  {
   383          struct smc_link *link = conn->lnk;
   384
   385          dma_addr_t dma_addr =
   386                  sg_dma_address(conn->sndbuf_desc->sgt[link->link_idx].sgl);
   387          int src_len_sum = src_len, dst_len_sum = dst_len;
   388          int sent_count = src_off;
   389          int srcchunk, dstchunk;
   390          int num_sges;
   391          int rc;
   392
   393          for (dstchunk = 0; dstchunk < 2; dstchunk++) {
   394                  struct ib_rdma_wr *wr = &wr_rdma_buf->wr_tx_rdma[dstchunk];
   395                  struct ib_sge *sge = wr->wr.sg_list;
   396                  u64 base_addr = dma_addr;
   397
   398                  if (dst_len <= link->qp_attr.cap.max_inline_data) {
 > 399                          base_addr = (u64)conn->sndbuf_desc->cpu_addr;
   400                          wr->wr.send_flags |= IB_SEND_INLINE;
   401                  } else {
   402                          wr->wr.send_flags &= ~IB_SEND_INLINE;
   403                  }
   404
   405                  num_sges = 0;
   406                  for (srcchunk = 0; srcchunk < 2; srcchunk++) {
   407                          sge[srcchunk].addr = base_addr + src_off;
   408                          sge[srcchunk].length = src_len;
   409                          num_sges++;
   410
   411                          src_off += src_len;
   412                          if (src_off >= conn->sndbuf_desc->len)
   413                                  src_off -= conn->sndbuf_desc->len;
   414                                          /* modulo in send ring */
   415                          if (src_len_sum == dst_len)
   416                                  break; /* either on 1st or 2nd iteration */
   417                          /* prepare next (== 2nd) iteration */
   418                          src_len = dst_len - src_len; /* remainder */
   419                          src_len_sum += src_len;
   420                  }
   421                  rc = smc_tx_rdma_write(conn, dst_off, num_sges, wr);
   422                  if (rc)
   423                          return rc;
   424                  if (dst_len_sum == len)
   425                          break; /* either on 1st or 2nd iteration */
   426                  /* prepare next (== 2nd) iteration */
   427                  dst_off = 0; /* modulo offset in RMBE ring buffer */
   428                  dst_len = len - dst_len; /* remainder */
   429                  dst_len_sum += dst_len;
   430                  src_len = min_t(int, dst_len, conn->sndbuf_desc->len -
   431                                  sent_count);
   432                  src_len_sum = src_len;
   433          }
   434          return 0;
   435  }
   436
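The warning is specific to 32-bit targets such as nios2: `conn->sndbuf_desc->cpu_addr` is a `void *` (4 bytes there), so casting it straight to `u64` (8 bytes) trips `-Wpointer-to-int-cast`. The usual kernel idiom is to go through the pointer-sized `uintptr_t` first. The fragment below is a sketch of that idiom applied to the flagged branch; it assumes the surrounding `smcr_tx_rdma_writes()` context from the excerpt above and is not necessarily the fix the author later chose.

```c
if (dst_len <= link->qp_attr.cap.max_inline_data) {
	/* uintptr_t matches the pointer size on both 32- and 64-bit,
	 * so the subsequent widening to u64 is lossless and warning-free */
	base_addr = (u64)(uintptr_t)conn->sndbuf_desc->cpu_addr;
	wr->wr.send_flags |= IB_SEND_INLINE;
} else {
	wr->wr.send_flags &= ~IB_SEND_INLINE;
}
```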
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 98ca9229fe87..4294259b3588 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -391,12 +391,20 @@ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,
 	int rc;
 
 	for (dstchunk = 0; dstchunk < 2; dstchunk++) {
-		struct ib_sge *sge =
-			wr_rdma_buf->wr_tx_rdma[dstchunk].wr.sg_list;
+		struct ib_rdma_wr *wr = &wr_rdma_buf->wr_tx_rdma[dstchunk];
+		struct ib_sge *sge = wr->wr.sg_list;
+		u64 base_addr = dma_addr;
+
+		if (dst_len <= link->qp_attr.cap.max_inline_data) {
+			base_addr = (u64)conn->sndbuf_desc->cpu_addr;
+			wr->wr.send_flags |= IB_SEND_INLINE;
+		} else {
+			wr->wr.send_flags &= ~IB_SEND_INLINE;
+		}
 
 		num_sges = 0;
 		for (srcchunk = 0; srcchunk < 2; srcchunk++) {
-			sge[srcchunk].addr = dma_addr + src_off;
+			sge[srcchunk].addr = base_addr + src_off;
 			sge[srcchunk].length = src_len;
 			num_sges++;
 
@@ -410,8 +418,7 @@ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,
 			src_len = dst_len - src_len; /* remainder */
 			src_len_sum += src_len;
 		}
-		rc = smc_tx_rdma_write(conn, dst_off, num_sges,
-				       &wr_rdma_buf->wr_tx_rdma[dstchunk]);
+		rc = smc_tx_rdma_write(conn, dst_off, num_sges, wr);
 		if (rc)
 			return rc;
 		if (dst_len_sum == len)
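Why the patch swaps `dma_addr` for `cpu_addr` when it sets IB_SEND_INLINE: for an ordinary RDMA write, each `ib_sge.addr` holds a DMA-mapped address that the HCA fetches via the lkey, but with IB_SEND_INLINE the provider driver copies the payload into the work queue entry at post time, so the SGE must carry a CPU-virtual address and no lkey or DMA mapping is consulted. Below is a minimal, self-contained sketch of posting one inline RDMA write through the kernel verbs API; the helper name and its parameters are illustrative, not part of the patch.

```c
#include <rdma/ib_verbs.h>

/* Post a small RDMA WRITE with the payload copied inline into the WQE.
 * Caller must ensure len does not exceed the QP's granted max_inline_data. */
static int post_inline_rdma_write(struct ib_qp *qp, void *buf, u32 len,
				  u64 remote_addr, u32 rkey)
{
	struct ib_sge sge = {
		/* CPU-virtual address: with IB_SEND_INLINE the driver
		 * copies from here at post time; .lkey is ignored */
		.addr	= (u64)(uintptr_t)buf,
		.length	= len,
	};
	struct ib_rdma_wr wr = {
		.wr = {
			.opcode		= IB_WR_RDMA_WRITE,
			.send_flags	= IB_SEND_INLINE | IB_SEND_SIGNALED,
			.sg_list	= &sge,
			.num_sge	= 1,
		},
		.remote_addr	= remote_addr,
		.rkey		= rkey,
	};
	const struct ib_send_wr *bad_wr;

	return ib_post_send(qp, &wr.wr, &bad_wr);
}
```

A side effect worth noting: because the data is copied at post time, the send buffer may be reused as soon as `ib_post_send()` returns, without waiting for a completion, which is another reason inline sends suit small, latency-sensitive writes.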
RDMA write with the inline flag, used when sending small packets whose length is shorter than the QP's max_inline_data, can help reduce latency.

In my test environment, two VMs running on the same physical host whose NICs (ConnectX-4 Lx) work in SR-IOV mode, qperf shows a 0.5us-0.7us improvement in latency.

Test commands:

server: smc_run taskset -c 1 qperf
client: smc_run taskset -c 1 qperf <server ip> -oo \
            msg_size:1:2K:*2 -t 30 -vu tcp_lat

The results are shown below:

msgsize     before      after
1B          11.2 us     10.6 us (-0.6 us)
2B          11.2 us     10.7 us (-0.5 us)
4B          11.3 us     10.7 us (-0.6 us)
8B          11.2 us     10.6 us (-0.6 us)
16B         11.3 us     10.7 us (-0.6 us)
32B         11.3 us     10.6 us (-0.7 us)
64B         11.2 us     11.2 us (0 us)
128B        11.2 us     11.2 us (0 us)
256B        11.2 us     11.2 us (0 us)
512B        11.4 us     11.3 us (-0.1 us)
1KB         11.4 us     11.5 us (0.1 us)
2KB         11.5 us     11.5 us (0 us)

Signed-off-by: Guangguan Wang <guangguan.wang@linux.alibaba.com>
---
 net/smc/smc_tx.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)
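For completeness, `max_inline_data` is not a fixed device constant but a capability requested at QP creation and reported back by the provider (possibly rounded up, or granted as 0 if inline sends are unsupported), which is why the patch reads it from `link->qp_attr.cap` at runtime. A hedged sketch of that creation-time handshake follows; the values are illustrative and this is not the SMC code itself, which sets up its QPs in `smc_ib_create_queue_pair()`.

```c
#include <linux/err.h>
#include <linux/printk.h>
#include <rdma/ib_verbs.h>

/* Illustrative RC QP creation requesting 256 bytes of inline capacity. */
static struct ib_qp *create_rc_qp_with_inline(struct ib_pd *pd,
					      struct ib_cq *cq)
{
	struct ib_qp_init_attr init_attr = {
		.send_cq	= cq,
		.recv_cq	= cq,
		.qp_type	= IB_QPT_RC,
		.sq_sig_type	= IB_SIGNAL_REQ_WR,
		.cap = {
			.max_send_wr		= 16,
			.max_recv_wr		= 16,
			.max_send_sge		= 2,
			.max_recv_sge		= 1,
			.max_inline_data	= 256,	/* the request */
		},
	};
	struct ib_qp *qp = ib_create_qp(pd, &init_attr);

	/* On success the provider writes the granted capabilities back
	 * into init_attr.cap, so callers should save
	 * init_attr.cap.max_inline_data rather than assume 256. */
	if (!IS_ERR(qp))
		pr_info("inline granted: %u bytes\n",
			init_attr.cap.max_inline_data);
	return qp;
}
```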