From patchwork Tue Jun 11 15:52:37 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Max Gurtovoy X-Patchwork-Id: 10987609 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 8E49A13AF for ; Tue, 11 Jun 2019 15:53:08 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 7D96B286A0 for ; Tue, 11 Jun 2019 15:53:08 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 71205287F5; Tue, 11 Jun 2019 15:53:08 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI,UNPARSEABLE_RELAY autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id A988D28867 for ; Tue, 11 Jun 2019 15:53:07 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2390411AbfFKPxG (ORCPT ); Tue, 11 Jun 2019 11:53:06 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:33033 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S2391582AbfFKPxG (ORCPT ); Tue, 11 Jun 2019 11:53:06 -0400 Received: from Internal Mail-Server by MTLPINE2 (envelope-from maxg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 11 Jun 2019 18:52:59 +0300 Received: from r-vnc12.mtr.labs.mlnx (r-vnc12.mtr.labs.mlnx [10.208.0.12]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x5BFqxL3023036; Tue, 11 Jun 2019 18:52:59 +0300 From: Max Gurtovoy To: leonro@mellanox.com, linux-rdma@vger.kernel.org, sagi@grimberg.me, jgg@mellanox.com, dledford@redhat.com, hch@lst.de, bvanassche@acm.org Cc: maxg@mellanox.com, israelr@mellanox.com, idanb@mellanox.com, oren@mellanox.com, vladimirk@mellanox.com, shlomin@mellanox.com Subject: [PATCH 01/21] RDMA/core: Introduce new header file for signature operations Date: Tue, 11 Jun 2019 18:52:37 +0300 Message-Id: <1560268377-26560-2-git-send-email-maxg@mellanox.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: <1560268377-26560-1-git-send-email-maxg@mellanox.com> References: <1560268377-26560-1-git-send-email-maxg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP Ease the exhausted ib_verbs.h file and make the code more readable. Signed-off-by: Max Gurtovoy Signed-off-by: Israel Rukshin Reviewed-by: Leon Romanovsky Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig --- include/rdma/ib_verbs.h | 112 +------------------------------------------ include/rdma/signature.h | 120 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+), 111 deletions(-) create mode 100644 include/rdma/signature.h diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 54873085f2da..55862328538e 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -63,6 +63,7 @@ #include #include #include +#include #include #include @@ -275,17 +276,6 @@ enum ib_device_cap_flags { IB_DEVICE_ALLOW_USER_UNREG = (1ULL << 37), }; -enum ib_signature_prot_cap { - IB_PROT_T10DIF_TYPE_1 = 1, - IB_PROT_T10DIF_TYPE_2 = 1 << 1, - IB_PROT_T10DIF_TYPE_3 = 1 << 2, -}; - -enum ib_signature_guard_cap { - IB_GUARD_T10DIF_CRC = 1, - IB_GUARD_T10DIF_CSUM = 1 << 1, -}; - enum ib_atomic_cap { IB_ATOMIC_NONE, IB_ATOMIC_HCA, @@ -810,106 +800,6 @@ enum ib_mr_type { IB_MR_TYPE_SG_GAPS, }; -/** - * Signature types - * IB_SIG_TYPE_NONE: Unprotected. - * IB_SIG_TYPE_T10_DIF: Type T10-DIF - */ -enum ib_signature_type { - IB_SIG_TYPE_NONE, - IB_SIG_TYPE_T10_DIF, -}; - -/** - * Signature T10-DIF block-guard types - * IB_T10DIF_CRC: Corresponds to T10-PI mandated CRC checksum rules. - * IB_T10DIF_CSUM: Corresponds to IP checksum rules. - */ -enum ib_t10_dif_bg_type { - IB_T10DIF_CRC, - IB_T10DIF_CSUM -}; - -/** - * struct ib_t10_dif_domain - Parameters specific for T10-DIF - * domain. - * @bg_type: T10-DIF block guard type (CRC|CSUM) - * @pi_interval: protection information interval. - * @bg: seed of guard computation. - * @app_tag: application tag of guard block - * @ref_tag: initial guard block reference tag. - * @ref_remap: Indicate wethear the reftag increments each block - * @app_escape: Indicate to skip block check if apptag=0xffff - * @ref_escape: Indicate to skip block check if reftag=0xffffffff - * @apptag_check_mask: check bitmask of application tag. - */ -struct ib_t10_dif_domain { - enum ib_t10_dif_bg_type bg_type; - u16 pi_interval; - u16 bg; - u16 app_tag; - u32 ref_tag; - bool ref_remap; - bool app_escape; - bool ref_escape; - u16 apptag_check_mask; -}; - -/** - * struct ib_sig_domain - Parameters for signature domain - * @sig_type: specific signauture type - * @sig: union of all signature domain attributes that may - * be used to set domain layout. - */ -struct ib_sig_domain { - enum ib_signature_type sig_type; - union { - struct ib_t10_dif_domain dif; - } sig; -}; - -/** - * struct ib_sig_attrs - Parameters for signature handover operation - * @check_mask: bitmask for signature byte check (8 bytes) - * @mem: memory domain layout desciptor. - * @wire: wire domain layout desciptor. - */ -struct ib_sig_attrs { - u8 check_mask; - struct ib_sig_domain mem; - struct ib_sig_domain wire; -}; - -enum ib_sig_err_type { - IB_SIG_BAD_GUARD, - IB_SIG_BAD_REFTAG, - IB_SIG_BAD_APPTAG, -}; - -/** - * Signature check masks (8 bytes in total) according to the T10-PI standard: - * -------- -------- ------------ - * | GUARD | APPTAG | REFTAG | - * | 2B | 2B | 4B | - * -------- -------- ------------ - */ -enum { - IB_SIG_CHECK_GUARD = 0xc0, - IB_SIG_CHECK_APPTAG = 0x30, - IB_SIG_CHECK_REFTAG = 0x0f, -}; - -/** - * struct ib_sig_err - signature error descriptor - */ -struct ib_sig_err { - enum ib_sig_err_type err_type; - u32 expected; - u32 actual; - u64 sig_err_offset; - u32 key; -}; - enum ib_mr_status_check { IB_MR_CHECK_SIG_STATUS = 1, }; diff --git a/include/rdma/signature.h b/include/rdma/signature.h new file mode 100644 index 000000000000..5998fe94dfd4 --- /dev/null +++ b/include/rdma/signature.h @@ -0,0 +1,120 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) */ +/* + * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved. + */ + +#ifndef _RDMA_SIGNATURE_H_ +#define _RDMA_SIGNATURE_H_ + +enum ib_signature_prot_cap { + IB_PROT_T10DIF_TYPE_1 = 1, + IB_PROT_T10DIF_TYPE_2 = 1 << 1, + IB_PROT_T10DIF_TYPE_3 = 1 << 2, +}; + +enum ib_signature_guard_cap { + IB_GUARD_T10DIF_CRC = 1, + IB_GUARD_T10DIF_CSUM = 1 << 1, +}; + +/** + * enum ib_signature_type - Signature types + * @IB_SIG_TYPE_NONE: Unprotected. + * @IB_SIG_TYPE_T10_DIF: Type T10-DIF + */ +enum ib_signature_type { + IB_SIG_TYPE_NONE, + IB_SIG_TYPE_T10_DIF, +}; + +/** + * enum ib_t10_dif_bg_type - Signature T10-DIF block-guard types + * @IB_T10DIF_CRC: Corresponds to T10-PI mandated CRC checksum rules. + * @IB_T10DIF_CSUM: Corresponds to IP checksum rules. + */ +enum ib_t10_dif_bg_type { + IB_T10DIF_CRC, + IB_T10DIF_CSUM, +}; + +/** + * struct ib_t10_dif_domain - Parameters specific for T10-DIF + * domain. + * @bg_type: T10-DIF block guard type (CRC|CSUM) + * @pi_interval: protection information interval. + * @bg: seed of guard computation. + * @app_tag: application tag of guard block + * @ref_tag: initial guard block reference tag. + * @ref_remap: Indicate wethear the reftag increments each block + * @app_escape: Indicate to skip block check if apptag=0xffff + * @ref_escape: Indicate to skip block check if reftag=0xffffffff + * @apptag_check_mask: check bitmask of application tag. + */ +struct ib_t10_dif_domain { + enum ib_t10_dif_bg_type bg_type; + u16 pi_interval; + u16 bg; + u16 app_tag; + u32 ref_tag; + bool ref_remap; + bool app_escape; + bool ref_escape; + u16 apptag_check_mask; +}; + +/** + * struct ib_sig_domain - Parameters for signature domain + * @sig_type: specific signauture type + * @sig: union of all signature domain attributes that may + * be used to set domain layout. + */ +struct ib_sig_domain { + enum ib_signature_type sig_type; + union { + struct ib_t10_dif_domain dif; + } sig; +}; + +/** + * struct ib_sig_attrs - Parameters for signature handover operation + * @check_mask: bitmask for signature byte check (8 bytes) + * @mem: memory domain layout descriptor. + * @wire: wire domain layout descriptor. + */ +struct ib_sig_attrs { + u8 check_mask; + struct ib_sig_domain mem; + struct ib_sig_domain wire; +}; + +enum ib_sig_err_type { + IB_SIG_BAD_GUARD, + IB_SIG_BAD_REFTAG, + IB_SIG_BAD_APPTAG, +}; + +/* + * Signature check masks (8 bytes in total) according to the T10-PI standard: + * -------- -------- ------------ + * | GUARD | APPTAG | REFTAG | + * | 2B | 2B | 4B | + * -------- -------- ------------ + */ +enum { + IB_SIG_CHECK_GUARD = 0xc0, + IB_SIG_CHECK_APPTAG = 0x30, + IB_SIG_CHECK_REFTAG = 0x0f, +}; + +/* + * struct ib_sig_err - signature error descriptor + */ +struct ib_sig_err { + enum ib_sig_err_type err_type; + u32 expected; + u32 actual; + u64 sig_err_offset; + u32 key; +}; + +#endif /* _RDMA_SIGNATURE_H_ */ From patchwork Tue Jun 11 15:52:38 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Max Gurtovoy X-Patchwork-Id: 10987607 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 554F314BB for ; Tue, 11 Jun 2019 15:53:08 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 4402B286A0 for ; Tue, 11 Jun 2019 15:53:08 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 381B9286DD; Tue, 11 Jun 2019 15:53:08 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI,UNPARSEABLE_RELAY autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 89D02286A0 for ; Tue, 11 Jun 2019 15:53:07 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2391637AbfFKPxG (ORCPT ); Tue, 11 Jun 2019 11:53:06 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:33034 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S2391545AbfFKPxG (ORCPT ); Tue, 11 Jun 2019 11:53:06 -0400 Received: from Internal Mail-Server by MTLPINE2 (envelope-from maxg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 11 Jun 2019 18:52:59 +0300 Received: from r-vnc12.mtr.labs.mlnx (r-vnc12.mtr.labs.mlnx [10.208.0.12]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x5BFqxL4023036; Tue, 11 Jun 2019 18:52:59 +0300 From: Max Gurtovoy To: leonro@mellanox.com, linux-rdma@vger.kernel.org, sagi@grimberg.me, jgg@mellanox.com, dledford@redhat.com, hch@lst.de, bvanassche@acm.org Cc: maxg@mellanox.com, israelr@mellanox.com, idanb@mellanox.com, oren@mellanox.com, vladimirk@mellanox.com, shlomin@mellanox.com Subject: [PATCH 02/21] RDMA/core: Save the MR type in the ib_mr structure Date: Tue, 11 Jun 2019 18:52:38 +0300 Message-Id: <1560268377-26560-3-git-send-email-maxg@mellanox.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: <1560268377-26560-1-git-send-email-maxg@mellanox.com> References: <1560268377-26560-1-git-send-email-maxg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP This is a preparation for the signature verbs API change. This change is needed since the MR type will define, in the upcoming patches, the need for allocating internal resources in LLD for signature handover related operations. It will also help to make sure that signature related functions are called with an appropriate MR type and fail otherwise. Also introduce new mr types IB_MR_TYPE_USER, IB_MR_TYPE_DMA and IB_MR_TYPE_DM for correctness. Signed-off-by: Max Gurtovoy Signed-off-by: Israel Rukshin Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg --- drivers/infiniband/core/uverbs_cmd.c | 1 + drivers/infiniband/core/uverbs_std_types_mr.c | 1 + drivers/infiniband/core/verbs.c | 2 ++ include/rdma/ib_verbs.h | 10 ++++++++++ 4 files changed, 14 insertions(+) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 63fe14c7c68f..e0c0c40c7084 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -756,6 +756,7 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) mr->device = pd->device; mr->pd = pd; + mr->type = IB_MR_TYPE_USER; mr->dm = NULL; mr->uobject = uobj; atomic_inc(&pd->usecnt); diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c index 997f7a3a558a..c1286a52dc84 100644 --- a/drivers/infiniband/core/uverbs_std_types_mr.c +++ b/drivers/infiniband/core/uverbs_std_types_mr.c @@ -128,6 +128,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)( mr->device = pd->device; mr->pd = pd; + mr->type = IB_MR_TYPE_DM; mr->dm = dm; mr->uobject = uobj; atomic_inc(&pd->usecnt); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 4fd5aad890d2..e080202ff673 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -299,6 +299,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, mr->device = pd->device; mr->pd = pd; + mr->type = IB_MR_TYPE_DMA; mr->uobject = NULL; mr->need_inval = false; @@ -2011,6 +2012,7 @@ struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type, mr->need_inval = false; mr->res.type = RDMA_RESTRACK_MR; rdma_restrack_kadd(&mr->res); + mr->type = mr_type; } return mr; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 55862328538e..9d925b1346c4 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -793,11 +793,20 @@ __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate); * register any arbitrary sg lists (without * the normal mr constraints - see * ib_map_mr_sg) + * @IB_MR_TYPE_DM: memory region that is used for device + * memory registration + * @IB_MR_TYPE_USER: memory region that is used for the user-space + * application + * @IB_MR_TYPE_DMA: memory region that is used for DMA operations + * without address translations (VA=PA) */ enum ib_mr_type { IB_MR_TYPE_MEM_REG, IB_MR_TYPE_SIGNATURE, IB_MR_TYPE_SG_GAPS, + IB_MR_TYPE_DM, + IB_MR_TYPE_USER, + IB_MR_TYPE_DMA, }; enum ib_mr_status_check { @@ -1730,6 +1739,7 @@ struct ib_mr { u64 iova; u64 length; unsigned int page_size; + enum ib_mr_type type; bool need_inval; union { struct ib_uobject *uobject; /* user */ From patchwork Tue Jun 11 15:52:39 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Max Gurtovoy X-Patchwork-Id: 10987619 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 4825A18E8 for ; Tue, 11 Jun 2019 15:53:10 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 352BC26E1A for ; Tue, 11 Jun 2019 15:53:10 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 299AB287E8; Tue, 11 Jun 2019 15:53:10 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI,UNPARSEABLE_RELAY autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 963B626E1A for ; Tue, 11 Jun 2019 15:53:09 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2391545AbfFKPxG (ORCPT ); Tue, 11 Jun 2019 11:53:06 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:33035 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S2391826AbfFKPxG (ORCPT ); Tue, 11 Jun 2019 11:53:06 -0400 Received: from Internal Mail-Server by MTLPINE2 (envelope-from maxg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 11 Jun 2019 18:52:59 +0300 Received: from r-vnc12.mtr.labs.mlnx (r-vnc12.mtr.labs.mlnx [10.208.0.12]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x5BFqxL5023036; Tue, 11 Jun 2019 18:52:59 +0300 From: Max Gurtovoy To: leonro@mellanox.com, linux-rdma@vger.kernel.org, sagi@grimberg.me, jgg@mellanox.com, dledford@redhat.com, hch@lst.de, bvanassche@acm.org Cc: maxg@mellanox.com, israelr@mellanox.com, idanb@mellanox.com, oren@mellanox.com, vladimirk@mellanox.com, shlomin@mellanox.com Subject: [PATCH 03/21] RDMA/core: Introduce IB_MR_TYPE_INTEGRITY and ib_alloc_mr_integrity API Date: Tue, 11 Jun 2019 18:52:39 +0300 Message-Id: <1560268377-26560-4-git-send-email-maxg@mellanox.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: <1560268377-26560-1-git-send-email-maxg@mellanox.com> References: <1560268377-26560-1-git-send-email-maxg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP From: Israel Rukshin This is a preparation for signature verbs API re-design. In the new design a single MR with IB_MR_TYPE_INTEGRITY type will be used to perform the needed mapping for data integrity operations. Signed-off-by: Israel Rukshin Signed-off-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig --- drivers/infiniband/core/device.c | 1 + drivers/infiniband/core/verbs.c | 46 ++++++++++++++++++++++++++++++++++++++++ include/rdma/ib_verbs.h | 10 +++++++++ 3 files changed, 57 insertions(+) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 29f7b15c81d9..e698ad34e528 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2350,6 +2350,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, alloc_fmr); SET_DEVICE_OP(dev_ops, alloc_hw_stats); SET_DEVICE_OP(dev_ops, alloc_mr); + SET_DEVICE_OP(dev_ops, alloc_mr_integrity); SET_DEVICE_OP(dev_ops, alloc_mw); SET_DEVICE_OP(dev_ops, alloc_pd); SET_DEVICE_OP(dev_ops, alloc_rdma_netdev); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index e080202ff673..80b9dc0dbd6a 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2002,6 +2002,9 @@ struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type, if (!pd->device->ops.alloc_mr) return ERR_PTR(-EOPNOTSUPP); + if (WARN_ON_ONCE(mr_type == IB_MR_TYPE_INTEGRITY)) + return ERR_PTR(-EINVAL); + mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg, udata); if (!IS_ERR(mr)) { mr->device = pd->device; @@ -2019,6 +2022,49 @@ struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type, } EXPORT_SYMBOL(ib_alloc_mr_user); +/** + * ib_alloc_mr_integrity() - Allocates an integrity memory region + * @pd: protection domain associated with the region + * @max_num_data_sg: maximum data sg entries available for registration + * @max_num_meta_sg: maximum metadata sg entries available for + * registration + * + * Notes: + * Memory registration page/sg lists must not exceed max_num_sg, + * also the integrity page/sg lists must not exceed max_num_meta_sg. + * + */ +struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd, + u32 max_num_data_sg, + u32 max_num_meta_sg) +{ + struct ib_mr *mr; + + if (!pd->device->ops.alloc_mr_integrity) + return ERR_PTR(-EOPNOTSUPP); + + if (!max_num_meta_sg) + return ERR_PTR(-EINVAL); + + mr = pd->device->ops.alloc_mr_integrity(pd, max_num_data_sg, + max_num_meta_sg); + if (IS_ERR(mr)) + return mr; + + mr->device = pd->device; + mr->pd = pd; + mr->dm = NULL; + mr->uobject = NULL; + atomic_inc(&pd->usecnt); + mr->need_inval = false; + mr->res.type = RDMA_RESTRACK_MR; + rdma_restrack_kadd(&mr->res); + mr->type = IB_MR_TYPE_INTEGRITY; + + return mr; +} +EXPORT_SYMBOL(ib_alloc_mr_integrity); + /* "Fast" memory regions */ struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd, diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 9d925b1346c4..1f0442545aae 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -799,6 +799,8 @@ __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate); * application * @IB_MR_TYPE_DMA: memory region that is used for DMA operations * without address translations (VA=PA) + * @IB_MR_TYPE_INTEGRITY: memory region that is used for + * data integrity operations */ enum ib_mr_type { IB_MR_TYPE_MEM_REG, @@ -807,6 +809,7 @@ enum ib_mr_type { IB_MR_TYPE_DM, IB_MR_TYPE_USER, IB_MR_TYPE_DMA, + IB_MR_TYPE_INTEGRITY, }; enum ib_mr_status_check { @@ -2370,6 +2373,9 @@ struct ib_device_ops { int (*dereg_mr)(struct ib_mr *mr, struct ib_udata *udata); struct ib_mr *(*alloc_mr)(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg, struct ib_udata *udata); + struct ib_mr *(*alloc_mr_integrity)(struct ib_pd *pd, + u32 max_num_data_sg, + u32 max_num_meta_sg); int (*advise_mr)(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 flags, struct ib_sge *sg_list, u32 num_sge, @@ -4048,6 +4054,10 @@ static inline struct ib_mr *ib_alloc_mr(struct ib_pd *pd, return ib_alloc_mr_user(pd, mr_type, max_num_sg, NULL); } +struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd, + u32 max_num_data_sg, + u32 max_num_meta_sg); + /** * ib_update_fast_reg_key - updates the key portion of the fast_reg MR * R_Key and L_Key. From patchwork Tue Jun 11 15:52:40 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Max Gurtovoy X-Patchwork-Id: 10987613 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 9676813AF for ; Tue, 11 Jun 2019 15:53:09 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 87720287CD for ; Tue, 11 Jun 2019 15:53:09 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 7BA0828868; Tue, 11 Jun 2019 15:53:09 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI,UNPARSEABLE_RELAY autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id D8391287C8 for ; Tue, 11 Jun 2019 15:53:08 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2391826AbfFKPxG (ORCPT ); Tue, 11 Jun 2019 11:53:06 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:33031 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S2390411AbfFKPxG (ORCPT ); Tue, 11 Jun 2019 11:53:06 -0400 Received: from Internal Mail-Server by MTLPINE2 (envelope-from maxg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 11 Jun 2019 18:52:59 +0300 Received: from r-vnc12.mtr.labs.mlnx (r-vnc12.mtr.labs.mlnx [10.208.0.12]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x5BFqxL6023036; Tue, 11 Jun 2019 18:52:59 +0300 From: Max Gurtovoy To: leonro@mellanox.com, linux-rdma@vger.kernel.org, sagi@grimberg.me, jgg@mellanox.com, dledford@redhat.com, hch@lst.de, bvanassche@acm.org Cc: maxg@mellanox.com, israelr@mellanox.com, idanb@mellanox.com, oren@mellanox.com, vladimirk@mellanox.com, shlomin@mellanox.com Subject: [PATCH 04/21] RDMA/core: Introduce ib_map_mr_sg_pi to map data/protection sgl's Date: Tue, 11 Jun 2019 18:52:40 +0300 Message-Id: <1560268377-26560-5-git-send-email-maxg@mellanox.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: <1560268377-26560-1-git-send-email-maxg@mellanox.com> References: <1560268377-26560-1-git-send-email-maxg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP This function will map the previously dma mapped SG lists for PI (protection information) and data to an appropriate memory region for future registration. The given MR must be allocated as IB_MR_TYPE_INTEGRITY. Signed-off-by: Max Gurtovoy Signed-off-by: Israel Rukshin Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig --- drivers/infiniband/core/device.c | 1 + drivers/infiniband/core/verbs.c | 40 +++++++++++++++++++++++++++++++++++++++- include/rdma/ib_verbs.h | 9 +++++++++ 3 files changed, 49 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index e698ad34e528..73cbe0913801 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2410,6 +2410,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, iw_reject); SET_DEVICE_OP(dev_ops, iw_rem_ref); SET_DEVICE_OP(dev_ops, map_mr_sg); + SET_DEVICE_OP(dev_ops, map_mr_sg_pi); SET_DEVICE_OP(dev_ops, map_phys_fmr); SET_DEVICE_OP(dev_ops, mmap); SET_DEVICE_OP(dev_ops, modify_ah); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 80b9dc0dbd6a..76a574f20f57 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2040,7 +2040,8 @@ struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd, { struct ib_mr *mr; - if (!pd->device->ops.alloc_mr_integrity) + if (!pd->device->ops.alloc_mr_integrity || + !pd->device->ops.map_mr_sg_pi) return ERR_PTR(-EOPNOTSUPP); if (!max_num_meta_sg) @@ -2423,6 +2424,43 @@ int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, } EXPORT_SYMBOL(ib_set_vf_guid); +/** + * ib_map_mr_sg_pi() - Map the dma mapped SG lists for PI (protection + * information) and set an appropriate memory region for registration. + * @mr: memory region + * @data_sg: dma mapped scatterlist for data + * @data_sg_nents: number of entries in data_sg + * @data_sg_offset: offset in bytes into data_sg + * @meta_sg: dma mapped scatterlist for metadata + * @meta_sg_nents: number of entries in meta_sg + * @meta_sg_offset: offset in bytes into meta_sg + * @page_size: page vector desired page size + * + * Constraints: + * - The MR must be allocated with type IB_MR_TYPE_INTEGRITY. + * + * Return: 0 on success. + * + * After this completes successfully, the memory region + * is ready for registration. + */ +int ib_map_mr_sg_pi(struct ib_mr *mr, struct scatterlist *data_sg, + int data_sg_nents, unsigned int *data_sg_offset, + struct scatterlist *meta_sg, int meta_sg_nents, + unsigned int *meta_sg_offset, unsigned int page_size) +{ + if (unlikely(!mr->device->ops.map_mr_sg_pi || + WARN_ON_ONCE(mr->type != IB_MR_TYPE_INTEGRITY))) + return -EOPNOTSUPP; + + mr->page_size = page_size; + + return mr->device->ops.map_mr_sg_pi(mr, data_sg, data_sg_nents, + data_sg_offset, meta_sg, + meta_sg_nents, meta_sg_offset); +} +EXPORT_SYMBOL(ib_map_mr_sg_pi); + /** * ib_map_mr_sg() - Map the largest prefix of a dma mapped SG list * and set it the memory region. diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 1f0442545aae..2797a58ce6c9 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2444,6 +2444,11 @@ struct ib_device_ops { int (*read_counters)(struct ib_counters *counters, struct ib_counters_read_attr *counters_read_attr, struct uverbs_attr_bundle *attrs); + int (*map_mr_sg_pi)(struct ib_mr *mr, struct scatterlist *data_sg, + int data_sg_nents, unsigned int *data_sg_offset, + struct scatterlist *meta_sg, int meta_sg_nents, + unsigned int *meta_sg_offset); + /** * alloc_hw_stats - Allocate a struct rdma_hw_stats and fill in the * driver initialized data. The struct is kfree()'ed by the sysfs @@ -4242,6 +4247,10 @@ int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table); int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset, unsigned int page_size); +int ib_map_mr_sg_pi(struct ib_mr *mr, struct scatterlist *data_sg, + int data_sg_nents, unsigned int *data_sg_offset, + struct scatterlist *meta_sg, int meta_sg_nents, + unsigned int *meta_sg_offset, unsigned int page_size); static inline int ib_map_mr_sg_zbva(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, From patchwork Tue Jun 11 15:52:41 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Max Gurtovoy X-Patchwork-Id: 10987605 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 3005F924 for ; Tue, 11 Jun 2019 15:53:08 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 1EE39286DD for ; Tue, 11 Jun 2019 15:53:08 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 1316628852; Tue, 11 Jun 2019 15:53:08 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI,UNPARSEABLE_RELAY autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 6985E286C6 for ; Tue, 11 Jun 2019 15:53:06 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2391828AbfFKPxG (ORCPT ); Tue, 11 Jun 2019 11:53:06 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:33032 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S2391637AbfFKPxG (ORCPT ); Tue, 11 Jun 2019 11:53:06 -0400 Received: from Internal Mail-Server by MTLPINE2 (envelope-from maxg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 11 Jun 2019 18:52:59 +0300 Received: from r-vnc12.mtr.labs.mlnx (r-vnc12.mtr.labs.mlnx [10.208.0.12]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x5BFqxL7023036; Tue, 11 Jun 2019 18:52:59 +0300 From: Max Gurtovoy To: leonro@mellanox.com, linux-rdma@vger.kernel.org, sagi@grimberg.me, jgg@mellanox.com, dledford@redhat.com, hch@lst.de, bvanassche@acm.org Cc: maxg@mellanox.com, israelr@mellanox.com, idanb@mellanox.com, oren@mellanox.com, vladimirk@mellanox.com, shlomin@mellanox.com Subject: [PATCH 05/21] RDMA/core: Add signature attrs element for ib_mr structure Date: Tue, 11 Jun 2019 18:52:41 +0300 Message-Id: <1560268377-26560-6-git-send-email-maxg@mellanox.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: <1560268377-26560-1-git-send-email-maxg@mellanox.com> References: <1560268377-26560-1-git-send-email-maxg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP This element will describe the needed characteristics for the signature operation per signature enabled memory region (type IB_MR_TYPE_INTEGRITY). Also add meta_length attribute to ib_sig_attrs structure for saving the mapped metadata length (needed for the new API implementation). Signed-off-by: Max Gurtovoy Signed-off-by: Israel Rukshin Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg --- drivers/infiniband/core/uverbs_cmd.c | 1 + drivers/infiniband/core/verbs.c | 13 ++++++++++++- include/rdma/ib_verbs.h | 2 +- include/rdma/signature.h | 2 ++ 4 files changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index e0c0c40c7084..9ab2393ac681 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -758,6 +758,7 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) mr->pd = pd; mr->type = IB_MR_TYPE_USER; mr->dm = NULL; + mr->sig_attrs = NULL; mr->uobject = uobj; atomic_inc(&pd->usecnt); mr->res.type = RDMA_RESTRACK_MR; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 76a574f20f57..54b25adc65d3 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1967,6 +1967,7 @@ int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata) { struct ib_pd *pd = mr->pd; struct ib_dm *dm = mr->dm; + struct ib_sig_attrs *sig_attrs = mr->sig_attrs; int ret; rdma_restrack_del(&mr->res); @@ -1975,6 +1976,7 @@ int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata) atomic_dec(&pd->usecnt); if (dm) atomic_dec(&dm->usecnt); + kfree(sig_attrs); } return ret; @@ -2016,6 +2018,7 @@ struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type, mr->res.type = RDMA_RESTRACK_MR; rdma_restrack_kadd(&mr->res); mr->type = mr_type; + mr->sig_attrs = NULL; } return mr; @@ -2039,6 +2042,7 @@ struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd, u32 max_num_meta_sg) { struct ib_mr *mr; + struct ib_sig_attrs *sig_attrs; if (!pd->device->ops.alloc_mr_integrity || !pd->device->ops.map_mr_sg_pi) @@ -2047,10 +2051,16 @@ struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd, if (!max_num_meta_sg) return ERR_PTR(-EINVAL); + sig_attrs = kzalloc(sizeof(struct ib_sig_attrs), GFP_KERNEL); + if (!sig_attrs) + return ERR_PTR(-ENOMEM); + mr = pd->device->ops.alloc_mr_integrity(pd, max_num_data_sg, max_num_meta_sg); - if (IS_ERR(mr)) + if (IS_ERR(mr)) { + kfree(sig_attrs); return mr; + } mr->device = pd->device; mr->pd = pd; @@ -2061,6 +2071,7 @@ struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd, mr->res.type = RDMA_RESTRACK_MR; rdma_restrack_kadd(&mr->res); mr->type = IB_MR_TYPE_INTEGRITY; + mr->sig_attrs = sig_attrs; return mr; } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 2797a58ce6c9..88f8970a4b16 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1750,7 +1750,7 @@ struct ib_mr { }; struct ib_dm *dm; - + struct ib_sig_attrs *sig_attrs; /* only for IB_MR_TYPE_INTEGRITY MRs */ /* * Implementation details of the RDMA core, don't use in drivers: */ diff --git a/include/rdma/signature.h b/include/rdma/signature.h index 5998fe94dfd4..f24cc2a1d3c5 100644 --- a/include/rdma/signature.h +++ b/include/rdma/signature.h @@ -80,11 +80,13 @@ struct ib_sig_domain { * @check_mask: bitmask for signature byte check (8 bytes) * @mem: memory domain layout descriptor. * @wire: wire domain layout descriptor. + * @meta_length: metadata length */ struct ib_sig_attrs { u8 check_mask; struct ib_sig_domain mem; struct ib_sig_domain wire; + int meta_length; }; enum ib_sig_err_type { From patchwork Tue Jun 11 15:52:42 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Max Gurtovoy X-Patchwork-Id: 10987615 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id DFFB7924 for ; Tue, 11 Jun 2019 15:53:09 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id CB638286C6 for ; Tue, 11 Jun 2019 15:53:09 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id BF37C287C8; Tue, 11 Jun 2019 15:53:09 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI,UNPARSEABLE_RELAY autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id AD9E6286C6 for ; Tue, 11 Jun 2019 15:53:08 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2391830AbfFKPxH (ORCPT ); Tue, 11 Jun 2019 11:53:07 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:33036 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S2391579AbfFKPxH (ORCPT ); Tue, 11 Jun 2019 11:53:07 -0400 Received: from Internal Mail-Server by MTLPINE2 (envelope-from maxg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 11 Jun 2019 18:52:59 +0300 Received: from r-vnc12.mtr.labs.mlnx (r-vnc12.mtr.labs.mlnx [10.208.0.12]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x5BFqxL8023036; Tue, 11 Jun 2019 18:52:59 +0300 From: Max Gurtovoy To: leonro@mellanox.com, linux-rdma@vger.kernel.org, sagi@grimberg.me, jgg@mellanox.com, dledford@redhat.com, hch@lst.de, bvanassche@acm.org Cc: maxg@mellanox.com, israelr@mellanox.com, idanb@mellanox.com, oren@mellanox.com, vladimirk@mellanox.com, shlomin@mellanox.com Subject: [PATCH 06/21] RDMA/mlx5: Implement mlx5_ib_map_mr_sg_pi and mlx5_ib_alloc_mr_integrity Date: Tue, 11 Jun 2019 18:52:42 +0300 Message-Id: <1560268377-26560-7-git-send-email-maxg@mellanox.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: <1560268377-26560-1-git-send-email-maxg@mellanox.com> References: <1560268377-26560-1-git-send-email-maxg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP mlx5_ib_map_mr_sg_pi() will map the PI and data dma mapped SG lists to the mlx5 memory region prior to the registration operation. In the new API, the mlx5 driver will allocate an internal memory region for the UMR operation to register both PI and data SG lists. The internal MR will use KLM mode in order to map 2 (possibly non-contiguous/non-align) SG lists using 1 memory key. In the new API, each ULP will use 1 memory region for the signature operation (instead of 3 in the old API). This memory region will have a key that will be exposed to remote server to perform RDMA operation. The internal memory key that will map the SG lists will stay private. Signed-off-by: Max Gurtovoy Signed-off-by: Israel Rukshin Reviewed-by: Sagi Grimberg --- drivers/infiniband/hw/mlx5/main.c | 2 + drivers/infiniband/hw/mlx5/mlx5_ib.h | 11 +++ drivers/infiniband/hw/mlx5/mr.c | 187 ++++++++++++++++++++++++++++++++--- 3 files changed, 189 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 340290b883fe..f4880259c8ac 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6126,6 +6126,7 @@ static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev) static const struct ib_device_ops mlx5_ib_dev_ops = { .add_gid = mlx5_ib_add_gid, .alloc_mr = mlx5_ib_alloc_mr, + .alloc_mr_integrity = mlx5_ib_alloc_mr_integrity, .alloc_pd = mlx5_ib_alloc_pd, .alloc_ucontext = mlx5_ib_alloc_ucontext, .attach_mcast = mlx5_ib_mcg_attach, @@ -6155,6 +6156,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .get_dma_mr = mlx5_ib_get_dma_mr, .get_link_layer = mlx5_ib_port_link_layer, .map_mr_sg = mlx5_ib_map_mr_sg, + .map_mr_sg_pi = mlx5_ib_map_mr_sg_pi, .mmap = mlx5_ib_mmap, .modify_cq = mlx5_ib_modify_cq, .modify_device = mlx5_ib_modify_device, diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 40eb8be482e4..07bac37c3450 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -587,6 +587,9 @@ struct mlx5_ib_mr { void *descs; dma_addr_t desc_map; int ndescs; + int data_length; + int meta_ndescs; + int meta_length; int max_descs; int desc_size; int access_mode; @@ -605,6 +608,7 @@ struct mlx5_ib_mr { int access_flags; /* Needed for rereg MR */ struct mlx5_ib_mr *parent; + struct mlx5_ib_mr *pi_mr; /* Needed for IB_MR_TYPE_INTEGRITY */ atomic_t num_leaf_free; wait_queue_head_t q_leaf_free; struct mlx5_async_work cb_work; @@ -1148,8 +1152,15 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg, struct ib_udata *udata); +struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd, + u32 max_num_sg, + u32 max_num_meta_sg); int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); +int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, + int data_sg_nents, unsigned int *data_sg_offset, + struct scatterlist *meta_sg, int meta_sg_nents, + unsigned int *meta_sg_offset); int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const struct ib_mad_hdr *in, size_t in_mad_size, diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 5f09699fab98..1ae1846256b8 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1639,16 +1639,22 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { - dereg_mr(to_mdev(ibmr->device), to_mmr(ibmr)); + struct mlx5_ib_mr *mmr = to_mmr(ibmr); + + if (ibmr->type == IB_MR_TYPE_INTEGRITY) + dereg_mr(to_mdev(mmr->pi_mr->ibmr.device), mmr->pi_mr); + + dereg_mr(to_mdev(ibmr->device), mmr); + return 0; } -struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, - u32 max_num_sg, struct ib_udata *udata) +static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd, + u32 max_num_sg, u32 max_num_meta_sg) { struct mlx5_ib_dev *dev = to_mdev(pd->device); int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); - int ndescs = ALIGN(max_num_sg, 4); + int ndescs = ALIGN(max_num_sg + max_num_meta_sg, 4); struct mlx5_ib_mr *mr; void *mkc; u32 *in; @@ -1670,8 +1676,72 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, MLX5_SET(mkc, mkc, qpn, 0xffffff); MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); + mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS; + + err = mlx5_alloc_priv_descs(pd->device, mr, + ndescs, sizeof(struct mlx5_klm)); + if (err) + goto err_free_in; + mr->desc_size = sizeof(struct mlx5_klm); + mr->max_descs = ndescs; + + MLX5_SET(mkc, mkc, access_mode_1_0, mr->access_mode & 0x3); + MLX5_SET(mkc, mkc, access_mode_4_2, (mr->access_mode >> 2) & 0x7); + MLX5_SET(mkc, mkc, umr_en, 1); + + mr->ibmr.pd = pd; + mr->ibmr.device = pd->device; + err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); + if (err) + goto err_priv_descs; + + mr->mmkey.type = MLX5_MKEY_MR; + mr->ibmr.lkey = mr->mmkey.key; + mr->ibmr.rkey = mr->mmkey.key; + mr->umem = NULL; + kfree(in); + + return mr; + +err_priv_descs: + mlx5_free_priv_descs(mr); +err_free_in: + kfree(in); +err_free: + kfree(mr); + return ERR_PTR(err); +} + +static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd, + enum ib_mr_type mr_type, u32 max_num_sg, + u32 max_num_meta_sg) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + int ndescs = ALIGN(max_num_sg, 4); + struct mlx5_ib_mr *mr; + void *mkc; + u32 *in; + int err; + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + in = kzalloc(inlen, GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_free; + } + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, free, 1); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); + if (mr_type == IB_MR_TYPE_MEM_REG) { mr->access_mode = MLX5_MKC_ACCESS_MODE_MTT; + MLX5_SET(mkc, mkc, translations_octword_size, ndescs); MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT); err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, sizeof(struct mlx5_mtt)); @@ -1682,6 +1752,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, mr->max_descs = ndescs; } else if (mr_type == IB_MR_TYPE_SG_GAPS) { mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS; + MLX5_SET(mkc, mkc, translations_octword_size, ndescs); err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, sizeof(struct mlx5_klm)); @@ -1689,11 +1760,13 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, goto err_free_in; mr->desc_size = sizeof(struct mlx5_klm); mr->max_descs = ndescs; - } else if (mr_type == IB_MR_TYPE_SIGNATURE) { + } else if (mr_type == IB_MR_TYPE_SIGNATURE || + mr_type == IB_MR_TYPE_INTEGRITY) { u32 psv_index[2]; MLX5_SET(mkc, mkc, bsf_en, 1); MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE); + MLX5_SET(mkc, mkc, translations_octword_size, 4); mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL); if (!mr->sig) { err = -ENOMEM; @@ -1714,6 +1787,14 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, mr->sig->sig_err_exists = false; /* Next UMR, Arm SIGERR */ ++mr->sig->sigerr_count; + if (mr_type == IB_MR_TYPE_INTEGRITY) { + mr->pi_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, + max_num_meta_sg); + if (IS_ERR(mr->pi_mr)) { + err = PTR_ERR(mr->pi_mr); + goto err_destroy_psv; + } + } } else { mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type); err = -EINVAL; @@ -1727,7 +1808,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, mr->ibmr.device = pd->device; err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); if (err) - goto err_destroy_psv; + goto err_free_pi_mr; mr->mmkey.type = MLX5_MKEY_MR; mr->ibmr.lkey = mr->mmkey.key; @@ -1737,6 +1818,11 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, return &mr->ibmr; +err_free_pi_mr: + if (mr->pi_mr) { + dereg_mr(to_mdev(mr->pi_mr->ibmr.device), mr->pi_mr); + mr->pi_mr = NULL; + } err_destroy_psv: if (mr->sig) { if (mlx5_core_destroy_psv(dev->mdev, @@ -1758,6 +1844,19 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, return ERR_PTR(err); } +struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg, struct ib_udata *udata) +{ + return __mlx5_ib_alloc_mr(pd, mr_type, max_num_sg, 0); +} + +struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd, + u32 max_num_sg, u32 max_num_meta_sg) +{ + return __mlx5_ib_alloc_mr(pd, IB_MR_TYPE_INTEGRITY, max_num_sg, + max_num_meta_sg); +} + struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, struct ib_udata *udata) { @@ -1890,13 +1989,16 @@ static int mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, struct scatterlist *sgl, unsigned short sg_nents, - unsigned int *sg_offset_p) + unsigned int *sg_offset_p, + struct scatterlist *meta_sgl, + unsigned short meta_sg_nents, + unsigned int *meta_sg_offset_p) { struct scatterlist *sg = sgl; struct mlx5_klm *klms = mr->descs; unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0; u32 lkey = mr->ibmr.pd->local_dma_lkey; - int i; + int i, j = 0; mr->ibmr.iova = sg_dma_address(sg) + sg_offset; mr->ibmr.length = 0; @@ -1911,12 +2013,36 @@ mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, sg_offset = 0; } - mr->ndescs = i; if (sg_offset_p) *sg_offset_p = sg_offset; - return i; + mr->ndescs = i; + mr->data_length = mr->ibmr.length; + + if (meta_sg_nents) { + sg = meta_sgl; + sg_offset = meta_sg_offset_p ? *meta_sg_offset_p : 0; + for_each_sg(meta_sgl, sg, meta_sg_nents, j) { + if (unlikely(i + j >= mr->max_descs)) + break; + klms[i + j].va = cpu_to_be64(sg_dma_address(sg) + + sg_offset); + klms[i + j].bcount = cpu_to_be32(sg_dma_len(sg) - + sg_offset); + klms[i + j].key = cpu_to_be32(lkey); + mr->ibmr.length += sg_dma_len(sg) - sg_offset; + + sg_offset = 0; + } + if (meta_sg_offset_p) + *meta_sg_offset_p = sg_offset; + + mr->meta_ndescs = j; + mr->meta_length = mr->ibmr.length - mr->data_length; + } + + return i + j; } static int mlx5_set_page(struct ib_mr *ibmr, u64 addr) @@ -1933,6 +2059,44 @@ static int mlx5_set_page(struct ib_mr *ibmr, u64 addr) return 0; } +int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, + int data_sg_nents, unsigned int *data_sg_offset, + struct scatterlist *meta_sg, int meta_sg_nents, + unsigned int *meta_sg_offset) +{ + struct mlx5_ib_mr *mr = to_mmr(ibmr); + struct mlx5_ib_mr *pi_mr = mr->pi_mr; + int n; + + WARN_ON(ibmr->type != IB_MR_TYPE_INTEGRITY); + + pi_mr->ndescs = 0; + pi_mr->meta_ndescs = 0; + pi_mr->meta_length = 0; + + ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map, + pi_mr->desc_size * pi_mr->max_descs, + DMA_TO_DEVICE); + + n = mlx5_ib_sg_to_klms(pi_mr, data_sg, data_sg_nents, data_sg_offset, + meta_sg, meta_sg_nents, meta_sg_offset); + + /* This is zero-based memory region */ + pi_mr->ibmr.iova = 0; + ibmr->length = pi_mr->ibmr.length; + ibmr->iova = pi_mr->ibmr.iova; + ibmr->sig_attrs->meta_length = pi_mr->meta_length; + + ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map, + pi_mr->desc_size * pi_mr->max_descs, + DMA_TO_DEVICE); + + if (unlikely(n != data_sg_nents + meta_sg_nents)) + return -ENOMEM; + + return 0; +} + int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset) { @@ -1946,7 +2110,8 @@ int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, DMA_TO_DEVICE); if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS) - n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset); + n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset, NULL, 0, + NULL); else n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, mlx5_set_page); From patchwork Tue Jun 11 15:52:43 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Max Gurtovoy X-Patchwork-Id: 10987611 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 1E0C514BB for ; Tue, 11 Jun 2019 15:53:09 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 0D1C82881E for ; Tue, 11 Jun 2019 15:53:09 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 018E1287F5; Tue, 11 Jun 2019 15:53:08 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI,UNPARSEABLE_RELAY autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 12DA32881E for ; Tue, 11 Jun 2019 15:53:08 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2391012AbfFKPxH (ORCPT ); Tue, 11 Jun 2019 11:53:07 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:33064 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S2391827AbfFKPxG (ORCPT ); Tue, 11 Jun 2019 11:53:06 -0400 Received: from Internal Mail-Server by MTLPINE2 (envelope-from maxg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 11 Jun 2019 18:53:00 +0300 Received: from r-vnc12.mtr.labs.mlnx (r-vnc12.mtr.labs.mlnx [10.208.0.12]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x5BFqxL9023036; Tue, 11 Jun 2019 18:52:59 +0300 From: Max Gurtovoy To: leonro@mellanox.com, linux-rdma@vger.kernel.org, sagi@grimberg.me, jgg@mellanox.com, dledford@redhat.com, hch@lst.de, bvanassche@acm.org Cc: maxg@mellanox.com, israelr@mellanox.com, idanb@mellanox.com, oren@mellanox.com, vladimirk@mellanox.com, shlomin@mellanox.com Subject: [PATCH 07/21] RDMA/mlx5: Add attr for max number page list length for PI operation Date: Tue, 11 Jun 2019 18:52:43 +0300 Message-Id: <1560268377-26560-8-git-send-email-maxg@mellanox.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: <1560268377-26560-1-git-send-email-maxg@mellanox.com> References: <1560268377-26560-1-git-send-email-maxg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP PI offload (protection information) is a feature that each RDMA provider can implement differently. Thus, introduce new device attribute to define the maximal length of the page list for PI fast registration operation. For example, mlx5 driver uses a single internal MR to map both data and protection SGL's, so it's equal to max_fast_reg_page_list_len / 2. Signed-off-by: Max Gurtovoy Reviewed-by: Leon Romanovsky Reviewed-by: Christoph Hellwig --- drivers/infiniband/hw/mlx5/main.c | 2 ++ include/rdma/ib_verbs.h | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index f4880259c8ac..1458f1b18d19 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1008,6 +1008,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->max_srq_sge = max_rq_sg - 1; props->max_fast_reg_page_list_len = 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size); + props->max_pi_fast_reg_page_list_len = + props->max_fast_reg_page_list_len / 2; get_atomic_caps_qp(dev, props); props->masked_atomic_cap = IB_ATOMIC_NONE; props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 88f8970a4b16..3ca33bf12b30 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -401,6 +401,7 @@ struct ib_device_attr { int max_srq_wr; int max_srq_sge; unsigned int max_fast_reg_page_list_len; + unsigned int max_pi_fast_reg_page_list_len; u16 max_pkeys; u8 local_ca_ack_delay; int sig_prot_cap; From patchwork Tue Jun 11 15:52:44 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Max Gurtovoy X-Patchwork-Id: 10987617 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id EDA4F18A6 for ; Tue, 11 Jun 2019 15:53:09 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id DCEF1287C8 for ; Tue, 11 Jun 2019 15:53:09 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id D04EB28832; Tue, 11 Jun 2019 15:53:09 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI,UNPARSEABLE_RELAY autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 50CF62881E for ; Tue, 11 Jun 2019 15:53:09 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2391579AbfFKPxI (ORCPT ); Tue, 11 Jun 2019 11:53:08 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:33076 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S2391582AbfFKPxH (ORCPT ); Tue, 11 Jun 2019 11:53:07 -0400 Received: from Internal Mail-Server by MTLPINE2 (envelope-from maxg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 11 Jun 2019 18:53:00 +0300 Received: from r-vnc12.mtr.labs.mlnx (r-vnc12.mtr.labs.mlnx [10.208.0.12]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x5BFqxLA023036; Tue, 11 Jun 2019 18:53:00 +0300 From: Max Gurtovoy To: leonro@mellanox.com, linux-rdma@vger.kernel.org, sagi@grimberg.me, jgg@mellanox.com, dledford@redhat.com, hch@lst.de, bvanassche@acm.org Cc: maxg@mellanox.com, israelr@mellanox.com, idanb@mellanox.com, oren@mellanox.com, vladimirk@mellanox.com, shlomin@mellanox.com Subject: [PATCH 08/21] RDMA/mlx5: Pass UMR segment flags instead of boolean Date: Tue, 11 Jun 2019 18:52:44 +0300 Message-Id: <1560268377-26560-9-git-send-email-maxg@mellanox.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: <1560268377-26560-1-git-send-email-maxg@mellanox.com> References: <1560268377-26560-1-git-send-email-maxg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP UMR ctrl segment flags can vary between UMR operations. for example, using inline UMR or adding free/not-free checks for a memory key. This is a preparation commit before adding new signature API that will not need not-free checks for the internal memory key during the UMR operation. Signed-off-by: Max Gurtovoy Reviewed-by: Leon Romanovsky Reviewed-by: Sagi Grimberg --- drivers/infiniband/hw/mlx5/qp.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index f6623c77443a..10b9f79e2c4d 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4170,15 +4170,13 @@ static __be64 sig_mkey_mask(void) } static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr, - struct mlx5_ib_mr *mr, bool umr_inline) + struct mlx5_ib_mr *mr, u8 flags) { int size = mr->ndescs * mr->desc_size; memset(umr, 0, sizeof(*umr)); - umr->flags = MLX5_UMR_CHECK_NOT_FREE; - if (umr_inline) - umr->flags |= MLX5_UMR_INLINE; + umr->flags = flags; umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size)); umr->mkey_mask = frwr_mkey_mask(); } @@ -4759,12 +4757,14 @@ static int set_psv_wr(struct ib_sig_domain *domain, static int set_reg_wr(struct mlx5_ib_qp *qp, const struct ib_reg_wr *wr, - void **seg, int *size, void **cur_edge) + void **seg, int *size, void **cur_edge, + bool check_not_free) { struct mlx5_ib_mr *mr = to_mmr(wr->mr); struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd); size_t mr_list_size = mr->ndescs * mr->desc_size; bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD; + u8 flags = 0; if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) { mlx5_ib_warn(to_mdev(qp->ibqp.device), @@ -4772,7 +4772,12 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, return -EINVAL; } - set_reg_umr_seg(*seg, mr, umr_inline); + if (check_not_free) + flags |= MLX5_UMR_CHECK_NOT_FREE; + if (umr_inline) + flags |= MLX5_UMR_INLINE; + + set_reg_umr_seg(*seg, mr, flags); *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; handle_post_send_edge(&qp->sq, seg, *size, cur_edge); @@ -5003,7 +5008,7 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, qp->sq.wr_data[idx] = IB_WR_REG_MR; ctrl->imm = cpu_to_be32(reg_wr(wr)->key); err = set_reg_wr(qp, reg_wr(wr), &seg, &size, - &cur_edge); + &cur_edge, true); if (err) { *bad_wr = wr; goto out; From patchwork Tue Jun 11 15:52:45 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Max Gurtovoy X-Patchwork-Id: 10987623 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 67CD41908 for ; Tue, 11 Jun 2019 15:53:10 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 575A6286C6 for ; Tue, 11 Jun 2019 15:53:10 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 4BC0926E1A; Tue, 11 Jun 2019 15:53:10 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI,UNPARSEABLE_RELAY autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id DC090287AE for ; Tue, 11 Jun 2019 15:53:09 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2391582AbfFKPxI (ORCPT ); Tue, 11 Jun 2019 11:53:08 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:33090 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S2391827AbfFKPxI (ORCPT ); Tue, 11 Jun 2019 11:53:08 -0400 Received: from Internal Mail-Server by MTLPINE2 (envelope-from maxg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 11 Jun 2019 18:53:00 +0300 Received: from r-vnc12.mtr.labs.mlnx (r-vnc12.mtr.labs.mlnx [10.208.0.12]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x5BFqxLB023036; Tue, 11 Jun 2019 18:53:00 +0300 From: Max Gurtovoy To: leonro@mellanox.com, linux-rdma@vger.kernel.org, sagi@grimberg.me, jgg@mellanox.com, dledford@redhat.com, hch@lst.de, bvanassche@acm.org Cc: maxg@mellanox.com, israelr@mellanox.com, idanb@mellanox.com, oren@mellanox.com, vladimirk@mellanox.com, shlomin@mellanox.com Subject: [PATCH 09/21] RDMA/mlx5: Update set_sig_data_segment attribute for new signature API Date: Tue, 11 Jun 2019 18:52:45 +0300 Message-Id: <1560268377-26560-10-git-send-email-maxg@mellanox.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: <1560268377-26560-1-git-send-email-maxg@mellanox.com> References: <1560268377-26560-1-git-send-email-maxg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP Explicitly pass the sig_mr and the access flags for the mkey segment configuration. This function will be used also in the new signature API, so modify it in order to use it in both APIs. This is a preparation commit before adding new signature API. Signed-off-by: Max Gurtovoy Reviewed-by: Sagi Grimberg --- drivers/infiniband/hw/mlx5/qp.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 10b9f79e2c4d..65d82e40871c 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4648,17 +4648,15 @@ static int set_sig_data_segment(const struct ib_sig_handover_wr *wr, } static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, - const struct ib_sig_handover_wr *wr, u32 size, - u32 length, u32 pdn) + struct ib_mr *sig_mr, int access_flags, + u32 size, u32 length, u32 pdn) { - struct ib_mr *sig_mr = wr->sig_mr; u32 sig_key = sig_mr->rkey; u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1; memset(seg, 0, sizeof(*seg)); - seg->flags = get_umr_flags(wr->access_flags) | - MLX5_MKC_ACCESS_MODE_KLMS; + seg->flags = get_umr_flags(access_flags) | MLX5_MKC_ACCESS_MODE_KLMS; seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00); seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 | MLX5_MKEY_BSF_EN | pdn); @@ -4715,7 +4713,8 @@ static int set_sig_umr_wr(const struct ib_send_wr *send_wr, *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - set_sig_mkey_segment(*seg, wr, xlt_size, region_len, pdn); + set_sig_mkey_segment(*seg, wr->sig_mr, wr->access_flags, xlt_size, + region_len, pdn); *seg += sizeof(struct mlx5_mkey_seg); *size += sizeof(struct mlx5_mkey_seg) / 16; handle_post_send_edge(&qp->sq, seg, *size, cur_edge); From patchwork Tue Jun 11 15:52:46 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Max Gurtovoy X-Patchwork-Id: 10987627 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id DCA6513AF for ; Tue, 11 Jun 2019 15:53:12 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id C976F287AE for ; Tue, 11 Jun 2019 15:53:12 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id BCD3B287EF; Tue, 11 Jun 2019 15:53:12 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI,UNPARSEABLE_RELAY autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id BFF05286C6 for ; Tue, 11 Jun 2019 15:53:11 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2391832AbfFKPxL (ORCPT ); Tue, 11 Jun 2019 11:53:11 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:33119 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S2391827AbfFKPxK (ORCPT ); Tue, 11 Jun 2019 11:53:10 -0400 Received: from Internal Mail-Server by MTLPINE2 (envelope-from maxg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 11 Jun 2019 18:53:00 +0300 Received: from r-vnc12.mtr.labs.mlnx (r-vnc12.mtr.labs.mlnx [10.208.0.12]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x5BFqxLC023036; Tue, 11 Jun 2019 18:53:00 +0300 From: Max Gurtovoy To: leonro@mellanox.com, linux-rdma@vger.kernel.org, sagi@grimberg.me, jgg@mellanox.com, dledford@redhat.com, hch@lst.de, bvanassche@acm.org Cc: maxg@mellanox.com, israelr@mellanox.com, idanb@mellanox.com, oren@mellanox.com, vladimirk@mellanox.com, shlomin@mellanox.com Subject: [PATCH 10/21] RDMA/mlx5: Introduce and implement new IB_WR_REG_MR_INTEGRITY work request Date: Tue, 11 Jun 2019 18:52:46 +0300 Message-Id: <1560268377-26560-11-git-send-email-maxg@mellanox.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: <1560268377-26560-1-git-send-email-maxg@mellanox.com> References: <1560268377-26560-1-git-send-email-maxg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP This new WR will be used to perform PI (protection information) handover using the new API. Using the new API, the user will post a single WR that will internally perform all the needed actions to complete PI operation. This new WR will use a memory region that was allocated as IB_MR_TYPE_INTEGRITY and was mapped using ib_map_mr_sg_pi to perform the registration. In the old API, in order to perform a signature handover operation, each ULP should perform the following: 1. Map and register the data buffers. 2. Map and register the protection buffers. 3. Post a special reg WR to configure the signature handover operation layout. 4. Invalidate the signature memory key. 5. Invalidate protection buffers memory key. 6. Invalidate data buffers memory key. In the new API, the mapping of both data and protection buffers is performed using a single call to ib_map_mr_sg_pi function. Also the registration of the buffers and the configuration of the signature operation layout is done by a single new work request called IB_WR_REG_MR_INTEGRITY. This patch implements this operation for mlx5 devices that are capable to offload data integrity generation/validation while performing the actual buffer transfer. This patch will not remove the old signature API that is used by the iSER initiator and target drivers. This will be done in the future. In the internal implementation, for each IB_WR_REG_MR_INTEGRITY work request, we are using a single UMR operation to register both data and protection buffers using KLM's. Afterwards, another UMR operation will describe the strided block format. These will be followed by 2 SET_PSV operations to set the memory/wire domains initial signature parameters passed by the user. In the end of the whole transaction, only the signature memory key (the one that exposed for the RDMA operation) will be invalidated. Signed-off-by: Max Gurtovoy Signed-off-by: Israel Rukshin Reviewed-by: Sagi Grimberg --- drivers/infiniband/hw/mlx5/qp.c | 218 ++++++++++++++++++++++++++++++++++++---- include/linux/mlx5/qp.h | 3 +- include/rdma/ib_verbs.h | 1 + 3 files changed, 201 insertions(+), 21 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 65d82e40871c..7c9fd335d43d 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4172,7 +4172,7 @@ static __be64 sig_mkey_mask(void) static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr, struct mlx5_ib_mr *mr, u8 flags) { - int size = mr->ndescs * mr->desc_size; + int size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size; memset(umr, 0, sizeof(*umr)); @@ -4303,7 +4303,7 @@ static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg, struct mlx5_ib_mr *mr, u32 key, int access) { - int ndescs = ALIGN(mr->ndescs, 8) >> 1; + int ndescs = ALIGN(mr->ndescs + mr->meta_ndescs, 8) >> 1; memset(seg, 0, sizeof(*seg)); @@ -4354,7 +4354,7 @@ static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg, struct mlx5_ib_mr *mr, struct mlx5_ib_pd *pd) { - int bcount = mr->desc_size * mr->ndescs; + int bcount = mr->desc_size * (mr->ndescs + mr->meta_ndescs); dseg->addr = cpu_to_be64(mr->desc_map); dseg->byte_count = cpu_to_be32(ALIGN(bcount, 64)); @@ -4547,23 +4547,52 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr, return 0; } -static int set_sig_data_segment(const struct ib_sig_handover_wr *wr, - struct mlx5_ib_qp *qp, void **seg, - int *size, void **cur_edge) +static int set_sig_data_segment(const struct ib_send_wr *send_wr, + struct ib_mr *sig_mr, + struct ib_sig_attrs *sig_attrs, + struct mlx5_ib_qp *qp, void **seg, int *size, + void **cur_edge) { - struct ib_sig_attrs *sig_attrs = wr->sig_attrs; - struct ib_mr *sig_mr = wr->sig_mr; struct mlx5_bsf *bsf; - u32 data_len = wr->wr.sg_list->length; - u32 data_key = wr->wr.sg_list->lkey; - u64 data_va = wr->wr.sg_list->addr; + u32 data_len; + u32 data_key; + u64 data_va; + u32 prot_len = 0; + u32 prot_key = 0; + u64 prot_va = 0; + bool prot = false; int ret; int wqe_size; - if (!wr->prot || - (data_key == wr->prot->lkey && - data_va == wr->prot->addr && - data_len == wr->prot->length)) { + if (send_wr->opcode == IB_WR_REG_SIG_MR) { + const struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr); + + data_len = wr->wr.sg_list->length; + data_key = wr->wr.sg_list->lkey; + data_va = wr->wr.sg_list->addr; + if (wr->prot) { + prot_len = wr->prot->length; + prot_key = wr->prot->lkey; + prot_va = wr->prot->addr; + prot = true; + } + } else { + struct mlx5_ib_mr *mr = to_mmr(sig_mr); + struct mlx5_ib_mr *pi_mr = mr->pi_mr; + + data_len = pi_mr->data_length; + data_key = pi_mr->ibmr.lkey; + data_va = pi_mr->ibmr.iova; + if (pi_mr->meta_ndescs) { + prot_len = pi_mr->meta_length; + prot_key = pi_mr->ibmr.lkey; + prot_va = pi_mr->ibmr.iova + data_len; + prot = true; + } + } + + if (!prot || (data_key == prot_key && data_va == prot_va && + data_len == prot_len)) { /** * Source domain doesn't contain signature information * or data and protection are interleaved in memory. @@ -4597,8 +4626,6 @@ static int set_sig_data_segment(const struct ib_sig_handover_wr *wr, struct mlx5_stride_block_ctrl_seg *sblock_ctrl; struct mlx5_stride_block_entry *data_sentry; struct mlx5_stride_block_entry *prot_sentry; - u32 prot_key = wr->prot->lkey; - u64 prot_va = wr->prot->addr; u16 block_size = sig_attrs->mem.sig.dif.pi_interval; int prot_size; @@ -4676,6 +4703,56 @@ static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, umr->mkey_mask = sig_mkey_mask(); } +static int set_pi_umr_wr(const struct ib_send_wr *send_wr, + struct mlx5_ib_qp *qp, void **seg, int *size, + void **cur_edge) +{ + const struct ib_reg_wr *wr = reg_wr(send_wr); + struct mlx5_ib_mr *sig_mr = to_mmr(wr->mr); + struct mlx5_ib_mr *pi_mr = sig_mr->pi_mr; + struct ib_sig_attrs *sig_attrs = sig_mr->ibmr.sig_attrs; + u32 pdn = get_pd(qp)->pdn; + u32 xlt_size; + int region_len, ret; + + if (unlikely(send_wr->num_sge != 0) || + unlikely(wr->access & IB_ACCESS_REMOTE_ATOMIC) || + unlikely(!sig_mr->sig) || unlikely(!qp->signature_en) || + unlikely(!sig_mr->sig->sig_status_checked)) + return -EINVAL; + + /* length of the protected region, data + protection */ + region_len = pi_mr->ibmr.length; + + /** + * KLM octoword size - if protection was provided + * then we use strided block format (3 octowords), + * else we use single KLM (1 octoword) + **/ + if (sig_attrs->mem.sig_type != IB_SIG_TYPE_NONE) + xlt_size = 0x30; + else + xlt_size = sizeof(struct mlx5_klm); + + set_sig_umr_segment(*seg, xlt_size); + *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); + *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); + + set_sig_mkey_segment(*seg, wr->mr, wr->access, xlt_size, region_len, + pdn); + *seg += sizeof(struct mlx5_mkey_seg); + *size += sizeof(struct mlx5_mkey_seg) / 16; + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); + + ret = set_sig_data_segment(send_wr, wr->mr, sig_attrs, qp, seg, size, + cur_edge); + if (ret) + return ret; + + sig_mr->sig->sig_status_checked = false; + return 0; +} static int set_sig_umr_wr(const struct ib_send_wr *send_wr, struct mlx5_ib_qp *qp, void **seg, int *size, @@ -4719,7 +4796,8 @@ static int set_sig_umr_wr(const struct ib_send_wr *send_wr, *size += sizeof(struct mlx5_mkey_seg) / 16; handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - ret = set_sig_data_segment(wr, qp, seg, size, cur_edge); + ret = set_sig_data_segment(send_wr, wr->sig_mr, wr->sig_attrs, qp, seg, + size, cur_edge); if (ret) return ret; @@ -4761,7 +4839,7 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, { struct mlx5_ib_mr *mr = to_mmr(wr->mr); struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd); - size_t mr_list_size = mr->ndescs * mr->desc_size; + int mr_list_size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size; bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD; u8 flags = 0; @@ -4902,8 +4980,11 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */ struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_core_dev *mdev = dev->mdev; + struct ib_reg_wr reg_pi_wr; struct mlx5_ib_qp *qp; struct mlx5_ib_mr *mr; + struct mlx5_ib_mr *pi_mr; + struct ib_sig_attrs *sig_attrs; struct mlx5_wqe_xrc_seg *xrc; struct mlx5_bf *bf; void *cur_edge; @@ -4957,7 +5038,8 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, goto out; } - if (wr->opcode == IB_WR_REG_MR) { + if (wr->opcode == IB_WR_REG_MR || + wr->opcode == IB_WR_REG_MR_INTEGRITY) { fence = dev->umr_fence; next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; } else { @@ -5015,6 +5097,102 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, num_sge = 0; break; + case IB_WR_REG_MR_INTEGRITY: + memset(®_pi_wr, 0, sizeof(struct ib_reg_wr)); + + mr = to_mmr(reg_wr(wr)->mr); + pi_mr = mr->pi_mr; + + reg_pi_wr.mr = &pi_mr->ibmr; + reg_pi_wr.access = reg_wr(wr)->access; + reg_pi_wr.key = pi_mr->ibmr.rkey; + + qp->sq.wr_data[idx] = IB_WR_REG_MR_INTEGRITY; + ctrl->imm = cpu_to_be32(reg_pi_wr.key); + /* UMR for data + protection registration */ + err = set_reg_wr(qp, ®_pi_wr, &seg, &size, + &cur_edge, false); + if (err) { + *bad_wr = wr; + goto out; + } + finish_wqe(qp, ctrl, seg, size, cur_edge, idx, + wr->wr_id, nreq, fence, + MLX5_OPCODE_UMR); + + err = begin_wqe(qp, &seg, &ctrl, wr, &idx, + &size, &cur_edge, nreq); + if (err) { + mlx5_ib_warn(dev, "\n"); + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + ctrl->imm = cpu_to_be32(mr->ibmr.rkey); + /* UMR for sig MR */ + err = set_pi_umr_wr(wr, qp, &seg, &size, + &cur_edge); + if (err) { + mlx5_ib_warn(dev, "\n"); + *bad_wr = wr; + goto out; + } + finish_wqe(qp, ctrl, seg, size, cur_edge, idx, + wr->wr_id, nreq, fence, + MLX5_OPCODE_UMR); + + /* + * SET_PSV WQEs are not signaled and solicited + * on error + */ + sig_attrs = mr->ibmr.sig_attrs; + err = __begin_wqe(qp, &seg, &ctrl, wr, &idx, + &size, &cur_edge, nreq, false, + true); + if (err) { + mlx5_ib_warn(dev, "\n"); + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + err = set_psv_wr(&sig_attrs->mem, + mr->sig->psv_memory.psv_idx, + &seg, &size); + if (err) { + mlx5_ib_warn(dev, "\n"); + *bad_wr = wr; + goto out; + } + finish_wqe(qp, ctrl, seg, size, cur_edge, idx, + wr->wr_id, nreq, next_fence, + MLX5_OPCODE_SET_PSV); + + err = __begin_wqe(qp, &seg, &ctrl, wr, &idx, + &size, &cur_edge, nreq, false, + true); + if (err) { + mlx5_ib_warn(dev, "\n"); + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + err = set_psv_wr(&sig_attrs->wire, + mr->sig->psv_wire.psv_idx, + &seg, &size); + if (err) { + mlx5_ib_warn(dev, "\n"); + *bad_wr = wr; + goto out; + } + finish_wqe(qp, ctrl, seg, size, cur_edge, idx, + wr->wr_id, nreq, next_fence, + MLX5_OPCODE_SET_PSV); + + qp->next_fence = + MLX5_FENCE_MODE_INITIATOR_SMALL; + num_sge = 0; + goto skip_psv; + case IB_WR_REG_SIG_MR: qp->sq.wr_data[idx] = IB_WR_REG_SIG_MR; mr = to_mmr(sig_handover_wr(wr)->sig_mr); diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 3ba4edbd17a6..08e43cd9e742 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -37,7 +37,8 @@ #include #define MLX5_INVALID_LKEY 0x100 -#define MLX5_SIG_WQE_SIZE (MLX5_SEND_WQE_BB * 5) +/* UMR (3 WQE_BB's) + SIG (3 WQE_BB's) + PSV (mem) + PSV (wire) */ +#define MLX5_SIG_WQE_SIZE (MLX5_SEND_WQE_BB * 8) #define MLX5_DIF_SIZE 8 #define MLX5_STRIDE_BLOCK_OP 0x400 #define MLX5_CPY_GRD_MASK 0xc0 diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 3ca33bf12b30..8fd13356c12b 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1247,6 +1247,7 @@ enum ib_wr_opcode { /* These are kernel only and can not be issued by userspace */ IB_WR_REG_MR = 0x20, IB_WR_REG_SIG_MR, + IB_WR_REG_MR_INTEGRITY, /* reserve values for low level drivers' internal use. * These values will not be used at all in the ib core layer. From patchwork Tue Jun 11 15:52:47 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Max Gurtovoy X-Patchwork-Id: 10987625 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id D5E3B13AF for ; Tue, 11 Jun 2019 15:53:11 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id C27E92881E for ; Tue, 11 Jun 2019 15:53:11 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id B69A5287F5; Tue, 11 Jun 2019 15:53:11 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI,UNPARSEABLE_RELAY autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 882DA2881E for ; Tue, 11 Jun 2019 15:53:10 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2391676AbfFKPxJ (ORCPT ); Tue, 11 Jun 2019 11:53:09 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:33105 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S2391831AbfFKPxJ (ORCPT ); Tue, 11 Jun 2019 11:53:09 -0400 Received: from Internal Mail-Server by MTLPINE2 (envelope-from maxg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 11 Jun 2019 18:53:00 +0300 Received: from r-vnc12.mtr.labs.mlnx (r-vnc12.mtr.labs.mlnx [10.208.0.12]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x5BFqxLD023036; Tue, 11 Jun 2019 18:53:00 +0300 From: Max Gurtovoy To: leonro@mellanox.com, linux-rdma@vger.kernel.org, sagi@grimberg.me, jgg@mellanox.com, dledford@redhat.com, hch@lst.de, bvanassche@acm.org Cc: maxg@mellanox.com, israelr@mellanox.com, idanb@mellanox.com, oren@mellanox.com, vladimirk@mellanox.com, shlomin@mellanox.com Subject: [PATCH 11/21] IB/iser: Use IB_WR_REG_MR_INTEGRITY for PI handover Date: Tue, 11 Jun 2019 18:52:47 +0300 Message-Id: <1560268377-26560-12-git-send-email-maxg@mellanox.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: <1560268377-26560-1-git-send-email-maxg@mellanox.com> References: <1560268377-26560-1-git-send-email-maxg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP From: Israel Rukshin Using this new API reduces iSER code complexity. It also reduces the maximum number of work requests per task and the need of dealing with multiple MRs (and their registrations and invalidations) per task. It is done by using a single WR and a special MR type (IB_MR_TYPE_INTEGRITY) for PI operation. The setup of the tested benchmark: - 2 servers with 24 cores (1 initiator and 1 target) - 24 target sessions with 1 LUN each - ramdisk backstore - PI active Performance results running fio (24 jobs, 128 iodepth) using write_generate=0 and read_verify=0 (w/w.o patch): bs IOPS(read) IOPS(write) ---- ---------- ---------- 512 1236.6K/1164.3K 1357.2K/1332.8K 1k 1196.5K/1163.8K 1348.4K/1262.7K 2k 1016.7K/921950 1003.7K/931230 4k 662728/600545 595423/501513 8k 385954/384345 333775/277090 16k 222864/222820 170317/170671 32k 116869/114896 82331/82244 64k 55205/54931 40264/40021 Using write_generate=1 and read_verify=1 (w/w.o patch): bs IOPS(read) IOPS(write) ---- ---------- ---------- 512 1090.1K/1030.9K 1303.9K/1101.4K 1k 1057.7K/904583 1318.4K/988085 2k 965226/638799 1008.6K/692514 4k 555479/410151 542414/414517 8k 298675/224964 264729/237508 16k 133485/122481 164625/138647 32k 74329/67615 80143/78743 64k 35716/35519 39294/37334 We get performance improvement at all block sizes. The most significant improvement is when writing 4k bs (almost 30% more iops). Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg --- drivers/infiniband/ulp/iser/iscsi_iser.h | 38 ++------ drivers/infiniband/ulp/iser/iser_initiator.c | 12 ++- drivers/infiniband/ulp/iser/iser_memory.c | 98 +++++++------------ drivers/infiniband/ulp/iser/iser_verbs.c | 140 +++++++++------------------ 4 files changed, 95 insertions(+), 193 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 36d525110fd2..6bf9eaa8ec96 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -225,13 +225,11 @@ enum iser_desc_type { ISCSI_TX_DATAOUT }; -/* Maximum number of work requests per task: - * Data memory region local invalidate + fast registration - * Protection memory region local invalidate + fast registration - * Signature memory region local invalidate + fast registration - * PDU send +/* + * Maximum number of work requests per task + * (invalidate, registration, send) */ -#define ISER_MAX_WRS 7 +#define ISER_MAX_WRS 3 /** * struct iser_tx_desc - iSER TX descriptor @@ -247,9 +245,6 @@ enum iser_desc_type { * @mapped: Is the task header mapped * @wr_idx: Current WR index * @wrs: Array of WRs per task - * @data_reg: Data buffer registration details - * @prot_reg: Protection buffer registration details - * @sig_attrs: Signature attributes */ struct iser_tx_desc { struct iser_ctrl iser_header; @@ -264,11 +259,7 @@ struct iser_tx_desc { union iser_wr { struct ib_send_wr send; struct ib_reg_wr fast_reg; - struct ib_sig_handover_wr sig; } wrs[ISER_MAX_WRS]; - struct iser_mem_reg data_reg; - struct iser_mem_reg prot_reg; - struct ib_sig_attrs sig_attrs; }; #define ISER_RX_PAD_SIZE (256 - (ISER_RX_PAYLOAD_SIZE + \ @@ -388,6 +379,7 @@ struct iser_device { * * @mr: memory region * @fmr_pool: pool of fmrs + * @sig_mr: signature memory region * @page_vec: fast reg page list used by fmr pool * @mr_valid: is mr valid indicator */ @@ -396,36 +388,22 @@ struct iser_reg_resources { struct ib_mr *mr; struct ib_fmr_pool *fmr_pool; }; + struct ib_mr *sig_mr; struct iser_page_vec *page_vec; u8 mr_valid:1; }; -/** - * struct iser_pi_context - Protection information context - * - * @rsc: protection buffer registration resources - * @sig_mr: signature enable memory region - * @sig_mr_valid: is sig_mr valid indicator - * @sig_protected: is region protected indicator - */ -struct iser_pi_context { - struct iser_reg_resources rsc; - struct ib_mr *sig_mr; - u8 sig_mr_valid:1; - u8 sig_protected:1; -}; - /** * struct iser_fr_desc - Fast registration descriptor * * @list: entry in connection fastreg pool * @rsc: data buffer registration resources - * @pi_ctx: protection information context + * @sig_protected: is region protected indicator */ struct iser_fr_desc { struct list_head list; struct iser_reg_resources rsc; - struct iser_pi_context *pi_ctx; + bool sig_protected; struct list_head all_list; }; diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 96af06cfe0af..5cbb4b3a0566 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -592,15 +592,14 @@ void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc) static inline int iser_inv_desc(struct iser_fr_desc *desc, u32 rkey) { - if (likely(rkey == desc->rsc.mr->rkey)) { - desc->rsc.mr_valid = 0; - } else if (likely(desc->pi_ctx && rkey == desc->pi_ctx->sig_mr->rkey)) { - desc->pi_ctx->sig_mr_valid = 0; - } else { + if (unlikely((!desc->sig_protected && rkey != desc->rsc.mr->rkey) || + (desc->sig_protected && rkey != desc->rsc.sig_mr->rkey))) { iser_err("Bogus remote invalidation for rkey %#x\n", rkey); return -EINVAL; } + desc->rsc.mr_valid = 0; + return 0; } @@ -750,6 +749,9 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task) iser_task->prot[ISER_DIR_IN].data_len = 0; iser_task->prot[ISER_DIR_OUT].data_len = 0; + iser_task->prot[ISER_DIR_IN].dma_nents = 0; + iser_task->prot[ISER_DIR_OUT].dma_nents = 0; + memset(&iser_task->rdma_reg[ISER_DIR_IN], 0, sizeof(struct iser_mem_reg)); memset(&iser_task->rdma_reg[ISER_DIR_OUT], 0, diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index f431c9b4065c..d66e17c2a085 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -376,16 +376,16 @@ iser_inv_rkey(struct ib_send_wr *inv_wr, static int iser_reg_sig_mr(struct iscsi_iser_task *iser_task, - struct iser_pi_context *pi_ctx, - struct iser_mem_reg *data_reg, - struct iser_mem_reg *prot_reg, + struct iser_data_buf *mem, + struct iser_data_buf *sig_mem, + struct iser_reg_resources *rsc, struct iser_mem_reg *sig_reg) { struct iser_tx_desc *tx_desc = &iser_task->desc; - struct ib_sig_attrs *sig_attrs = &tx_desc->sig_attrs; struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe; - struct ib_sig_handover_wr *wr; - struct ib_mr *mr = pi_ctx->sig_mr; + struct ib_mr *mr = rsc->sig_mr; + struct ib_sig_attrs *sig_attrs = mr->sig_attrs; + struct ib_reg_wr *wr; int ret; memset(sig_attrs, 0, sizeof(*sig_attrs)); @@ -395,33 +395,36 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task, iser_set_prot_checks(iser_task->sc, &sig_attrs->check_mask); - if (pi_ctx->sig_mr_valid) + if (rsc->mr_valid) iser_inv_rkey(iser_tx_next_wr(tx_desc), mr, cqe); ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey)); - wr = container_of(iser_tx_next_wr(tx_desc), struct ib_sig_handover_wr, - wr); - wr->wr.opcode = IB_WR_REG_SIG_MR; + ret = ib_map_mr_sg_pi(mr, mem->sg, mem->dma_nents, NULL, + sig_mem->sg, sig_mem->dma_nents, NULL, SZ_4K); + if (unlikely(ret)) { + iser_err("failed to map PI sg (%d)\n", + mem->dma_nents + sig_mem->dma_nents); + goto err; + } + + wr = container_of(iser_tx_next_wr(tx_desc), struct ib_reg_wr, wr); + memset(wr, 0, sizeof(*wr)); + wr->wr.opcode = IB_WR_REG_MR_INTEGRITY; wr->wr.wr_cqe = cqe; - wr->wr.sg_list = &data_reg->sge; - wr->wr.num_sge = 1; + wr->wr.num_sge = 0; wr->wr.send_flags = 0; - wr->sig_attrs = sig_attrs; - wr->sig_mr = mr; - if (scsi_prot_sg_count(iser_task->sc)) - wr->prot = &prot_reg->sge; - else - wr->prot = NULL; - wr->access_flags = IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_READ | - IB_ACCESS_REMOTE_WRITE; - pi_ctx->sig_mr_valid = 1; + wr->mr = mr; + wr->key = mr->rkey; + wr->access = IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_WRITE; + rsc->mr_valid = 1; sig_reg->sge.lkey = mr->lkey; sig_reg->rkey = mr->rkey; - sig_reg->sge.addr = 0; - sig_reg->sge.length = scsi_transfer_length(iser_task->sc); + sig_reg->sge.addr = mr->iova; + sig_reg->sge.length = mr->length; iser_dbg("lkey=0x%x rkey=0x%x addr=0x%llx length=%u\n", sig_reg->sge.lkey, sig_reg->rkey, sig_reg->sge.addr, @@ -477,21 +480,6 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, return 0; } -static int -iser_reg_prot_sg(struct iscsi_iser_task *task, - struct iser_data_buf *mem, - struct iser_fr_desc *desc, - bool use_dma_key, - struct iser_mem_reg *reg) -{ - struct iser_device *device = task->iser_conn->ib_conn.device; - - if (use_dma_key) - return iser_reg_dma(device, mem, reg); - - return device->reg_ops->reg_mem(task, mem, &desc->pi_ctx->rsc, reg); -} - static int iser_reg_data_sg(struct iscsi_iser_task *task, struct iser_data_buf *mem, @@ -515,7 +503,6 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task, struct iser_device *device = ib_conn->device; struct iser_data_buf *mem = &task->data[dir]; struct iser_mem_reg *reg = &task->rdma_reg[dir]; - struct iser_mem_reg *data_reg; struct iser_fr_desc *desc = NULL; bool use_dma_key; int err; @@ -528,32 +515,17 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task, reg->mem_h = desc; } - if (scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL) - data_reg = reg; - else - data_reg = &task->desc.data_reg; - - err = iser_reg_data_sg(task, mem, desc, use_dma_key, data_reg); - if (unlikely(err)) - goto err_reg; - - if (scsi_get_prot_op(task->sc) != SCSI_PROT_NORMAL) { - struct iser_mem_reg *prot_reg = &task->desc.prot_reg; - - if (scsi_prot_sg_count(task->sc)) { - mem = &task->prot[dir]; - err = iser_reg_prot_sg(task, mem, desc, - use_dma_key, prot_reg); - if (unlikely(err)) - goto err_reg; - } - - err = iser_reg_sig_mr(task, desc->pi_ctx, data_reg, - prot_reg, reg); + if (scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL) { + err = iser_reg_data_sg(task, mem, desc, use_dma_key, reg); + if (unlikely(err)) + goto err_reg; + } else { + err = iser_reg_sig_mr(task, mem, &task->prot[dir], + &desc->rsc, reg); if (unlikely(err)) goto err_reg; - desc->pi_ctx->sig_protected = 1; + desc->sig_protected = 1; } return 0; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 4ff3d98fa6a4..ffd6bbc819f7 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -233,116 +233,63 @@ void iser_free_fmr_pool(struct ib_conn *ib_conn) kfree(desc); } -static int -iser_alloc_reg_res(struct iser_device *device, - struct ib_pd *pd, - struct iser_reg_resources *res, - unsigned int size) +static struct iser_fr_desc * +iser_create_fastreg_desc(struct iser_device *device, + struct ib_pd *pd, + bool pi_enable, + unsigned int size) { + struct iser_fr_desc *desc; struct ib_device *ib_dev = device->ib_device; enum ib_mr_type mr_type; int ret; + desc = kzalloc(sizeof(*desc), GFP_KERNEL); + if (!desc) + return ERR_PTR(-ENOMEM); + if (ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG) mr_type = IB_MR_TYPE_SG_GAPS; else mr_type = IB_MR_TYPE_MEM_REG; - res->mr = ib_alloc_mr(pd, mr_type, size); - if (IS_ERR(res->mr)) { - ret = PTR_ERR(res->mr); + desc->rsc.mr = ib_alloc_mr(pd, mr_type, size); + if (IS_ERR(desc->rsc.mr)) { + ret = PTR_ERR(desc->rsc.mr); iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret); - return ret; - } - res->mr_valid = 0; - - return 0; -} - -static void -iser_free_reg_res(struct iser_reg_resources *rsc) -{ - ib_dereg_mr(rsc->mr); -} - -static int -iser_alloc_pi_ctx(struct iser_device *device, - struct ib_pd *pd, - struct iser_fr_desc *desc, - unsigned int size) -{ - struct iser_pi_context *pi_ctx = NULL; - int ret; - - desc->pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL); - if (!desc->pi_ctx) - return -ENOMEM; - - pi_ctx = desc->pi_ctx; - - ret = iser_alloc_reg_res(device, pd, &pi_ctx->rsc, size); - if (ret) { - iser_err("failed to allocate reg_resources\n"); - goto alloc_reg_res_err; + goto err_alloc_mr; } - pi_ctx->sig_mr = ib_alloc_mr(pd, IB_MR_TYPE_SIGNATURE, 2); - if (IS_ERR(pi_ctx->sig_mr)) { - ret = PTR_ERR(pi_ctx->sig_mr); - goto sig_mr_failure; + if (pi_enable) { + desc->rsc.sig_mr = ib_alloc_mr_integrity(pd, size, size); + if (IS_ERR(desc->rsc.sig_mr)) { + ret = PTR_ERR(desc->rsc.sig_mr); + iser_err("Failed to allocate sig_mr err=%d\n", ret); + goto err_alloc_mr_integrity; + } } - pi_ctx->sig_mr_valid = 0; - desc->pi_ctx->sig_protected = 0; + desc->rsc.mr_valid = 0; - return 0; + return desc; -sig_mr_failure: - iser_free_reg_res(&pi_ctx->rsc); -alloc_reg_res_err: - kfree(desc->pi_ctx); +err_alloc_mr_integrity: + ib_dereg_mr(desc->rsc.mr); +err_alloc_mr: + kfree(desc); - return ret; + return ERR_PTR(ret); } -static void -iser_free_pi_ctx(struct iser_pi_context *pi_ctx) +static void iser_destroy_fastreg_desc(struct iser_fr_desc *desc) { - iser_free_reg_res(&pi_ctx->rsc); - ib_dereg_mr(pi_ctx->sig_mr); - kfree(pi_ctx); -} - -static struct iser_fr_desc * -iser_create_fastreg_desc(struct iser_device *device, - struct ib_pd *pd, - bool pi_enable, - unsigned int size) -{ - struct iser_fr_desc *desc; - int ret; + struct iser_reg_resources *res = &desc->rsc; - desc = kzalloc(sizeof(*desc), GFP_KERNEL); - if (!desc) - return ERR_PTR(-ENOMEM); - - ret = iser_alloc_reg_res(device, pd, &desc->rsc, size); - if (ret) - goto reg_res_alloc_failure; - - if (pi_enable) { - ret = iser_alloc_pi_ctx(device, pd, desc, size); - if (ret) - goto pi_ctx_alloc_failure; + ib_dereg_mr(res->mr); + if (res->sig_mr) { + ib_dereg_mr(res->sig_mr); + res->sig_mr = NULL; } - - return desc; - -pi_ctx_alloc_failure: - iser_free_reg_res(&desc->rsc); -reg_res_alloc_failure: kfree(desc); - - return ERR_PTR(ret); } /** @@ -399,10 +346,7 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn) list_for_each_entry_safe(desc, tmp, &fr_pool->all_list, all_list) { list_del(&desc->all_list); - iser_free_reg_res(&desc->rsc); - if (desc->pi_ctx) - iser_free_pi_ctx(desc->pi_ctx); - kfree(desc); + iser_destroy_fastreg_desc(desc); ++i; } @@ -707,6 +651,7 @@ iser_calc_scsi_params(struct iser_conn *iser_conn, struct ib_device_attr *attr = &device->ib_device->attrs; unsigned short sg_tablesize, sup_sg_tablesize; unsigned short reserved_mr_pages; + u32 max_num_sg; /* * FRs without SG_GAPS or FMRs can only map up to a (device) page per @@ -720,12 +665,17 @@ iser_calc_scsi_params(struct iser_conn *iser_conn, else reserved_mr_pages = 1; + if (iser_conn->ib_conn.pi_support) + max_num_sg = attr->max_pi_fast_reg_page_list_len; + else + max_num_sg = attr->max_fast_reg_page_list_len; + sg_tablesize = DIV_ROUND_UP(max_sectors * 512, SIZE_4K); if (attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) sup_sg_tablesize = min_t( uint, ISCSI_ISER_MAX_SG_TABLESIZE, - attr->max_fast_reg_page_list_len - reserved_mr_pages); + max_num_sg - reserved_mr_pages); else sup_sg_tablesize = ISCSI_ISER_MAX_SG_TABLESIZE; @@ -1118,9 +1068,9 @@ u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, struct ib_mr_status mr_status; int ret; - if (desc && desc->pi_ctx->sig_protected) { - desc->pi_ctx->sig_protected = 0; - ret = ib_check_mr_status(desc->pi_ctx->sig_mr, + if (desc && desc->sig_protected) { + desc->sig_protected = 0; + ret = ib_check_mr_status(desc->rsc.sig_mr, IB_MR_CHECK_SIG_STATUS, &mr_status); if (ret) { pr_err("ib_check_mr_status failed, ret %d\n", ret); From patchwork Tue Jun 11 15:52:48 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Max Gurtovoy X-Patchwork-Id: 10987629 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id C0AC1924 for ; Tue, 11 Jun 2019 15:53:13 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id B0DCE287AE for ; Tue, 11 Jun 2019 15:53:13 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id A3A71287CD; Tue, 11 Jun 2019 15:53:13 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI,UNPARSEABLE_RELAY autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 096FA287AE for ; Tue, 11 Jun 2019 15:53:13 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2391847AbfFKPxM (ORCPT ); Tue, 11 Jun 2019 11:53:12 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:33133 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S2391831AbfFKPxL (ORCPT ); Tue, 11 Jun 2019 11:53:11 -0400 Received: from Internal Mail-Server by MTLPINE2 (envelope-from maxg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 11 Jun 2019 18:53:00 +0300 Received: from r-vnc12.mtr.labs.mlnx (r-vnc12.mtr.labs.mlnx [10.208.0.12]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x5BFqxLE023036; Tue, 11 Jun 2019 18:53:00 +0300 From: Max Gurtovoy To: leonro@mellanox.com, linux-rdma@vger.kernel.org, sagi@grimberg.me, jgg@mellanox.com, dledford@redhat.com, hch@lst.de, bvanassche@acm.org Cc: maxg@mellanox.com, israelr@mellanox.com, idanb@mellanox.com, oren@mellanox.com, vladimirk@mellanox.com, shlomin@mellanox.com Subject: [PATCH 12/21] IB/iser: Unwind WR union at iser_tx_desc Date: Tue, 11 Jun 2019 18:52:48 +0300 Message-Id: <1560268377-26560-13-git-send-email-maxg@mellanox.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: <1560268377-26560-1-git-send-email-maxg@mellanox.com> References: <1560268377-26560-1-git-send-email-maxg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP From: Israel Rukshin After decreasing WRs array size from 7 to 3 it is more readable to give each WR a descriptive name. Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg --- drivers/infiniband/ulp/iser/iscsi_iser.c | 3 ++- drivers/infiniband/ulp/iser/iscsi_iser.h | 34 ++++++------------------------- drivers/infiniband/ulp/iser/iser_memory.c | 16 ++++++++------- drivers/infiniband/ulp/iser/iser_verbs.c | 12 +++++++++-- 4 files changed, 27 insertions(+), 38 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index dbad8275b3bc..c7a3d75fb308 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -205,7 +205,8 @@ iser_initialize_task_headers(struct iscsi_task *task, goto out; } - tx_desc->wr_idx = 0; + tx_desc->inv_wr.next = NULL; + tx_desc->reg_wr.wr.next = NULL; tx_desc->mapped = true; tx_desc->dma_addr = dma_addr; tx_desc->tx_sg[0].addr = tx_desc->dma_addr; diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 6bf9eaa8ec96..39bf213444cb 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -225,12 +225,6 @@ enum iser_desc_type { ISCSI_TX_DATAOUT }; -/* - * Maximum number of work requests per task - * (invalidate, registration, send) - */ -#define ISER_MAX_WRS 3 - /** * struct iser_tx_desc - iSER TX descriptor * @@ -243,8 +237,9 @@ enum iser_desc_type { * unsolicited data-out or control * @num_sge: number sges used on this TX task * @mapped: Is the task header mapped - * @wr_idx: Current WR index - * @wrs: Array of WRs per task + * reg_wr: registration WR + * send_wr: send WR + * inv_wr: invalidate WR */ struct iser_tx_desc { struct iser_ctrl iser_header; @@ -255,11 +250,9 @@ struct iser_tx_desc { int num_sge; struct ib_cqe cqe; bool mapped; - u8 wr_idx; - union iser_wr { - struct ib_send_wr send; - struct ib_reg_wr fast_reg; - } wrs[ISER_MAX_WRS]; + struct ib_reg_wr reg_wr; + struct ib_send_wr send_wr; + struct ib_send_wr inv_wr; }; #define ISER_RX_PAD_SIZE (256 - (ISER_RX_PAYLOAD_SIZE + \ @@ -652,21 +645,6 @@ void iser_reg_desc_put_fmr(struct ib_conn *ib_conn, struct iser_fr_desc *desc); -static inline struct ib_send_wr * -iser_tx_next_wr(struct iser_tx_desc *tx_desc) -{ - struct ib_send_wr *cur_wr = &tx_desc->wrs[tx_desc->wr_idx].send; - struct ib_send_wr *last_wr; - - if (tx_desc->wr_idx) { - last_wr = &tx_desc->wrs[tx_desc->wr_idx - 1].send; - last_wr->next = cur_wr; - } - tx_desc->wr_idx++; - - return cur_wr; -} - static inline struct iser_conn * to_iser_conn(struct ib_conn *ib_conn) { diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index d66e17c2a085..2cc89a9b9e9b 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -365,13 +365,15 @@ iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask) static inline void iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr, - struct ib_cqe *cqe) + struct ib_cqe *cqe, + struct ib_send_wr *next_wr) { inv_wr->opcode = IB_WR_LOCAL_INV; inv_wr->wr_cqe = cqe; inv_wr->ex.invalidate_rkey = mr->rkey; inv_wr->send_flags = 0; inv_wr->num_sge = 0; + inv_wr->next = next_wr; } static int @@ -385,7 +387,7 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task, struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe; struct ib_mr *mr = rsc->sig_mr; struct ib_sig_attrs *sig_attrs = mr->sig_attrs; - struct ib_reg_wr *wr; + struct ib_reg_wr *wr = &tx_desc->reg_wr; int ret; memset(sig_attrs, 0, sizeof(*sig_attrs)); @@ -396,7 +398,7 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task, iser_set_prot_checks(iser_task->sc, &sig_attrs->check_mask); if (rsc->mr_valid) - iser_inv_rkey(iser_tx_next_wr(tx_desc), mr, cqe); + iser_inv_rkey(&tx_desc->inv_wr, mr, cqe, &wr->wr); ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey)); @@ -408,8 +410,8 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task, goto err; } - wr = container_of(iser_tx_next_wr(tx_desc), struct ib_reg_wr, wr); memset(wr, 0, sizeof(*wr)); + wr->wr.next = &tx_desc->send_wr; wr->wr.opcode = IB_WR_REG_MR_INTEGRITY; wr->wr.wr_cqe = cqe; wr->wr.num_sge = 0; @@ -441,11 +443,11 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, struct iser_tx_desc *tx_desc = &iser_task->desc; struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe; struct ib_mr *mr = rsc->mr; - struct ib_reg_wr *wr; + struct ib_reg_wr *wr = &tx_desc->reg_wr; int n; if (rsc->mr_valid) - iser_inv_rkey(iser_tx_next_wr(tx_desc), mr, cqe); + iser_inv_rkey(&tx_desc->inv_wr, mr, cqe, &wr->wr); ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey)); @@ -456,7 +458,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, return n < 0 ? n : -EINVAL; } - wr = container_of(iser_tx_next_wr(tx_desc), struct ib_reg_wr, wr); + wr->wr.next = &tx_desc->send_wr; wr->wr.opcode = IB_WR_REG_MR; wr->wr.wr_cqe = cqe; wr->wr.send_flags = 0; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index ffd6bbc819f7..ea9cf04ad002 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -1037,7 +1037,8 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count) int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, bool signal) { - struct ib_send_wr *wr = iser_tx_next_wr(tx_desc); + struct ib_send_wr *wr = &tx_desc->send_wr; + struct ib_send_wr *first_wr; int ib_ret; ib_dma_sync_single_for_device(ib_conn->device->ib_device, @@ -1051,7 +1052,14 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, wr->opcode = IB_WR_SEND; wr->send_flags = signal ? IB_SEND_SIGNALED : 0; - ib_ret = ib_post_send(ib_conn->qp, &tx_desc->wrs[0].send, NULL); + if (tx_desc->inv_wr.next) + first_wr = &tx_desc->inv_wr; + else if (tx_desc->reg_wr.wr.next) + first_wr = &tx_desc->reg_wr.wr; + else + first_wr = wr; + + ib_ret = ib_post_send(ib_conn->qp, first_wr, NULL); if (ib_ret) iser_err("ib_post_send failed, ret:%d opcode:%d\n", ib_ret, wr->opcode); From patchwork Tue Jun 11 15:52:49 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Max Gurtovoy X-Patchwork-Id: 10987631 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 571F114BB for ; Tue, 11 Jun 2019 15:53:14 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 47F65287C8 for ; Tue, 11 Jun 2019 15:53:14 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 3C2F428831; Tue, 11 Jun 2019 15:53:14 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI,UNPARSEABLE_RELAY autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id C266D287CD for ; Tue, 11 Jun 2019 15:53:13 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2391831AbfFKPxN (ORCPT ); Tue, 11 Jun 2019 11:53:13 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:33148 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S2391834AbfFKPxM (ORCPT ); Tue, 11 Jun 2019 11:53:12 -0400 Received: from Internal Mail-Server by MTLPINE2 (envelope-from maxg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 11 Jun 2019 18:53:00 +0300 Received: from r-vnc12.mtr.labs.mlnx (r-vnc12.mtr.labs.mlnx [10.208.0.12]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x5BFqxLF023036; Tue, 11 Jun 2019 18:53:00 +0300 From: Max Gurtovoy To: leonro@mellanox.com, linux-rdma@vger.kernel.org, sagi@grimberg.me, jgg@mellanox.com, dledford@redhat.com, hch@lst.de, bvanassche@acm.org Cc: maxg@mellanox.com, israelr@mellanox.com, idanb@mellanox.com, oren@mellanox.com, vladimirk@mellanox.com, shlomin@mellanox.com Subject: [PATCH 13/21] RDMA/core: Add an integrity MR pool support Date: Tue, 11 Jun 2019 18:52:49 +0300 Message-Id: <1560268377-26560-14-git-send-email-maxg@mellanox.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: <1560268377-26560-1-git-send-email-maxg@mellanox.com> References: <1560268377-26560-1-git-send-email-maxg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP From: Israel Rukshin This is a preparation for adding new signature API to the rw-API. Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg --- drivers/infiniband/core/mr_pool.c | 8 ++++++-- drivers/infiniband/core/rw.c | 4 ++-- drivers/nvme/host/rdma.c | 2 +- include/rdma/mr_pool.h | 2 +- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/core/mr_pool.c b/drivers/infiniband/core/mr_pool.c index d117f21ce9fd..c0e2df128b34 100644 --- a/drivers/infiniband/core/mr_pool.c +++ b/drivers/infiniband/core/mr_pool.c @@ -34,14 +34,18 @@ void ib_mr_pool_put(struct ib_qp *qp, struct list_head *list, struct ib_mr *mr) EXPORT_SYMBOL(ib_mr_pool_put); int ib_mr_pool_init(struct ib_qp *qp, struct list_head *list, int nr, - enum ib_mr_type type, u32 max_num_sg) + enum ib_mr_type type, u32 max_num_sg, u32 max_num_meta_sg) { struct ib_mr *mr; unsigned long flags; int ret, i; for (i = 0; i < nr; i++) { - mr = ib_alloc_mr(qp->pd, type, max_num_sg); + if (type == IB_MR_TYPE_INTEGRITY) + mr = ib_alloc_mr_integrity(qp->pd, max_num_sg, + max_num_meta_sg); + else + mr = ib_alloc_mr(qp->pd, type, max_num_sg); if (IS_ERR(mr)) { ret = PTR_ERR(mr); goto out; diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index c68171601915..3b224f5d036a 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -711,7 +711,7 @@ int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr) if (nr_mrs) { ret = ib_mr_pool_init(qp, &qp->rdma_mrs, nr_mrs, IB_MR_TYPE_MEM_REG, - rdma_rw_fr_page_list_len(dev)); + rdma_rw_fr_page_list_len(dev), 0); if (ret) { pr_err("%s: failed to allocated %d MRs\n", __func__, nr_mrs); @@ -721,7 +721,7 @@ int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr) if (nr_sig_mrs) { ret = ib_mr_pool_init(qp, &qp->sig_mrs, nr_sig_mrs, - IB_MR_TYPE_SIGNATURE, 2); + IB_MR_TYPE_SIGNATURE, 2, 0); if (ret) { pr_err("%s: failed to allocated %d SIG MRs\n", __func__, nr_sig_mrs); diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 97f668a39ae1..f3e3a8c861d6 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -482,7 +482,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs, queue->queue_size, IB_MR_TYPE_MEM_REG, - nvme_rdma_get_max_fr_pages(ibdev)); + nvme_rdma_get_max_fr_pages(ibdev), 0); if (ret) { dev_err(queue->ctrl->ctrl.device, "failed to initialize MR pool sized %d for QID %d\n", diff --git a/include/rdma/mr_pool.h b/include/rdma/mr_pool.h index 83763ef82354..e77123bcb43b 100644 --- a/include/rdma/mr_pool.h +++ b/include/rdma/mr_pool.h @@ -11,7 +11,7 @@ struct ib_mr *ib_mr_pool_get(struct ib_qp *qp, struct list_head *list); void ib_mr_pool_put(struct ib_qp *qp, struct list_head *list, struct ib_mr *mr); int ib_mr_pool_init(struct ib_qp *qp, struct list_head *list, int nr, - enum ib_mr_type type, u32 max_num_sg); + enum ib_mr_type type, u32 max_num_sg, u32 max_num_meta_sg); void ib_mr_pool_destroy(struct ib_qp *qp, struct list_head *list); #endif /* _RDMA_MR_POOL_H */ From patchwork Tue Jun 11 15:52:50 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Max Gurtovoy X-Patchwork-Id: 10987633 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 6544613AF for ; Tue, 11 Jun 2019 15:53:15 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 5523D287CC for ; Tue, 11 Jun 2019 15:53:15 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 4975B287CD; Tue, 11 Jun 2019 15:53:15 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI,UNPARSEABLE_RELAY autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 23C1D28563 for ; Tue, 11 Jun 2019 15:53:14 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2391834AbfFKPxN (ORCPT ); Tue, 11 Jun 2019 11:53:13 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:33161 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S2391827AbfFKPxN (ORCPT ); Tue, 11 Jun 2019 11:53:13 -0400 Received: from Internal Mail-Server by MTLPINE2 (envelope-from maxg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 11 Jun 2019 18:53:00 +0300 Received: from r-vnc12.mtr.labs.mlnx (r-vnc12.mtr.labs.mlnx [10.208.0.12]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x5BFqxLG023036; Tue, 11 Jun 2019 18:53:00 +0300 From: Max Gurtovoy To: leonro@mellanox.com, linux-rdma@vger.kernel.org, sagi@grimberg.me, jgg@mellanox.com, dledford@redhat.com, hch@lst.de, bvanassche@acm.org Cc: maxg@mellanox.com, israelr@mellanox.com, idanb@mellanox.com, oren@mellanox.com, vladimirk@mellanox.com, shlomin@mellanox.com Subject: [PATCH 14/21] RDMA/core: Rename signature qp create flag and signature device capability Date: Tue, 11 Jun 2019 18:52:50 +0300 Message-Id: <1560268377-26560-15-git-send-email-maxg@mellanox.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: <1560268377-26560-1-git-send-email-maxg@mellanox.com> References: <1560268377-26560-1-git-send-email-maxg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP From: Israel Rukshin Rename IB_QP_CREATE_SIGNATURE_EN to IB_QP_CREATE_INTEGRITY_EN and IB_DEVICE_SIGNATURE_HANDOVER to IB_DEVICE_INTEGRITY_HANDOVER. Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Reviewed-by: Christoph Hellwig --- drivers/infiniband/core/rw.c | 4 ++-- drivers/infiniband/hw/mlx5/main.c | 2 +- drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 +-- drivers/infiniband/hw/mlx5/qp.c | 14 +++++++------- drivers/infiniband/ulp/iser/iser_verbs.c | 4 ++-- drivers/infiniband/ulp/isert/ib_isert.c | 4 ++-- include/rdma/ib_verbs.h | 4 ++-- 7 files changed, 17 insertions(+), 18 deletions(-) diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index 3b224f5d036a..289aef824269 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -680,7 +680,7 @@ void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr) * we'll need two additional MRs for the registrations and the * invalidation. */ - if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN) + if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN) factor += 6; /* (inv + reg) * (data + prot + sig) */ else if (rdma_rw_can_use_mr(dev, attr->port_num)) factor += 2; /* inv + reg */ @@ -701,7 +701,7 @@ int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr) u32 nr_mrs = 0, nr_sig_mrs = 0; int ret = 0; - if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN) { + if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN) { nr_sig_mrs = attr->cap.max_rdma_ctxs; nr_mrs = attr->cap.max_rdma_ctxs * 2; } else if (rdma_rw_can_use_mr(dev, attr->port_num)) { diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 1458f1b18d19..4e8a7547d354 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -888,7 +888,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; if (MLX5_CAP_GEN(mdev, sho)) { - props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER; + props->device_cap_flags |= IB_DEVICE_INTEGRITY_HANDOVER; /* At this stage no support for signature handover */ props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 | IB_PROT_T10DIF_TYPE_2 | diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 07bac37c3450..32ae7af0b17d 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -431,8 +431,7 @@ struct mlx5_ib_qp { int create_type; - /* Store signature errors */ - bool signature_en; + bool integrity_en; struct list_head qps_list; struct list_head cq_recv_list; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 7c9fd335d43d..53d6baf167ec 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -442,9 +442,9 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr) } size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg); - if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN && + if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN && ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB) < MLX5_SIG_WQE_SIZE) - return MLX5_SIG_WQE_SIZE; + return MLX5_SIG_WQE_SIZE; else return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB); } @@ -496,8 +496,8 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, sizeof(struct mlx5_wqe_inline_seg); attr->cap.max_inline_data = qp->max_inline_data; - if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN) - qp->signature_en = true; + if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN) + qp->integrity_en = true; wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size); qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB; @@ -1042,7 +1042,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, void *qpc; int err; - if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN | + if (init_attr->create_flags & ~(IB_QP_CREATE_INTEGRITY_EN | IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK | IB_QP_CREATE_IPOIB_UD_LSO | IB_QP_CREATE_NETIF_QP | @@ -4717,7 +4717,7 @@ static int set_pi_umr_wr(const struct ib_send_wr *send_wr, if (unlikely(send_wr->num_sge != 0) || unlikely(wr->access & IB_ACCESS_REMOTE_ATOMIC) || - unlikely(!sig_mr->sig) || unlikely(!qp->signature_en) || + unlikely(!sig_mr->sig) || unlikely(!qp->integrity_en) || unlikely(!sig_mr->sig->sig_status_checked)) return -EINVAL; @@ -4766,7 +4766,7 @@ static int set_sig_umr_wr(const struct ib_send_wr *send_wr, if (unlikely(wr->wr.num_sge != 1) || unlikely(wr->access_flags & IB_ACCESS_REMOTE_ATOMIC) || - unlikely(!sig_mr->sig) || unlikely(!qp->signature_en) || + unlikely(!sig_mr->sig) || unlikely(!qp->integrity_en) || unlikely(!sig_mr->sig->sig_status_checked)) return -EINVAL; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index ea9cf04ad002..a6548de0e218 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -399,7 +399,7 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn) init_attr.qp_type = IB_QPT_RC; if (ib_conn->pi_support) { init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS + 1; - init_attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN; + init_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN; iser_conn->max_cmds = ISER_GET_MAX_XMIT_CMDS(ISER_QP_SIG_MAX_REQ_DTOS); } else { @@ -712,7 +712,7 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id) /* connection T10-PI support */ if (iser_pi_enable) { if (!(device->ib_device->attrs.device_cap_flags & - IB_DEVICE_SIGNATURE_HANDOVER)) { + IB_DEVICE_INTEGRITY_HANDOVER)) { iser_warn("T10-PI requested but not supported on %s, " "continue without T10-PI\n", dev_name(&ib_conn->device->ib_device->dev)); diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 3f8ced4341b6..df6c303103d5 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -133,7 +133,7 @@ isert_create_qp(struct isert_conn *isert_conn, attr.sq_sig_type = IB_SIGNAL_REQ_WR; attr.qp_type = IB_QPT_RC; if (device->pi_capable) - attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN; + attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN; ret = rdma_create_qp(cma_id, device->pd, &attr); if (ret) { @@ -309,7 +309,7 @@ isert_create_device_ib_res(struct isert_device *device) /* Check signature cap */ device->pi_capable = ib_dev->attrs.device_cap_flags & - IB_DEVICE_SIGNATURE_HANDOVER ? true : false; + IB_DEVICE_INTEGRITY_HANDOVER ? true : false; return 0; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 8fd13356c12b..3b1799b0f0c0 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -264,7 +264,7 @@ enum ib_device_cap_flags { */ IB_DEVICE_CROSS_CHANNEL = (1 << 27), IB_DEVICE_MANAGED_FLOW_STEERING = (1 << 29), - IB_DEVICE_SIGNATURE_HANDOVER = (1 << 30), + IB_DEVICE_INTEGRITY_HANDOVER = (1 << 30), IB_DEVICE_ON_DEMAND_PAGING = (1ULL << 31), IB_DEVICE_SG_GAPS_REG = (1ULL << 32), IB_DEVICE_VIRTUAL_FUNCTION = (1ULL << 33), @@ -1067,7 +1067,7 @@ enum ib_qp_create_flags { IB_QP_CREATE_MANAGED_SEND = 1 << 3, IB_QP_CREATE_MANAGED_RECV = 1 << 4, IB_QP_CREATE_NETIF_QP = 1 << 5, - IB_QP_CREATE_SIGNATURE_EN = 1 << 6, + IB_QP_CREATE_INTEGRITY_EN = 1 << 6, /* FREE = 1 << 7, */ IB_QP_CREATE_SCATTER_FCS = 1 << 8, IB_QP_CREATE_CVLAN_STRIPPING = 1 << 9, From patchwork Tue Jun 11 15:52:51 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Max Gurtovoy X-Patchwork-Id: 10987635 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 7FBE714BB for ; Tue, 11 Jun 2019 15:53:15 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 71AF2287EF for ; Tue, 11 Jun 2019 15:53:15 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 654DE26E1A; Tue, 11 Jun 2019 15:53:15 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI,UNPARSEABLE_RELAY autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id B1677283BB for ; Tue, 11 Jun 2019 15:53:14 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2391842AbfFKPxN (ORCPT ); Tue, 11 Jun 2019 11:53:13 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:33173 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S2388958AbfFKPxN (ORCPT ); Tue, 11 Jun 2019 11:53:13 -0400 Received: from Internal Mail-Server by MTLPINE2 (envelope-from maxg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 11 Jun 2019 18:53:01 +0300 Received: from r-vnc12.mtr.labs.mlnx (r-vnc12.mtr.labs.mlnx [10.208.0.12]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x5BFqxLH023036; Tue, 11 Jun 2019 18:53:00 +0300 From: Max Gurtovoy To: leonro@mellanox.com, linux-rdma@vger.kernel.org, sagi@grimberg.me, jgg@mellanox.com, dledford@redhat.com, hch@lst.de, bvanassche@acm.org Cc: maxg@mellanox.com, israelr@mellanox.com, idanb@mellanox.com, oren@mellanox.com, vladimirk@mellanox.com, shlomin@mellanox.com Subject: [PATCH 15/21] RDMA/core: Validate integrity handover device cap Date: Tue, 11 Jun 2019 18:52:51 +0300 Message-Id: <1560268377-26560-16-git-send-email-maxg@mellanox.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: <1560268377-26560-1-git-send-email-maxg@mellanox.com> References: <1560268377-26560-1-git-send-email-maxg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP Protect the case that a ULP tries to allocate a QP with signature enabled flag while the LLD doesn't support this feature. While we're here, also move integrity_en attribute from mlx5_qp to ib_qp as a preparation for adding new integrity API to the rw-API (that is part of ib_core module). Signed-off-by: Max Gurtovoy Signed-off-by: Israel Rukshin Reviewed-by: Christoph Hellwig --- drivers/infiniband/core/verbs.c | 6 ++++++ drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 -- drivers/infiniband/hw/mlx5/qp.c | 7 ++----- include/rdma/ib_verbs.h | 1 + 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 54b25adc65d3..3f1632cd49cb 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1158,6 +1158,10 @@ struct ib_qp *ib_create_qp_user(struct ib_pd *pd, qp_init_attr->cap.max_recv_sge)) return ERR_PTR(-EINVAL); + if ((qp_init_attr->create_flags & IB_QP_CREATE_INTEGRITY_EN) && + !(device->attrs.device_cap_flags & IB_DEVICE_INTEGRITY_HANDOVER)) + return ERR_PTR(-EINVAL); + /* * If the callers is using the RDMA API calculate the resources * needed for the RDMA READ/WRITE operations. @@ -1233,6 +1237,8 @@ struct ib_qp *ib_create_qp_user(struct ib_pd *pd, qp->max_write_sge = qp_init_attr->cap.max_send_sge; qp->max_read_sge = min_t(u32, qp_init_attr->cap.max_send_sge, device->attrs.max_sge_rd); + if (qp_init_attr->create_flags & IB_QP_CREATE_INTEGRITY_EN) + qp->integrity_en = true; return qp; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 32ae7af0b17d..06de3507e3d6 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -431,8 +431,6 @@ struct mlx5_ib_qp { int create_type; - bool integrity_en; - struct list_head qps_list; struct list_head cq_recv_list; struct list_head cq_send_list; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 53d6baf167ec..4cbafcb215e8 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -496,9 +496,6 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, sizeof(struct mlx5_wqe_inline_seg); attr->cap.max_inline_data = qp->max_inline_data; - if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN) - qp->integrity_en = true; - wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size); qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB; if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) { @@ -4717,7 +4714,7 @@ static int set_pi_umr_wr(const struct ib_send_wr *send_wr, if (unlikely(send_wr->num_sge != 0) || unlikely(wr->access & IB_ACCESS_REMOTE_ATOMIC) || - unlikely(!sig_mr->sig) || unlikely(!qp->integrity_en) || + unlikely(!sig_mr->sig) || unlikely(!qp->ibqp.integrity_en) || unlikely(!sig_mr->sig->sig_status_checked)) return -EINVAL; @@ -4766,7 +4763,7 @@ static int set_sig_umr_wr(const struct ib_send_wr *send_wr, if (unlikely(wr->wr.num_sge != 1) || unlikely(wr->access_flags & IB_ACCESS_REMOTE_ATOMIC) || - unlikely(!sig_mr->sig) || unlikely(!qp->integrity_en) || + unlikely(!sig_mr->sig) || unlikely(!qp->ibqp.integrity_en) || unlikely(!sig_mr->sig->sig_status_checked)) return -EINVAL; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 3b1799b0f0c0..0da1fa1d02ee 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1722,6 +1722,7 @@ struct ib_qp { struct ib_qp_security *qp_sec; u8 port; + bool integrity_en; /* * Implementation details of the RDMA core, don't use in drivers: */ From patchwork Tue Jun 11 15:52:52 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Max Gurtovoy X-Patchwork-Id: 10987637 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id EC81C924 for ; Tue, 11 Jun 2019 15:53:15 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id DC8EC283BB for ; Tue, 11 Jun 2019 15:53:15 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id D0CCE287CC; Tue, 11 Jun 2019 15:53:15 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI,UNPARSEABLE_RELAY autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 78D00286A0 for ; Tue, 11 Jun 2019 15:53:15 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2391833AbfFKPxO (ORCPT ); Tue, 11 Jun 2019 11:53:14 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:33187 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S2391827AbfFKPxO (ORCPT ); Tue, 11 Jun 2019 11:53:14 -0400 Received: from Internal Mail-Server by MTLPINE2 (envelope-from maxg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 11 Jun 2019 18:53:01 +0300 Received: from r-vnc12.mtr.labs.mlnx (r-vnc12.mtr.labs.mlnx [10.208.0.12]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x5BFqxLI023036; Tue, 11 Jun 2019 18:53:01 +0300 From: Max Gurtovoy To: leonro@mellanox.com, linux-rdma@vger.kernel.org, sagi@grimberg.me, jgg@mellanox.com, dledford@redhat.com, hch@lst.de, bvanassche@acm.org Cc: maxg@mellanox.com, israelr@mellanox.com, idanb@mellanox.com, oren@mellanox.com, vladimirk@mellanox.com, shlomin@mellanox.com Subject: [PATCH 16/21] RDMA/rw: Introduce rdma_rw_inv_key helper Date: Tue, 11 Jun 2019 18:52:52 +0300 Message-Id: <1560268377-26560-17-git-send-email-maxg@mellanox.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: <1560268377-26560-1-git-send-email-maxg@mellanox.com> References: <1560268377-26560-1-git-send-email-maxg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP From: Israel Rukshin This is a preparation for adding new signature API to the rw-API. Signed-off-by: Israel Rukshin Suggested-by: Christoph Hellwig Reviewed-by: Max Gurtovoy Reviewed-by: Christoph Hellwig --- drivers/infiniband/core/rw.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index 289aef824269..096fafec1047 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -57,6 +57,22 @@ static inline u32 rdma_rw_fr_page_list_len(struct ib_device *dev) return min_t(u32, dev->attrs.max_fast_reg_page_list_len, 256); } +static inline int rdma_rw_inv_key(struct rdma_rw_reg_ctx *reg) +{ + int count = 0; + + if (reg->mr->need_inval) { + reg->inv_wr.opcode = IB_WR_LOCAL_INV; + reg->inv_wr.ex.invalidate_rkey = reg->mr->lkey; + reg->inv_wr.next = ®->reg_wr.wr; + count++; + } else { + reg->inv_wr.next = NULL; + } + + return count; +} + /* Caller must have zero-initialized *reg. */ static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num, struct rdma_rw_reg_ctx *reg, struct scatterlist *sg, @@ -70,14 +86,7 @@ static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num, if (!reg->mr) return -EAGAIN; - if (reg->mr->need_inval) { - reg->inv_wr.opcode = IB_WR_LOCAL_INV; - reg->inv_wr.ex.invalidate_rkey = reg->mr->lkey; - reg->inv_wr.next = ®->reg_wr.wr; - count++; - } else { - reg->inv_wr.next = NULL; - } + count += rdma_rw_inv_key(reg); ret = ib_map_mr_sg(reg->mr, sg, nents, &offset, PAGE_SIZE); if (ret < 0 || ret < nents) { From patchwork Tue Jun 11 15:52:53 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Max Gurtovoy X-Patchwork-Id: 10987639 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 0691A13AF for ; Tue, 11 Jun 2019 15:53:18 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id E9197287F5 for ; Tue, 11 Jun 2019 15:53:17 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id DD42F287CC; Tue, 11 Jun 2019 15:53:17 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI,UNPARSEABLE_RELAY autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id C9610286C6 for ; Tue, 11 Jun 2019 15:53:16 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2391836AbfFKPxQ (ORCPT ); Tue, 11 Jun 2019 11:53:16 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:33201 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S2388958AbfFKPxQ (ORCPT ); Tue, 11 Jun 2019 11:53:16 -0400 Received: from Internal Mail-Server by MTLPINE2 (envelope-from maxg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 11 Jun 2019 18:53:01 +0300 Received: from r-vnc12.mtr.labs.mlnx (r-vnc12.mtr.labs.mlnx [10.208.0.12]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x5BFqxLJ023036; Tue, 11 Jun 2019 18:53:01 +0300 From: Max Gurtovoy To: leonro@mellanox.com, linux-rdma@vger.kernel.org, sagi@grimberg.me, jgg@mellanox.com, dledford@redhat.com, hch@lst.de, bvanassche@acm.org Cc: maxg@mellanox.com, israelr@mellanox.com, idanb@mellanox.com, oren@mellanox.com, vladimirk@mellanox.com, shlomin@mellanox.com Subject: [PATCH 17/21] RDMA/rw: Use IB_WR_REG_MR_INTEGRITY for PI handover Date: Tue, 11 Jun 2019 18:52:53 +0300 Message-Id: <1560268377-26560-18-git-send-email-maxg@mellanox.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: <1560268377-26560-1-git-send-email-maxg@mellanox.com> References: <1560268377-26560-1-git-send-email-maxg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP From: Israel Rukshin Replace the old signature handover API with the new one. The new API simplifes PI handover code complexity for ULPs and improve performance. For RW API it will reduce the maximum number of work requests per task and the need of dealing with multiple MRs (and their registrations and invalidations) per task. All the mappings and registration of the data and the protection buffers is done by the LLD using a single WR and a special MR type (IB_MR_TYPE_INTEGRITY) for the PI handover operation. The setup of the tested benchmark (using iSER ULP): - 2 servers with 24 cores (1 initiator and 1 target) - ConnectX-4/ConnectX-5 adapters - 24 target sessions with 1 LUN each - ramdisk backstore - PI active Performance results running fio (24 jobs, 128 iodepth) using write_generate=1 and read_verify=1 (w/w.o patch): bs IOPS(read) IOPS(write) ---- ---------- ---------- 512 1243.3K/1182.3K 1725.1K/1680.2K 4k 571233/528835 743293/748259 32k 72388/71086 71789/93573 Using write_generate=0 and read_verify=0 (w/w.o patch): bs IOPS(read) IOPS(write) ---- ---------- ---------- 512 1572.1K/1427.2K 1823.5K/1724.3K 4k 921992/916194 753772/768267 32k 75052/73960 73180/95484 There is a performance degradation when writing big block sizes. Degradation is caused by the complexity of combining multiple indirections and perform RDMA READ operation from it. This will be fixed in the following patches by reducing the indirections if possible. Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Reviewed-by: Christoph Hellwig --- drivers/infiniband/core/rw.c | 167 +++++++++++++------------------- drivers/infiniband/ulp/isert/ib_isert.c | 4 +- include/rdma/rw.h | 9 -- 3 files changed, 72 insertions(+), 108 deletions(-) diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index 096fafec1047..dce06108c8c3 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -51,10 +51,18 @@ static inline bool rdma_rw_io_needs_mr(struct ib_device *dev, u8 port_num, return false; } -static inline u32 rdma_rw_fr_page_list_len(struct ib_device *dev) +static inline u32 rdma_rw_fr_page_list_len(struct ib_device *dev, + bool pi_support) { + u32 max_pages; + + if (pi_support) + max_pages = dev->attrs.max_pi_fast_reg_page_list_len; + else + max_pages = dev->attrs.max_fast_reg_page_list_len; + /* arbitrary limit to avoid allocating gigantic resources */ - return min_t(u32, dev->attrs.max_fast_reg_page_list_len, 256); + return min_t(u32, max_pages, 256); } static inline int rdma_rw_inv_key(struct rdma_rw_reg_ctx *reg) @@ -78,7 +86,8 @@ static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num, struct rdma_rw_reg_ctx *reg, struct scatterlist *sg, u32 sg_cnt, u32 offset) { - u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device); + u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device, + qp->integrity_en); u32 nents = min(sg_cnt, pages_per_mr); int count = 0, ret; @@ -111,7 +120,8 @@ static int rdma_rw_init_mr_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u64 remote_addr, u32 rkey, enum dma_data_direction dir) { struct rdma_rw_reg_ctx *prev = NULL; - u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device); + u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device, + qp->integrity_en); int i, j, ret = 0, count = 0; ctx->nr_ops = (sg_cnt + pages_per_mr - 1) / pages_per_mr; @@ -352,9 +362,9 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u64 remote_addr, u32 rkey, enum dma_data_direction dir) { struct ib_device *dev = qp->pd->device; - u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device); + u32 pages_per_mr = rdma_rw_fr_page_list_len(qp->pd->device, + qp->integrity_en); struct ib_rdma_wr *rdma_wr; - struct ib_send_wr *prev_wr = NULL; int count = 0, ret; if (sg_cnt > pages_per_mr || prot_sg_cnt > pages_per_mr) { @@ -368,75 +378,58 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, return -ENOMEM; sg_cnt = ret; - ret = ib_dma_map_sg(dev, prot_sg, prot_sg_cnt, dir); - if (!ret) { - ret = -ENOMEM; - goto out_unmap_sg; + if (prot_sg_cnt) { + ret = ib_dma_map_sg(dev, prot_sg, prot_sg_cnt, dir); + if (!ret) { + ret = -ENOMEM; + goto out_unmap_sg; + } + prot_sg_cnt = ret; } - prot_sg_cnt = ret; ctx->type = RDMA_RW_SIG_MR; ctx->nr_ops = 1; - ctx->sig = kcalloc(1, sizeof(*ctx->sig), GFP_KERNEL); - if (!ctx->sig) { + ctx->reg = kcalloc(1, sizeof(*ctx->reg), GFP_KERNEL); + if (!ctx->reg) { ret = -ENOMEM; goto out_unmap_prot_sg; } - ret = rdma_rw_init_one_mr(qp, port_num, &ctx->sig->data, sg, sg_cnt, 0); - if (ret < 0) - goto out_free_ctx; - count += ret; - prev_wr = &ctx->sig->data.reg_wr.wr; - - ret = rdma_rw_init_one_mr(qp, port_num, &ctx->sig->prot, - prot_sg, prot_sg_cnt, 0); - if (ret < 0) - goto out_destroy_data_mr; - count += ret; - - if (ctx->sig->prot.inv_wr.next) - prev_wr->next = &ctx->sig->prot.inv_wr; - else - prev_wr->next = &ctx->sig->prot.reg_wr.wr; - prev_wr = &ctx->sig->prot.reg_wr.wr; - - ctx->sig->sig_mr = ib_mr_pool_get(qp, &qp->sig_mrs); - if (!ctx->sig->sig_mr) { + ctx->reg->mr = ib_mr_pool_get(qp, &qp->sig_mrs); + if (!ctx->reg->mr) { ret = -EAGAIN; - goto out_destroy_prot_mr; + goto out_free_ctx; } - if (ctx->sig->sig_mr->need_inval) { - memset(&ctx->sig->sig_inv_wr, 0, sizeof(ctx->sig->sig_inv_wr)); + count += rdma_rw_inv_key(ctx->reg); - ctx->sig->sig_inv_wr.opcode = IB_WR_LOCAL_INV; - ctx->sig->sig_inv_wr.ex.invalidate_rkey = ctx->sig->sig_mr->rkey; + memcpy(ctx->reg->mr->sig_attrs, sig_attrs, sizeof(struct ib_sig_attrs)); - prev_wr->next = &ctx->sig->sig_inv_wr; - prev_wr = &ctx->sig->sig_inv_wr; + ret = ib_map_mr_sg_pi(ctx->reg->mr, sg, sg_cnt, NULL, prot_sg, + prot_sg_cnt, NULL, SZ_4K); + if (unlikely(ret)) { + pr_err("failed to map PI sg (%d)\n", sg_cnt + prot_sg_cnt); + goto out_destroy_sig_mr; } - ctx->sig->sig_wr.wr.opcode = IB_WR_REG_SIG_MR; - ctx->sig->sig_wr.wr.wr_cqe = NULL; - ctx->sig->sig_wr.wr.sg_list = &ctx->sig->data.sge; - ctx->sig->sig_wr.wr.num_sge = 1; - ctx->sig->sig_wr.access_flags = IB_ACCESS_LOCAL_WRITE; - ctx->sig->sig_wr.sig_attrs = sig_attrs; - ctx->sig->sig_wr.sig_mr = ctx->sig->sig_mr; - if (prot_sg_cnt) - ctx->sig->sig_wr.prot = &ctx->sig->prot.sge; - prev_wr->next = &ctx->sig->sig_wr.wr; - prev_wr = &ctx->sig->sig_wr.wr; + ctx->reg->reg_wr.wr.opcode = IB_WR_REG_MR_INTEGRITY; + ctx->reg->reg_wr.wr.wr_cqe = NULL; + ctx->reg->reg_wr.wr.num_sge = 0; + ctx->reg->reg_wr.wr.send_flags = 0; + ctx->reg->reg_wr.access = IB_ACCESS_LOCAL_WRITE; + if (rdma_protocol_iwarp(qp->device, port_num)) + ctx->reg->reg_wr.access |= IB_ACCESS_REMOTE_WRITE; + ctx->reg->reg_wr.mr = ctx->reg->mr; + ctx->reg->reg_wr.key = ctx->reg->mr->lkey; count++; - ctx->sig->sig_sge.addr = 0; - ctx->sig->sig_sge.length = ctx->sig->data.sge.length; - if (sig_attrs->wire.sig_type != IB_SIG_TYPE_NONE) - ctx->sig->sig_sge.length += ctx->sig->prot.sge.length; + ctx->reg->sge.addr = ctx->reg->mr->iova; + ctx->reg->sge.length = ctx->reg->mr->length; + if (sig_attrs->wire.sig_type == IB_SIG_TYPE_NONE) + ctx->reg->sge.length -= ctx->reg->mr->sig_attrs->meta_length; - rdma_wr = &ctx->sig->data.wr; - rdma_wr->wr.sg_list = &ctx->sig->sig_sge; + rdma_wr = &ctx->reg->wr; + rdma_wr->wr.sg_list = &ctx->reg->sge; rdma_wr->wr.num_sge = 1; rdma_wr->remote_addr = remote_addr; rdma_wr->rkey = rkey; @@ -444,21 +437,18 @@ int rdma_rw_ctx_signature_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, rdma_wr->wr.opcode = IB_WR_RDMA_WRITE; else rdma_wr->wr.opcode = IB_WR_RDMA_READ; - prev_wr->next = &rdma_wr->wr; - prev_wr = &rdma_wr->wr; + ctx->reg->reg_wr.wr.next = &rdma_wr->wr; count++; return count; -out_destroy_prot_mr: - if (prot_sg_cnt) - ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->prot.mr); -out_destroy_data_mr: - ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->data.mr); +out_destroy_sig_mr: + ib_mr_pool_put(qp, &qp->sig_mrs, ctx->reg->mr); out_free_ctx: - kfree(ctx->sig); + kfree(ctx->reg); out_unmap_prot_sg: - ib_dma_unmap_sg(dev, prot_sg, prot_sg_cnt, dir); + if (prot_sg_cnt) + ib_dma_unmap_sg(dev, prot_sg, prot_sg_cnt, dir); out_unmap_sg: ib_dma_unmap_sg(dev, sg, sg_cnt, dir); return ret; @@ -501,22 +491,8 @@ struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, switch (ctx->type) { case RDMA_RW_SIG_MR: - rdma_rw_update_lkey(&ctx->sig->data, true); - if (ctx->sig->prot.mr) - rdma_rw_update_lkey(&ctx->sig->prot, true); - - ctx->sig->sig_mr->need_inval = true; - ib_update_fast_reg_key(ctx->sig->sig_mr, - ib_inc_rkey(ctx->sig->sig_mr->lkey)); - ctx->sig->sig_sge.lkey = ctx->sig->sig_mr->lkey; - - if (ctx->sig->data.inv_wr.next) - first_wr = &ctx->sig->data.inv_wr; - else - first_wr = &ctx->sig->data.reg_wr.wr; - last_wr = &ctx->sig->data.wr.wr; - break; case RDMA_RW_MR: + /* fallthrough */ for (i = 0; i < ctx->nr_ops; i++) { rdma_rw_update_lkey(&ctx->reg[i], ctx->reg[i].wr.wr.opcode != @@ -633,16 +609,12 @@ void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp, if (WARN_ON_ONCE(ctx->type != RDMA_RW_SIG_MR)) return; - ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->data.mr); - ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir); + ib_mr_pool_put(qp, &qp->sig_mrs, ctx->reg->mr); + kfree(ctx->reg); - if (ctx->sig->prot.mr) { - ib_mr_pool_put(qp, &qp->rdma_mrs, ctx->sig->prot.mr); + ib_dma_unmap_sg(qp->pd->device, sg, sg_cnt, dir); + if (prot_sg_cnt) ib_dma_unmap_sg(qp->pd->device, prot_sg, prot_sg_cnt, dir); - } - - ib_mr_pool_put(qp, &qp->sig_mrs, ctx->sig->sig_mr); - kfree(ctx->sig); } EXPORT_SYMBOL(rdma_rw_ctx_destroy_signature); @@ -663,7 +635,7 @@ unsigned int rdma_rw_mr_factor(struct ib_device *device, u8 port_num, unsigned int mr_pages; if (rdma_rw_can_use_mr(device, port_num)) - mr_pages = rdma_rw_fr_page_list_len(device); + mr_pages = rdma_rw_fr_page_list_len(device, false); else mr_pages = device->attrs.max_sge_rd; return DIV_ROUND_UP(maxpages, mr_pages); @@ -689,9 +661,8 @@ void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr) * we'll need two additional MRs for the registrations and the * invalidation. */ - if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN) - factor += 6; /* (inv + reg) * (data + prot + sig) */ - else if (rdma_rw_can_use_mr(dev, attr->port_num)) + if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN || + rdma_rw_can_use_mr(dev, attr->port_num)) factor += 2; /* inv + reg */ attr->cap.max_send_wr += factor * attr->cap.max_rdma_ctxs; @@ -707,20 +678,22 @@ void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr) int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr) { struct ib_device *dev = qp->pd->device; - u32 nr_mrs = 0, nr_sig_mrs = 0; + u32 nr_mrs = 0, nr_sig_mrs = 0, max_num_sg = 0; int ret = 0; if (attr->create_flags & IB_QP_CREATE_INTEGRITY_EN) { nr_sig_mrs = attr->cap.max_rdma_ctxs; - nr_mrs = attr->cap.max_rdma_ctxs * 2; + nr_mrs = attr->cap.max_rdma_ctxs; + max_num_sg = rdma_rw_fr_page_list_len(dev, true); } else if (rdma_rw_can_use_mr(dev, attr->port_num)) { nr_mrs = attr->cap.max_rdma_ctxs; + max_num_sg = rdma_rw_fr_page_list_len(dev, false); } if (nr_mrs) { ret = ib_mr_pool_init(qp, &qp->rdma_mrs, nr_mrs, IB_MR_TYPE_MEM_REG, - rdma_rw_fr_page_list_len(dev), 0); + max_num_sg, 0); if (ret) { pr_err("%s: failed to allocated %d MRs\n", __func__, nr_mrs); @@ -730,7 +703,7 @@ int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr) if (nr_sig_mrs) { ret = ib_mr_pool_init(qp, &qp->sig_mrs, nr_sig_mrs, - IB_MR_TYPE_SIGNATURE, 2, 0); + IB_MR_TYPE_INTEGRITY, max_num_sg, max_num_sg); if (ret) { pr_err("%s: failed to allocated %d SIG MRs\n", __func__, nr_sig_mrs); diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index df6c303103d5..a1a035270cab 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -1669,7 +1669,7 @@ isert_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) isert_dbg("Cmd %p\n", isert_cmd); - ret = isert_check_pi_status(cmd, isert_cmd->rw.sig->sig_mr); + ret = isert_check_pi_status(cmd, isert_cmd->rw.reg->mr); isert_rdma_rw_ctx_destroy(isert_cmd, isert_conn); if (ret) { @@ -1715,7 +1715,7 @@ isert_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc) iscsit_stop_dataout_timer(cmd); if (isert_prot_cmd(isert_conn, se_cmd)) - ret = isert_check_pi_status(se_cmd, isert_cmd->rw.sig->sig_mr); + ret = isert_check_pi_status(se_cmd, isert_cmd->rw.reg->mr); isert_rdma_rw_ctx_destroy(isert_cmd, isert_conn); cmd->write_data_done = 0; diff --git a/include/rdma/rw.h b/include/rdma/rw.h index 494f79ca3e62..6ad9dc836c10 100644 --- a/include/rdma/rw.h +++ b/include/rdma/rw.h @@ -39,15 +39,6 @@ struct rdma_rw_ctx { struct ib_send_wr inv_wr; struct ib_mr *mr; } *reg; - - struct { - struct rdma_rw_reg_ctx data; - struct rdma_rw_reg_ctx prot; - struct ib_send_wr sig_inv_wr; - struct ib_mr *sig_mr; - struct ib_sge sig_sge; - struct ib_sig_handover_wr sig_wr; - } *sig; }; }; From patchwork Tue Jun 11 15:52:54 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Max Gurtovoy X-Patchwork-Id: 10987643 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 1C3FA924 for ; Tue, 11 Jun 2019 15:53:20 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 0B3F5287CD for ; Tue, 11 Jun 2019 15:53:20 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id F3C52287F5; Tue, 11 Jun 2019 15:53:19 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI,UNPARSEABLE_RELAY autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 3741628831 for ; Tue, 11 Jun 2019 15:53:19 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2391835AbfFKPxS (ORCPT ); Tue, 11 Jun 2019 11:53:18 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:33214 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S2391827AbfFKPxS (ORCPT ); Tue, 11 Jun 2019 11:53:18 -0400 Received: from Internal Mail-Server by MTLPINE2 (envelope-from maxg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 11 Jun 2019 18:53:01 +0300 Received: from r-vnc12.mtr.labs.mlnx (r-vnc12.mtr.labs.mlnx [10.208.0.12]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x5BFqxLK023036; Tue, 11 Jun 2019 18:53:01 +0300 From: Max Gurtovoy To: leonro@mellanox.com, linux-rdma@vger.kernel.org, sagi@grimberg.me, jgg@mellanox.com, dledford@redhat.com, hch@lst.de, bvanassche@acm.org Cc: maxg@mellanox.com, israelr@mellanox.com, idanb@mellanox.com, oren@mellanox.com, vladimirk@mellanox.com, shlomin@mellanox.com Subject: [PATCH 18/21] RDMA/mlx5: Remove unused IB_WR_REG_SIG_MR code Date: Tue, 11 Jun 2019 18:52:54 +0300 Message-Id: <1560268377-26560-19-git-send-email-maxg@mellanox.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: <1560268377-26560-1-git-send-email-maxg@mellanox.com> References: <1560268377-26560-1-git-send-email-maxg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP From: Israel Rukshin IB_WR_REG_SIG_MR is not needed after IB_WR_REG_MR_INTEGRITY was used. Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg --- drivers/infiniband/hw/mlx5/mr.c | 15 ++- drivers/infiniband/hw/mlx5/qp.c | 154 ++---------------------------- drivers/infiniband/hw/vmw_pvrdma/pvrdma.h | 2 +- include/rdma/ib_verbs.h | 19 ---- 4 files changed, 17 insertions(+), 173 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 1ae1846256b8..beed2d5530d3 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1760,8 +1760,7 @@ static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd, goto err_free_in; mr->desc_size = sizeof(struct mlx5_klm); mr->max_descs = ndescs; - } else if (mr_type == IB_MR_TYPE_SIGNATURE || - mr_type == IB_MR_TYPE_INTEGRITY) { + } else if (mr_type == IB_MR_TYPE_INTEGRITY) { u32 psv_index[2]; MLX5_SET(mkc, mkc, bsf_en, 1); @@ -1787,13 +1786,11 @@ static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd, mr->sig->sig_err_exists = false; /* Next UMR, Arm SIGERR */ ++mr->sig->sigerr_count; - if (mr_type == IB_MR_TYPE_INTEGRITY) { - mr->pi_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, - max_num_meta_sg); - if (IS_ERR(mr->pi_mr)) { - err = PTR_ERR(mr->pi_mr); - goto err_destroy_psv; - } + mr->pi_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, + max_num_meta_sg); + if (IS_ERR(mr->pi_mr)) { + err = PTR_ERR(mr->pi_mr); + goto err_destroy_psv; } } else { mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 4cbafcb215e8..6626468ab628 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4560,32 +4560,17 @@ static int set_sig_data_segment(const struct ib_send_wr *send_wr, bool prot = false; int ret; int wqe_size; + struct mlx5_ib_mr *mr = to_mmr(sig_mr); + struct mlx5_ib_mr *pi_mr = mr->pi_mr; - if (send_wr->opcode == IB_WR_REG_SIG_MR) { - const struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr); - - data_len = wr->wr.sg_list->length; - data_key = wr->wr.sg_list->lkey; - data_va = wr->wr.sg_list->addr; - if (wr->prot) { - prot_len = wr->prot->length; - prot_key = wr->prot->lkey; - prot_va = wr->prot->addr; - prot = true; - } - } else { - struct mlx5_ib_mr *mr = to_mmr(sig_mr); - struct mlx5_ib_mr *pi_mr = mr->pi_mr; - - data_len = pi_mr->data_length; - data_key = pi_mr->ibmr.lkey; - data_va = pi_mr->ibmr.iova; - if (pi_mr->meta_ndescs) { - prot_len = pi_mr->meta_length; - prot_key = pi_mr->ibmr.lkey; - prot_va = pi_mr->ibmr.iova + data_len; - prot = true; - } + data_len = pi_mr->data_length; + data_key = pi_mr->ibmr.lkey; + data_va = pi_mr->ibmr.iova; + if (pi_mr->meta_ndescs) { + prot_len = pi_mr->meta_length; + prot_key = pi_mr->ibmr.lkey; + prot_va = pi_mr->ibmr.iova + data_len; + prot = true; } if (!prot || (data_key == prot_key && data_va == prot_va && @@ -4751,57 +4736,6 @@ static int set_pi_umr_wr(const struct ib_send_wr *send_wr, return 0; } -static int set_sig_umr_wr(const struct ib_send_wr *send_wr, - struct mlx5_ib_qp *qp, void **seg, int *size, - void **cur_edge) -{ - const struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr); - struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr); - u32 pdn = get_pd(qp)->pdn; - u32 xlt_size; - int region_len, ret; - - if (unlikely(wr->wr.num_sge != 1) || - unlikely(wr->access_flags & IB_ACCESS_REMOTE_ATOMIC) || - unlikely(!sig_mr->sig) || unlikely(!qp->ibqp.integrity_en) || - unlikely(!sig_mr->sig->sig_status_checked)) - return -EINVAL; - - /* length of the protected region, data + protection */ - region_len = wr->wr.sg_list->length; - if (wr->prot && - (wr->prot->lkey != wr->wr.sg_list->lkey || - wr->prot->addr != wr->wr.sg_list->addr || - wr->prot->length != wr->wr.sg_list->length)) - region_len += wr->prot->length; - - /** - * KLM octoword size - if protection was provided - * then we use strided block format (3 octowords), - * else we use single KLM (1 octoword) - **/ - xlt_size = wr->prot ? 0x30 : sizeof(struct mlx5_klm); - - set_sig_umr_segment(*seg, xlt_size); - *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); - *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; - handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - - set_sig_mkey_segment(*seg, wr->sig_mr, wr->access_flags, xlt_size, - region_len, pdn); - *seg += sizeof(struct mlx5_mkey_seg); - *size += sizeof(struct mlx5_mkey_seg) / 16; - handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - - ret = set_sig_data_segment(send_wr, wr->sig_mr, wr->sig_attrs, qp, seg, - size, cur_edge); - if (ret) - return ret; - - sig_mr->sig->sig_status_checked = false; - return 0; -} - static int set_psv_wr(struct ib_sig_domain *domain, u32 psv_idx, void **seg, int *size) { @@ -5190,74 +5124,6 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, num_sge = 0; goto skip_psv; - case IB_WR_REG_SIG_MR: - qp->sq.wr_data[idx] = IB_WR_REG_SIG_MR; - mr = to_mmr(sig_handover_wr(wr)->sig_mr); - - ctrl->imm = cpu_to_be32(mr->ibmr.rkey); - err = set_sig_umr_wr(wr, qp, &seg, &size, - &cur_edge); - if (err) { - mlx5_ib_warn(dev, "\n"); - *bad_wr = wr; - goto out; - } - - finish_wqe(qp, ctrl, seg, size, cur_edge, idx, - wr->wr_id, nreq, fence, - MLX5_OPCODE_UMR); - /* - * SET_PSV WQEs are not signaled and solicited - * on error - */ - err = __begin_wqe(qp, &seg, &ctrl, wr, &idx, - &size, &cur_edge, nreq, false, - true); - if (err) { - mlx5_ib_warn(dev, "\n"); - err = -ENOMEM; - *bad_wr = wr; - goto out; - } - - err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->mem, - mr->sig->psv_memory.psv_idx, &seg, - &size); - if (err) { - mlx5_ib_warn(dev, "\n"); - *bad_wr = wr; - goto out; - } - - finish_wqe(qp, ctrl, seg, size, cur_edge, idx, - wr->wr_id, nreq, fence, - MLX5_OPCODE_SET_PSV); - err = __begin_wqe(qp, &seg, &ctrl, wr, &idx, - &size, &cur_edge, nreq, false, - true); - if (err) { - mlx5_ib_warn(dev, "\n"); - err = -ENOMEM; - *bad_wr = wr; - goto out; - } - - err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->wire, - mr->sig->psv_wire.psv_idx, &seg, - &size); - if (err) { - mlx5_ib_warn(dev, "\n"); - *bad_wr = wr; - goto out; - } - - finish_wqe(qp, ctrl, seg, size, cur_edge, idx, - wr->wr_id, nreq, fence, - MLX5_OPCODE_SET_PSV); - qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; - num_sge = 0; - goto skip_psv; - default: break; } diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h index 3c633ab58052..c142f5e7f25f 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h @@ -456,7 +456,7 @@ static inline enum pvrdma_wr_opcode ib_wr_opcode_to_pvrdma(enum ib_wr_opcode op) return PVRDMA_WR_MASKED_ATOMIC_CMP_AND_SWP; case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD: return PVRDMA_WR_MASKED_ATOMIC_FETCH_AND_ADD; - case IB_WR_REG_SIG_MR: + case IB_WR_REG_MR_INTEGRITY: return PVRDMA_WR_REG_SIG_MR; default: return PVRDMA_WR_ERROR; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 0da1fa1d02ee..d425ef12517c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -787,9 +787,6 @@ __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate); * enum ib_mr_type - memory region type * @IB_MR_TYPE_MEM_REG: memory region that is used for * normal registration - * @IB_MR_TYPE_SIGNATURE: memory region that is used for - * signature operations (data-integrity - * capable regions) * @IB_MR_TYPE_SG_GAPS: memory region that is capable to * register any arbitrary sg lists (without * the normal mr constraints - see @@ -805,7 +802,6 @@ __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate); */ enum ib_mr_type { IB_MR_TYPE_MEM_REG, - IB_MR_TYPE_SIGNATURE, IB_MR_TYPE_SG_GAPS, IB_MR_TYPE_DM, IB_MR_TYPE_USER, @@ -1246,7 +1242,6 @@ enum ib_wr_opcode { /* These are kernel only and can not be issued by userspace */ IB_WR_REG_MR = 0x20, - IB_WR_REG_SIG_MR, IB_WR_REG_MR_INTEGRITY, /* reserve values for low level drivers' internal use. @@ -1357,20 +1352,6 @@ static inline const struct ib_reg_wr *reg_wr(const struct ib_send_wr *wr) return container_of(wr, struct ib_reg_wr, wr); } -struct ib_sig_handover_wr { - struct ib_send_wr wr; - struct ib_sig_attrs *sig_attrs; - struct ib_mr *sig_mr; - int access_flags; - struct ib_sge *prot; -}; - -static inline const struct ib_sig_handover_wr * -sig_handover_wr(const struct ib_send_wr *wr) -{ - return container_of(wr, struct ib_sig_handover_wr, wr); -} - struct ib_recv_wr { struct ib_recv_wr *next; union { From patchwork Tue Jun 11 15:52:55 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Max Gurtovoy X-Patchwork-Id: 10987641 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id A68A013AF for ; Tue, 11 Jun 2019 15:53:19 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 964D2286C6 for ; Tue, 11 Jun 2019 15:53:19 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 8A2542886B; Tue, 11 Jun 2019 15:53:19 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI,UNPARSEABLE_RELAY autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id A114F2881E for ; Tue, 11 Jun 2019 15:53:18 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2391838AbfFKPxS (ORCPT ); Tue, 11 Jun 2019 11:53:18 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:33227 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S2391835AbfFKPxS (ORCPT ); Tue, 11 Jun 2019 11:53:18 -0400 Received: from Internal Mail-Server by MTLPINE2 (envelope-from maxg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 11 Jun 2019 18:53:01 +0300 Received: from r-vnc12.mtr.labs.mlnx (r-vnc12.mtr.labs.mlnx [10.208.0.12]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x5BFqxLL023036; Tue, 11 Jun 2019 18:53:01 +0300 From: Max Gurtovoy To: leonro@mellanox.com, linux-rdma@vger.kernel.org, sagi@grimberg.me, jgg@mellanox.com, dledford@redhat.com, hch@lst.de, bvanassche@acm.org Cc: maxg@mellanox.com, israelr@mellanox.com, idanb@mellanox.com, oren@mellanox.com, vladimirk@mellanox.com, shlomin@mellanox.com Subject: [PATCH 19/21] RDMA/mlx5: Improve PI handover performance Date: Tue, 11 Jun 2019 18:52:55 +0300 Message-Id: <1560268377-26560-20-git-send-email-maxg@mellanox.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: <1560268377-26560-1-git-send-email-maxg@mellanox.com> References: <1560268377-26560-1-git-send-email-maxg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP From: Israel Rukshin In some loads, there is performance degradation when using KLM mkey instead of MTT mkey. This is because KLM descriptor access is via indirection that might require more HW resources and cycles. Using KLM descriptor is not necessary when there are no gaps at the data/metadata sg lists. As an optimization, use MTT mkey whenever it is possible. For that matter, allocate internal MTT mkey and choose the effective pi_mr for in transaction according to the required mapping scheme. The setup of the tested benchmark (using iSER ULP): - 2 servers with 24 cores (1 initiator and 1 target) - ConnectX-4/ConnectX-5 adapters - 24 target sessions with 1 LUN each - ramdisk backstore - PI active Performance results running fio (24 jobs, 128 iodepth) using write_generate=1 and read_verify=1 (w/w.o/baseline): bs IOPS(read) IOPS(write) ---- ---------- ---------- 512 1262.4K/1243.3K/1147.1K 1732.1K/1725.1K/1423.8K 4k 570902/571233/457874 773982/743293/642080 32k 72086/72388/71933 96164/71789/93249 Using write_generate=0 and read_verify=0 (w/w.o patch): bs IOPS(read) IOPS(write) ---- ---------- ---------- 512 1600.1K/1572.1K/1393.3K 1830.3K/1823.5K/1557.2K 4k 937272/921992/762934 815304/753772/646071 32k 77369/75052/72058 97435/73180/94612 Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Suggested-by: Max Gurtovoy Suggested-by: Idan Burstein Reviewed-by: Sagi Grimberg --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 7 +- drivers/infiniband/hw/mlx5/mr.c | 179 ++++++++++++++++++++++++++++++----- drivers/infiniband/hw/mlx5/qp.c | 2 +- 3 files changed, 164 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 06de3507e3d6..6039a1fc80a1 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -605,7 +605,12 @@ struct mlx5_ib_mr { int access_flags; /* Needed for rereg MR */ struct mlx5_ib_mr *parent; - struct mlx5_ib_mr *pi_mr; /* Needed for IB_MR_TYPE_INTEGRITY */ + /* Needed for IB_MR_TYPE_INTEGRITY */ + struct mlx5_ib_mr *pi_mr; + struct mlx5_ib_mr *klm_mr; + struct mlx5_ib_mr *mtt_mr; + u64 pi_iova; + atomic_t num_leaf_free; wait_queue_head_t q_leaf_free; struct mlx5_async_work cb_work; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index beed2d5530d3..46dc944bbd40 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1641,8 +1641,10 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { struct mlx5_ib_mr *mmr = to_mmr(ibmr); - if (ibmr->type == IB_MR_TYPE_INTEGRITY) - dereg_mr(to_mdev(mmr->pi_mr->ibmr.device), mmr->pi_mr); + if (ibmr->type == IB_MR_TYPE_INTEGRITY) { + dereg_mr(to_mdev(mmr->mtt_mr->ibmr.device), mmr->mtt_mr); + dereg_mr(to_mdev(mmr->klm_mr->ibmr.device), mmr->klm_mr); + } dereg_mr(to_mdev(ibmr->device), mmr); @@ -1650,7 +1652,8 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) } static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd, - u32 max_num_sg, u32 max_num_meta_sg) + u32 max_num_sg, u32 max_num_meta_sg, + int desc_size, int access_mode) { struct mlx5_ib_dev *dev = to_mdev(pd->device); int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); @@ -1673,16 +1676,17 @@ static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd, mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); MLX5_SET(mkc, mkc, free, 1); MLX5_SET(mkc, mkc, translations_octword_size, ndescs); + if (access_mode == MLX5_MKC_ACCESS_MODE_MTT) + MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT); MLX5_SET(mkc, mkc, qpn, 0xffffff); MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); - mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS; + mr->access_mode = access_mode; - err = mlx5_alloc_priv_descs(pd->device, mr, - ndescs, sizeof(struct mlx5_klm)); + err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, desc_size); if (err) goto err_free_in; - mr->desc_size = sizeof(struct mlx5_klm); + mr->desc_size = desc_size; mr->max_descs = ndescs; MLX5_SET(mkc, mkc, access_mode_1_0, mr->access_mode & 0x3); @@ -1786,12 +1790,22 @@ static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd, mr->sig->sig_err_exists = false; /* Next UMR, Arm SIGERR */ ++mr->sig->sigerr_count; - mr->pi_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, - max_num_meta_sg); - if (IS_ERR(mr->pi_mr)) { - err = PTR_ERR(mr->pi_mr); + mr->klm_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, + max_num_meta_sg, + sizeof(struct mlx5_klm), + MLX5_MKC_ACCESS_MODE_KLMS); + if (IS_ERR(mr->klm_mr)) { + err = PTR_ERR(mr->klm_mr); goto err_destroy_psv; } + mr->mtt_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, + max_num_meta_sg, + sizeof(struct mlx5_mtt), + MLX5_MKC_ACCESS_MODE_MTT); + if (IS_ERR(mr->mtt_mr)) { + err = PTR_ERR(mr->mtt_mr); + goto err_free_klm_mr; + } } else { mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type); err = -EINVAL; @@ -1816,9 +1830,14 @@ static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd, return &mr->ibmr; err_free_pi_mr: - if (mr->pi_mr) { - dereg_mr(to_mdev(mr->pi_mr->ibmr.device), mr->pi_mr); - mr->pi_mr = NULL; + if (mr->mtt_mr) { + dereg_mr(to_mdev(mr->mtt_mr->ibmr.device), mr->mtt_mr); + mr->mtt_mr = NULL; + } +err_free_klm_mr: + if (mr->klm_mr) { + dereg_mr(to_mdev(mr->klm_mr->ibmr.device), mr->klm_mr); + mr->klm_mr = NULL; } err_destroy_psv: if (mr->sig) { @@ -2056,16 +2075,95 @@ static int mlx5_set_page(struct ib_mr *ibmr, u64 addr) return 0; } -int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, +static int mlx5_set_page_pi(struct ib_mr *ibmr, u64 addr) +{ + struct mlx5_ib_mr *mr = to_mmr(ibmr); + __be64 *descs; + + if (unlikely(mr->ndescs + mr->meta_ndescs == mr->max_descs)) + return -ENOMEM; + + descs = mr->descs; + descs[mr->ndescs + mr->meta_ndescs++] = + cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR); + + return 0; +} + +static int +mlx5_ib_map_mtt_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, int data_sg_nents, unsigned int *data_sg_offset, struct scatterlist *meta_sg, int meta_sg_nents, unsigned int *meta_sg_offset) { struct mlx5_ib_mr *mr = to_mmr(ibmr); - struct mlx5_ib_mr *pi_mr = mr->pi_mr; + struct mlx5_ib_mr *pi_mr = mr->mtt_mr; int n; + u64 iova; - WARN_ON(ibmr->type != IB_MR_TYPE_INTEGRITY); + pi_mr->ndescs = 0; + pi_mr->meta_ndescs = 0; + pi_mr->meta_length = 0; + + ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map, + pi_mr->desc_size * pi_mr->max_descs, + DMA_TO_DEVICE); + + pi_mr->ibmr.page_size = ibmr->page_size; + n = ib_sg_to_pages(&pi_mr->ibmr, data_sg, data_sg_nents, data_sg_offset, + mlx5_set_page); + if (n != data_sg_nents) + return n; + + iova = pi_mr->ibmr.iova; + pi_mr->data_length = pi_mr->ibmr.length; + pi_mr->ibmr.length = pi_mr->data_length; + ibmr->length = pi_mr->data_length; + + if (meta_sg_nents) { + u64 page_mask = ~((u64)ibmr->page_size - 1); + + n += ib_sg_to_pages(&pi_mr->ibmr, meta_sg, meta_sg_nents, + meta_sg_offset, mlx5_set_page_pi); + + pi_mr->meta_length = pi_mr->ibmr.length; + /* + * PI address for the HW is the offset of the metadata address + * relative to the first data page address. + * It equals to first data page address + size of data pages + + * metadata offset at the first metadata page + */ + pi_mr->pi_iova = (iova & page_mask) + + pi_mr->ndescs * ibmr->page_size + + (pi_mr->ibmr.iova & ~page_mask); + /* + * In order to use one MTT MR for data and metadata, we register + * also the gaps between the end of the data and the start of + * the metadata (the sig MR will verify that the HW will access + * to right addresses). This mapping is safe because we use + * internal mkey for the registration. + */ + pi_mr->ibmr.length = pi_mr->pi_iova + pi_mr->meta_length - iova; + pi_mr->ibmr.iova = iova; + ibmr->length += pi_mr->meta_length; + } + + ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map, + pi_mr->desc_size * pi_mr->max_descs, + DMA_TO_DEVICE); + + return n; +} + +static int +mlx5_ib_map_klm_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, + int data_sg_nents, unsigned int *data_sg_offset, + struct scatterlist *meta_sg, int meta_sg_nents, + unsigned int *meta_sg_offset) +{ + struct mlx5_ib_mr *mr = to_mmr(ibmr); + struct mlx5_ib_mr *pi_mr = mr->klm_mr; + int n; pi_mr->ndescs = 0; pi_mr->meta_ndescs = 0; @@ -2078,19 +2176,56 @@ int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, n = mlx5_ib_sg_to_klms(pi_mr, data_sg, data_sg_nents, data_sg_offset, meta_sg, meta_sg_nents, meta_sg_offset); + ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map, + pi_mr->desc_size * pi_mr->max_descs, + DMA_TO_DEVICE); + /* This is zero-based memory region */ pi_mr->ibmr.iova = 0; + pi_mr->pi_iova = pi_mr->data_length; ibmr->length = pi_mr->ibmr.length; - ibmr->iova = pi_mr->ibmr.iova; - ibmr->sig_attrs->meta_length = pi_mr->meta_length; - ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map, - pi_mr->desc_size * pi_mr->max_descs, - DMA_TO_DEVICE); + return n; +} +int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, + int data_sg_nents, unsigned int *data_sg_offset, + struct scatterlist *meta_sg, int meta_sg_nents, + unsigned int *meta_sg_offset) +{ + struct mlx5_ib_mr *mr = to_mmr(ibmr); + struct mlx5_ib_mr *pi_mr = mr->mtt_mr; + int n; + + WARN_ON(ibmr->type != IB_MR_TYPE_INTEGRITY); + + /* + * As a performance optimization, if possible, there is no need to map + * the sg lists to KLM descriptors. First try to map the sg lists to MTT + * descriptors and fallback to KLM only in case of a failure. + * It's more efficient for the HW to work with MTT descriptors + * (especially in high load). + * Use KLM (indirect access) only if it's mandatory. + */ + n = mlx5_ib_map_mtt_mr_sg_pi(ibmr, data_sg, data_sg_nents, + data_sg_offset, meta_sg, meta_sg_nents, + meta_sg_offset); + if (n == data_sg_nents + meta_sg_nents) + goto out; + + pi_mr = mr->klm_mr; + n = mlx5_ib_map_klm_mr_sg_pi(ibmr, data_sg, data_sg_nents, + data_sg_offset, meta_sg, meta_sg_nents, + meta_sg_offset); if (unlikely(n != data_sg_nents + meta_sg_nents)) return -ENOMEM; +out: + /* This is zero-based memory region */ + ibmr->iova = 0; + mr->pi_mr = pi_mr; + ibmr->sig_attrs->meta_length = pi_mr->meta_length; + return 0; } diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 6626468ab628..09447838be7e 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4569,7 +4569,7 @@ static int set_sig_data_segment(const struct ib_send_wr *send_wr, if (pi_mr->meta_ndescs) { prot_len = pi_mr->meta_length; prot_key = pi_mr->ibmr.lkey; - prot_va = pi_mr->ibmr.iova + data_len; + prot_va = pi_mr->pi_iova; prot = true; } From patchwork Tue Jun 11 15:52:56 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Max Gurtovoy X-Patchwork-Id: 10987647 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id D616714BB for ; Tue, 11 Jun 2019 15:53:21 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id C63ED287F5 for ; Tue, 11 Jun 2019 15:53:21 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id BA6C72864B; Tue, 11 Jun 2019 15:53:21 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI,UNPARSEABLE_RELAY autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id CCA1B28831 for ; Tue, 11 Jun 2019 15:53:20 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2391837AbfFKPxU (ORCPT ); Tue, 11 Jun 2019 11:53:20 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:33242 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S2388958AbfFKPxU (ORCPT ); Tue, 11 Jun 2019 11:53:20 -0400 Received: from Internal Mail-Server by MTLPINE2 (envelope-from maxg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 11 Jun 2019 18:53:01 +0300 Received: from r-vnc12.mtr.labs.mlnx (r-vnc12.mtr.labs.mlnx [10.208.0.12]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x5BFqxLM023036; Tue, 11 Jun 2019 18:53:01 +0300 From: Max Gurtovoy To: leonro@mellanox.com, linux-rdma@vger.kernel.org, sagi@grimberg.me, jgg@mellanox.com, dledford@redhat.com, hch@lst.de, bvanassche@acm.org Cc: maxg@mellanox.com, israelr@mellanox.com, idanb@mellanox.com, oren@mellanox.com, vladimirk@mellanox.com, shlomin@mellanox.com Subject: [PATCH 20/21] RDMA/mlx5: Use PA mapping for PI handover Date: Tue, 11 Jun 2019 18:52:56 +0300 Message-Id: <1560268377-26560-21-git-send-email-maxg@mellanox.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: <1560268377-26560-1-git-send-email-maxg@mellanox.com> References: <1560268377-26560-1-git-send-email-maxg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP If possibe, avoid doing a UMR operation to register data and protection buffers (via MTT/KLM mkeys). Instead, use the local DMA key and map the SG lists using PA access. This is safe, since the internal key for data and protection never exposed to the remote server (only signature key might be exposed). If PA mappings are not possible, perform mapping using MTT/KLM descriptors. The setup of the tested benchmark (using iSER ULP): - 2 servers with 24 cores (1 initiator and 1 target) - ConnectX-4/ConnectX-5 adapters - 24 target sessions with 1 LUN each - ramdisk backstore - PI active Performance results running fio (24 jobs, 128 iodepth) using write_generate=1 and read_verify=1 (w/w.o patch): bs IOPS(read) IOPS(write) ---- ---------- ---------- 512 1266.4K/1262.4K 1720.1K/1732.1K 4k 793139/570902 1129.6K/773982 32k 72660/72086 97229/96164 Using write_generate=0 and read_verify=0 (w/w.o patch): bs IOPS(read) IOPS(write) ---- ---------- ---------- 512 1590.2K/1600.1K 1828.2K/1830.3K 4k 1078.1K/937272 1142.1K/815304 32k 77012/77369 98125/97435 Signed-off-by: Max Gurtovoy Signed-off-by: Israel Rukshin Suggested-by: Sagi Grimberg --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + drivers/infiniband/hw/mlx5/mr.c | 63 ++++++++++++++++++++++++++-- drivers/infiniband/hw/mlx5/qp.c | 80 ++++++++++++++++++++++++------------ 3 files changed, 114 insertions(+), 30 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 6039a1fc80a1..97c8534c5802 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -609,6 +609,7 @@ struct mlx5_ib_mr { struct mlx5_ib_mr *pi_mr; struct mlx5_ib_mr *klm_mr; struct mlx5_ib_mr *mtt_mr; + u64 data_iova; u64 pi_iova; atomic_t num_leaf_free; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 46dc944bbd40..d4465edf6ad0 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -2001,6 +2001,40 @@ int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, return ret; } +static int +mlx5_ib_map_pa_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, + int data_sg_nents, unsigned int *data_sg_offset, + struct scatterlist *meta_sg, int meta_sg_nents, + unsigned int *meta_sg_offset) +{ + struct mlx5_ib_mr *mr = to_mmr(ibmr); + unsigned int sg_offset = 0; + int n = 0; + + mr->meta_length = 0; + if (data_sg_nents == 1) { + n++; + mr->ndescs = 1; + if (data_sg_offset) + sg_offset = *data_sg_offset; + mr->data_length = sg_dma_len(data_sg) - sg_offset; + mr->data_iova = sg_dma_address(data_sg) + sg_offset; + if (meta_sg_nents == 1) { + n++; + mr->meta_ndescs = 1; + if (meta_sg_offset) + sg_offset = *meta_sg_offset; + else + sg_offset = 0; + mr->meta_length = sg_dma_len(meta_sg) - sg_offset; + mr->pi_iova = sg_dma_address(meta_sg) + sg_offset; + } + ibmr->length = mr->data_length + mr->meta_length; + } + + return n; +} + static int mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, struct scatterlist *sgl, @@ -2099,7 +2133,6 @@ mlx5_ib_map_mtt_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, struct mlx5_ib_mr *mr = to_mmr(ibmr); struct mlx5_ib_mr *pi_mr = mr->mtt_mr; int n; - u64 iova; pi_mr->ndescs = 0; pi_mr->meta_ndescs = 0; @@ -2115,13 +2148,14 @@ mlx5_ib_map_mtt_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, if (n != data_sg_nents) return n; - iova = pi_mr->ibmr.iova; + pi_mr->data_iova = pi_mr->ibmr.iova; pi_mr->data_length = pi_mr->ibmr.length; pi_mr->ibmr.length = pi_mr->data_length; ibmr->length = pi_mr->data_length; if (meta_sg_nents) { u64 page_mask = ~((u64)ibmr->page_size - 1); + u64 iova = pi_mr->data_iova; n += ib_sg_to_pages(&pi_mr->ibmr, meta_sg, meta_sg_nents, meta_sg_offset, mlx5_set_page_pi); @@ -2181,6 +2215,7 @@ mlx5_ib_map_klm_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, DMA_TO_DEVICE); /* This is zero-based memory region */ + pi_mr->data_iova = 0; pi_mr->ibmr.iova = 0; pi_mr->pi_iova = pi_mr->data_length; ibmr->length = pi_mr->ibmr.length; @@ -2194,11 +2229,27 @@ int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, unsigned int *meta_sg_offset) { struct mlx5_ib_mr *mr = to_mmr(ibmr); - struct mlx5_ib_mr *pi_mr = mr->mtt_mr; + struct mlx5_ib_mr *pi_mr = NULL; int n; WARN_ON(ibmr->type != IB_MR_TYPE_INTEGRITY); + mr->ndescs = 0; + mr->data_length = 0; + mr->data_iova = 0; + mr->meta_ndescs = 0; + mr->pi_iova = 0; + /* + * As a performance optimization, if possible, there is no need to + * perform UMR operation to register the data/metadata buffers. + * First try to map the sg lists to PA descriptors with local_dma_lkey. + * Fallback to UMR only in case of a failure. + */ + n = mlx5_ib_map_pa_mr_sg_pi(ibmr, data_sg, data_sg_nents, + data_sg_offset, meta_sg, meta_sg_nents, + meta_sg_offset); + if (n == data_sg_nents + meta_sg_nents) + goto out; /* * As a performance optimization, if possible, there is no need to map * the sg lists to KLM descriptors. First try to map the sg lists to MTT @@ -2207,6 +2258,7 @@ int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, * (especially in high load). * Use KLM (indirect access) only if it's mandatory. */ + pi_mr = mr->mtt_mr; n = mlx5_ib_map_mtt_mr_sg_pi(ibmr, data_sg, data_sg_nents, data_sg_offset, meta_sg, meta_sg_nents, meta_sg_offset); @@ -2224,7 +2276,10 @@ int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, /* This is zero-based memory region */ ibmr->iova = 0; mr->pi_mr = pi_mr; - ibmr->sig_attrs->meta_length = pi_mr->meta_length; + if (pi_mr) + ibmr->sig_attrs->meta_length = pi_mr->meta_length; + else + ibmr->sig_attrs->meta_length = mr->meta_length; return 0; } diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 09447838be7e..252c8c31f0ae 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4565,7 +4565,7 @@ static int set_sig_data_segment(const struct ib_send_wr *send_wr, data_len = pi_mr->data_length; data_key = pi_mr->ibmr.lkey; - data_va = pi_mr->ibmr.iova; + data_va = pi_mr->data_iova; if (pi_mr->meta_ndescs) { prot_len = pi_mr->meta_length; prot_key = pi_mr->ibmr.lkey; @@ -4915,6 +4915,7 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, struct mlx5_ib_qp *qp; struct mlx5_ib_mr *mr; struct mlx5_ib_mr *pi_mr; + struct mlx5_ib_mr pa_pi_mr; struct ib_sig_attrs *sig_attrs; struct mlx5_wqe_xrc_seg *xrc; struct mlx5_bf *bf; @@ -5029,35 +5030,62 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, break; case IB_WR_REG_MR_INTEGRITY: - memset(®_pi_wr, 0, sizeof(struct ib_reg_wr)); + qp->sq.wr_data[idx] = IB_WR_REG_MR_INTEGRITY; mr = to_mmr(reg_wr(wr)->mr); pi_mr = mr->pi_mr; - reg_pi_wr.mr = &pi_mr->ibmr; - reg_pi_wr.access = reg_wr(wr)->access; - reg_pi_wr.key = pi_mr->ibmr.rkey; - - qp->sq.wr_data[idx] = IB_WR_REG_MR_INTEGRITY; - ctrl->imm = cpu_to_be32(reg_pi_wr.key); - /* UMR for data + protection registration */ - err = set_reg_wr(qp, ®_pi_wr, &seg, &size, - &cur_edge, false); - if (err) { - *bad_wr = wr; - goto out; - } - finish_wqe(qp, ctrl, seg, size, cur_edge, idx, - wr->wr_id, nreq, fence, - MLX5_OPCODE_UMR); - - err = begin_wqe(qp, &seg, &ctrl, wr, &idx, - &size, &cur_edge, nreq); - if (err) { - mlx5_ib_warn(dev, "\n"); - err = -ENOMEM; - *bad_wr = wr; - goto out; + if (pi_mr) { + memset(®_pi_wr, 0, + sizeof(struct ib_reg_wr)); + + reg_pi_wr.mr = &pi_mr->ibmr; + reg_pi_wr.access = reg_wr(wr)->access; + reg_pi_wr.key = pi_mr->ibmr.rkey; + + ctrl->imm = cpu_to_be32(reg_pi_wr.key); + /* UMR for data + prot registration */ + err = set_reg_wr(qp, ®_pi_wr, &seg, + &size, &cur_edge, + false); + if (err) { + *bad_wr = wr; + goto out; + } + finish_wqe(qp, ctrl, seg, size, + cur_edge, idx, wr->wr_id, + nreq, fence, + MLX5_OPCODE_UMR); + + err = begin_wqe(qp, &seg, &ctrl, wr, + &idx, &size, &cur_edge, + nreq); + if (err) { + mlx5_ib_warn(dev, "\n"); + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + } else { + memset(&pa_pi_mr, 0, + sizeof(struct mlx5_ib_mr)); + /* No UMR, use local_dma_lkey */ + pa_pi_mr.ibmr.lkey = + mr->ibmr.pd->local_dma_lkey; + + pa_pi_mr.ndescs = mr->ndescs; + pa_pi_mr.data_length = mr->data_length; + pa_pi_mr.data_iova = mr->data_iova; + if (mr->meta_ndescs) { + pa_pi_mr.meta_ndescs = + mr->meta_ndescs; + pa_pi_mr.meta_length = + mr->meta_length; + pa_pi_mr.pi_iova = mr->pi_iova; + } + + pa_pi_mr.ibmr.length = mr->ibmr.length; + mr->pi_mr = &pa_pi_mr; } ctrl->imm = cpu_to_be32(mr->ibmr.rkey); /* UMR for sig MR */ From patchwork Tue Jun 11 15:52:57 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Max Gurtovoy X-Patchwork-Id: 10987645 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 930C7924 for ; Tue, 11 Jun 2019 15:53:21 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 83FCF287E8 for ; Tue, 11 Jun 2019 15:53:21 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 7866D28852; Tue, 11 Jun 2019 15:53:21 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI,UNPARSEABLE_RELAY autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id A0F1B287E8 for ; Tue, 11 Jun 2019 15:53:20 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S2403976AbfFKPxU (ORCPT ); Tue, 11 Jun 2019 11:53:20 -0400 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:33258 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S2391837AbfFKPxT (ORCPT ); Tue, 11 Jun 2019 11:53:19 -0400 Received: from Internal Mail-Server by MTLPINE2 (envelope-from maxg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 11 Jun 2019 18:53:01 +0300 Received: from r-vnc12.mtr.labs.mlnx (r-vnc12.mtr.labs.mlnx [10.208.0.12]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x5BFqxLN023036; Tue, 11 Jun 2019 18:53:01 +0300 From: Max Gurtovoy To: leonro@mellanox.com, linux-rdma@vger.kernel.org, sagi@grimberg.me, jgg@mellanox.com, dledford@redhat.com, hch@lst.de, bvanassche@acm.org Cc: maxg@mellanox.com, israelr@mellanox.com, idanb@mellanox.com, oren@mellanox.com, vladimirk@mellanox.com, shlomin@mellanox.com Subject: [PATCH 21/21] RDMA/mlx5: Refactor MR descriptors allocation Date: Tue, 11 Jun 2019 18:52:57 +0300 Message-Id: <1560268377-26560-22-git-send-email-maxg@mellanox.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: <1560268377-26560-1-git-send-email-maxg@mellanox.com> References: <1560268377-26560-1-git-send-email-maxg@mellanox.com> Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP Improve code readability using static helpers for each memory region type. Re-use the common logic to get smaller functions that are easy to maintain and reduce code duplication. Signed-off-by: Max Gurtovoy Signed-off-by: Israel Rukshin Reviewed-by: Sagi Grimberg --- drivers/infiniband/hw/mlx5/mr.c | 290 ++++++++++++++++++++++------------------ 1 file changed, 157 insertions(+), 133 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index d4465edf6ad0..bb854384a243 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1651,15 +1651,63 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) return 0; } +static void mlx5_set_umr_free_mkey(struct ib_pd *pd, u32 *in, int ndescs, + int access_mode, int page_shift) +{ + void *mkc; + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + + MLX5_SET(mkc, mkc, free, 1); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); + MLX5_SET(mkc, mkc, translations_octword_size, ndescs); + MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3); + MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7); + MLX5_SET(mkc, mkc, umr_en, 1); + MLX5_SET(mkc, mkc, log_page_size, page_shift); +} + +static int _mlx5_alloc_mkey_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, + int ndescs, int desc_size, int page_shift, + int access_mode, u32 *in, int inlen) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + int err; + + mr->access_mode = access_mode; + mr->desc_size = desc_size; + mr->max_descs = ndescs; + + err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, desc_size); + if (err) + return err; + + mlx5_set_umr_free_mkey(pd, in, ndescs, access_mode, page_shift); + + err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); + if (err) + goto err_free_descs; + + mr->mmkey.type = MLX5_MKEY_MR; + mr->ibmr.lkey = mr->mmkey.key; + mr->ibmr.rkey = mr->mmkey.key; + + return 0; + +err_free_descs: + mlx5_free_priv_descs(mr); + return err; +} + static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd, u32 max_num_sg, u32 max_num_meta_sg, int desc_size, int access_mode) { - struct mlx5_ib_dev *dev = to_mdev(pd->device); int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); int ndescs = ALIGN(max_num_sg + max_num_meta_sg, 4); + int page_shift = 0; struct mlx5_ib_mr *mr; - void *mkc; u32 *in; int err; @@ -1667,48 +1715,28 @@ static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd, if (!mr) return ERR_PTR(-ENOMEM); + mr->ibmr.pd = pd; + mr->ibmr.device = pd->device; + in = kzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; goto err_free; } - mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); - MLX5_SET(mkc, mkc, free, 1); - MLX5_SET(mkc, mkc, translations_octword_size, ndescs); if (access_mode == MLX5_MKC_ACCESS_MODE_MTT) - MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT); - MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); + page_shift = PAGE_SHIFT; - mr->access_mode = access_mode; - - err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, desc_size); + err = _mlx5_alloc_mkey_descs(pd, mr, ndescs, desc_size, page_shift, + access_mode, in, inlen); if (err) goto err_free_in; - mr->desc_size = desc_size; - mr->max_descs = ndescs; - - MLX5_SET(mkc, mkc, access_mode_1_0, mr->access_mode & 0x3); - MLX5_SET(mkc, mkc, access_mode_4_2, (mr->access_mode >> 2) & 0x7); - MLX5_SET(mkc, mkc, umr_en, 1); - mr->ibmr.pd = pd; - mr->ibmr.device = pd->device; - err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); - if (err) - goto err_priv_descs; - - mr->mmkey.type = MLX5_MKEY_MR; - mr->ibmr.lkey = mr->mmkey.key; - mr->ibmr.rkey = mr->mmkey.key; mr->umem = NULL; kfree(in); return mr; -err_priv_descs: - mlx5_free_priv_descs(mr); err_free_in: kfree(in); err_free: @@ -1716,6 +1744,92 @@ static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd, return ERR_PTR(err); } +static int mlx5_alloc_mem_reg_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, + int ndescs, u32 *in, int inlen) +{ + return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_mtt), + PAGE_SHIFT, MLX5_MKC_ACCESS_MODE_MTT, in, + inlen); +} + +static int mlx5_alloc_sg_gaps_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, + int ndescs, u32 *in, int inlen) +{ + return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_klm), + 0, MLX5_MKC_ACCESS_MODE_KLMS, in, inlen); +} + +static int mlx5_alloc_integrity_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, + int max_num_sg, int max_num_meta_sg, + u32 *in, int inlen) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + u32 psv_index[2]; + void *mkc; + int err; + + mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL); + if (!mr->sig) + return -ENOMEM; + + /* create mem & wire PSVs */ + err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn, 2, psv_index); + if (err) + goto err_free_sig; + + mr->sig->psv_memory.psv_idx = psv_index[0]; + mr->sig->psv_wire.psv_idx = psv_index[1]; + + mr->sig->sig_status_checked = true; + mr->sig->sig_err_exists = false; + /* Next UMR, Arm SIGERR */ + ++mr->sig->sigerr_count; + mr->klm_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg, + sizeof(struct mlx5_klm), + MLX5_MKC_ACCESS_MODE_KLMS); + if (IS_ERR(mr->klm_mr)) { + err = PTR_ERR(mr->klm_mr); + goto err_destroy_psv; + } + mr->mtt_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg, + sizeof(struct mlx5_mtt), + MLX5_MKC_ACCESS_MODE_MTT); + if (IS_ERR(mr->mtt_mr)) { + err = PTR_ERR(mr->mtt_mr); + goto err_free_klm_mr; + } + + /* Set bsf descriptors for mkey */ + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, bsf_en, 1); + MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE); + + err = _mlx5_alloc_mkey_descs(pd, mr, 4, sizeof(struct mlx5_klm), 0, + MLX5_MKC_ACCESS_MODE_KLMS, in, inlen); + if (err) + goto err_free_mtt_mr; + + return 0; + +err_free_mtt_mr: + dereg_mr(to_mdev(mr->mtt_mr->ibmr.device), mr->mtt_mr); + mr->mtt_mr = NULL; +err_free_klm_mr: + dereg_mr(to_mdev(mr->klm_mr->ibmr.device), mr->klm_mr); + mr->klm_mr = NULL; +err_destroy_psv: + if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_memory.psv_idx)) + mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", + mr->sig->psv_memory.psv_idx); + if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx)) + mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", + mr->sig->psv_wire.psv_idx); +err_free_sig: + kfree(mr->sig); + + return err; +} + static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg, u32 max_num_meta_sg) @@ -1724,7 +1838,6 @@ static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd, int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); int ndescs = ALIGN(max_num_sg, 4); struct mlx5_ib_mr *mr; - void *mkc; u32 *in; int err; @@ -1738,121 +1851,32 @@ static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd, goto err_free; } - mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); - MLX5_SET(mkc, mkc, free, 1); - MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); - - if (mr_type == IB_MR_TYPE_MEM_REG) { - mr->access_mode = MLX5_MKC_ACCESS_MODE_MTT; - MLX5_SET(mkc, mkc, translations_octword_size, ndescs); - MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT); - err = mlx5_alloc_priv_descs(pd->device, mr, - ndescs, sizeof(struct mlx5_mtt)); - if (err) - goto err_free_in; - - mr->desc_size = sizeof(struct mlx5_mtt); - mr->max_descs = ndescs; - } else if (mr_type == IB_MR_TYPE_SG_GAPS) { - mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS; - MLX5_SET(mkc, mkc, translations_octword_size, ndescs); - - err = mlx5_alloc_priv_descs(pd->device, mr, - ndescs, sizeof(struct mlx5_klm)); - if (err) - goto err_free_in; - mr->desc_size = sizeof(struct mlx5_klm); - mr->max_descs = ndescs; - } else if (mr_type == IB_MR_TYPE_INTEGRITY) { - u32 psv_index[2]; - - MLX5_SET(mkc, mkc, bsf_en, 1); - MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE); - MLX5_SET(mkc, mkc, translations_octword_size, 4); - mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL); - if (!mr->sig) { - err = -ENOMEM; - goto err_free_in; - } + mr->ibmr.device = pd->device; + mr->umem = NULL; - /* create mem & wire PSVs */ - err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn, - 2, psv_index); - if (err) - goto err_free_sig; - - mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS; - mr->sig->psv_memory.psv_idx = psv_index[0]; - mr->sig->psv_wire.psv_idx = psv_index[1]; - - mr->sig->sig_status_checked = true; - mr->sig->sig_err_exists = false; - /* Next UMR, Arm SIGERR */ - ++mr->sig->sigerr_count; - mr->klm_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, - max_num_meta_sg, - sizeof(struct mlx5_klm), - MLX5_MKC_ACCESS_MODE_KLMS); - if (IS_ERR(mr->klm_mr)) { - err = PTR_ERR(mr->klm_mr); - goto err_destroy_psv; - } - mr->mtt_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, - max_num_meta_sg, - sizeof(struct mlx5_mtt), - MLX5_MKC_ACCESS_MODE_MTT); - if (IS_ERR(mr->mtt_mr)) { - err = PTR_ERR(mr->mtt_mr); - goto err_free_klm_mr; - } - } else { + switch (mr_type) { + case IB_MR_TYPE_MEM_REG: + err = mlx5_alloc_mem_reg_descs(pd, mr, ndescs, in, inlen); + break; + case IB_MR_TYPE_SG_GAPS: + err = mlx5_alloc_sg_gaps_descs(pd, mr, ndescs, in, inlen); + break; + case IB_MR_TYPE_INTEGRITY: + err = mlx5_alloc_integrity_descs(pd, mr, max_num_sg, + max_num_meta_sg, in, inlen); + break; + default: mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type); err = -EINVAL; - goto err_free_in; } - MLX5_SET(mkc, mkc, access_mode_1_0, mr->access_mode & 0x3); - MLX5_SET(mkc, mkc, access_mode_4_2, (mr->access_mode >> 2) & 0x7); - MLX5_SET(mkc, mkc, umr_en, 1); - - mr->ibmr.device = pd->device; - err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); if (err) - goto err_free_pi_mr; + goto err_free_in; - mr->mmkey.type = MLX5_MKEY_MR; - mr->ibmr.lkey = mr->mmkey.key; - mr->ibmr.rkey = mr->mmkey.key; - mr->umem = NULL; kfree(in); return &mr->ibmr; -err_free_pi_mr: - if (mr->mtt_mr) { - dereg_mr(to_mdev(mr->mtt_mr->ibmr.device), mr->mtt_mr); - mr->mtt_mr = NULL; - } -err_free_klm_mr: - if (mr->klm_mr) { - dereg_mr(to_mdev(mr->klm_mr->ibmr.device), mr->klm_mr); - mr->klm_mr = NULL; - } -err_destroy_psv: - if (mr->sig) { - if (mlx5_core_destroy_psv(dev->mdev, - mr->sig->psv_memory.psv_idx)) - mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", - mr->sig->psv_memory.psv_idx); - if (mlx5_core_destroy_psv(dev->mdev, - mr->sig->psv_wire.psv_idx)) - mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", - mr->sig->psv_wire.psv_idx); - } - mlx5_free_priv_descs(mr); -err_free_sig: - kfree(mr->sig); err_free_in: kfree(in); err_free: