From patchwork Mon Feb 24 16:45:26 2020
X-Patchwork-Submitter: Max Gurtovoy
X-Patchwork-Id: 11401005
From: Max Gurtovoy
To: linux-nvme@lists.infradead.org, sagi@grimberg.me,
 linux-rdma@vger.kernel.org, kbusch@kernel.org, hch@lst.de,
 martin.petersen@oracle.com
Cc: vladimirk@mellanox.com, idanb@mellanox.com, maxg@mellanox.com,
 israelr@mellanox.com, axboe@kernel.dk, shlomin@mellanox.com
Subject: [PATCH 01/19] nvme: Introduce namespace features flag
Date: Mon, 24 Feb 2020 18:45:26 +0200
Message-Id: <20200224164544.219438-3-maxg@mellanox.com>
In-Reply-To: <20200224164544.219438-1-maxg@mellanox.com>

From: Israel Rukshin

Centralize all the integrity checks in one place and make the code more
readable.

Suggested-by: Christoph Hellwig
Reviewed-by: Max Gurtovoy
Signed-off-by: Israel Rukshin
---
 drivers/nvme/host/core.c | 39 ++++++++++++++++++++++++++-------------
 drivers/nvme/host/nvme.h |  5 ++++-
 2 files changed, 30 insertions(+), 14 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 8491422..3a754e0 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -209,11 +209,6 @@ static int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl)
 	return ret;
 }

-static inline bool nvme_ns_has_pi(struct nvme_ns *ns)
-{
-	return ns->pi_type && ns->ms == sizeof(struct t10_pi_tuple);
-}
-
 static blk_status_t nvme_error_status(u16 status)
 {
 	switch (status & 0x7ff) {
@@ -715,7 +710,8 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
 	 * namespace capacity to zero to prevent any I/O.
 	 */
 	if (!blk_integrity_rq(req)) {
-		if (WARN_ON_ONCE(!nvme_ns_has_pi(ns)))
+		if (WARN_ON_ONCE(!(ns->features &
+				NVME_NS_MD_CTRL_SUPPORTED)))
 			return BLK_STS_NOTSUPP;
 		control |= NVME_RW_PRINFO_PRACT;
 	}
@@ -1277,7 +1273,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	meta_len = (io.nblocks + 1) * ns->ms;
 	metadata = (void __user *)(uintptr_t)io.metadata;

-	if (ns->ext) {
+	if (ns->features & NVME_NS_EXT_LBAS) {
 		length += meta_len;
 		meta_len = 0;
 	} else if (meta_len) {
@@ -1807,10 +1803,10 @@ static void nvme_update_disk_info(struct gendisk *disk,
 	blk_queue_io_min(disk->queue, phys_bs);
 	blk_queue_io_opt(disk->queue, io_opt);

-	if (ns->ms && !ns->ext &&
-	    (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
+	if (ns->features & NVME_NS_MD_HOST_SUPPORTED)
 		nvme_init_integrity(disk, ns->ms, ns->pi_type);
-	if ((ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk)) ||
+	if ((ns->ms && !(ns->features & NVME_NS_MD_CTRL_SUPPORTED) &&
+	     !blk_get_integrity(disk)) ||
 	    ns->lba_shift > PAGE_SHIFT)
 		capacity = 0;
@@ -1840,12 +1836,29 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
 		ns->lba_shift = 9;
 	ns->noiob = le16_to_cpu(id->noiob);
 	ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms);
-	ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
+	ns->features = 0;
 	/* the PI implementation requires metadata equal t10 pi tuple size */
-	if (ns->ms == sizeof(struct t10_pi_tuple))
+	if (ns->ms == sizeof(struct t10_pi_tuple)) {
 		ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
-	else
+		if (ns->pi_type)
+			ns->features |= NVME_NS_MD_CTRL_SUPPORTED;
+	} else {
 		ns->pi_type = 0;
+	}
+
+	if (ns->ms) {
+		if (id->flbas & NVME_NS_FLBAS_META_EXT)
+			ns->features |= NVME_NS_EXT_LBAS;
+
+		/*
+		 * For PCI, Extended logical block will be generated by the
+		 * controller.
+		 */
+		if (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED) {
+			if (!(ns->features & NVME_NS_EXT_LBAS))
+				ns->features |= NVME_NS_MD_HOST_SUPPORTED;
+		}
+	}

 	if (ns->noiob)
 		nvme_set_chunk_size(ns);

diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 1024fec..fea20f1 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -381,8 +381,11 @@ struct nvme_ns {
 	u16 ms;
 	u16 sgs;
 	u32 sws;
-	bool ext;
 	u8 pi_type;
+	unsigned long features;
+#define NVME_NS_EXT_LBAS		(1 << 0)
+#define NVME_NS_MD_HOST_SUPPORTED	(1 << 1)
+#define NVME_NS_MD_CTRL_SUPPORTED	(1 << 2)
 	unsigned long flags;
 #define NVME_NS_REMOVING	0
 #define NVME_NS_DEAD		1

From patchwork Mon Feb 24 16:45:27 2020
X-Patchwork-Submitter: Max Gurtovoy
X-Patchwork-Id: 11400991
From: Max Gurtovoy
Subject: [PATCH 02/19] nvme: Add has_pi field to the nvme_req structure
Date: Mon, 24 Feb 2020 18:45:27 +0200
Message-Id: <20200224164544.219438-4-maxg@mellanox.com>

From: Israel Rukshin

Transport drivers will use this field to determine if the request has PI.
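As an illustration of how a transport is expected to consume the new
field (a sketch only; the foo_* name is a placeholder and not part of
this series), a driver can pick a signature-capable MR when the core
marked the request as carrying PI:

	/*
	 * Illustrative sketch: choose the MR pool based on the per-request
	 * PI marking set by the core (sig_mrs/rdma_mrs as used later in
	 * this series by the nvme-rdma patches).
	 */
	static struct ib_mr *foo_get_mr(struct ib_qp *qp, struct request *rq)
	{
		if (nvme_req(rq)->has_pi)
			return ib_mr_pool_get(qp, &qp->sig_mrs);
		return ib_mr_pool_get(qp, &qp->rdma_mrs);
	}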
Reviewed-by: Max Gurtovoy
Signed-off-by: Israel Rukshin
---
 drivers/nvme/host/core.c | 6 ++++--
 drivers/nvme/host/nvme.h | 1 +
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 3a754e0..15d0863 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -470,6 +470,7 @@ static inline void nvme_clear_nvme_request(struct request *req)
 	if (!(req->rq_flags & RQF_DONTPREP)) {
 		nvme_req(req)->retries = 0;
 		nvme_req(req)->flags = 0;
+		nvme_req(req)->has_pi = false;
 		req->rq_flags |= RQF_DONTPREP;
 	}
 }
@@ -703,6 +704,8 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
 	nvme_assign_write_stream(ctrl, req, &control, &dsmgmt);

 	if (ns->ms) {
+		nvme_req(req)->has_pi =
+			ns->features & NVME_NS_MD_CTRL_SUPPORTED;
 		/*
 		 * If formated with metadata, the block layer always provides a
 		 * metadata buffer if CONFIG_BLK_DEV_INTEGRITY is enabled.  Else
@@ -710,8 +713,7 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
 		 * namespace capacity to zero to prevent any I/O.
 		 */
 		if (!blk_integrity_rq(req)) {
-			if (WARN_ON_ONCE(!(ns->features &
-					NVME_NS_MD_CTRL_SUPPORTED)))
+			if (WARN_ON_ONCE(!nvme_req(req)->has_pi))
 				return BLK_STS_NOTSUPP;
 			control |= NVME_RW_PRINFO_PRACT;
 		}

diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index fea20f1..99340d7 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -139,6 +139,7 @@ struct nvme_request {
 	u8			flags;
 	u16			status;
 	struct nvme_ctrl	*ctrl;
+	bool			has_pi;
 };

 /*

From patchwork Mon Feb 24 16:45:28 2020
X-Patchwork-Submitter: Max Gurtovoy
X-Patchwork-Id: 11401017
From: Max Gurtovoy
Subject: [PATCH 03/19] nvme: Enforce extended LBA format for fabrics metadata
Date: Mon, 24 Feb 2020 18:45:28 +0200
Message-Id: <20200224164544.219438-5-maxg@mellanox.com>

An extended LBA is a larger LBA that is created when metadata associated
with the LBA is transferred contiguously with the LBA data (also known
as interleaved). The metadata may either be transferred as part of the
LBA (creating an extended LBA) or as a separate contiguous buffer of
data. According to the NVMe over Fabrics spec, a fabrics controller
supports only the extended LBA format. Fail revalidation in case of a
spec violation, and also initialize the integrity profile of the block
device for fabrics controllers.
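To make the two layouts concrete, here is a worked example for a
512-byte LBA formatted with an 8-byte PI tuple (illustrative numbers
only):

	/* 8 logical blocks, 512+8 format */
	unsigned int lba_size = 512, ms = 8, nlb = 8;

	unsigned int data_len = nlb * lba_size;        /* 4096 data bytes */
	unsigned int meta_len = nlb * ms;              /* 64 bytes as a separate
	                                                  metadata buffer */
	unsigned int ext_len  = nlb * (lba_size + ms); /* 4160 bytes when the
	                                                  metadata is interleaved
	                                                  (extended LBAs) */

A fabrics controller must expose only the last, interleaved layout.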
Signed-off-by: Max Gurtovoy
Signed-off-by: Israel Rukshin
---
 drivers/nvme/host/core.c | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 15d0863..86e39f6 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1825,7 +1825,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
 	blk_mq_unfreeze_queue(disk->queue);
 }

-static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
+static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
 {
 	struct nvme_ns *ns = disk->private_data;
@@ -1853,11 +1853,21 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
 			ns->features |= NVME_NS_EXT_LBAS;

 		/*
+		 * For Fabrics, only metadata as part of extended data LBA is
+		 * supported. Fail in case of a spec violation.
+		 */
+		if (ns->ctrl->ops->flags & NVME_F_FABRICS) {
+			if (WARN_ON_ONCE(!(ns->features & NVME_NS_EXT_LBAS)))
+				return -EINVAL;
+		}
+
+		/*
 		 * For PCI, Extended logical block will be generated by the
 		 * controller.
 		 */
 		if (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED) {
-			if (!(ns->features & NVME_NS_EXT_LBAS))
+			if (ns->ctrl->ops->flags & NVME_F_FABRICS ||
+			    !(ns->features & NVME_NS_EXT_LBAS))
 				ns->features |= NVME_NS_MD_HOST_SUPPORTED;
 		}
@@ -1872,6 +1882,7 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
 		revalidate_disk(ns->head->disk);
 	}
 #endif
+	return 0;
 }

 static int nvme_revalidate_disk(struct gendisk *disk)
@@ -1896,7 +1907,10 @@ static int nvme_revalidate_disk(struct gendisk *disk)
 		goto free_id;
 	}

-	__nvme_revalidate_disk(disk, id);
+	ret = __nvme_revalidate_disk(disk, id);
+	if (ret)
+		goto free_id;
+
 	ret = nvme_report_ns_ids(ctrl, ns->head->ns_id, id, &ids);
 	if (ret)
 		goto free_id;
@@ -3582,7 +3596,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 	memcpy(disk->disk_name, disk_name, DISK_NAME_LEN);
 	ns->disk = disk;

-	__nvme_revalidate_disk(disk, id);
+	if (__nvme_revalidate_disk(disk, id))
+		goto out_free_disk;

 	if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
 		ret = nvme_nvm_register(ns, disk_name, node);
@@ -3607,6 +3622,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 	return;
  out_put_disk:
 	put_disk(ns->disk);
+ out_free_disk:
+	del_gendisk(ns->disk);
  out_unlink_ns:
 	mutex_lock(&ctrl->subsys->lock);
 	list_del_rcu(&ns->siblings);

From patchwork Mon Feb 24 16:45:29 2020
X-Patchwork-Submitter: Max Gurtovoy
X-Patchwork-Id: 11400993
From: Max Gurtovoy
Subject: [PATCH 04/19] nvme: Introduce max_integrity_segments ctrl attribute
Date: Mon, 24 Feb 2020 18:45:29 +0200
Message-Id: <20200224164544.219438-6-maxg@mellanox.com>

This patch doesn't change any logic, and is needed as a preparation for
adding PI support for fabrics drivers that will use an extended LBA
format for metadata.
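As a sketch of how the new attribute is used across this series (the
first assignment is this set's PCI change, the second anticipates the
RDMA patch later in the series):

	/* PCI: separate metadata buffer, a single integrity segment */
	dev->ctrl.max_integrity_segments = 1;

	/* RDMA (patch 08): bounded by the MR fast-registration page limit */
	ctrl->ctrl.max_integrity_segments = ctrl->max_fr_pages;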
Signed-off-by: Max Gurtovoy
Signed-off-by: Israel Rukshin
Reviewed-by: Sagi Grimberg
Reviewed-by: Martin K. Petersen
---
 drivers/nvme/host/core.c | 11 +++++++----
 drivers/nvme/host/nvme.h |  1 +
 drivers/nvme/host/pci.c  |  7 +++++++
 3 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 86e39f6..ab25128 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1626,7 +1626,8 @@ static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 }

 #ifdef CONFIG_BLK_DEV_INTEGRITY
-static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
+static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type,
+				u32 max_integrity_segments)
 {
 	struct blk_integrity integrity;
@@ -1649,10 +1650,11 @@ static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
 	}
 	integrity.tuple_size = ms;
 	blk_integrity_register(disk, &integrity);
-	blk_queue_max_integrity_segments(disk->queue, 1);
+	blk_queue_max_integrity_segments(disk->queue, max_integrity_segments);
 }
 #else
-static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
+static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type,
+				u32 max_integrity_segments)
 {
 }
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
@@ -1806,7 +1808,8 @@ static void nvme_update_disk_info(struct gendisk *disk,
 	blk_queue_io_opt(disk->queue, io_opt);

 	if (ns->features & NVME_NS_MD_HOST_SUPPORTED)
-		nvme_init_integrity(disk, ns->ms, ns->pi_type);
+		nvme_init_integrity(disk, ns->ms, ns->pi_type,
+				    ns->ctrl->max_integrity_segments);
 	if ((ns->ms && !(ns->features & NVME_NS_MD_CTRL_SUPPORTED) &&
 	     !blk_get_integrity(disk)) ||
 	    ns->lba_shift > PAGE_SHIFT)

diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 99340d7..af8e10a 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -229,6 +229,7 @@ struct nvme_ctrl {
 	u32 page_size;
 	u32 max_hw_sectors;
 	u32 max_segments;
+	u32 max_integrity_segments;
 	u16 crdt[3];
 	u16 oncs;
 	u16 oacs;

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index da392b5..e4e95c9 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2573,6 +2573,13 @@ static void nvme_reset_work(struct work_struct *work)
 		goto out;
 	}

+	/*
+	 * NVMe PCI driver doesn't support Extended LBA format and supports
+	 * only a single integrity segment for a separate contiguous buffer
+	 * of metadata.
+	 */
+	dev->ctrl.max_integrity_segments = 1;
+
 	result = nvme_init_identify(&dev->ctrl);
 	if (result)
 		goto out;

From patchwork Mon Feb 24 16:45:30 2020
X-Patchwork-Submitter: Max Gurtovoy
X-Patchwork-Id: 11400989
From: Max Gurtovoy
Subject: [PATCH 05/19] nvme-fabrics: Allow user enabling metadata/T10-PI support
Date: Mon, 24 Feb 2020 18:45:30 +0200
Message-Id: <20200224164544.219438-7-maxg@mellanox.com>

From: Israel Rukshin

Preparation for adding metadata (T10-PI) over fabric support. This will
allow end-to-end protection information passthrough and validation for
NVMe over Fabrics.

Signed-off-by: Israel Rukshin
Reviewed-by: Max Gurtovoy
---
 drivers/nvme/host/core.c    |  3 ++-
 drivers/nvme/host/fabrics.c | 11 +++++++++++
 drivers/nvme/host/fabrics.h |  3 +++
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index ab25128..c7933a8 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1869,7 +1869,8 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
 		 * controller.
 		 */
 		if (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED) {
-			if (ns->ctrl->ops->flags & NVME_F_FABRICS ||
+			if ((ns->ctrl->ops->flags & NVME_F_FABRICS &&
+			     ns->ctrl->opts->pi_enable) ||
 			    !(ns->features & NVME_NS_EXT_LBAS))
 				ns->features |= NVME_NS_MD_HOST_SUPPORTED;
 		}

diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 74b8818..c09230e 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -612,6 +612,7 @@ bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
 	{ NVMF_OPT_NR_WRITE_QUEUES,	"nr_write_queues=%d" },
 	{ NVMF_OPT_NR_POLL_QUEUES,	"nr_poll_queues=%d" },
 	{ NVMF_OPT_TOS,			"tos=%d" },
+	{ NVMF_OPT_PI_ENABLE,		"pi_enable" },
 	{ NVMF_OPT_ERR,			NULL }
 };
@@ -634,6 +635,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 	opts->hdr_digest = false;
 	opts->data_digest = false;
 	opts->tos = -1; /* < 0 == use transport default */
+	opts->pi_enable = false;

 	options = o = kstrdup(buf, GFP_KERNEL);
 	if (!options)
@@ -867,6 +869,15 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
 			}
 			opts->tos = token;
 			break;
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+		case NVMF_OPT_PI_ENABLE:
+			if (opts->discovery_nqn) {
+				pr_debug("Ignoring pi_enable value for discovery controller\n");
+				break;
+			}
+			opts->pi_enable = true;
+			break;
+#endif
 		default:
 			pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n",
 				p);

diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index a0ec40a..773f748 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -56,6 +56,7 @@ enum {
 	NVMF_OPT_NR_WRITE_QUEUES = 1 << 17,
 	NVMF_OPT_NR_POLL_QUEUES = 1 << 18,
 	NVMF_OPT_TOS		= 1 << 19,
+	NVMF_OPT_PI_ENABLE	= 1 << 20,
 };

 /**
@@ -89,6 +90,7 @@ enum {
  * @nr_write_queues: number of queues for write I/O
  * @nr_poll_queues: number of queues for polling I/O
  * @tos: type of service
+ * @pi_enable: Enable metadata (T10-PI) support
  */
 struct nvmf_ctrl_options {
 	unsigned		mask;
@@ -111,6 +113,7 @@ struct nvmf_ctrl_options {
 	unsigned int		nr_write_queues;
 	unsigned int		nr_poll_queues;
 	int			tos;
+	bool			pi_enable;
 };

 /*

From patchwork Mon Feb 24 16:45:31 2020
X-Patchwork-Submitter: Max Gurtovoy
X-Patchwork-Id: 11400997
From: Max Gurtovoy
Subject: [PATCH 06/19] nvme: Introduce NVME_INLINE_PROT_SG_CNT
Date: Mon, 24 Feb 2020 18:45:31 +0200
Message-Id: <20200224164544.219438-8-maxg@mellanox.com>

From: Israel Rukshin

The SGL for PI metadata is usually small, so one inline SG entry should
cover most cases. The macro will be used to preallocate a single SGL
entry for protection information. Preallocation of small inline SGLs
depends on the SG_CHAIN capability, so if the architecture doesn't
support SG_CHAIN, fall back to runtime allocation of the SGL.

This patch is a preparation for adding metadata (T10-PI) over fabric
support.

Signed-off-by: Israel Rukshin
Reviewed-by: Max Gurtovoy
Reviewed-by: Martin K. Petersen
---
 drivers/nvme/host/nvme.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index af8e10a..d0bfa2b 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -30,8 +30,10 @@

 #ifdef CONFIG_ARCH_NO_SG_CHAIN
 #define NVME_INLINE_SG_CNT	0
+#define NVME_INLINE_PROT_SG_CNT	0
 #else
 #define NVME_INLINE_SG_CNT	2
+#define NVME_INLINE_PROT_SG_CNT	1
 #endif

 extern struct workqueue_struct *nvme_wq;
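For reference, this is how the constant is consumed later in the series
when the RDMA transport allocates the PI table for a request (shown here
only to illustrate the inline-versus-chained behaviour):

	/* one inline entry when possible, otherwise sg chaining kicks in */
	ret = sg_alloc_table_chained(&req->pi_sgl->sg_table,
			blk_rq_count_integrity_sg(rq->q, rq->bio),
			req->pi_sgl->sg_table.sgl, NVME_INLINE_PROT_SG_CNT);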
From patchwork Mon Feb 24 16:45:32 2020
X-Patchwork-Submitter: Max Gurtovoy
X-Patchwork-Id: 11401021
From: Max Gurtovoy
Subject: [PATCH 07/19] nvme-rdma: Introduce nvme_rdma_sgl structure
Date: Mon, 24 Feb 2020 18:45:32 +0200
Message-Id: <20200224164544.219438-9-maxg@mellanox.com>

From: Israel Rukshin

Remove the first_sgl pointer from struct nvme_rdma_request and use
pointer arithmetic instead. The inline scatterlist, if it exists, will
be located right after the nvme_rdma_request. This patch is needed as a
preparation for adding PI support.

Signed-off-by: Israel Rukshin
Reviewed-by: Max Gurtovoy
---
 drivers/nvme/host/rdma.c | 41 ++++++++++++++++++++++++-----------------
 1 file changed, 24 insertions(+), 17 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 2a47c6c..b6dd9f5 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -48,6 +48,11 @@ struct nvme_rdma_qe {
 	u64			dma;
 };

+struct nvme_rdma_sgl {
+	int			nents;
+	struct sg_table		sg_table;
+};
+
 struct nvme_rdma_queue;
 struct nvme_rdma_request {
 	struct nvme_request	req;
@@ -58,12 +63,10 @@ struct nvme_rdma_request {
 	refcount_t		ref;
 	struct ib_sge		sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS];
 	u32			num_sge;
-	int			nents;
 	struct ib_reg_wr	reg_wr;
 	struct ib_cqe		reg_cqe;
 	struct nvme_rdma_queue  *queue;
-	struct sg_table		sg_table;
-	struct scatterlist	first_sgl[];
+	struct nvme_rdma_sgl	data_sgl;
 };

 enum nvme_rdma_queue_flags {
@@ -1159,8 +1162,9 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
 		req->mr = NULL;
 	}

-	ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_dma_dir(rq));
-	sg_free_table_chained(&req->sg_table, NVME_INLINE_SG_CNT);
+	ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents,
+			rq_dma_dir(rq));
+	sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT);
 }

 static int nvme_rdma_set_sg_null(struct nvme_command *c)
@@ -1179,7 +1183,7 @@ static int nvme_rdma_map_sg_inline(struct nvme_rdma_queue *queue,
 		int count)
 {
 	struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
-	struct scatterlist *sgl = req->sg_table.sgl;
+	struct scatterlist *sgl = req->data_sgl.sg_table.sgl;
 	struct ib_sge *sge = &req->sge[1];
 	u32 len = 0;
 	int i;
@@ -1204,8 +1208,8 @@ static int nvme_rdma_map_sg_single(struct nvme_rdma_queue *queue,
 {
 	struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;

-	sg->addr = cpu_to_le64(sg_dma_address(req->sg_table.sgl));
-	put_unaligned_le24(sg_dma_len(req->sg_table.sgl), sg->length);
+	sg->addr = cpu_to_le64(sg_dma_address(req->data_sgl.sg_table.sgl));
+	put_unaligned_le24(sg_dma_len(req->data_sgl.sg_table.sgl), sg->length);
 	put_unaligned_le32(queue->device->pd->unsafe_global_rkey, sg->key);
 	sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4;
 	return 0;
@@ -1226,7 +1230,8 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
 	 * Align the MR to a 4K page size to match the ctrl page size and
 	 * the block virtual boundary.
 	 */
-	nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, SZ_4K);
+	nr = ib_map_mr_sg(req->mr, req->data_sgl.sg_table.sgl, count, NULL,
+			  SZ_4K);
 	if (unlikely(nr < count)) {
 		ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr);
 		req->mr = NULL;
 		if (nr < 0)
 			return nr;
 		return -EINVAL;
@@ -1273,17 +1278,18 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
 	if (!blk_rq_nr_phys_segments(rq))
 		return nvme_rdma_set_sg_null(c);

-	req->sg_table.sgl = req->first_sgl;
-	ret = sg_alloc_table_chained(&req->sg_table,
-			blk_rq_nr_phys_segments(rq), req->sg_table.sgl,
+	req->data_sgl.sg_table.sgl = (struct scatterlist *)(req + 1);
+	ret = sg_alloc_table_chained(&req->data_sgl.sg_table,
+			blk_rq_nr_phys_segments(rq), req->data_sgl.sg_table.sgl,
 			NVME_INLINE_SG_CNT);
 	if (ret)
 		return -ENOMEM;

-	req->nents = blk_rq_map_sg(rq->q, rq, req->sg_table.sgl);
+	req->data_sgl.nents = blk_rq_map_sg(rq->q, rq,
+					    req->data_sgl.sg_table.sgl);

-	count = ib_dma_map_sg(ibdev, req->sg_table.sgl, req->nents,
-			      rq_dma_dir(rq));
+	count = ib_dma_map_sg(ibdev, req->data_sgl.sg_table.sgl,
+			      req->data_sgl.nents, rq_dma_dir(rq));
 	if (unlikely(count <= 0)) {
 		ret = -EIO;
 		goto out_free_table;
@@ -1312,9 +1318,10 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
 	return 0;

 out_unmap_sg:
-	ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_dma_dir(rq));
+	ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents,
+			rq_dma_dir(rq));
 out_free_table:
-	sg_free_table_chained(&req->sg_table, NVME_INLINE_SG_CNT);
+	sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT);
 	return ret;
 }
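The pointer arithmetic relies on the block layer placing cmd_size bytes
of per-request space directly behind the request structure; a sketch of
the resulting layout (illustrative only):

	/*
	 * pdu layout with cmd_size = sizeof(struct nvme_rdma_request) +
	 * NVME_INLINE_SG_CNT * sizeof(struct scatterlist):
	 *
	 *   [ struct nvme_rdma_request | inline data SGL entries ]
	 */
	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
	struct scatterlist *inline_sgl = (struct scatterlist *)(req + 1);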
From patchwork Mon Feb 24 16:45:33 2020
X-Patchwork-Submitter: Max Gurtovoy
X-Patchwork-Id: 11401011
From: Max Gurtovoy
Subject: [PATCH 08/19] nvme-rdma: Add metadata/T10-PI support
Date: Mon, 24 Feb 2020 18:45:33 +0200
Message-Id: <20200224164544.219438-10-maxg@mellanox.com>

For capable HCAs (e.g. ConnectX-4/ConnectX-5) this will allow end-to-end
protection information passthrough and validation for the NVMe over RDMA
transport. Metadata offload support was implemented over the new RDMA
signature verbs API and it is enabled per controller by using nvme-cli.

usage example:
nvme connect --pi_enable --transport=rdma --traddr=10.0.1.1 --nqn=test-nvme

Signed-off-by: Max Gurtovoy
Signed-off-by: Israel Rukshin
---
 drivers/nvme/host/rdma.c | 330 ++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 296 insertions(+), 34 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index b6dd9f5..ad860ec 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -67,6 +67,9 @@ struct nvme_rdma_request {
 	struct ib_cqe		reg_cqe;
 	struct nvme_rdma_queue  *queue;
 	struct nvme_rdma_sgl	data_sgl;
+	/* Metadata (T10-PI) support */
+	struct nvme_rdma_sgl	*pi_sgl;
+	bool			use_pi;
 };

 enum nvme_rdma_queue_flags {
@@ -88,6 +91,7 @@ struct nvme_rdma_queue {
 	struct rdma_cm_id	*cm_id;
 	int			cm_error;
 	struct completion	cm_done;
+	bool			pi_support;
 };

 struct nvme_rdma_ctrl {
@@ -115,6 +119,7 @@ struct nvme_rdma_ctrl {
 	struct nvme_ctrl	ctrl;
 	bool			use_inline_data;
 	u32			io_queues[HCTX_MAX_TYPES];
+	bool			pi_support;
 };

 static inline struct nvme_rdma_ctrl *to_rdma_ctrl(struct nvme_ctrl *ctrl)
@@ -272,6 +277,8 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor)
 	init_attr.qp_type = IB_QPT_RC;
 	init_attr.send_cq = queue->ib_cq;
 	init_attr.recv_cq = queue->ib_cq;
+	if (queue->pi_support)
+		init_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN;

 	ret = rdma_create_qp(queue->cm_id, dev->pd, &init_attr);
@@ -287,6 +294,18 @@ static void nvme_rdma_exit_request(struct blk_mq_tag_set *set,
 	kfree(req->sqe.data);
 }

+static unsigned int nvme_rdma_cmd_size(bool has_pi)
+{
+	/*
+	 * nvme rdma command consists of struct nvme_rdma_request, data SGL,
+	 * PI nvme_rdma_sgl struct and then PI SGL.
+	 */
+	return sizeof(struct nvme_rdma_request) +
+	       sizeof(struct scatterlist) * NVME_INLINE_SG_CNT +
+	       has_pi * (sizeof(struct nvme_rdma_sgl) +
+			 sizeof(struct scatterlist) * NVME_INLINE_PROT_SG_CNT);
+}
+
 static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
 		struct request *rq, unsigned int hctx_idx,
 		unsigned int numa_node)
@@ -301,6 +320,10 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
 	if (!req->sqe.data)
 		return -ENOMEM;

+	/* PI nvme_rdma_sgl struct is located after command's data SGL */
+	if (queue->pi_support)
+		req->pi_sgl = (void *)nvme_req(rq) + nvme_rdma_cmd_size(false);
+
 	req->queue = queue;

 	return 0;
@@ -411,6 +434,8 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
 	dev = queue->device;
 	ibdev = dev->dev;

+	if (queue->pi_support)
+		ib_mr_pool_destroy(queue->qp, &queue->qp->sig_mrs);
 	ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs);

 	/*
@@ -427,10 +452,13 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
 	nvme_rdma_dev_put(dev);
 }

-static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev)
+static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev, bool pi_support)
 {
-	return min_t(u32, NVME_RDMA_MAX_SEGMENTS,
-		     ibdev->attrs.max_fast_reg_page_list_len - 1);
+	u32 max_page_list_len =
+		pi_support ? ibdev->attrs.max_pi_fast_reg_page_list_len :
+			     ibdev->attrs.max_fast_reg_page_list_len;
+
+	return min_t(u32, NVME_RDMA_MAX_SEGMENTS, max_page_list_len - 1);
 }

 static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
@@ -487,7 +515,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
 	 * misaligned we'll end up using two entries for a single data page,
 	 * so one additional entry is required.
 	 */
-	pages_per_mr = nvme_rdma_get_max_fr_pages(ibdev) + 1;
+	pages_per_mr = nvme_rdma_get_max_fr_pages(ibdev, queue->pi_support) + 1;
 	ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs,
 			      queue->queue_size,
 			      IB_MR_TYPE_MEM_REG,
@@ -499,10 +527,24 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
 		goto out_destroy_ring;
 	}

+	if (queue->pi_support) {
+		ret = ib_mr_pool_init(queue->qp, &queue->qp->sig_mrs,
+				      queue->queue_size, IB_MR_TYPE_INTEGRITY,
+				      pages_per_mr, pages_per_mr);
+		if (ret) {
+			dev_err(queue->ctrl->ctrl.device,
+				"failed to initialize PI MR pool sized %d for QID %d\n",
+				queue->queue_size, idx);
+			goto out_destroy_mr_pool;
+		}
+	}
+
 	set_bit(NVME_RDMA_Q_TR_READY, &queue->flags);

 	return 0;

+out_destroy_mr_pool:
+	ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs);
 out_destroy_ring:
 	nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size,
 			    sizeof(struct nvme_completion), DMA_FROM_DEVICE);
@@ -524,6 +566,7 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
 	queue = &ctrl->queues[idx];
 	queue->ctrl = ctrl;
+	queue->pi_support = idx && ctrl->pi_support;
 	init_completion(&queue->cm_done);

 	if (idx > 0)
@@ -733,8 +776,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
 		set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
 		set->reserved_tags = 2; /* connect + keep-alive */
 		set->numa_node = nctrl->numa_node;
-		set->cmd_size = sizeof(struct nvme_rdma_request) +
-			NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
+		set->cmd_size = nvme_rdma_cmd_size(false);
 		set->driver_data = ctrl;
 		set->nr_hw_queues = 1;
 		set->timeout = ADMIN_TIMEOUT;
@@ -747,8 +789,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
 		set->reserved_tags = 1; /* fabric connect */
 		set->numa_node = nctrl->numa_node;
 		set->flags = BLK_MQ_F_SHOULD_MERGE;
-		set->cmd_size = sizeof(struct nvme_rdma_request) +
-			NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
+		set->cmd_size = nvme_rdma_cmd_size(ctrl->pi_support);
 		set->driver_data = ctrl;
 		set->nr_hw_queues = nctrl->queue_count - 1;
 		set->timeout = NVME_IO_TIMEOUT;
@@ -790,7 +831,21 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
 	ctrl->device = ctrl->queues[0].device;
 	ctrl->ctrl.numa_node = dev_to_node(ctrl->device->dev->dma_device);

-	ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev);
+	/* T10-PI support */
+	if (ctrl->ctrl.opts->pi_enable) {
+		if (!(ctrl->device->dev->attrs.device_cap_flags &
+		      IB_DEVICE_INTEGRITY_HANDOVER)) {
+			dev_warn(ctrl->ctrl.device,
+				 "T10-PI requested but not supported on %s, continue without T10-PI\n",
+				 ctrl->device->dev->name);
+			ctrl->ctrl.opts->pi_enable = false;
+		} else {
+			ctrl->pi_support = true;
+		}
+	}
+
+	ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev,
+							ctrl->pi_support);

 	/*
 	 * Bind the async event SQE DMA mapping to the admin queue lifetime.
@@ -832,6 +887,8 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,

 	ctrl->ctrl.max_segments = ctrl->max_fr_pages;
 	ctrl->ctrl.max_hw_sectors = ctrl->max_fr_pages << (ilog2(SZ_4K) - 9);
+	if (ctrl->pi_support)
+		ctrl->ctrl.max_integrity_segments = ctrl->max_fr_pages;

 	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
@@ -1157,8 +1214,19 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
 	if (!blk_rq_nr_phys_segments(rq))
 		return;

+	if (blk_integrity_rq(rq)) {
+		ib_dma_unmap_sg(ibdev, req->pi_sgl->sg_table.sgl,
+				req->pi_sgl->nents, rq_dma_dir(rq));
+		sg_free_table_chained(&req->pi_sgl->sg_table,
+				      NVME_INLINE_PROT_SG_CNT);
+	}
+
 	if (req->mr) {
-		ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr);
+		if (req->use_pi)
+			ib_mr_pool_put(queue->qp, &queue->qp->sig_mrs, req->mr);
+		else
+			ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs,
+				       req->mr);
 		req->mr = NULL;
 	}
@@ -1215,34 +1283,112 @@ static int nvme_rdma_map_sg_single(struct nvme_rdma_queue *queue,
 	return 0;
 }

-static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
-		struct nvme_rdma_request *req, struct nvme_command *c,
-		int count)
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+static void nvme_rdma_set_sig_domain(struct blk_integrity *bi,
+		struct nvme_command *cmd, struct ib_sig_domain *domain,
+		u16 control)
 {
-	struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
-	int nr;
-
-	req->mr = ib_mr_pool_get(queue->qp, &queue->qp->rdma_mrs);
-	if (WARN_ON_ONCE(!req->mr))
-		return -EAGAIN;
+	domain->sig_type = IB_SIG_TYPE_T10_DIF;
+	domain->sig.dif.bg_type = IB_T10DIF_CRC;
+	domain->sig.dif.pi_interval = 1 << bi->interval_exp;
+	domain->sig.dif.ref_tag = le32_to_cpu(cmd->rw.reftag);

 	/*
-	 * Align the MR to a 4K page size to match the ctrl page size and
-	 * the block virtual boundary.
+	 * At the moment we hard code those, but in the future
+	 * we will take them from cmd.
*/ - nr = ib_map_mr_sg(req->mr, req->data_sgl.sg_table.sgl, count, NULL, - SZ_4K); - if (unlikely(nr < count)) { - ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr); - req->mr = NULL; - if (nr < 0) - return nr; - return -EINVAL; + domain->sig.dif.apptag_check_mask = 0xffff; + domain->sig.dif.app_escape = true; + domain->sig.dif.ref_escape = true; + if (control & NVME_RW_PRINFO_PRCHK_REF) + domain->sig.dif.ref_remap = true; +} + +static void nvme_rdma_set_sig_attrs(struct blk_integrity *bi, + struct nvme_command *cmd, struct ib_sig_attrs *sig_attrs) +{ + u16 control = le16_to_cpu(cmd->rw.control); + + WARN_ON(bi == NULL); + + memset(sig_attrs, 0, sizeof(*sig_attrs)); + + if (control & NVME_RW_PRINFO_PRACT) { + /* for WRITE_INSERT/READ_STRIP no memory domain */ + sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE; + nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control); + /* Clear the PRACT bit since HCA will generate/verify the PI */ + control &= ~NVME_RW_PRINFO_PRACT; + cmd->rw.control = cpu_to_le16(control); + } else { + /* for WRITE_PASS/READ_PASS both wire/memory domains exist */ + nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control); + nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control); } +} + +static void nvme_rdma_set_prot_checks(struct nvme_command *cmd, u8 *mask) +{ + *mask = 0; + if (le16_to_cpu(cmd->rw.control) & NVME_RW_PRINFO_PRCHK_REF) + *mask |= IB_SIG_CHECK_REFTAG; + if (le16_to_cpu(cmd->rw.control) & NVME_RW_PRINFO_PRCHK_GUARD) + *mask |= IB_SIG_CHECK_GUARD; +} + +static void nvme_rdma_sig_done(struct ib_cq *cq, struct ib_wc *wc) +{ + if (unlikely(wc->status != IB_WC_SUCCESS)) + nvme_rdma_wr_error(cq, wc, "SIG"); +} + +static void nvme_rdma_set_pi_wr(struct nvme_rdma_request *req, + struct nvme_command *c) +{ + struct ib_sig_attrs *sig_attrs = req->mr->sig_attrs; + struct ib_reg_wr *wr = &req->reg_wr; + struct request *rq = blk_mq_rq_from_pdu(req); + struct bio *bio = rq->bio; + struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl; + + nvme_rdma_set_sig_attrs(blk_get_integrity(bio->bi_disk), c, sig_attrs); + + nvme_rdma_set_prot_checks(c, &sig_attrs->check_mask); ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey)); + req->reg_cqe.done = nvme_rdma_sig_done; + + memset(wr, 0, sizeof(*wr)); + wr->wr.opcode = IB_WR_REG_MR_INTEGRITY; + wr->wr.wr_cqe = &req->reg_cqe; + wr->wr.num_sge = 0; + wr->wr.send_flags = 0; + wr->mr = req->mr; + wr->key = req->mr->rkey; + wr->access = IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_WRITE; + + sg->addr = cpu_to_le64(req->mr->iova); + put_unaligned_le24(req->mr->length, sg->length); + put_unaligned_le32(req->mr->rkey, sg->key); + sg->type = (NVME_KEY_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_INVALIDATE; +} +#else +static void nvme_rdma_set_pi_wr(struct nvme_rdma_request *req, + struct nvme_command *c) +{ +} +#endif /* CONFIG_BLK_DEV_INTEGRITY */ + +static void nvme_rdma_set_reg_wr(struct nvme_rdma_request *req, + struct nvme_command *c) +{ + struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl; + req->reg_cqe.done = nvme_rdma_memreg_done; + memset(&req->reg_wr, 0, sizeof(req->reg_wr)); req->reg_wr.wr.opcode = IB_WR_REG_MR; req->reg_wr.wr.wr_cqe = &req->reg_cqe; @@ -1258,8 +1404,52 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue, put_unaligned_le32(req->mr->rkey, sg->key); sg->type = (NVME_KEY_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_INVALIDATE; +} + +static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue, + struct nvme_rdma_request *req, struct nvme_command *c, + int 
+		int count, int pi_count)
+{
+	struct nvme_rdma_sgl *sgl = &req->data_sgl;
+	int nr;
+
+	if (req->use_pi) {
+		req->mr = ib_mr_pool_get(queue->qp, &queue->qp->sig_mrs);
+		if (WARN_ON_ONCE(!req->mr))
+			return -EAGAIN;
+
+		nr = ib_map_mr_sg_pi(req->mr, sgl->sg_table.sgl, count, 0,
+				     req->pi_sgl->sg_table.sgl, pi_count, 0,
+				     SZ_4K);
+		if (unlikely(nr))
+			goto mr_put;
+
+		nvme_rdma_set_pi_wr(req, c);
+	} else {
+		req->mr = ib_mr_pool_get(queue->qp, &queue->qp->rdma_mrs);
+		if (WARN_ON_ONCE(!req->mr))
+			return -EAGAIN;
+		/*
+		 * Align the MR to a 4K page size to match the ctrl page size
+		 * and the block virtual boundary.
+		 */
+		nr = ib_map_mr_sg(req->mr, sgl->sg_table.sgl, count, 0, SZ_4K);
+		if (unlikely(nr < count))
+			goto mr_put;
+
+		nvme_rdma_set_reg_wr(req, c);
+	}

 	return 0;
+mr_put:
+	if (req->use_pi)
+		ib_mr_pool_put(queue->qp, &queue->qp->sig_mrs, req->mr);
+	else
+		ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr);
+	req->mr = NULL;
+	if (nr < 0)
+		return nr;
+	return -EINVAL;
 }

 static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
@@ -1268,6 +1458,7 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
 	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
 	struct nvme_rdma_device *dev = queue->device;
 	struct ib_device *ibdev = dev->dev;
+	int pi_count = 0;
 	int count, ret;

 	req->num_sge = 1;
@@ -1295,7 +1486,30 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
 		goto out_free_table;
 	}

-	if (count <= dev->num_inline_segments) {
+	if (blk_integrity_rq(rq)) {
+		memset(req->pi_sgl, 0, sizeof(struct nvme_rdma_sgl));
+		req->pi_sgl->sg_table.sgl =
+			(struct scatterlist *)(req->pi_sgl + 1);
+		ret = sg_alloc_table_chained(&req->pi_sgl->sg_table,
+				blk_rq_count_integrity_sg(rq->q, rq->bio),
+				req->pi_sgl->sg_table.sgl,
+				NVME_INLINE_PROT_SG_CNT);
+		if (unlikely(ret)) {
+			ret = -ENOMEM;
+			goto out_unmap_sg;
+		}
+
+		req->pi_sgl->nents = blk_rq_map_integrity_sg(rq->q, rq->bio,
+					req->pi_sgl->sg_table.sgl);
+		pi_count = ib_dma_map_sg(ibdev, req->pi_sgl->sg_table.sgl,
+					 req->pi_sgl->nents, rq_dma_dir(rq));
+		if (unlikely(pi_count <= 0)) {
+			ret = -EIO;
+			goto out_free_pi_table;
+		}
+	}
+
+	if (count <= dev->num_inline_segments && !req->use_pi) {
 		if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) &&
 		    queue->ctrl->use_inline_data &&
 		    blk_rq_payload_bytes(rq) <=
@@ -1310,13 +1524,21 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
 		}
 	}

-	ret = nvme_rdma_map_sg_fr(queue, req, c, count);
+	ret = nvme_rdma_map_sg_fr(queue, req, c, count, pi_count);
 out:
 	if (unlikely(ret))
-		goto out_unmap_sg;
+		goto out_unmap_pi_sg;

 	return 0;

+out_unmap_pi_sg:
+	if (blk_integrity_rq(rq))
+		ib_dma_unmap_sg(ibdev, req->pi_sgl->sg_table.sgl,
+				req->pi_sgl->nents, rq_dma_dir(rq));
+out_free_pi_table:
+	if (blk_integrity_rq(rq))
+		sg_free_table_chained(&req->pi_sgl->sg_table,
+				      NVME_INLINE_PROT_SG_CNT);
 out_unmap_sg:
 	ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents,
 			rq_dma_dir(rq));
@@ -1769,6 +1991,8 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,

 	blk_mq_start_request(rq);

+	req->use_pi = queue->pi_support && nvme_req(rq)->has_pi;
+
 	err = nvme_rdma_map_data(queue, rq, c);
 	if (unlikely(err < 0)) {
 		dev_err(queue->ctrl->ctrl.device,
@@ -1809,12 +2033,50 @@ static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx)
 	return ib_process_cq_direct(queue->ib_cq, -1);
 }

+#ifdef CONFIG_BLK_DEV_INTEGRITY
+static void nvme_rdma_check_pi_status(struct nvme_rdma_request *req)
+{
+	struct request *rq = blk_mq_rq_from_pdu(req);
+	struct ib_mr_status mr_status;
+	int ret;
+
+	ret = ib_check_mr_status(req->mr, IB_MR_CHECK_SIG_STATUS, &mr_status);
+	if (ret) {
+		pr_err("ib_check_mr_status failed, ret %d\n", ret);
+		nvme_req(rq)->status = NVME_SC_INVALID_PI;
+		return;
+	}
+
+	if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) {
+		switch (mr_status.sig_err.err_type) {
+		case IB_SIG_BAD_GUARD:
+			nvme_req(rq)->status = NVME_SC_GUARD_CHECK;
+			break;
+		case IB_SIG_BAD_REFTAG:
+			nvme_req(rq)->status = NVME_SC_REFTAG_CHECK;
+			break;
+		case IB_SIG_BAD_APPTAG:
+			nvme_req(rq)->status = NVME_SC_APPTAG_CHECK;
+			break;
+		}
+		pr_err("PI error found type %d expected 0x%x vs actual 0x%x\n",
+		       mr_status.sig_err.err_type, mr_status.sig_err.expected,
+		       mr_status.sig_err.actual);
+	}
+}
+#endif
+
 static void nvme_rdma_complete_rq(struct request *rq)
 {
 	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
 	struct nvme_rdma_queue *queue = req->queue;
 	struct ib_device *ibdev = queue->device->dev;

+#ifdef CONFIG_BLK_DEV_INTEGRITY
+	if (req->use_pi)
+		nvme_rdma_check_pi_status(req);
+#endif
+
 	nvme_rdma_unmap_data(queue, rq);
 	ib_dma_unmap_single(ibdev, req->sqe.dma, sizeof(struct nvme_command),
 			    DMA_TO_DEVICE);
@@ -1934,7 +2196,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
 static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
 	.name			= "rdma",
 	.module			= THIS_MODULE,
-	.flags			= NVME_F_FABRICS,
+	.flags			= NVME_F_FABRICS | NVME_F_METADATA_SUPPORTED,
 	.reg_read32		= nvmf_reg_read32,
 	.reg_read64		= nvmf_reg_read64,
 	.reg_write32		= nvmf_reg_write32,
@@ -2078,7 +2340,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 	.allowed_opts	= NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY |
 			  NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO |
 			  NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES |
-			  NVMF_OPT_TOS,
+			  NVMF_OPT_TOS | NVMF_OPT_PI_ENABLE,
 	.create_ctrl	= nvme_rdma_create_ctrl,
 };
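To recap the two offload modes programmed by nvme_rdma_set_sig_attrs()
above (an illustrative summary, not additional code from the patch):

	u16 control = le16_to_cpu(cmd->rw.control);
	bool pract = control & NVME_RW_PRINFO_PRACT;

	/*
	 * PRACT=1 (WRITE_INSERT/READ_STRIP): host memory carries no PI and
	 * the HCA inserts/strips the tuple, so only the wire domain is
	 * T10-DIF.  PRACT=0 (WRITE_PASS/READ_PASS): PI travels end to end,
	 * so both the memory and the wire domains are T10-DIF.
	 */
	sig_attrs->mem.sig_type = pract ? IB_SIG_TYPE_NONE : IB_SIG_TYPE_T10_DIF;
	sig_attrs->wire.sig_type = IB_SIG_TYPE_T10_DIF;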
From patchwork Mon Feb 24 16:45:34 2020
X-Patchwork-Submitter: Max Gurtovoy
X-Patchwork-Id: 11401025
From: Max Gurtovoy
Subject: [PATCH 09/19] nvmet: Prepare metadata request
Date: Mon, 24 Feb 2020 18:45:34 +0200
Message-Id: <20200224164544.219438-11-maxg@mellanox.com>

From: Israel Rukshin

Allocate the metadata SGL buffers and add metadata fields to the
request. This is a preparation for adding metadata support over
fabrics.

Signed-off-by: Israel Rukshin
Reviewed-by: Max Gurtovoy
---
 drivers/nvme/target/core.c  | 48 ++++++++++++++++++++++++++++++++++++++-------
 drivers/nvme/target/nvmet.h |  5 +++++
 2 files changed, 46 insertions(+), 7 deletions(-)

diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index b685f99d..192b9ef 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -873,13 +873,17 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
 	req->sq = sq;
 	req->ops = ops;
 	req->sg = NULL;
+	req->prot_sg = NULL;
 	req->sg_cnt = 0;
+	req->prot_sg_cnt = 0;
 	req->transfer_len = 0;
+	req->prot_len = 0;
 	req->cqe->status = 0;
 	req->cqe->sq_head = 0;
 	req->ns = NULL;
 	req->error_loc = NVMET_NO_ERROR_LOC;
 	req->error_slba = 0;
+	req->use_pi = false;

 	trace_nvmet_req_init(req, req->cmd);
@@ -962,6 +966,7 @@ bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len)
 int nvmet_req_alloc_sgl(struct nvmet_req *req)
 {
 	struct pci_dev *p2p_dev = NULL;
+	int data_len = req->transfer_len - req->prot_len;

 	if (IS_ENABLED(CONFIG_PCI_P2PDMA)) {
 		if (req->sq->ctrl && req->ns)
@@ -971,11 +976,23 @@ int nvmet_req_alloc_sgl(struct nvmet_req *req)
 		req->p2p_dev = NULL;
 		if (req->sq->qid && p2p_dev) {
 			req->sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->sg_cnt,
-						       req->transfer_len);
-			if (req->sg) {
-				req->p2p_dev = p2p_dev;
-				return 0;
+						       data_len);
+			if (!req->sg)
+				goto fallback;
+
+			if (req->prot_len) {
+				req->prot_sg =
+					pci_p2pmem_alloc_sgl(p2p_dev,
+							     &req->prot_sg_cnt,
+							     req->prot_len);
+				if (!req->prot_sg) {
+					pci_p2pmem_free_sgl(req->p2p_dev,
+							    req->sg);
+					goto fallback;
+				}
 			}
+			req->p2p_dev = p2p_dev;
+			return 0;
 		}

 		/*
@@ -984,23 +1001,40 @@ int nvmet_req_alloc_sgl(struct nvmet_req *req)
 		 */
 	}

-	req->sg = sgl_alloc(req->transfer_len, GFP_KERNEL, &req->sg_cnt);
+fallback:
+	req->sg = sgl_alloc(data_len, GFP_KERNEL, &req->sg_cnt);
 	if (unlikely(!req->sg))
 		return -ENOMEM;

+	if (req->prot_len) {
+		req->prot_sg = sgl_alloc(req->prot_len, GFP_KERNEL,
+					 &req->prot_sg_cnt);
+		if (unlikely(!req->prot_sg)) {
+			sgl_free(req->sg);
+			return -ENOMEM;
+		}
+	}
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgl);

 void nvmet_req_free_sgl(struct nvmet_req *req)
 {
-	if (req->p2p_dev)
+	if (req->p2p_dev) {
 		pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
-	else
+		if (req->prot_sg)
+			pci_p2pmem_free_sgl(req->p2p_dev, req->prot_sg);
+	} else {
 		sgl_free(req->sg);
+		if (req->prot_sg)
+			sgl_free(req->prot_sg);
+	}

 	req->sg = NULL;
+	req->prot_sg = NULL;
 	req->sg_cnt = 0;
+	req->prot_sg_cnt = 0;
 }
 EXPORT_SYMBOL_GPL(nvmet_req_free_sgl);

diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 42ba2dd..a0f29e6 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -301,6 +301,7 @@ struct nvmet_req {
 	struct nvmet_cq		*cq;
 	struct nvmet_ns		*ns;
 	struct scatterlist	*sg;
+	struct scatterlist	*prot_sg;
 	struct bio_vec		inline_bvec[NVMET_MAX_INLINE_BIOVEC];
 	union {
 		struct {
@@ -314,8 +315,10 @@ struct nvmet_req {
 		} f;
 	};
 	int			sg_cnt;
+	int			prot_sg_cnt;
 	/* data length as parsed from the SGL descriptor: */
 	size_t			transfer_len;
+	size_t			prot_len;

 	struct nvmet_port	*port;
@@ -326,6 +329,8 @@ struct nvmet_req {
 	struct device		*p2p_client;
 	u16			error_loc;
 	u64			error_slba;
+	/* Metadata (T10-PI) support */
+	bool			use_pi;
 };

 extern struct workqueue_struct *buffered_io_wq;

From patchwork Mon Feb 24 16:45:35 2020
X-Patchwork-Submitter: Max Gurtovoy
X-Patchwork-Id: 11400995
From: Max Gurtovoy
Subject: [PATCH 10/19] nvmet: Add metadata characteristics for a namespace
Date: Mon, 24 Feb 2020 18:45:35 +0200
Message-Id: <20200224164544.219438-12-maxg@mellanox.com>

From: Israel Rukshin

Fill those namespace fields from the block device format for adding
metadata (T10-PI) over fabric support with block devices.

Signed-off-by: Israel Rukshin
Reviewed-by: Max Gurtovoy
Reviewed-by: Martin K. Petersen
Petersen --- drivers/nvme/target/io-cmd-bdev.c | 22 ++++++++++++++++++++++ drivers/nvme/target/nvmet.h | 3 +++ 2 files changed, 25 insertions(+) diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index ea0e596..97131e2 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -50,6 +50,9 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id) int nvmet_bdev_ns_enable(struct nvmet_ns *ns) { int ret; +#ifdef CONFIG_BLK_DEV_INTEGRITY + struct blk_integrity *bi; +#endif ns->bdev = blkdev_get_by_path(ns->device_path, FMODE_READ | FMODE_WRITE, NULL); @@ -64,6 +67,25 @@ int nvmet_bdev_ns_enable(struct nvmet_ns *ns) } ns->size = i_size_read(ns->bdev->bd_inode); ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev)); + + ns->prot_type = 0; + ns->ms = 0; +#ifdef CONFIG_BLK_DEV_INTEGRITY + bi = bdev_get_integrity(ns->bdev); + if (bi) { + ns->ms = bi->tuple_size; + if (bi->profile == &t10_pi_type1_crc) + ns->prot_type = NVME_NS_DPS_PI_TYPE1; + else if (bi->profile == &t10_pi_type3_crc) + ns->prot_type = NVME_NS_DPS_PI_TYPE3; + else + /* Unsupported metadata type */ + ns->ms = 0; + } + + pr_debug("ms %d pi_type %d\n", ns->ms, ns->prot_type); +#endif + return 0; } diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index a0f29e6..562819c 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -19,6 +19,7 @@ #include #include #include +#include #define NVMET_ASYNC_EVENTS 4 #define NVMET_ERROR_LOG_SLOTS 128 @@ -77,6 +78,8 @@ struct nvmet_ns { int use_p2pmem; struct pci_dev *p2p_dev; + int prot_type; + int ms; }; static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item) From patchwork Mon Feb 24 16:45:36 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Max Gurtovoy X-Patchwork-Id: 11401019 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 3318C13A4 for ; Mon, 24 Feb 2020 16:45:58 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 1D2AA2084E for ; Mon, 24 Feb 2020 16:45:58 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727438AbgBXQpw (ORCPT ); Mon, 24 Feb 2020 11:45:52 -0500 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:46550 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727730AbgBXQpv (ORCPT ); Mon, 24 Feb 2020 11:45:51 -0500 Received: from Internal Mail-Server by MTLPINE1 (envelope-from maxg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 24 Feb 2020 18:45:46 +0200 Received: from mtr-vdi-031.wap.labs.mlnx. 
(mtr-vdi-031.wap.labs.mlnx [10.209.102.136]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 01OGji9X013647; Mon, 24 Feb 2020 18:45:46 +0200 From: Max Gurtovoy To: linux-nvme@lists.infradead.org, sagi@grimberg.me, linux-rdma@vger.kernel.org, kbusch@kernel.org, hch@lst.de, martin.petersen@oracle.com Cc: vladimirk@mellanox.com, idanb@mellanox.com, maxg@mellanox.com, israelr@mellanox.com, axboe@kernel.dk, shlomin@mellanox.com Subject: [PATCH 11/19] nvmet: Rename nvmet_rw_len to nvmet_rw_data_len Date: Mon, 24 Feb 2020 18:45:36 +0200 Message-Id: <20200224164544.219438-13-maxg@mellanox.com> X-Mailer: git-send-email 2.21.0 In-Reply-To: <20200224164544.219438-1-maxg@mellanox.com> References: <20200224164544.219438-1-maxg@mellanox.com> MIME-Version: 1.0 Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Israel Rukshin The function doesn't add the metadata length (only data length is calculated). This is preparation for adding metadata (T10-PI) support. Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Martin K. Petersen --- drivers/nvme/target/io-cmd-bdev.c | 2 +- drivers/nvme/target/io-cmd-file.c | 2 +- drivers/nvme/target/nvmet.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 97131e2..c0337d0 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -173,7 +173,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) sector_t sector; int op, i; - if (!nvmet_check_data_len(req, nvmet_rw_len(req))) + if (!nvmet_check_data_len(req, nvmet_rw_data_len(req))) return; if (!req->sg_cnt) { diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index cd5670b..ffbba88 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -232,7 +232,7 @@ static void nvmet_file_execute_rw(struct nvmet_req *req) { ssize_t nr_bvec = req->sg_cnt; - if (!nvmet_check_data_len(req, nvmet_rw_len(req))) + if (!nvmet_check_data_len(req, nvmet_rw_data_len(req))) return; if (!req->sg_cnt || !nr_bvec) { diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 562819c..846d1de 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -506,7 +506,7 @@ void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, u16 nvmet_file_flush(struct nvmet_req *req); void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid); -static inline u32 nvmet_rw_len(struct nvmet_req *req) +static inline u32 nvmet_rw_data_len(struct nvmet_req *req) { return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) << req->ns->blksize_shift; From patchwork Mon Feb 24 16:45:37 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Max Gurtovoy X-Patchwork-Id: 11400999 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 6A61A13A4 for ; Mon, 24 Feb 2020 16:45:54 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 544AB20880 for ; Mon, 24 Feb 2020 16:45:54 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727896AbgBXQpx (ORCPT ); Mon, 24 Feb 2020 11:45:53 -0500 Received: from mail-il-dmz.mellanox.com 
([193.47.165.129]:46571 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727736AbgBXQpw (ORCPT ); Mon, 24 Feb 2020 11:45:52 -0500 Received: from Internal Mail-Server by MTLPINE1 (envelope-from maxg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 24 Feb 2020 18:45:46 +0200 Received: from mtr-vdi-031.wap.labs.mlnx. (mtr-vdi-031.wap.labs.mlnx [10.209.102.136]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 01OGji9Y013647; Mon, 24 Feb 2020 18:45:46 +0200 From: Max Gurtovoy To: linux-nvme@lists.infradead.org, sagi@grimberg.me, linux-rdma@vger.kernel.org, kbusch@kernel.org, hch@lst.de, martin.petersen@oracle.com Cc: vladimirk@mellanox.com, idanb@mellanox.com, maxg@mellanox.com, israelr@mellanox.com, axboe@kernel.dk, shlomin@mellanox.com Subject: [PATCH 12/19] nvmet: Rename nvmet_check_data_len to nvmet_check_transfer_len Date: Mon, 24 Feb 2020 18:45:37 +0200 Message-Id: <20200224164544.219438-14-maxg@mellanox.com> X-Mailer: git-send-email 2.21.0 In-Reply-To: <20200224164544.219438-1-maxg@mellanox.com> References: <20200224164544.219438-1-maxg@mellanox.com> MIME-Version: 1.0 Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Israel Rukshin The function doesn't check only the data length, because the transfer length includes also the metadata length in some cases. This is preparation for adding metadata (T10-PI) support. Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Martin K. Petersen --- drivers/nvme/target/admin-cmd.c | 14 +++++++------- drivers/nvme/target/core.c | 6 +++--- drivers/nvme/target/discovery.c | 8 ++++---- drivers/nvme/target/fabrics-cmd.c | 8 ++++---- drivers/nvme/target/io-cmd-bdev.c | 6 +++--- drivers/nvme/target/io-cmd-file.c | 6 +++--- drivers/nvme/target/nvmet.h | 2 +- 7 files changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 19f9495..d446813 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -294,7 +294,7 @@ static void nvmet_execute_get_log_page_ana(struct nvmet_req *req) static void nvmet_execute_get_log_page(struct nvmet_req *req) { - if (!nvmet_check_data_len(req, nvmet_get_log_page_len(req->cmd))) + if (!nvmet_check_transfer_len(req, nvmet_get_log_page_len(req->cmd))) return; switch (req->cmd->get_log_page.lid) { @@ -620,7 +620,7 @@ static void nvmet_execute_identify_desclist(struct nvmet_req *req) static void nvmet_execute_identify(struct nvmet_req *req) { - if (!nvmet_check_data_len(req, NVME_IDENTIFY_DATA_SIZE)) + if (!nvmet_check_transfer_len(req, NVME_IDENTIFY_DATA_SIZE)) return; switch (req->cmd->identify.cns) { @@ -649,7 +649,7 @@ static void nvmet_execute_identify(struct nvmet_req *req) */ static void nvmet_execute_abort(struct nvmet_req *req) { - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; nvmet_set_result(req, 1); nvmet_req_complete(req, 0); @@ -735,7 +735,7 @@ static void nvmet_execute_set_features(struct nvmet_req *req) u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); u16 status = 0; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; switch (cdw10 & 0xff) { @@ -801,7 +801,7 @@ static void nvmet_execute_get_features(struct nvmet_req *req) u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); u16 status = 0; - if (!nvmet_check_data_len(req, nvmet_feat_data_len(req, cdw10))) + if 
(!nvmet_check_transfer_len(req, nvmet_feat_data_len(req, cdw10))) return; switch (cdw10 & 0xff) { @@ -868,7 +868,7 @@ void nvmet_execute_async_event(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; mutex_lock(&ctrl->lock); @@ -887,7 +887,7 @@ void nvmet_execute_keep_alive(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; pr_debug("ctrl %d update keep-alive timer for %d secs\n", diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 192b9ef..ce29db3 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -940,9 +940,9 @@ void nvmet_req_uninit(struct nvmet_req *req) } EXPORT_SYMBOL_GPL(nvmet_req_uninit); -bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len) +bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len) { - if (unlikely(data_len != req->transfer_len)) { + if (unlikely(len != req->transfer_len)) { req->error_loc = offsetof(struct nvme_common_command, dptr); nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR); return false; @@ -950,7 +950,7 @@ bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len) return true; } -EXPORT_SYMBOL_GPL(nvmet_check_data_len); +EXPORT_SYMBOL_GPL(nvmet_check_transfer_len); bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len) { diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index 0c2274b..40cf0b6 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -171,7 +171,7 @@ static void nvmet_execute_disc_get_log_page(struct nvmet_req *req) u16 status = 0; void *buffer; - if (!nvmet_check_data_len(req, data_len)) + if (!nvmet_check_transfer_len(req, data_len)) return; if (req->cmd->get_log_page.lid != NVME_LOG_DISC) { @@ -244,7 +244,7 @@ static void nvmet_execute_disc_identify(struct nvmet_req *req) const char model[] = "Linux"; u16 status = 0; - if (!nvmet_check_data_len(req, NVME_IDENTIFY_DATA_SIZE)) + if (!nvmet_check_transfer_len(req, NVME_IDENTIFY_DATA_SIZE)) return; if (req->cmd->identify.cns != NVME_ID_CNS_CTRL) { @@ -298,7 +298,7 @@ static void nvmet_execute_disc_set_features(struct nvmet_req *req) u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); u16 stat; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; switch (cdw10 & 0xff) { @@ -324,7 +324,7 @@ static void nvmet_execute_disc_get_features(struct nvmet_req *req) u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); u16 stat = 0; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; switch (cdw10 & 0xff) { diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index feef15c..52a6f70 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -12,7 +12,7 @@ static void nvmet_execute_prop_set(struct nvmet_req *req) u64 val = le64_to_cpu(req->cmd->prop_set.value); u16 status = 0; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; if (req->cmd->prop_set.attrib & 1) { @@ -41,7 +41,7 @@ static void nvmet_execute_prop_get(struct nvmet_req *req) u16 status = 0; u64 val = 0; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; if (req->cmd->prop_get.attrib & 1) { @@ -156,7 +156,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) struct nvmet_ctrl *ctrl 
= NULL; u16 status = 0; - if (!nvmet_check_data_len(req, sizeof(struct nvmf_connect_data))) + if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data))) return; d = kmalloc(sizeof(*d), GFP_KERNEL); @@ -223,7 +223,7 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) u16 qid = le16_to_cpu(c->qid); u16 status = 0; - if (!nvmet_check_data_len(req, sizeof(struct nvmf_connect_data))) + if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data))) return; d = kmalloc(sizeof(*d), GFP_KERNEL); diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index c0337d0..6e74fdf 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -173,7 +173,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) sector_t sector; int op, i; - if (!nvmet_check_data_len(req, nvmet_rw_data_len(req))) + if (!nvmet_check_transfer_len(req, nvmet_rw_data_len(req))) return; if (!req->sg_cnt) { @@ -234,7 +234,7 @@ static void nvmet_bdev_execute_flush(struct nvmet_req *req) { struct bio *bio = &req->b.inline_bio; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); @@ -326,7 +326,7 @@ static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req) sector_t nr_sector; int ret; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; sector = le64_to_cpu(write_zeroes->slba) << diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index ffbba88..6b098df 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -232,7 +232,7 @@ static void nvmet_file_execute_rw(struct nvmet_req *req) { ssize_t nr_bvec = req->sg_cnt; - if (!nvmet_check_data_len(req, nvmet_rw_data_len(req))) + if (!nvmet_check_transfer_len(req, nvmet_rw_data_len(req))) return; if (!req->sg_cnt || !nr_bvec) { @@ -276,7 +276,7 @@ static void nvmet_file_flush_work(struct work_struct *w) static void nvmet_file_execute_flush(struct nvmet_req *req) { - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; INIT_WORK(&req->f.work, nvmet_file_flush_work); schedule_work(&req->f.work); @@ -366,7 +366,7 @@ static void nvmet_file_write_zeroes_work(struct work_struct *w) static void nvmet_file_execute_write_zeroes(struct nvmet_req *req) { - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; INIT_WORK(&req->f.work, nvmet_file_write_zeroes_work); schedule_work(&req->f.work); diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 846d1de..d2273c7 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -391,7 +391,7 @@ static inline bool nvmet_aen_bit_disabled(struct nvmet_ctrl *ctrl, u32 bn) bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops); void nvmet_req_uninit(struct nvmet_req *req); -bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len); +bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len); bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len); void nvmet_req_complete(struct nvmet_req *req, u16 status); int nvmet_req_alloc_sgl(struct nvmet_req *req); From patchwork Mon Feb 24 16:45:38 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Max Gurtovoy X-Patchwork-Id: 11401001 Return-Path: Received: from mail.kernel.org 
(pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id E797314E3 for ; Mon, 24 Feb 2020 16:45:54 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id D154A20836 for ; Mon, 24 Feb 2020 16:45:54 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727802AbgBXQpx (ORCPT ); Mon, 24 Feb 2020 11:45:53 -0500 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:59507 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727896AbgBXQpw (ORCPT ); Mon, 24 Feb 2020 11:45:52 -0500 Received: from Internal Mail-Server by MTLPINE2 (envelope-from maxg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 24 Feb 2020 18:45:46 +0200 Received: from mtr-vdi-031.wap.labs.mlnx. (mtr-vdi-031.wap.labs.mlnx [10.209.102.136]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 01OGji9Z013647; Mon, 24 Feb 2020 18:45:46 +0200 From: Max Gurtovoy To: linux-nvme@lists.infradead.org, sagi@grimberg.me, linux-rdma@vger.kernel.org, kbusch@kernel.org, hch@lst.de, martin.petersen@oracle.com Cc: vladimirk@mellanox.com, idanb@mellanox.com, maxg@mellanox.com, israelr@mellanox.com, axboe@kernel.dk, shlomin@mellanox.com Subject: [PATCH 13/19] nvme: Add Metadata Capabilities enumerations Date: Mon, 24 Feb 2020 18:45:38 +0200 Message-Id: <20200224164544.219438-15-maxg@mellanox.com> X-Mailer: git-send-email 2.21.0 In-Reply-To: <20200224164544.219438-1-maxg@mellanox.com> References: <20200224164544.219438-1-maxg@mellanox.com> MIME-Version: 1.0 Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Israel Rukshin The enumerations will be used to expose the namespace metadata format by the target. Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy --- include/linux/nvme.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 3d5189f..4e968d3 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -396,6 +396,8 @@ enum { NVME_NS_FEAT_THIN = 1 << 0, NVME_NS_FLBAS_LBA_MASK = 0xf, NVME_NS_FLBAS_META_EXT = 0x10, + NVME_NS_MC_META_EXT = 1 << 0, + NVME_NS_MC_META_BUF = 1 << 1, NVME_LBAF_RP_BEST = 0, NVME_LBAF_RP_BETTER = 1, NVME_LBAF_RP_GOOD = 2, From patchwork Mon Feb 24 16:45:39 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Max Gurtovoy X-Patchwork-Id: 11401015 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 70C5C13A4 for ; Mon, 24 Feb 2020 16:45:57 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 59CF320836 for ; Mon, 24 Feb 2020 16:45:57 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727501AbgBXQpw (ORCPT ); Mon, 24 Feb 2020 11:45:52 -0500 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:46575 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727750AbgBXQpv (ORCPT ); Mon, 24 Feb 2020 11:45:51 -0500 Received: from Internal Mail-Server by MTLPINE1 (envelope-from maxg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 24 Feb 2020 18:45:47 +0200 Received: from mtr-vdi-031.wap.labs.mlnx. 
(mtr-vdi-031.wap.labs.mlnx [10.209.102.136]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 01OGji9a013647; Mon, 24 Feb 2020 18:45:46 +0200 From: Max Gurtovoy To: linux-nvme@lists.infradead.org, sagi@grimberg.me, linux-rdma@vger.kernel.org, kbusch@kernel.org, hch@lst.de, martin.petersen@oracle.com Cc: vladimirk@mellanox.com, idanb@mellanox.com, maxg@mellanox.com, israelr@mellanox.com, axboe@kernel.dk, shlomin@mellanox.com Subject: [PATCH 14/19] nvmet: Add mdts setting op for controllers Date: Mon, 24 Feb 2020 18:45:39 +0200 Message-Id: <20200224164544.219438-16-maxg@mellanox.com> X-Mailer: git-send-email 2.21.0 In-Reply-To: <20200224164544.219438-1-maxg@mellanox.com> References: <20200224164544.219438-1-maxg@mellanox.com> MIME-Version: 1.0 Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org Some transports, such as RDMA, would like to set the Maximum Data Transfer Size (MDTS) according to device/port/ctrl characteristics. This will enable the transport to set the optimal MDTS according to controller needs and device capabilities. Add a new nvmet transport op that is called during ctrl identification. This will not affect transports that don't set this option. Signed-off-by: Max Gurtovoy Signed-off-by: Israel Rukshin --- drivers/nvme/target/admin-cmd.c | 8 ++++++-- drivers/nvme/target/nvmet.h | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index d446813..c4ad0dd 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -369,8 +369,12 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) /* we support multiple ports, multiples hosts and ANA: */ id->cmic = (1 << 0) | (1 << 1) | (1 << 3); - /* no limit on data transfer sizes for now */ - id->mdts = 0; + /* Limit MDTS according to transport capability */ + if (ctrl->ops->set_mdts) + id->mdts = ctrl->ops->set_mdts(ctrl); + else + id->mdts = 0; + id->cntlid = cpu_to_le16(ctrl->cntlid); id->ver = cpu_to_le32(ctrl->subsys->ver); diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index d2273c7..ff34d7a 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -292,6 +292,7 @@ struct nvmet_fabrics_ops { struct nvmet_port *port, char *traddr); u16 (*install_queue)(struct nvmet_sq *nvme_sq); void (*discovery_chg)(struct nvmet_port *port); + u8 (*set_mdts)(struct nvmet_ctrl *ctrl); }; #define NVMET_MAX_INLINE_BIOVEC 8 From patchwork Mon Feb 24 16:45:40 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Max Gurtovoy X-Patchwork-Id: 11401009 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 33CAB159A for ; Mon, 24 Feb 2020 16:45:56 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 1D02320637 for ; Mon, 24 Feb 2020 16:45:56 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727636AbgBXQpz (ORCPT ); Mon, 24 Feb 2020 11:45:55 -0500 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:59509 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727901AbgBXQpx (ORCPT ); Mon, 24 Feb 2020 11:45:53 -0500 Received: from Internal Mail-Server by MTLPINE2 (envelope-from maxg@mellanox.com) with ESMTPS (AES256-SHA encrypted);
24 Feb 2020 18:45:47 +0200 Received: from mtr-vdi-031.wap.labs.mlnx. (mtr-vdi-031.wap.labs.mlnx [10.209.102.136]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 01OGji9b013647; Mon, 24 Feb 2020 18:45:46 +0200 From: Max Gurtovoy To: linux-nvme@lists.infradead.org, sagi@grimberg.me, linux-rdma@vger.kernel.org, kbusch@kernel.org, hch@lst.de, martin.petersen@oracle.com Cc: vladimirk@mellanox.com, idanb@mellanox.com, maxg@mellanox.com, israelr@mellanox.com, axboe@kernel.dk, shlomin@mellanox.com Subject: [PATCH 15/19] nvmet: Add metadata/T10-PI support Date: Mon, 24 Feb 2020 18:45:40 +0200 Message-Id: <20200224164544.219438-17-maxg@mellanox.com> X-Mailer: git-send-email 2.21.0 In-Reply-To: <20200224164544.219438-1-maxg@mellanox.com> References: <20200224164544.219438-1-maxg@mellanox.com> MIME-Version: 1.0 Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Israel Rukshin Expose the namespace metadata format when PI is enabled. The user needs to enable the capability per subsystem and per port. The other metadata properties are taken from the namespace/bdev. Usage example: echo 1 > /config/nvmet/subsystems/${NAME}/attr_pi_enable echo 1 > /config/nvmet/ports/${PORT_NUM}/param_pi_enable Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy --- drivers/nvme/target/admin-cmd.c | 19 ++++++++++-- drivers/nvme/target/configfs.c | 61 +++++++++++++++++++++++++++++++++++++++ drivers/nvme/target/fabrics-cmd.c | 11 +++++++ drivers/nvme/target/nvmet.h | 26 +++++++++++++++++ 4 files changed, 115 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index c4ad0dd..ebb6873 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -432,9 +432,13 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) strlcpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn)); - /* Max command capsule size is sqe + single page of in-capsule data */ + /* + * Max command capsule size is sqe + single page of in-capsule data. + * Disable inline data for Metadata capable controllers. 
+ */ id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) + - req->port->inline_data_size) / 16); + req->port->inline_data_size * + !ctrl->pi_support) / 16); /* Max response capsule size is cqe */ id->iorcsz = cpu_to_le32(sizeof(struct nvme_completion) / 16); @@ -464,6 +468,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) static void nvmet_execute_identify_ns(struct nvmet_req *req) { + struct nvmet_ctrl *ctrl = req->sq->ctrl; struct nvmet_ns *ns; struct nvme_id_ns *id; u16 status = 0; @@ -520,6 +525,16 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req) id->lbaf[0].ds = ns->blksize_shift; + if (ctrl->pi_support && nvmet_ns_has_pi(ns)) { + id->dpc = NVME_NS_DPC_PI_FIRST | NVME_NS_DPC_PI_LAST | + NVME_NS_DPC_PI_TYPE1 | NVME_NS_DPC_PI_TYPE2 | + NVME_NS_DPC_PI_TYPE3; + id->mc = NVME_NS_MC_META_EXT; + id->dps = ns->prot_type; + id->flbas = NVME_NS_FLBAS_META_EXT; + id->lbaf[0].ms = ns->ms; + } + if (ns->readonly) id->nsattr |= (1 << 0); nvmet_put_namespace(ns); diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 7aa1078..19bf240 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -248,6 +248,36 @@ static ssize_t nvmet_param_inline_data_size_store(struct config_item *item, CONFIGFS_ATTR(nvmet_, param_inline_data_size); +#ifdef CONFIG_BLK_DEV_INTEGRITY +static ssize_t nvmet_param_pi_enable_show(struct config_item *item, + char *page) +{ + struct nvmet_port *port = to_nvmet_port(item); + + return snprintf(page, PAGE_SIZE, "%d\n", port->pi_enable); +} + +static ssize_t nvmet_param_pi_enable_store(struct config_item *item, + const char *page, size_t count) +{ + struct nvmet_port *port = to_nvmet_port(item); + bool val; + + if (strtobool(page, &val)) + return -EINVAL; + + if (port->enabled) { + pr_err("Disable port before setting pi_enable value.\n"); + return -EACCES; + } + + port->pi_enable = val; + return count; +} + +CONFIGFS_ATTR(nvmet_, param_pi_enable); +#endif + static ssize_t nvmet_addr_trtype_show(struct config_item *item, char *page) { @@ -987,6 +1017,31 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item, } CONFIGFS_ATTR(nvmet_subsys_, attr_model); +#ifdef CONFIG_BLK_DEV_INTEGRITY +static ssize_t nvmet_subsys_attr_pi_enable_show(struct config_item *item, + char *page) +{ + return snprintf(page, PAGE_SIZE, "%d\n", to_subsys(item)->pi_support); +} + +static ssize_t nvmet_subsys_attr_pi_enable_store(struct config_item *item, + const char *page, size_t count) +{ + struct nvmet_subsys *subsys = to_subsys(item); + bool pi_enable; + + if (strtobool(page, &pi_enable)) + return -EINVAL; + + down_write(&nvmet_config_sem); + subsys->pi_support = pi_enable; + up_write(&nvmet_config_sem); + + return count; +} +CONFIGFS_ATTR(nvmet_subsys_, attr_pi_enable); +#endif + static struct configfs_attribute *nvmet_subsys_attrs[] = { &nvmet_subsys_attr_attr_allow_any_host, &nvmet_subsys_attr_attr_version, @@ -994,6 +1049,9 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item, &nvmet_subsys_attr_attr_cntlid_min, &nvmet_subsys_attr_attr_cntlid_max, &nvmet_subsys_attr_attr_model, +#ifdef CONFIG_BLK_DEV_INTEGRITY + &nvmet_subsys_attr_attr_pi_enable, +#endif NULL, }; @@ -1289,6 +1347,9 @@ static void nvmet_port_release(struct config_item *item) &nvmet_attr_addr_trsvcid, &nvmet_attr_addr_trtype, &nvmet_attr_param_inline_data_size, +#ifdef CONFIG_BLK_DEV_INTEGRITY + &nvmet_attr_param_pi_enable, +#endif NULL, }; diff --git a/drivers/nvme/target/fabrics-cmd.c 
b/drivers/nvme/target/fabrics-cmd.c index 52a6f70..799de18 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -197,6 +197,17 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) goto out; } + if (ctrl->subsys->pi_support && ctrl->port->pi_enable) { + if (ctrl->port->pi_capable) { + ctrl->pi_support = true; + pr_info("controller %d T10-PI enabled\n", ctrl->cntlid); + } else { + ctrl->pi_support = false; + pr_warn("T10-PI is not supported on controller %d\n", + ctrl->cntlid); + } + } + uuid_copy(&ctrl->hostid, &d->hostid); status = nvmet_install_queue(ctrl, req); diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index ff34d7a..ef231fd 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -145,6 +145,8 @@ struct nvmet_port { bool enabled; int inline_data_size; const struct nvmet_fabrics_ops *tr_ops; + bool pi_capable; + bool pi_enable; }; static inline struct nvmet_port *to_nvmet_port(struct config_item *item) @@ -204,6 +206,7 @@ struct nvmet_ctrl { spinlock_t error_lock; u64 err_counter; struct nvme_error_slot slots[NVMET_ERROR_LOG_SLOTS]; + bool pi_support; }; struct nvmet_subsys_model { @@ -233,6 +236,7 @@ struct nvmet_subsys { u64 ver; u64 serial; char *subsysnqn; + bool pi_support; struct config_group group; @@ -513,6 +517,28 @@ static inline u32 nvmet_rw_data_len(struct nvmet_req *req) req->ns->blksize_shift; } +#ifdef CONFIG_BLK_DEV_INTEGRITY +static inline u32 nvmet_rw_prot_len(struct nvmet_req *req) +{ + return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) * req->ns->ms; +} + +static inline bool nvmet_ns_has_pi(struct nvmet_ns *ns) +{ + return ns->prot_type && ns->ms == sizeof(struct t10_pi_tuple); +} +#else +static inline u32 nvmet_rw_prot_len(struct nvmet_req *req) +{ + return 0; +} + +static inline bool nvmet_ns_has_pi(struct nvmet_ns *ns) +{ + return false; +} +#endif /* CONFIG_BLK_DEV_INTEGRITY */ + static inline u32 nvmet_dsm_len(struct nvmet_req *req) { return (le32_to_cpu(req->cmd->dsm.nr) + 1) * From patchwork Mon Feb 24 16:45:41 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Max Gurtovoy X-Patchwork-Id: 11401013 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id B506D13A4 for ; Mon, 24 Feb 2020 16:45:56 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 9E46E20836 for ; Mon, 24 Feb 2020 16:45:56 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727730AbgBXQpw (ORCPT ); Mon, 24 Feb 2020 11:45:52 -0500 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:46593 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727789AbgBXQpv (ORCPT ); Mon, 24 Feb 2020 11:45:51 -0500 Received: from Internal Mail-Server by MTLPINE1 (envelope-from maxg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 24 Feb 2020 18:45:47 +0200 Received: from mtr-vdi-031.wap.labs.mlnx. 
(mtr-vdi-031.wap.labs.mlnx [10.209.102.136]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 01OGji9c013647; Mon, 24 Feb 2020 18:45:47 +0200 From: Max Gurtovoy To: linux-nvme@lists.infradead.org, sagi@grimberg.me, linux-rdma@vger.kernel.org, kbusch@kernel.org, hch@lst.de, martin.petersen@oracle.com Cc: vladimirk@mellanox.com, idanb@mellanox.com, maxg@mellanox.com, israelr@mellanox.com, axboe@kernel.dk, shlomin@mellanox.com Subject: [PATCH 16/19] nvmet: Add metadata support for block devices Date: Mon, 24 Feb 2020 18:45:41 +0200 Message-Id: <20200224164544.219438-18-maxg@mellanox.com> X-Mailer: git-send-email 2.21.0 In-Reply-To: <20200224164544.219438-1-maxg@mellanox.com> References: <20200224164544.219438-1-maxg@mellanox.com> MIME-Version: 1.0 Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Israel Rukshin Create a block IO request for the metadata from the protection SG list. Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy --- drivers/nvme/target/io-cmd-bdev.c | 87 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 85 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 6e74fdf..735ad62 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -164,6 +164,61 @@ static void nvmet_bio_done(struct bio *bio) bio_put(bio); } +#ifdef CONFIG_BLK_DEV_INTEGRITY +static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio, + struct sg_mapping_iter *miter) +{ + struct blk_integrity *bi; + struct bio_integrity_payload *bip; + struct block_device *bdev = req->ns->bdev; + int rc; + size_t resid, len; + + bi = bdev_get_integrity(bdev); + if (unlikely(!bi)) { + pr_err("Unable to locate bio_integrity\n"); + return -ENODEV; + } + + bip = bio_integrity_alloc(bio, GFP_NOIO, + min_t(unsigned int, req->prot_sg_cnt, BIO_MAX_PAGES)); + if (IS_ERR(bip)) { + pr_err("Unable to allocate bio_integrity_payload\n"); + return PTR_ERR(bip); + } + + bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio)); + /* virtual start sector must be in integrity interval units */ + bip_set_seed(bip, bio->bi_iter.bi_sector >> + (bi->interval_exp - SECTOR_SHIFT)); + + resid = bip->bip_iter.bi_size; + while (resid > 0 && sg_miter_next(miter)) { + len = min_t(size_t, miter->length, resid); + rc = bio_integrity_add_page(bio, miter->page, len, + offset_in_page(miter->addr)); + if (unlikely(rc != len)) { + pr_err("bio_integrity_add_page() failed; %d\n", rc); + sg_miter_stop(miter); + return -ENOMEM; + } + + resid -= len; + if (len < miter->length) + miter->consumed -= miter->length - len; + } + sg_miter_stop(miter); + + return 0; +} +#else +static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio, + struct sg_mapping_iter *miter) +{ + return 0; +} +#endif /* CONFIG_BLK_DEV_INTEGRITY */ + static void nvmet_bdev_execute_rw(struct nvmet_req *req) { int sg_cnt = req->sg_cnt; @@ -171,9 +226,11 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) struct scatterlist *sg; struct blk_plug plug; sector_t sector; - int op, i; + int op, i, rc; + struct sg_mapping_iter prot_miter; - if (!nvmet_check_transfer_len(req, nvmet_rw_data_len(req))) + if (!nvmet_check_transfer_len(req, + nvmet_rw_data_len(req) + req->prot_len)) return; if (!req->sg_cnt) { @@ -208,11 +265,25 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) bio->bi_opf = op; blk_start_plug(&plug); + if (req->use_pi) + sg_miter_start(&prot_miter, req->prot_sg, 
req->prot_sg_cnt, + op == REQ_OP_READ ? SG_MITER_FROM_SG : + SG_MITER_TO_SG); + for_each_sg(req->sg, sg, req->sg_cnt, i) { while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset) != sg->length) { struct bio *prev = bio; + if (req->use_pi) { + rc = nvmet_bdev_alloc_bip(req, bio, + &prot_miter); + if (unlikely(rc)) { + bio_io_error(bio); + return; + } + } + bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES)); bio_set_dev(bio, req->ns->bdev); bio->bi_iter.bi_sector = sector; @@ -226,6 +297,14 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) sg_cnt--; } + if (req->use_pi) { + rc = nvmet_bdev_alloc_bip(req, bio, &prot_miter); + if (unlikely(rc)) { + bio_io_error(bio); + return; + } + } + submit_bio(bio); blk_finish_plug(&plug); } @@ -353,6 +432,10 @@ u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req) case nvme_cmd_read: case nvme_cmd_write: req->execute = nvmet_bdev_execute_rw; + if (req->sq->ctrl->pi_support && nvmet_ns_has_pi(req->ns)) { + req->use_pi = true; + req->prot_len = nvmet_rw_prot_len(req); + } return 0; case nvme_cmd_flush: req->execute = nvmet_bdev_execute_flush; From patchwork Mon Feb 24 16:45:42 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Max Gurtovoy X-Patchwork-Id: 11401003 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 27FEE13A4 for ; Mon, 24 Feb 2020 16:45:55 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 06A2E20836 for ; Mon, 24 Feb 2020 16:45:55 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727935AbgBXQpy (ORCPT ); Mon, 24 Feb 2020 11:45:54 -0500 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:59539 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727895AbgBXQpy (ORCPT ); Mon, 24 Feb 2020 11:45:54 -0500 Received: from Internal Mail-Server by MTLPINE2 (envelope-from maxg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 24 Feb 2020 18:45:47 +0200 Received: from mtr-vdi-031.wap.labs.mlnx. (mtr-vdi-031.wap.labs.mlnx [10.209.102.136]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 01OGji9d013647; Mon, 24 Feb 2020 18:45:47 +0200 From: Max Gurtovoy To: linux-nvme@lists.infradead.org, sagi@grimberg.me, linux-rdma@vger.kernel.org, kbusch@kernel.org, hch@lst.de, martin.petersen@oracle.com Cc: vladimirk@mellanox.com, idanb@mellanox.com, maxg@mellanox.com, israelr@mellanox.com, axboe@kernel.dk, shlomin@mellanox.com Subject: [PATCH 17/19] RDMA/rw: Expose maximal page list for a device per 1 MR Date: Mon, 24 Feb 2020 18:45:42 +0200 Message-Id: <20200224164544.219438-19-maxg@mellanox.com> X-Mailer: git-send-email 2.21.0 In-Reply-To: <20200224164544.219438-1-maxg@mellanox.com> References: <20200224164544.219438-1-maxg@mellanox.com> MIME-Version: 1.0 Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org ULPs will use this information to determine the maximal data transfer size per IO operation.
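As a rough illustration (a sketch, not part of the patch itself), a ULP could cap its per-IO data size from the exported value; ib_dev, pi_support and max_io_bytes below are hypothetical locals, and a 4KB system page size is assumed:

	u32 max_pages = rdma_rw_fr_page_list_len(ib_dev, pi_support);
	/* one MR maps at most max_pages pages, so cap each IO accordingly */
	size_t max_io_bytes = (size_t)max_pages << PAGE_SHIFT;

nvmet-rdma applies this pattern in a later patch of this series, converting max_pages to an MDTS exponent with ilog2().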
Signed-off-by: Max Gurtovoy --- drivers/infiniband/core/rw.c | 14 ++++++++++++-- include/rdma/rw.h | 1 + 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index 4fad732..edc9bee 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -56,8 +56,17 @@ static inline bool rdma_rw_io_needs_mr(struct ib_device *dev, u8 port_num, return false; } -static inline u32 rdma_rw_fr_page_list_len(struct ib_device *dev, - bool pi_support) +/** + * rdma_rw_fr_page_list_len - return the max number of pages mapped by 1 MR + * @dev: RDMA device that will eventually create a PD for needed MRs + * @pi_support: Whether MRs will be created for protection information offload + * + * Returns the number of pages that one MR can map for RDMA operation by the + * given device. One can determine the maximal data size according to the + * result of this function, or chose using multiple MRs for the RDMA operation + * as well. + */ +u32 rdma_rw_fr_page_list_len(struct ib_device *dev, bool pi_support) { u32 max_pages; @@ -69,6 +78,7 @@ static inline u32 rdma_rw_fr_page_list_len(struct ib_device *dev, /* arbitrary limit to avoid allocating gigantic resources */ return min_t(u32, max_pages, 256); } +EXPORT_SYMBOL(rdma_rw_fr_page_list_len); static inline int rdma_rw_inv_key(struct rdma_rw_reg_ctx *reg) { diff --git a/include/rdma/rw.h b/include/rdma/rw.h index 6ad9dc8..a9bbda7 100644 --- a/include/rdma/rw.h +++ b/include/rdma/rw.h @@ -69,5 +69,6 @@ unsigned int rdma_rw_mr_factor(struct ib_device *device, u8 port_num, void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr); int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr); void rdma_rw_cleanup_mrs(struct ib_qp *qp); +u32 rdma_rw_fr_page_list_len(struct ib_device *dev, bool pi_support); #endif /* _RDMA_RW_H */ From patchwork Mon Feb 24 16:45:43 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Max Gurtovoy X-Patchwork-Id: 11401007 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 1380E13A4 for ; Mon, 24 Feb 2020 16:45:56 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id F142020836 for ; Mon, 24 Feb 2020 16:45:55 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727750AbgBXQpw (ORCPT ); Mon, 24 Feb 2020 11:45:52 -0500 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:46596 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727775AbgBXQpv (ORCPT ); Mon, 24 Feb 2020 11:45:51 -0500 Received: from Internal Mail-Server by MTLPINE1 (envelope-from maxg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 24 Feb 2020 18:45:47 +0200 Received: from mtr-vdi-031.wap.labs.mlnx. 
(mtr-vdi-031.wap.labs.mlnx [10.209.102.136]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 01OGji9e013647; Mon, 24 Feb 2020 18:45:47 +0200 From: Max Gurtovoy To: linux-nvme@lists.infradead.org, sagi@grimberg.me, linux-rdma@vger.kernel.org, kbusch@kernel.org, hch@lst.de, martin.petersen@oracle.com Cc: vladimirk@mellanox.com, idanb@mellanox.com, maxg@mellanox.com, israelr@mellanox.com, axboe@kernel.dk, shlomin@mellanox.com Subject: [PATCH 18/19] nvmet-rdma: Implement set_mdts controller op Date: Mon, 24 Feb 2020 18:45:43 +0200 Message-Id: <20200224164544.219438-20-maxg@mellanox.com> X-Mailer: git-send-email 2.21.0 In-Reply-To: <20200224164544.219438-1-maxg@mellanox.com> References: <20200224164544.219438-1-maxg@mellanox.com> MIME-Version: 1.0 Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org Set the maximal data transfer size according to device capabilities. For example, for T10-DIF offload by a supporting RDMA HCA, one uses the RDMA/rw API, which limits the IO operation to a single MR of at most 256 pages. Limit the MDTS according to the RDMA/rw API and decrease it further to avoid multiple splits of large IOs by the local block layer, which eases the CPU load on the target side. Signed-off-by: Max Gurtovoy --- drivers/nvme/target/rdma.c | 17 +++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 37d262a..2227adf 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -30,6 +30,7 @@ #define NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE PAGE_SIZE #define NVMET_RDMA_MAX_INLINE_SGE 4 #define NVMET_RDMA_MAX_INLINE_DATA_SIZE max_t(int, SZ_16K, PAGE_SIZE) +#define NVMET_RDMA_T10_PI_MDTS 5 struct nvmet_rdma_cmd { struct ib_sge sge[NVMET_RDMA_MAX_INLINE_SGE + 1]; @@ -1602,6 +1603,21 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req, } } +static u8 nvmet_rdma_set_mdts(struct nvmet_ctrl *ctrl) +{ + struct nvmet_port *port = ctrl->port; + struct rdma_cm_id *cm_id = port->priv; + u32 max_pages; + + if (ctrl->pi_support) { + max_pages = rdma_rw_fr_page_list_len(cm_id->device, true); + /* Assume mpsmin == device_page_size == 4KB */ + return min(ilog2(max_pages), NVMET_RDMA_T10_PI_MDTS); + } + + return 0; +} + static const struct nvmet_fabrics_ops nvmet_rdma_ops = { .owner = THIS_MODULE, .type = NVMF_TRTYPE_RDMA, @@ -1612,6 +1628,7 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req, .queue_response = nvmet_rdma_queue_response, .delete_ctrl = nvmet_rdma_delete_ctrl, .disc_traddr = nvmet_rdma_disc_port_addr, + .set_mdts = nvmet_rdma_set_mdts, }; static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data) From patchwork Mon Feb 24 16:45:44 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Max Gurtovoy X-Patchwork-Id: 11401023 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id C67DB1871 for ; Mon, 24 Feb 2020 16:45:58 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id A6A9320836 for ; Mon, 24 Feb 2020 16:45:58 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727825AbgBXQp5 (ORCPT ); Mon, 24 Feb 2020 11:45:57 -0500 Received: from mail-il-dmz.mellanox.com ([193.47.165.129]:59551 "EHLO mellanox.co.il" rhost-flags-OK-OK-OK-FAIL) by
vger.kernel.org with ESMTP id S1727877AbgBXQp4 (ORCPT ); Mon, 24 Feb 2020 11:45:56 -0500 Received: from Internal Mail-Server by MTLPINE2 (envelope-from maxg@mellanox.com) with ESMTPS (AES256-SHA encrypted); 24 Feb 2020 18:45:47 +0200 Received: from mtr-vdi-031.wap.labs.mlnx. (mtr-vdi-031.wap.labs.mlnx [10.209.102.136]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 01OGji9f013647; Mon, 24 Feb 2020 18:45:47 +0200 From: Max Gurtovoy To: linux-nvme@lists.infradead.org, sagi@grimberg.me, linux-rdma@vger.kernel.org, kbusch@kernel.org, hch@lst.de, martin.petersen@oracle.com Cc: vladimirk@mellanox.com, idanb@mellanox.com, maxg@mellanox.com, israelr@mellanox.com, axboe@kernel.dk, shlomin@mellanox.com Subject: [PATCH 19/19] nvmet-rdma: Add metadata/T10-PI support Date: Mon, 24 Feb 2020 18:45:44 +0200 Message-Id: <20200224164544.219438-21-maxg@mellanox.com> X-Mailer: git-send-email 2.21.0 In-Reply-To: <20200224164544.219438-1-maxg@mellanox.com> References: <20200224164544.219438-1-maxg@mellanox.com> MIME-Version: 1.0 Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Israel Rukshin For capable HCAs (e.g. ConnectX-4/ConnectX-5) this will allow end-to-end protection information passthrough and validation for NVMe over RDMA transport. Metadata support was implemented over the new RDMA signature verbs API. Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy --- drivers/nvme/target/rdma.c | 235 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 217 insertions(+), 18 deletions(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 2227adf..006f613 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -55,6 +55,7 @@ struct nvmet_rdma_rsp { struct nvmet_rdma_queue *queue; struct ib_cqe read_cqe; + struct ib_cqe write_cqe; struct rdma_rw_ctx rw; struct nvmet_req req; @@ -130,6 +131,9 @@ struct nvmet_rdma_device { static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc); static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc); static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc); +#ifdef CONFIG_BLK_DEV_INTEGRITY +static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc); +#endif static void nvmet_rdma_qp_event(struct ib_event *event, void *priv); static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue); static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev, @@ -387,6 +391,10 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev, /* Data In / RDMA READ */ r->read_cqe.done = nvmet_rdma_read_data_done; +#ifdef CONFIG_BLK_DEV_INTEGRITY + /* Data Out / RDMA WRITE */ + r->write_cqe.done = nvmet_rdma_write_data_done; +#endif return 0; out_free_rsp: @@ -496,6 +504,138 @@ static void nvmet_rdma_process_wr_wait_list(struct nvmet_rdma_queue *queue) spin_unlock(&queue->rsp_wr_wait_lock); } +#ifdef CONFIG_BLK_DEV_INTEGRITY +static u16 nvmet_rdma_check_pi_status(struct ib_mr *sig_mr) +{ + struct ib_mr_status mr_status; + int ret; + u16 status = 0; + + ret = ib_check_mr_status(sig_mr, IB_MR_CHECK_SIG_STATUS, &mr_status); + if (ret) { + pr_err("ib_check_mr_status failed, ret %d\n", ret); + return NVME_SC_INVALID_PI; + } + + if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) { + switch (mr_status.sig_err.err_type) { + case IB_SIG_BAD_GUARD: + status = NVME_SC_GUARD_CHECK; + break; + case IB_SIG_BAD_REFTAG: + status = NVME_SC_REFTAG_CHECK; + break; + case IB_SIG_BAD_APPTAG: + status = 
NVME_SC_APPTAG_CHECK; + break; + } + pr_err("PI error found type %d expected 0x%x vs actual 0x%x\n", + mr_status.sig_err.err_type, + mr_status.sig_err.expected, + mr_status.sig_err.actual); + } + + return status; +} + +static void nvmet_rdma_set_sig_domain(struct blk_integrity *bi, + struct nvme_command *cmd, struct ib_sig_domain *domain, + u16 control) +{ + domain->sig_type = IB_SIG_TYPE_T10_DIF; + domain->sig.dif.bg_type = IB_T10DIF_CRC; + domain->sig.dif.pi_interval = 1 << bi->interval_exp; + domain->sig.dif.ref_tag = le32_to_cpu(cmd->rw.reftag); + + /* + * At the moment we hard code those, but in the future + * we will take them from cmd. + */ + domain->sig.dif.apptag_check_mask = 0xffff; + domain->sig.dif.app_escape = true; + domain->sig.dif.ref_escape = true; + if (control & NVME_RW_PRINFO_PRCHK_REF) + domain->sig.dif.ref_remap = true; +} + +static void nvmet_rdma_set_sig_attrs(struct nvmet_req *req, + struct ib_sig_attrs *sig_attrs) +{ + struct nvme_command *cmd = req->cmd; + struct blk_integrity *bi = bdev_get_integrity(req->ns->bdev); + u16 control = le16_to_cpu(cmd->rw.control); + + WARN_ON(bi == NULL); + + memset(sig_attrs, 0, sizeof(*sig_attrs)); + + if (control & NVME_RW_PRINFO_PRACT) { + /* for WRITE_INSERT/READ_STRIP no wire domain */ + sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE; + nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control); + /* Clear the PRACT bit since HCA will generate/verify the PI */ + control &= ~NVME_RW_PRINFO_PRACT; + cmd->rw.control = cpu_to_le16(control); + /* PI is added by the HW */ + req->transfer_len += req->prot_len; + } else { + /* for WRITE_PASS/READ_PASS both wire/memory domains exist */ + nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control); + nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control); + } + + if (control & NVME_RW_PRINFO_PRCHK_REF) + sig_attrs->check_mask |= IB_SIG_CHECK_REFTAG; + if (control & NVME_RW_PRINFO_PRCHK_GUARD) + sig_attrs->check_mask |= IB_SIG_CHECK_GUARD; + if (control & NVME_RW_PRINFO_PRCHK_APP) + sig_attrs->check_mask |= IB_SIG_CHECK_APPTAG; +} +#else +static u16 nvmet_rdma_check_pi_status(struct ib_mr *sig_mr) +{ + return 0; +} + +static void nvmet_rdma_set_sig_attrs(struct nvmet_req *req, + struct ib_sig_attrs *sig_attrs) +{ +} +#endif /* CONFIG_BLK_DEV_INTEGRITY */ + +static int nvmet_rdma_rw_ctx_init(struct nvmet_rdma_rsp *rsp, u64 addr, u32 key, + struct ib_sig_attrs *sig_attrs) +{ + struct rdma_cm_id *cm_id = rsp->queue->cm_id; + struct nvmet_req *req = &rsp->req; + int ret; + + if (req->use_pi) + ret = rdma_rw_ctx_signature_init(&rsp->rw, cm_id->qp, + cm_id->port_num, req->sg, req->sg_cnt, req->prot_sg, + req->prot_sg_cnt, sig_attrs, addr, key, + nvmet_data_dir(req)); + else + ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num, + req->sg, req->sg_cnt, 0, addr, key, + nvmet_data_dir(req)); + + return ret; +} + +static void nvmet_rdma_rw_ctx_destroy(struct nvmet_rdma_rsp *rsp) +{ + struct rdma_cm_id *cm_id = rsp->queue->cm_id; + struct nvmet_req *req = &rsp->req; + + if (req->use_pi) + rdma_rw_ctx_destroy_signature(&rsp->rw, cm_id->qp, + cm_id->port_num, req->sg, req->sg_cnt, req->prot_sg, + req->prot_sg_cnt, nvmet_data_dir(req)); + else + rdma_rw_ctx_destroy(&rsp->rw, cm_id->qp, cm_id->port_num, + req->sg, req->sg_cnt, nvmet_data_dir(req)); +} static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp) { @@ -503,11 +643,8 @@ static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp) atomic_add(1 + rsp->n_rdma, &queue->sq_wr_avail); - if (rsp->n_rdma) { - 
rdma_rw_ctx_destroy(&rsp->rw, queue->cm_id->qp, - queue->cm_id->port_num, rsp->req.sg, - rsp->req.sg_cnt, nvmet_data_dir(&rsp->req)); - } + if (rsp->n_rdma) + nvmet_rdma_rw_ctx_destroy(rsp); if (rsp->req.sg != rsp->cmd->inline_sg) nvmet_req_free_sgl(&rsp->req); @@ -562,11 +699,16 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req) rsp->send_wr.opcode = IB_WR_SEND; } - if (nvmet_rdma_need_data_out(rsp)) - first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp, - cm_id->port_num, NULL, &rsp->send_wr); - else + if (nvmet_rdma_need_data_out(rsp)) { + if (rsp->req.use_pi) + first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp, + cm_id->port_num, &rsp->write_cqe, NULL); + else + first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp, + cm_id->port_num, NULL, &rsp->send_wr); + } else { first_wr = &rsp->send_wr; + } nvmet_rdma_post_recv(rsp->queue->dev, rsp->cmd); @@ -585,15 +727,14 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc) struct nvmet_rdma_rsp *rsp = container_of(wc->wr_cqe, struct nvmet_rdma_rsp, read_cqe); struct nvmet_rdma_queue *queue = cq->cq_context; + u16 status = 0; WARN_ON(rsp->n_rdma <= 0); atomic_add(rsp->n_rdma, &queue->sq_wr_avail); - rdma_rw_ctx_destroy(&rsp->rw, queue->cm_id->qp, - queue->cm_id->port_num, rsp->req.sg, - rsp->req.sg_cnt, nvmet_data_dir(&rsp->req)); rsp->n_rdma = 0; if (unlikely(wc->status != IB_WC_SUCCESS)) { + nvmet_rdma_rw_ctx_destroy(rsp); nvmet_req_uninit(&rsp->req); nvmet_rdma_release_rsp(rsp); if (wc->status != IB_WC_WR_FLUSH_ERR) { @@ -604,8 +745,58 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc) return; } - rsp->req.execute(&rsp->req); + if (rsp->req.use_pi) + status = nvmet_rdma_check_pi_status(rsp->rw.reg->mr); + nvmet_rdma_rw_ctx_destroy(rsp); + + if (unlikely(status)) + nvmet_req_complete(&rsp->req, status); + else + rsp->req.execute(&rsp->req); +} + +#ifdef CONFIG_BLK_DEV_INTEGRITY +static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct nvmet_rdma_rsp *rsp = + container_of(wc->wr_cqe, struct nvmet_rdma_rsp, write_cqe); + struct nvmet_rdma_queue *queue = cq->cq_context; + struct rdma_cm_id *cm_id = rsp->queue->cm_id; + u16 status; + + WARN_ON(rsp->n_rdma <= 0); + atomic_add(rsp->n_rdma, &queue->sq_wr_avail); + rsp->n_rdma = 0; + + if (unlikely(wc->status != IB_WC_SUCCESS)) { + nvmet_rdma_rw_ctx_destroy(rsp); + nvmet_req_uninit(&rsp->req); + nvmet_rdma_release_rsp(rsp); + if (wc->status != IB_WC_WR_FLUSH_ERR) { + pr_info("RDMA WRITE for CQE 0x%p failed with status %s (%d).\n", + wc->wr_cqe, ib_wc_status_msg(wc->status), + wc->status); + nvmet_rdma_error_comp(queue); + } + return; + } + + /* + * Upon RDMA completion check the signature status + * - if succeeded send good NVMe response + * - if failed send bad NVMe response with appropriate error + */ + status = nvmet_rdma_check_pi_status(rsp->rw.reg->mr); + if (unlikely(status)) + rsp->req.cqe->status = cpu_to_le16(status << 1); + nvmet_rdma_rw_ctx_destroy(rsp); + + if (unlikely(ib_post_send(cm_id->qp, &rsp->send_wr, NULL))) { + pr_err("sending cmd response failed\n"); + nvmet_rdma_release_rsp(rsp); + } } +#endif /* CONFIG_BLK_DEV_INTEGRITY */ static void nvmet_rdma_use_inline_sg(struct nvmet_rdma_rsp *rsp, u32 len, u64 off) @@ -661,9 +852,9 @@ static u16 nvmet_rdma_map_sgl_inline(struct nvmet_rdma_rsp *rsp) static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp, struct nvme_keyed_sgl_desc *sgl, bool invalidate) { - struct rdma_cm_id *cm_id = rsp->queue->cm_id; u64 addr = le64_to_cpu(sgl->addr); u32 key = 
get_unaligned_le32(sgl->key); + struct ib_sig_attrs sig_attrs; int ret; rsp->req.transfer_len = get_unaligned_le24(sgl->length); @@ -672,13 +863,14 @@ static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp, if (!rsp->req.transfer_len) return 0; + if (rsp->req.use_pi) + nvmet_rdma_set_sig_attrs(&rsp->req, &sig_attrs); + ret = nvmet_req_alloc_sgl(&rsp->req); if (unlikely(ret < 0)) goto error_out; - ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num, - rsp->req.sg, rsp->req.sg_cnt, 0, addr, key, - nvmet_data_dir(&rsp->req)); + ret = nvmet_rdma_rw_ctx_init(rsp, addr, key, &sig_attrs); if (unlikely(ret < 0)) goto error_out; rsp->n_rdma += ret; @@ -957,6 +1149,9 @@ static void nvmet_rdma_free_dev(struct kref *ref) goto out_free_pd; } + port->pi_capable = ndev->device->attrs.device_cap_flags & + IB_DEVICE_INTEGRITY_HANDOVER ? true : false; + list_add(&ndev->entry, &device_list); out_unlock: mutex_unlock(&device_list_mutex); @@ -1021,6 +1216,10 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) qp_attr.cap.max_recv_sge = 1 + ndev->inline_page_count; } + if (queue->port->pi_capable && queue->port->pi_enable && + queue->host_qid) + qp_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN; + ret = rdma_create_qp(queue->cm_id, ndev->pd, &qp_attr); if (ret) { pr_err("failed to create_qp ret= %d\n", ret); @@ -1165,6 +1364,7 @@ static int nvmet_rdma_cm_reject(struct rdma_cm_id *cm_id, INIT_WORK(&queue->release_work, nvmet_rdma_release_queue_work); queue->dev = ndev; queue->cm_id = cm_id; + queue->port = cm_id->context; spin_lock_init(&queue->state_lock); queue->state = NVMET_RDMA_Q_CONNECTING; @@ -1283,7 +1483,6 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, ret = -ENOMEM; goto put_device; } - queue->port = cm_id->context; if (queue->host_qid == 0) { /* Let inflight controller teardown complete */
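/*
 * Taken together, the series is enabled from userspace via the configfs
 * attributes added in patch 15 (usage sketch quoted from that patch):
 *
 *   echo 1 > /config/nvmet/subsystems/${NAME}/attr_pi_enable
 *   echo 1 > /config/nvmet/ports/${PORT_NUM}/param_pi_enable
 *
 * On a pi_capable RDMA port the admin connect then sets ctrl->pi_support,
 * I/O queue pairs are created with IB_QP_CREATE_INTEGRITY_EN, keyed SGLs
 * are mapped through rdma_rw_ctx_signature_init(), and the signature MR
 * status is checked in the RDMA READ/WRITE completion handlers before the
 * NVMe command is completed.
 */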