diff mbox series

[RFC,2/3] nvme: add copy offload support

Message ID alpine.LRH.2.02.2202011332330.22481@file01.intranet.prod.int.rdu2.redhat.com (mailing list archive)
State Not Applicable
Headers show
Series [RFC,1/3] block: add copy offload support | expand

Commit Message

Mikulas Patocka Feb. 1, 2022, 6:33 p.m. UTC
This patch adds copy offload support to the nvme host driver.

The function nvme_setup_read_token stores the source namespace and sector
range in the token, and the function nvme_setup_write_token retrieves that
information from the token and submits the copy command to the device.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>

---
 drivers/nvme/host/core.c   |   94 +++++++++++++++++++++++++++++++++++++++++++++
 drivers/nvme/host/fc.c     |    5 ++
 drivers/nvme/host/nvme.h   |    1 
 drivers/nvme/host/pci.c    |    5 ++
 drivers/nvme/host/rdma.c   |    5 ++
 drivers/nvme/host/tcp.c    |    5 ++
 drivers/nvme/target/loop.c |    5 ++
 include/linux/nvme.h       |   33 +++++++++++++++
 8 files changed, 153 insertions(+)

Comments

Bart Van Assche Feb. 1, 2022, 7:18 p.m. UTC | #1
On 2/1/22 10:33, Mikulas Patocka wrote:
> +static inline blk_status_t nvme_setup_read_token(struct nvme_ns *ns, struct request *req)
> +{
> +	struct bio *bio = req->bio;
> +	struct nvme_copy_token *token = page_to_virt(bio->bi_io_vec[0].bv_page) + bio->bi_io_vec[0].bv_offset;

Hmm ... shouldn't this function use bvec_kmap_local() instead of 
page_to_virt()?

Thanks,

Bart.
Mikulas Patocka Feb. 1, 2022, 7:25 p.m. UTC | #2
On Tue, 1 Feb 2022, Bart Van Assche wrote:

> On 2/1/22 10:33, Mikulas Patocka wrote:
> > +static inline blk_status_t nvme_setup_read_token(struct nvme_ns *ns, struct
> > request *req)
> > +{
> > +	struct bio *bio = req->bio;
> > +	struct nvme_copy_token *token =
> > page_to_virt(bio->bi_io_vec[0].bv_page) + bio->bi_io_vec[0].bv_offset;
> 
> Hmm ... shouldn't this function use bvec_kmap_local() instead of
> page_to_virt()?
> 
> Thanks,
> 
> Bart.

.bv_page is allocated only in blkdev_issue_copy with alloc_page. So, 
page_to_virt works.

But you are right that bvec_kmap_local may be nicer.

Mikulas
diff mbox series

Patch

Index: linux-2.6/drivers/nvme/host/core.c
===================================================================
--- linux-2.6.orig/drivers/nvme/host/core.c	2022-02-01 18:34:19.000000000 +0100
+++ linux-2.6/drivers/nvme/host/core.c	2022-02-01 18:34:19.000000000 +0100
@@ -975,6 +975,85 @@  static inline blk_status_t nvme_setup_rw
 	return 0;
 }
 
+struct nvme_copy_token {	/* copy-offload token kept in the first bio segment */
+	char subsys[4];		/* "nvme" tag, not NUL-terminated; checked with memcmp() */
+	struct nvme_ns *ns;	/* namespace that filled the token; must match on write */
+	u64 src_sector;		/* source start, taken from bio->bi_iter.bi_sector */
+	u64 sectors;		/* source length, bio->bi_iter.bi_size >> 9 */
+};
+
+/* Stamp the bio's token page with this namespace and the source sector range. */
+static inline blk_status_t nvme_setup_read_token(struct nvme_ns *ns, struct request *req)
+{
+	struct nvme_copy_token *tok = page_to_virt(req->bio->bi_io_vec[0].bv_page) + req->bio->bi_io_vec[0].bv_offset;
+	tok->sectors = req->bio->bi_iter.bi_size >> 9;	/* bytes to 512-byte sectors */
+	tok->src_sector = req->bio->bi_iter.bi_sector;
+	tok->ns = ns;					/* the write side rejects any other namespace */
+	memcpy(tok->subsys, "nvme", 4);			/* 4-byte tag, no NUL terminator */
+	return 0;
+}
+
+/*
+ * Turn the token left by nvme_setup_read_token() into an NVMe Copy command.
+ * BLK_STS_NOTSUPP: foreign/mismatched/unaligned token; BLK_STS_RESOURCE: OOM.
+ */
+static inline blk_status_t nvme_setup_write_token(struct nvme_ns *ns,
+		struct request *req, struct nvme_command *cmnd)
+{
+	struct bio *bio = req->bio;
+	struct nvme_copy_token *token = page_to_virt(bio->bi_io_vec[0].bv_page) + bio->bi_io_vec[0].bv_offset;
+	sector_t src_sector, dst_sector, n_sectors;
+	struct nvme_copy_desc *descriptors;
+	unsigned n_descriptors, i;
+	u64 src_lba, dst_lba, n_lba;
+
+	if (unlikely(memcmp(token->subsys, "nvme", 4)) || unlikely(token->ns != ns))
+		return BLK_STS_NOTSUPP;
+
+	src_sector = token->src_sector;
+	dst_sector = bio->bi_iter.bi_sector;
+	n_sectors = token->sectors;
+	if (WARN_ON(n_sectors != bio->bi_iter.bi_size >> 9))
+		return BLK_STS_NOTSUPP;
+
+	/* Every quantity must be a whole number of logical blocks. */
+	src_lba = nvme_sect_to_lba(ns, src_sector);
+	dst_lba = nvme_sect_to_lba(ns, dst_sector);
+	n_lba = nvme_sect_to_lba(ns, n_sectors);
+	if (unlikely(nvme_lba_to_sect(ns, src_lba) != src_sector) ||
+	    unlikely(nvme_lba_to_sect(ns, dst_lba) != dst_sector) ||
+	    unlikely(nvme_lba_to_sect(ns, n_lba) != n_sectors))
+		return BLK_STS_NOTSUPP;
+	if (WARN_ON(!n_lba))
+		return BLK_STS_NOTSUPP;
+
+	n_descriptors = (n_lba + 0xffff) / 0x10000;	/* 16-bit zero-based length each */
+	if (unlikely(n_descriptors > 256))		/* copy.length is a zero-based u8 */
+		return BLK_STS_NOTSUPP;
+	descriptors = kcalloc(n_descriptors, sizeof(*descriptors), GFP_ATOMIC | __GFP_NOWARN);
+	if (unlikely(!descriptors))
+		return BLK_STS_RESOURCE;
+
+	memset(cmnd, 0, sizeof(*cmnd));
+	cmnd->copy.opcode = nvme_cmd_copy;
+	cmnd->copy.nsid = cpu_to_le32(ns->head->ns_id);
+	cmnd->copy.sdlba = cpu_to_le64(dst_lba);
+	cmnd->copy.length = n_descriptors - 1;
+	for (i = 0; i < n_descriptors; i++) {
+		u64 this_step = min(n_lba, (u64)0x10000);
+		descriptors[i].slba = cpu_to_le64(src_lba);
+		descriptors[i].length = cpu_to_le16(this_step - 1);
+		src_lba += this_step;
+		n_lba -= this_step;
+	}
+
+	req->special_vec.bv_page = virt_to_page(descriptors);
+	req->special_vec.bv_offset = offset_in_page(descriptors);
+	req->special_vec.bv_len = n_descriptors * sizeof(struct nvme_copy_desc);
+	req->rq_flags |= RQF_SPECIAL_PAYLOAD;
+	return 0;
+}
+
 void nvme_cleanup_cmd(struct request *req)
 {
 	if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
@@ -1032,6 +1111,12 @@  blk_status_t nvme_setup_cmd(struct nvme_
 	case REQ_OP_ZONE_APPEND:
 		ret = nvme_setup_rw(ns, req, cmd, nvme_cmd_zone_append);
 		break;
+	case REQ_OP_COPY_READ_TOKEN:
+		ret = nvme_setup_read_token(ns, req);
+		break;
+	case REQ_OP_COPY_WRITE_TOKEN:
+		ret = nvme_setup_write_token(ns, req, cmd);
+		break;
 	default:
 		WARN_ON_ONCE(1);
 		return BLK_STS_IOERR;
@@ -1865,6 +1950,8 @@  static void nvme_update_disk_info(struct
 	blk_queue_max_write_zeroes_sectors(disk->queue,
 					   ns->ctrl->max_zeroes_sectors);
 
+	blk_queue_max_copy_sectors(disk->queue, ns->ctrl->max_copy_sectors);
+
 	set_disk_ro(disk, (id->nsattr & NVME_NS_ATTR_RO) ||
 		test_bit(NVME_NS_FORCE_RO, &ns->flags));
 }
@@ -2891,6 +2978,12 @@  static int nvme_init_non_mdts_limits(str
 	else
 		ctrl->max_zeroes_sectors = 0;
 
+	if (ctrl->oncs & NVME_CTRL_ONCS_COPY) {
+		ctrl->max_copy_sectors = 1U << 24;
+	} else {
+		ctrl->max_copy_sectors = 0;
+	}
+
 	if (nvme_ctrl_limited_cns(ctrl))
 		return 0;
 
@@ -4716,6 +4809,7 @@  static inline void _nvme_check_size(void
 {
 	BUILD_BUG_ON(sizeof(struct nvme_common_command) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_rw_command) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_copy_command) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_identify) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_features) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_download_firmware) != 64);
Index: linux-2.6/drivers/nvme/host/nvme.h
===================================================================
--- linux-2.6.orig/drivers/nvme/host/nvme.h	2022-02-01 18:34:19.000000000 +0100
+++ linux-2.6/drivers/nvme/host/nvme.h	2022-02-01 18:34:19.000000000 +0100
@@ -277,6 +277,7 @@  struct nvme_ctrl {
 #ifdef CONFIG_BLK_DEV_ZONED
 	u32 max_zone_append;
 #endif
+	u32 max_copy_sectors;
 	u16 crdt[3];
 	u16 oncs;
 	u16 oacs;
Index: linux-2.6/include/linux/nvme.h
===================================================================
--- linux-2.6.orig/include/linux/nvme.h	2022-02-01 18:34:19.000000000 +0100
+++ linux-2.6/include/linux/nvme.h	2022-02-01 18:34:19.000000000 +0100
@@ -335,6 +335,8 @@  enum {
 	NVME_CTRL_ONCS_WRITE_ZEROES		= 1 << 3,
 	NVME_CTRL_ONCS_RESERVATIONS		= 1 << 5,
 	NVME_CTRL_ONCS_TIMESTAMP		= 1 << 6,
+	NVME_CTRL_ONCS_VERIFY			= 1 << 7,
+	NVME_CTRL_ONCS_COPY			= 1 << 8,
 	NVME_CTRL_VWC_PRESENT			= 1 << 0,
 	NVME_CTRL_OACS_SEC_SUPP                 = 1 << 0,
 	NVME_CTRL_OACS_DIRECTIVES		= 1 << 5,
@@ -704,6 +706,7 @@  enum nvme_opcode {
 	nvme_cmd_resv_report	= 0x0e,
 	nvme_cmd_resv_acquire	= 0x11,
 	nvme_cmd_resv_release	= 0x15,
+	nvme_cmd_copy		= 0x19,
 	nvme_cmd_zone_mgmt_send	= 0x79,
 	nvme_cmd_zone_mgmt_recv	= 0x7a,
 	nvme_cmd_zone_append	= 0x7d,
@@ -872,6 +875,35 @@  enum {
 	NVME_RW_DTYPE_STREAMS		= 1 << 4,
 };
 
+struct nvme_copy_command {	/* Copy command (opcode nvme_cmd_copy); BUILD_BUG_ON pins it to 64 bytes */
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			nsid;		/* namespace ID, little-endian */
+	__u64			rsvd2;
+	__le64			metadata;
+	union nvme_data_ptr	dptr;
+	__le64			sdlba;		/* starting destination LBA */
+	__u8			length;		/* number of source descriptors, minus one */
+	__u8			control2;
+	__le16			control;
+	__le32			dspec;
+	__le32			reftag;
+	__le16			apptag;
+	__le16			appmask;
+};
+
+struct nvme_copy_desc {		/* one source range of a Copy command */
+	__u64			rsvd;
+	__le64			slba;		/* first source LBA of this range */
+	__le16			length;		/* number of blocks in this range, minus one */
+	__u16			rsvd2;
+	__u32			rsvd3;
+	__le32			reftag;
+	__le16			apptag;
+	__le16			appmask;
+};
+
 struct nvme_dsm_cmd {
 	__u8			opcode;
 	__u8			flags;
@@ -1441,6 +1473,7 @@  struct nvme_command {
 	union {
 		struct nvme_common_command common;
 		struct nvme_rw_command rw;
+		struct nvme_copy_command copy;
 		struct nvme_identify identify;
 		struct nvme_features features;
 		struct nvme_create_cq create_cq;
Index: linux-2.6/drivers/nvme/host/pci.c
===================================================================
--- linux-2.6.orig/drivers/nvme/host/pci.c	2022-02-01 18:34:19.000000000 +0100
+++ linux-2.6/drivers/nvme/host/pci.c	2022-02-01 18:34:19.000000000 +0100
@@ -949,6 +949,11 @@  static blk_status_t nvme_queue_rq(struct
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 	blk_status_t ret;
 
+	if (unlikely((req->cmd_flags & REQ_OP_MASK) == REQ_OP_COPY_READ_TOKEN)) {
+		blk_mq_end_request(req, BLK_STS_OK);
+		return BLK_STS_OK;
+	}
+
 	/*
 	 * We should not need to do this, but we're still using this to
 	 * ensure we can drain requests on a dying queue.
Index: linux-2.6/drivers/nvme/host/fc.c
===================================================================
--- linux-2.6.orig/drivers/nvme/host/fc.c	2022-02-01 18:34:19.000000000 +0100
+++ linux-2.6/drivers/nvme/host/fc.c	2022-02-01 18:34:19.000000000 +0100
@@ -2780,6 +2780,11 @@  nvme_fc_queue_rq(struct blk_mq_hw_ctx *h
 	u32 data_len;
 	blk_status_t ret;
 
+	if (unlikely((rq->cmd_flags & REQ_OP_MASK) == REQ_OP_COPY_READ_TOKEN)) {
+		blk_mq_end_request(rq, BLK_STS_OK);
+		return BLK_STS_OK;
+	}
+
 	if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE ||
 	    !nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
 		return nvme_fail_nonready_command(&queue->ctrl->ctrl, rq);
Index: linux-2.6/drivers/nvme/host/rdma.c
===================================================================
--- linux-2.6.orig/drivers/nvme/host/rdma.c	2022-02-01 18:34:19.000000000 +0100
+++ linux-2.6/drivers/nvme/host/rdma.c	2022-02-01 18:34:19.000000000 +0100
@@ -2048,6 +2048,11 @@  static blk_status_t nvme_rdma_queue_rq(s
 	blk_status_t ret;
 	int err;
 
+	if (unlikely((rq->cmd_flags & REQ_OP_MASK) == REQ_OP_COPY_READ_TOKEN)) {
+		blk_mq_end_request(rq, BLK_STS_OK);
+		return BLK_STS_OK;
+	}
+
 	WARN_ON_ONCE(rq->tag < 0);
 
 	if (!nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
Index: linux-2.6/drivers/nvme/host/tcp.c
===================================================================
--- linux-2.6.orig/drivers/nvme/host/tcp.c	2022-02-01 18:34:19.000000000 +0100
+++ linux-2.6/drivers/nvme/host/tcp.c	2022-02-01 18:34:19.000000000 +0100
@@ -2372,6 +2372,11 @@  static blk_status_t nvme_tcp_queue_rq(st
 	bool queue_ready = test_bit(NVME_TCP_Q_LIVE, &queue->flags);
 	blk_status_t ret;
 
+	if (unlikely((rq->cmd_flags & REQ_OP_MASK) == REQ_OP_COPY_READ_TOKEN)) {
+		blk_mq_end_request(rq, BLK_STS_OK);
+		return BLK_STS_OK;
+	}
+
 	if (!nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
 		return nvme_fail_nonready_command(&queue->ctrl->ctrl, rq);
 
Index: linux-2.6/drivers/nvme/target/loop.c
===================================================================
--- linux-2.6.orig/drivers/nvme/target/loop.c	2022-02-01 18:34:19.000000000 +0100
+++ linux-2.6/drivers/nvme/target/loop.c	2022-02-01 18:34:19.000000000 +0100
@@ -138,6 +138,11 @@  static blk_status_t nvme_loop_queue_rq(s
 	bool queue_ready = test_bit(NVME_LOOP_Q_LIVE, &queue->flags);
 	blk_status_t ret;
 
+	if (unlikely((req->cmd_flags & REQ_OP_MASK) == REQ_OP_COPY_READ_TOKEN)) {
+		blk_mq_end_request(req, BLK_STS_OK);
+		return BLK_STS_OK;
+	}
+
 	if (!nvme_check_ready(&queue->ctrl->ctrl, req, queue_ready))
 		return nvme_fail_nonready_command(&queue->ctrl->ctrl, req);