Message ID | 20221123055827.26996-7-nj.shetty@samsung.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v5,01/10] block: Introduce queue limits for copy-offload support | expand |
On Wed, Nov 23, 2022 at 04:17:41PM +0800, Guixin Liu wrote: > > 在 2022/11/23 13:58, Nitesh Shetty 写道: > > Add support for handling target command on target. > > For bdev-ns we call into blkdev_issue_copy, which the block layer > > completes by a offloaded copy request to backend bdev or by emulating the > > request. > > > > For file-ns we call vfs_copy_file_range to service our request. > > > > Currently target always shows copy capability by setting > > NVME_CTRL_ONCS_COPY in controller ONCS. > > > > Signed-off-by: Nitesh Shetty<nj.shetty@samsung.com> > > Signed-off-by: Anuj Gupta<anuj20.g@samsung.com> > > --- > > drivers/nvme/target/admin-cmd.c | 9 +++- > > drivers/nvme/target/io-cmd-bdev.c | 79 +++++++++++++++++++++++++++++++ > > drivers/nvme/target/io-cmd-file.c | 51 ++++++++++++++++++++ > > drivers/nvme/target/loop.c | 6 +++ > > drivers/nvme/target/nvmet.h | 2 + > > 5 files changed, 145 insertions(+), 2 deletions(-) > > > > diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c > > index c8a061ce3ee5..5ae509ff4b19 100644 > > --- a/drivers/nvme/target/admin-cmd.c > > +++ b/drivers/nvme/target/admin-cmd.c > > @@ -431,8 +431,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) > > id->nn = cpu_to_le32(NVMET_MAX_NAMESPACES); > > id->mnan = cpu_to_le32(NVMET_MAX_NAMESPACES); > > id->oncs = cpu_to_le16(NVME_CTRL_ONCS_DSM | > > - NVME_CTRL_ONCS_WRITE_ZEROES); > > - > > + NVME_CTRL_ONCS_WRITE_ZEROES | NVME_CTRL_ONCS_COPY); > > /* XXX: don't report vwc if the underlying device is write through */ > > id->vwc = NVME_CTRL_VWC_PRESENT; > > @@ -534,6 +533,12 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req) > > if (req->ns->bdev) > > nvmet_bdev_set_limits(req->ns->bdev, id); > > + else { > > + id->msrc = (u8)to0based(BIO_MAX_VECS - 1); > > + id->mssrl = cpu_to_le16(BIO_MAX_VECS << > > + (PAGE_SHIFT - SECTOR_SHIFT)); > > + id->mcl = cpu_to_le32(le16_to_cpu(id->mssrl)); > > + } > > /* > > * We just provide a single LBA 
format that matches what the > > diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c > > index c2d6cea0236b..01f0160125fb 100644 > > --- a/drivers/nvme/target/io-cmd-bdev.c > > +++ b/drivers/nvme/target/io-cmd-bdev.c > > @@ -46,6 +46,19 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id) > > id->npda = id->npdg; > > /* NOWS = Namespace Optimal Write Size */ > > id->nows = to0based(bdev_io_opt(bdev) / bdev_logical_block_size(bdev)); > > + > > + /*Copy limits*/ > > + if (bdev_max_copy_sectors(bdev)) { > > + id->msrc = id->msrc; > > + id->mssrl = cpu_to_le16((bdev_max_copy_sectors(bdev) << > > + SECTOR_SHIFT) / bdev_logical_block_size(bdev)); > > + id->mcl = cpu_to_le32(id->mssrl); > > + } else { > > + id->msrc = (u8)to0based(BIO_MAX_VECS - 1); > > + id->mssrl = cpu_to_le16((BIO_MAX_VECS << PAGE_SHIFT) / > > + bdev_logical_block_size(bdev)); > > + id->mcl = cpu_to_le32(id->mssrl); > > + } > > Based on my understanding of the NVMe protocol 2.0, the mssrl is the max > length per single range entry, > > the mcl is the total max copy length in one copy command, may I ask why mcl > = mssrl? not mcl = mssrl * msrc? > > Best Regards, > > Guixin Liu > You are right: as per the NVMe spec, mcl >= mssrl. Since we decided to make copy offload generic (NVMe, XCOPY, copy across namespaces and so on), we went with a two-bio/bdev design, which is compatible with device mapper. So effectively we are using only one range (msrc). With a single range, I feel it makes sense to use just one of the limits, so we went with mssrl. 
Thanks, Nitesh > > } > > void nvmet_bdev_ns_disable(struct nvmet_ns *ns) > > @@ -184,6 +197,23 @@ static void nvmet_bio_done(struct bio *bio) > > nvmet_req_bio_put(req, bio); > > } > > +static void nvmet_bdev_copy_end_io(void *private, int status) > > +{ > > + struct nvmet_req *req = (struct nvmet_req *)private; > > + int id; > > + > > + if (status) { > > + for (id = 0 ; id < req->nr_range; id++) { > > + if (req->ranges[id].len != req->ranges[id].comp_len) { > > + req->cqe->result.u32 = cpu_to_le32(id); > > + break; > > + } > > + } > > + } > > + kfree(req->ranges); > > + nvmet_req_complete(req, errno_to_nvme_status(req, status)); > > +} > > + > > #ifdef CONFIG_BLK_DEV_INTEGRITY > > static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio, > > struct sg_mapping_iter *miter) > > @@ -450,6 +480,51 @@ static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req) > > } > > } > > +static void nvmet_bdev_execute_copy(struct nvmet_req *req) > > +{ > > + struct nvme_copy_range range; > > + struct range_entry *ranges; > > + struct nvme_command *cmnd = req->cmd; > > + sector_t dest, dest_off = 0; > > + int ret, id, nr_range; > > + > > + nr_range = cmnd->copy.nr_range + 1; > > + dest = le64_to_cpu(cmnd->copy.sdlba) << req->ns->blksize_shift; > > + ranges = kmalloc_array(nr_range, sizeof(*ranges), GFP_KERNEL); > > + > > + for (id = 0 ; id < nr_range; id++) { > > + ret = nvmet_copy_from_sgl(req, id * sizeof(range), > > + &range, sizeof(range)); > > + if (ret) > > + goto out; > > + > > + ranges[id].dst = dest + dest_off; > > + ranges[id].src = le64_to_cpu(range.slba) << > > + req->ns->blksize_shift; > > + ranges[id].len = (le16_to_cpu(range.nlb) + 1) << > > + req->ns->blksize_shift; > > + ranges[id].comp_len = 0; > > + dest_off += ranges[id].len; > > + } > > + req->ranges = ranges; > > + req->nr_range = nr_range; > > + ret = blkdev_issue_copy(req->ns->bdev, req->ns->bdev, ranges, nr_range, > > + nvmet_bdev_copy_end_io, (void *)req, GFP_KERNEL); > > + if (ret) { 
> > + for (id = 0 ; id < nr_range; id++) { > > + if (ranges[id].len != ranges[id].comp_len) { > > + req->cqe->result.u32 = cpu_to_le32(id); > > + break; > > + } > > + } > > + goto out; > > + } else > > + return; > > +out: > > + kfree(ranges); > > + nvmet_req_complete(req, errno_to_nvme_status(req, ret)); > > +} > > + > > u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req) > > { > > switch (req->cmd->common.opcode) { > > @@ -468,6 +543,10 @@ u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req) > > case nvme_cmd_write_zeroes: > > req->execute = nvmet_bdev_execute_write_zeroes; > > return 0; > > + case nvme_cmd_copy: > > + req->execute = nvmet_bdev_execute_copy; > > + return 0; > > + > > default: > > return nvmet_report_invalid_opcode(req); > > } > > diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c > > index 64b47e2a4633..a81d38796e17 100644 > > --- a/drivers/nvme/target/io-cmd-file.c > > +++ b/drivers/nvme/target/io-cmd-file.c > > @@ -338,6 +338,48 @@ static void nvmet_file_dsm_work(struct work_struct *w) > > } > > } > > +static void nvmet_file_copy_work(struct work_struct *w) > > +{ > > + struct nvmet_req *req = container_of(w, struct nvmet_req, f.work); > > + int nr_range; > > + loff_t pos; > > + struct nvme_command *cmnd = req->cmd; > > + int ret = 0, len = 0, src, id; > > + > > + nr_range = cmnd->copy.nr_range + 1; > > + pos = le64_to_cpu(req->cmd->copy.sdlba) << req->ns->blksize_shift; > > + if (unlikely(pos + req->transfer_len > req->ns->size)) { > > + nvmet_req_complete(req, errno_to_nvme_status(req, -ENOSPC)); > > + return; > > + } > > + > > + for (id = 0 ; id < nr_range; id++) { > > + struct nvme_copy_range range; > > + > > + ret = nvmet_copy_from_sgl(req, id * sizeof(range), &range, > > + sizeof(range)); > > + if (ret) > > + goto out; > > + > > + len = (le16_to_cpu(range.nlb) + 1) << (req->ns->blksize_shift); > > + src = (le64_to_cpu(range.slba) << (req->ns->blksize_shift)); > > + ret = vfs_copy_file_range(req->ns->file, src, 
req->ns->file, > > + pos, len, 0); > > +out: > > + if (ret != len) { > > + pos += ret; > > + req->cqe->result.u32 = cpu_to_le32(id); > > + nvmet_req_complete(req, ret < 0 ? > > + errno_to_nvme_status(req, ret) : > > + errno_to_nvme_status(req, -EIO)); > > + return; > > + > > + } else > > + pos += len; > > +} > > + nvmet_req_complete(req, ret); > > + > > +} > > static void nvmet_file_execute_dsm(struct nvmet_req *req) > > { > > if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req))) > > @@ -346,6 +388,12 @@ static void nvmet_file_execute_dsm(struct nvmet_req *req) > > queue_work(nvmet_wq, &req->f.work); > > } > > +static void nvmet_file_execute_copy(struct nvmet_req *req) > > +{ > > + INIT_WORK(&req->f.work, nvmet_file_copy_work); > > + queue_work(nvmet_wq, &req->f.work); > > +} > > + > > static void nvmet_file_write_zeroes_work(struct work_struct *w) > > { > > struct nvmet_req *req = container_of(w, struct nvmet_req, f.work); > > @@ -392,6 +440,9 @@ u16 nvmet_file_parse_io_cmd(struct nvmet_req *req) > > case nvme_cmd_write_zeroes: > > req->execute = nvmet_file_execute_write_zeroes; > > return 0; > > + case nvme_cmd_copy: > > + req->execute = nvmet_file_execute_copy; > > + return 0; > > default: > > return nvmet_report_invalid_opcode(req); > > } > > diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c > > index b45fe3adf015..55802632b407 100644 > > --- a/drivers/nvme/target/loop.c > > +++ b/drivers/nvme/target/loop.c > > @@ -146,6 +146,12 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, > > return ret; > > blk_mq_start_request(req); > > + if (unlikely((req->cmd_flags & REQ_COPY) && > > + (req_op(req) == REQ_OP_READ))) { > > + blk_mq_set_request_complete(req); > > + blk_mq_end_request(req, BLK_STS_OK); > > + return BLK_STS_OK; > > + } > > iod->cmd.common.flags |= NVME_CMD_SGL_METABUF; > > iod->req.port = queue->ctrl->port; > > if (!nvmet_req_init(&iod->req, &queue->nvme_cq, > > diff --git a/drivers/nvme/target/nvmet.h 
b/drivers/nvme/target/nvmet.h > > index dfe3894205aa..3b4c7d2ee45d 100644 > > --- a/drivers/nvme/target/nvmet.h > > +++ b/drivers/nvme/target/nvmet.h > > @@ -391,6 +391,8 @@ struct nvmet_req { > > struct device *p2p_client; > > u16 error_loc; > > u64 error_slba; > > + struct range_entry *ranges; > > + unsigned int nr_range; > > }; > > extern struct workqueue_struct *buffered_io_wq;
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index c8a061ce3ee5..5ae509ff4b19 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -431,7 +431,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 	id->nn = cpu_to_le32(NVMET_MAX_NAMESPACES);
 	id->mnan = cpu_to_le32(NVMET_MAX_NAMESPACES);
 	id->oncs = cpu_to_le16(NVME_CTRL_ONCS_DSM |
-			NVME_CTRL_ONCS_WRITE_ZEROES);
+			NVME_CTRL_ONCS_WRITE_ZEROES | NVME_CTRL_ONCS_COPY);
 
 	/* XXX: don't report vwc if the underlying device is write through */
 	id->vwc = NVME_CTRL_VWC_PRESENT;
@@ -534,6 +534,13 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
 
 	if (req->ns->bdev)
 		nvmet_bdev_set_limits(req->ns->bdev, id);
+	else {
+		id->msrc = (u8)to0based(BIO_MAX_VECS - 1);
+		/* MSSRL is counted in logical blocks, as in the bdev case */
+		id->mssrl = cpu_to_le16((BIO_MAX_VECS << PAGE_SHIFT) >>
+				req->ns->blksize_shift);
+		id->mcl = cpu_to_le32(le16_to_cpu(id->mssrl));
+	}
 
 	/*
 	 * We just provide a single LBA format that matches what the
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index c2d6cea0236b..01f0160125fb 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -46,6 +46,23 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
 	id->npda = id->npdg;
 	/* NOWS = Namespace Optimal Write Size */
 	id->nows = to0based(bdev_io_opt(bdev) / bdev_logical_block_size(bdev));
+
+	/*
+	 * Copy limits: MSSRL and MCL are expressed in logical blocks, and MCL
+	 * is set equal to MSSRL.
+	 */
+	if (bdev_max_copy_sectors(bdev)) {
+		/* MSRC is not modified on this path */
+		id->mssrl = cpu_to_le16(min_t(u32, U16_MAX,
+				bdev_max_copy_sectors(bdev) /
+				(bdev_logical_block_size(bdev) >> SECTOR_SHIFT)));
+	} else {
+		id->msrc = (u8)to0based(BIO_MAX_VECS - 1);
+		id->mssrl = cpu_to_le16((BIO_MAX_VECS << PAGE_SHIFT) /
+				bdev_logical_block_size(bdev));
+	}
+	/* id->mssrl is already little-endian, so convert back before widening */
+	id->mcl = cpu_to_le32(le16_to_cpu(id->mssrl));
 }
 
 void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
@@ -184,6 +201,41 @@ static void nvmet_bio_done(struct bio *bio)
 	nvmet_req_bio_put(req, bio);
 }
 
+/*
+ * Record in the completion entry the index of the first range that was not
+ * copied completely (len != comp_len). The result is left untouched when
+ * every range completed.
+ */
+static void nvmet_bdev_copy_set_failed_range(struct nvmet_req *req,
+		const struct range_entry *ranges, unsigned int nr_range)
+{
+	unsigned int id;
+
+	for (id = 0; id < nr_range; id++) {
+		if (ranges[id].len != ranges[id].comp_len) {
+			req->cqe->result.u32 = cpu_to_le32(id);
+			break;
+		}
+	}
+}
+
+/*
+ * Completion callback handed to blkdev_issue_copy(): on error it records the
+ * first incomplete range, then frees the range array and completes the
+ * request with the NVMe status derived from @status.
+ */
+static void nvmet_bdev_copy_end_io(void *private, int status)
+{
+	struct nvmet_req *req = private;
+
+	if (status)
+		nvmet_bdev_copy_set_failed_range(req, req->ranges,
+						 req->nr_range);
+	kfree(req->ranges);
+	req->ranges = NULL;
+	nvmet_req_complete(req, errno_to_nvme_status(req, status));
+}
+
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
 				struct sg_mapping_iter *miter)
@@ -450,6 +502,69 @@ static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
 	}
 }
 
+/*
+ * Execute an NVMe Copy command on a block device backed namespace: read the
+ * source range descriptors from the command payload, turn them into byte
+ * based range entries and pass them to blkdev_issue_copy(). When that call
+ * succeeds the request is completed from nvmet_bdev_copy_end_io().
+ */
+static void nvmet_bdev_execute_copy(struct nvmet_req *req)
+{
+	struct nvme_copy_range range;
+	struct range_entry *ranges;
+	struct nvme_command *cmnd = req->cmd;
+	sector_t dest, dest_off = 0;
+	unsigned int id, nr_range;
+	u16 status;
+	int ret;
+
+	/* NR is a 0's based value */
+	nr_range = cmnd->copy.nr_range + 1;
+	if (!nvmet_check_data_len_lte(req, nr_range * sizeof(range)))
+		return;
+
+	/* despite their type, dest and dest_off hold byte offsets */
+	dest = le64_to_cpu(cmnd->copy.sdlba) << req->ns->blksize_shift;
+	ranges = kmalloc_array(nr_range, sizeof(*ranges), GFP_KERNEL);
+	if (!ranges) {
+		status = NVME_SC_INTERNAL;
+		goto out;
+	}
+
+	for (id = 0; id < nr_range; id++) {
+		/* this is already an NVMe status, not an errno */
+		status = nvmet_copy_from_sgl(req, id * sizeof(range),
+					&range, sizeof(range));
+		if (status)
+			goto out;
+
+		ranges[id].dst = dest + dest_off;
+		ranges[id].src = le64_to_cpu(range.slba) <<
+					req->ns->blksize_shift;
+		ranges[id].len = (le16_to_cpu(range.nlb) + 1) <<
+					req->ns->blksize_shift;
+		ranges[id].comp_len = 0;
+		dest_off += ranges[id].len;
+	}
+	req->ranges = ranges;
+	req->nr_range = nr_range;
+	ret = blkdev_issue_copy(req->ns->bdev, req->ns->bdev, ranges, nr_range,
+			nvmet_bdev_copy_end_io, (void *)req, GFP_KERNEL);
+	if (!ret)
+		return;
+
+	/*
+	 * NOTE(review): this assumes that blkdev_issue_copy() does not call the
+	 * end_io callback when it returns an error, otherwise the ranges would
+	 * be freed and the request completed twice - confirm.
+	 */
+	nvmet_bdev_copy_set_failed_range(req, ranges, nr_range);
+	status = errno_to_nvme_status(req, ret);
+out:
+	kfree(ranges);
+	nvmet_req_complete(req, status);
+}
+
 u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
 {
 	switch (req->cmd->common.opcode) {
@@ -468,6 +583,9 @@ u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
 	case nvme_cmd_write_zeroes:
 		req->execute = nvmet_bdev_execute_write_zeroes;
 		return 0;
+	case nvme_cmd_copy:
+		req->execute = nvmet_bdev_execute_copy;
+		return 0;
 	default:
 		return nvmet_report_invalid_opcode(req);
 	}
diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
index 64b47e2a4633..a81d38796e17 100644
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -338,6 +338,65 @@ static void nvmet_file_dsm_work(struct work_struct *w)
 	}
 }
 
+/*
+ * Work item that services an NVMe Copy command on a file backed namespace.
+ * Every source range descriptor is read from the command payload and copied
+ * with vfs_copy_file_range(); the destination offset advances by the length
+ * of each range. On failure the index of the failing range is stored in the
+ * completion entry.
+ */
+static void nvmet_file_copy_work(struct work_struct *w)
+{
+	struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
+	struct nvme_command *cmnd = req->cmd;
+	unsigned int id, nr_range;
+	loff_t pos, src, len;
+	ssize_t ret;
+	u16 status;
+
+	/* NR is a 0's based value */
+	nr_range = cmnd->copy.nr_range + 1;
+	pos = le64_to_cpu(cmnd->copy.sdlba) << req->ns->blksize_shift;
+
+	for (id = 0; id < nr_range; id++) {
+		struct nvme_copy_range range;
+
+		/* this is already an NVMe status, not an errno */
+		status = nvmet_copy_from_sgl(req, id * sizeof(range), &range,
+					sizeof(range));
+		if (status)
+			goto out;
+
+		/* loff_t keeps the 64-bit byte offsets from being truncated */
+		len = (loff_t)(le16_to_cpu(range.nlb) + 1) <<
+				req->ns->blksize_shift;
+		src = le64_to_cpu(range.slba) << req->ns->blksize_shift;
+
+		/* neither end of the copy may reach past the namespace */
+		if (unlikely(src + len > req->ns->size ||
+			     pos + len > req->ns->size)) {
+			status = errno_to_nvme_status(req, -ENOSPC);
+			goto out;
+		}
+
+		ret = vfs_copy_file_range(req->ns->file, src, req->ns->file,
+					pos, len, 0);
+		if (ret != len) {
+			/* a short copy is reported as an I/O error */
+			status = errno_to_nvme_status(req,
+					ret < 0 ? (int)ret : -EIO);
+			goto out;
+		}
+		pos += len;
+	}
+
+	nvmet_req_complete(req, 0);
+	return;
+out:
+	req->cqe->result.u32 = cpu_to_le32(id);
+	nvmet_req_complete(req, status);
+}
+
 static void nvmet_file_execute_dsm(struct nvmet_req *req)
 {
 	if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req)))
@@ -346,6 +405,17 @@ static void nvmet_file_execute_dsm(struct nvmet_req *req)
 	queue_work(nvmet_wq, &req->f.work);
 }
 
+static void nvmet_file_execute_copy(struct nvmet_req *req)
+{
+	unsigned int nr_range = req->cmd->copy.nr_range + 1;
+
+	if (!nvmet_check_data_len_lte(req,
+			nr_range * sizeof(struct nvme_copy_range)))
+		return;
+	INIT_WORK(&req->f.work, nvmet_file_copy_work);
+	queue_work(nvmet_wq, &req->f.work);
+}
+
 static void nvmet_file_write_zeroes_work(struct work_struct *w)
 {
 	struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
@@ -392,6 +462,9 @@ u16 nvmet_file_parse_io_cmd(struct nvmet_req *req)
 	case nvme_cmd_write_zeroes:
 		req->execute = nvmet_file_execute_write_zeroes;
 		return 0;
+	case nvme_cmd_copy:
+		req->execute = nvmet_file_execute_copy;
+		return 0;
 	default:
 		return nvmet_report_invalid_opcode(req);
 	}
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index b45fe3adf015..55802632b407 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -146,6 +146,12 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 		return ret;
 
 	blk_mq_start_request(req);
+	if (unlikely((req->cmd_flags & REQ_COPY) &&
+				(req_op(req) == REQ_OP_READ))) {
+		blk_mq_set_request_complete(req);
+		blk_mq_end_request(req, BLK_STS_OK);
+		return BLK_STS_OK;
+	}
 	iod->cmd.common.flags |= NVME_CMD_SGL_METABUF;
 	iod->req.port = queue->ctrl->port;
 	if (!nvmet_req_init(&iod->req, &queue->nvme_cq,
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index dfe3894205aa..3b4c7d2ee45d 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -391,6 +391,8 @@ struct nvmet_req {
 	struct device		*p2p_client;
 	u16			error_loc;
 	u64			error_slba;
+	struct range_entry	*ranges;
+	unsigned int		nr_range;
 };
 
 extern struct workqueue_struct *buffered_io_wq;