
[PATCHv2 2/2] nvme: improved uring polling

Message ID 20230609204517.493889-3-kbusch@meta.com (mailing list archive)
State: New
Series: enhanced nvme uring command polling

Commit Message

Keith Busch June 9, 2023, 8:45 p.m. UTC
From: Keith Busch <kbusch@kernel.org>

Drivers can poll requests directly, so use that. We just need to ensure
the driver's request was allocated from a polled hctx, so a special
driver flag is added to struct io_uring_cmd.

The first advantage is that unshared and multipath namespaces can use
the same polling callback, and multipath polling is guaranteed to hit
the same queue the command was submitted on. Previously, multipath
polling might check a different path and poll the wrong information.

The other benefit is that we don't need a bio payload in order to
poll, allowing commands like 'flush' and 'write zeroes' to be submitted
on the same high-priority queue as read and write commands.

Finally, request-based polling skips the unnecessary bio overhead.

Signed-off-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
---
 drivers/nvme/host/ioctl.c     | 70 ++++++++++-------------------------
 drivers/nvme/host/multipath.c |  2 +-
 drivers/nvme/host/nvme.h      |  2 -
 include/uapi/linux/io_uring.h |  2 +
 4 files changed, 22 insertions(+), 54 deletions(-)
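
The 'flush' benefit above is worth making concrete. Below is a minimal,
untested userspace sketch of what this patch enables: a payload-less
passthrough command completed via IOPOLL. It assumes liburing and the
nvme uring_cmd ABI from <linux/nvme_ioctl.h>; nsfd is a hypothetical
descriptor for an NVMe generic char device (/dev/ngXnY) opened O_RDWR.

#include <liburing.h>
#include <linux/nvme_ioctl.h>
#include <string.h>

static int poll_nvme_flush(int nsfd, __u32 nsid)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	struct nvme_uring_cmd *cmd;
	int ret;

	/* nvme uring_cmd wants the big SQE/CQE formats; IOPOLL enables polling */
	ret = io_uring_queue_init(8, &ring, IORING_SETUP_IOPOLL |
				  IORING_SETUP_SQE128 | IORING_SETUP_CQE32);
	if (ret)
		return ret;

	sqe = io_uring_get_sqe(&ring);
	memset(sqe, 0, 2 * sizeof(*sqe));	/* 128-byte SQE spans two slots */
	sqe->opcode = IORING_OP_URING_CMD;
	sqe->fd = nsfd;
	sqe->cmd_op = NVME_URING_CMD_IO;

	/* flush carries no data; polling it needs this patch's request-based
	 * path, since the old bio-based path had no bio to poll */
	cmd = (struct nvme_uring_cmd *)sqe->cmd;
	cmd->opcode = 0x00;			/* nvme_cmd_flush */
	cmd->nsid = nsid;

	io_uring_submit(&ring);
	ret = io_uring_wait_cqe(&ring, &cqe);	/* drives ->uring_cmd_iopoll() */
	if (!ret) {
		ret = cqe->res;
		io_uring_cqe_seen(&ring, cqe);
	}
	io_uring_queue_exit(&ring);
	return ret;
}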

Comments

kernel test robot June 9, 2023, 11:16 p.m. UTC | #1
Hi Keith,

kernel test robot noticed the following build errors:

[auto build test ERROR on axboe-block/for-next]
[also build test ERROR on next-20230609]
[cannot apply to linus/master v6.4-rc5]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Keith-Busch/block-add-request-polling-helper/20230610-044707
base:   https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git for-next
patch link:    https://lore.kernel.org/r/20230609204517.493889-3-kbusch%40meta.com
patch subject: [PATCHv2 2/2] nvme: improved uring polling
config: riscv-randconfig-r015-20230608 (https://download.01.org/0day-ci/archive/20230610/202306100746.D72gucag-lkp@intel.com/config)
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project.git f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build):
        mkdir -p ~/bin
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install riscv cross compiling tool for clang build
        # apt-get install binutils-riscv64-linux-gnu
        git remote add axboe-block https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
        git fetch axboe-block for-next
        git checkout axboe-block/for-next
        b4 shazam https://lore.kernel.org/r/20230609204517.493889-3-kbusch@meta.com
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=riscv olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang ~/bin/make.cross W=1 O=build_dir ARCH=riscv SHELL=/bin/bash drivers/nvme/host/

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202306100746.D72gucag-lkp@intel.com/

All errors (new ones prefixed by >>):

>> drivers/nvme/host/ioctl.c:546:2: error: expected expression
           else {
           ^
>> drivers/nvme/host/ioctl.c:555:1: error: function definition is not allowed here
   {
   ^
   drivers/nvme/host/ioctl.c:638:1: error: function definition is not allowed here
   {
   ^
   drivers/nvme/host/ioctl.c:648:1: error: function definition is not allowed here
   {
   ^
   drivers/nvme/host/ioctl.c:679:1: error: function definition is not allowed here
   {
   ^
   drivers/nvme/host/ioctl.c:708:1: error: function definition is not allowed here
   {
   ^
   drivers/nvme/host/ioctl.c:722:1: error: function definition is not allowed here
   {
   ^
   drivers/nvme/host/ioctl.c:733:1: error: function definition is not allowed here
   {
   ^
   drivers/nvme/host/ioctl.c:744:1: error: function definition is not allowed here
   {
   ^
   drivers/nvme/host/ioctl.c:769:1: error: function definition is not allowed here
   {
   ^
   drivers/nvme/host/ioctl.c:779:1: error: function definition is not allowed here
   {
   ^
   drivers/nvme/host/ioctl.c:884:1: error: function definition is not allowed here
   {
   ^
   drivers/nvme/host/ioctl.c:912:1: error: function definition is not allowed here
   {
   ^
   drivers/nvme/host/ioctl.c:946:1: error: function definition is not allowed here
   {
   ^
>> drivers/nvme/host/ioctl.c:974:2: error: expected '}'
   }
    ^
   drivers/nvme/host/ioctl.c:532:1: note: to match this '{'
   {
   ^
   15 errors generated.


vim +546 drivers/nvme/host/ioctl.c

   529	
   530	static enum rq_end_io_ret nvme_uring_cmd_end_io_meta(struct request *req,
   531							     blk_status_t err)
   532	{
   533		struct io_uring_cmd *ioucmd = req->end_io_data;
   534		struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
   535	
   536		req->bio = pdu->bio;
   537		pdu->req = req;
   538	
   539		/*
   540		 * For iopoll, complete it directly.
   541		 * Otherwise, move the completion to task work.
   542		 */
   543		if (blk_rq_is_poll(req)) {
   544			WRITE_ONCE(ioucmd->cookie, NULL);
   545			nvme_uring_task_meta_cb(ioucmd, IO_URING_F_UNLOCKED);
 > 546		else {
   547			io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_meta_cb);
   548		}
   549	
   550		return RQ_END_IO_NONE;
   551	}
   552	
   553	static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
   554			struct io_uring_cmd *ioucmd, unsigned int issue_flags, bool vec)
 > 555	{
   556		struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
   557		const struct nvme_uring_cmd *cmd = io_uring_sqe_cmd(ioucmd->sqe);
   558		struct request_queue *q = ns ? ns->queue : ctrl->admin_q;
   559		struct nvme_uring_data d;
   560		struct nvme_command c;
   561		struct request *req;
   562		blk_opf_t rq_flags = REQ_ALLOC_CACHE;
   563		blk_mq_req_flags_t blk_flags = 0;
   564		void *meta = NULL;
   565		int ret;
   566	
   567		c.common.opcode = READ_ONCE(cmd->opcode);
   568		c.common.flags = READ_ONCE(cmd->flags);
   569		if (c.common.flags)
   570			return -EINVAL;
   571	
   572		c.common.command_id = 0;
   573		c.common.nsid = cpu_to_le32(cmd->nsid);
   574		if (!nvme_validate_passthru_nsid(ctrl, ns, le32_to_cpu(c.common.nsid)))
   575			return -EINVAL;
   576	
   577		c.common.cdw2[0] = cpu_to_le32(READ_ONCE(cmd->cdw2));
   578		c.common.cdw2[1] = cpu_to_le32(READ_ONCE(cmd->cdw3));
   579		c.common.metadata = 0;
   580		c.common.dptr.prp1 = c.common.dptr.prp2 = 0;
   581		c.common.cdw10 = cpu_to_le32(READ_ONCE(cmd->cdw10));
   582		c.common.cdw11 = cpu_to_le32(READ_ONCE(cmd->cdw11));
   583		c.common.cdw12 = cpu_to_le32(READ_ONCE(cmd->cdw12));
   584		c.common.cdw13 = cpu_to_le32(READ_ONCE(cmd->cdw13));
   585		c.common.cdw14 = cpu_to_le32(READ_ONCE(cmd->cdw14));
   586		c.common.cdw15 = cpu_to_le32(READ_ONCE(cmd->cdw15));
   587	
   588		if (!nvme_cmd_allowed(ns, &c, 0, ioucmd->file->f_mode))
   589			return -EACCES;
   590	
   591		d.metadata = READ_ONCE(cmd->metadata);
   592		d.addr = READ_ONCE(cmd->addr);
   593		d.data_len = READ_ONCE(cmd->data_len);
   594		d.metadata_len = READ_ONCE(cmd->metadata_len);
   595		d.timeout_ms = READ_ONCE(cmd->timeout_ms);
   596	
   597		if (issue_flags & IO_URING_F_NONBLOCK) {
   598			rq_flags |= REQ_NOWAIT;
   599			blk_flags = BLK_MQ_REQ_NOWAIT;
   600		}
   601		if (issue_flags & IO_URING_F_IOPOLL)
   602			rq_flags |= REQ_POLLED;
   603	
   604		req = nvme_alloc_user_request(q, &c, rq_flags, blk_flags);
   605		if (IS_ERR(req))
   606			return PTR_ERR(req);
   607		req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0;
   608	
   609		if (d.addr && d.data_len) {
   610			ret = nvme_map_user_request(req, d.addr,
   611				d.data_len, nvme_to_user_ptr(d.metadata),
   612				d.metadata_len, 0, &meta, ioucmd, vec);
   613			if (ret)
   614				return ret;
   615		}
   616	
   617		if (blk_rq_is_poll(req)) {
   618			ioucmd->flags |= IORING_URING_CMD_POLLED;
   619			WRITE_ONCE(ioucmd->cookie, req);
   620		}
   621	
   622		/* to free bio on completion, as req->bio will be null at that time */
   623		pdu->bio = req->bio;
   624		pdu->meta_len = d.metadata_len;
   625		req->end_io_data = ioucmd;
   626		if (pdu->meta_len) {
   627			pdu->u.meta = meta;
   628			pdu->u.meta_buffer = nvme_to_user_ptr(d.metadata);
   629			req->end_io = nvme_uring_cmd_end_io_meta;
   630		} else {
   631			req->end_io = nvme_uring_cmd_end_io;
   632		}
   633		blk_execute_rq_nowait(req, false);
   634		return -EIOCBQUEUED;
   635	}
   636
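
For reference, the failure mode here is a single missing brace: the patch
converts the first branch of the meta completion path into a braced block
but never closes it, so the compiler reaches 'else' with the 'if' block
still open and then treats every later function definition as illegally
nested (hence the cascade of "function definition is not allowed here"
errors). A sketch of the presumably intended branch, matching the shape
of the non-meta nvme_uring_cmd_end_io hunk in the patch below:

	/*
	 * For iopoll, complete it directly.
	 * Otherwise, move the completion to task work.
	 */
	if (blk_rq_is_poll(req)) {
		WRITE_ONCE(ioucmd->cookie, NULL);
		nvme_uring_task_meta_cb(ioucmd, IO_URING_F_UNLOCKED);
	} else {	/* the posted patch has "else {" with no closing '}' */
		io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_meta_cb);
	}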

Patch

diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 52ed1094ccbb2..3702013aa2bf9 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -505,7 +505,6 @@  static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
 {
 	struct io_uring_cmd *ioucmd = req->end_io_data;
 	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
-	void *cookie = READ_ONCE(ioucmd->cookie);
 
 	req->bio = pdu->bio;
 	if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
@@ -518,10 +517,12 @@  static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
 	 * For iopoll, complete it directly.
 	 * Otherwise, move the completion to task work.
 	 */
-	if (cookie != NULL && blk_rq_is_poll(req))
+	if (blk_rq_is_poll(req)) {
+		WRITE_ONCE(ioucmd->cookie, NULL);
 		nvme_uring_task_cb(ioucmd, IO_URING_F_UNLOCKED);
-	else
+	} else {
 		io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb);
+	}
 
 	return RQ_END_IO_FREE;
 }
@@ -531,7 +532,6 @@  static enum rq_end_io_ret nvme_uring_cmd_end_io_meta(struct request *req,
 {
 	struct io_uring_cmd *ioucmd = req->end_io_data;
 	struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
-	void *cookie = READ_ONCE(ioucmd->cookie);
 
 	req->bio = pdu->bio;
 	pdu->req = req;
@@ -540,10 +540,12 @@  static enum rq_end_io_ret nvme_uring_cmd_end_io_meta(struct request *req,
 	 * For iopoll, complete it directly.
 	 * Otherwise, move the completion to task work.
 	 */
-	if (cookie != NULL && blk_rq_is_poll(req))
+	if (blk_rq_is_poll(req)) {
+		WRITE_ONCE(ioucmd->cookie, NULL);
 		nvme_uring_task_meta_cb(ioucmd, IO_URING_F_UNLOCKED);
-	else
+	else {
 		io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_meta_cb);
+	}
 
 	return RQ_END_IO_NONE;
 }
@@ -599,7 +601,6 @@  static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 	if (issue_flags & IO_URING_F_IOPOLL)
 		rq_flags |= REQ_POLLED;
 
-retry:
 	req = nvme_alloc_user_request(q, &c, rq_flags, blk_flags);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
@@ -613,17 +614,11 @@  static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 			return ret;
 	}
 
-	if (issue_flags & IO_URING_F_IOPOLL && rq_flags & REQ_POLLED) {
-		if (unlikely(!req->bio)) {
-			/* we can't poll this, so alloc regular req instead */
-			blk_mq_free_request(req);
-			rq_flags &= ~REQ_POLLED;
-			goto retry;
-		} else {
-			WRITE_ONCE(ioucmd->cookie, req->bio);
-			req->bio->bi_opf |= REQ_POLLED;
-		}
+	if (blk_rq_is_poll(req)) {
+		ioucmd->flags |= IORING_URING_CMD_POLLED;
+		WRITE_ONCE(ioucmd->cookie, req);
 	}
+
 	/* to free bio on completion, as req->bio will be null at that time */
 	pdu->bio = req->bio;
 	pdu->meta_len = d.metadata_len;
@@ -782,18 +777,16 @@  int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
 				 struct io_comp_batch *iob,
 				 unsigned int poll_flags)
 {
-	struct bio *bio;
+	struct request *req;
 	int ret = 0;
-	struct nvme_ns *ns;
-	struct request_queue *q;
+
+	if (!(ioucmd->flags & IORING_URING_CMD_POLLED))
+		return 0;
 
 	rcu_read_lock();
-	bio = READ_ONCE(ioucmd->cookie);
-	ns = container_of(file_inode(ioucmd->file)->i_cdev,
-			struct nvme_ns, cdev);
-	q = ns->queue;
-	if (test_bit(QUEUE_FLAG_POLL, &q->queue_flags) && bio && bio->bi_bdev)
-		ret = bio_poll(bio, iob, poll_flags);
+	req = READ_ONCE(ioucmd->cookie);
+	if (req && blk_rq_is_poll(req))
+		ret = blk_rq_poll(req, iob, poll_flags);
 	rcu_read_unlock();
 	return ret;
 }
@@ -885,31 +878,6 @@  int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd,
 	srcu_read_unlock(&head->srcu, srcu_idx);
 	return ret;
 }
-
-int nvme_ns_head_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
-				      struct io_comp_batch *iob,
-				      unsigned int poll_flags)
-{
-	struct cdev *cdev = file_inode(ioucmd->file)->i_cdev;
-	struct nvme_ns_head *head = container_of(cdev, struct nvme_ns_head, cdev);
-	int srcu_idx = srcu_read_lock(&head->srcu);
-	struct nvme_ns *ns = nvme_find_path(head);
-	struct bio *bio;
-	int ret = 0;
-	struct request_queue *q;
-
-	if (ns) {
-		rcu_read_lock();
-		bio = READ_ONCE(ioucmd->cookie);
-		q = ns->queue;
-		if (test_bit(QUEUE_FLAG_POLL, &q->queue_flags) && bio
-				&& bio->bi_bdev)
-			ret = bio_poll(bio, iob, poll_flags);
-		rcu_read_unlock();
-	}
-	srcu_read_unlock(&head->srcu, srcu_idx);
-	return ret;
-}
 #endif /* CONFIG_NVME_MULTIPATH */
 
 int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 2bc159a318ff0..61130b4976b04 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -470,7 +470,7 @@  static const struct file_operations nvme_ns_head_chr_fops = {
 	.unlocked_ioctl	= nvme_ns_head_chr_ioctl,
 	.compat_ioctl	= compat_ptr_ioctl,
 	.uring_cmd	= nvme_ns_head_chr_uring_cmd,
-	.uring_cmd_iopoll = nvme_ns_head_chr_uring_cmd_iopoll,
+	.uring_cmd_iopoll = nvme_ns_chr_uring_cmd_iopoll,
 };
 
 static int nvme_add_ns_head_cdev(struct nvme_ns_head *head)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index a2d4f59e0535a..84aecf53870ae 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -852,8 +852,6 @@  long nvme_dev_ioctl(struct file *file, unsigned int cmd,
 		unsigned long arg);
 int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
 		struct io_comp_batch *iob, unsigned int poll_flags);
-int nvme_ns_head_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
-		struct io_comp_batch *iob, unsigned int poll_flags);
 int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd,
 		unsigned int issue_flags);
 int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd,
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index f222d263bc555..08720c7bd92f8 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -244,8 +244,10 @@  enum io_uring_op {
  * sqe->uring_cmd_flags
  * IORING_URING_CMD_FIXED	use registered buffer; pass this flag
  *				along with setting sqe->buf_index.
+ * IORING_URING_CMD_POLLED	driver use only
  */
 #define IORING_URING_CMD_FIXED	(1U << 0)
+#define IORING_URING_CMD_POLLED	(1U << 31)
 
 
 /*
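
A note on the new flag's placement: IORING_URING_CMD_POLLED takes bit 31
so the driver-owned bit stays clear of the uapi flags, which grow up from
bit 0. Userspace cannot set it through sqe->uring_cmd_flags, because
io_uring's uring_cmd prep rejects unknown bits; a sketch of that check,
assuming IORING_URING_CMD_FIXED is still the only userspace-settable flag:

	/* submission-side validation (sketch): any bit outside the known
	 * uapi flags fails the request, so the driver-owned POLLED bit
	 * can only ever be set by the driver itself */
	ioucmd->flags = READ_ONCE(sqe->uring_cmd_flags);
	if (ioucmd->flags & ~IORING_URING_CMD_FIXED)
		return -EINVAL;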