| Message ID | 20241009193700.3438201-1-ushankar@purestorage.com (mailing list archive) |
|---|---|
| State | New, archived |
| Series | ublk: eliminate unnecessary io_cmds queue |
On Wed, Oct 09, 2024 at 01:37:00PM -0600, Uday Shankar wrote:
> Currently, ublk_drv maintains a per-hctx queue of requests awaiting
> dispatch to the ublk server, and pokes the ubq_daemon to come pick them
> up via the task_work mechanism when needed. But task_work already
> supports internal (lockless) queueing. Reuse this queueing mechanism
> (i.e. have one task_work queue item per request awaiting dispatch)
> instead of maintaining our own queue in ublk_drv.
> 
> Signed-off-by: Uday Shankar <ushankar@purestorage.com>
> ---
>  drivers/block/ublk_drv.c | 34 ++++++----------------------------
>  1 file changed, 6 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
> index 60f6d86ea1e6..2ea108347ec4 100644
> --- a/drivers/block/ublk_drv.c
> +++ b/drivers/block/ublk_drv.c
> @@ -80,6 +80,7 @@ struct ublk_rq_data {
>  
>  struct ublk_uring_cmd_pdu {
>  	struct ublk_queue *ubq;
> +	struct request *req;
>  	u16 tag;
>  };

I'd suggest adding the following build check in the init function, since
there are only 32 bytes in the uring_cmd pdu:

	BUILD_BUG_ON(sizeof(struct ublk_uring_cmd_pdu) >
		     sizeof_field(struct io_uring_cmd, pdu));

> 
> @@ -141,8 +142,6 @@ struct ublk_queue {
>  	struct task_struct *ubq_daemon;
>  	char *io_cmd_buf;
>  
> -	struct llist_head io_cmds;
> -
>  	unsigned long io_addr;	/* mapped vm address */
>  	unsigned int max_io_sz;
>  	bool force_abort;
> @@ -1132,9 +1131,10 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq,
>  		blk_mq_end_request(rq, BLK_STS_IOERR);
>  }
>  
> -static inline void __ublk_rq_task_work(struct request *req,
> +static inline void __ublk_rq_task_work(struct io_uring_cmd *cmd,
>  				       unsigned issue_flags)

`inline` can be removed, and __ublk_rq_task_work() can be renamed to
ublk_rq_task_work() now.

>  {
> +	struct request *req = ublk_get_uring_cmd_pdu(cmd)->req;
>  	struct ublk_queue *ubq = req->mq_hctx->driver_data;
>  	int tag = req->tag;
>  	struct ublk_io *io = &ubq->ios[tag];
> @@ -1211,34 +1211,12 @@ static inline void __ublk_rq_task_work(struct request *req,
>  	ubq_complete_io_cmd(io, UBLK_IO_RES_OK, issue_flags);
>  }
>  
> -static inline void ublk_forward_io_cmds(struct ublk_queue *ubq,
> -					unsigned issue_flags)
> -{
> -	struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds);
> -	struct ublk_rq_data *data, *tmp;
> -
> -	io_cmds = llist_reverse_order(io_cmds);
> -	llist_for_each_entry_safe(data, tmp, io_cmds, node)
> -		__ublk_rq_task_work(blk_mq_rq_from_pdu(data), issue_flags);
> -}
> -
> -static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, unsigned issue_flags)
> -{
> -	struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
> -	struct ublk_queue *ubq = pdu->ubq;
> -
> -	ublk_forward_io_cmds(ubq, issue_flags);
> -}
> -
>  static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
>  {
> -	struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);
> -
> -	if (llist_add(&data->node, &ubq->io_cmds)) {
> -		struct ublk_io *io = &ubq->ios[rq->tag];
> +	struct ublk_io *io = &ubq->ios[rq->tag];
>  
> -		io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb);
> -	}
> +	ublk_get_uring_cmd_pdu(io->cmd)->req = rq;
> +	io_uring_cmd_complete_in_task(io->cmd, __ublk_rq_task_work);
>  }

I'd suggest adding a comment that io_uring_cmd_complete_in_task() needs
to maintain io command order.

Otherwise this patch looks fine.

Thanks,
Ming
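For readers who want to see what Ming's suggested guard would look like, here
is a minimal sketch, assuming it sits in ublk's module init function; the
placement and the surrounding function body are illustrative and not part of
the posted patch, while BUILD_BUG_ON(), sizeof_field(), and the 32-byte pdu
member of struct io_uring_cmd are existing kernel definitions:

	/* Illustrative sketch, not from the posted patch: fail the build if
	 * the per-command pdu (ubq pointer + req pointer + tag) no longer
	 * fits in the 32 bytes io_uring reserves inside struct io_uring_cmd.
	 */
	static int __init ublk_init(void)
	{
		BUILD_BUG_ON(sizeof(struct ublk_uring_cmd_pdu) >
			     sizeof_field(struct io_uring_cmd, pdu));

		/* ... existing registration/init code continues unchanged ... */
		return 0;
	}

The check costs nothing at runtime and turns a silent pdu overflow into a
compile failure the moment another field (like the new req pointer) is added.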
On Thu, Oct 10, 2024 at 11:45:03AM +0800, Ming Lei wrote:
> >  static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
> >  {
> > -	struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);
> > -
> > -	if (llist_add(&data->node, &ubq->io_cmds)) {
> > -		struct ublk_io *io = &ubq->ios[rq->tag];
> > +	struct ublk_io *io = &ubq->ios[rq->tag];
> >  
> > -		io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb);
> > -	}
> > +	ublk_get_uring_cmd_pdu(io->cmd)->req = rq;
> > +	io_uring_cmd_complete_in_task(io->cmd, __ublk_rq_task_work);
> >  }
> 
> I'd suggest adding a comment that io_uring_cmd_complete_in_task() needs
> to maintain io command order.

Sorry, can you explain why this is important? Generally speaking,
out-of-order completion of I/Os is considered okay, so what's the issue
if the dispatch to the ublk server here is not in order?
On Tue, Oct 15, 2024 at 02:51:14PM -0600, Uday Shankar wrote:
> On Thu, Oct 10, 2024 at 11:45:03AM +0800, Ming Lei wrote:
> > >  static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
> > >  {
> > > -	struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);
> > > -
> > > -	if (llist_add(&data->node, &ubq->io_cmds)) {
> > > -		struct ublk_io *io = &ubq->ios[rq->tag];
> > > +	struct ublk_io *io = &ubq->ios[rq->tag];
> > >  
> > > -		io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb);
> > > -	}
> > > +	ublk_get_uring_cmd_pdu(io->cmd)->req = rq;
> > > +	io_uring_cmd_complete_in_task(io->cmd, __ublk_rq_task_work);
> > >  }
> >
> > I'd suggest adding a comment that io_uring_cmd_complete_in_task() needs
> > to maintain io command order.
> 
> Sorry, can you explain why this is important? Generally speaking,
> out-of-order completion of I/Os is considered okay, so what's the issue
> if the dispatch to the ublk server here is not in order?

Out-of-order dispatch is merely tolerated; a proper implementation
should keep IO order. Please see:

1) commit 7d4a93176e01 ("ublk_drv: don't forward io commands in reserve
   order")

2) [Report] requests are submitted to hardware in reverse order from
   nvme/virtio-blk queue_rqs()
   https://lore.kernel.org/linux-block/ZbD7ups50ryrlJ%2FG@fedora/

I am also working on ublk-bpf, which needs this extra list for
submitting IO in batches, so please hold off on this patch for now. I
plan to send the bpf patches out this cycle or next, and we can restart
this cleanup if the bpf work turns out not to be doable.

Thanks,
Ming
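Ming's ordering concern comes straight from how the removed llist worked:
llist_add() pushes each new entry at the head, so draining the list visits
entries newest-first, and the deleted ublk_forward_io_cmds() had to call
llist_reverse_order() to restore submission order before dispatching to the
ublk server. The stand-alone userspace sketch below (illustrative only, no
kernel APIs, hypothetical names) reproduces that reversal with a minimal
push-to-head list:

	#include <stdio.h>

	/* Toy stand-in for the kernel's llist: push-to-head, singly linked. */
	struct node {
		int tag;
		struct node *next;
	};

	static struct node *head;

	/* Mimics llist_add(): the newest node always becomes the list head. */
	static void push(struct node *n)
	{
		n->next = head;
		head = n;
	}

	int main(void)
	{
		struct node nodes[3] = { { .tag = 0 }, { .tag = 1 }, { .tag = 2 } };

		for (int i = 0; i < 3; i++)
			push(&nodes[i]);	/* submit tags 0, 1, 2 in order */

		/* Draining from the head prints 2, 1, 0: the reverse of
		 * submission order, unless the list is reversed first, which
		 * is exactly what llist_reverse_order() did in the old code. */
		for (struct node *n = head; n; n = n->next)
			printf("dispatching tag %d\n", n->tag);

		return 0;
	}

With per-request task_work items, the same guarantee has to come from
io_uring_cmd_complete_in_task() running the callbacks in submission order,
which is the property Ming wants documented at the call site.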
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 60f6d86ea1e6..2ea108347ec4 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -80,6 +80,7 @@ struct ublk_rq_data {
 
 struct ublk_uring_cmd_pdu {
 	struct ublk_queue *ubq;
+	struct request *req;
 	u16 tag;
 };
 
@@ -141,8 +142,6 @@ struct ublk_queue {
 	struct task_struct *ubq_daemon;
 	char *io_cmd_buf;
 
-	struct llist_head io_cmds;
-
 	unsigned long io_addr;	/* mapped vm address */
 	unsigned int max_io_sz;
 	bool force_abort;
@@ -1132,9 +1131,10 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq,
 		blk_mq_end_request(rq, BLK_STS_IOERR);
 }
 
-static inline void __ublk_rq_task_work(struct request *req,
+static inline void __ublk_rq_task_work(struct io_uring_cmd *cmd,
 				       unsigned issue_flags)
 {
+	struct request *req = ublk_get_uring_cmd_pdu(cmd)->req;
 	struct ublk_queue *ubq = req->mq_hctx->driver_data;
 	int tag = req->tag;
 	struct ublk_io *io = &ubq->ios[tag];
@@ -1211,34 +1211,12 @@ static inline void __ublk_rq_task_work(struct request *req,
 	ubq_complete_io_cmd(io, UBLK_IO_RES_OK, issue_flags);
 }
 
-static inline void ublk_forward_io_cmds(struct ublk_queue *ubq,
-					unsigned issue_flags)
-{
-	struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds);
-	struct ublk_rq_data *data, *tmp;
-
-	io_cmds = llist_reverse_order(io_cmds);
-	llist_for_each_entry_safe(data, tmp, io_cmds, node)
-		__ublk_rq_task_work(blk_mq_rq_from_pdu(data), issue_flags);
-}
-
-static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, unsigned issue_flags)
-{
-	struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
-	struct ublk_queue *ubq = pdu->ubq;
-
-	ublk_forward_io_cmds(ubq, issue_flags);
-}
-
 static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)
 {
-	struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);
-
-	if (llist_add(&data->node, &ubq->io_cmds)) {
-		struct ublk_io *io = &ubq->ios[rq->tag];
+	struct ublk_io *io = &ubq->ios[rq->tag];
 
-		io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb);
-	}
+	ublk_get_uring_cmd_pdu(io->cmd)->req = rq;
+	io_uring_cmd_complete_in_task(io->cmd, __ublk_rq_task_work);
 }
 
 static enum blk_eh_timer_return ublk_timeout(struct request *rq)
Currently, ublk_drv maintains a per-hctx queue of requests awaiting
dispatch to the ublk server, and pokes the ubq_daemon to come pick them
up via the task_work mechanism when needed. But task_work already
supports internal (lockless) queueing. Reuse this queueing mechanism
(i.e. have one task_work queue item per request awaiting dispatch)
instead of maintaining our own queue in ublk_drv.

Signed-off-by: Uday Shankar <ushankar@purestorage.com>
---
 drivers/block/ublk_drv.c | 34 ++++++----------------------------
 1 file changed, 6 insertions(+), 28 deletions(-)


base-commit: 7a84944a4bf7abda16291ff13984960d0df4e74a