diff mbox series

[7/8] io_uring/rw: add write support for IOCB_DIO_DEFER

Message ID 20230720181310.71589-8-axboe@kernel.dk (mailing list archive)
State New
Headers show
Series Improve async iomap DIO performance | expand

Commit Message

Jens Axboe July 20, 2023, 6:13 p.m. UTC
If the filesystem dio handler understands IOCB_DIO_DEFER, we'll get
a kiocb->ki_complete() callback with kiocb->dio_complete set. In that
case, rather than complete the IO directly through task_work, queue
up an intermediate task_work handler that first processes this
callback and then immediately completes the request.

For XFS, this avoids a punt through a workqueue, which is a lot less
efficient and adds latency to lower queue depth (or sync) O_DIRECT
writes.

Only do this for non-polled IO, as polled IO doesn't need this kind
of deferral as it always completes within the task itself. This then
avoids a check for deferral in the polled IO completion handler.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/rw.c | 27 ++++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

Comments

Christoph Hellwig July 21, 2023, 6:19 a.m. UTC | #1
Looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>
Darrick J. Wong July 21, 2023, 3:50 p.m. UTC | #2
On Thu, Jul 20, 2023 at 12:13:09PM -0600, Jens Axboe wrote:
> If the filesystem dio handler understands IOCB_DIO_DEFER, we'll get
> a kiocb->ki_complete() callback with kiocb->dio_complete set. In that
> case, rather than complete the IO directly through task_work, queue
> up an intermediate task_work handler that first processes this
> callback and then immediately completes the request.
> 
> For XFS, this avoids a punt through a workqueue, which is a lot less
> efficient and adds latency to lower queue depth (or sync) O_DIRECT
> writes.
> 
> Only do this for non-polled IO, as polled IO doesn't need this kind
> of deferral as it always completes within the task itself. This then
> avoids a check for deferral in the polled IO completion handler.
> 
> Signed-off-by: Jens Axboe <axboe@kernel.dk>

Seems pretty obvious to me, though I'm famous for not being an
experienced io_uring user yet...

Reviewed-by: Darrick J. Wong <djwong@kernel.org>

--D

> ---
>  io_uring/rw.c | 27 ++++++++++++++++++++++++---
>  1 file changed, 24 insertions(+), 3 deletions(-)
> 
> diff --git a/io_uring/rw.c b/io_uring/rw.c
> index 1bce2208b65c..f4f700383b4e 100644
> --- a/io_uring/rw.c
> +++ b/io_uring/rw.c
> @@ -285,6 +285,14 @@ static inline int io_fixup_rw_res(struct io_kiocb *req, long res)
>  
>  void io_req_rw_complete(struct io_kiocb *req, struct io_tw_state *ts)
>  {
> +	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
> +
> +	if (rw->kiocb.dio_complete) {
> +		long res = rw->kiocb.dio_complete(rw->kiocb.private);
> +
> +		io_req_set_res(req, io_fixup_rw_res(req, res), 0);
> +	}
> +
>  	io_req_io_end(req);
>  
>  	if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)) {
> @@ -300,9 +308,11 @@ static void io_complete_rw(struct kiocb *kiocb, long res)
>  	struct io_rw *rw = container_of(kiocb, struct io_rw, kiocb);
>  	struct io_kiocb *req = cmd_to_io_kiocb(rw);
>  
> -	if (__io_complete_rw_common(req, res))
> -		return;
> -	io_req_set_res(req, io_fixup_rw_res(req, res), 0);
> +	if (!rw->kiocb.dio_complete) {
> +		if (__io_complete_rw_common(req, res))
> +			return;
> +		io_req_set_res(req, io_fixup_rw_res(req, res), 0);
> +	}
>  	req->io_task_work.func = io_req_rw_complete;
>  	__io_req_task_work_add(req, IOU_F_TWQ_LAZY_WAKE);
>  }
> @@ -916,6 +926,17 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
>  	}
>  	kiocb->ki_flags |= IOCB_WRITE;
>  
> +	/*
> +	 * For non-polled IO, set IOCB_DIO_DEFER, stating that our handler
> +	 * groks deferring the completion to task context. This isn't
> +	 * necessary and useful for polled IO as that can always complete
> +	 * directly.
> +	 */
> +	if (!(kiocb->ki_flags & IOCB_HIPRI)) {
> +		kiocb->ki_flags |= IOCB_DIO_DEFER;
> +		kiocb->dio_complete = NULL;
> +	}
> +
>  	if (likely(req->file->f_op->write_iter))
>  		ret2 = call_write_iter(req->file, kiocb, &s->iter);
>  	else if (req->file->f_op->write)
> -- 
> 2.40.1
Jens Axboe July 21, 2023, 3:53 p.m. UTC | #3
On 7/21/23 9:50 AM, Darrick J. Wong wrote:
> On Thu, Jul 20, 2023 at 12:13:09PM -0600, Jens Axboe wrote:
>> If the filesystem dio handler understands IOCB_DIO_DEFER, we'll get
>> a kiocb->ki_complete() callback with kiocb->dio_complete set. In that
>> case, rather than complete the IO directly through task_work, queue
>> up an intermediate task_work handler that first processes this
>> callback and then immediately completes the request.
>>
>> For XFS, this avoids a punt through a workqueue, which is a lot less
>> efficient and adds latency to lower queue depth (or sync) O_DIRECT
>> writes.
>>
>> Only do this for non-polled IO, as polled IO doesn't need this kind
>> of deferral as it always completes within the task itself. This then
>> avoids a check for deferral in the polled IO completion handler.
>>
>> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> 
> Seems pretty obvious to me, though I'm famous for not being an
> experienced io_uring user yet...
> 
> Reviewed-by: Darrick J. Wong <djwong@kernel.org>

Thanks, keyword here is "yet" ;-)
diff mbox series

Patch

diff --git a/io_uring/rw.c b/io_uring/rw.c
index 1bce2208b65c..f4f700383b4e 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -285,6 +285,14 @@  static inline int io_fixup_rw_res(struct io_kiocb *req, long res)
 
 void io_req_rw_complete(struct io_kiocb *req, struct io_tw_state *ts)
 {
+	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
+
+	if (rw->kiocb.dio_complete) {
+		long res = rw->kiocb.dio_complete(rw->kiocb.private);
+
+		io_req_set_res(req, io_fixup_rw_res(req, res), 0);
+	}
+
 	io_req_io_end(req);
 
 	if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)) {
@@ -300,9 +308,11 @@  static void io_complete_rw(struct kiocb *kiocb, long res)
 	struct io_rw *rw = container_of(kiocb, struct io_rw, kiocb);
 	struct io_kiocb *req = cmd_to_io_kiocb(rw);
 
-	if (__io_complete_rw_common(req, res))
-		return;
-	io_req_set_res(req, io_fixup_rw_res(req, res), 0);
+	if (!rw->kiocb.dio_complete) {
+		if (__io_complete_rw_common(req, res))
+			return;
+		io_req_set_res(req, io_fixup_rw_res(req, res), 0);
+	}
 	req->io_task_work.func = io_req_rw_complete;
 	__io_req_task_work_add(req, IOU_F_TWQ_LAZY_WAKE);
 }
@@ -916,6 +926,17 @@  int io_write(struct io_kiocb *req, unsigned int issue_flags)
 	}
 	kiocb->ki_flags |= IOCB_WRITE;
 
+	/*
+	 * For non-polled IO, set IOCB_DIO_DEFER, stating that our handler
+	 * groks deferring the completion to task context. This isn't
+	 * necessary and useful for polled IO as that can always complete
+	 * directly.
+	 */
+	if (!(kiocb->ki_flags & IOCB_HIPRI)) {
+		kiocb->ki_flags |= IOCB_DIO_DEFER;
+		kiocb->dio_complete = NULL;
+	}
+
 	if (likely(req->file->f_op->write_iter))
 		ret2 = call_write_iter(req->file, kiocb, &s->iter);
 	else if (req->file->f_op->write)