Message ID | 20240329201241.874888-4-axboe@kernel.dk (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Cleanup and improve MSG_RING performance | expand |
On 2024-03-29 13:09, Jens Axboe wrote: > Use the exported helper for queueing task_work, rather than rolling our > own. > > This improves peak performance of message passing by about 5x in some > basic testing, with 2 threads just sending messages to each other. > Before this change, it was capped at around 700K/sec, with the change > it's at over 4M/sec. > > Signed-off-by: Jens Axboe <axboe@kernel.dk> > --- > io_uring/msg_ring.c | 24 ++++++------------------ > 1 file changed, 6 insertions(+), 18 deletions(-) > > diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c > index d1f66a40b4b4..af8a5f2947b7 100644 > --- a/io_uring/msg_ring.c > +++ b/io_uring/msg_ring.c > @@ -13,7 +13,6 @@ > #include "filetable.h" > #include "msg_ring.h" > > - > /* All valid masks for MSG_RING */ > #define IORING_MSG_RING_MASK (IORING_MSG_RING_CQE_SKIP | \ > IORING_MSG_RING_FLAGS_PASS) > @@ -21,7 +20,6 @@ > struct io_msg { > struct file *file; > struct file *src_file; > - struct callback_head tw; > u64 user_data; > u32 len; > u32 cmd; > @@ -73,26 +71,18 @@ static inline bool io_msg_need_remote(struct io_ring_ctx *target_ctx) > return current != target_ctx->submitter_task; > } > > -static int io_msg_exec_remote(struct io_kiocb *req, task_work_func_t func) > +static int io_msg_exec_remote(struct io_kiocb *req, io_req_tw_func_t func) > { > struct io_ring_ctx *ctx = req->file->private_data; > - struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg); > - struct task_struct *task = READ_ONCE(ctx->submitter_task); > - > - if (unlikely(!task)) > - return -EOWNERDEAD; > - > - init_task_work(&msg->tw, func); > - if (task_work_add(ctx->submitter_task, &msg->tw, TWA_SIGNAL)) > - return -EOWNERDEAD; > > + req->io_task_work.func = func; > + io_req_task_work_add_remote(req, ctx, IOU_F_TWQ_LAZY_WAKE); > return IOU_ISSUE_SKIP_COMPLETE; > } This part looks correct. 
Now with io_req_task_work_add_remote(), req->io_task_work.func is added to tctx->task_list and queued up for execution on the remote ctx->submitter_task via task_work_add(). The end result is that the argument @func is executed on the remote ctx->submitter_task, which is the same outcome as before. Also, I am unsure how the old hand-rolled code interacted with defer taskrun (IORING_SETUP_DEFER_TASKRUN) before, but it is now handled properly in io_req_task_work_add_remote(). > > -static void io_msg_tw_complete(struct callback_head *head) > +static void io_msg_tw_complete(struct io_kiocb *req, struct io_tw_state *ts) > { > - struct io_msg *msg = container_of(head, struct io_msg, tw); > - struct io_kiocb *req = cmd_to_io_kiocb(msg); > + struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg); > struct io_ring_ctx *target_ctx = req->file->private_data; > int ret = 0; > > @@ -205,10 +195,8 @@ static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flag > return ret; > } > > -static void io_msg_tw_fd_complete(struct callback_head *head) > +static void io_msg_tw_fd_complete(struct io_kiocb *req, struct io_tw_state *ts) > { > - struct io_msg *msg = container_of(head, struct io_msg, tw); > - struct io_kiocb *req = cmd_to_io_kiocb(msg); > int ret = -EOWNERDEAD; > > if (!(current->flags & PF_EXITING))
diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c index d1f66a40b4b4..af8a5f2947b7 100644 --- a/io_uring/msg_ring.c +++ b/io_uring/msg_ring.c @@ -13,7 +13,6 @@ #include "filetable.h" #include "msg_ring.h" - /* All valid masks for MSG_RING */ #define IORING_MSG_RING_MASK (IORING_MSG_RING_CQE_SKIP | \ IORING_MSG_RING_FLAGS_PASS) @@ -21,7 +20,6 @@ struct io_msg { struct file *file; struct file *src_file; - struct callback_head tw; u64 user_data; u32 len; u32 cmd; @@ -73,26 +71,18 @@ static inline bool io_msg_need_remote(struct io_ring_ctx *target_ctx) return current != target_ctx->submitter_task; } -static int io_msg_exec_remote(struct io_kiocb *req, task_work_func_t func) +static int io_msg_exec_remote(struct io_kiocb *req, io_req_tw_func_t func) { struct io_ring_ctx *ctx = req->file->private_data; - struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg); - struct task_struct *task = READ_ONCE(ctx->submitter_task); - - if (unlikely(!task)) - return -EOWNERDEAD; - - init_task_work(&msg->tw, func); - if (task_work_add(ctx->submitter_task, &msg->tw, TWA_SIGNAL)) - return -EOWNERDEAD; + req->io_task_work.func = func; + io_req_task_work_add_remote(req, ctx, IOU_F_TWQ_LAZY_WAKE); return IOU_ISSUE_SKIP_COMPLETE; } -static void io_msg_tw_complete(struct callback_head *head) +static void io_msg_tw_complete(struct io_kiocb *req, struct io_tw_state *ts) { - struct io_msg *msg = container_of(head, struct io_msg, tw); - struct io_kiocb *req = cmd_to_io_kiocb(msg); + struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg); struct io_ring_ctx *target_ctx = req->file->private_data; int ret = 0; @@ -205,10 +195,8 @@ static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flag return ret; } -static void io_msg_tw_fd_complete(struct callback_head *head) +static void io_msg_tw_fd_complete(struct io_kiocb *req, struct io_tw_state *ts) { - struct io_msg *msg = container_of(head, struct io_msg, tw); - struct io_kiocb *req = cmd_to_io_kiocb(msg); int ret = 
-EOWNERDEAD; if (!(current->flags & PF_EXITING))
Use the exported helper for queueing task_work, rather than rolling our own. This improves peak performance of message passing by about 5x in some basic testing, with 2 threads just sending messages to each other. Before this change, it was capped at around 700K/sec, with the change it's at over 4M/sec. Signed-off-by: Jens Axboe <axboe@kernel.dk> --- io_uring/msg_ring.c | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-)