
[8/8] io_uring: skip redundant poll wakeups

Message ID 20250128133927.3989681-9-max.kellermann@ionos.com (mailing list archive)
State New
Series Various io_uring micro-optimizations (reducing lock contention)

Commit Message

Max Kellermann Jan. 28, 2025, 1:39 p.m. UTC
Using io_uring with epoll is very expensive because every completion
leads to a __wake_up() call.  Most of these calls are unnecessary: the
polling process has already been woken up but has not yet had a chance
to process the completions.  During this time, wq_has_sleeper() still
returns true, so that check alone is not enough.
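
For context, this is the usage pattern being optimized: a process
registers the io_uring file descriptor with epoll and drains the CQ
ring whenever epoll reports it readable.  The sketch below is
illustrative only, not part of this patch; it assumes liburing and
omits all error handling:

    #include <sys/epoll.h>
    #include <liburing.h>

    static void poll_ring(struct io_uring *ring)
    {
        int epfd = epoll_create1(0);
        struct epoll_event ev = {
            .events = EPOLLIN, /* readable when CQEs are pending */
            .data.fd = ring->ring_fd,
        };
        epoll_ctl(epfd, EPOLL_CTL_ADD, ring->ring_fd, &ev);

        for (;;) {
            struct epoll_event out;
            /* every CQE posted before we drain the ring below may
             * trigger another __wake_up() on ctx->poll_wq */
            epoll_wait(epfd, &out, 1, -1);

            struct io_uring_cqe *cqe;
            while (io_uring_peek_cqe(ring, &cqe) == 0) {
                /* ... handle the completion ... */
                io_uring_cqe_seen(ring, cqe);
            }
        }
    }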

Perf diff for an HTTP server pushing a static file with splice() into
the TCP socket:

    37.91%     -2.00%  [kernel.kallsyms]     [k] queued_spin_lock_slowpath
     1.69%     -1.67%  [kernel.kallsyms]     [k] ep_poll_callback
     0.95%     +1.64%  [kernel.kallsyms]     [k] io_wq_free_work
     0.88%     -0.35%  [kernel.kallsyms]     [k] _raw_spin_lock_irqsave
     1.66%     +0.28%  [kernel.kallsyms]     [k] io_worker_handle_work
     1.14%     +0.18%  [kernel.kallsyms]     [k] _raw_spin_lock
     0.24%     -0.17%  [kernel.kallsyms]     [k] __wake_up

Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
---
 include/linux/io_uring_types.h | 10 ++++++++++
 io_uring/io_uring.c            |  4 ++++
 io_uring/io_uring.h            |  2 +-
 3 files changed, 15 insertions(+), 1 deletion(-)

Patch

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 623d8e798a11..01514cb76095 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -384,6 +384,16 @@ struct io_ring_ctx {
 	struct wait_queue_head		poll_wq;
 	struct io_restriction		restrictions;
 
+	/**
+	 * Non-zero if a process is waiting for #poll_wq and reset to
+	 * zero when #poll_wq is woken up.  This is supposed to
+	 * eliminate redundant wakeup calls.  Only checking
+	 * wq_has_sleeper() is not enough because it will return true
+	 * until the sleeper has actually woken up and has been
+	 * scheduled.
+	 */
+	atomic_t poll_wq_waiting;
+
 	u32			pers_next;
 	struct xarray		personalities;
 
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 137c2066c5a3..b65efd07e9f0 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2760,6 +2760,7 @@ static __cold void io_activate_pollwq_cb(struct callback_head *cb)
 	 * Wake ups for some events between start of polling and activation
 	 * might've been lost due to loose synchronisation.
 	 */
+	atomic_set_release(&ctx->poll_wq_waiting, 0);
 	wake_up_all(&ctx->poll_wq);
 	percpu_ref_put(&ctx->refs);
 }
@@ -2793,6 +2794,9 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
 
 	if (unlikely(!ctx->poll_activated))
 		io_activate_pollwq(ctx);
+
+	atomic_set(&ctx->poll_wq_waiting, 1);
+
 	/*
 	 * provides mb() which pairs with barrier from wq_has_sleeper
 	 * call in io_commit_cqring
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index f65e3f3ede51..186cee066f9f 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -287,7 +287,7 @@ static inline void io_commit_cqring(struct io_ring_ctx *ctx)
 
 static inline void io_poll_wq_wake(struct io_ring_ctx *ctx)
 {
-	if (wq_has_sleeper(&ctx->poll_wq))
+	if (wq_has_sleeper(&ctx->poll_wq) && atomic_xchg_release(&ctx->poll_wq_waiting, 0) > 0)
 		__wake_up(&ctx->poll_wq, TASK_NORMAL, 0,
 				poll_to_key(EPOLL_URING_WAKE | EPOLLIN));
 }
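
The same wakeup-elision idea can be sketched in userspace (an
illustrative analogue only, assuming C11 atomics and POSIX threads;
the names are made up and the usual condition-variable caveats such
as spurious wakeups are glossed over):

    #include <stdatomic.h>
    #include <pthread.h>

    static atomic_int waiting;
    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;

    static void waiter(void)
    {
        pthread_mutex_lock(&lock);
        /* arm the flag before sleeping, like io_uring_poll()
         * setting poll_wq_waiting */
        atomic_store(&waiting, 1);
        pthread_cond_wait(&cond, &lock);
        pthread_mutex_unlock(&lock);
        /* ... process completions ... */
    }

    static void waker(void)
    {
        /* like io_poll_wq_wake(): only the first event after the
         * waiter armed the flag pays for a wakeup; later events
         * see 0 and return immediately */
        if (atomic_exchange(&waiting, 0) > 0) {
            pthread_mutex_lock(&lock);
            pthread_cond_signal(&cond);
            pthread_mutex_unlock(&lock);
        }
    }

As in the patch, the exchange tests and clears the flag in one atomic
step, so concurrent wakers cannot each issue a wakeup for the same
sleep.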