@@ -2495,7 +2495,7 @@ static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode,
* Cannot safely flush overflowed CQEs from here, ensure we wake up
* the task, and the next invocation will do it.
*/
- if (io_should_wake(iowq) || io_has_work(iowq->ctx))
+ if (io_should_wake(iowq) || io_has_work(iowq->ctx) || iowq->hit_timeout)
return autoremove_wake_function(curr, mode, wake_flags, key);
return -1;
}
@@ -2523,6 +2523,37 @@ static bool current_pending_io(void)
return percpu_counter_read_positive(&tctx->inflight);
}
+static enum hrtimer_restart io_cqring_timer_wakeup(struct hrtimer *timer) /* hrtimer callback for the CQ ring wait timeout */
+{
+ struct io_wait_queue *iowq = container_of(timer, struct io_wait_queue, t); /* the timer is embedded in the wait queue as ->t */
+ struct io_ring_ctx *ctx = iowq->ctx;
+
+ WRITE_ONCE(iowq->hit_timeout, 1); /* set before waking: the waiter and io_wake_function() both test this flag */
+ if (ctx->flags & IORING_SETUP_DEFER_TASKRUN)
+ wake_up_process(ctx->submitter_task); /* DEFER_TASKRUN rings: wake the submitter task directly */
+ else
+ io_cqring_wake(ctx); /* all other rings: go through the CQ ring wakeup helper */
+ return HRTIMER_NORESTART; /* the timer is not re-armed from the callback */
+}
+
+static int io_cqring_schedule_timeout(struct io_wait_queue *iowq) /* sleep until woken or until iowq->timeout; returns -ETIME if the timer fired, else 0 */
+{
+ iowq->hit_timeout = 0; /* clear the flag before the timer is armed */
+ hrtimer_init_on_stack(&iowq->t, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ iowq->t.function = io_cqring_timer_wakeup;
+ hrtimer_set_expires_range_ns(&iowq->t, iowq->timeout, 0); /* absolute expiry at iowq->timeout, range delta of 0 */
+ hrtimer_start_expires(&iowq->t, HRTIMER_MODE_ABS);
+
+ if (!READ_ONCE(iowq->hit_timeout)) /* do not sleep if the callback has already run */
+ schedule();
+
+ hrtimer_cancel(&iowq->t); /* the timer may still be pending if we were woken for another reason */
+ destroy_hrtimer_on_stack(&iowq->t);
+ __set_current_state(TASK_RUNNING); /* NOTE(review): presumably undoes a sleep state set by the caller when schedule() was skipped - confirm */
+
+ return READ_ONCE(iowq->hit_timeout) ? -ETIME : 0; /* the flag is only set by io_cqring_timer_wakeup() */
+}
+
static int __io_cqring_wait_schedule(struct io_ring_ctx *ctx,
struct io_wait_queue *iowq)
{
@@ -2536,10 +2567,10 @@ static int __io_cqring_wait_schedule(struct io_ring_ctx *ctx,
io_wait = current->in_iowait;
if (current_pending_io())
current->in_iowait = 1;
- if (iowq->timeout == KTIME_MAX)
+ if (iowq->timeout != KTIME_MAX)
+ ret = io_cqring_schedule_timeout(iowq);
+ else
schedule();
- else if (!schedule_hrtimeout(&iowq->timeout, HRTIMER_MODE_ABS))
- ret = -ETIME;
current->in_iowait = io_wait;
return ret;
}
@@ -40,7 +40,9 @@ struct io_wait_queue {
struct io_ring_ctx *ctx;
unsigned cq_tail;
unsigned nr_timeouts;
+ int hit_timeout;
ktime_t timeout;
+ struct hrtimer t;
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int napi_busy_poll_to;
Implement our own schedule timeout handling using an on-stack hrtimer, in
preparation for having two distinct timeouts and to avoid waking the task
if we don't need to.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/io_uring.c | 39 +++++++++++++++++++++++++++++++++++----
 io_uring/io_uring.h |  2 ++
 2 files changed, 37 insertions(+), 4 deletions(-)