@@ -185,6 +185,7 @@ struct io_ev_fd {
unsigned int eventfd_async: 1;
struct rcu_head rcu;
atomic_t refs;
+ atomic_t ops;
};

struct io_alloc_cache {
@@ -125,6 +125,11 @@ enum {
IO_CHECK_CQ_DROPPED_BIT,
};

+enum {
+ IO_EVENTFD_OP_SIGNAL_BIT,
+ IO_EVENTFD_OP_FREE_BIT,
+};
+
struct io_defer_entry {
struct list_head list;
struct io_kiocb *req;
@@ -479,29 +484,24 @@ static __cold void io_queue_deferred(struct io_ring_ctx *ctx)
}

-static inline void __io_eventfd_put(struct io_ev_fd *ev_fd)
+static void io_eventfd_ops(struct rcu_head *rcu)
{
+ struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
+ int ops = atomic_xchg(&ev_fd->ops, 0);
+
+ if (ops & BIT(IO_EVENTFD_OP_SIGNAL_BIT))
+ eventfd_signal(ev_fd->cq_ev_fd, 1);
+
+ /* IO_EVENTFD_OP_FREE_BIT may not be set here, depending on callback
+ * ordering in a race, but if the refcount drops to zero we know the
+ * struct has to be freed regardless.
+ */
if (atomic_dec_and_test(&ev_fd->refs)) {
eventfd_ctx_put(ev_fd->cq_ev_fd);
kfree(ev_fd);
}
}

-static void io_eventfd_signal_put(struct rcu_head *rcu)
-{
- struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
-
- eventfd_signal(ev_fd->cq_ev_fd, 1);
- __io_eventfd_put(ev_fd);
-}
-
-static void io_eventfd_put(struct rcu_head *rcu)
-{
- struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
-
- __io_eventfd_put(ev_fd);
-}
-
static void io_eventfd_signal(struct io_ring_ctx *ctx)
{
struct io_ev_fd *ev_fd = NULL;
@@ -529,7 +529,10 @@ static void io_eventfd_signal(struct io_ring_ctx *ctx)
eventfd_signal(ev_fd->cq_ev_fd, 1);
} else {
atomic_inc(&ev_fd->refs);
- call_rcu(&ev_fd->rcu, io_eventfd_signal_put);
+ if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops))
+ call_rcu(&ev_fd->rcu, io_eventfd_ops);
+ else
+ atomic_dec(&ev_fd->refs);
}
out:
rcu_read_unlock();
@@ -2509,6 +2512,7 @@ static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
ctx->has_evfd = true;
rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
atomic_set(&ev_fd->refs, 1);
+ atomic_set(&ev_fd->ops, 0);
return 0;
}
@@ -2521,7 +2525,8 @@ static int io_eventfd_unregister(struct io_ring_ctx *ctx)
if (ev_fd) {
ctx->has_evfd = false;
rcu_assign_pointer(ctx->io_ev_fd, NULL);
- call_rcu(&ev_fd->rcu, io_eventfd_put);
+ if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_FREE_BIT), &ev_fd->ops))
+ call_rcu(&ev_fd->rcu, io_eventfd_ops);
return 0;
}

It is not allowed to use call_rcu() twice with the same rcu_head; this
could have happened here when multiple signals occurred concurrently.
Instead, keep track of the pending ops in a bitset and only queue up the
callback if it is not already queued. The refcounting is still required,
since as far as I can tell there is otherwise nothing preventing one
invocation of io_eventfd_ops from starting before a previous one has
completed.

Fixes: "io_uring: signal registered eventfd to process deferred task work"
Signed-off-by: Dylan Yudaken <dylany@fb.com>
---
Note I did not put a hash in the Fixes tag as it has not yet been merged.
You could also just merge it into that commit if you like.

Dylan

 include/linux/io_uring_types.h |  1 +
 io_uring/io_uring.c            | 41 +++++++++++++++++++---------------
 2 files changed, 24 insertions(+), 18 deletions(-)

base-commit: 32bde07ca566822d14f5faadcce86629d89b072b
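[Editor's note] For readers who want to poke at the gating pattern outside
the kernel, here is a minimal userspace sketch of the same idea, assuming
C11 atomics and pthreads; it is not the io_uring code. A detached thread
stands in for call_rcu(), the grace period is ignored, the FREE path is
omitted, and the names (ev_signal(), ev_ops_cb(), ev_put()) are
hypothetical analogues. It demonstrates the invariant the patch relies on:
only the caller that sets the first bit in ops schedules the callback, so
the single callback slot is never queued twice, while the per-schedule
reference keeps the object alive even if callback runs overlap.

/* gate.c - build with: cc -std=c11 -pthread gate.c */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

enum { OP_SIGNAL_BIT };			/* analogue of IO_EVENTFD_OP_SIGNAL_BIT */

struct ev {
	atomic_int refs;		/* analogue of io_ev_fd.refs */
	atomic_int ops;			/* analogue of io_ev_fd.ops  */
};

static void ev_put(struct ev *ev)
{
	/* analogue of atomic_dec_and_test() + kfree() */
	if (atomic_fetch_sub(&ev->refs, 1) == 1) {
		puts("last reference dropped, freeing");
		free(ev);
	}
}

/* The deferred callback: claim and handle every op queued so far. */
static void *ev_ops_cb(void *arg)
{
	struct ev *ev = arg;
	int ops = atomic_exchange(&ev->ops, 0);

	if (ops & (1 << OP_SIGNAL_BIT))
		puts("signalling once for the whole batch");
	ev_put(ev);			/* drop the scheduler's reference */
	return NULL;
}

/* Analogue of the fixed io_eventfd_signal() slow path. */
static void ev_signal(struct ev *ev)
{
	pthread_t t;

	atomic_fetch_add(&ev->refs, 1);
	/*
	 * Only the caller that transitions ops from empty schedules the
	 * callback; concurrent callers just OR their bit in. This is what
	 * prevents queueing the one rcu_head twice in the real patch.
	 */
	if (!atomic_fetch_or(&ev->ops, 1 << OP_SIGNAL_BIT)) {
		pthread_create(&t, NULL, ev_ops_cb, ev);
		pthread_detach(t);
	} else {
		atomic_fetch_sub(&ev->refs, 1);
	}
}

static void *signaller(void *arg)
{
	for (int i = 0; i < 1000; i++)
		ev_signal(arg);
	return NULL;
}

int main(void)
{
	struct ev *ev = malloc(sizeof(*ev));
	pthread_t th[4];

	if (!ev)
		return 1;
	atomic_init(&ev->refs, 1);	/* the registration reference */
	atomic_init(&ev->ops, 0);

	for (int i = 0; i < 4; i++)
		pthread_create(&th[i], NULL, signaller, ev);
	for (int i = 0; i < 4; i++)
		pthread_join(th[i], NULL);

	ev_put(ev);			/* "unregister": drop the initial ref */
	pthread_exit(NULL);		/* wait for any detached callbacks */
}

Note the asymmetry the in-kernel comment also points out: the callback
frees on the refcount hitting zero rather than on seeing a FREE bit, since
in a race the callback run that performs the free may have been scheduled
by a signaller rather than by unregister.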