diff mbox series

[for-next] io_uring: do not double call_rcu with eventfd

Message ID 20220901093232.1971404-1-dylany@fb.com (mailing list archive)
State New
Headers show
Series [for-next] io_uring: do not double call_rcu with eventfd | expand

Commit Message

Dylan Yudaken Sept. 1, 2022, 9:32 a.m. UTC
It is not allowed to use call_rcu twice with the same rcu head. This could
have happened with multiple signals occurring concurrently.

Instead keep track of ops in a bitset and only queue up the call if it is
not already queued up.

The refcounting is still required since, as far as I can tell, there is
otherwise no protection against a second call to io_eventfd_ops being
started before an earlier call has completed.

Fixes: "io_uring: signal registered eventfd to process deferred task work"
Signed-off-by: Dylan Yudaken <dylany@fb.com>
---

Note I did not put a hash in the Fixes tag as it has not yet been merged.
You could also just merge it into that commit if you like.

Dylan

 include/linux/io_uring_types.h |  1 +
 io_uring/io_uring.c            | 41 +++++++++++++++++++---------------
 2 files changed, 24 insertions(+), 18 deletions(-)


base-commit: 32bde07ca566822d14f5faadcce86629d89b072b

Comments

Jens Axboe Sept. 1, 2022, 3:20 p.m. UTC | #1
On 9/1/22 3:32 AM, Dylan Yudaken wrote:
> It is not allowed to use call_rcu twice with the same rcu head. This could
> have happened with multiple signals occurring concurrently.
> 
> Instead keep track of ops in a bitset and only queue up the call if it is
> not already queued up.
> 
> The refcounting is still required since as far as I can tell there is
> otherwise no protection from a call to io_eventfd_ops being started and
> before it completes another call being started.
> 
> Fixes: "io_uring: signal registered eventfd to process deferred task work"
> Signed-off-by: Dylan Yudaken <dylany@fb.com>
> ---
> 
> Note I did not put a hash in the Fixes tag as it has not yet been merged.
> You could also just merge it into that commit if you like.

I folded it into that commit, thanks!
diff mbox series

Patch

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 42494176434a..aa4d90a53866 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -185,6 +185,7 @@  struct io_ev_fd {
 	unsigned int		eventfd_async: 1;
 	struct rcu_head		rcu;
 	atomic_t		refs;
+	atomic_t		ops;
 };
 
 struct io_alloc_cache {
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index cdd8d10e9638..15c7b2f4c5a3 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -125,6 +125,11 @@  enum {
 	IO_CHECK_CQ_DROPPED_BIT,
 };
 
+enum {
+	IO_EVENTFD_OP_SIGNAL_BIT,
+	IO_EVENTFD_OP_FREE_BIT,
+};
+
 struct io_defer_entry {
 	struct list_head	list;
 	struct io_kiocb		*req;
@@ -479,29 +484,24 @@  static __cold void io_queue_deferred(struct io_ring_ctx *ctx)
 }
 
 
-static inline void __io_eventfd_put(struct io_ev_fd *ev_fd)
+static void io_eventfd_ops(struct rcu_head *rcu)
 {
+	struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
+	int ops = atomic_xchg(&ev_fd->ops, 0);
+
+	if (ops & BIT(IO_EVENTFD_OP_SIGNAL_BIT))
+		eventfd_signal(ev_fd->cq_ev_fd, 1);
+
+	/* IO_EVENTFD_OP_FREE_BIT may not be set here depending on callback
+	 * ordering in a race but if references are 0 we know we have to free
+	 * it regardless.
+	 */
 	if (atomic_dec_and_test(&ev_fd->refs)) {
 		eventfd_ctx_put(ev_fd->cq_ev_fd);
 		kfree(ev_fd);
 	}
 }
 
-static void io_eventfd_signal_put(struct rcu_head *rcu)
-{
-	struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
-
-	eventfd_signal(ev_fd->cq_ev_fd, 1);
-	__io_eventfd_put(ev_fd);
-}
-
-static void io_eventfd_put(struct rcu_head *rcu)
-{
-	struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu);
-
-	__io_eventfd_put(ev_fd);
-}
-
 static void io_eventfd_signal(struct io_ring_ctx *ctx)
 {
 	struct io_ev_fd *ev_fd = NULL;
@@ -529,7 +529,10 @@  static void io_eventfd_signal(struct io_ring_ctx *ctx)
 		eventfd_signal(ev_fd->cq_ev_fd, 1);
 	} else {
 		atomic_inc(&ev_fd->refs);
-		call_rcu(&ev_fd->rcu, io_eventfd_signal_put);
+		if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops))
+			call_rcu(&ev_fd->rcu, io_eventfd_ops);
+		else
+			atomic_dec(&ev_fd->refs);
 	}
 
 out:
@@ -2509,6 +2512,7 @@  static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
 	ctx->has_evfd = true;
 	rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
 	atomic_set(&ev_fd->refs, 1);
+	atomic_set(&ev_fd->ops, 0);
 	return 0;
 }
 
@@ -2521,7 +2525,8 @@  static int io_eventfd_unregister(struct io_ring_ctx *ctx)
 	if (ev_fd) {
 		ctx->has_evfd = false;
 		rcu_assign_pointer(ctx->io_ev_fd, NULL);
-		call_rcu(&ev_fd->rcu, io_eventfd_put);
+		if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_FREE_BIT), &ev_fd->ops))
+			call_rcu(&ev_fd->rcu, io_eventfd_ops);
 		return 0;
 	}