@@ -201,7 +201,7 @@ again:
* Sync internal state with kernel ring state on the SQ side. Returns the
* number of pending items in the SQ ring, for the shared ring.
*/
-unsigned __io_uring_flush_sq(struct io_uring *ring)
+static unsigned __io_uring_flush_sq(struct io_uring *ring)
{
struct io_uring_sq *sq = &ring->sq;
unsigned tail = sq->sqe_tail;
@@ -266,3 +266,36 @@ bool t_probe_defer_taskrun(void)
io_uring_queue_exit(&ring);
return true;
}
+
+/*
+ * Sync internal state with kernel ring state on the SQ side. Returns the
+ * number of pending items in the SQ ring, for the shared ring.
+ */
+unsigned __io_uring_flush_sq(struct io_uring *ring)
+{
+	struct io_uring_sq *sq = &ring->sq;
+	unsigned tail = sq->sqe_tail;
+
+	if (sq->sqe_head != tail) {
+		sq->sqe_head = tail;
+		/*
+		 * Ensure kernel sees the SQE updates before the tail update.
+		 */
+		if (!(ring->flags & IORING_SETUP_SQPOLL))
+			IO_URING_WRITE_ONCE(*sq->ktail, tail);
+		else
+			io_uring_smp_store_release(sq->ktail, tail);
+	}
+	/*
+	 * This _may_ look problematic, as we're not supposed to be reading
+	 * SQ->head without acquire semantics. When we're in SQPOLL mode, the
+	 * kernel submitter could be updating this right now. For non-SQPOLL,
+	 * the task itself does it, and there's no potential race. But even for
+	 * SQPOLL, the load is going to be potentially out-of-date the very
+	 * instant it's done, regardless of whether or not it's done
+	 * atomically. Worst case, we're going to be over-estimating what
+	 * we can submit. The point is, we need to be able to deal with this
+	 * situation regardless of any perceived atomicity.
+	 */
+	return tail - *sq->khead;
+}
@@ -85,6 +85,8 @@ enum t_setup_ret t_register_buffers(struct io_uring *ring,
bool t_probe_defer_taskrun(void);
+unsigned __io_uring_flush_sq(struct io_uring *ring);
+
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#ifdef __cplusplus
@@ -268,8 +268,6 @@ static int test_io(const char *file, int tc, int read, int sqthread,
return ret;
}
-extern unsigned __io_uring_flush_sq(struct io_uring *ring);
-
/*
* Send a passthrough command that nvme will fail during submission.
 * This comes in handy for testing error handling.
@@ -201,8 +201,6 @@ err:
return 1;
}
-extern unsigned __io_uring_flush_sq(struct io_uring *ring);
-
/*
 * if we are polling, io_uring_submit needs to always enter the
* kernel to fetch events
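
Usage note (not part of the patch itself): with the declaration now shared via helpers.h, a test can pair __io_uring_flush_sq() with the raw io_uring_enter() syscall wrapper instead of io_uring_submit(), which is the pattern the removed extern declarations were supporting. A minimal sketch, assuming liburing's exported io_uring_enter() wrapper and the plain ring_fd (not a registered ring fd); the submit_raw() helper name is illustrative only:

	#include "liburing.h"
	#include "helpers.h"

	/* Illustrative: sync the SQ ring with the kernel, then submit the
	 * pending entries with an explicit io_uring_enter() call so the test
	 * controls the enter flags (e.g. IORING_ENTER_GETEVENTS) itself. */
	static int submit_raw(struct io_uring *ring, unsigned enter_flags)
	{
		unsigned to_submit = __io_uring_flush_sq(ring);

		return io_uring_enter(ring->ring_fd, to_submit, 0, enter_flags, NULL);
	}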