
[7/8] io_uring: cache io_kiocb->flags in variable

Message ID 20250128133927.3989681-8-max.kellermann@ionos.com (mailing list archive)
State New
Series Various io_uring micro-optimizations (reducing lock contention)

Commit Message

Max Kellermann Jan. 28, 2025, 1:39 p.m. UTC
Cache req->flags in a local variable. This eliminates several
redundant reads, some of which the compiler probably cannot optimize
away.

Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
---
 io_uring/io_uring.c | 59 +++++++++++++++++++++++++++------------------
 1 file changed, 35 insertions(+), 24 deletions(-)
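
For illustration, here is a minimal, self-contained sketch of the pattern the
patch applies: read the flags word once into a local and test every bit against
that copy. The ex_req struct and EX_F_* bits are made-up stand-ins for io_kiocb
and REQ_F_*, not the kernel definitions.

#include <stdio.h>

/* Made-up stand-ins for io_kiocb and the REQ_F_* bits. */
#define EX_F_BUFFER_SELECTED	(1u << 0)
#define EX_F_NEED_CLEANUP	(1u << 1)
#define EX_F_CREDS		(1u << 2)

struct ex_req {
	unsigned int flags;
};

/* Before: every test dereferences req again; after a call the compiler
 * cannot see through, it may have to reload req->flags from memory. */
static void clean_op_rereads(struct ex_req *req)
{
	if (req->flags & EX_F_BUFFER_SELECTED)
		puts("drop selected buffer");
	if (req->flags & EX_F_NEED_CLEANUP)
		puts("run opcode cleanup");
	if (req->flags & EX_F_CREDS)
		puts("put credentials");
}

/* After: one load into a local; all tests use the cached copy,
 * mirroring what the patch does with req_flags. */
static void clean_op_cached(struct ex_req *req)
{
	const unsigned int req_flags = req->flags;

	if (req_flags & EX_F_BUFFER_SELECTED)
		puts("drop selected buffer");
	if (req_flags & EX_F_NEED_CLEANUP)
		puts("run opcode cleanup");
	if (req_flags & EX_F_CREDS)
		puts("put credentials");
}

int main(void)
{
	struct ex_req req = { .flags = EX_F_NEED_CLEANUP | EX_F_CREDS };

	clean_op_rereads(&req);
	clean_op_cached(&req);
	return 0;
}

The caching only makes sense where req->flags is not expected to change
underneath, or where the local is explicitly re-synchronized, as the patch does
in io_prep_async_work().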

Comments

Pavel Begunkov Jan. 29, 2025, 7:11 p.m. UTC | #1
On 1/28/25 13:39, Max Kellermann wrote:
> This eliminates several redundant reads, some of which probably cannot
> be optimized away by the compiler.

Let's not; it hurts readability with no clear benefit. In most cases
the compiler will be able to optimise it just where it matters, and in
cold paths we're weighing the cost of re-reading the field against
taking locks and doing indirect calls; even then, the cached value
would likely need to be saved onto the stack and loaded back.

The only place where it might be worth it is io_issue_sqe(), and
even then I'd doubt it.
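
To make the trade-off concrete, here is a small hedged sketch (hypothetical
req/op_fn names, not io_uring code): across an indirect call the compiler
generally has to reload r->flags, which is the kind of redundant read the patch
removes, while the cached local has to survive the call (possibly via a stack
spill) and deliberately keeps the pre-call snapshot.

#include <stdio.h>

struct req {
	unsigned int flags;
};

/* Indirect call the compiler generally cannot see through, loosely
 * analogous to the ->cleanup()/->issue() calls in io_uring. */
typedef void (*op_fn)(struct req *);

static void some_op(struct req *r)
{
	r->flags |= 2u;		/* the callee really may change the flags */
}

static unsigned int without_cache(struct req *r, op_fn op)
{
	unsigned int n = 0;

	if (r->flags & 1u)	/* first load of r->flags */
		n++;
	op(r);			/* opaque call: the flags may have changed */
	if (r->flags & 2u)	/* typically compiled as a second load */
		n++;
	return n;
}

static unsigned int with_cache(struct req *r, op_fn op)
{
	const unsigned int flags = r->flags;	/* single load, cached */
	unsigned int n = 0;

	if (flags & 1u)
		n++;
	op(r);			/* the cached value must live across this call */
	if (flags & 2u)		/* deliberately tests the pre-call snapshot */
		n++;
	return n;
}

int main(void)
{
	struct req a = { .flags = 1u };
	struct req b = { .flags = 1u };

	/* Prints "2 1": the two variants can even differ semantically
	 * when the callee modifies the flags. */
	printf("%u %u\n", without_cache(&a, some_op), with_cache(&b, some_op));
	return 0;
}

Whether caching helps depends on how much other work sits between the reads; in
the cold paths touched by the patch, the completion lock and the indirect
->cleanup() call dominate either way, which is the objection raised here.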

Patch

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 7bfbc7c22367..137c2066c5a3 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -391,28 +391,30 @@  static bool req_need_defer(struct io_kiocb *req, u32 seq)
 
 static void io_clean_op(struct io_kiocb *req)
 {
-	if (req->flags & REQ_F_BUFFER_SELECTED) {
+	const unsigned int req_flags = req->flags;
+
+	if (req_flags & REQ_F_BUFFER_SELECTED) {
 		spin_lock(&req->ctx->completion_lock);
 		io_kbuf_drop(req);
 		spin_unlock(&req->ctx->completion_lock);
 	}
 
-	if (req->flags & REQ_F_NEED_CLEANUP) {
+	if (req_flags & REQ_F_NEED_CLEANUP) {
 		const struct io_cold_def *def = &io_cold_defs[req->opcode];
 
 		if (def->cleanup)
 			def->cleanup(req);
 	}
-	if ((req->flags & REQ_F_POLLED) && req->apoll) {
+	if ((req_flags & REQ_F_POLLED) && req->apoll) {
 		kfree(req->apoll->double_poll);
 		kfree(req->apoll);
 		req->apoll = NULL;
 	}
-	if (req->flags & REQ_F_INFLIGHT)
+	if (req_flags & REQ_F_INFLIGHT)
 		atomic_dec(&req->tctx->inflight_tracked);
-	if (req->flags & REQ_F_CREDS)
+	if (req_flags & REQ_F_CREDS)
 		put_cred(req->creds);
-	if (req->flags & REQ_F_ASYNC_DATA) {
+	if (req_flags & REQ_F_ASYNC_DATA) {
 		kfree(req->async_data);
 		req->async_data = NULL;
 	}
@@ -453,31 +455,37 @@  static noinline void __io_arm_ltimeout(struct io_kiocb *req)
 	io_queue_linked_timeout(__io_prep_linked_timeout(req));
 }
 
-static inline void io_arm_ltimeout(struct io_kiocb *req)
+static inline void _io_arm_ltimeout(struct io_kiocb *req, unsigned int req_flags)
 {
-	if (unlikely(req->flags & REQ_F_ARM_LTIMEOUT))
+	if (unlikely(req_flags & REQ_F_ARM_LTIMEOUT))
 		__io_arm_ltimeout(req);
 }
 
+static inline void io_arm_ltimeout(struct io_kiocb *req)
+{
+	_io_arm_ltimeout(req, req->flags);
+}
+
 static void io_prep_async_work(struct io_kiocb *req)
 {
+	unsigned int req_flags = req->flags;
 	const struct io_issue_def *def = &io_issue_defs[req->opcode];
 	struct io_ring_ctx *ctx = req->ctx;
 
-	if (!(req->flags & REQ_F_CREDS)) {
-		req->flags |= REQ_F_CREDS;
+	if (!(req_flags & REQ_F_CREDS)) {
+		req_flags = req->flags |= REQ_F_CREDS;
 		req->creds = get_current_cred();
 	}
 
 	req->work.list.next = NULL;
 	atomic_set(&req->work.flags, 0);
-	if (req->flags & REQ_F_FORCE_ASYNC)
+	if (req_flags & REQ_F_FORCE_ASYNC)
 		atomic_or(IO_WQ_WORK_CONCURRENT, &req->work.flags);
 
-	if (req->file && !(req->flags & REQ_F_FIXED_FILE))
-		req->flags |= io_file_get_flags(req->file);
+	if (req->file && !(req_flags & REQ_F_FIXED_FILE))
+		req_flags = req->flags |= io_file_get_flags(req->file);
 
-	if (req->file && (req->flags & REQ_F_ISREG)) {
+	if (req->file && (req_flags & REQ_F_ISREG)) {
 		bool should_hash = def->hash_reg_file;
 
 		/* don't serialize this request if the fs doesn't need it */
@@ -1703,13 +1711,14 @@  static __cold void io_drain_req(struct io_kiocb *req)
 	spin_unlock(&ctx->completion_lock);
 }
 
-static bool io_assign_file(struct io_kiocb *req, const struct io_issue_def *def,
+static bool io_assign_file(struct io_kiocb *req, unsigned int req_flags,
+			   const struct io_issue_def *def,
 			   unsigned int issue_flags)
 {
 	if (req->file || !def->needs_file)
 		return true;
 
-	if (req->flags & REQ_F_FIXED_FILE)
+	if (req_flags & REQ_F_FIXED_FILE)
 		req->file = io_file_get_fixed(req, req->cqe.fd, issue_flags);
 	else
 		req->file = io_file_get_normal(req, req->cqe.fd);
@@ -1719,14 +1728,15 @@  static bool io_assign_file(struct io_kiocb *req, const struct io_issue_def *def,
 
 static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
 {
+	const unsigned int req_flags = req->flags;
 	const struct io_issue_def *def = &io_issue_defs[req->opcode];
 	const struct cred *creds = NULL;
 	int ret;
 
-	if (unlikely(!io_assign_file(req, def, issue_flags)))
+	if (unlikely(!io_assign_file(req, req_flags, def, issue_flags)))
 		return -EBADF;
 
-	if (unlikely((req->flags & REQ_F_CREDS) && req->creds != current_cred()))
+	if (unlikely((req_flags & REQ_F_CREDS) && req->creds != current_cred()))
 		creds = override_creds(req->creds);
 
 	if (!def->audit_skip)
@@ -1783,18 +1793,19 @@  struct io_wq_work *io_wq_free_work(struct io_wq_work *work)
 void io_wq_submit_work(struct io_wq_work *work)
 {
 	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+	const unsigned int req_flags = req->flags;
 	const struct io_issue_def *def = &io_issue_defs[req->opcode];
 	unsigned int issue_flags = IO_URING_F_UNLOCKED | IO_URING_F_IOWQ;
 	bool needs_poll = false;
 	int ret = 0, err = -ECANCELED;
 
 	/* one will be dropped by ->io_wq_free_work() after returning to io-wq */
-	if (!(req->flags & REQ_F_REFCOUNT))
+	if (!(req_flags & REQ_F_REFCOUNT))
 		__io_req_set_refcount(req, 2);
 	else
 		req_ref_get(req);
 
-	io_arm_ltimeout(req);
+	_io_arm_ltimeout(req, req_flags);
 
 	/* either cancelled or io-wq is dying, so don't touch tctx->iowq */
 	if (atomic_read(&work->flags) & IO_WQ_WORK_CANCEL) {
@@ -1802,7 +1813,7 @@  void io_wq_submit_work(struct io_wq_work *work)
 		io_req_task_queue_fail(req, err);
 		return;
 	}
-	if (!io_assign_file(req, def, issue_flags)) {
+	if (!io_assign_file(req, req_flags, def, issue_flags)) {
 		err = -EBADF;
 		atomic_or(IO_WQ_WORK_CANCEL, &work->flags);
 		goto fail;
@@ -1816,7 +1827,7 @@  void io_wq_submit_work(struct io_wq_work *work)
 	 * Don't allow any multishot execution from io-wq. It's more restrictive
 	 * than necessary and also cleaner.
 	 */
-	if (req->flags & REQ_F_APOLL_MULTISHOT) {
+	if (req_flags & REQ_F_APOLL_MULTISHOT) {
 		err = -EBADFD;
 		if (!io_file_can_poll(req))
 			goto fail;
@@ -1831,7 +1842,7 @@  void io_wq_submit_work(struct io_wq_work *work)
 		}
 	}
 
-	if (req->flags & REQ_F_FORCE_ASYNC) {
+	if (req_flags & REQ_F_FORCE_ASYNC) {
 		bool opcode_poll = def->pollin || def->pollout;
 
 		if (opcode_poll && io_file_can_poll(req)) {
@@ -1849,7 +1860,7 @@  void io_wq_submit_work(struct io_wq_work *work)
 		 * If REQ_F_NOWAIT is set, then don't wait or retry with
 		 * poll. -EAGAIN is final for that case.
 		 */
-		if (req->flags & REQ_F_NOWAIT)
+		if (req_flags & REQ_F_NOWAIT)
 			break;
 
 		/*