
[7/8] io_uring: cache io_kiocb->flags in variable

Message ID 20250128133927.3989681-8-max.kellermann@ionos.com (mailing list archive)
State New
Series Various io_uring micro-optimizations (reducing lock contention)

Commit Message

Max Kellermann Jan. 28, 2025, 1:39 p.m. UTC
Cache req->flags in a local variable. This eliminates several
redundant reads, some of which the compiler probably cannot optimize
away.

Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
---
 io_uring/io_uring.c | 59 +++++++++++++++++++++++++++------------------
 1 file changed, 35 insertions(+), 24 deletions(-)
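
For illustration, here is a minimal, self-contained sketch of the pattern the
patch applies: read the flags word once into a local and test every bit against
that copy. The ex_req struct and EX_F_* bits are made-up stand-ins for io_kiocb
and REQ_F_*, not the kernel definitions.

#include <stdio.h>

/* Made-up stand-ins for io_kiocb and the REQ_F_* bits. */
#define EX_F_BUFFER_SELECTED	(1u << 0)
#define EX_F_NEED_CLEANUP	(1u << 1)
#define EX_F_CREDS		(1u << 2)

struct ex_req {
	unsigned int flags;
};

/* Before: every test dereferences req again; after a call the compiler
 * cannot see through, it may have to reload req->flags from memory. */
static void clean_op_rereads(struct ex_req *req)
{
	if (req->flags & EX_F_BUFFER_SELECTED)
		puts("drop selected buffer");
	if (req->flags & EX_F_NEED_CLEANUP)
		puts("run opcode cleanup");
	if (req->flags & EX_F_CREDS)
		puts("put credentials");
}

/* After: one load into a local; all tests use the cached copy,
 * mirroring what the patch does with req_flags. */
static void clean_op_cached(struct ex_req *req)
{
	const unsigned int req_flags = req->flags;

	if (req_flags & EX_F_BUFFER_SELECTED)
		puts("drop selected buffer");
	if (req_flags & EX_F_NEED_CLEANUP)
		puts("run opcode cleanup");
	if (req_flags & EX_F_CREDS)
		puts("put credentials");
}

int main(void)
{
	struct ex_req req = { .flags = EX_F_NEED_CLEANUP | EX_F_CREDS };

	clean_op_rereads(&req);
	clean_op_cached(&req);
	return 0;
}

The caching only makes sense where req->flags is not expected to change
underneath, or where the local is explicitly re-synchronized, as the patch does
in io_prep_async_work().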

Comments

Pavel Begunkov Jan. 29, 2025, 7:11 p.m. UTC | #1
On 1/28/25 13:39, Max Kellermann wrote:
> This eliminates several redundant reads, some of which probably cannot
> be optimized away by the compiler.

Let's not; it hurts readability with no clear benefit. In most cases
the compiler will be able to optimise it just where it matters, and in
cold paths we're weighing the cost of re-reading the field against
taking locks and doing indirect calls; even then, the cached value
would likely need to be saved onto the stack and loaded back.

The only place where it might be worth it is io_issue_sqe(), and
even then I'd doubt it.
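
To make the trade-off concrete, here is a small hedged sketch (hypothetical
req/op_fn names, not io_uring code): across an indirect call the compiler
generally has to reload r->flags, which is the kind of redundant read the patch
removes, while the cached local has to survive the call (possibly via a stack
spill) and deliberately keeps the pre-call snapshot.

#include <stdio.h>

struct req {
	unsigned int flags;
};

/* Indirect call the compiler generally cannot see through, loosely
 * analogous to the ->cleanup()/->issue() calls in io_uring. */
typedef void (*op_fn)(struct req *);

static void some_op(struct req *r)
{
	r->flags |= 2u;		/* the callee really may change the flags */
}

static unsigned int without_cache(struct req *r, op_fn op)
{
	unsigned int n = 0;

	if (r->flags & 1u)	/* first load of r->flags */
		n++;
	op(r);			/* opaque call: the flags may have changed */
	if (r->flags & 2u)	/* typically compiled as a second load */
		n++;
	return n;
}

static unsigned int with_cache(struct req *r, op_fn op)
{
	const unsigned int flags = r->flags;	/* single load, cached */
	unsigned int n = 0;

	if (flags & 1u)
		n++;
	op(r);			/* the cached value must live across this call */
	if (flags & 2u)		/* deliberately tests the pre-call snapshot */
		n++;
	return n;
}

int main(void)
{
	struct req a = { .flags = 1u };
	struct req b = { .flags = 1u };

	/* Prints "2 1": the two variants can even differ semantically
	 * when the callee modifies the flags. */
	printf("%u %u\n", without_cache(&a, some_op), with_cache(&b, some_op));
	return 0;
}

Whether caching helps depends on how much other work sits between the reads; in
the cold paths touched by the patch, the completion lock and the indirect
->cleanup() call dominate either way, which is the objection raised here.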

Patch

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 7bfbc7c22367..137c2066c5a3 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -391,28 +391,30 @@  static bool req_need_defer(struct io_kiocb *req, u32 seq)
 
 static void io_clean_op(struct io_kiocb *req)
 {
-	if (req->flags & REQ_F_BUFFER_SELECTED) {
+	const unsigned int req_flags = req->flags;
+
+	if (req_flags & REQ_F_BUFFER_SELECTED) {
 		spin_lock(&req->ctx->completion_lock);
 		io_kbuf_drop(req);
 		spin_unlock(&req->ctx->completion_lock);
 	}
 
-	if (req->flags & REQ_F_NEED_CLEANUP) {
+	if (req_flags & REQ_F_NEED_CLEANUP) {
 		const struct io_cold_def *def = &io_cold_defs[req->opcode];
 
 		if (def->cleanup)
 			def->cleanup(req);
 	}
-	if ((req->flags & REQ_F_POLLED) && req->apoll) {
+	if ((req_flags & REQ_F_POLLED) && req->apoll) {
 		kfree(req->apoll->double_poll);
 		kfree(req->apoll);
 		req->apoll = NULL;
 	}
-	if (req->flags & REQ_F_INFLIGHT)
+	if (req_flags & REQ_F_INFLIGHT)
 		atomic_dec(&req->tctx->inflight_tracked);
-	if (req->flags & REQ_F_CREDS)
+	if (req_flags & REQ_F_CREDS)
 		put_cred(req->creds);
-	if (req->flags & REQ_F_ASYNC_DATA) {
+	if (req_flags & REQ_F_ASYNC_DATA) {
 		kfree(req->async_data);
 		req->async_data = NULL;
 	}
@@ -453,31 +455,37 @@  static noinline void __io_arm_ltimeout(struct io_kiocb *req)
 	io_queue_linked_timeout(__io_prep_linked_timeout(req));
 }
 
-static inline void io_arm_ltimeout(struct io_kiocb *req)
+static inline void _io_arm_ltimeout(struct io_kiocb *req, unsigned int req_flags)
 {
-	if (unlikely(req->flags & REQ_F_ARM_LTIMEOUT))
+	if (unlikely(req_flags & REQ_F_ARM_LTIMEOUT))
 		__io_arm_ltimeout(req);
 }
 
+static inline void io_arm_ltimeout(struct io_kiocb *req)
+{
+	_io_arm_ltimeout(req, req->flags);
+}
+
 static void io_prep_async_work(struct io_kiocb *req)
 {
+	unsigned int req_flags = req->flags;
 	const struct io_issue_def *def = &io_issue_defs[req->opcode];
 	struct io_ring_ctx *ctx = req->ctx;
 
-	if (!(req->flags & REQ_F_CREDS)) {
-		req->flags |= REQ_F_CREDS;
+	if (!(req_flags & REQ_F_CREDS)) {
+		req_flags = req->flags |= REQ_F_CREDS;
 		req->creds = get_current_cred();
 	}
 
 	req->work.list.next = NULL;
 	atomic_set(&req->work.flags, 0);
-	if (req->flags & REQ_F_FORCE_ASYNC)
+	if (req_flags & REQ_F_FORCE_ASYNC)
 		atomic_or(IO_WQ_WORK_CONCURRENT, &req->work.flags);
 
-	if (req->file && !(req->flags & REQ_F_FIXED_FILE))
-		req->flags |= io_file_get_flags(req->file);
+	if (req->file && !(req_flags & REQ_F_FIXED_FILE))
+		req_flags = req->flags |= io_file_get_flags(req->file);
 
-	if (req->file && (req->flags & REQ_F_ISREG)) {
+	if (req->file && (req_flags & REQ_F_ISREG)) {
 		bool should_hash = def->hash_reg_file;
 
 		/* don't serialize this request if the fs doesn't need it */
@@ -1703,13 +1711,14 @@  static __cold void io_drain_req(struct io_kiocb *req)
 	spin_unlock(&ctx->completion_lock);
 }
 
-static bool io_assign_file(struct io_kiocb *req, const struct io_issue_def *def,
+static bool io_assign_file(struct io_kiocb *req, unsigned int req_flags,
+			   const struct io_issue_def *def,
 			   unsigned int issue_flags)
 {
 	if (req->file || !def->needs_file)
 		return true;
 
-	if (req->flags & REQ_F_FIXED_FILE)
+	if (req_flags & REQ_F_FIXED_FILE)
 		req->file = io_file_get_fixed(req, req->cqe.fd, issue_flags);
 	else
 		req->file = io_file_get_normal(req, req->cqe.fd);
@@ -1719,14 +1728,15 @@  static bool io_assign_file(struct io_kiocb *req, const struct io_issue_def *def,
 
 static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
 {
+	const unsigned int req_flags = req->flags;
 	const struct io_issue_def *def = &io_issue_defs[req->opcode];
 	const struct cred *creds = NULL;
 	int ret;
 
-	if (unlikely(!io_assign_file(req, def, issue_flags)))
+	if (unlikely(!io_assign_file(req, req_flags, def, issue_flags)))
 		return -EBADF;
 
-	if (unlikely((req->flags & REQ_F_CREDS) && req->creds != current_cred()))
+	if (unlikely((req_flags & REQ_F_CREDS) && req->creds != current_cred()))
 		creds = override_creds(req->creds);
 
 	if (!def->audit_skip)
@@ -1783,18 +1793,19 @@  struct io_wq_work *io_wq_free_work(struct io_wq_work *work)
 void io_wq_submit_work(struct io_wq_work *work)
 {
 	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+	const unsigned int req_flags = req->flags;
 	const struct io_issue_def *def = &io_issue_defs[req->opcode];
 	unsigned int issue_flags = IO_URING_F_UNLOCKED | IO_URING_F_IOWQ;
 	bool needs_poll = false;
 	int ret = 0, err = -ECANCELED;
 
 	/* one will be dropped by ->io_wq_free_work() after returning to io-wq */
-	if (!(req->flags & REQ_F_REFCOUNT))
+	if (!(req_flags & REQ_F_REFCOUNT))
 		__io_req_set_refcount(req, 2);
 	else
 		req_ref_get(req);
 
-	io_arm_ltimeout(req);
+	_io_arm_ltimeout(req, req_flags);
 
 	/* either cancelled or io-wq is dying, so don't touch tctx->iowq */
 	if (atomic_read(&work->flags) & IO_WQ_WORK_CANCEL) {
@@ -1802,7 +1813,7 @@  void io_wq_submit_work(struct io_wq_work *work)
 		io_req_task_queue_fail(req, err);
 		return;
 	}
-	if (!io_assign_file(req, def, issue_flags)) {
+	if (!io_assign_file(req, req_flags, def, issue_flags)) {
 		err = -EBADF;
 		atomic_or(IO_WQ_WORK_CANCEL, &work->flags);
 		goto fail;
@@ -1816,7 +1827,7 @@  void io_wq_submit_work(struct io_wq_work *work)
 	 * Don't allow any multishot execution from io-wq. It's more restrictive
 	 * than necessary and also cleaner.
 	 */
-	if (req->flags & REQ_F_APOLL_MULTISHOT) {
+	if (req_flags & REQ_F_APOLL_MULTISHOT) {
 		err = -EBADFD;
 		if (!io_file_can_poll(req))
 			goto fail;
@@ -1831,7 +1842,7 @@  void io_wq_submit_work(struct io_wq_work *work)
 		}
 	}
 
-	if (req->flags & REQ_F_FORCE_ASYNC) {
+	if (req_flags & REQ_F_FORCE_ASYNC) {
 		bool opcode_poll = def->pollin || def->pollout;
 
 		if (opcode_poll && io_file_can_poll(req)) {
@@ -1849,7 +1860,7 @@  void io_wq_submit_work(struct io_wq_work *work)
 		 * If REQ_F_NOWAIT is set, then don't wait or retry with
 		 * poll. -EAGAIN is final for that case.
 		 */
-		if (req->flags & REQ_F_NOWAIT)
+		if (req_flags & REQ_F_NOWAIT)
 			break;
 
 		/*