
io_uring/net: allow opportunistic initial bundle recv

Message ID 5fa6fc2f-b39f-4327-a195-61997d36b0e8@kernel.dk (mailing list archive)
State New
Series io_uring/net: allow opportunistic initial bundle recv

Commit Message

Jens Axboe Aug. 6, 2024, 5:49 p.m. UTC
For bundles, the initial recv operation is always just a single buffer,
as we don't yet know how much data is available in the socket. However,
this can lead to a somewhat imbalanced string of receives, where the
first recv gets a single buffer and the second gets a bunch.

Allow the initial peek operation to get up to 4 buffers, taking
advantage of the fact that there may be more data available, rather
than just doing a single buffer. This has been shown to work well across
a variety of recv workloads, as it's still cheap enough to do, while
ensuring that we do get to amortize the cost of traversing the network
stack and socket operations.

Link: https://github.com/axboe/liburing/issues/1197
Fixes: 2f9c9515bdfd ("io_uring/net: support bundles for recv")
Signed-off-by: Jens Axboe <axboe@kernel.dk>

---
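For reference, the userspace side of a bundle recv looks roughly like the
sketch below. This is a minimal illustration, not part of the patch: it
assumes liburing 2.6 or newer and a kernel with IORING_RECVSEND_BUNDLE
support, sockfd stands in for an already-connected socket, and allocation
error checks and buffer cleanup are trimmed for brevity.

#include <liburing.h>
#include <stdlib.h>

#define BGID		0
#define NR_BUFS		8
#define BUF_SIZE	4096

static int bundle_recv_once(int sockfd)
{
	struct io_uring ring;
	struct io_uring_buf_ring *br;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int i, ret;

	ret = io_uring_queue_init(8, &ring, 0);
	if (ret < 0)
		return ret;

	/* provide NR_BUFS buffers in group BGID via a buffer ring */
	br = io_uring_setup_buf_ring(&ring, NR_BUFS, BGID, 0, &ret);
	if (!br)
		goto out;
	for (i = 0; i < NR_BUFS; i++)
		io_uring_buf_ring_add(br, malloc(BUF_SIZE), BUF_SIZE, i,
				      io_uring_buf_ring_mask(NR_BUFS), i);
	io_uring_buf_ring_advance(br, NR_BUFS);

	/* buffer-selected recv, bundle mode requested via sqe->ioprio */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_recv(sqe, sockfd, NULL, 0, 0);
	sqe->flags |= IOSQE_BUFFER_SELECT;
	sqe->buf_group = BGID;
	sqe->ioprio |= IORING_RECVSEND_BUNDLE;
	io_uring_submit(&ring);

	/*
	 * A single CQE covers the whole bundle: cqe->res is the total byte
	 * count, spread over one or more buffers starting at the buffer id
	 * carried in cqe->flags (IORING_CQE_F_BUFFER / IORING_CQE_BUFFER_SHIFT).
	 */
	ret = io_uring_wait_cqe(&ring, &cqe);
	if (!ret) {
		ret = cqe->res;
		io_uring_cqe_seen(&ring, cqe);
	}
out:
	io_uring_queue_exit(&ring);
	return ret;
}

With the change in this patch, even the very first such recv can complete
across up to 4 of the provided buffers when msg_inq has not yet reported a
residual, instead of always starting with a single one.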

Comments

Jens Axboe Aug. 8, 2024, 5:56 p.m. UTC | #1
On 8/6/24 11:49 AM, Jens Axboe wrote:
> For bundles, the initial recv operation is always just a single buffer,
> as we don't yet know how much data is available in the socket. However,
> this can lead to a somewhat imbalanced string of receives, where the
> first recv gets a single buffer and the second gets a bunch.
> 
> Allow the initial peek operation to get up to 4 buffers, taking
> advantage of the fact that there may be more data available, rather
> than just doing a single buffer. This has been shown to work well across
> a variety of recv workloads, as it's still cheap enough to do, while
> ensuring that we do get to amortize the cost of traversing the network
> stack and socket operations.

FWIW, I dropped this one. Don't think it's clear cut enough.

Patch

diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index c95dc1736dd9..2c052996c9bf 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -209,6 +209,7 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
 	int nr_iovs = arg->nr_iovs;
 	__u16 nr_avail, tail, head;
 	struct io_uring_buf *buf;
+	int needed = 0;
 
 	tail = smp_load_acquire(&br->tail);
 	head = bl->head;
@@ -218,19 +219,22 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
 
 	buf = io_ring_head_to_buf(br, head, bl->mask);
 	if (arg->max_len) {
-		int needed;
-
 		needed = (arg->max_len + buf->len - 1) / buf->len;
 		needed = min(needed, PEEK_MAX_IMPORT);
-		if (nr_avail > needed)
-			nr_avail = needed;
+	} else if (arg->max_vecs) {
+		needed = arg->max_vecs;
 	}
 
+	if (nr_avail > needed)
+		nr_avail = needed;
+
 	/*
-	 * only alloc a bigger array if we know we have data to map, eg not
-	 * a speculative peek operation.
+	 * Alloc a bigger array if we know we have data to map, or if a
+	 * speculative peek operation tries to map more than what is
+	 * available.
 	 */
-	if (arg->mode & KBUF_MODE_EXPAND && nr_avail > nr_iovs && arg->max_len) {
+	if (arg->mode & KBUF_MODE_EXPAND && nr_avail > nr_iovs &&
+	    (arg->max_len || arg->max_vecs)) {
 		iov = kmalloc_array(nr_avail, sizeof(struct iovec), GFP_KERNEL);
 		if (unlikely(!iov))
 			return -ENOMEM;
@@ -238,7 +242,7 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
 			kfree(arg->iovs);
 		arg->iovs = iov;
 		nr_iovs = nr_avail;
-	} else if (nr_avail < nr_iovs) {
+	} else if (nr_iovs > nr_avail) {
 		nr_iovs = nr_avail;
 	}
 
diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h
index b90aca3a57fa..8248ffda3a43 100644
--- a/io_uring/kbuf.h
+++ b/io_uring/kbuf.h
@@ -53,7 +53,8 @@ struct buf_sel_arg {
 	size_t out_len;
 	size_t max_len;
 	int nr_iovs;
-	int mode;
+	unsigned short mode;
+	unsigned short max_vecs;
 };
 
 void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
diff --git a/io_uring/net.c b/io_uring/net.c
index 594490a1389b..48667f3a2388 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -1076,8 +1076,14 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg
 			arg.mode |= KBUF_MODE_FREE;
 		}
 
+		/*
+		 * Use the passed back residual if we have it, if not allow
+		 * peeking of up to 4 buffers.
+		 */
 		if (kmsg->msg.msg_inq > 0)
 			arg.max_len = min_not_zero(sr->len, kmsg->msg.msg_inq);
+		else
+			arg.max_vecs = 4;
 
 		ret = io_buffers_peek(req, &arg);
 		if (unlikely(ret < 0))
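To make the sizing logic in the kbuf.c hunk easier to follow, here is a
standalone, userspace-only sketch of the same arithmetic with made-up
values. The names mirror the patch (PEEK_MAX_IMPORT, max_len, max_vecs,
nr_avail), but this is only an illustration, not kernel code.

#include <stdio.h>

#define PEEK_MAX_IMPORT		256	/* clamp used by io_ring_buffers_peek() */

static int peek_buf_count(size_t max_len, int max_vecs, size_t buf_len,
			  int nr_avail)
{
	int needed = 0;

	if (max_len) {
		/* known byte count: round up to whole buffers, then clamp */
		needed = (max_len + buf_len - 1) / buf_len;
		needed = needed < PEEK_MAX_IMPORT ? needed : PEEK_MAX_IMPORT;
	} else if (max_vecs) {
		/* speculative peek: cap at the caller-provided vector count */
		needed = max_vecs;
	}

	if (nr_avail > needed)
		nr_avail = needed;
	return nr_avail;
}

int main(void)
{
	/* 16K pending in the socket with 4K buffers -> 4 buffers mapped */
	printf("%d\n", peek_buf_count(16384, 0, 4096, 32));
	/* no residual known yet, max_vecs = 4 -> at most 4 buffers mapped */
	printf("%d\n", peek_buf_count(0, 4, 4096, 32));
	return 0;
}

In the second case the speculative initial peek can now expand to up to
four buffers via max_vecs, instead of being limited to the single inline
iovec as before the patch.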