@@ -209,6 +209,7 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
int nr_iovs = arg->nr_iovs;
__u16 nr_avail, tail, head;
struct io_uring_buf *buf;
+ int needed = 0;
tail = smp_load_acquire(&br->tail);
head = bl->head;
@@ -218,19 +219,22 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
buf = io_ring_head_to_buf(br, head, bl->mask);
if (arg->max_len) {
- int needed;
-
needed = (arg->max_len + buf->len - 1) / buf->len;
needed = min(needed, PEEK_MAX_IMPORT);
- if (nr_avail > needed)
- nr_avail = needed;
+ } else if (arg->max_vecs) {
+ needed = arg->max_vecs;
}
+ if (nr_avail > needed)
+ nr_avail = needed;
+
/*
- * only alloc a bigger array if we know we have data to map, eg not
- * a speculative peek operation.
+ * Alloc a bigger array if we know we have data to map, or if a
+ * speculative peek operation tries to map more than what is
+ * available.
*/
- if (arg->mode & KBUF_MODE_EXPAND && nr_avail > nr_iovs && arg->max_len) {
+ if (arg->mode & KBUF_MODE_EXPAND && nr_avail > nr_iovs &&
+ (arg->max_len || arg->max_vecs)) {
iov = kmalloc_array(nr_avail, sizeof(struct iovec), GFP_KERNEL);
if (unlikely(!iov))
return -ENOMEM;
@@ -238,7 +242,7 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
kfree(arg->iovs);
arg->iovs = iov;
nr_iovs = nr_avail;
- } else if (nr_avail < nr_iovs) {
+ } else if (nr_iovs > nr_avail) {
nr_iovs = nr_avail;
}
@@ -53,7 +53,8 @@ struct buf_sel_arg {
size_t out_len;
size_t max_len;
int nr_iovs;
- int mode;
+ unsigned short mode;
+ unsigned short max_vecs;
};
void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
@@ -1076,8 +1076,14 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg
arg.mode |= KBUF_MODE_FREE;
}
+ /*
+ * Use the passed back residual if we have it; if not, allow
+ * peeking of up to 4 buffers.
+ */
if (kmsg->msg.msg_inq > 0)
arg.max_len = min_not_zero(sr->len, kmsg->msg.msg_inq);
+ else
+ arg.max_vecs = 4;
ret = io_buffers_peek(req, &arg);
if (unlikely(ret < 0))
For bundles, the initial recv operation is always just a single buffer, as we don't yet know how much data is available in the socket. However, this can lead to a somewhat imbalanced string of receives, where the first recv gets a single buffer and the second gets a bunch. Allow the initial peek operation to get up to 4 buffers, taking advantage of the fact that there may be more data available, rather than just doing a single buffer. This has been shown to work well across a variety of recv workloads, as it's still cheap enough to do, while ensuring that we do get to amortize the cost of traversing the network stack and socket operations. Link: https://github.com/axboe/liburing/issues/1197 Fixes: 2f9c9515bdfd ("io_uring/net: support bundles for recv") Signed-off-by: Jens Axboe <axboe@kernel.dk> ---