@@ -69,8 +69,18 @@ struct io_file_table {
unsigned int alloc_hint;
};
+struct io_alloc_cache {
+ void **entries;
+ unsigned int nr_cached;
+ unsigned int max_cached;
+ unsigned int elem_size;
+ unsigned int init_clear;
+};
+
struct io_buf_table {
struct io_rsrc_data data;
+ struct io_alloc_cache node_cache;
+ struct io_alloc_cache imu_cache;
};
struct io_hash_bucket {
@@ -224,14 +234,6 @@ struct io_submit_state {
struct blk_plug plug;
};
-struct io_alloc_cache {
- void **entries;
- unsigned int nr_cached;
- unsigned int max_cached;
- unsigned int elem_size;
- unsigned int init_clear;
-};
-
struct io_ring_ctx {
/* const or read-mostly hot data */
struct {
@@ -68,7 +68,7 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
if (slot_index >= ctx->file_table.data.nr)
return -EINVAL;
- node = io_rsrc_node_alloc(IORING_RSRC_FILE);
+ node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE);
if (!node)
return -ENOMEM;
@@ -33,6 +33,8 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
#define IORING_MAX_FIXED_FILES (1U << 20)
#define IORING_MAX_REG_BUFFERS (1U << 14)
+#define IO_CACHED_BVECS_SEGS 32
+
int __io_account_mem(struct user_struct *user, unsigned long nr_pages)
{
unsigned long page_limit, cur_pages, new_pages;
@@ -102,6 +104,22 @@ int io_buffer_validate(struct iovec *iov)
return 0;
}
+static struct io_mapped_ubuf *io_alloc_imu(struct io_ring_ctx *ctx,
+ int nr_bvecs)
+{
+ if (nr_bvecs <= IO_CACHED_BVECS_SEGS)
+ return io_cache_alloc(&ctx->buf_table.imu_cache, GFP_KERNEL);
+ return kvmalloc(struct_size_t(struct io_mapped_ubuf, bvec, nr_bvecs),
+ GFP_KERNEL);
+}
+
+static void io_free_imu(struct io_ring_ctx *ctx, struct io_mapped_ubuf *imu)
+{
+ if (imu->nr_bvecs > IO_CACHED_BVECS_SEGS ||
+ !io_alloc_cache_put(&ctx->buf_table.imu_cache, imu))
+ kvfree(imu);
+}
+
static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_rsrc_node *node)
{
struct io_mapped_ubuf *imu = node->buf;
@@ -120,22 +138,35 @@ static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_rsrc_node *node)
io_unaccount_mem(ctx, imu->acct_pages);
}
- kvfree(imu);
+ io_free_imu(ctx, imu);
}
-struct io_rsrc_node *io_rsrc_node_alloc(int type)
+struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx, int type)
{
struct io_rsrc_node *node;
- node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (type == IORING_RSRC_FILE)
+ node = kmalloc(sizeof(*node), GFP_KERNEL);
+ else
+ node = io_cache_alloc(&ctx->buf_table.node_cache, GFP_KERNEL);
if (node) {
node->type = type;
node->refs = 1;
+ node->tag = 0;
+ node->file_ptr = 0;
}
return node;
}
-__cold void io_rsrc_data_free(struct io_ring_ctx *ctx, struct io_rsrc_data *data)
+static __cold void __io_rsrc_data_free(struct io_rsrc_data *data)
+{
+ kvfree(data->nodes);
+ data->nodes = NULL;
+ data->nr = 0;
+}
+
+__cold void io_rsrc_data_free(struct io_ring_ctx *ctx,
+ struct io_rsrc_data *data)
{
if (!data->nr)
return;
@@ -143,9 +174,7 @@ __cold void io_rsrc_data_free(struct io_ring_ctx *ctx, struct io_rsrc_data *data
if (data->nodes[data->nr])
io_put_rsrc_node(ctx, data->nodes[data->nr]);
}
- kvfree(data->nodes);
- data->nodes = NULL;
- data->nr = 0;
+ __io_rsrc_data_free(data);
}
__cold int io_rsrc_data_alloc(struct io_rsrc_data *data, unsigned nr)
@@ -159,6 +188,31 @@ __cold int io_rsrc_data_alloc(struct io_rsrc_data *data, unsigned nr)
return -ENOMEM;
}
+static __cold int io_rsrc_buffer_alloc(struct io_buf_table *table, unsigned nr)
+{
+ const int imu_cache_size = struct_size_t(struct io_mapped_ubuf, bvec,
+ IO_CACHED_BVECS_SEGS);
+ const int node_size = sizeof(struct io_rsrc_node);
+ int ret;
+
+ ret = io_rsrc_data_alloc(&table->data, nr);
+ if (ret)
+ return ret;
+
+ if (io_alloc_cache_init(&table->node_cache, nr, node_size, 0))
+ goto free_data;
+
+ if (io_alloc_cache_init(&table->imu_cache, nr, imu_cache_size, 0))
+ goto free_cache;
+
+ return 0;
+free_cache:
+ io_alloc_cache_free(&table->node_cache, kfree);
+free_data:
+ __io_rsrc_data_free(&table->data);
+ return -ENOMEM;
+}
+
static int __io_sqe_files_update(struct io_ring_ctx *ctx,
struct io_uring_rsrc_update2 *up,
unsigned nr_args)
@@ -208,7 +262,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
err = -EBADF;
break;
}
- node = io_rsrc_node_alloc(IORING_RSRC_FILE);
+ node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE);
if (!node) {
err = -ENOMEM;
fput(file);
@@ -460,6 +514,8 @@ void io_free_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node)
case IORING_RSRC_BUFFER:
if (node->buf)
io_buffer_unmap(ctx, node);
+ if (io_alloc_cache_put(&ctx->buf_table.node_cache, node))
+ return;
break;
default:
WARN_ON_ONCE(1);
@@ -528,7 +584,7 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
goto fail;
}
ret = -ENOMEM;
- node = io_rsrc_node_alloc(IORING_RSRC_FILE);
+ node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE);
if (!node) {
fput(file);
goto fail;
@@ -548,11 +604,19 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
return ret;
}
+static void io_rsrc_buffer_free(struct io_ring_ctx *ctx,
+ struct io_buf_table *table)
+{
+ io_rsrc_data_free(ctx, &table->data);
+ io_alloc_cache_free(&table->node_cache, kfree);
+ io_alloc_cache_free(&table->imu_cache, kfree);
+}
+
int io_sqe_buffers_unregister(struct io_ring_ctx *ctx)
{
if (!ctx->buf_table.data.nr)
return -ENXIO;
- io_rsrc_data_free(ctx, &ctx->buf_table.data);
+ io_rsrc_buffer_free(ctx, &ctx->buf_table);
return 0;
}
@@ -733,7 +797,7 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
if (!iov->iov_base)
return NULL;
- node = io_rsrc_node_alloc(IORING_RSRC_BUFFER);
+ node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER);
if (!node)
return ERR_PTR(-ENOMEM);
node->buf = NULL;
@@ -753,7 +817,7 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
coalesced = io_coalesce_buffer(&pages, &nr_pages, &data);
}
- imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL);
+ imu = io_alloc_imu(ctx, nr_pages);
if (!imu)
goto done;
@@ -789,7 +853,7 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
}
done:
if (ret) {
- kvfree(imu);
+ io_free_imu(ctx, imu);
if (node)
io_put_rsrc_node(ctx, node);
node = ERR_PTR(ret);
@@ -802,9 +866,9 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
unsigned int nr_args, u64 __user *tags)
{
struct page *last_hpage = NULL;
- struct io_rsrc_data data;
struct iovec fast_iov, *iov = &fast_iov;
const struct iovec __user *uvec;
+ struct io_buf_table table;
int i, ret;
BUILD_BUG_ON(IORING_MAX_REG_BUFFERS >= (1u << 16));
@@ -813,13 +877,14 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
return -EBUSY;
if (!nr_args || nr_args > IORING_MAX_REG_BUFFERS)
return -EINVAL;
- ret = io_rsrc_data_alloc(&data, nr_args);
+ ret = io_rsrc_buffer_alloc(&table, nr_args);
if (ret)
return ret;
if (!arg)
memset(iov, 0, sizeof(*iov));
+ ctx->buf_table = table;
for (i = 0; i < nr_args; i++) {
struct io_rsrc_node *node;
u64 tag = 0;
@@ -859,10 +924,8 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
}
node->tag = tag;
}
- data.nodes[i] = node;
+ table.data.nodes[i] = node;
}
-
- ctx->buf_table.data = data;
if (ret)
io_sqe_buffers_unregister(ctx);
return ret;
@@ -894,14 +957,15 @@ int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq,
goto unlock;
}
- node = io_rsrc_node_alloc(IORING_RSRC_BUFFER);
+ node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER);
if (!node) {
ret = -ENOMEM;
goto unlock;
}
nr_bvecs = blk_rq_nr_phys_segments(rq);
- imu = kvmalloc(struct_size(imu, bvec, nr_bvecs), GFP_KERNEL);
+
+ imu = io_alloc_imu(ctx, nr_bvecs);
if (!imu) {
kfree(node);
ret = -ENOMEM;
@@ -1067,7 +1131,7 @@ static void lock_two_rings(struct io_ring_ctx *ctx1, struct io_ring_ctx *ctx2)
static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx,
struct io_uring_clone_buffers *arg)
{
- struct io_rsrc_data data;
+ struct io_buf_table table;
int i, ret, off, nr;
unsigned int nbufs;
@@ -1098,7 +1162,7 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
if (check_add_overflow(arg->nr, arg->dst_off, &nbufs))
return -EOVERFLOW;
- ret = io_rsrc_data_alloc(&data, max(nbufs, ctx->buf_table.data.nr));
+ ret = io_rsrc_buffer_alloc(&table, max(nbufs, ctx->buf_table.data.nr));
if (ret)
return ret;
@@ -1107,7 +1171,7 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
struct io_rsrc_node *src_node = ctx->buf_table.data.nodes[i];
if (src_node) {
- data.nodes[i] = src_node;
+ table.data.nodes[i] = src_node;
src_node->refs++;
}
}
@@ -1137,7 +1201,7 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
if (!src_node) {
dst_node = NULL;
} else {
- dst_node = io_rsrc_node_alloc(IORING_RSRC_BUFFER);
+ dst_node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER);
if (!dst_node) {
ret = -ENOMEM;
goto out_free;
@@ -1146,12 +1210,12 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
refcount_inc(&src_node->buf->refs);
dst_node->buf = src_node->buf;
}
- data.nodes[off++] = dst_node;
+ table.data.nodes[off++] = dst_node;
i++;
}
/*
- * If asked for replace, put the old table. data->nodes[] holds both
+ * If asked for replace, put the old table. table.data->nodes[] holds both
* old and new nodes at this point.
*/
if (arg->flags & IORING_REGISTER_DST_REPLACE)
@@ -1164,10 +1228,10 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
* entry).
*/
WARN_ON_ONCE(ctx->buf_table.data.nr);
- ctx->buf_table.data = data;
+ ctx->buf_table = table;
return 0;
out_free:
- io_rsrc_data_free(ctx, &data);
+ io_rsrc_buffer_free(ctx, &table);
return ret;
}
@@ -47,7 +47,7 @@ struct io_imu_folio_data {
unsigned int nr_folios;
};
-struct io_rsrc_node *io_rsrc_node_alloc(int type);
+struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx, int type);
void io_free_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node);
void io_rsrc_data_free(struct io_ring_ctx *ctx, struct io_rsrc_data *data);
int io_rsrc_data_alloc(struct io_rsrc_data *data, unsigned nr);