@@ -582,6 +582,7 @@ struct io_overflow_cqe {
struct io_zctap_ifq {
struct net_device *dev;
struct io_ring_ctx *ctx;
+ void *region; /* XXX relocate? */
u16 queue_id;
u16 id;
u16 fill_bgid;
@@ -206,6 +206,7 @@ enum io_uring_op {
IORING_OP_SOCKET,
IORING_OP_URING_CMD,
IORING_OP_SEND_ZC,
+ IORING_OP_PROVIDE_IFQ_REGION,
/* this goes last, obviously */
IORING_OP_LAST,
@@ -33,6 +33,7 @@
#include "poll.h"
#include "cancel.h"
#include "rw.h"
+#include "zctap.h"
static int io_no_issue(struct io_kiocb *req, unsigned int issue_flags)
{
@@ -488,6 +489,14 @@ const struct io_op_def io_op_defs[] = {
.prep = io_eopnotsupp_prep,
#endif
},
+ [IORING_OP_PROVIDE_IFQ_REGION] = {
+ .audit_skip = 1,
+ .iopoll = 1,
+ .buffer_select = 1,
+ .name = "PROVIDE_IFQ_REGION",
+ .prep = io_provide_ifq_region_prep,
+ .issue = io_provide_ifq_region,
+ },
};
const char *io_uring_get_opcode(u8 opcode)
@@ -6,11 +6,14 @@
#include <linux/mm.h>
#include <linux/io_uring.h>
#include <linux/netdevice.h>
+#include <linux/nospec.h>
#include <uapi/linux/io_uring.h>
#include "io_uring.h"
#include "zctap.h"
+#include "rsrc.h"
+#include "kbuf.h"
static DEFINE_XARRAY_ALLOC1(io_zctap_ifq_xa);
@@ -144,3 +147,96 @@ int io_unregister_ifq(struct io_ring_ctx *ctx,
return io_unregister_zctap_ifq(ctx, req.ifq_id);
}
+
+struct io_ifq_region { /* per-request command data, obtained via io_kiocb_to_cmd() */
+ struct file *file; /* not referenced by the prep/issue handlers shown here */
+ struct io_zctap_ifq *ifq; /* target ifq, looked up from sqe->fd at prep time */
+ __u64 addr; /* region start, read from sqe->addr; non-zero and page aligned */
+ __u32 len; /* region length in bytes, read from sqe->len; non-zero multiple of the page size */
+ __u32 bgid; /* NOTE(review): never assigned in the visible code -- confirm intended use */
+};
+
+struct ifq_region { /* backing-store descriptor installed as io_zctap_ifq->region */
+ struct io_mapped_ubuf *imu; /* registered buffer the region was carved from */
+ u64 start; /* first address of the region (the request's addr) */
+ u64 end; /* one past the last address of the region (addr + len) */
+ int count; /* initialised to nr_pages; NOTE(review): presumably a count of available pages -- confirm */
+ int imu_idx; /* index into imu->bvec[] of the region's first page */
+ int nr_pages; /* number of entries in page[] (len >> PAGE_SHIFT) */
+ struct page *page[]; /* page pointers copied from imu->bvec[imu_idx ...] */
+};
+
+/*
+ * Prepare an IORING_OP_PROVIDE_IFQ_REGION request.
+ *
+ * SQE fields consumed:
+ *   sqe->addr  start of the region; must be non-zero and page aligned
+ *   sqe->len   length of the region; must be a non-zero multiple of PAGE_SIZE
+ *   sqe->fd    index of the target ifq in ctx->zctap_ifq_xa
+ *
+ * req->buf_index selects the registered buffer (ctx->user_bufs[]) which must
+ * fully contain [addr, addr + len).  REQ_F_BUFFER_SELECT must be set.
+ *
+ * On success the ifq, addr and len are stored in the request's
+ * struct io_ifq_region, req->imu points at the registered buffer and
+ * io_req_set_rsrc_node() has been called for the request.
+ *
+ * Returns 0 on success, -EINVAL if REQ_F_BUFFER_SELECT is not set and
+ * -EFAULT for every other validation failure.
+ */
+int io_provide_ifq_region_prep(struct io_kiocb *req,
+			       const struct io_uring_sqe *sqe)
+{
+	struct io_ifq_region *r = io_kiocb_to_cmd(req, struct io_ifq_region);
+	struct io_ring_ctx *ctx = req->ctx;
+	struct io_mapped_ubuf *imu;
+	u64 end;
+	u32 index;
+
+	if (!(req->flags & REQ_F_BUFFER_SELECT))
+		return -EINVAL;
+
+	r->addr = READ_ONCE(sqe->addr);
+	r->len = READ_ONCE(sqe->len);
+	index = READ_ONCE(sqe->fd);
+
+	if (!r->addr || r->addr & ~PAGE_MASK)
+		return -EFAULT;
+
+	if (!r->len || r->len & ~PAGE_MASK)
+		return -EFAULT;
+
+	r->ifq = xa_load(&ctx->zctap_ifq_xa, index);
+	if (!r->ifq)
+		return -EFAULT;
+
+	/*
+	 * XXX for now, only allow one region per ifq.
+	 *
+	 * NOTE(review): the region is only assigned at issue time, so two
+	 * requests that are both prepared before either one is issued can
+	 * both get past this check.
+	 */
+	if (r->ifq->region)
+		return -EFAULT;
+
+	if (unlikely(req->buf_index >= ctx->nr_user_bufs))
+		return -EFAULT;
+	index = array_index_nospec(req->buf_index, ctx->nr_user_bufs);
+	imu = ctx->user_bufs[index];
+
+	/*
+	 * addr is an arbitrary 64-bit value supplied by userspace, so
+	 * addr + len may wrap around and sneak past the containment check.
+	 */
+	if (check_add_overflow(r->addr, (u64)r->len, &end))
+		return -EFAULT;
+	if (r->addr < imu->ubuf || end > imu->ubuf_end)
+		return -EFAULT;
+	req->imu = imu;
+
+	io_req_set_rsrc_node(req, ctx, 0);
+
+	return 0;
+}
+
+/*
+ * Issue handler for IORING_OP_PROVIDE_IFQ_REGION.
+ *
+ * Allocates a struct ifq_region describing the range validated at prep
+ * time, copies the backing page pointers out of the registered buffer's
+ * bvec array and installs the descriptor as the ifq's region.
+ *
+ * Returns IOU_OK with the CQE result set to 0 on success, -EFAULT if the
+ * ifq already has a region, or -ENOMEM if the descriptor cannot be
+ * allocated.
+ */
+int io_provide_ifq_region(struct io_kiocb *req, unsigned int issue_flags)
+{
+	struct io_ifq_region *r = io_kiocb_to_cmd(req, struct io_ifq_region);
+	struct io_mapped_ubuf *imu = req->imu;
+	struct ifq_region *ifr;
+	int i, idx, nr_pages;
+
+	/*
+	 * Re-check the one-region-per-ifq rule: another request may have
+	 * installed a region between our prep and now (e.g. linked requests
+	 * are all prepared before the first one is issued).  Overwriting it
+	 * would leak the earlier descriptor.
+	 */
+	if (READ_ONCE(r->ifq->region))
+		return -EFAULT;
+
+	nr_pages = r->len >> PAGE_SHIFT;
+	/*
+	 * The registered buffer need not start on a page boundary, so the
+	 * bvec index is the difference of page frame numbers rather than
+	 * the byte offset from imu->ubuf shifted down.
+	 */
+	idx = (r->addr >> PAGE_SHIFT) - (imu->ubuf >> PAGE_SHIFT);
+
+	ifr = kvmalloc(struct_size(ifr, page, nr_pages), GFP_KERNEL);
+	if (!ifr)
+		return -ENOMEM;
+
+	ifr->nr_pages = nr_pages;
+	ifr->imu_idx = idx;
+	ifr->count = nr_pages;
+	ifr->imu = imu;
+	ifr->start = r->addr;
+	ifr->end = r->addr + r->len;
+
+	for (i = 0; i < nr_pages; i++, idx++)
+		ifr->page[i] = imu->bvec[idx].bv_page;
+
+	WRITE_ONCE(r->ifq->region, ifr);
+
+	/* The completion result must be set explicitly by the handler. */
+	io_req_set_res(req, 0, 0);
+	return IOU_OK;
+}
@@ -8,4 +8,8 @@ int io_unregister_ifq(struct io_ring_ctx *ctx,
struct io_uring_ifq_req __user *arg);
int io_unregister_zctap_ifq(struct io_ring_ctx *ctx, unsigned long index);
+int io_provide_ifq_region_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe); /* .prep handler for IORING_OP_PROVIDE_IFQ_REGION */
+int io_provide_ifq_region(struct io_kiocb *req, unsigned int issue_flags); /* .issue handler for IORING_OP_PROVIDE_IFQ_REGION */
+
#endif
This opcode takes part or all of a memory region that was previously
registered with io_uring, and assigns it as the backing store for the
specified ifq.

The entire region is registered instead of providing individual buffers,
as this allows the hardware to select the optimal buffer size for
incoming packets.

Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
---
 include/linux/io_uring_types.h |  1 +
 include/uapi/linux/io_uring.h  |  1 +
 io_uring/opdef.c               |  9 ++++
 io_uring/zctap.c               | 96 ++++++++++++++++++++++++++++++++++
 io_uring/zctap.h               |  4 ++
 5 files changed, 111 insertions(+)