@@ -126,6 +126,8 @@ struct socket {
const struct proto_ops *ops; /* Might change with IPV6_ADDRFORM or MPTCP. */
struct socket_wq wq;
+
+ unsigned zc_rx_idx;
};
/*
@@ -550,6 +550,7 @@ enum {
/* register a network interface queue for zerocopy */
IORING_REGISTER_ZC_RX_IFQ = 26,
+ IORING_REGISTER_ZC_RX_SOCK = 27,
/* this goes last */
IORING_REGISTER_LAST,
@@ -788,6 +789,12 @@ struct io_uring_zc_rx_ifq_reg {
struct io_rbuf_cqring_offsets cq_off;
};
+struct io_uring_zc_rx_sock_reg { /* argument block for IORING_REGISTER_ZC_RX_SOCK */
+ __u32 sockfd; /* file descriptor of the socket to attach; resolved with fget() at registration */
+ __u32 zc_rx_ifq_idx; /* index of the target zerocopy rx ifq; registration currently accepts only 0 */
+ __u32 __resv[2]; /* reserved; registration fails with -EINVAL unless both are zero */
+};
+
#ifdef __cplusplus
}
#endif
@@ -4549,6 +4549,12 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
break;
ret = io_register_zc_rx_ifq(ctx, arg);
break;
+ case IORING_REGISTER_ZC_RX_SOCK:
+ ret = -EINVAL;
+ if (!arg || nr_args != 1)
+ break;
+ ret = io_register_zc_rx_sock(ctx, arg);
+ break;
default:
ret = -EINVAL;
break;
@@ -955,6 +955,25 @@ int io_recv(struct io_kiocb *req, unsigned int issue_flags)
return ret;
}
+static __maybe_unused /* returns the ring's ifq if @sock is registered with it for this request, else NULL */
+struct io_zc_rx_ifq *io_zc_verify_sock(struct io_kiocb *req,
+ struct socket *sock)
+{
+ unsigned token = READ_ONCE(sock->zc_rx_idx); /* token written by io_register_zc_rx_sock() */
+ unsigned ifq_idx = token >> IO_ZC_IFQ_IDX_OFFSET; /* upper bits: ifq index */
+ unsigned sock_idx = token & IO_ZC_IFQ_IDX_MASK; /* lower bits: slot in ifq->sockets[] */
+ struct io_zc_rx_ifq *ifq;
+
+ if (ifq_idx) /* only ifq index 0 is accepted */
+ return NULL;
+ ifq = req->ctx->ifq;
+ if (!ifq || sock_idx >= ifq->nr_sockets) /* ring has no ifq, or the slot was never populated */
+ return NULL;
+ if (ifq->sockets[sock_idx] != req->file) /* token alone is not trusted (0 is also the never-registered value): the slot must hold this request's file */
+ return NULL;
+ return ifq;
+}
+
void io_send_zc_cleanup(struct io_kiocb *req)
{
struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
@@ -11,6 +11,7 @@
#include "io_uring.h"
#include "kbuf.h"
#include "zc_rx.h"
+#include "rsrc.h"
typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf);
@@ -129,12 +130,74 @@ int io_register_zc_rx_ifq(struct io_ring_ctx *ctx,
int io_unregister_zc_rx_ifq(struct io_ring_ctx *ctx) /* detach and free the ring's zerocopy rx ifq; -EINVAL if none is set, 0 otherwise */
{
struct io_zc_rx_ifq *ifq = ctx->ifq;
+ int i;
if (!ifq)
return -EINVAL;
+ for (i = 0; i < ifq->nr_sockets; i++) /* drop the file references kept in sockets[] by io_register_zc_rx_sock() */
+ fput(ifq->sockets[i]); /* NOTE(review): sock->zc_rx_idx is not reset here, so a released socket appears to keep its old token - confirm */
+
ctx->ifq = NULL; /* cleared before the ifq is freed */
io_zc_rx_ifq_free(ifq);
return 0;
}
+
+int io_register_zc_rx_sock(struct io_ring_ctx *ctx, /* attach the socket named by @arg to the ring's zerocopy rx ifq */
+ struct io_uring_zc_rx_sock_reg __user *arg) /* returns 0 or a negative errno */
+{
+ struct io_uring_zc_rx_sock_reg sr;
+ struct io_zc_rx_ifq *ifq;
+ struct socket *sock;
+ struct file *file;
+ int ret = -EEXIST; /* reported when the socket already carries a token */
+ int idx;
+
+ if (copy_from_user(&sr, arg, sizeof(sr)))
+ return -EFAULT;
+ if (sr.__resv[0] || sr.__resv[1]) /* reserved fields must be zero */
+ return -EINVAL;
+ if (sr.zc_rx_ifq_idx != 0 || !ctx->ifq) /* only ifq index 0 is accepted, and an ifq must already be set on the ring */
+ return -EINVAL;
+
+ ifq = ctx->ifq;
+ if (ifq->nr_sockets >= ARRAY_SIZE(ifq->sockets)) /* every slot is already in use */
+ return -EINVAL;
+
+ BUILD_BUG_ON(ARRAY_SIZE(ifq->sockets) > IO_ZC_IFQ_IDX_MASK); /* a slot index must fit in the low bits of the token */
+
+ file = fget(sr.sockfd); /* reference is kept in ifq->sockets[] on success, dropped on every failure path */
+ if (!file)
+ return -EBADF;
+
+ if (io_file_need_scm(file)) {
+ fput(file);
+ return -EBADF;
+ }
+
+ sock = sock_from_file(file);
+ if (unlikely(!sock || !sock->sk)) {
+ fput(file);
+ return -ENOTSOCK;
+ }
+
+ idx = ifq->nr_sockets; /* next free slot */
+ lock_sock(sock->sk); /* check-and-set of zc_rx_idx is done under the socket lock */
+ if (!sock->zc_rx_idx) { /* NOTE(review): slot 0 of ifq 0 encodes to token 0, the same value sock_alloc() sets, so that socket still looks unregistered here - confirm */
+ unsigned token;
+
+ token = idx + (sr.zc_rx_ifq_idx << IO_ZC_IFQ_IDX_OFFSET); /* low bits: slot index, high bits: ifq index */
+ WRITE_ONCE(sock->zc_rx_idx, token);
+ ret = 0;
+ }
+ release_sock(sock->sk);
+
+ if (ret) {
+ fput(file);
+ return ret; /* propagate -EEXIST instead of masking it as -EINVAL */
+ }
+ ifq->sockets[idx] = file;
+ ifq->nr_sockets++;
+ return 0;
+}
#endif
@@ -2,6 +2,13 @@
#ifndef IOU_ZC_RX_H
#define IOU_ZC_RX_H
+#include <linux/io_uring_types.h>
+#include <linux/skbuff.h>
+
+#define IO_ZC_MAX_IFQ_SOCKETS 16 /* capacity of io_zc_rx_ifq::sockets[] */
+#define IO_ZC_IFQ_IDX_OFFSET 16 /* bit position of the ifq index inside a socket's zc_rx_idx token */
+#define IO_ZC_IFQ_IDX_MASK ((1U << IO_ZC_IFQ_IDX_OFFSET) - 1) /* selects the token's low bits, i.e. the slot index in sockets[] */
+
struct io_zc_rx_ifq {
struct io_ring_ctx *ctx;
struct net_device *dev;
@@ -11,6 +18,9 @@ struct io_zc_rx_ifq {
u32 rq_entries, cq_entries;
void *pool;
+ unsigned nr_sockets;
+ struct file *sockets[IO_ZC_MAX_IFQ_SOCKETS];
+
/* hw rx descriptor ring id */
u32 if_rxq_id;
};
@@ -19,6 +29,8 @@ struct io_zc_rx_ifq {
int io_register_zc_rx_ifq(struct io_ring_ctx *ctx,
struct io_uring_zc_rx_ifq_reg __user *arg);
int io_unregister_zc_rx_ifq(struct io_ring_ctx *ctx);
+int io_register_zc_rx_sock(struct io_ring_ctx *ctx,
+ struct io_uring_zc_rx_sock_reg __user *arg);
#else
static inline int io_register_zc_rx_ifq(struct io_ring_ctx *ctx,
struct io_uring_zc_rx_ifq_reg __user *arg)
@@ -29,6 +41,11 @@ static inline int io_unregister_zc_rx_ifq(struct io_ring_ctx *ctx)
{
return -EOPNOTSUPP;
}
+static inline int io_register_zc_rx_sock(struct io_ring_ctx *ctx, /* stub from the #else branch of this header */
+ struct io_uring_zc_rx_sock_reg __user *arg)
+{
+ return -EOPNOTSUPP; /* always fails; both arguments are ignored */
+}
#endif
#endif
@@ -637,6 +637,7 @@ struct socket *sock_alloc(void)
sock = SOCKET_I(inode);
+ sock->zc_rx_idx = 0;
inode->i_ino = get_next_ino();
inode->i_mode = S_IFSOCK | S_IRWXUGO;
inode->i_uid = current_fsuid();