@@ -6,6 +6,7 @@
#include <linux/io_uring.h>
#include <linux/netdevice.h>
#include <linux/nospec.h>
+#include <net/busy_poll.h>
#include <net/tcp.h>
#include <trace/events/page_pool.h>
@@ -21,6 +22,11 @@ struct io_zc_rx_args {
struct socket *sock;
};
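+/*
+ * Carries the result of io_napi_refill() back to the caller: the
+ * callback stores the allocated buffer in @buf.
+ */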
+struct io_zc_refill_data {
+ struct io_zc_rx_ifq *ifq;
+ struct io_zc_rx_buf *buf;
+};
+
typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf);
static inline u32 io_zc_rx_cqring_entries(struct io_zc_rx_ifq *ifq)
@@ -603,6 +609,39 @@ const struct pp_memory_provider_ops io_uring_pp_zc_ops = {
};
EXPORT_SYMBOL(io_uring_pp_zc_ops);
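+/*
+ * Executed via napi_execute() against the NAPI instance owning ifq->pp,
+ * so the page pool is only ever allocated from inside its own NAPI
+ * context.
+ */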
+static void io_napi_refill(void *data)
+{
+ struct io_zc_refill_data *rd = data;
+ struct io_zc_rx_ifq *ifq = rd->ifq;
+ void *page;
+
+ if (WARN_ON_ONCE(!ifq->pp))
+ return;
+
+ page = page_pool_dev_alloc_pages(ifq->pp);
+ if (!page)
+ return;
+ if (WARN_ON_ONCE(!page_is_page_pool_iov(page))) {
+ /* unexpected non-ppiov page, don't leak it */
+ page_pool_recycle_direct(ifq->pp, page);
+ return;
+ }
+
+ rd->buf = io_iov_to_buf(page_to_page_pool_iov(page));
+}
+
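+/*
+ * Allocate a buffer from any task context. Page pool allocations must
+ * happen from the pool's NAPI context, so bounce the request there via
+ * napi_execute(). Returns NULL if the pool is exhausted.
+ */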
+static struct io_zc_rx_buf *io_zc_get_buf_task_safe(struct io_zc_rx_ifq *ifq)
+{
+ struct io_zc_refill_data rd = {
+ .ifq = ifq,
+ };
+
+ napi_execute(ifq->pp->p.napi, io_napi_refill, &rd);
+ return rd.buf;
+}
+
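+/* Undo the last io_zc_get_rbuf_cqe() if the CQE won't be posted. */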
+static inline void io_zc_return_rbuf_cqe(struct io_zc_rx_ifq *ifq)
+{
+ ifq->cached_cq_tail--;
+}
+
static inline struct io_uring_rbuf_cqe *io_zc_get_rbuf_cqe(struct io_zc_rx_ifq *ifq)
{
struct io_uring_rbuf_cqe *cqe;
@@ -622,6 +661,51 @@ static inline struct io_uring_rbuf_cqe *io_zc_get_rbuf_cqe(struct io_zc_rx_ifq *
return cqe;
}
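+/*
+ * Copy fallback: copy up to @len bytes of @data starting at @offset into
+ * freshly allocated pool buffers, posting one rbuf CQE per buffer filled.
+ * Returns the number of bytes copied, or an error if nothing was copied.
+ */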
+static ssize_t zc_rx_copy_chunk(struct io_zc_rx_ifq *ifq, void *data,
+ unsigned int offset, size_t len,
+ unsigned sock_idx)
+{
+ size_t copy_size, copied = 0;
+ struct io_uring_rbuf_cqe *cqe;
+ struct io_zc_rx_buf *buf;
+ int ret = 0, off = 0;
+ u8 *vaddr;
+
+ do {
+ cqe = io_zc_get_rbuf_cqe(ifq);
+ if (!cqe) {
+ ret = -ENOBUFS;
+ break;
+ }
+ buf = io_zc_get_buf_task_safe(ifq);
+ if (!buf) {
+ io_zc_return_rbuf_cqe(ifq);
+ ret = -ENOMEM;
+ break;
+ }
+
+ vaddr = kmap_local_page(buf->page);
+ copy_size = min_t(size_t, PAGE_SIZE, len);
+ memcpy(vaddr, data + offset, copy_size);
+ kunmap_local(vaddr);
+
+ cqe->region = 0;
+ cqe->off = io_buf_pgid(ifq->pool, buf) * PAGE_SIZE + off;
+ cqe->len = copy_size;
+ cqe->flags = 0;
+ cqe->sock = sock_idx;
+
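+ /*
+ * Hand the buffer to userspace: take a user reference and drop
+ * the page pool reference that came from the allocation.
+ */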
+ io_zc_rx_get_buf_uref(buf);
+ page_pool_iov_put_many(&buf->ppiov, 1);
+
+ offset += copy_size;
+ len -= copy_size;
+ copied += copy_size;
+ } while (len);
+
+ return copied ? copied : ret;
+}
+
static int zc_rx_recv_frag(struct io_zc_rx_ifq *ifq, const skb_frag_t *frag,
int off, int len, unsigned sock_idx)
{
@@ -650,7 +734,22 @@ static int zc_rx_recv_frag(struct io_zc_rx_ifq *ifq, const skb_frag_t *frag,
cqe->sock = sock_idx;
cqe->flags = 0;
} else {
- return -EOPNOTSUPP;
+ struct page *page = skb_frag_page(frag);
+ u32 p_off, p_len, t, copied = 0;
+ u8 *vaddr;
+ int ret = 0;
+
+ skb_frag_foreach_page(frag, off, len,
+ page, p_off, p_len, t) {
+ vaddr = kmap_local_page(page);
+ ret = zc_rx_copy_chunk(ifq, vaddr, p_off, p_len, sock_idx);
+ kunmap_local(vaddr);
+
+ if (ret < 0)
+ return copied ? copied : ret;
+ copied += ret;
+ }
+ len = copied;
}
return len;
@@ -665,15 +764,30 @@ zc_rx_recv_skb(read_descriptor_t *desc, struct sk_buff *skb,
struct socket *sock = args->sock;
unsigned sock_idx = sock->zc_rx_idx & IO_ZC_IFQ_IDX_MASK;
struct sk_buff *frag_iter;
- unsigned start, start_off;
+ unsigned start, start_off = offset;
int i, copy, end, off;
int ret = 0;
- start = skb_headlen(skb);
- start_off = offset;
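+ /* The linear part of the skb can't be zero-copied, copy it out. */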
+ if (unlikely(offset < skb_headlen(skb))) {
+ ssize_t copied;
+ size_t to_copy;
- if (offset < start)
- return -EOPNOTSUPP;
+ to_copy = min_t(size_t, skb_headlen(skb) - offset, len);
+ copied = zc_rx_copy_chunk(ifq, skb->data, offset, to_copy,
+ sock_idx);
+ if (copied < 0) {
+ ret = copied;
+ goto out;
+ }
+ offset += copied;
+ len -= copied;
+ if (!len)
+ goto out;
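+ /* partial copy of the head, report what was consumed so far */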
+ if (offset != skb_headlen(skb))
+ goto out;
+ }
+
+ start = skb_headlen(skb);
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
const skb_frag_t *frag;