Message ID | 20241007221603.1703699-15-dw@davidwei.uk (mailing list archive)
---|---
State | Superseded
Series | io_uring zero copy rx

Context | Check | Description
---|---|---
netdev/tree_selection | success | Guessing tree name failed - patch did not apply, async
On 10/7/24 4:16 PM, David Wei wrote:
> From: David Wei <davidhwei@meta.com>
>
> Set the page pool memory provider for the rx queue configured for zero copy to
> io_uring. Then the rx queue is reset using netdev_rx_queue_restart() and netdev
> core + page pool will take care of filling the rx queue from the io_uring zero
> copy memory provider.
>
> For now, there is only one ifq so its destruction happens implicitly during
> io_uring cleanup.

Bit wide...

> @@ -237,15 +309,20 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
> 	reg.offsets.tail = offsetof(struct io_uring, tail);
>
> 	if (copy_to_user(arg, &reg, sizeof(reg))) {
> +		io_close_zc_rxq(ifq);
> 		ret = -EFAULT;
> 		goto err;
> 	}
> 	if (copy_to_user(u64_to_user_ptr(reg.area_ptr), &area, sizeof(area))) {
> +		io_close_zc_rxq(ifq);
> 		ret = -EFAULT;
> 		goto err;
> 	}
> 	ctx->ifq = ifq;
> 	return 0;

Not added in this patch, but since I was looking at rtnl lock coverage,
it's OK to potentially fault while holding this lock? I'm assuming it
is, as I can't imagine any faulting needing to grab it. Not even from
nbd ;-)

Looks fine to me.

On 10/9/24 19:42, Jens Axboe wrote:
> On 10/7/24 4:16 PM, David Wei wrote:
>> From: David Wei <davidhwei@meta.com>
...
>> 	if (copy_to_user(arg, &reg, sizeof(reg))) {
>> +		io_close_zc_rxq(ifq);
>> 		ret = -EFAULT;
>> 		goto err;
>> 	}
>> 	if (copy_to_user(u64_to_user_ptr(reg.area_ptr), &area, sizeof(area))) {
>> +		io_close_zc_rxq(ifq);
>> 		ret = -EFAULT;
>> 		goto err;
>> 	}
>> 	ctx->ifq = ifq;
>> 	return 0;
>
> Not added in this patch, but since I was looking at rtnl lock coverage,
> it's OK to potentially fault while holding this lock? I'm assuming it
> is, as I can't imagine any faulting needing to grab it. Not even from
> nbd ;-)

I believe it should be fine to fault, but regardless neither this
chunk nor page pinning is under rtnl. Only netdev_rx_queue_restart()
is under it from heavy stuff, intentionally trying to minimise the
section as it's a global lock.

On 10/10/24 7:09 AM, Pavel Begunkov wrote:
> On 10/9/24 19:42, Jens Axboe wrote:
>> On 10/7/24 4:16 PM, David Wei wrote:
>>> From: David Wei <davidhwei@meta.com>
> ...
>>> 	if (copy_to_user(arg, &reg, sizeof(reg))) {
>>> +		io_close_zc_rxq(ifq);
>>> 		ret = -EFAULT;
>>> 		goto err;
>>> 	}
>>> 	if (copy_to_user(u64_to_user_ptr(reg.area_ptr), &area, sizeof(area))) {
>>> +		io_close_zc_rxq(ifq);
>>> 		ret = -EFAULT;
>>> 		goto err;
>>> 	}
>>> 	ctx->ifq = ifq;
>>> 	return 0;
>>
>> Not added in this patch, but since I was looking at rtnl lock coverage,
>> it's OK to potentially fault while holding this lock? I'm assuming it
>> is, as I can't imagine any faulting needing to grab it. Not even from
>> nbd ;-)
>
> I believe it should be fine to fault, but regardless neither this
> chunk nor page pinning is under rtnl. Only netdev_rx_queue_restart()
> is under it from heavy stuff, intentionally trying to minimise the
> section as it's a global lock.

Yep you're right, it is dropped before this section anyway.

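To make the locking shape under discussion concrete, here is a minimal sketch (not the patch itself) of how the registration path keeps rtnl_lock() confined to the device lookup and queue restart, with the user copies happening only after the lock is dropped. The helper name zcrx_register_sketch() is hypothetical; io_open_zc_rxq(), dev_get_by_index() and the reg fields are taken from the patch below, and the failure unwinding is trimmed for brevity.

```c
/*
 * Minimal sketch of the rtnl coverage discussed above, modeled on
 * io_register_zcrx_ifq() in the patch below. zcrx_register_sketch()
 * is a hypothetical name; error unwinding is trimmed for brevity.
 */
static int zcrx_register_sketch(struct io_zcrx_ifq *ifq,
				struct io_uring_zcrx_ifq_reg *reg,
				struct io_uring_zcrx_ifq_reg __user *arg)
{
	int ret;

	rtnl_lock();		/* global lock: keep the critical section small */
	ifq->dev = dev_get_by_index(current->nsproxy->net_ns, reg->if_idx);
	if (!ifq->dev) {
		rtnl_unlock();
		return -ENODEV;
	}
	/* netdev_rx_queue_restart() happens inside, still under rtnl */
	ret = io_open_zc_rxq(ifq, reg->if_rxq);
	rtnl_unlock();		/* dropped before anything that may fault */
	if (ret)
		return ret;

	/* copy_to_user() can fault here, but rtnl is no longer held */
	if (copy_to_user(arg, reg, sizeof(*reg)))
		return -EFAULT;
	return 0;
}
```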
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index d21e7017deb3..7939f830cf5b 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -6,6 +6,8 @@
 #include <linux/netdevice.h>
 #include <linux/io_uring.h>
 #include <linux/skbuff_ref.h>
+#include <linux/io_uring/net.h>
+#include <net/netdev_rx_queue.h>
 #include <net/busy_poll.h>
 #include <net/page_pool/helpers.h>
 #include <trace/events/page_pool.h>
@@ -49,6 +51,63 @@ static inline struct page *io_zcrx_iov_page(const struct net_iov *niov)
 	return area->pages[net_iov_idx(niov)];
 }
 
+static int io_open_zc_rxq(struct io_zcrx_ifq *ifq, unsigned ifq_idx)
+{
+	struct netdev_rx_queue *rxq;
+	struct net_device *dev = ifq->dev;
+	int ret;
+
+	ASSERT_RTNL();
+
+	if (ifq_idx >= dev->num_rx_queues)
+		return -EINVAL;
+	ifq_idx = array_index_nospec(ifq_idx, dev->num_rx_queues);
+
+	rxq = __netif_get_rx_queue(ifq->dev, ifq_idx);
+	if (rxq->mp_params.mp_priv)
+		return -EEXIST;
+
+	ifq->if_rxq = ifq_idx;
+	rxq->mp_params.mp_ops = &io_uring_pp_zc_ops;
+	rxq->mp_params.mp_priv = ifq;
+	ret = netdev_rx_queue_restart(ifq->dev, ifq->if_rxq);
+	if (ret) {
+		rxq->mp_params.mp_ops = NULL;
+		rxq->mp_params.mp_priv = NULL;
+		ifq->if_rxq = -1;
+	}
+	return ret;
+}
+
+static void io_close_zc_rxq(struct io_zcrx_ifq *ifq)
+{
+	struct netdev_rx_queue *rxq;
+	int err;
+
+	if (ifq->if_rxq == -1)
+		return;
+
+	rtnl_lock();
+	if (WARN_ON_ONCE(ifq->if_rxq >= ifq->dev->num_rx_queues)) {
+		rtnl_unlock();
+		return;
+	}
+
+	rxq = __netif_get_rx_queue(ifq->dev, ifq->if_rxq);
+
+	WARN_ON_ONCE(rxq->mp_params.mp_priv != ifq);
+
+	rxq->mp_params.mp_ops = NULL;
+	rxq->mp_params.mp_priv = NULL;
+
+	err = netdev_rx_queue_restart(ifq->dev, ifq->if_rxq);
+	if (err)
+		pr_devel("io_uring: can't restart a queue on zcrx close\n");
+
+	rtnl_unlock();
+	ifq->if_rxq = -1;
+}
+
 static int io_allocate_rbuf_ring(struct io_zcrx_ifq *ifq,
 				 struct io_uring_zcrx_ifq_reg *reg)
 {
@@ -169,9 +228,12 @@ static struct io_zcrx_ifq *io_zcrx_ifq_alloc(struct io_ring_ctx *ctx)
 
 static void io_zcrx_ifq_free(struct io_zcrx_ifq *ifq)
 {
+	io_close_zc_rxq(ifq);
+
 	if (ifq->area)
 		io_zcrx_free_area(ifq->area);
-
+	if (ifq->dev)
+		dev_put(ifq->dev);
 	io_free_rbuf_ring(ifq);
 	kfree(ifq);
 }
@@ -227,7 +289,17 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 		goto err;
 	ifq->rq_entries = reg.rq_entries;
-	ifq->if_rxq = reg.if_rxq;
+
+	ret = -ENODEV;
+	rtnl_lock();
+	ifq->dev = dev_get_by_index(current->nsproxy->net_ns, reg.if_idx);
+	if (!ifq->dev)
+		goto err_rtnl_unlock;
+
+	ret = io_open_zc_rxq(ifq, reg.if_rxq);
+	if (ret)
+		goto err_rtnl_unlock;
+	rtnl_unlock();
 
 	ring_sz = sizeof(struct io_uring);
 	rqes_sz = sizeof(struct io_uring_zcrx_rqe) * ifq->rq_entries;
@@ -237,15 +309,20 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 	reg.offsets.tail = offsetof(struct io_uring, tail);
 
 	if (copy_to_user(arg, &reg, sizeof(reg))) {
+		io_close_zc_rxq(ifq);
 		ret = -EFAULT;
 		goto err;
 	}
 	if (copy_to_user(u64_to_user_ptr(reg.area_ptr), &area, sizeof(area))) {
+		io_close_zc_rxq(ifq);
 		ret = -EFAULT;
 		goto err;
 	}
 	ctx->ifq = ifq;
 	return 0;
+
+err_rtnl_unlock:
+	rtnl_unlock();
 err:
 	io_zcrx_ifq_free(ifq);
 	return ret;
@@ -267,6 +344,9 @@ void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx)
 void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx)
 {
 	lockdep_assert_held(&ctx->uring_lock);
+
+	if (ctx->ifq)
+		io_close_zc_rxq(ctx->ifq);
 }
 
 static void io_zcrx_get_buf_uref(struct net_iov *niov)
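As a side note on the shape of the two helpers above: io_open_zc_rxq() publishes the provider and rolls the queue state back if the restart fails, while io_close_zc_rxq() clears it and restarts again so the queue refills from regular pages. Below is a stripped-down sketch of that install/rollback symmetry; the helper names mp_install()/mp_uninstall() are hypothetical, while the mp_params fields, io_uring_pp_zc_ops and netdev_rx_queue_restart() are as used in the patch.

```c
/*
 * Stripped-down sketch of the install/rollback pattern used by
 * io_open_zc_rxq()/io_close_zc_rxq() above. mp_install()/mp_uninstall()
 * are hypothetical names for illustration; both run under rtnl.
 */
static int mp_install(struct io_zcrx_ifq *ifq, struct netdev_rx_queue *rxq,
		      unsigned int qid)
{
	int ret;

	/* publish the provider, then restart so the queue refills from it */
	rxq->mp_params.mp_ops = &io_uring_pp_zc_ops;
	rxq->mp_params.mp_priv = ifq;
	ret = netdev_rx_queue_restart(ifq->dev, qid);
	if (ret) {
		/* restart failed: unpublish so the queue is left untouched */
		rxq->mp_params.mp_ops = NULL;
		rxq->mp_params.mp_priv = NULL;
	}
	return ret;
}

static void mp_uninstall(struct io_zcrx_ifq *ifq, struct netdev_rx_queue *rxq,
			 unsigned int qid)
{
	/* unpublish first, then restart so the queue goes back to normal pages */
	rxq->mp_params.mp_ops = NULL;
	rxq->mp_params.mp_priv = NULL;
	if (netdev_rx_queue_restart(ifq->dev, qid))
		pr_devel("io_uring: can't restart a queue on zcrx close\n");
}
```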