Message ID | 20241204172204.4180482-5-dw@davidwei.uk (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | io_uring zero copy rx | expand |
On Wed, Dec 4, 2024 at 9:22 AM David Wei <dw@davidwei.uk> wrote: > > From: Pavel Begunkov <asml.silence@gmail.com> > > There is a good bunch of places in generic paths assuming that the only > page pool memory provider is devmem TCP. As we want to reuse the net_iov > and provider infrastructure, we need to patch it up and explicitly check > the provider type when we branch into devmem TCP code. > > Signed-off-by: Pavel Begunkov <asml.silence@gmail.com> > Signed-off-by: David Wei <dw@davidwei.uk> > --- > net/core/devmem.c | 10 ++++++++-- > net/core/devmem.h | 8 ++++++++ > net/core/page_pool_user.c | 15 +++++++++------ > net/ipv4/tcp.c | 6 ++++++ > 4 files changed, 31 insertions(+), 8 deletions(-) > > diff --git a/net/core/devmem.c b/net/core/devmem.c > index 01738029e35c..78983a98e5dc 100644 > --- a/net/core/devmem.c > +++ b/net/core/devmem.c > @@ -28,6 +28,12 @@ static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1); > > static const struct memory_provider_ops dmabuf_devmem_ops; > > +bool net_is_devmem_page_pool_ops(const struct memory_provider_ops *ops) > +{ > + return ops == &dmabuf_devmem_ops; > +} > +EXPORT_SYMBOL_GPL(net_is_devmem_page_pool_ops); > + > static void net_devmem_dmabuf_free_chunk_owner(struct gen_pool *genpool, > struct gen_pool_chunk *chunk, > void *not_used) > @@ -316,10 +322,10 @@ void dev_dmabuf_uninstall(struct net_device *dev) > unsigned int i; > > for (i = 0; i < dev->real_num_rx_queues; i++) { > - binding = dev->_rx[i].mp_params.mp_priv; > - if (!binding) > + if (dev->_rx[i].mp_params.mp_ops != &dmabuf_devmem_ops) > continue; > Maybe use the net_is_devmem_page_pool_ops helper you defined here. Reviewed-by: Mina Almasry <almasrymina@google.com>
On Wed, 4 Dec 2024 09:21:43 -0800 David Wei wrote: > +EXPORT_SYMBOL_GPL(net_is_devmem_page_pool_ops); Export doesn't seem necessary, no module should need this right? > @@ -316,10 +322,10 @@ void dev_dmabuf_uninstall(struct net_device *dev) > unsigned int i; > > for (i = 0; i < dev->real_num_rx_queues; i++) { > - binding = dev->_rx[i].mp_params.mp_priv; > - if (!binding) > + if (dev->_rx[i].mp_params.mp_ops != &dmabuf_devmem_ops) > continue; > > + binding = dev->_rx[i].mp_params.mp_priv; > xa_for_each(&binding->bound_rxqs, xa_idx, rxq) > if (rxq == &dev->_rx[i]) { > xa_erase(&binding->bound_rxqs, xa_idx); Maybe add an op to mp_ops for queue unbinding? Having an op struct and yet running code under if (ops == X) seems odd. > - if (binding && nla_put_u32(rsp, NETDEV_A_PAGE_POOL_DMABUF, binding->id)) > - goto err_cancel; > + if (net_is_devmem_page_pool_ops(pool->mp_ops)) { > + binding = pool->mp_priv; > + if (nla_put_u32(rsp, NETDEV_A_PAGE_POOL_DMABUF, binding->id)) > + goto err_cancel; ditto, all mps should show up in page pool info. Even if it's just an empty nest for now, waiting for attributes to be added later. 
> + } > > genlmsg_end(rsp, hdr); > > @@ -353,16 +356,16 @@ void page_pool_unlist(struct page_pool *pool) > int page_pool_check_memory_provider(struct net_device *dev, > struct netdev_rx_queue *rxq) > { > - struct net_devmem_dmabuf_binding *binding = rxq->mp_params.mp_priv; > + void *mp_priv = rxq->mp_params.mp_priv; > struct page_pool *pool; > struct hlist_node *n; > > - if (!binding) > + if (!mp_priv) > return 0; > > mutex_lock(&page_pools_lock); > hlist_for_each_entry_safe(pool, n, &dev->page_pools, user.list) { > - if (pool->mp_priv != binding) > + if (pool->mp_priv != mp_priv) > continue; > > if (pool->slow.queue_idx == get_netdev_rx_queue_index(rxq)) { appears to be unrelated > diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c > index b872de9a8271..f22005c70fd3 100644 > --- a/net/ipv4/tcp.c > +++ b/net/ipv4/tcp.c > @@ -277,6 +277,7 @@ > #include <net/ip.h> > #include <net/sock.h> > #include <net/rstreason.h> > +#include <net/page_pool/types.h> types.h being needed to call a helper is unusual > #include <linux/uaccess.h> > #include <asm/ioctls.h> > @@ -2476,6 +2477,11 @@ static int tcp_recvmsg_dmabuf(struct sock *sk, const struct sk_buff *skb, > } > > niov = skb_frag_net_iov(frag); > + if (net_is_devmem_page_pool_ops(niov->pp->mp_ops)) { This one seems legit to me, FWIW, checking if its devmem in devmem specific code is fine. > + err = -ENODEV; > + goto out; > + } > + > end = start + skb_frag_size(frag); > copy = end - offset; >
On 12/10/24 03:15, Jakub Kicinski wrote: > On Wed, 4 Dec 2024 09:21:43 -0800 David Wei wrote: >> +EXPORT_SYMBOL_GPL(net_is_devmem_page_pool_ops); > > Export doesn't seem necessary, no module should need this right? I remembered TCP can be modularised, but seems CONFIG_INET is bool. Then yeah, can be killed. >> @@ -316,10 +322,10 @@ void dev_dmabuf_uninstall(struct net_device *dev) >> unsigned int i; >> >> for (i = 0; i < dev->real_num_rx_queues; i++) { >> - binding = dev->_rx[i].mp_params.mp_priv; >> - if (!binding) >> + if (dev->_rx[i].mp_params.mp_ops != &dmabuf_devmem_ops) >> continue; >> >> + binding = dev->_rx[i].mp_params.mp_priv; >> xa_for_each(&binding->bound_rxqs, xa_idx, rxq) >> if (rxq == &dev->_rx[i]) { >> xa_erase(&binding->bound_rxqs, xa_idx); > > Maybe add an op to mp_ops for queue unbinding? > Having an op struct and yet running code under if (ops == X) seems odd. ok >> - if (binding && nla_put_u32(rsp, NETDEV_A_PAGE_POOL_DMABUF, binding->id)) >> - goto err_cancel; >> + if (net_is_devmem_page_pool_ops(pool->mp_ops)) { >> + binding = pool->mp_priv; >> + if (nla_put_u32(rsp, NETDEV_A_PAGE_POOL_DMABUF, binding->id)) >> + goto err_cancel; > > ditto, all mps should show up in page pool info. Even if it's just > an empty nest for now, waiting for attributes to be added later. 
> >> + } >> >> genlmsg_end(rsp, hdr); >> >> @@ -353,16 +356,16 @@ void page_pool_unlist(struct page_pool *pool) >> int page_pool_check_memory_provider(struct net_device *dev, >> struct netdev_rx_queue *rxq) >> { >> - struct net_devmem_dmabuf_binding *binding = rxq->mp_params.mp_priv; >> + void *mp_priv = rxq->mp_params.mp_priv; >> struct page_pool *pool; >> struct hlist_node *n; >> >> - if (!binding) >> + if (!mp_priv) >> return 0; >> >> mutex_lock(&page_pools_lock); >> hlist_for_each_entry_safe(pool, n, &dev->page_pools, user.list) { >> - if (pool->mp_priv != binding) >> + if (pool->mp_priv != mp_priv) >> continue; >> >> if (pool->slow.queue_idx == get_netdev_rx_queue_index(rxq)) { > > appears to be unrelated The entire chunk? It removes the type, nobody should be blindly casting it to devmem specific binding even if it's not referenced, otherwise it gets pretty ugly pretty fast. E.g. people might assume that it's always the right type to cast to. >> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c >> index b872de9a8271..f22005c70fd3 100644 >> --- a/net/ipv4/tcp.c >> +++ b/net/ipv4/tcp.c >> @@ -277,6 +277,7 @@ >> #include <net/ip.h> >> #include <net/sock.h> >> #include <net/rstreason.h> >> +#include <net/page_pool/types.h> > > types.h being needed to call a helper is unusual it's in devmem.h, so types.h shouldn't be needed indeed
On Tue, 10 Dec 2024 03:53:36 +0000 Pavel Begunkov wrote: > >> @@ -353,16 +356,16 @@ void page_pool_unlist(struct page_pool *pool) > >> int page_pool_check_memory_provider(struct net_device *dev, > >> struct netdev_rx_queue *rxq) > >> { > >> - struct net_devmem_dmabuf_binding *binding = rxq->mp_params.mp_priv; > >> + void *mp_priv = rxq->mp_params.mp_priv; > >> struct page_pool *pool; > >> struct hlist_node *n; > >> > >> - if (!binding) > >> + if (!mp_priv) > >> return 0; > >> > >> mutex_lock(&page_pools_lock); > >> hlist_for_each_entry_safe(pool, n, &dev->page_pools, user.list) { > >> - if (pool->mp_priv != binding) > >> + if (pool->mp_priv != mp_priv) > >> continue; > >> > >> if (pool->slow.queue_idx == get_netdev_rx_queue_index(rxq)) { > > > > appears to be unrelated > > The entire chunk? It removes the type, nobody should be blindly casting > it to devmem specific binding even if it's not referenced, otherwise it > gets pretty ugly pretty fast. E.g. people might assume that it's always > the right type to cast to. Change is good. It didn't feel very related to the other changes which specifically address devmem code. While this one only removes the type because the code itself isn't devmem specific. Right? If you make this chunk a separate patch #1 in the series I can apply it right away. pp->mp_priv is void *, this is a good cleanup regardless.
On 12/10/24 04:06, Jakub Kicinski wrote: > On Tue, 10 Dec 2024 03:53:36 +0000 Pavel Begunkov wrote: >>>> @@ -353,16 +356,16 @@ void page_pool_unlist(struct page_pool *pool) >>>> int page_pool_check_memory_provider(struct net_device *dev, >>>> struct netdev_rx_queue *rxq) >>>> { >>>> - struct net_devmem_dmabuf_binding *binding = rxq->mp_params.mp_priv; >>>> + void *mp_priv = rxq->mp_params.mp_priv; >>>> struct page_pool *pool; >>>> struct hlist_node *n; >>>> >>>> - if (!binding) >>>> + if (!mp_priv) >>>> return 0; >>>> >>>> mutex_lock(&page_pools_lock); >>>> hlist_for_each_entry_safe(pool, n, &dev->page_pools, user.list) { >>>> - if (pool->mp_priv != binding) >>>> + if (pool->mp_priv != mp_priv) >>>> continue; >>>> >>>> if (pool->slow.queue_idx == get_netdev_rx_queue_index(rxq)) { >>> >>> appears to be unrelated >> >> The entire chunk? It removes the type, nobody should be blindly casting >> it to devmem specific binding even if it's not referenced, otherwise it >> gets pretty ugly pretty fast. E.g. people might assume that it's always >> the right type to cast to. > > Change is good. It didn't feel very related to the other changes > which specifically address devmem code. While this one only removes > the type because the code itself isn't devmem specific. Right? Right, and nobody cared because there wasn't anyone but devmem prior to the series. > if you make this chunk a separate patch #1 in the series I can apply it > right away. pp->mp_priv is void *, this is a good cleanup regardless. I can throw it into a separate patch, though let's just keep it in the series as it'd complicate merging otherwise. Let's say if it doesn't land this release, I'll send it separately with 1/17 as cleanups.
diff --git a/net/core/devmem.c b/net/core/devmem.c index 01738029e35c..78983a98e5dc 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -28,6 +28,12 @@ static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1); static const struct memory_provider_ops dmabuf_devmem_ops; +bool net_is_devmem_page_pool_ops(const struct memory_provider_ops *ops) +{ + return ops == &dmabuf_devmem_ops; +} +EXPORT_SYMBOL_GPL(net_is_devmem_page_pool_ops); + static void net_devmem_dmabuf_free_chunk_owner(struct gen_pool *genpool, struct gen_pool_chunk *chunk, void *not_used) @@ -316,10 +322,10 @@ void dev_dmabuf_uninstall(struct net_device *dev) unsigned int i; for (i = 0; i < dev->real_num_rx_queues; i++) { - binding = dev->_rx[i].mp_params.mp_priv; - if (!binding) + if (dev->_rx[i].mp_params.mp_ops != &dmabuf_devmem_ops) continue; + binding = dev->_rx[i].mp_params.mp_priv; xa_for_each(&binding->bound_rxqs, xa_idx, rxq) if (rxq == &dev->_rx[i]) { xa_erase(&binding->bound_rxqs, xa_idx); diff --git a/net/core/devmem.h b/net/core/devmem.h index a2b9913e9a17..a3fdd66bb05b 100644 --- a/net/core/devmem.h +++ b/net/core/devmem.h @@ -116,6 +116,8 @@ struct net_iov * net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding); void net_devmem_free_dmabuf(struct net_iov *ppiov); +bool net_is_devmem_page_pool_ops(const struct memory_provider_ops *ops); + #else struct net_devmem_dmabuf_binding; @@ -168,6 +170,12 @@ static inline u32 net_devmem_iov_binding_id(const struct net_iov *niov) { return 0; } + +static inline bool +net_is_devmem_page_pool_ops(const struct memory_provider_ops *ops) +{ + return false; +} #endif #endif /* _NET_DEVMEM_H */ diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c index 48335766c1bf..604862a73535 100644 --- a/net/core/page_pool_user.c +++ b/net/core/page_pool_user.c @@ -214,7 +214,7 @@ static int page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool, const struct genl_info *info) { - struct net_devmem_dmabuf_binding 
*binding = pool->mp_priv; + struct net_devmem_dmabuf_binding *binding; size_t inflight, refsz; void *hdr; @@ -244,8 +244,11 @@ page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool, pool->user.detach_time)) goto err_cancel; - if (binding && nla_put_u32(rsp, NETDEV_A_PAGE_POOL_DMABUF, binding->id)) - goto err_cancel; + if (net_is_devmem_page_pool_ops(pool->mp_ops)) { + binding = pool->mp_priv; + if (nla_put_u32(rsp, NETDEV_A_PAGE_POOL_DMABUF, binding->id)) + goto err_cancel; + } genlmsg_end(rsp, hdr); @@ -353,16 +356,16 @@ void page_pool_unlist(struct page_pool *pool) int page_pool_check_memory_provider(struct net_device *dev, struct netdev_rx_queue *rxq) { - struct net_devmem_dmabuf_binding *binding = rxq->mp_params.mp_priv; + void *mp_priv = rxq->mp_params.mp_priv; struct page_pool *pool; struct hlist_node *n; - if (!binding) + if (!mp_priv) return 0; mutex_lock(&page_pools_lock); hlist_for_each_entry_safe(pool, n, &dev->page_pools, user.list) { - if (pool->mp_priv != binding) + if (pool->mp_priv != mp_priv) continue; if (pool->slow.queue_idx == get_netdev_rx_queue_index(rxq)) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b872de9a8271..f22005c70fd3 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -277,6 +277,7 @@ #include <net/ip.h> #include <net/sock.h> #include <net/rstreason.h> +#include <net/page_pool/types.h> #include <linux/uaccess.h> #include <asm/ioctls.h> @@ -2476,6 +2477,11 @@ static int tcp_recvmsg_dmabuf(struct sock *sk, const struct sk_buff *skb, } niov = skb_frag_net_iov(frag); + if (net_is_devmem_page_pool_ops(niov->pp->mp_ops)) { + err = -ENODEV; + goto out; + } + end = start + skb_frag_size(frag); copy = end - offset;