| Message ID | 20250309124719.21285-1-toke@redhat.com (mailing list archive) |
| --- | --- |
| State | New |
| Series | [RFC,net-next,v2] page_pool: Track DMA-mapped pages and unmap them when destroying the pool |
On Sun, Mar 9, 2025 at 5:50 AM Toke Høiland-Jørgensen <toke@redhat.com> wrote:
>
> When enabling DMA mapping in page_pool, pages are kept DMA mapped until
> they are released from the pool, to avoid the overhead of re-mapping the
> pages every time they are used. This causes problems when a device is
> torn down, because the page pool can't unmap the pages until they are
> returned to the pool. This causes resource leaks and/or crashes when
> there are pages still outstanding while the device is torn down, because
> page_pool will attempt an unmap of a non-existent DMA device on the
> subsequent page return.
>
> To fix this, implement a simple tracking of outstanding dma-mapped pages
> in page pool using an xarray. This was first suggested by Mina[0], and
> turns out to be fairly straightforward: We simply store pointers to
> pages directly in the xarray with xa_alloc() when they are first DMA
> mapped, and remove them from the array on unmap. Then, when a page pool
> is torn down, it can simply walk the xarray and unmap all pages still
> present there before returning, which also allows us to get rid of the
> get/put_device() calls in page_pool. Using xa_cmpxchg(), no additional
> synchronisation is needed, as a page will only ever be unmapped once.
>
> To avoid having to walk the entire xarray on unmap to find the page
> reference, we stash the ID assigned by xa_alloc() into the page
> structure itself, using the upper bits of the pp_magic field. This
> requires a couple of defines to avoid conflicting with the
> POISON_POINTER_DELTA define, but this is all evaluated at compile-time,
> so should not affect run-time performance.
>
> Since all the tracking is performed on DMA map/unmap, no additional code
> is needed in the fast path, meaning the performance overhead of this
> tracking is negligible. The extra memory needed to track the pages is
> neatly encapsulated inside xarray, which uses the 'struct xa_node'
> structure to track items. This structure is 576 bytes long, with slots
> for 64 items, meaning that a full node incurs only 9 bytes of overhead
> per slot it tracks (in practice, it probably won't be this efficient,
> but in any case it should be an acceptable overhead).
>
> [0] https://lore.kernel.org/all/CAHS8izPg7B5DwKfSuzz-iOop_YRbk3Sd6Y4rX7KBG9DcVJcyWg@mail.gmail.com/
>
> Fixes: ff7d6b27f894 ("page_pool: refurbish version of page_pool code")
> Reported-by: Yonglong Liu <liuyonglong@huawei.com>
> Suggested-by: Mina Almasry <almasrymina@google.com>
> Reviewed-by: Jesper Dangaard Brouer <hawk@kernel.org>
> Tested-by: Jesper Dangaard Brouer <hawk@kernel.org>
> Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>

I only have nits and suggestions for improvement. With and without those:

Reviewed-by: Mina Almasry <almasrymina@google.com>

> ---
> v2:
>   - Stash the id in the pp_magic field of struct page instead of
>     overwriting the mapping field. This version is compile-tested only.
>
>  include/net/page_pool/types.h | 31 +++++++++++++++++++++++
>  mm/page_alloc.c               |  3 ++-
>  net/core/netmem_priv.h        | 35 +++++++++++++++++++++++++-
>  net/core/page_pool.c          | 46 +++++++++++++++++++++++++++++------
>  4 files changed, 105 insertions(+), 10 deletions(-)
>
> diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
> index 36eb57d73abc..d879a505ca4d 100644
> --- a/include/net/page_pool/types.h
> +++ b/include/net/page_pool/types.h
> @@ -6,6 +6,7 @@
>  #include <linux/dma-direction.h>
>  #include <linux/ptr_ring.h>
>  #include <linux/types.h>
> +#include <linux/xarray.h>
>  #include <net/netmem.h>
>
>  #define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA
> @@ -54,6 +55,34 @@ struct pp_alloc_cache {
>         netmem_ref cache[PP_ALLOC_CACHE_SIZE];
>  };
>
> +/*
> + * DMA mapping IDs
> + *
> + * When DMA-mapping a page, we allocate an ID (from an xarray) and stash this in
> + * the upper bits of page->pp_magic. The number of bits available here is
> + * constrained by the size of an unsigned long, and the definition of
> + * PP_SIGNATURE.
> + */
> +#define PP_DMA_INDEX_SHIFT (1 + __fls(PP_SIGNATURE - POISON_POINTER_DELTA))
> +#define _PP_DMA_INDEX_BITS MIN(32, BITS_PER_LONG - PP_DMA_INDEX_SHIFT - 1)
> +
> +#if POISON_POINTER_DELTA > 0
> +/* PP_SIGNATURE includes POISON_POINTER_DELTA, so limit the size of the DMA
> + * index to not overlap with that if set
> + */
> +#define PP_DMA_INDEX_BITS MIN(_PP_DMA_INDEX_BITS, \
> +                              __ffs(POISON_POINTER_DELTA) - PP_DMA_INDEX_SHIFT)
> +#else
> +#define PP_DMA_INDEX_BITS _PP_DMA_INDEX_BITS
> +#endif
> +
> +#define PP_DMA_INDEX_MASK GENMASK(PP_DMA_INDEX_BITS + PP_DMA_INDEX_SHIFT - 1, \
> +                                  PP_DMA_INDEX_SHIFT)
> +#define PP_DMA_INDEX_LIMIT XA_LIMIT(1, BIT(PP_DMA_INDEX_BITS) - 1)
> +
> +/* For check in page_alloc.c */
> +#define PP_MAGIC_MASK (~(PP_DMA_INDEX_MASK | 0x3))
> +
>  /**
>   * struct page_pool_params - page pool parameters
>   * @fast: params accessed frequently on hotpath
> @@ -221,6 +250,8 @@ struct page_pool {
>         void *mp_priv;
>         const struct memory_provider_ops *mp_ops;
>
> +       struct xarray dma_mapped;
> +
>  #ifdef CONFIG_PAGE_POOL_STATS
>         /* recycle stats are per-cpu to avoid locking */
>         struct page_pool_recycle_stats __percpu *recycle_stats;
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 579789600a3c..96776e7b2301 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -55,6 +55,7 @@
>  #include <linux/delayacct.h>
>  #include <linux/cacheinfo.h>
>  #include <linux/pgalloc_tag.h>
> +#include <net/page_pool/types.h>
>  #include <asm/div64.h>
>  #include "internal.h"
>  #include "shuffle.h"
> @@ -873,7 +874,7 @@ static inline bool page_expected_state(struct page *page,
>                         page->memcg_data |
>  #endif
>  #ifdef CONFIG_PAGE_POOL
> -                       ((page->pp_magic & ~0x3UL) == PP_SIGNATURE) |
> +                       ((page->pp_magic & PP_MAGIC_MASK) == PP_SIGNATURE) |
>  #endif
>                         (page->flags & check_flags)))
>                 return false;
> diff --git a/net/core/netmem_priv.h b/net/core/netmem_priv.h
> index 7eadb8393e00..59face70f40d 100644
> --- a/net/core/netmem_priv.h
> +++ b/net/core/netmem_priv.h
> @@ -3,10 +3,19 @@
>  #ifndef __NETMEM_PRIV_H
>  #define __NETMEM_PRIV_H
>
> -static inline unsigned long netmem_get_pp_magic(netmem_ref netmem)
> +static inline unsigned long _netmem_get_pp_magic(netmem_ref netmem)

Nit: maybe __netmem_get...()

To be consistent (I used double underscore elsewhere).

>  {
>         return __netmem_clear_lsb(netmem)->pp_magic;
>  }

nit: newline between function definitions.
> +static inline unsigned long netmem_get_pp_magic(netmem_ref netmem)
> +{
> +       return _netmem_get_pp_magic(netmem) & ~PP_DMA_INDEX_MASK;
> +}
> +
> +static inline void netmem_set_pp_magic(netmem_ref netmem, unsigned long pp_magic)
> +{
> +       __netmem_clear_lsb(netmem)->pp_magic = pp_magic;
> +}
>
>  static inline void netmem_or_pp_magic(netmem_ref netmem, unsigned long pp_magic)
>  {
> @@ -28,4 +37,28 @@ static inline void netmem_set_dma_addr(netmem_ref netmem,
>  {
>         __netmem_clear_lsb(netmem)->dma_addr = dma_addr;
>  }
> +
> +static inline unsigned long netmem_get_dma_index(netmem_ref netmem)
> +{
> +       unsigned long magic;
> +
> +       if (WARN_ON_ONCE(netmem_is_net_iov(netmem)))
> +               return 0;
> +
> +       magic = _netmem_get_pp_magic(netmem);
> +
> +       return (magic & PP_DMA_INDEX_MASK) >> PP_DMA_INDEX_SHIFT;
> +}
> +
> +static inline void netmem_set_dma_index(netmem_ref netmem,
> +                                       unsigned long id)
> +{
> +       unsigned long magic;
> +
> +       if (WARN_ON_ONCE(netmem_is_net_iov(netmem)))
> +               return;
> +
> +       magic = netmem_get_pp_magic(netmem) | (id << PP_DMA_INDEX_SHIFT);
> +       netmem_set_pp_magic(netmem, magic);
> +}
>  #endif
> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
> index acef1fcd8ddc..dceef9b82198 100644
> --- a/net/core/page_pool.c
> +++ b/net/core/page_pool.c
> @@ -226,6 +226,8 @@ static int page_pool_init(struct page_pool *pool,
>                         return -EINVAL;
>
>                 pool->dma_map = true;
> +
> +               xa_init_flags(&pool->dma_mapped, XA_FLAGS_ALLOC1);
>         }
>
>         if (pool->slow.flags & PP_FLAG_DMA_SYNC_DEV) {
> @@ -275,9 +277,6 @@ static int page_pool_init(struct page_pool *pool,
>         /* Driver calling page_pool_create() also call page_pool_destroy() */
>         refcount_set(&pool->user_cnt, 1);
>
> -       if (pool->dma_map)
> -               get_device(pool->p.dev);
> -
>         if (pool->slow.flags & PP_FLAG_ALLOW_UNREADABLE_NETMEM) {
>                 /* We rely on rtnl_lock()ing to make sure netdev_rx_queue
>                  * configuration doesn't change while we're initializing
> @@ -325,7 +324,7 @@ static void page_pool_uninit(struct page_pool *pool)
>         ptr_ring_cleanup(&pool->ring, NULL);
>
>         if (pool->dma_map)
> -               put_device(pool->p.dev);
> +               xa_destroy(&pool->dma_mapped);
>
>  #ifdef CONFIG_PAGE_POOL_STATS
>         if (!pool->system)
> @@ -470,9 +469,11 @@ page_pool_dma_sync_for_device(const struct page_pool *pool,
>                 __page_pool_dma_sync_for_device(pool, netmem, dma_sync_size);
>  }
>
> -static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem)
> +static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem, gfp_t gfp)
>  {
>         dma_addr_t dma;
> +       int err;
> +       u32 id;
>
>         /* Setup DMA mapping: use 'struct page' area for storing DMA-addr
>          * since dma_addr_t can be either 32 or 64 bits and does not always fit
> @@ -486,9 +487,19 @@ static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem)
>         if (dma_mapping_error(pool->p.dev, dma))
>                 return false;
>
> +       if (in_softirq())
> +               err = xa_alloc(&pool->dma_mapped, &id, netmem_to_page(netmem),
> +                              PP_DMA_INDEX_LIMIT, gfp);
> +       else
> +               err = xa_alloc_bh(&pool->dma_mapped, &id, netmem_to_page(netmem),
> +                              PP_DMA_INDEX_LIMIT, gfp);

I think there is a bit of discussion on this thread on whether
PP_DMA_INDEX_LIMIT is going to be large enough. xa_alloc() returns -EBUSY
if there are no available entries in the xarray. How about we do a
rate-limited pr_err or DEBUG_NET_WARN_ON_ONCE when that happens? From your
math above it looks like we should have enough bits for page_pools on
32-bit systems, but it may be good to warn when we hit that limit.
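As an illustration of that suggestion, the check at the xa_alloc() call site in page_pool_dma_map() could take roughly the following shape. This is only a sketch, not part of the posted patch; the choice of pr_warn_ratelimited() over DEBUG_NET_WARN_ON_ONCE() and the message wording are assumptions.

```c
	/* Hypothetical follow-up to the hunk quoted above, not in the posted
	 * patch: surface DMA index exhaustion instead of failing silently.
	 * xa_alloc() returns -EBUSY once PP_DMA_INDEX_LIMIT has no free IDs.
	 */
	if (err) {
		if (err == -EBUSY)
			pr_warn_ratelimited("page_pool: DMA index space exhausted\n");
		goto unmap_failed;
	}
```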
Mina Almasry <almasrymina@google.com> writes:

> On Sun, Mar 9, 2025 at 5:50 AM Toke Høiland-Jørgensen <toke@redhat.com> wrote:
>>
>> When enabling DMA mapping in page_pool, pages are kept DMA mapped until
>> they are released from the pool, to avoid the overhead of re-mapping the
>> pages every time they are used. This causes problems when a device is
>> torn down, because the page pool can't unmap the pages until they are
>> returned to the pool. This causes resource leaks and/or crashes when
>> there are pages still outstanding while the device is torn down, because
>> page_pool will attempt an unmap of a non-existent DMA device on the
>> subsequent page return.

[...]

>> Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
>
> I only have nits and suggestions for improvement. With and without those:
>
> Reviewed-by: Mina Almasry <almasrymina@google.com>

Thanks! Fixed your nits and a couple of others and pushed here:
https://git.kernel.org/toke/c/df6248a71f85

I'll subject it to some testing and submit a non-RFC version once I've
verified that it works and doesn't introduce any new problems :)

-Toke
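The core of the mechanism discussed above is packing the ID returned by xa_alloc() into the otherwise-unused upper bits of page->pp_magic and recovering it again on unmap. The standalone sketch below illustrates just that packing step with made-up constants; in the patch itself (see the diff below) the shift and width are derived at compile time from PP_SIGNATURE and POISON_POINTER_DELTA.

```c
/*
 * Standalone illustration of the ID-stashing trick, with made-up constants.
 * An ID handed out by xa_alloc() is stored above the signature bits of the
 * magic word and recovered on unmap, without touching any other field.
 */
#include <stdio.h>

#define SIGNATURE        0x40UL  /* stand-in for PP_SIGNATURE */
#define DMA_INDEX_SHIFT  8       /* stand-in for PP_DMA_INDEX_SHIFT */
#define DMA_INDEX_BITS   24      /* stand-in for PP_DMA_INDEX_BITS */
#define DMA_INDEX_MASK   (((1UL << DMA_INDEX_BITS) - 1) << DMA_INDEX_SHIFT)

static unsigned long set_dma_index(unsigned long magic, unsigned long id)
{
	/* keep the signature bits, stash the ID above them */
	return (magic & ~DMA_INDEX_MASK) | (id << DMA_INDEX_SHIFT);
}

static unsigned long get_dma_index(unsigned long magic)
{
	return (magic & DMA_INDEX_MASK) >> DMA_INDEX_SHIFT;
}

int main(void)
{
	unsigned long magic = SIGNATURE;

	magic = set_dma_index(magic, 42);	/* ID from xa_alloc() */
	printf("id=%lu, signature intact=%d\n", get_dma_index(magic),
	       (magic & ~DMA_INDEX_MASK) == SIGNATURE);

	magic = set_dma_index(magic, 0);	/* cleared again on unmap */
	printf("id after unmap=%lu\n", get_dma_index(magic));
	return 0;
}
```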
diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index 36eb57d73abc..d879a505ca4d 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -6,6 +6,7 @@
 #include <linux/dma-direction.h>
 #include <linux/ptr_ring.h>
 #include <linux/types.h>
+#include <linux/xarray.h>
 #include <net/netmem.h>
 
 #define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA
@@ -54,6 +55,34 @@ struct pp_alloc_cache {
         netmem_ref cache[PP_ALLOC_CACHE_SIZE];
 };
 
+/*
+ * DMA mapping IDs
+ *
+ * When DMA-mapping a page, we allocate an ID (from an xarray) and stash this in
+ * the upper bits of page->pp_magic. The number of bits available here is
+ * constrained by the size of an unsigned long, and the definition of
+ * PP_SIGNATURE.
+ */
+#define PP_DMA_INDEX_SHIFT (1 + __fls(PP_SIGNATURE - POISON_POINTER_DELTA))
+#define _PP_DMA_INDEX_BITS MIN(32, BITS_PER_LONG - PP_DMA_INDEX_SHIFT - 1)
+
+#if POISON_POINTER_DELTA > 0
+/* PP_SIGNATURE includes POISON_POINTER_DELTA, so limit the size of the DMA
+ * index to not overlap with that if set
+ */
+#define PP_DMA_INDEX_BITS MIN(_PP_DMA_INDEX_BITS, \
+                              __ffs(POISON_POINTER_DELTA) - PP_DMA_INDEX_SHIFT)
+#else
+#define PP_DMA_INDEX_BITS _PP_DMA_INDEX_BITS
+#endif
+
+#define PP_DMA_INDEX_MASK GENMASK(PP_DMA_INDEX_BITS + PP_DMA_INDEX_SHIFT - 1, \
+                                  PP_DMA_INDEX_SHIFT)
+#define PP_DMA_INDEX_LIMIT XA_LIMIT(1, BIT(PP_DMA_INDEX_BITS) - 1)
+
+/* For check in page_alloc.c */
+#define PP_MAGIC_MASK (~(PP_DMA_INDEX_MASK | 0x3))
+
 /**
  * struct page_pool_params - page pool parameters
  * @fast: params accessed frequently on hotpath
@@ -221,6 +250,8 @@ struct page_pool {
         void *mp_priv;
         const struct memory_provider_ops *mp_ops;
 
+        struct xarray dma_mapped;
+
 #ifdef CONFIG_PAGE_POOL_STATS
         /* recycle stats are per-cpu to avoid locking */
         struct page_pool_recycle_stats __percpu *recycle_stats;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 579789600a3c..96776e7b2301 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -55,6 +55,7 @@
 #include <linux/delayacct.h>
 #include <linux/cacheinfo.h>
 #include <linux/pgalloc_tag.h>
+#include <net/page_pool/types.h>
 #include <asm/div64.h>
 #include "internal.h"
 #include "shuffle.h"
@@ -873,7 +874,7 @@ static inline bool page_expected_state(struct page *page,
                         page->memcg_data |
 #endif
 #ifdef CONFIG_PAGE_POOL
-                        ((page->pp_magic & ~0x3UL) == PP_SIGNATURE) |
+                        ((page->pp_magic & PP_MAGIC_MASK) == PP_SIGNATURE) |
 #endif
                         (page->flags & check_flags)))
                 return false;
diff --git a/net/core/netmem_priv.h b/net/core/netmem_priv.h
index 7eadb8393e00..59face70f40d 100644
--- a/net/core/netmem_priv.h
+++ b/net/core/netmem_priv.h
@@ -3,10 +3,19 @@
 #ifndef __NETMEM_PRIV_H
 #define __NETMEM_PRIV_H
 
-static inline unsigned long netmem_get_pp_magic(netmem_ref netmem)
+static inline unsigned long _netmem_get_pp_magic(netmem_ref netmem)
 {
         return __netmem_clear_lsb(netmem)->pp_magic;
 }
+static inline unsigned long netmem_get_pp_magic(netmem_ref netmem)
+{
+        return _netmem_get_pp_magic(netmem) & ~PP_DMA_INDEX_MASK;
+}
+
+static inline void netmem_set_pp_magic(netmem_ref netmem, unsigned long pp_magic)
+{
+        __netmem_clear_lsb(netmem)->pp_magic = pp_magic;
+}
 
 static inline void netmem_or_pp_magic(netmem_ref netmem, unsigned long pp_magic)
 {
@@ -28,4 +37,28 @@ static inline void netmem_set_dma_addr(netmem_ref netmem,
 {
         __netmem_clear_lsb(netmem)->dma_addr = dma_addr;
 }
+
+static inline unsigned long netmem_get_dma_index(netmem_ref netmem)
+{
+        unsigned long magic;
+
+        if (WARN_ON_ONCE(netmem_is_net_iov(netmem)))
+                return 0;
+
+        magic = _netmem_get_pp_magic(netmem);
+
+        return (magic & PP_DMA_INDEX_MASK) >> PP_DMA_INDEX_SHIFT;
+}
+
+static inline void netmem_set_dma_index(netmem_ref netmem,
+                                        unsigned long id)
+{
+        unsigned long magic;
+
+        if (WARN_ON_ONCE(netmem_is_net_iov(netmem)))
+                return;
+
+        magic = netmem_get_pp_magic(netmem) | (id << PP_DMA_INDEX_SHIFT);
+        netmem_set_pp_magic(netmem, magic);
+}
 #endif
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index acef1fcd8ddc..dceef9b82198 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -226,6 +226,8 @@ static int page_pool_init(struct page_pool *pool,
                         return -EINVAL;
 
                 pool->dma_map = true;
+
+                xa_init_flags(&pool->dma_mapped, XA_FLAGS_ALLOC1);
         }
 
         if (pool->slow.flags & PP_FLAG_DMA_SYNC_DEV) {
@@ -275,9 +277,6 @@ static int page_pool_init(struct page_pool *pool,
         /* Driver calling page_pool_create() also call page_pool_destroy() */
         refcount_set(&pool->user_cnt, 1);
 
-        if (pool->dma_map)
-                get_device(pool->p.dev);
-
         if (pool->slow.flags & PP_FLAG_ALLOW_UNREADABLE_NETMEM) {
                 /* We rely on rtnl_lock()ing to make sure netdev_rx_queue
                  * configuration doesn't change while we're initializing
@@ -325,7 +324,7 @@ static void page_pool_uninit(struct page_pool *pool)
         ptr_ring_cleanup(&pool->ring, NULL);
 
         if (pool->dma_map)
-                put_device(pool->p.dev);
+                xa_destroy(&pool->dma_mapped);
 
 #ifdef CONFIG_PAGE_POOL_STATS
         if (!pool->system)
@@ -470,9 +469,11 @@ page_pool_dma_sync_for_device(const struct page_pool *pool,
                 __page_pool_dma_sync_for_device(pool, netmem, dma_sync_size);
 }
 
-static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem)
+static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem, gfp_t gfp)
 {
         dma_addr_t dma;
+        int err;
+        u32 id;
 
         /* Setup DMA mapping: use 'struct page' area for storing DMA-addr
          * since dma_addr_t can be either 32 or 64 bits and does not always fit
@@ -486,9 +487,19 @@ static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem)
         if (dma_mapping_error(pool->p.dev, dma))
                 return false;
 
+        if (in_softirq())
+                err = xa_alloc(&pool->dma_mapped, &id, netmem_to_page(netmem),
+                               PP_DMA_INDEX_LIMIT, gfp);
+        else
+                err = xa_alloc_bh(&pool->dma_mapped, &id, netmem_to_page(netmem),
+                               PP_DMA_INDEX_LIMIT, gfp);
+        if (err)
+                goto unmap_failed;
+
         if (page_pool_set_dma_addr_netmem(netmem, dma))
                 goto unmap_failed;
 
+        netmem_set_dma_index(netmem, id);
         page_pool_dma_sync_for_device(pool, netmem, pool->p.max_len);
 
         return true;
@@ -511,7 +522,7 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
         if (unlikely(!page))
                 return NULL;
 
-        if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page_to_netmem(page)))) {
+        if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page_to_netmem(page), gfp))) {
                 put_page(page);
                 return NULL;
         }
@@ -557,7 +568,7 @@ static noinline netmem_ref __page_pool_alloc_pages_slow(struct page_pool *pool,
          */
         for (i = 0; i < nr_pages; i++) {
                 netmem = pool->alloc.cache[i];
-                if (dma_map && unlikely(!page_pool_dma_map(pool, netmem))) {
+                if (dma_map && unlikely(!page_pool_dma_map(pool, netmem, gfp))) {
                         put_page(netmem_to_page(netmem));
                         continue;
                 }
@@ -659,6 +670,8 @@ void page_pool_clear_pp_info(netmem_ref netmem)
 static __always_inline void __page_pool_release_page_dma(struct page_pool *pool,
                                                           netmem_ref netmem)
 {
+        struct page *old, *page = netmem_to_page(netmem);
+        unsigned long id;
         dma_addr_t dma;
 
         if (!pool->dma_map)
@@ -667,6 +680,17 @@ static __always_inline void __page_pool_release_page_dma(struct page_pool *pool,
                  */
                 return;
 
+        id = netmem_get_dma_index(netmem);
+        if (!id)
+                return;
+
+        if (in_softirq())
+                old = xa_cmpxchg(&pool->dma_mapped, id, page, NULL, 0);
+        else
+                old = xa_cmpxchg_bh(&pool->dma_mapped, id, page, NULL, 0);
+        if (old != page)
+                return;
+
         dma = page_pool_get_dma_addr_netmem(netmem);
 
         /* When page is unmapped, it cannot be returned to our pool */
@@ -674,6 +698,7 @@ static __always_inline void __page_pool_release_page_dma(struct page_pool *pool,
                              PAGE_SIZE << pool->p.order, pool->p.dma_dir,
                              DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
         page_pool_set_dma_addr_netmem(netmem, 0);
+        netmem_set_dma_index(netmem, 0);
 }
 
 /* Disconnects a page (from a page_pool). API users can have a need
@@ -1083,8 +1108,13 @@ static void page_pool_empty_alloc_cache_once(struct page_pool *pool)
 
 static void page_pool_scrub(struct page_pool *pool)
 {
+        unsigned long id;
+        void *ptr;
+
         page_pool_empty_alloc_cache_once(pool);
-        pool->destroy_cnt++;
+        if (!pool->destroy_cnt++)
+                xa_for_each(&pool->dma_mapped, id, ptr)
+                        __page_pool_release_page_dma(pool, page_to_netmem(ptr));
 
         /* No more consumers should exist, but producers could still
          * be in-flight.
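The teardown logic above leans on xa_cmpxchg() so that each page is unmapped exactly once, whether that happens on normal page return or from the xa_for_each() walk in page_pool_scrub(). The sketch below is a userspace analogue of that rule using C11 atomics rather than the kernel xarray API; it illustrates the argument and is not kernel code.

```c
/*
 * Userspace analogue of the xa_cmpxchg() rule used in
 * __page_pool_release_page_dma() above: whichever path atomically swaps the
 * tracked pointer out of the slot first gets to perform the unmap; any later
 * attempt sees the slot already cleared and backs off.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static _Atomic(void *) slot;	/* stand-in for one xarray entry */

static bool release_once(void *page)
{
	void *expected = page;

	/* analogous to: xa_cmpxchg(&pool->dma_mapped, id, page, NULL, 0) */
	return atomic_compare_exchange_strong(&slot, &expected, (void *)NULL);
}

int main(void)
{
	int page;	/* stand-in for a struct page */

	atomic_store(&slot, (void *)&page);
	printf("first release unmaps:  %d\n", release_once(&page));	/* 1 */
	printf("second release unmaps: %d\n", release_once(&page));	/* 0 */
	return 0;
}
```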