Message ID | cd58ca966fbe11cabbd6160decea6ce748ebce9f.1603824486.git.lorenzo@kernel.org (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | BPF |
Headers | show |
Series | xdp: introduce bulking for page_pool tx return path | expand |
Hi Lorenzo, On Tue, Oct 27, 2020 at 08:04:08PM +0100, Lorenzo Bianconi wrote: > Introduce the capability to batch page_pool ptr_ring refill since it is > usually run inside the driver NAPI tx completion loop. > > Suggested-by: Jesper Dangaard Brouer <brouer@redhat.com> > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org> > --- > include/net/page_pool.h | 26 ++++++++++++++++++++++++++ > net/core/page_pool.c | 33 +++++++++++++++++++++++++++++++++ > net/core/xdp.c | 9 ++------- > 3 files changed, 61 insertions(+), 7 deletions(-) > > diff --git a/include/net/page_pool.h b/include/net/page_pool.h > index 81d7773f96cd..b5b195305346 100644 > --- a/include/net/page_pool.h > +++ b/include/net/page_pool.h > @@ -152,6 +152,8 @@ struct page_pool *page_pool_create(const struct page_pool_params *params); > void page_pool_destroy(struct page_pool *pool); > void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *)); > void page_pool_release_page(struct page_pool *pool, struct page *page); > +void page_pool_put_page_bulk(struct page_pool *pool, void **data, > + int count); > #else > static inline void page_pool_destroy(struct page_pool *pool) > { > @@ -165,6 +167,11 @@ static inline void page_pool_release_page(struct page_pool *pool, > struct page *page) > { > } > + > +static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data, > + int count) > +{ > +} > #endif > > void page_pool_put_page(struct page_pool *pool, struct page *page, > @@ -215,4 +222,23 @@ static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid) > if (unlikely(pool->p.nid != new_nid)) > page_pool_update_nid(pool, new_nid); > } > + > +static inline void page_pool_ring_lock(struct page_pool *pool) > + __acquires(&pool->ring.producer_lock) > +{ > + if (in_serving_softirq()) > + spin_lock(&pool->ring.producer_lock); > + else > + spin_lock_bh(&pool->ring.producer_lock); > +} > + > +static inline void page_pool_ring_unlock(struct page_pool *pool) > + 
__releases(&pool->ring.producer_lock) > +{ > + if (in_serving_softirq()) > + spin_unlock(&pool->ring.producer_lock); > + else > + spin_unlock_bh(&pool->ring.producer_lock); > +} > + > #endif /* _NET_PAGE_POOL_H */ > diff --git a/net/core/page_pool.c b/net/core/page_pool.c > index ef98372facf6..84fb21f8865e 100644 > --- a/net/core/page_pool.c > +++ b/net/core/page_pool.c > @@ -11,6 +11,8 @@ > #include <linux/device.h> > > #include <net/page_pool.h> > +#include <net/xdp.h> > + > #include <linux/dma-direction.h> > #include <linux/dma-mapping.h> > #include <linux/page-flags.h> > @@ -408,6 +410,37 @@ void page_pool_put_page(struct page_pool *pool, struct page *page, > } > EXPORT_SYMBOL(page_pool_put_page); > > +void page_pool_put_page_bulk(struct page_pool *pool, void **data, > + int count) > +{ > + struct page *page_ring[XDP_BULK_QUEUE_SIZE]; > + int i, len = 0; > + > + for (i = 0; i < count; i++) { > + struct page *page = virt_to_head_page(data[i]); > + > + if (unlikely(page_ref_count(page) != 1 || > + !pool_page_reusable(pool, page))) { > + page_pool_release_page(pool, page); Mind switching this similarly to how page_pool_put_page() is using it? unlikely -> likely and remove the ! 
> + put_page(page); > + continue; > + } > + > + if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) > + page_pool_dma_sync_for_device(pool, page, -1); > + > + page_ring[len++] = page; > + } > + > + page_pool_ring_lock(pool); > + for (i = 0; i < len; i++) { > + if (__ptr_ring_produce(&pool->ring, page_ring[i])) > + page_pool_return_page(pool, page_ring[i]); > + } > + page_pool_ring_unlock(pool); > +} > +EXPORT_SYMBOL(page_pool_put_page_bulk); > + > static void page_pool_empty_ring(struct page_pool *pool) > { > struct page *page; > diff --git a/net/core/xdp.c b/net/core/xdp.c > index 93eabd789246..9f9a8d14df38 100644 > --- a/net/core/xdp.c > +++ b/net/core/xdp.c > @@ -383,16 +383,11 @@ EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi); > void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq) > { > struct xdp_mem_allocator *xa = bq->xa; > - int i; > > - if (unlikely(!xa)) > + if (unlikely(!xa || !bq->count)) > return; > > - for (i = 0; i < bq->count; i++) { > - struct page *page = virt_to_head_page(bq->q[i]); > - > - page_pool_put_full_page(xa->page_pool, page, false); > - } > + page_pool_put_page_bulk(xa->page_pool, bq->q, bq->count); > bq->count = 0; > } > EXPORT_SYMBOL_GPL(xdp_flush_frame_bulk); > -- > 2.26.2 > Cheers /Ilias
On Tue, Oct 27, 2020 at 08:04:08PM +0100, Lorenzo Bianconi wrote: > Introduce the capability to batch page_pool ptr_ring refill since it is > usually run inside the driver NAPI tx completion loop. > > Suggested-by: Jesper Dangaard Brouer <brouer@redhat.com> > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org> > --- > include/net/page_pool.h | 26 ++++++++++++++++++++++++++ > net/core/page_pool.c | 33 +++++++++++++++++++++++++++++++++ > net/core/xdp.c | 9 ++------- > 3 files changed, 61 insertions(+), 7 deletions(-) > > diff --git a/include/net/page_pool.h b/include/net/page_pool.h > index 81d7773f96cd..b5b195305346 100644 > --- a/include/net/page_pool.h > +++ b/include/net/page_pool.h > @@ -152,6 +152,8 @@ struct page_pool *page_pool_create(const struct page_pool_params *params); > void page_pool_destroy(struct page_pool *pool); > void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *)); > void page_pool_release_page(struct page_pool *pool, struct page *page); > +void page_pool_put_page_bulk(struct page_pool *pool, void **data, > + int count); > #else > static inline void page_pool_destroy(struct page_pool *pool) > { > @@ -165,6 +167,11 @@ static inline void page_pool_release_page(struct page_pool *pool, > struct page *page) > { > } > + > +static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data, > + int count) > +{ > +} > #endif > > void page_pool_put_page(struct page_pool *pool, struct page *page, > @@ -215,4 +222,23 @@ static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid) > if (unlikely(pool->p.nid != new_nid)) > page_pool_update_nid(pool, new_nid); > } > + > +static inline void page_pool_ring_lock(struct page_pool *pool) > + __acquires(&pool->ring.producer_lock) > +{ > + if (in_serving_softirq()) > + spin_lock(&pool->ring.producer_lock); > + else > + spin_lock_bh(&pool->ring.producer_lock); > +} > + > +static inline void page_pool_ring_unlock(struct page_pool *pool) > + 
__releases(&pool->ring.producer_lock) > +{ > + if (in_serving_softirq()) > + spin_unlock(&pool->ring.producer_lock); > + else > + spin_unlock_bh(&pool->ring.producer_lock); > +} > + > #endif /* _NET_PAGE_POOL_H */ > diff --git a/net/core/page_pool.c b/net/core/page_pool.c > index ef98372facf6..84fb21f8865e 100644 > --- a/net/core/page_pool.c > +++ b/net/core/page_pool.c > @@ -11,6 +11,8 @@ > #include <linux/device.h> > > #include <net/page_pool.h> > +#include <net/xdp.h> > + > #include <linux/dma-direction.h> > #include <linux/dma-mapping.h> > #include <linux/page-flags.h> > @@ -408,6 +410,37 @@ void page_pool_put_page(struct page_pool *pool, struct page *page, > } > EXPORT_SYMBOL(page_pool_put_page); > > +void page_pool_put_page_bulk(struct page_pool *pool, void **data, > + int count) > +{ > + struct page *page_ring[XDP_BULK_QUEUE_SIZE]; > + int i, len = 0; > + > + for (i = 0; i < count; i++) { > + struct page *page = virt_to_head_page(data[i]); > + > + if (unlikely(page_ref_count(page) != 1 || > + !pool_page_reusable(pool, page))) { > + page_pool_release_page(pool, page); > + put_page(page); > + continue; > + } > + > + if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) > + page_pool_dma_sync_for_device(pool, page, -1); > + > + page_ring[len++] = page; > + } > + > + page_pool_ring_lock(pool); > + for (i = 0; i < len; i++) { > + if (__ptr_ring_produce(&pool->ring, page_ring[i])) > + page_pool_return_page(pool, page_ring[i]); Can we add a comment here on why the explicit spinlock needs to protect page_pool_return_page() as well instead of just using ptr_ring_produce()? 
> + } > + page_pool_ring_unlock(pool); > +} > +EXPORT_SYMBOL(page_pool_put_page_bulk); > + > static void page_pool_empty_ring(struct page_pool *pool) > { > struct page *page; > diff --git a/net/core/xdp.c b/net/core/xdp.c > index 93eabd789246..9f9a8d14df38 100644 > --- a/net/core/xdp.c > +++ b/net/core/xdp.c > @@ -383,16 +383,11 @@ EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi); > void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq) > { > struct xdp_mem_allocator *xa = bq->xa; > - int i; > > - if (unlikely(!xa)) > + if (unlikely(!xa || !bq->count)) > return; > > - for (i = 0; i < bq->count; i++) { > - struct page *page = virt_to_head_page(bq->q[i]); > - > - page_pool_put_full_page(xa->page_pool, page, false); > - } > + page_pool_put_page_bulk(xa->page_pool, bq->q, bq->count); > bq->count = 0; > } > EXPORT_SYMBOL_GPL(xdp_flush_frame_bulk); > -- > 2.26.2 > Thanks /Ilias
On Tue, 27 Oct 2020 20:04:08 +0100 Lorenzo Bianconi <lorenzo@kernel.org> wrote: > +void page_pool_put_page_bulk(struct page_pool *pool, void **data, > + int count) > +{ > + struct page *page_ring[XDP_BULK_QUEUE_SIZE]; Maybe we could reuse the 'data' array instead of creating a new array (2 cache-lines long) for the array of pages? > + int i, len = 0; > + > + for (i = 0; i < count; i++) { > + struct page *page = virt_to_head_page(data[i]); > + > + if (unlikely(page_ref_count(page) != 1 || > + !pool_page_reusable(pool, page))) { > + page_pool_release_page(pool, page); > + put_page(page); > + continue; > + } > + > + if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) > + page_pool_dma_sync_for_device(pool, page, -1); Here we sync the entire DMA area (-1), which has a *huge* cost for mvneta (especially on EspressoBin HW). For this xdp_frame->len is unfortunately not enough. We will need the *maximum* length touched by (1) CPU and (2) remote device DMA engine. DMA-TX completion knows the length for (2). The CPU length (1) is max of original xdp_buff size and xdp_frame->len, because BPF-helpers could have shrunk the size. (tricky part is that xdp_frame->len isn't correct in case of header adjustments, thus like mvneta_run_xdp we need to calc dma_sync size, and store this in xdp_frame, maybe via param to xdp_do_redirect). Well, not sure if it is too much work to transfer this info, for this use-case. > + > + page_ring[len++] = page; > + } > + > + page_pool_ring_lock(pool); > + for (i = 0; i < len; i++) { > + if (__ptr_ring_produce(&pool->ring, page_ring[i])) > + page_pool_return_page(pool, page_ring[i]); > + } > + page_pool_ring_unlock(pool); > +} > +EXPORT_SYMBOL(page_pool_put_page_bulk);
> On Tue, 27 Oct 2020 20:04:08 +0100 > Lorenzo Bianconi <lorenzo@kernel.org> wrote: > > > +void page_pool_put_page_bulk(struct page_pool *pool, void **data, > > + int count) > > +{ > > + struct page *page_ring[XDP_BULK_QUEUE_SIZE]; > > Maybe we could reuse the 'data' array instead of creating a new array > (2 cache-lines long) for the array of pages? I agree, I will try to reuse the data array for that > > > + int i, len = 0; > > + > > + for (i = 0; i < count; i++) { > > + struct page *page = virt_to_head_page(data[i]); > > + > > + if (unlikely(page_ref_count(page) != 1 || > > + !pool_page_reusable(pool, page))) { > > + page_pool_release_page(pool, page); > > + put_page(page); > > + continue; > > + } > > + > > + if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) > > + page_pool_dma_sync_for_device(pool, page, -1); > > Here we sync the entire DMA area (-1), which have a *huge* cost for > mvneta (especially on EspressoBin HW). For this xdp_frame->len is > unfortunately not enough. We will need the *maximum* length touch by > (1) CPU and (2) remote device DMA engine. DMA-TX completion knows the > length for (2). The CPU length (1) is max of original xdp_buff size > and xdp_frame->len, because BPF-helpers could have shrinked the size. > (tricky part is that xdp_frame->len isn't correct in-case of header > adjustments, thus like mvneta_run_xdp we to calc dma_sync size, and > store this in xdp_frame, maybe via param to xdp_do_redirect). Well, not > sure if it is too much work to transfer this info, for this use-case. I was thinking about that but I guess point (1) is tricky since "cpu length" can be changed even in the middle by devmaps or cpumaps (not just in the driver rx napi loop). I guess we can try to address this point in a subsequent series. Agree? 
Regards, Lorenzo > > > + > > + page_ring[len++] = page; > > > + } > > + > > + page_pool_ring_lock(pool); > > + for (i = 0; i < len; i++) { > > + if (__ptr_ring_produce(&pool->ring, page_ring[i])) > > + page_pool_return_page(pool, page_ring[i]); > > + } > > + page_pool_ring_unlock(pool); > > +} > > +EXPORT_SYMBOL(page_pool_put_page_bulk); > > > > -- > Best regards, > Jesper Dangaard Brouer > MSc.CS, Principal Kernel Engineer at Red Hat > LinkedIn: http://www.linkedin.com/in/brouer >
> Hi Lorenzo, > > On Tue, Oct 27, 2020 at 08:04:08PM +0100, Lorenzo Bianconi wrote: > > Introduce the capability to batch page_pool ptr_ring refill since it is > > usually run inside the driver NAPI tx completion loop. > > > > Suggested-by: Jesper Dangaard Brouer <brouer@redhat.com> > > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org> > > --- > > include/net/page_pool.h | 26 ++++++++++++++++++++++++++ > > net/core/page_pool.c | 33 +++++++++++++++++++++++++++++++++ > > net/core/xdp.c | 9 ++------- > > 3 files changed, 61 insertions(+), 7 deletions(-) > > > > diff --git a/include/net/page_pool.h b/include/net/page_pool.h > > index 81d7773f96cd..b5b195305346 100644 > > --- a/include/net/page_pool.h > > +++ b/include/net/page_pool.h > > @@ -152,6 +152,8 @@ struct page_pool *page_pool_create(const struct page_pool_params *params); > > void page_pool_destroy(struct page_pool *pool); > > void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *)); > > void page_pool_release_page(struct page_pool *pool, struct page *page); > > +void page_pool_put_page_bulk(struct page_pool *pool, void **data, > > + int count); > > #else > > static inline void page_pool_destroy(struct page_pool *pool) > > { > > @@ -165,6 +167,11 @@ static inline void page_pool_release_page(struct page_pool *pool, > > struct page *page) > > { > > } > > + > > +static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data, > > + int count) > > +{ > > +} > > #endif > > > > void page_pool_put_page(struct page_pool *pool, struct page *page, > > @@ -215,4 +222,23 @@ static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid) > > if (unlikely(pool->p.nid != new_nid)) > > page_pool_update_nid(pool, new_nid); > > } > > + > > +static inline void page_pool_ring_lock(struct page_pool *pool) > > + __acquires(&pool->ring.producer_lock) > > +{ > > + if (in_serving_softirq()) > > + spin_lock(&pool->ring.producer_lock); > > + else > > + 
spin_lock_bh(&pool->ring.producer_lock); > > +} > > + > > +static inline void page_pool_ring_unlock(struct page_pool *pool) > > + __releases(&pool->ring.producer_lock) > > +{ > > + if (in_serving_softirq()) > > + spin_unlock(&pool->ring.producer_lock); > > + else > > + spin_unlock_bh(&pool->ring.producer_lock); > > +} > > + > > #endif /* _NET_PAGE_POOL_H */ > > diff --git a/net/core/page_pool.c b/net/core/page_pool.c > > index ef98372facf6..84fb21f8865e 100644 > > --- a/net/core/page_pool.c > > +++ b/net/core/page_pool.c > > @@ -11,6 +11,8 @@ > > #include <linux/device.h> > > > > #include <net/page_pool.h> > > +#include <net/xdp.h> > > + > > #include <linux/dma-direction.h> > > #include <linux/dma-mapping.h> > > #include <linux/page-flags.h> > > @@ -408,6 +410,37 @@ void page_pool_put_page(struct page_pool *pool, struct page *page, > > } > > EXPORT_SYMBOL(page_pool_put_page); > > > > +void page_pool_put_page_bulk(struct page_pool *pool, void **data, > > + int count) > > +{ > > + struct page *page_ring[XDP_BULK_QUEUE_SIZE]; > > + int i, len = 0; > > + > > + for (i = 0; i < count; i++) { > > + struct page *page = virt_to_head_page(data[i]); > > + > > + if (unlikely(page_ref_count(page) != 1 || > > + !pool_page_reusable(pool, page))) { > > + page_pool_release_page(pool, page); > > Mind switching this similarly to how page_pool_put_page() is using it? > unlikely -> likely and remove the ! Hi Ilias, thx for the review. 
ack, I will do it in v2 Regards, Lorenzo > > > + put_page(page); > > + continue; > > + } > > + > > + if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) > > + page_pool_dma_sync_for_device(pool, page, -1); > > + > > + page_ring[len++] = page; > > + } > > + > > + page_pool_ring_lock(pool); > > + for (i = 0; i < len; i++) { > > + if (__ptr_ring_produce(&pool->ring, page_ring[i])) > > + page_pool_return_page(pool, page_ring[i]); > > + } > > + page_pool_ring_unlock(pool); > > +} > > +EXPORT_SYMBOL(page_pool_put_page_bulk); > > + > > static void page_pool_empty_ring(struct page_pool *pool) > > { > > struct page *page; > > diff --git a/net/core/xdp.c b/net/core/xdp.c > > index 93eabd789246..9f9a8d14df38 100644 > > --- a/net/core/xdp.c > > +++ b/net/core/xdp.c > > @@ -383,16 +383,11 @@ EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi); > > void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq) > > { > > struct xdp_mem_allocator *xa = bq->xa; > > - int i; > > > > - if (unlikely(!xa)) > > + if (unlikely(!xa || !bq->count)) > > return; > > > > - for (i = 0; i < bq->count; i++) { > > - struct page *page = virt_to_head_page(bq->q[i]); > > - > > - page_pool_put_full_page(xa->page_pool, page, false); > > - } > > + page_pool_put_page_bulk(xa->page_pool, bq->q, bq->count); > > bq->count = 0; > > } > > EXPORT_SYMBOL_GPL(xdp_flush_frame_bulk); > > -- > > 2.26.2 > > > > Cheers > /Ilias >
> On Tue, Oct 27, 2020 at 08:04:08PM +0100, Lorenzo Bianconi wrote: > > Introduce the capability to batch page_pool ptr_ring refill since it is > > usually run inside the driver NAPI tx completion loop. > > > > Suggested-by: Jesper Dangaard Brouer <brouer@redhat.com> > > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org> > > --- > > include/net/page_pool.h | 26 ++++++++++++++++++++++++++ > > net/core/page_pool.c | 33 +++++++++++++++++++++++++++++++++ > > net/core/xdp.c | 9 ++------- > > 3 files changed, 61 insertions(+), 7 deletions(-) > > > > diff --git a/include/net/page_pool.h b/include/net/page_pool.h > > index 81d7773f96cd..b5b195305346 100644 [...] > > +void page_pool_put_page_bulk(struct page_pool *pool, void **data, > > + int count) > > +{ > > + struct page *page_ring[XDP_BULK_QUEUE_SIZE]; > > + int i, len = 0; > > + > > + for (i = 0; i < count; i++) { > > + struct page *page = virt_to_head_page(data[i]); > > + > > + if (unlikely(page_ref_count(page) != 1 || > > + !pool_page_reusable(pool, page))) { > > + page_pool_release_page(pool, page); > > + put_page(page); > > + continue; > > + } > > + > > + if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) > > + page_pool_dma_sync_for_device(pool, page, -1); > > + > > + page_ring[len++] = page; > > + } > > + > > + page_pool_ring_lock(pool); > > + for (i = 0; i < len; i++) { > > + if (__ptr_ring_produce(&pool->ring, page_ring[i])) > > + page_pool_return_page(pool, page_ring[i]); > > Can we add a comment here on why the explicit spinlock needs to protect > page_pool_return_page() as well instead of just using ptr_ring_produce()? ack, will do in v2. 
Regards, Lorenzo > > > + } > > + page_pool_ring_unlock(pool); > > +} > > +EXPORT_SYMBOL(page_pool_put_page_bulk); > > + > > static void page_pool_empty_ring(struct page_pool *pool) > > { > > struct page *page; > > diff --git a/net/core/xdp.c b/net/core/xdp.c > > index 93eabd789246..9f9a8d14df38 100644 > > --- a/net/core/xdp.c > > +++ b/net/core/xdp.c > > @@ -383,16 +383,11 @@ EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi); > > void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq) > > { > > struct xdp_mem_allocator *xa = bq->xa; > > - int i; > > > > - if (unlikely(!xa)) > > + if (unlikely(!xa || !bq->count)) > > return; > > > > - for (i = 0; i < bq->count; i++) { > > - struct page *page = virt_to_head_page(bq->q[i]); > > - > > - page_pool_put_full_page(xa->page_pool, page, false); > > - } > > + page_pool_put_page_bulk(xa->page_pool, bq->q, bq->count); > > bq->count = 0; > > } > > EXPORT_SYMBOL_GPL(xdp_flush_frame_bulk); > > -- > > 2.26.2 > > > > Thanks > /Ilias >
On Thu, 29 Oct 2020 11:31:48 +0100 Lorenzo Bianconi <lorenzo.bianconi@redhat.com> wrote: > > On Tue, 27 Oct 2020 20:04:08 +0100 > > Lorenzo Bianconi <lorenzo@kernel.org> wrote: > > > > > +void page_pool_put_page_bulk(struct page_pool *pool, void **data, > > > + int count) > > > +{ > > > + struct page *page_ring[XDP_BULK_QUEUE_SIZE]; > > > > Maybe we could reuse the 'data' array instead of creating a new array > > (2 cache-lines long) for the array of pages? > > I agree, I will try to reuse the data array for that > > > > > > + int i, len = 0; > > > + > > > + for (i = 0; i < count; i++) { > > > + struct page *page = virt_to_head_page(data[i]); > > > + > > > + if (unlikely(page_ref_count(page) != 1 || > > > + !pool_page_reusable(pool, page))) { > > > + page_pool_release_page(pool, page); > > > + put_page(page); > > > + continue; > > > + } > > > + > > > + if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) > > > + page_pool_dma_sync_for_device(pool, page, -1); > > > > Here we sync the entire DMA area (-1), which have a *huge* cost for > > mvneta (especially on EspressoBin HW). For this xdp_frame->len is > > unfortunately not enough. We will need the *maximum* length touch by > > (1) CPU and (2) remote device DMA engine. DMA-TX completion knows the > > length for (2). The CPU length (1) is max of original xdp_buff size > > and xdp_frame->len, because BPF-helpers could have shrinked the size. > > (tricky part is that xdp_frame->len isn't correct in-case of header > > adjustments, thus like mvneta_run_xdp we to calc dma_sync size, and > > store this in xdp_frame, maybe via param to xdp_do_redirect). Well, not > > sure if it is too much work to transfer this info, for this use-case. > > I was thinking about that but I guess point (1) is tricky since "cpu length" > can be changed even in the middle by devmaps or cpumaps (not just in the driver > rx napi loop). I guess we can try to address this point in a subsequent series. > Agree? 
I agree that this change request goes beyond this series. But it becomes harder and harder to add later when this API is getting used in more and more drivers. Looking at 1/4, it can be extended later, as you just pass down xdpf in API driver use (and then queue xdpf->data).
diff --git a/include/net/page_pool.h b/include/net/page_pool.h index 81d7773f96cd..b5b195305346 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -152,6 +152,8 @@ struct page_pool *page_pool_create(const struct page_pool_params *params); void page_pool_destroy(struct page_pool *pool); void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *)); void page_pool_release_page(struct page_pool *pool, struct page *page); +void page_pool_put_page_bulk(struct page_pool *pool, void **data, + int count); #else static inline void page_pool_destroy(struct page_pool *pool) { @@ -165,6 +167,11 @@ static inline void page_pool_release_page(struct page_pool *pool, struct page *page) { } + +static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data, + int count) +{ +} #endif void page_pool_put_page(struct page_pool *pool, struct page *page, @@ -215,4 +222,23 @@ static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid) if (unlikely(pool->p.nid != new_nid)) page_pool_update_nid(pool, new_nid); } + +static inline void page_pool_ring_lock(struct page_pool *pool) + __acquires(&pool->ring.producer_lock) +{ + if (in_serving_softirq()) + spin_lock(&pool->ring.producer_lock); + else + spin_lock_bh(&pool->ring.producer_lock); +} + +static inline void page_pool_ring_unlock(struct page_pool *pool) + __releases(&pool->ring.producer_lock) +{ + if (in_serving_softirq()) + spin_unlock(&pool->ring.producer_lock); + else + spin_unlock_bh(&pool->ring.producer_lock); +} + #endif /* _NET_PAGE_POOL_H */ diff --git a/net/core/page_pool.c b/net/core/page_pool.c index ef98372facf6..84fb21f8865e 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -11,6 +11,8 @@ #include <linux/device.h> #include <net/page_pool.h> +#include <net/xdp.h> + #include <linux/dma-direction.h> #include <linux/dma-mapping.h> #include <linux/page-flags.h> @@ -408,6 +410,37 @@ void page_pool_put_page(struct page_pool *pool, struct page *page, } 
EXPORT_SYMBOL(page_pool_put_page); +void page_pool_put_page_bulk(struct page_pool *pool, void **data, + int count) +{ + struct page *page_ring[XDP_BULK_QUEUE_SIZE]; + int i, len = 0; + + for (i = 0; i < count; i++) { + struct page *page = virt_to_head_page(data[i]); + + if (unlikely(page_ref_count(page) != 1 || + !pool_page_reusable(pool, page))) { + page_pool_release_page(pool, page); + put_page(page); + continue; + } + + if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) + page_pool_dma_sync_for_device(pool, page, -1); + + page_ring[len++] = page; + } + + page_pool_ring_lock(pool); + for (i = 0; i < len; i++) { + if (__ptr_ring_produce(&pool->ring, page_ring[i])) + page_pool_return_page(pool, page_ring[i]); + } + page_pool_ring_unlock(pool); +} +EXPORT_SYMBOL(page_pool_put_page_bulk); + static void page_pool_empty_ring(struct page_pool *pool) { struct page *page; diff --git a/net/core/xdp.c b/net/core/xdp.c index 93eabd789246..9f9a8d14df38 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -383,16 +383,11 @@ EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi); void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq) { struct xdp_mem_allocator *xa = bq->xa; - int i; - if (unlikely(!xa)) + if (unlikely(!xa || !bq->count)) return; - for (i = 0; i < bq->count; i++) { - struct page *page = virt_to_head_page(bq->q[i]); - - page_pool_put_full_page(xa->page_pool, page, false); - } + page_pool_put_page_bulk(xa->page_pool, bq->q, bq->count); bq->count = 0; } EXPORT_SYMBOL_GPL(xdp_flush_frame_bulk);
Introduce the capability to batch page_pool ptr_ring refill since it is usually run inside the driver NAPI tx completion loop. Suggested-by: Jesper Dangaard Brouer <brouer@redhat.com> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org> --- include/net/page_pool.h | 26 ++++++++++++++++++++++++++ net/core/page_pool.c | 33 +++++++++++++++++++++++++++++++++ net/core/xdp.c | 9 ++------- 3 files changed, 61 insertions(+), 7 deletions(-)