Message ID | e190c03eac71b20c8407ae0fc2c399eda7835f49.1605267335.git.lorenzo@kernel.org (mailing list archive) |
---|---|
State | Accepted |
Delegated to: | BPF |
Series | xdp: introduce bulking for page_pool tx return path |
Context | Check | Description |
---|---|---|
netdev/cover_letter | success | Link |
netdev/fixes_present | fail | Series targets non-next tree, but doesn't contain any Fixes tags |
netdev/patch_count | success | Link |
netdev/tree_selection | success | Clearly marked for net |
netdev/subject_prefix | success | Link |
netdev/source_inline | success | Was 0 now: 0 |
netdev/verify_signedoff | success | Link |
netdev/module_param | success | Was 0 now: 0 |
netdev/build_32bit | success | Errors and warnings before: 7900 this patch: 7900 |
netdev/kdoc | success | Errors and warnings before: 0 this patch: 0 |
netdev/verify_fixes | success | Link |
netdev/checkpatch | warning | WARNING: line length of 83 exceeds 80 columns |
netdev/build_allmodconfig_warn | success | Errors and warnings before: 8265 this patch: 8265 |
netdev/header_inline | fail | Link |
netdev/stable | success | Stable not CCed |
Lorenzo Bianconi wrote:
> XDP bulk APIs introduce a defer/flush mechanism to return
> pages belonging to the same xdp_mem_allocator object
> (identified via the mem.id field) in bulk to optimize
> I-cache and D-cache since xdp_return_frame is usually run
> inside the driver NAPI tx completion loop.
> The bulk queue size is set to 16 to be aligned to how
> XDP_REDIRECT bulking works. The bulk is flushed when
> it is full or when mem.id changes.
> xdp_frame_bulk is usually stored/allocated on the function
> call-stack to avoid locking penalties.
> Current implementation considers only page_pool memory model.
>
> Suggested-by: Jesper Dangaard Brouer <brouer@redhat.com>
> Co-developed-by: Jesper Dangaard Brouer <brouer@redhat.com>
> Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> ---
>  include/net/xdp.h | 17 +++++++++++++-
>  net/core/xdp.c    | 59 +++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 75 insertions(+), 1 deletion(-)

Acked-by: John Fastabend <john.fastabend@gmail.com>
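As a usage illustration, here is a minimal sketch (not taken from the patch or from any real driver) of how a NAPI TX completion loop is expected to call the new API, following the commit message and the "Must be called with rcu_read_lock held" note in the patch; `struct my_tx_queue`, `struct my_tx_desc` and `my_txq_next_completed()` are hypothetical placeholders for driver-specific details:

```c
/* Hypothetical driver TX completion path using the bulk return API.
 * struct my_tx_queue, struct my_tx_desc and my_txq_next_completed()
 * are placeholders, not real kernel symbols.
 */
#include <linux/rcupdate.h>
#include <net/xdp.h>

static void my_driver_tx_clean(struct my_tx_queue *txq, int budget)
{
	struct xdp_frame_bulk bq;

	xdp_frame_bulk_init(&bq);

	rcu_read_lock(); /* xdp_return_frame_bulk() must run under RCU */

	while (budget--) {
		struct my_tx_desc *desc = my_txq_next_completed(txq);

		if (!desc)
			break;

		/* Frames are only queued here; pages go back to the
		 * page_pool when the bulk fills up or mem.id changes.
		 */
		xdp_return_frame_bulk(desc->xdpf, &bq);
	}

	/* Return whatever is still pending before leaving the RCU section. */
	xdp_flush_frame_bulk(&bq);

	rcu_read_unlock();
}
```

Keeping `struct xdp_frame_bulk` on the stack of the completion routine, as the commit message suggests, means no extra locking is needed around the bulk queue itself.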
On Fri, Nov 13, 2020 at 12:48:28PM +0100, Lorenzo Bianconi wrote:
> XDP bulk APIs introduce a defer/flush mechanism to return
> pages belonging to the same xdp_mem_allocator object
> (identified via the mem.id field) in bulk to optimize
> I-cache and D-cache since xdp_return_frame is usually run
> inside the driver NAPI tx completion loop.
> The bulk queue size is set to 16 to be aligned to how
> XDP_REDIRECT bulking works. The bulk is flushed when
> it is full or when mem.id changes.
> xdp_frame_bulk is usually stored/allocated on the function
> call-stack to avoid locking penalties.
> Current implementation considers only page_pool memory model.
>
> Suggested-by: Jesper Dangaard Brouer <brouer@redhat.com>
> Co-developed-by: Jesper Dangaard Brouer <brouer@redhat.com>
> Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> ---
>  include/net/xdp.h | 17 +++++++++++++-
>  net/core/xdp.c    | 59 +++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 75 insertions(+), 1 deletion(-)
>
> diff --git a/include/net/xdp.h b/include/net/xdp.h
> index 3814fb631d52..7d48b2ae217a 100644
> --- a/include/net/xdp.h
> +++ b/include/net/xdp.h
> @@ -104,6 +104,18 @@ struct xdp_frame {
>  	struct net_device *dev_rx; /* used by cpumap */
>  };
>
> +#define XDP_BULK_QUEUE_SIZE 16
> +struct xdp_frame_bulk {
> +	int count;
> +	void *xa;
> +	void *q[XDP_BULK_QUEUE_SIZE];
> +};
> +
> +static __always_inline void xdp_frame_bulk_init(struct xdp_frame_bulk *bq)
> +{
> +	/* bq->count will be zero'ed when bq->xa gets updated */
> +	bq->xa = NULL;
> +}
>
>  static inline struct skb_shared_info *
>  xdp_get_shared_info_from_frame(struct xdp_frame *frame)
> @@ -194,6 +206,9 @@ struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp)
>  void xdp_return_frame(struct xdp_frame *xdpf);
>  void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
>  void xdp_return_buff(struct xdp_buff *xdp);
> +void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq);
> +void xdp_return_frame_bulk(struct xdp_frame *xdpf,
> +			   struct xdp_frame_bulk *bq);
>
>  /* When sending xdp_frame into the network stack, then there is no
>   * return point callback, which is needed to release e.g. DMA-mapping
> @@ -245,6 +260,6 @@ bool xdp_attachment_flags_ok(struct xdp_attachment_info *info,
>  void xdp_attachment_setup(struct xdp_attachment_info *info,
>  			  struct netdev_bpf *bpf);
>
> -#define DEV_MAP_BULK_SIZE 16
> +#define DEV_MAP_BULK_SIZE XDP_BULK_QUEUE_SIZE
>
>  #endif /* __LINUX_NET_XDP_H__ */
> diff --git a/net/core/xdp.c b/net/core/xdp.c
> index 48aba933a5a8..bbaee7fdd44f 100644
> --- a/net/core/xdp.c
> +++ b/net/core/xdp.c
> @@ -380,6 +380,65 @@ void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
>  }
>  EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);
>
> +/* XDP bulk APIs introduce a defer/flush mechanism to return
> + * pages belonging to the same xdp_mem_allocator object
> + * (identified via the mem.id field) in bulk to optimize
> + * I-cache and D-cache.
> + * The bulk queue size is set to 16 to be aligned to how
> + * XDP_REDIRECT bulking works. The bulk is flushed when
> + * it is full or when mem.id changes.
> + * xdp_frame_bulk is usually stored/allocated on the function
> + * call-stack to avoid locking penalties.
> + */
> +void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq)
> +{
> +	struct xdp_mem_allocator *xa = bq->xa;
> +	int i;
> +
> +	if (unlikely(!xa))
> +		return;
> +
> +	for (i = 0; i < bq->count; i++) {
> +		struct page *page = virt_to_head_page(bq->q[i]);
> +
> +		page_pool_put_full_page(xa->page_pool, page, false);
> +	}
> +	/* bq->xa is not cleared to save lookup, if mem.id same in next bulk */
> +	bq->count = 0;
> +}
> +EXPORT_SYMBOL_GPL(xdp_flush_frame_bulk);
> +
> +/* Must be called with rcu_read_lock held */
> +void xdp_return_frame_bulk(struct xdp_frame *xdpf,
> +			   struct xdp_frame_bulk *bq)
> +{
> +	struct xdp_mem_info *mem = &xdpf->mem;
> +	struct xdp_mem_allocator *xa;
> +
> +	if (mem->type != MEM_TYPE_PAGE_POOL) {
> +		__xdp_return(xdpf->data, &xdpf->mem, false);
> +		return;
> +	}
> +
> +	xa = bq->xa;
> +	if (unlikely(!xa)) {
> +		xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
> +		bq->count = 0;
> +		bq->xa = xa;
> +	}
> +
> +	if (bq->count == XDP_BULK_QUEUE_SIZE)
> +		xdp_flush_frame_bulk(bq);
> +
> +	if (unlikely(mem->id != xa->mem.id)) {
> +		xdp_flush_frame_bulk(bq);
> +		bq->xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
> +	}
> +
> +	bq->q[bq->count++] = xdpf->data;
> +}
> +EXPORT_SYMBOL_GPL(xdp_return_frame_bulk);
> +
>  void xdp_return_buff(struct xdp_buff *xdp)
>  {
>  	__xdp_return(xdp->data, &xdp->rxq->mem, true);
> --
> 2.26.2
>

Could you add the changes in the Documentation as well (which you can do later)?

Acked-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
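To illustrate the two flush conditions implemented above (bulk full, or mem.id change), the following hypothetical sequence sketches what happens when frames from two different page_pools are completed on the same TX ring; `f1`, `f2`, `f3`, the pool labels and the surrounding locals are invented for the example:

```c
/* Made-up scenario: f1 and f2 come from page_pool A (mem.id == 1),
 * f3 from page_pool B (mem.id == 2), all completed on one TX ring.
 */
struct xdp_frame_bulk bq;

xdp_frame_bulk_init(&bq);
rcu_read_lock();

xdp_return_frame_bulk(f1, &bq); /* first frame: look up xa for mem.id 1, queue f1 */
xdp_return_frame_bulk(f2, &bq); /* same mem.id: just queued, no lookup */
xdp_return_frame_bulk(f3, &bq); /* mem.id changed: f1/f2 pages flushed to pool A,
				 * bq->xa re-looked-up for pool B, f3 queued */
xdp_flush_frame_bulk(&bq);      /* f3's page returned to pool B */

rcu_read_unlock();
```

Keeping `bq->xa` set across flushes (as the in-code comment notes) is what lets the common case of a single RX device feeding the TX ring skip the rhashtable lookup after the first frame.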
diff --git a/include/net/xdp.h b/include/net/xdp.h
index 3814fb631d52..7d48b2ae217a 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -104,6 +104,18 @@ struct xdp_frame {
 	struct net_device *dev_rx; /* used by cpumap */
 };
 
+#define XDP_BULK_QUEUE_SIZE 16
+struct xdp_frame_bulk {
+	int count;
+	void *xa;
+	void *q[XDP_BULK_QUEUE_SIZE];
+};
+
+static __always_inline void xdp_frame_bulk_init(struct xdp_frame_bulk *bq)
+{
+	/* bq->count will be zero'ed when bq->xa gets updated */
+	bq->xa = NULL;
+}
 
 static inline struct skb_shared_info *
 xdp_get_shared_info_from_frame(struct xdp_frame *frame)
@@ -194,6 +206,9 @@ struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp)
 void xdp_return_frame(struct xdp_frame *xdpf);
 void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
 void xdp_return_buff(struct xdp_buff *xdp);
+void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq);
+void xdp_return_frame_bulk(struct xdp_frame *xdpf,
+			   struct xdp_frame_bulk *bq);
 
 /* When sending xdp_frame into the network stack, then there is no
  * return point callback, which is needed to release e.g. DMA-mapping
@@ -245,6 +260,6 @@ bool xdp_attachment_flags_ok(struct xdp_attachment_info *info,
 void xdp_attachment_setup(struct xdp_attachment_info *info,
 			  struct netdev_bpf *bpf);
 
-#define DEV_MAP_BULK_SIZE 16
+#define DEV_MAP_BULK_SIZE XDP_BULK_QUEUE_SIZE
 
 #endif /* __LINUX_NET_XDP_H__ */
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 48aba933a5a8..bbaee7fdd44f 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -380,6 +380,65 @@ void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
 }
 EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);
 
+/* XDP bulk APIs introduce a defer/flush mechanism to return
+ * pages belonging to the same xdp_mem_allocator object
+ * (identified via the mem.id field) in bulk to optimize
+ * I-cache and D-cache.
+ * The bulk queue size is set to 16 to be aligned to how
+ * XDP_REDIRECT bulking works. The bulk is flushed when
+ * it is full or when mem.id changes.
+ * xdp_frame_bulk is usually stored/allocated on the function
+ * call-stack to avoid locking penalties.
+ */
+void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq)
+{
+	struct xdp_mem_allocator *xa = bq->xa;
+	int i;
+
+	if (unlikely(!xa))
+		return;
+
+	for (i = 0; i < bq->count; i++) {
+		struct page *page = virt_to_head_page(bq->q[i]);
+
+		page_pool_put_full_page(xa->page_pool, page, false);
+	}
+	/* bq->xa is not cleared to save lookup, if mem.id same in next bulk */
+	bq->count = 0;
+}
+EXPORT_SYMBOL_GPL(xdp_flush_frame_bulk);
+
+/* Must be called with rcu_read_lock held */
+void xdp_return_frame_bulk(struct xdp_frame *xdpf,
+			   struct xdp_frame_bulk *bq)
+{
+	struct xdp_mem_info *mem = &xdpf->mem;
+	struct xdp_mem_allocator *xa;
+
+	if (mem->type != MEM_TYPE_PAGE_POOL) {
+		__xdp_return(xdpf->data, &xdpf->mem, false);
+		return;
+	}
+
+	xa = bq->xa;
+	if (unlikely(!xa)) {
+		xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
+		bq->count = 0;
+		bq->xa = xa;
+	}
+
+	if (bq->count == XDP_BULK_QUEUE_SIZE)
+		xdp_flush_frame_bulk(bq);
+
+	if (unlikely(mem->id != xa->mem.id)) {
+		xdp_flush_frame_bulk(bq);
+		bq->xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
+	}
+
+	bq->q[bq->count++] = xdpf->data;
+}
+EXPORT_SYMBOL_GPL(xdp_return_frame_bulk);
+
 void xdp_return_buff(struct xdp_buff *xdp)
 {
 	__xdp_return(xdp->data, &xdp->rxq->mem, true);
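For comparison, here is a sketch of the per-frame return path next to the bulk path. It only uses functions added or already declared in the patch (`xdp_return_frame()` predates it); `frames[]`, `n`, `i` and `bq` are assumed local variables:

```c
/* Per-frame path: each frame handles its memory-model bookkeeping and
 * releases its page individually.
 */
for (i = 0; i < n; i++)
	xdp_return_frame(frames[i]);

/* Bulk path: the xdp_mem_allocator lookup happens only on the first frame
 * or when mem.id changes, and pages are handed back to the page_pool up to
 * XDP_BULK_QUEUE_SIZE (16) at a time, which is what improves I-cache and
 * D-cache behaviour in the NAPI TX completion loop.
 */
xdp_frame_bulk_init(&bq);
rcu_read_lock();
for (i = 0; i < n; i++)
	xdp_return_frame_bulk(frames[i], &bq);
xdp_flush_frame_bulk(&bq);
rcu_read_unlock();
```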