
[net-next,v5,12/19] xdp: add generic xdp_build_skb_from_buff()

Message ID: 20241113152442.4000468-13-aleksander.lobakin@intel.com (mailing list archive)
State: Changes Requested
Delegated to: Netdev Maintainers
Series: xdp: a fistful of generic changes (+libeth_xdp)

Checks

Context Check Description
netdev/series_format fail Series longer than 15 patches
netdev/tree_selection success Clearly marked for net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 5 this patch: 5
netdev/build_tools success Errors and warnings before: 0 (+0) this patch: 0 (+0)
netdev/cc_maintainers warning 2 maintainers not CCed: hawk@kernel.org horms@kernel.org
netdev/build_clang success Errors and warnings before: 9 this patch: 9
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 237 this patch: 237
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 68 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-11-15--21-00 (tests: 789)

Commit Message

Alexander Lobakin Nov. 13, 2024, 3:24 p.m. UTC
The code which builds an skb from an &xdp_buff keeps multiplying itself
around the drivers with almost no changes. Let's try to stop that by
adding a generic function.
Unlike __xdp_build_skb_from_frame(), always allocate an skbuff head
using napi_build_skb() and make use of the available xdp_rxq pointer to
assign the Rx queue index. In case of PP-backed buffer, mark the skb to
be recycled, as every PP user's been switched to recycle skbs.

Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
---
 include/net/xdp.h |  1 +
 net/core/xdp.c    | 55 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)
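
For illustration, a driver's XDP_PASS path might consume the new helper roughly as sketched below. This is only a sketch: the queue structure, its xdp_prog/napi members and the function name are hypothetical and not part of this patch.

#include <linux/filter.h>
#include <linux/netdevice.h>
#include <net/xdp.h>

/* Hypothetical Rx-path sketch, not part of this patch. Assumes @xdp was
 * initialised with xdp_init_buff()/xdp_prepare_buff() against an rxq
 * registered via xdp_rxq_info_reg().
 */
static void example_rx_one(struct example_rx_queue *q, struct xdp_buff *xdp)
{
	struct sk_buff *skb;

	switch (bpf_prog_run_xdp(q->xdp_prog, xdp)) {
	case XDP_PASS:
		skb = xdp_build_skb_from_buff(xdp);
		if (unlikely(!skb)) {
			/* Head allocation failed, return the buffer */
			xdp_return_buff(xdp);
			break;
		}
		napi_gro_receive(&q->napi, skb);
		break;
	default:
		/* XDP_TX / XDP_REDIRECT / XDP_DROP handling elided */
		break;
	}
}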

Comments

Ido Schimmel Nov. 14, 2024, 3:06 p.m. UTC | #1
Looks good (no objections to the patch), but I have a question. See
below.

On Wed, Nov 13, 2024 at 04:24:35PM +0100, Alexander Lobakin wrote:
> The code which builds an skb from an &xdp_buff keeps multiplying itself
> around the drivers with almost no changes. Let's try to stop that by
> adding a generic function.
> Unlike __xdp_build_skb_from_frame(), always allocate an skbuff head
> using napi_build_skb() and make use of the available xdp_rxq pointer to
> assign the Rx queue index. In case of PP-backed buffer, mark the skb to
> be recycled, as every PP user's been switched to recycle skbs.
> 
> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
> Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>

Reviewed-by: Ido Schimmel <idosch@nvidia.com>

> ---
>  include/net/xdp.h |  1 +
>  net/core/xdp.c    | 55 +++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 56 insertions(+)
> 
> diff --git a/include/net/xdp.h b/include/net/xdp.h
> index 4c19042adf80..b0a25b7060ff 100644
> --- a/include/net/xdp.h
> +++ b/include/net/xdp.h
> @@ -330,6 +330,7 @@ xdp_update_skb_shared_info(struct sk_buff *skb, u8 nr_frags,
>  void xdp_warn(const char *msg, const char *func, const int line);
>  #define XDP_WARN(msg) xdp_warn(msg, __func__, __LINE__)
>  
> +struct sk_buff *xdp_build_skb_from_buff(const struct xdp_buff *xdp);
>  struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp);
>  struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
>  					   struct sk_buff *skb,
> diff --git a/net/core/xdp.c b/net/core/xdp.c
> index b1b426a9b146..3a9a3c14b080 100644
> --- a/net/core/xdp.c
> +++ b/net/core/xdp.c
> @@ -624,6 +624,61 @@ int xdp_alloc_skb_bulk(void **skbs, int n_skb, gfp_t gfp)
>  }
>  EXPORT_SYMBOL_GPL(xdp_alloc_skb_bulk);
>  
> +/**
> + * xdp_build_skb_from_buff - create an skb from an &xdp_buff
> + * @xdp: &xdp_buff to convert to an skb
> + *
> + * Perform common operations to create a new skb to pass up the stack from
> + * an &xdp_buff: allocate an skb head from the NAPI percpu cache, initialize
> + * skb data pointers and offsets, set the recycle bit if the buff is PP-backed,
> + * Rx queue index, protocol and update frags info.
> + *
> + * Return: new &sk_buff on success, %NULL on error.
> + */
> +struct sk_buff *xdp_build_skb_from_buff(const struct xdp_buff *xdp)
> +{
> +	const struct xdp_rxq_info *rxq = xdp->rxq;
> +	const struct skb_shared_info *sinfo;
> +	struct sk_buff *skb;
> +	u32 nr_frags = 0;
> +	int metalen;
> +
> +	if (unlikely(xdp_buff_has_frags(xdp))) {
> +		sinfo = xdp_get_shared_info_from_buff(xdp);
> +		nr_frags = sinfo->nr_frags;
> +	}
> +
> +	skb = napi_build_skb(xdp->data_hard_start, xdp->frame_sz);
> +	if (unlikely(!skb))
> +		return NULL;
> +
> +	skb_reserve(skb, xdp->data - xdp->data_hard_start);
> +	__skb_put(skb, xdp->data_end - xdp->data);
> +
> +	metalen = xdp->data - xdp->data_meta;
> +	if (metalen > 0)
> +		skb_metadata_set(skb, metalen);
> +
> +	if (is_page_pool_compiled_in() && rxq->mem.type == MEM_TYPE_PAGE_POOL)
> +		skb_mark_for_recycle(skb);
> +
> +	skb_record_rx_queue(skb, rxq->queue_index);
> +
> +	if (unlikely(nr_frags)) {
> +		u32 tsize;
> +
> +		tsize = sinfo->xdp_frags_truesize ? : nr_frags * xdp->frame_sz;
> +		xdp_update_skb_shared_info(skb, nr_frags,
> +					   sinfo->xdp_frags_size, tsize,
> +					   xdp_buff_is_frag_pfmemalloc(xdp));
> +	}
> +
> +	skb->protocol = eth_type_trans(skb, rxq->dev);

The device we are working with has more ports (net devices) than Rx
queues, so each queue can receive packets from different net devices.
Currently, each Rx queue has its own NAPI instance and its own page
pool. All the Rx NAPI instances are initialized using the same dummy net
device which is allocated using alloc_netdev_dummy().

What are our options with regard to the XDP Rx queue info structure? As
is evident from this patch, it does not seem valid to register one such
structure per Rx queue and pass the dummy net device. Would it be valid
to register one such structure per port (net device) and pass zero for
the queue index and NAPI ID?

To be clear, I understand it is not a common use case.

Thanks

> +
> +	return skb;
> +}
> +EXPORT_SYMBOL_GPL(xdp_build_skb_from_buff);
> +
>  struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
>  					   struct sk_buff *skb,
>  					   struct net_device *dev)
> -- 
> 2.47.0
> 
>
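
For reference, the per-port registration being asked about would look roughly like the sketch below (hypothetical structure and function names; whether this is acceptable is exactly the open question, and the follow-up below notes that the page_pool association remains the sticking point).

#include <net/xdp.h>
#include <net/page_pool/types.h>

/* Hypothetical sketch: one xdp_rxq_info per port (net_device), with the
 * queue index and NAPI ID set to zero as proposed above. @port->dev is
 * the real port netdev, not the shared dummy device.
 */
static int example_port_xdp_reg(struct example_port *port, struct page_pool *pp)
{
	int err;

	err = xdp_rxq_info_reg(&port->xdp_rxq, port->dev, 0, 0);
	if (err)
		return err;

	/* The unresolved part: rxq_info can reference only one page_pool,
	 * while here the Rx queues (and their pools) are shared by ports.
	 */
	err = xdp_rxq_info_reg_mem_model(&port->xdp_rxq, MEM_TYPE_PAGE_POOL, pp);
	if (err)
		xdp_rxq_info_unreg(&port->xdp_rxq);

	return err;
}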
Ido Schimmel Nov. 14, 2024, 3:16 p.m. UTC | #2
On Thu, Nov 14, 2024 at 05:06:06PM +0200, Ido Schimmel wrote:
> Looks good (no objections to the patch), but I have a question. See
> below.
> 
> On Wed, Nov 13, 2024 at 04:24:35PM +0100, Alexander Lobakin wrote:
> > [...]
> > +	skb->protocol = eth_type_trans(skb, rxq->dev);
> 
> The device we are working with has more ports (net devices) than Rx
> queues, so each queue can receive packets from different net devices.
> Currently, each Rx queue has its own NAPI instance and its own page
> pool. All the Rx NAPI instances are initialized using the same dummy net
> device which is allocated using alloc_netdev_dummy().
> 
> What are our options with regard to the XDP Rx queue info structure? As
> is evident from this patch, it does not seem valid to register one such
> structure per Rx queue and pass the dummy net device. Would it be valid
> to register one such structure per port (net device) and pass zero for
> the queue index and NAPI ID?

Actually, this does not seem to be valid either as we need to associate
an XDP Rx queue info with the correct page pool :/

> 
> To be clear, I understand it is not a common use case.
> 
> Thanks
> 
> > +
> > +	return skb;
> > +}
> > +EXPORT_SYMBOL_GPL(xdp_build_skb_from_buff);
> > +
> >  struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
> >  					   struct sk_buff *skb,
> >  					   struct net_device *dev)
> > -- 
> > 2.47.0
> > 
> >
Alexander Lobakin Nov. 15, 2024, 2:34 p.m. UTC | #3
From: Ido Schimmel <idosch@idosch.org>
Date: Thu, 14 Nov 2024 17:16:44 +0200

> On Thu, Nov 14, 2024 at 05:06:06PM +0200, Ido Schimmel wrote:
>> Looks good (no objections to the patch), but I have a question. See
>> below.
>>
>> On Wed, Nov 13, 2024 at 04:24:35PM +0100, Alexander Lobakin wrote:
>>> [...]
>>> +	skb->protocol = eth_type_trans(skb, rxq->dev);
>>
>> The device we are working with has more ports (net devices) than Rx
>> queues, so each queue can receive packets from different net devices.
>> Currently, each Rx queue has its own NAPI instance and its own page
>> pool. All the Rx NAPI instances are initialized using the same dummy net
>> device which is allocated using alloc_netdev_dummy().
>>
>> What are our options with regard to the XDP Rx queue info structure? As
>> is evident from this patch, it does not seem valid to register one such
>> structure per Rx queue and pass the dummy net device. Would it be valid
>> to register one such structure per port (net device) and pass zero for
>> the queue index and NAPI ID?
> 
> Actually, this does not seem to be valid either as we need to associate
> an XDP Rx queue info with the correct page pool :/

Right.
But I'd say this association is slowly becoming redundant. For example, idpf has
up to 4 page_pools per queue and I only pass 1 of them to rxq_info as
there are no other options. Regardless, its frames get processed
correctly thanks to the struct page::pp pointer + patch 9 from
this series, which teaches put_page_bulk() to handle mixed bulks.

Regarding your use case -- after calling this function, you are free to
overwrite any skb fields, as this helper doesn't pass the skb up the stack.
For example, in the ice driver we have port representors and sometimes we need to
pass a different net_device, not the one saved in rxq_info. So when
switching to this function, we'll do eth_type_trans() once again (it's
under unlikely() in our code either way, as it's the switchdev slowpath).
Same for the queue number in rxq_info.
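
As a rough illustration of that pattern (the representor lookup and helper name are hypothetical, not the actual ice code):

#include <linux/etherdevice.h>
#include <net/xdp.h>

/* Sketch: retarget the skb built by xdp_build_skb_from_buff() to a port
 * representor on the switchdev slow path.
 */
static struct sk_buff *example_build_skb(struct xdp_buff *xdp,
					 struct net_device *repr_netdev)
{
	struct sk_buff *skb;

	skb = xdp_build_skb_from_buff(xdp);
	if (unlikely(!skb))
		return NULL;

	if (unlikely(repr_netdev)) {
		/* eth_type_trans() already pulled the Ethernet header,
		 * push it back before redoing it for the representor.
		 */
		skb_push(skb, ETH_HLEN);
		skb->protocol = eth_type_trans(skb, repr_netdev);
		skb_record_rx_queue(skb, 0);
	}

	return skb;
}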

> 
>>
>> To be clear, I understand it is not a common use case.
>>
>> Thanks

Thanks,
Olek
Amit Cohen Nov. 17, 2024, 12:42 p.m. UTC | #4
> -----Original Message-----
> From: Alexander Lobakin <aleksander.lobakin@intel.com>
> Sent: Friday, 15 November 2024 16:35
> To: Ido Schimmel <idosch@idosch.org>
> Cc: David S. Miller <davem@davemloft.net>; Eric Dumazet <edumazet@google.com>; Jakub Kicinski <kuba@kernel.org>; Paolo Abeni
> <pabeni@redhat.com>; Toke Høiland-Jørgensen <toke@redhat.com>; Alexei Starovoitov <ast@kernel.org>; Daniel Borkmann
> <daniel@iogearbox.net>; John Fastabend <john.fastabend@gmail.com>; Andrii Nakryiko <andrii@kernel.org>; Maciej Fijalkowski
> <maciej.fijalkowski@intel.com>; Stanislav Fomichev <sdf@fomichev.me>; Magnus Karlsson <magnus.karlsson@intel.com>;
> nex.sw.ncis.osdt.itp.upstreaming@intel.com; bpf@vger.kernel.org; netdev@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH net-next v5 12/19] xdp: add generic xdp_build_skb_from_buff()
> 
> From: Ido Schimmel <idosch@idosch.org>
> Date: Thu, 14 Nov 2024 17:16:44 +0200
> 
> > On Thu, Nov 14, 2024 at 05:06:06PM +0200, Ido Schimmel wrote:
> >> Looks good (no objections to the patch), but I have a question. See
> >> below.
> >>
> >> On Wed, Nov 13, 2024 at 04:24:35PM +0100, Alexander Lobakin wrote:
> >>> [...]
> >>> +	skb->protocol = eth_type_trans(skb, rxq->dev);
> >>
> >> The device we are working with has more ports (net devices) than Rx
> >> queues, so each queue can receive packets from different net devices.
> >> Currently, each Rx queue has its own NAPI instance and its own page
> >> pool. All the Rx NAPI instances are initialized using the same dummy net
> >> device which is allocated using alloc_netdev_dummy().
> >>
> >> What are our options with regard to the XDP Rx queue info structure? As
> >> is evident from this patch, it does not seem valid to register one such
> >> structure per Rx queue and pass the dummy net device. Would it be valid
> >> to register one such structure per port (net device) and pass zero for
> >> the queue index and NAPI ID?
> >
> > Actually, this does not seem to be valid either as we need to associate
> > an XDP Rx queue info with the correct page pool :/
> 
> Right.
> But I'd say this association is slowly becoming redundant. For example, idpf has
> up to 4 page_pools per queue and I only pass 1 of them to rxq_info as
> there are no other options. Regardless, its frames get processed
> correctly thanks to the struct page::pp pointer + patch 9 from
> this series, which teaches put_page_bulk() to handle mixed bulks.
> 
> Regarding your use case -- after calling this function, you are free to
> overwrite any skb fields, as this helper doesn't pass the skb up the stack.
> For example, in the ice driver we have port representors and sometimes we need to
> pass a different net_device, not the one saved in rxq_info. So when
> switching to this function, we'll do eth_type_trans() once again (it's
> under unlikely() in our code either way, as it's the switchdev slowpath).
> Same for the queue number in rxq_info.

With this series, maintaining 'struct xdp_mem_allocator' in a hash table looks unnecessary.
If so, xdp_reg_mem_model() does not need 'allocator' when mem_type is MEM_TYPE_PAGE_POOL.

Is there a reason for not removing 'mem_id_ht'? With this patch, the nodes are no longer used.

> 
> >
> >>
> >> To be clear, I understand it is not a common use case.
> >>
> >> Thanks
> 
> Thanks,
> Olek
Alexander Lobakin Nov. 19, 2024, 12:05 p.m. UTC | #5
From: Amit Cohen <amcohen@nvidia.com>
Date: Sun, 17 Nov 2024 12:42:11 +0000

> 
> 
>> [...]
>>
>> From: Ido Schimmel <idosch@idosch.org>
>> Date: Thu, 14 Nov 2024 17:16:44 +0200
>>
>>> On Thu, Nov 14, 2024 at 05:06:06PM +0200, Ido Schimmel wrote:
>>>> Looks good (no objections to the patch), but I have a question. See
>>>> below.
>>>>
>>>> On Wed, Nov 13, 2024 at 04:24:35PM +0100, Alexander Lobakin wrote:
>>>>> [...]
>>>>> +	skb->protocol = eth_type_trans(skb, rxq->dev);
>>>>
>>>> The device we are working with has more ports (net devices) than Rx
>>>> queues, so each queue can receive packets from different net devices.
>>>> Currently, each Rx queue has its own NAPI instance and its own page
>>>> pool. All the Rx NAPI instances are initialized using the same dummy net
>>>> device which is allocated using alloc_netdev_dummy().
>>>>
>>>> What are our options with regards to the XDP Rx queue info structure? As
>>>> evident by this patch, it does not seem valid to register one such
>>>> structure per Rx queue and pass the dummy net device. Would it be valid
>>>> to register one such structure per port (net device) and pass zero for
>>>> the queue index and NAPI ID?
>>>
>>> Actually, this does not seem to be valid either as we need to associate
>>> an XDP Rx queue info with the correct page pool :/
>>
>> Right.
>> But I'd say this association is slowly becoming redundant. For example, idpf has
>> up to 4 page_pools per queue and I only pass 1 of them to rxq_info as
>> there are no other options. Regardless, its frames get processed
>> correctly thanks to the struct page::pp pointer + patch 9 from
>> this series, which teaches put_page_bulk() to handle mixed bulks.
>>
>> Regarding your use case -- after calling this function, you are free to
>> overwrite any skb fields, as this helper doesn't pass the skb up the stack.
>> For example, in the ice driver we have port representors and sometimes we need to
>> pass a different net_device, not the one saved in rxq_info. So when
>> switching to this function, we'll do eth_type_trans() once again (it's
>> under unlikely() in our code either way, as it's the switchdev slowpath).
>> Same for the queue number in rxq_info.
> 
> With this series, maintaining 'struct xdp_mem_allocator' in a hash table looks unnecessary.
> If so, xdp_reg_mem_model() does not need 'allocator' when mem_type is MEM_TYPE_PAGE_POOL.
> 
> Is there a reason for not removing 'mem_id_ht'? With this patch, the nodes are no longer used.

Let me review this once again since I need to rebase it anyway.
Maybe we really could drop more code.

Thanks,
Olek

Patch

diff --git a/include/net/xdp.h b/include/net/xdp.h
index 4c19042adf80..b0a25b7060ff 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -330,6 +330,7 @@  xdp_update_skb_shared_info(struct sk_buff *skb, u8 nr_frags,
 void xdp_warn(const char *msg, const char *func, const int line);
 #define XDP_WARN(msg) xdp_warn(msg, __func__, __LINE__)
 
+struct sk_buff *xdp_build_skb_from_buff(const struct xdp_buff *xdp);
 struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp);
 struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
 					   struct sk_buff *skb,
diff --git a/net/core/xdp.c b/net/core/xdp.c
index b1b426a9b146..3a9a3c14b080 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -624,6 +624,61 @@  int xdp_alloc_skb_bulk(void **skbs, int n_skb, gfp_t gfp)
 }
 EXPORT_SYMBOL_GPL(xdp_alloc_skb_bulk);
 
+/**
+ * xdp_build_skb_from_buff - create an skb from an &xdp_buff
+ * @xdp: &xdp_buff to convert to an skb
+ *
+ * Perform common operations to create a new skb to pass up the stack from
+ * an &xdp_buff: allocate an skb head from the NAPI percpu cache, initialize
+ * skb data pointers and offsets, set the recycle bit if the buff is PP-backed,
+ * Rx queue index, protocol and update frags info.
+ *
+ * Return: new &sk_buff on success, %NULL on error.
+ */
+struct sk_buff *xdp_build_skb_from_buff(const struct xdp_buff *xdp)
+{
+	const struct xdp_rxq_info *rxq = xdp->rxq;
+	const struct skb_shared_info *sinfo;
+	struct sk_buff *skb;
+	u32 nr_frags = 0;
+	int metalen;
+
+	if (unlikely(xdp_buff_has_frags(xdp))) {
+		sinfo = xdp_get_shared_info_from_buff(xdp);
+		nr_frags = sinfo->nr_frags;
+	}
+
+	skb = napi_build_skb(xdp->data_hard_start, xdp->frame_sz);
+	if (unlikely(!skb))
+		return NULL;
+
+	skb_reserve(skb, xdp->data - xdp->data_hard_start);
+	__skb_put(skb, xdp->data_end - xdp->data);
+
+	metalen = xdp->data - xdp->data_meta;
+	if (metalen > 0)
+		skb_metadata_set(skb, metalen);
+
+	if (is_page_pool_compiled_in() && rxq->mem.type == MEM_TYPE_PAGE_POOL)
+		skb_mark_for_recycle(skb);
+
+	skb_record_rx_queue(skb, rxq->queue_index);
+
+	if (unlikely(nr_frags)) {
+		u32 tsize;
+
+		tsize = sinfo->xdp_frags_truesize ? : nr_frags * xdp->frame_sz;
+		xdp_update_skb_shared_info(skb, nr_frags,
+					   sinfo->xdp_frags_size, tsize,
+					   xdp_buff_is_frag_pfmemalloc(xdp));
+	}
+
+	skb->protocol = eth_type_trans(skb, rxq->dev);
+
+	return skb;
+}
+EXPORT_SYMBOL_GPL(xdp_build_skb_from_buff);
+
 struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
 					   struct sk_buff *skb,
 					   struct net_device *dev)
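
One detail worth illustrating: the metalen handling above carries XDP metadata over to the skb via skb_metadata_set(). A minimal, hypothetical XDP program that produces such metadata with the standard bpf_xdp_adjust_meta() helper could look like this:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int xdp_meta_example(struct xdp_md *ctx)
{
	__u32 *meta;

	/* Grow the metadata area by 4 bytes in front of the packet */
	if (bpf_xdp_adjust_meta(ctx, -(int)sizeof(*meta)))
		return XDP_PASS;

	meta = (void *)(long)ctx->data_meta;
	if ((void *)(meta + 1) > (void *)(long)ctx->data)
		return XDP_PASS;

	*meta = 0xdeadbeef;	/* e.g. a mark for the stack to consume */
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";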