diff mbox series

[bpf-next,01/21] xsk: prepare 'options' in xdp_desc for multi-buffer use

Message ID 20230518180545.159100-2-maciej.fijalkowski@intel.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series xsk: multi-buffer support | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-5 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-20 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-23 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-29 success Logs for veristat
bpf/vmtest-bpf-next-VM_Test-7 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-15 success Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-18 success Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-19 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_progs_parallel on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-16 success Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for test_maps on s390x with gcc
netdev/series_format fail Series longer than 15 patches (and no cover letter)
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 35 this patch: 35
netdev/cc_maintainers warning 7 maintainers not CCed: kuba@kernel.org hawk@kernel.org john.fastabend@gmail.com davem@davemloft.net jonathan.lemon@gmail.com pabeni@redhat.com edumazet@google.com
netdev/build_clang success Errors and warnings before: 8 this patch: 8
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 35 this patch: 35
netdev/checkpatch warning WARNING: line length of 86 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-6 success Logs for set-matrix

Commit Message

Fijalkowski, Maciej May 18, 2023, 6:05 p.m. UTC
From: Tirthendu Sarkar <tirthendu.sarkar@intel.com>

Use the 'options' field in xdp_desc as a packet continuity marker. Since
'options' field was unused till now and was expected to be set to 0, the
'eop' descriptor will have it set to 0, while the non-eop descriptors
will have to set it to 1. This ensures legacy applications continue to
work without needing any change for single-buffer packets.

Add helper functions and extend xskq_prod_reserve_desc() to use the
'options' field.

Signed-off-by: Tirthendu Sarkar <tirthendu.sarkar@intel.com>
---
 include/uapi/linux/if_xdp.h | 16 ++++++++++++++++
 net/xdp/xsk.c               |  8 ++++----
 net/xdp/xsk_queue.h         | 12 +++++++++---
 3 files changed, 29 insertions(+), 7 deletions(-)

Comments

Stanislav Fomichev May 18, 2023, 7:22 p.m. UTC | #1
On 05/18, Maciej Fijalkowski wrote:
> From: Tirthendu Sarkar <tirthendu.sarkar@intel.com>
> 
> Use the 'options' field in xdp_desc as a packet continuity marker. Since
> 'options' field was unused till now and was expected to be set to 0, the
> 'eop' descriptor will have it set to 0, while the non-eop descriptors
> will have to set it to 1. This ensures legacy applications continue to
> work without needing any change for single-buffer packets.
> 
> Add helper functions and extend xskq_prod_reserve_desc() to use the
> 'options' field.
> 
> Signed-off-by: Tirthendu Sarkar <tirthendu.sarkar@intel.com>
> ---
>  include/uapi/linux/if_xdp.h | 16 ++++++++++++++++
>  net/xdp/xsk.c               |  8 ++++----
>  net/xdp/xsk_queue.h         | 12 +++++++++---
>  3 files changed, 29 insertions(+), 7 deletions(-)
> 
> diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
> index a78a8096f4ce..4acc3a9430f3 100644
> --- a/include/uapi/linux/if_xdp.h
> +++ b/include/uapi/linux/if_xdp.h
> @@ -108,4 +108,20 @@ struct xdp_desc {
>  
>  /* UMEM descriptor is __u64 */
>  
> +/* Flag indicating that the packet continues with the buffer pointed out by the
> + * next frame in the ring. The end of the packet is signalled by setting this
> + * bit to zero. For single buffer packets, every descriptor has 'options' set
> + * to 0 and this maintains backward compatibility.
> + */
> +#define XDP_PKT_CONTD (1 << 0)
> +
> +/* Maximum number of descriptors supported as frags for a packet. So the total
> + * number of descriptors supported for a packet is XSK_DESC_MAX_FRAGS + 1. The
> + * max frags supported by skb is 16 for page sizes greater than 4K and 17 or

This is now a config option CONFIG_MAX_SKB_FRAGS. Can we use it
directly?
Alexei Starovoitov May 19, 2023, 5:13 p.m. UTC | #2
On Thu, May 18, 2023 at 12:22 PM Stanislav Fomichev <sdf@google.com> wrote:
>
> On 05/18, Maciej Fijalkowski wrote:
> > From: Tirthendu Sarkar <tirthendu.sarkar@intel.com>
> >
> > Use the 'options' field in xdp_desc as a packet continuity marker. Since
> > 'options' field was unused till now and was expected to be set to 0, the
> > 'eop' descriptor will have it set to 0, while the non-eop descriptors
> > will have to set it to 1. This ensures legacy applications continue to
> > work without needing any change for single-buffer packets.
> >
> > Add helper functions and extend xskq_prod_reserve_desc() to use the
> > 'options' field.
> >
> > Signed-off-by: Tirthendu Sarkar <tirthendu.sarkar@intel.com>
> > ---
> >  include/uapi/linux/if_xdp.h | 16 ++++++++++++++++
> >  net/xdp/xsk.c               |  8 ++++----
> >  net/xdp/xsk_queue.h         | 12 +++++++++---
> >  3 files changed, 29 insertions(+), 7 deletions(-)
> >
> > diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
> > index a78a8096f4ce..4acc3a9430f3 100644
> > --- a/include/uapi/linux/if_xdp.h
> > +++ b/include/uapi/linux/if_xdp.h
> > @@ -108,4 +108,20 @@ struct xdp_desc {
> >
> >  /* UMEM descriptor is __u64 */
> >
> > +/* Flag indicating that the packet continues with the buffer pointed out by the
> > + * next frame in the ring. The end of the packet is signalled by setting this
> > + * bit to zero. For single buffer packets, every descriptor has 'options' set
> > + * to 0 and this maintains backward compatibility.
> > + */
> > +#define XDP_PKT_CONTD (1 << 0)
> > +
> > +/* Maximum number of descriptors supported as frags for a packet. So the total
> > + * number of descriptors supported for a packet is XSK_DESC_MAX_FRAGS + 1. The
> > + * max frags supported by skb is 16 for page sizes greater than 4K and 17 or
>
> This is now a config option CONFIG_MAX_SKB_FRAGS. Can we use it
> directly?

Also it doesn't look right to expose kernel internal config in uapi
especially since XSK_DESC_MAX_FRAGS is not guaranteed to be 16.
Tirthendu Sarkar May 24, 2023, 8:56 a.m. UTC | #3
> -----Original Message-----
> From: Alexei Starovoitov <alexei.starovoitov@gmail.com>
> Sent: Friday, May 19, 2023 10:44 PM
> To: Stanislav Fomichev <sdf@google.com>
> Cc: Fijalkowski, Maciej <maciej.fijalkowski@intel.com>; bpf
> <bpf@vger.kernel.org>; Alexei Starovoitov <ast@kernel.org>; Daniel
> Borkmann <daniel@iogearbox.net>; Andrii Nakryiko <andrii@kernel.org>;
> Network Development <netdev@vger.kernel.org>; Karlsson, Magnus
> <magnus.karlsson@intel.com>; Sarkar, Tirthendu
> <tirthendu.sarkar@intel.com>; Björn Töpel <bjorn@kernel.org>
> Subject: Re: [PATCH bpf-next 01/21] xsk: prepare 'options' in xdp_desc for
> multi-buffer use
> 
> On Thu, May 18, 2023 at 12:22 PM Stanislav Fomichev <sdf@google.com>
> wrote:
> >
> > On 05/18, Maciej Fijalkowski wrote:
> > > From: Tirthendu Sarkar <tirthendu.sarkar@intel.com>
> > >
> > > Use the 'options' field in xdp_desc as a packet continuity marker. Since
> > > 'options' field was unused till now and was expected to be set to 0, the
> > > 'eop' descriptor will have it set to 0, while the non-eop descriptors
> > > will have to set it to 1. This ensures legacy applications continue to
> > > work without needing any change for single-buffer packets.
> > >
> > > Add helper functions and extend xskq_prod_reserve_desc() to use the
> > > 'options' field.
> > >
> > > Signed-off-by: Tirthendu Sarkar <tirthendu.sarkar@intel.com>
> > > ---
> > >  include/uapi/linux/if_xdp.h | 16 ++++++++++++++++
> > >  net/xdp/xsk.c               |  8 ++++----
> > >  net/xdp/xsk_queue.h         | 12 +++++++++---
> > >  3 files changed, 29 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
> > > index a78a8096f4ce..4acc3a9430f3 100644
> > > --- a/include/uapi/linux/if_xdp.h
> > > +++ b/include/uapi/linux/if_xdp.h
> > > @@ -108,4 +108,20 @@ struct xdp_desc {
> > >
> > >  /* UMEM descriptor is __u64 */
> > >
> > > +/* Flag indicating that the packet continues with the buffer pointed out
> by the
> > > + * next frame in the ring. The end of the packet is signalled by setting
> this
> > > + * bit to zero. For single buffer packets, every descriptor has 'options'
> set
> > > + * to 0 and this maintains backward compatibility.
> > > + */
> > > +#define XDP_PKT_CONTD (1 << 0)
> > > +
> > > +/* Maximum number of descriptors supported as frags for a packet. So
> the total
> > > + * number of descriptors supported for a packet is
> XSK_DESC_MAX_FRAGS + 1. The
> > > + * max frags supported by skb is 16 for page sizes greater than 4K and 17
> or
> >
> > This is now a config option CONFIG_MAX_SKB_FRAGS. Can we use it
> > directly?
> 
> Also it doesn't look right to expose kernel internal config in uapi
> especially since XSK_DESC_MAX_FRAGS is not guaranteed to be 16.

Ok, we have couple of options here:

Option 1:  We will define XSK_DESC_MAX_FRAGS to 17 now. This will ensure AF_XDP
 applications will work on any system without any change since the MAX_SKB_FRAGS
 is guaranteed to be at least 17.

Option 2: Instead of defining a new macro, we say max frags supported is same as
 MAX_SKB_FRAGS as configured in your system. So use 17 or less frags if you want 
 your app to work everywhere but you can go larger if you control the system.

Any suggestions ?

Also Alexei could you please clarify what you meant by ".. since XSK_DESC_MAX_FRAGS
 is not guaranteed to be 16." ?
Fijalkowski, Maciej May 24, 2023, 10:27 a.m. UTC | #4
On Wed, May 24, 2023 at 10:56:21AM +0200, Sarkar, Tirthendu wrote:
> > -----Original Message-----
> > From: Alexei Starovoitov <alexei.starovoitov@gmail.com>
> > Sent: Friday, May 19, 2023 10:44 PM
> > To: Stanislav Fomichev <sdf@google.com>
> > Cc: Fijalkowski, Maciej <maciej.fijalkowski@intel.com>; bpf
> > <bpf@vger.kernel.org>; Alexei Starovoitov <ast@kernel.org>; Daniel
> > Borkmann <daniel@iogearbox.net>; Andrii Nakryiko <andrii@kernel.org>;
> > Network Development <netdev@vger.kernel.org>; Karlsson, Magnus
> > <magnus.karlsson@intel.com>; Sarkar, Tirthendu
> > <tirthendu.sarkar@intel.com>; Björn Töpel <bjorn@kernel.org>
> > Subject: Re: [PATCH bpf-next 01/21] xsk: prepare 'options' in xdp_desc for
> > multi-buffer use
> > 
> > On Thu, May 18, 2023 at 12:22 PM Stanislav Fomichev <sdf@google.com>
> > wrote:
> > >
> > > On 05/18, Maciej Fijalkowski wrote:
> > > > From: Tirthendu Sarkar <tirthendu.sarkar@intel.com>
> > > >
> > > > Use the 'options' field in xdp_desc as a packet continuity marker. Since
> > > > 'options' field was unused till now and was expected to be set to 0, the
> > > > 'eop' descriptor will have it set to 0, while the non-eop descriptors
> > > > will have to set it to 1. This ensures legacy applications continue to
> > > > work without needing any change for single-buffer packets.
> > > >
> > > > Add helper functions and extend xskq_prod_reserve_desc() to use the
> > > > 'options' field.
> > > >
> > > > Signed-off-by: Tirthendu Sarkar <tirthendu.sarkar@intel.com>
> > > > ---
> > > >  include/uapi/linux/if_xdp.h | 16 ++++++++++++++++
> > > >  net/xdp/xsk.c               |  8 ++++----
> > > >  net/xdp/xsk_queue.h         | 12 +++++++++---
> > > >  3 files changed, 29 insertions(+), 7 deletions(-)
> > > >
> > > > diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
> > > > index a78a8096f4ce..4acc3a9430f3 100644
> > > > --- a/include/uapi/linux/if_xdp.h
> > > > +++ b/include/uapi/linux/if_xdp.h
> > > > @@ -108,4 +108,20 @@ struct xdp_desc {
> > > >
> > > >  /* UMEM descriptor is __u64 */
> > > >
> > > > +/* Flag indicating that the packet continues with the buffer pointed out
> > by the
> > > > + * next frame in the ring. The end of the packet is signalled by setting
> > this
> > > > + * bit to zero. For single buffer packets, every descriptor has 'options'
> > set
> > > > + * to 0 and this maintains backward compatibility.
> > > > + */
> > > > +#define XDP_PKT_CONTD (1 << 0)
> > > > +
> > > > +/* Maximum number of descriptors supported as frags for a packet. So
> > the total
> > > > + * number of descriptors supported for a packet is
> > XSK_DESC_MAX_FRAGS + 1. The
> > > > + * max frags supported by skb is 16 for page sizes greater than 4K and 17
> > or
> > >
> > > This is now a config option CONFIG_MAX_SKB_FRAGS. Can we use it
> > > directly?
> > 
> > Also it doesn't look right to expose kernel internal config in uapi
> > especially since XSK_DESC_MAX_FRAGS is not guaranteed to be 16.
> 
> Ok, we have couple of options here:
> 
> Option 1:  We will define XSK_DESC_MAX_FRAGS to 17 now. This will ensure AF_XDP
>  applications will work on any system without any change since the MAX_SKB_FRAGS
>  is guaranteed to be at least 17.
> 
> Option 2: Instead of defining a new macro, we say max frags supported is same as
>  MAX_SKB_FRAGS as configured in your system. So use 17 or less frags if you want 
>  your app to work everywhere but you can go larger if you control the system.
> 
> Any suggestions ?
> 
> Also Alexei could you please clarify what you meant by ".. since XSK_DESC_MAX_FRAGS
>  is not guaranteed to be 16." ?

Maybe it would be better to put this define onto patch 08 so people would
see how it is used and get a feeling of it? Although it has a description
nothing says about it in commit message.

FWIW i'm voting for option 2, but also Alexei's comment is a bit unclear
to me, would be nice to hear more about it.
Alexei Starovoitov May 24, 2023, 2:12 p.m. UTC | #5
On Wed, May 24, 2023 at 3:27 AM Maciej Fijalkowski
<maciej.fijalkowski@intel.com> wrote:
>
> On Wed, May 24, 2023 at 10:56:21AM +0200, Sarkar, Tirthendu wrote:
> > > -----Original Message-----
> > > From: Alexei Starovoitov <alexei.starovoitov@gmail.com>
> > > Sent: Friday, May 19, 2023 10:44 PM
> > > To: Stanislav Fomichev <sdf@google.com>
> > > Cc: Fijalkowski, Maciej <maciej.fijalkowski@intel.com>; bpf
> > > <bpf@vger.kernel.org>; Alexei Starovoitov <ast@kernel.org>; Daniel
> > > Borkmann <daniel@iogearbox.net>; Andrii Nakryiko <andrii@kernel.org>;
> > > Network Development <netdev@vger.kernel.org>; Karlsson, Magnus
> > > <magnus.karlsson@intel.com>; Sarkar, Tirthendu
> > > <tirthendu.sarkar@intel.com>; Björn Töpel <bjorn@kernel.org>
> > > Subject: Re: [PATCH bpf-next 01/21] xsk: prepare 'options' in xdp_desc for
> > > multi-buffer use
> > >
> > > On Thu, May 18, 2023 at 12:22 PM Stanislav Fomichev <sdf@google.com>
> > > wrote:
> > > >
> > > > On 05/18, Maciej Fijalkowski wrote:
> > > > > From: Tirthendu Sarkar <tirthendu.sarkar@intel.com>
> > > > >
> > > > > Use the 'options' field in xdp_desc as a packet continuity marker. Since
> > > > > 'options' field was unused till now and was expected to be set to 0, the
> > > > > 'eop' descriptor will have it set to 0, while the non-eop descriptors
> > > > > will have to set it to 1. This ensures legacy applications continue to
> > > > > work without needing any change for single-buffer packets.
> > > > >
> > > > > Add helper functions and extend xskq_prod_reserve_desc() to use the
> > > > > 'options' field.
> > > > >
> > > > > Signed-off-by: Tirthendu Sarkar <tirthendu.sarkar@intel.com>
> > > > > ---
> > > > >  include/uapi/linux/if_xdp.h | 16 ++++++++++++++++
> > > > >  net/xdp/xsk.c               |  8 ++++----
> > > > >  net/xdp/xsk_queue.h         | 12 +++++++++---
> > > > >  3 files changed, 29 insertions(+), 7 deletions(-)
> > > > >
> > > > > diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
> > > > > index a78a8096f4ce..4acc3a9430f3 100644
> > > > > --- a/include/uapi/linux/if_xdp.h
> > > > > +++ b/include/uapi/linux/if_xdp.h
> > > > > @@ -108,4 +108,20 @@ struct xdp_desc {
> > > > >
> > > > >  /* UMEM descriptor is __u64 */
> > > > >
> > > > > +/* Flag indicating that the packet continues with the buffer pointed out
> > > by the
> > > > > + * next frame in the ring. The end of the packet is signalled by setting
> > > this
> > > > > + * bit to zero. For single buffer packets, every descriptor has 'options'
> > > set
> > > > > + * to 0 and this maintains backward compatibility.
> > > > > + */
> > > > > +#define XDP_PKT_CONTD (1 << 0)
> > > > > +
> > > > > +/* Maximum number of descriptors supported as frags for a packet. So
> > > the total
> > > > > + * number of descriptors supported for a packet is
> > > XSK_DESC_MAX_FRAGS + 1. The
> > > > > + * max frags supported by skb is 16 for page sizes greater than 4K and 17
> > > or
> > > >
> > > > This is now a config option CONFIG_MAX_SKB_FRAGS. Can we use it
> > > > directly?
> > >
> > > Also it doesn't look right to expose kernel internal config in uapi
> > > especially since XSK_DESC_MAX_FRAGS is not guaranteed to be 16.
> >
> > Ok, we have couple of options here:
> >
> > Option 1:  We will define XSK_DESC_MAX_FRAGS to 17 now. This will ensure AF_XDP
> >  applications will work on any system without any change since the MAX_SKB_FRAGS
> >  is guaranteed to be at least 17.
> >
> > Option 2: Instead of defining a new macro, we say max frags supported is same as
> >  MAX_SKB_FRAGS as configured in your system. So use 17 or less frags if you want
> >  your app to work everywhere but you can go larger if you control the system.
> >
> > Any suggestions ?
> >
> > Also Alexei could you please clarify what you meant by ".. since XSK_DESC_MAX_FRAGS
> >  is not guaranteed to be 16." ?
>
> Maybe it would be better to put this define onto patch 08 so people would
> see how it is used and get a feeling of it? Although it has a description
> nothing says about it in commit message.
>
> FWIW i'm voting for option 2, but also Alexei's comment is a bit unclear
> to me, would be nice to hear more about it.

Meaning that uapi can only have fixed constants.
We cannot put *_MAX_FRAGS there, since it's config dependent.
Stanislav Fomichev May 24, 2023, 4:20 p.m. UTC | #6
On Wed, May 24, 2023 at 7:12 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Wed, May 24, 2023 at 3:27 AM Maciej Fijalkowski
> <maciej.fijalkowski@intel.com> wrote:
> >
> > On Wed, May 24, 2023 at 10:56:21AM +0200, Sarkar, Tirthendu wrote:
> > > > -----Original Message-----
> > > > From: Alexei Starovoitov <alexei.starovoitov@gmail.com>
> > > > Sent: Friday, May 19, 2023 10:44 PM
> > > > To: Stanislav Fomichev <sdf@google.com>
> > > > Cc: Fijalkowski, Maciej <maciej.fijalkowski@intel.com>; bpf
> > > > <bpf@vger.kernel.org>; Alexei Starovoitov <ast@kernel.org>; Daniel
> > > > Borkmann <daniel@iogearbox.net>; Andrii Nakryiko <andrii@kernel.org>;
> > > > Network Development <netdev@vger.kernel.org>; Karlsson, Magnus
> > > > <magnus.karlsson@intel.com>; Sarkar, Tirthendu
> > > > <tirthendu.sarkar@intel.com>; Björn Töpel <bjorn@kernel.org>
> > > > Subject: Re: [PATCH bpf-next 01/21] xsk: prepare 'options' in xdp_desc for
> > > > multi-buffer use
> > > >
> > > > On Thu, May 18, 2023 at 12:22 PM Stanislav Fomichev <sdf@google.com>
> > > > wrote:
> > > > >
> > > > > On 05/18, Maciej Fijalkowski wrote:
> > > > > > From: Tirthendu Sarkar <tirthendu.sarkar@intel.com>
> > > > > >
> > > > > > Use the 'options' field in xdp_desc as a packet continuity marker. Since
> > > > > > 'options' field was unused till now and was expected to be set to 0, the
> > > > > > 'eop' descriptor will have it set to 0, while the non-eop descriptors
> > > > > > will have to set it to 1. This ensures legacy applications continue to
> > > > > > work without needing any change for single-buffer packets.
> > > > > >
> > > > > > Add helper functions and extend xskq_prod_reserve_desc() to use the
> > > > > > 'options' field.
> > > > > >
> > > > > > Signed-off-by: Tirthendu Sarkar <tirthendu.sarkar@intel.com>
> > > > > > ---
> > > > > >  include/uapi/linux/if_xdp.h | 16 ++++++++++++++++
> > > > > >  net/xdp/xsk.c               |  8 ++++----
> > > > > >  net/xdp/xsk_queue.h         | 12 +++++++++---
> > > > > >  3 files changed, 29 insertions(+), 7 deletions(-)
> > > > > >
> > > > > > diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
> > > > > > index a78a8096f4ce..4acc3a9430f3 100644
> > > > > > --- a/include/uapi/linux/if_xdp.h
> > > > > > +++ b/include/uapi/linux/if_xdp.h
> > > > > > @@ -108,4 +108,20 @@ struct xdp_desc {
> > > > > >
> > > > > >  /* UMEM descriptor is __u64 */
> > > > > >
> > > > > > +/* Flag indicating that the packet continues with the buffer pointed out
> > > > by the
> > > > > > + * next frame in the ring. The end of the packet is signalled by setting
> > > > this
> > > > > > + * bit to zero. For single buffer packets, every descriptor has 'options'
> > > > set
> > > > > > + * to 0 and this maintains backward compatibility.
> > > > > > + */
> > > > > > +#define XDP_PKT_CONTD (1 << 0)
> > > > > > +
> > > > > > +/* Maximum number of descriptors supported as frags for a packet. So
> > > > the total
> > > > > > + * number of descriptors supported for a packet is
> > > > XSK_DESC_MAX_FRAGS + 1. The
> > > > > > + * max frags supported by skb is 16 for page sizes greater than 4K and 17
> > > > or
> > > > >
> > > > > This is now a config option CONFIG_MAX_SKB_FRAGS. Can we use it
> > > > > directly?
> > > >
> > > > Also it doesn't look right to expose kernel internal config in uapi
> > > > especially since XSK_DESC_MAX_FRAGS is not guaranteed to be 16.
> > >
> > > Ok, we have couple of options here:
> > >
> > > Option 1:  We will define XSK_DESC_MAX_FRAGS to 17 now. This will ensure AF_XDP
> > >  applications will work on any system without any change since the MAX_SKB_FRAGS
> > >  is guaranteed to be at least 17.
> > >
> > > Option 2: Instead of defining a new macro, we say max frags supported is same as
> > >  MAX_SKB_FRAGS as configured in your system. So use 17 or less frags if you want
> > >  your app to work everywhere but you can go larger if you control the system.
> > >
> > > Any suggestions ?
> > >
> > > Also Alexei could you please clarify what you meant by ".. since XSK_DESC_MAX_FRAGS
> > >  is not guaranteed to be 16." ?
> >
> > Maybe it would be better to put this define onto patch 08 so people would
> > see how it is used and get a feeling of it? Although it has a description
> > nothing says about it in commit message.
> >
> > FWIW i'm voting for option 2, but also Alexei's comment is a bit unclear
> > to me, would be nice to hear more about it.
>
> Meaning that uapi can only have fixed constants.
> We cannot put *_MAX_FRAGS there, since it's config dependent.

Same here, would prefer option 2. And don't put it in the uapi. That's
something the users can try to probe maybe?
Fijalkowski, Maciej May 24, 2023, 4:27 p.m. UTC | #7
On Wed, May 24, 2023 at 09:20:05AM -0700, Stanislav Fomichev wrote:
> On Wed, May 24, 2023 at 7:12 AM Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
> >
> > On Wed, May 24, 2023 at 3:27 AM Maciej Fijalkowski
> > <maciej.fijalkowski@intel.com> wrote:
> > >
> > > On Wed, May 24, 2023 at 10:56:21AM +0200, Sarkar, Tirthendu wrote:
> > > > > -----Original Message-----
> > > > > From: Alexei Starovoitov <alexei.starovoitov@gmail.com>
> > > > > Sent: Friday, May 19, 2023 10:44 PM
> > > > > To: Stanislav Fomichev <sdf@google.com>
> > > > > Cc: Fijalkowski, Maciej <maciej.fijalkowski@intel.com>; bpf
> > > > > <bpf@vger.kernel.org>; Alexei Starovoitov <ast@kernel.org>; Daniel
> > > > > Borkmann <daniel@iogearbox.net>; Andrii Nakryiko <andrii@kernel.org>;
> > > > > Network Development <netdev@vger.kernel.org>; Karlsson, Magnus
> > > > > <magnus.karlsson@intel.com>; Sarkar, Tirthendu
> > > > > <tirthendu.sarkar@intel.com>; Björn Töpel <bjorn@kernel.org>
> > > > > Subject: Re: [PATCH bpf-next 01/21] xsk: prepare 'options' in xdp_desc for
> > > > > multi-buffer use
> > > > >
> > > > > On Thu, May 18, 2023 at 12:22 PM Stanislav Fomichev <sdf@google.com>
> > > > > wrote:
> > > > > >
> > > > > > On 05/18, Maciej Fijalkowski wrote:
> > > > > > > From: Tirthendu Sarkar <tirthendu.sarkar@intel.com>
> > > > > > >
> > > > > > > Use the 'options' field in xdp_desc as a packet continuity marker. Since
> > > > > > > 'options' field was unused till now and was expected to be set to 0, the
> > > > > > > 'eop' descriptor will have it set to 0, while the non-eop descriptors
> > > > > > > will have to set it to 1. This ensures legacy applications continue to
> > > > > > > work without needing any change for single-buffer packets.
> > > > > > >
> > > > > > > Add helper functions and extend xskq_prod_reserve_desc() to use the
> > > > > > > 'options' field.
> > > > > > >
> > > > > > > Signed-off-by: Tirthendu Sarkar <tirthendu.sarkar@intel.com>
> > > > > > > ---
> > > > > > >  include/uapi/linux/if_xdp.h | 16 ++++++++++++++++
> > > > > > >  net/xdp/xsk.c               |  8 ++++----
> > > > > > >  net/xdp/xsk_queue.h         | 12 +++++++++---
> > > > > > >  3 files changed, 29 insertions(+), 7 deletions(-)
> > > > > > >
> > > > > > > diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
> > > > > > > index a78a8096f4ce..4acc3a9430f3 100644
> > > > > > > --- a/include/uapi/linux/if_xdp.h
> > > > > > > +++ b/include/uapi/linux/if_xdp.h
> > > > > > > @@ -108,4 +108,20 @@ struct xdp_desc {
> > > > > > >
> > > > > > >  /* UMEM descriptor is __u64 */
> > > > > > >
> > > > > > > +/* Flag indicating that the packet continues with the buffer pointed out
> > > > > by the
> > > > > > > + * next frame in the ring. The end of the packet is signalled by setting
> > > > > this
> > > > > > > + * bit to zero. For single buffer packets, every descriptor has 'options'
> > > > > set
> > > > > > > + * to 0 and this maintains backward compatibility.
> > > > > > > + */
> > > > > > > +#define XDP_PKT_CONTD (1 << 0)
> > > > > > > +
> > > > > > > +/* Maximum number of descriptors supported as frags for a packet. So
> > > > > the total
> > > > > > > + * number of descriptors supported for a packet is
> > > > > XSK_DESC_MAX_FRAGS + 1. The
> > > > > > > + * max frags supported by skb is 16 for page sizes greater than 4K and 17
> > > > > or
> > > > > >
> > > > > > This is now a config option CONFIG_MAX_SKB_FRAGS. Can we use it
> > > > > > directly?
> > > > >
> > > > > Also it doesn't look right to expose kernel internal config in uapi
> > > > > especially since XSK_DESC_MAX_FRAGS is not guaranteed to be 16.
> > > >
> > > > Ok, we have couple of options here:
> > > >
> > > > Option 1:  We will define XSK_DESC_MAX_FRAGS to 17 now. This will ensure AF_XDP
> > > >  applications will work on any system without any change since the MAX_SKB_FRAGS
> > > >  is guaranteed to be at least 17.
> > > >
> > > > Option 2: Instead of defining a new macro, we say max frags supported is same as
> > > >  MAX_SKB_FRAGS as configured in your system. So use 17 or less frags if you want
> > > >  your app to work everywhere but you can go larger if you control the system.
> > > >
> > > > Any suggestions ?
> > > >
> > > > Also Alexei could you please clarify what you meant by ".. since XSK_DESC_MAX_FRAGS
> > > >  is not guaranteed to be 16." ?
> > >
> > > Maybe it would be better to put this define onto patch 08 so people would
> > > see how it is used and get a feeling of it? Although it has a description
> > > nothing says about it in commit message.
> > >
> > > FWIW i'm voting for option 2, but also Alexei's comment is a bit unclear
> > > to me, would be nice to hear more about it.
> >
> > Meaning that uapi can only have fixed constants.
> > We cannot put *_MAX_FRAGS there, since it's config dependent.

Got it.

> 
> Same here, would prefer option 2. And don't put it in the uapi. That's
> something the users can try to probe maybe?

Yeah now I see no reason to put this in uapi. You can probe
/proc/sys/net/core/max_skb_frags from userspace.

>
diff mbox series

Patch

diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h
index a78a8096f4ce..4acc3a9430f3 100644
--- a/include/uapi/linux/if_xdp.h
+++ b/include/uapi/linux/if_xdp.h
@@ -108,4 +108,20 @@  struct xdp_desc {
 
 /* UMEM descriptor is __u64 */
 
+/* Flag indicating that the packet continues with the buffer pointed out by the
+ * next frame in the ring. The end of the packet is signalled by setting this
+ * bit to zero. For single buffer packets, every descriptor has 'options' set
+ * to 0 and this maintains backward compatibility.
+ */
+#define XDP_PKT_CONTD (1 << 0)
+
+/* Maximum number of descriptors supported as frags for a packet. So the total
+ * number of descriptors supported for a packet is XSK_DESC_MAX_FRAGS + 1. The
+ * max frags supported by skb is 16 for page sizes greater than 4K and 17 or
+ * more for 4K or lesser page sizes. XSK_DESC_MAX_FRAGS is set as the minimum
+ * value of 16 so that xsk applications see the same behavior for all
+ * architectures.
+ */
+#define XSK_DESC_MAX_FRAGS 16
+
 #endif /* _LINUX_IF_XDP_H */
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index cc1e7f15fa73..99f90a0d04ae 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -135,14 +135,14 @@  int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool,
 	return 0;
 }
 
-static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
+static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len, u32 flags)
 {
 	struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
 	u64 addr;
 	int err;
 
 	addr = xp_get_handle(xskb);
-	err = xskq_prod_reserve_desc(xs->rx, addr, len);
+	err = xskq_prod_reserve_desc(xs->rx, addr, len, flags);
 	if (err) {
 		xs->rx_queue_full++;
 		return err;
@@ -189,7 +189,7 @@  static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 	}
 
 	xsk_copy_xdp(xsk_xdp, xdp, len);
-	err = __xsk_rcv_zc(xs, xsk_xdp, len);
+	err = __xsk_rcv_zc(xs, xsk_xdp, len, 0);
 	if (err) {
 		xsk_buff_free(xsk_xdp);
 		return err;
@@ -259,7 +259,7 @@  static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 
 	if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) {
 		len = xdp->data_end - xdp->data;
-		return __xsk_rcv_zc(xs, xdp, len);
+		return __xsk_rcv_zc(xs, xdp, len, 0);
 	}
 
 	err = __xsk_rcv(xs, xdp);
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 6d40a77fccbe..ad81b19e6fdf 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -130,6 +130,11 @@  static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr)
 	return false;
 }
 
+static inline bool xp_unused_options_set(u16 options)
+{
+	return options & ~XDP_PKT_CONTD;
+}
+
 static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
 					    struct xdp_desc *desc)
 {
@@ -141,7 +146,7 @@  static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
 	if (desc->addr >= pool->addrs_cnt)
 		return false;
 
-	if (desc->options)
+	if (xp_unused_options_set(desc->options))
 		return false;
 	return true;
 }
@@ -158,7 +163,7 @@  static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool,
 	    xp_desc_crosses_non_contig_pg(pool, addr, desc->len))
 		return false;
 
-	if (desc->options)
+	if (xp_unused_options_set(desc->options))
 		return false;
 	return true;
 }
@@ -360,7 +365,7 @@  static inline void xskq_prod_write_addr_batch(struct xsk_queue *q, struct xdp_de
 }
 
 static inline int xskq_prod_reserve_desc(struct xsk_queue *q,
-					 u64 addr, u32 len)
+					 u64 addr, u32 len, u32 flags)
 {
 	struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
 	u32 idx;
@@ -372,6 +377,7 @@  static inline int xskq_prod_reserve_desc(struct xsk_queue *q,
 	idx = q->cached_prod++ & q->ring_mask;
 	ring->desc[idx].addr = addr;
 	ring->desc[idx].len = len;
+	ring->desc[idx].options = flags;
 
 	return 0;
 }