diff mbox series

[net] net/mlx5e: xsk: Discard unaligned XSK frames on striding RQ

Message ID 20220729121356.3990867-1-maximmi@nvidia.com (mailing list archive)
State Accepted
Commit 8eaa1d110800fac050bab44001732747a1c39894
Delegated to: Netdev Maintainers
Headers show
Series [net] net/mlx5e: xsk: Discard unaligned XSK frames on striding RQ | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net
netdev/fixes_present success Fixes tag present in non-next series
netdev/subject_prefix success Link
netdev/cover_letter success Single patches do not need cover letters
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 3 this patch: 3
netdev/cc_maintainers warning 3 maintainers not CCed: leon@kernel.org bpf@vger.kernel.org linux-rdma@vger.kernel.org
netdev/build_clang success Errors and warnings before: 5 this patch: 5
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 3 this patch: 3
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 55 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Maxim Mikityanskiy July 29, 2022, 12:13 p.m. UTC
Striding RQ uses MTT page mapping, where each page corresponds to an XSK
frame. MTT pages have alignment requirements, and XSK frames don't have
any alignment guarantees in the unaligned mode. Frames with improper
alignment must be discarded, otherwise the packet data will be written
at a wrong address.

Fixes: 282c0c798f8e ("net/mlx5e: Allow XSK frames smaller than a page")
Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/en/xsk/rx.h    | 14 ++++++++++++++
 include/net/xdp_sock_drv.h                         | 11 +++++++++++
 2 files changed, 25 insertions(+)

Comments

Maxim Mikityanskiy Aug. 1, 2022, 8:08 a.m. UTC | #1
Any comments on this patch, or can it be merged?

Saeed reviewed the mlx5 part.

Björn, Magnus, Maciej, anything to say about the XSK drv part?

On Fri, 2022-07-29 at 15:13 +0300, Maxim Mikityanskiy wrote:
> Striding RQ uses MTT page mapping, where each page corresponds to an XSK
> frame. MTT pages have alignment requirements, and XSK frames don't have
> any alignment guarantees in the unaligned mode. Frames with improper
> alignment must be discarded, otherwise the packet data will be written
> at a wrong address.
> 
> Fixes: 282c0c798f8e ("net/mlx5e: Allow XSK frames smaller than a page")
> Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
> Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
> Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
> ---
>  .../net/ethernet/mellanox/mlx5/core/en/xsk/rx.h    | 14 ++++++++++++++
>  include/net/xdp_sock_drv.h                         | 11 +++++++++++
>  2 files changed, 25 insertions(+)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
> index a8cfab4a393c..cc18d97d8ee0 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
> @@ -7,6 +7,8 @@
>  #include "en.h"
>  #include <net/xdp_sock_drv.h>
>  
> +#define MLX5E_MTT_PTAG_MASK 0xfffffffffffffff8ULL
> +
>  /* RX data path */
>  
>  struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
> @@ -21,6 +23,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
>  static inline int mlx5e_xsk_page_alloc_pool(struct mlx5e_rq *rq,
>  					    struct mlx5e_dma_info *dma_info)
>  {
> +retry:
>  	dma_info->xsk = xsk_buff_alloc(rq->xsk_pool);
>  	if (!dma_info->xsk)
>  		return -ENOMEM;
> @@ -32,6 +35,17 @@ static inline int mlx5e_xsk_page_alloc_pool(struct mlx5e_rq *rq,
>  	 */
>  	dma_info->addr = xsk_buff_xdp_get_frame_dma(dma_info->xsk);
>  
> +	/* MTT page mapping has alignment requirements. If they are not
> +	 * satisfied, leak the descriptor so that it won't come again, and try
> +	 * to allocate a new one.
> +	 */
> +	if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
> +		if (unlikely(dma_info->addr & ~MLX5E_MTT_PTAG_MASK)) {
> +			xsk_buff_discard(dma_info->xsk);
> +			goto retry;
> +		}
> +	}
> +
>  	return 0;
>  }
>  
> diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
> index 4aa031849668..0774ce97c2f1 100644
> --- a/include/net/xdp_sock_drv.h
> +++ b/include/net/xdp_sock_drv.h
> @@ -95,6 +95,13 @@ static inline void xsk_buff_free(struct xdp_buff *xdp)
>  	xp_free(xskb);
>  }
>  
> +static inline void xsk_buff_discard(struct xdp_buff *xdp)
> +{
> +	struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
> +
> +	xp_release(xskb);
> +}
> +
>  static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size)
>  {
>  	xdp->data = xdp->data_hard_start + XDP_PACKET_HEADROOM;
> @@ -238,6 +245,10 @@ static inline void xsk_buff_free(struct xdp_buff *xdp)
>  {
>  }
>  
> +static inline void xsk_buff_discard(struct xdp_buff *xdp)
> +{
> +}
> +
>  static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size)
>  {
>  }
Fijalkowski, Maciej Aug. 1, 2022, 1:41 p.m. UTC | #2
On Fri, Jul 29, 2022 at 03:13:56PM +0300, Maxim Mikityanskiy wrote:
> Striding RQ uses MTT page mapping, where each page corresponds to an XSK
> frame. MTT pages have alignment requirements, and XSK frames don't have
> any alignment guarantees in the unaligned mode. Frames with improper
> alignment must be discarded, otherwise the packet data will be written
> at a wrong address.

Hey Maxim,
can you explain what MTT stands for?

> 
> Fixes: 282c0c798f8e ("net/mlx5e: Allow XSK frames smaller than a page")
> Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
> Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
> Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
> ---
>  .../net/ethernet/mellanox/mlx5/core/en/xsk/rx.h    | 14 ++++++++++++++
>  include/net/xdp_sock_drv.h                         | 11 +++++++++++
>  2 files changed, 25 insertions(+)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
> index a8cfab4a393c..cc18d97d8ee0 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
> @@ -7,6 +7,8 @@
>  #include "en.h"
>  #include <net/xdp_sock_drv.h>
>  
> +#define MLX5E_MTT_PTAG_MASK 0xfffffffffffffff8ULL

What if PAGE_SIZE != 4096 ? Is aligned mode with 2k frame fine for MTT
case?

> +
>  /* RX data path */
>  
>  struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
> @@ -21,6 +23,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
>  static inline int mlx5e_xsk_page_alloc_pool(struct mlx5e_rq *rq,
>  					    struct mlx5e_dma_info *dma_info)
>  {
> +retry:
>  	dma_info->xsk = xsk_buff_alloc(rq->xsk_pool);
>  	if (!dma_info->xsk)
>  		return -ENOMEM;
> @@ -32,6 +35,17 @@ static inline int mlx5e_xsk_page_alloc_pool(struct mlx5e_rq *rq,
>  	 */
>  	dma_info->addr = xsk_buff_xdp_get_frame_dma(dma_info->xsk);
>  
> +	/* MTT page mapping has alignment requirements. If they are not
> +	 * satisfied, leak the descriptor so that it won't come again, and try
> +	 * to allocate a new one.
> +	 */
> +	if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
> +		if (unlikely(dma_info->addr & ~MLX5E_MTT_PTAG_MASK)) {
> +			xsk_buff_discard(dma_info->xsk);
> +			goto retry;
> +		}
> +	}

I don't know your hardware much, but how would this work out
performance-wise? Are there any config combos (page size vs chunk size in
unaligned mode) that you would forbid during pool attach to queue, or
would you rather allow anything?

Also would be helpful if you would describe the use case you're fixing.

Thanks!

> +
>  	return 0;
>  }
>  
> diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
> index 4aa031849668..0774ce97c2f1 100644
> --- a/include/net/xdp_sock_drv.h
> +++ b/include/net/xdp_sock_drv.h
> @@ -95,6 +95,13 @@ static inline void xsk_buff_free(struct xdp_buff *xdp)
>  	xp_free(xskb);
>  }
>  
> +static inline void xsk_buff_discard(struct xdp_buff *xdp)
> +{
> +	struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
> +
> +	xp_release(xskb);
> +}
> +
>  static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size)
>  {
>  	xdp->data = xdp->data_hard_start + XDP_PACKET_HEADROOM;
> @@ -238,6 +245,10 @@ static inline void xsk_buff_free(struct xdp_buff *xdp)
>  {
>  }
>  
> +static inline void xsk_buff_discard(struct xdp_buff *xdp)
> +{
> +}
> +
>  static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size)
>  {
>  }
> -- 
> 2.25.1
>
Maxim Mikityanskiy Aug. 1, 2022, 3:49 p.m. UTC | #3
First of all, this patch is a temporary kludge. I found a bug in the
current implementation of the unaligned mode: frames not aligned to at
least 8 bytes are misplaced. There is a proper fix in the driver, but it
will be pushed to net-next, because it's huge. In the meantime, this
workaround that drops frames not aligned to 8 bytes will go to stable
kernels.

On Mon, 2022-08-01 at 15:41 +0200, Maciej Fijalkowski wrote:
> On Fri, Jul 29, 2022 at 03:13:56PM +0300, Maxim Mikityanskiy wrote:
> > Striding RQ uses MTT page mapping, where each page corresponds to an XSK
> > frame. MTT pages have alignment requirements, and XSK frames don't have
> > any alignment guarantees in the unaligned mode. Frames with improper
> > alignment must be discarded, otherwise the packet data will be written
> > at a wrong address.
> 
> Hey Maxim,
> can you explain what MTT stands for?

MTT is Memory Translation Table, it's a mechanism for virtual mapping
in the NIC. It's essentially a table of pages, where each virtual page
maps to a physical page.

> 
> > 
> > Fixes: 282c0c798f8e ("net/mlx5e: Allow XSK frames smaller than a page")
> > Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
> > Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
> > Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
> > ---
> >  .../net/ethernet/mellanox/mlx5/core/en/xsk/rx.h    | 14 ++++++++++++++
> >  include/net/xdp_sock_drv.h                         | 11 +++++++++++
> >  2 files changed, 25 insertions(+)
> > 
> > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
> > index a8cfab4a393c..cc18d97d8ee0 100644
> > --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
> > +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
> > @@ -7,6 +7,8 @@
> >  #include "en.h"
> >  #include <net/xdp_sock_drv.h>
> >  
> > +#define MLX5E_MTT_PTAG_MASK 0xfffffffffffffff8ULL
> 
> What if PAGE_SIZE != 4096 ? Is aligned mode with 2k frame fine for MTT
> case?

PAGE_SIZE doesn't affect this value. Aligned mode doesn't suffer from
this bug, because 2k or bigger frames are all aligned to 8.

> 
> > +
> >  /* RX data path */
> >  
> >  struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
> > @@ -21,6 +23,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
> >  static inline int mlx5e_xsk_page_alloc_pool(struct mlx5e_rq *rq,
> >  					    struct mlx5e_dma_info *dma_info)
> >  {
> > +retry:
> >  	dma_info->xsk = xsk_buff_alloc(rq->xsk_pool);
> >  	if (!dma_info->xsk)
> >  		return -ENOMEM;
> > @@ -32,6 +35,17 @@ static inline int mlx5e_xsk_page_alloc_pool(struct mlx5e_rq *rq,
> >  	 */
> >  	dma_info->addr = xsk_buff_xdp_get_frame_dma(dma_info->xsk);
> >  
> > +	/* MTT page mapping has alignment requirements. If they are not
> > +	 * satisfied, leak the descriptor so that it won't come again, and try
> > +	 * to allocate a new one.
> > +	 */
> > +	if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
> > +		if (unlikely(dma_info->addr & ~MLX5E_MTT_PTAG_MASK)) {
> > +			xsk_buff_discard(dma_info->xsk);
> > +			goto retry;
> > +		}
> > +	}
> 
> I don't know your hardware much, but how would this work out performance
> wise? Are there any config combos (page size vs chunk size in unaligned
> mode) that you would forbid during pool attach to queue or would you
> better allow anything?

This issue isn't related to page or frame sizes, but rather to frame
locations. As far as I understand, frames can be located anywhere
in the unaligned mode (even at odd addresses), regardless of their
size. Frames whose addr % 8 != 0 don't really work with MTT, but it's
not something that can be enforced on attach. Enforcing it in xp_alloc
won't be any faster either (well, only a tiny bit, because of one fewer
function call).

In any case, next kernels will get another page mapping mechanism,
which supports arbitrary addresses, and it's almost as fast as MTT, as
the preliminary testing shows. It will be used for the unaligned XSK,
this kludge will be removed altogether, and I also plan to remove
xsk_buff_discard.

> Also would be helpful if you would describe the use case you're fixing.

Sure - described in the beginning of the email.

> 
> Thanks!
> 
> > +
> >  	return 0;
> >  }
> >  
> > diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
> > index 4aa031849668..0774ce97c2f1 100644
> > --- a/include/net/xdp_sock_drv.h
> > +++ b/include/net/xdp_sock_drv.h
> > @@ -95,6 +95,13 @@ static inline void xsk_buff_free(struct xdp_buff *xdp)
> >  	xp_free(xskb);
> >  }
> >  
> > +static inline void xsk_buff_discard(struct xdp_buff *xdp)
> > +{
> > +	struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
> > +
> > +	xp_release(xskb);
> > +}
> > +
> >  static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size)
> >  {
> >  	xdp->data = xdp->data_hard_start + XDP_PACKET_HEADROOM;
> > @@ -238,6 +245,10 @@ static inline void xsk_buff_free(struct xdp_buff *xdp)
> >  {
> >  }
> >  
> > +static inline void xsk_buff_discard(struct xdp_buff *xdp)
> > +{
> > +}
> > +
> >  static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size)
> >  {
> >  }
> > -- 
> > 2.25.1
> >
Paolo Abeni Aug. 2, 2022, 10:54 a.m. UTC | #4
On Mon, 2022-08-01 at 15:49 +0000, Maxim Mikityanskiy wrote:
> First of all, this patch is a temporary kludge. I found a bug in the
> current implementation of the unaligned mode: frames not aligned at
> least to 8 are misplaced. There is a proper fix in the driver, but it
> will be pushed to net-next, because it's huge. In the meanwhile, this
> workaround that drops packets not aligned to 8 will go to stable
> kernels.
> 
> On Mon, 2022-08-01 at 15:41 +0200, Maciej Fijalkowski wrote:
> > On Fri, Jul 29, 2022 at 03:13:56PM +0300, Maxim Mikityanskiy wrote:
> > > Striding RQ uses MTT page mapping, where each page corresponds to an XSK
> > > frame. MTT pages have alignment requirements, and XSK frames don't have
> > > any alignment guarantees in the unaligned mode. Frames with improper
> > > alignment must be discarded, otherwise the packet data will be written
> > > at a wrong address.
> > 
> > Hey Maxim,
> > can you explain what MTT stands for?
> 
> MTT is Memory Translation Table, it's a mechanism for virtual mapping
> in the NIC. It's essentially a table of pages, where each virtual page
> maps to a physical page.
> 
> > 
> > > 
> > > Fixes: 282c0c798f8e ("net/mlx5e: Allow XSK frames smaller than a page")
> > > Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
> > > Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
> > > Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
> > > ---
> > >  .../net/ethernet/mellanox/mlx5/core/en/xsk/rx.h    | 14 ++++++++++++++
> > >  include/net/xdp_sock_drv.h                         | 11 +++++++++++
> > >  2 files changed, 25 insertions(+)
> > > 
> > > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
> > > index a8cfab4a393c..cc18d97d8ee0 100644
> > > --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
> > > +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
> > > @@ -7,6 +7,8 @@
> > >  #include "en.h"
> > >  #include <net/xdp_sock_drv.h>
> > >  
> > > +#define MLX5E_MTT_PTAG_MASK 0xfffffffffffffff8ULL
> > 
> > What if PAGE_SIZE != 4096 ? Is aligned mode with 2k frame fine for MTT
> > case?
> 
> PAGE_SIZE doesn't affect this value. Aligned mode doesn't suffer from
> this bug, because 2k or bigger frames are all aligned to 8.
> 
> > 
> > > +
> > >  /* RX data path */
> > >  
> > >  struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
> > > @@ -21,6 +23,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
> > >  static inline int mlx5e_xsk_page_alloc_pool(struct mlx5e_rq *rq,
> > >  					    struct mlx5e_dma_info *dma_info)
> > >  {
> > > +retry:
> > >  	dma_info->xsk = xsk_buff_alloc(rq->xsk_pool);
> > >  	if (!dma_info->xsk)
> > >  		return -ENOMEM;
> > > @@ -32,6 +35,17 @@ static inline int mlx5e_xsk_page_alloc_pool(struct mlx5e_rq *rq,
> > >  	 */
> > >  	dma_info->addr = xsk_buff_xdp_get_frame_dma(dma_info->xsk);
> > >  
> > > +	/* MTT page mapping has alignment requirements. If they are not
> > > +	 * satisfied, leak the descriptor so that it won't come again, and try
> > > +	 * to allocate a new one.
> > > +	 */
> > > +	if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
> > > +		if (unlikely(dma_info->addr & ~MLX5E_MTT_PTAG_MASK)) {
> > > +			xsk_buff_discard(dma_info->xsk);
> > > +			goto retry;
> > > +		}
> > > +	}
> > 
> > I don't know your hardware much, but how would this work out performance
> > wise? Are there any config combos (page size vs chunk size in unaligned
> > mode) that you would forbid during pool attach to queue or would you
> > better allow anything?
> 
> This issue isn't related to page or frame sizes, but rather to frame
> locations. As far as I understand, frames can be located at any places
> in the unaligned mode (even at odd addresses), regardless of their
> size. Frames whose addr % 8 != 0 don't really work with MTT, but it's
> not something that can be enforced on attach. Enforcing it in xp_alloc
> won't be any faster either (well, only a tiny bit, because of one fewer
> function call).
> 
> In any case, next kernels will get another page mapping mechanism,
> which supports arbitrary addresses, and it's almost as fast as MTT, as
> the preliminary testing shows. It will be used for the unaligned XSK,
> this kludge will be removed altogether, and I also plan to remove
> xsk_buff_discard.
> 
> > Also would be helpful if you would describe the use case you're fixing.
> 
> Sure - described in the beginning of the email.

@Maciej: are you satisfied by Maxim's answers?

/P
Fijalkowski, Maciej Aug. 2, 2022, 12:54 p.m. UTC | #5
On Tue, Aug 02, 2022 at 12:54:15PM +0200, Paolo Abeni wrote:
> On Mon, 2022-08-01 at 15:49 +0000, Maxim Mikityanskiy wrote:
> > First of all, this patch is a temporary kludge. I found a bug in the
> > current implementation of the unaligned mode: frames not aligned at
> > least to 8 are misplaced. There is a proper fix in the driver, but it
> > will be pushed to net-next, because it's huge. In the meanwhile, this
> > workaround that drops packets not aligned to 8 will go to stable
> > kernels.
> > 
> > On Mon, 2022-08-01 at 15:41 +0200, Maciej Fijalkowski wrote:
> > > On Fri, Jul 29, 2022 at 03:13:56PM +0300, Maxim Mikityanskiy wrote:
> > > > Striding RQ uses MTT page mapping, where each page corresponds to an XSK
> > > > frame. MTT pages have alignment requirements, and XSK frames don't have
> > > > any alignment guarantees in the unaligned mode. Frames with improper
> > > > alignment must be discarded, otherwise the packet data will be written
> > > > at a wrong address.
> > > 
> > > Hey Maxim,
> > > can you explain what MTT stands for?
> > 
> > MTT is Memory Translation Table, it's a mechanism for virtual mapping
> > in the NIC. It's essentially a table of pages, where each virtual page
> > maps to a physical page.
> > 
> > > 
> > > > 
> > > > Fixes: 282c0c798f8e ("net/mlx5e: Allow XSK frames smaller than a page")
> > > > Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
> > > > Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
> > > > Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
> > > > ---
> > > >  .../net/ethernet/mellanox/mlx5/core/en/xsk/rx.h    | 14 ++++++++++++++
> > > >  include/net/xdp_sock_drv.h                         | 11 +++++++++++
> > > >  2 files changed, 25 insertions(+)
> > > > 
> > > > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
> > > > index a8cfab4a393c..cc18d97d8ee0 100644
> > > > --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
> > > > +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
> > > > @@ -7,6 +7,8 @@
> > > >  #include "en.h"
> > > >  #include <net/xdp_sock_drv.h>
> > > >  
> > > > +#define MLX5E_MTT_PTAG_MASK 0xfffffffffffffff8ULL
> > > 
> > > What if PAGE_SIZE != 4096 ? Is aligned mode with 2k frame fine for MTT
> > > case?
> > 
> > PAGE_SIZE doesn't affect this value. Aligned mode doesn't suffer from
> > this bug, because 2k or bigger frames are all aligned to 8.
> > 
> > > 
> > > > +
> > > >  /* RX data path */
> > > >  
> > > >  struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
> > > > @@ -21,6 +23,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
> > > >  static inline int mlx5e_xsk_page_alloc_pool(struct mlx5e_rq *rq,
> > > >  					    struct mlx5e_dma_info *dma_info)
> > > >  {
> > > > +retry:
> > > >  	dma_info->xsk = xsk_buff_alloc(rq->xsk_pool);
> > > >  	if (!dma_info->xsk)
> > > >  		return -ENOMEM;
> > > > @@ -32,6 +35,17 @@ static inline int mlx5e_xsk_page_alloc_pool(struct mlx5e_rq *rq,
> > > >  	 */
> > > >  	dma_info->addr = xsk_buff_xdp_get_frame_dma(dma_info->xsk);
> > > >  
> > > > +	/* MTT page mapping has alignment requirements. If they are not
> > > > +	 * satisfied, leak the descriptor so that it won't come again, and try
> > > > +	 * to allocate a new one.
> > > > +	 */
> > > > +	if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
> > > > +		if (unlikely(dma_info->addr & ~MLX5E_MTT_PTAG_MASK)) {
> > > > +			xsk_buff_discard(dma_info->xsk);
> > > > +			goto retry;
> > > > +		}
> > > > +	}
> > > 
> > > I don't know your hardware much, but how would this work out performance
> > > wise? Are there any config combos (page size vs chunk size in unaligned
> > > mode) that you would forbid during pool attach to queue or would you
> > > better allow anything?
> > 
> > This issue isn't related to page or frame sizes, but rather to frame
> > locations. As far as I understand, frames can be located at any places
> > in the unaligned mode (even at odd addresses), regardless of their
> > size. Frames whose addr % 8 != 0 don't really work with MTT, but it's
> > not something that can be enforced on attach. Enforcing it in xp_alloc
> > won't be any faster either (well, only a tiny bit, because of one fewer
> > function call).
> > 
> > In any case, next kernels will get another page mapping mechanism,
> > which supports arbitrary addresses, and it's almost as fast as MTT, as
> > the preliminary testing shows. It will be used for the unaligned XSK,
> > this kludge will be removed altogether, and I also plan to remove
> > xsk_buff_discard.

Ok, makes sense. I just misread the mask, so maybe use
GENMASK_ULL(63, 3)? Also, explicitly stating MTT's alignment requirement
(8 bytes) in the commit message would probably have kept me from
misreading it :p

Besides:
Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>

> > 
> > > Also would be helpful if you would describe the use case you're fixing.
> > 
> > Sure - described in the beginning of the email.
> 
> @Maciej: are you satisfied by Maxim's answers?

Yep!

> 
> /P
>
patchwork-bot+netdevbpf@kernel.org Aug. 2, 2022, 1:40 p.m. UTC | #6
Hello:

This patch was applied to netdev/net.git (master)
by Paolo Abeni <pabeni@redhat.com>:

On Fri, 29 Jul 2022 15:13:56 +0300 you wrote:
> Striding RQ uses MTT page mapping, where each page corresponds to an XSK
> frame. MTT pages have alignment requirements, and XSK frames don't have
> any alignment guarantees in the unaligned mode. Frames with improper
> alignment must be discarded, otherwise the packet data will be written
> at a wrong address.
> 
> Fixes: 282c0c798f8e ("net/mlx5e: Allow XSK frames smaller than a page")
> Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
> Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
> Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
> 
> [...]

Here is the summary with links:
  - [net] net/mlx5e: xsk: Discard unaligned XSK frames on striding RQ
    https://git.kernel.org/netdev/net/c/8eaa1d110800

You are awesome, thank you!
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
index a8cfab4a393c..cc18d97d8ee0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
@@ -7,6 +7,8 @@ 
 #include "en.h"
 #include <net/xdp_sock_drv.h>
 
+#define MLX5E_MTT_PTAG_MASK 0xfffffffffffffff8ULL
+
 /* RX data path */
 
 struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
@@ -21,6 +23,7 @@  struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
 static inline int mlx5e_xsk_page_alloc_pool(struct mlx5e_rq *rq,
 					    struct mlx5e_dma_info *dma_info)
 {
+retry:
 	dma_info->xsk = xsk_buff_alloc(rq->xsk_pool);
 	if (!dma_info->xsk)
 		return -ENOMEM;
@@ -32,6 +35,17 @@  static inline int mlx5e_xsk_page_alloc_pool(struct mlx5e_rq *rq,
 	 */
 	dma_info->addr = xsk_buff_xdp_get_frame_dma(dma_info->xsk);
 
+	/* MTT page mapping has alignment requirements. If they are not
+	 * satisfied, leak the descriptor so that it won't come again, and try
+	 * to allocate a new one.
+	 */
+	if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
+		if (unlikely(dma_info->addr & ~MLX5E_MTT_PTAG_MASK)) {
+			xsk_buff_discard(dma_info->xsk);
+			goto retry;
+		}
+	}
+
 	return 0;
 }
 
diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index 4aa031849668..0774ce97c2f1 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -95,6 +95,13 @@  static inline void xsk_buff_free(struct xdp_buff *xdp)
 	xp_free(xskb);
 }
 
+static inline void xsk_buff_discard(struct xdp_buff *xdp)
+{
+	struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
+
+	xp_release(xskb);
+}
+
 static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size)
 {
 	xdp->data = xdp->data_hard_start + XDP_PACKET_HEADROOM;
@@ -238,6 +245,10 @@  static inline void xsk_buff_free(struct xdp_buff *xdp)
 {
 }
 
+static inline void xsk_buff_discard(struct xdp_buff *xdp)
+{
+}
+
 static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size)
 {
 }