[net-next,v3,4/4] eth: mlx4: use the page pool for Rx buffers

Message ID 20250213010635.1354034-5-kuba@kernel.org (mailing list archive)
State Accepted
Delegated to: Netdev Maintainers
Series eth: mlx4: use the page pool for Rx buffers

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 1 maintainers not CCed: linux-rdma@vger.kernel.org
netdev/build_clang success Errors and warnings before: 12 this patch: 12
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 161 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2025-02-15--03-00 (tests: 891)

Commit Message

Jakub Kicinski Feb. 13, 2025, 1:06 a.m. UTC
Simple conversion to page pool. Preserve the current fragmentation
logic / page splitting. Each page starts with a single frag reference,
and then we bump that when attaching to skbs. This can likely be
optimized further.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
v2:
 - remove unnecessary .max_len setting
v1: https://lore.kernel.org/20250205031213.358973-5-kuba@kernel.org
---
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h |  1 -
 drivers/net/ethernet/mellanox/mlx4/en_rx.c   | 55 +++++++-------------
 drivers/net/ethernet/mellanox/mlx4/en_tx.c   |  8 +--
 3 files changed, 25 insertions(+), 39 deletions(-)
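
The conversion hinges on the page pool's explicit fragment-reference API: each freshly allocated page starts with a single pool-level reference, every skb frag attached to the page takes one more, and the page is recycled into the pool only once all of them are dropped. A minimal sketch of that lifecycle, assuming a pool created with PP_FLAG_DMA_MAP; the helper names are invented for illustration and are not part of the driver:

#include <net/page_pool/helpers.h>

/* Illustration only -- not driver code. */
static struct page *rx_page_get(struct page_pool *pp, gfp_t gfp)
{
	struct page *page = page_pool_alloc_pages(pp, gfp);

	if (!page)
		return NULL;
	/* Start with one pool-level reference, owned by the ring. */
	page_pool_fragment_page(page, 1);
	return page;
}

static void rx_page_share(struct page *page)
{
	/* Each skb frag pointing at the page takes one more reference. */
	page_pool_ref_page(page);
}

static void rx_page_put(struct page_pool *pp, struct page *page, bool in_napi)
{
	/* Drop the ring's reference; the page returns to the pool once
	 * the skb-held references are gone as well.
	 */
	page_pool_put_full_page(pp, page, in_napi);
}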

Comments

Tariq Toukan Feb. 18, 2025, 12:13 p.m. UTC | #1
On 13/02/2025 3:06, Jakub Kicinski wrote:
> Simple conversion to page pool. Preserve the current fragmentation
> logic / page splitting. Each page starts with a single frag reference,
> and then we bump that when attaching to skbs. This can likely be
> optimized further.
> 
> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
> [...]

Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
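
One subtlety in the en_tx.c hunk deserves a note: the last argument to page_pool_put_full_page() is allow_direct, which permits lockless recycling into the pool's per-CPU cache and is only safe from the NAPI context the pool is associated with. The driver passes !!napi_mode because, as the added comment says, napi_mode == 0 means the ndo_close() teardown path rather than a zero NAPI budget. A restatement of the rule with an invented wrapper name, illustration only:

/* Direct (lockless) recycling is allowed only from NAPI context. */
static void recycle_tx_page(struct page_pool *pp, struct page *page,
			    int napi_mode)
{
	page_pool_put_full_page(pp, page, !!napi_mode);
}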

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 97311c98569f..ad0d91a75184 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -247,7 +247,6 @@ struct mlx4_en_tx_desc {
 
 struct mlx4_en_rx_alloc {
 	struct page	*page;
-	dma_addr_t	dma;
 	u32		page_offset;
 };
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index d2cfbf2e38d9..b33285d755b9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -52,57 +52,39 @@
 
 #include "mlx4_en.h"
 
-static int mlx4_alloc_page(struct mlx4_en_priv *priv,
-			   struct mlx4_en_rx_alloc *frag,
-			   gfp_t gfp)
-{
-	struct page *page;
-	dma_addr_t dma;
-
-	page = alloc_page(gfp);
-	if (unlikely(!page))
-		return -ENOMEM;
-	dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE, priv->dma_dir);
-	if (unlikely(dma_mapping_error(priv->ddev, dma))) {
-		__free_page(page);
-		return -ENOMEM;
-	}
-	frag->page = page;
-	frag->dma = dma;
-	frag->page_offset = priv->rx_headroom;
-	return 0;
-}
-
 static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
 			       struct mlx4_en_rx_ring *ring,
 			       struct mlx4_en_rx_desc *rx_desc,
 			       struct mlx4_en_rx_alloc *frags,
 			       gfp_t gfp)
 {
+	dma_addr_t dma;
 	int i;
 
 	for (i = 0; i < priv->num_frags; i++, frags++) {
 		if (!frags->page) {
-			if (mlx4_alloc_page(priv, frags, gfp)) {
+			frags->page = page_pool_alloc_pages(ring->pp, gfp);
+			if (!frags->page) {
 				ring->alloc_fail++;
 				return -ENOMEM;
 			}
+			page_pool_fragment_page(frags->page, 1);
+			frags->page_offset = priv->rx_headroom;
+
 			ring->rx_alloc_pages++;
 		}
-		rx_desc->data[i].addr = cpu_to_be64(frags->dma +
-						    frags->page_offset);
+		dma = page_pool_get_dma_addr(frags->page);
+		rx_desc->data[i].addr = cpu_to_be64(dma + frags->page_offset);
 	}
 	return 0;
 }
 
 static void mlx4_en_free_frag(const struct mlx4_en_priv *priv,
+			      struct mlx4_en_rx_ring *ring,
 			      struct mlx4_en_rx_alloc *frag)
 {
-	if (frag->page) {
-		dma_unmap_page(priv->ddev, frag->dma,
-			       PAGE_SIZE, priv->dma_dir);
-		__free_page(frag->page);
-	}
+	if (frag->page)
+		page_pool_put_full_page(ring->pp, frag->page, false);
 	/* We need to clear all fields, otherwise a change of priv->log_rx_info
 	 * could lead to see garbage later in frag->page.
 	 */
@@ -167,7 +149,7 @@ static void mlx4_en_free_rx_desc(const struct mlx4_en_priv *priv,
 	frags = ring->rx_info + (index << priv->log_rx_info);
 	for (nr = 0; nr < priv->num_frags; nr++) {
 		en_dbg(DRV, priv, "Freeing fragment:%d\n", nr);
-		mlx4_en_free_frag(priv, frags + nr);
+		mlx4_en_free_frag(priv, ring, frags + nr);
 	}
 }
 
@@ -469,7 +451,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
 		if (unlikely(!page))
 			goto fail;
 
-		dma = frags->dma;
+		dma = page_pool_get_dma_addr(page);
 		dma_sync_single_range_for_cpu(priv->ddev, dma, frags->page_offset,
 					      frag_size, priv->dma_dir);
 
@@ -480,6 +462,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
 		if (frag_info->frag_stride == PAGE_SIZE / 2) {
 			frags->page_offset ^= PAGE_SIZE / 2;
 			release = page_count(page) != 1 ||
+				  atomic_long_read(&page->pp_ref_count) != 1 ||
 				  page_is_pfmemalloc(page) ||
 				  page_to_nid(page) != numa_mem_id();
 		} else if (!priv->rx_headroom) {
@@ -493,10 +476,9 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
 			release = frags->page_offset + frag_info->frag_size > PAGE_SIZE;
 		}
 		if (release) {
-			dma_unmap_page(priv->ddev, dma, PAGE_SIZE, priv->dma_dir);
 			frags->page = NULL;
 		} else {
-			page_ref_inc(page);
+			page_pool_ref_page(page);
 		}
 
 		nr++;
@@ -766,7 +748,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 			/* Get pointer to first fragment since we haven't
 			 * skb yet and cast it to ethhdr struct
 			 */
-			dma = frags[0].dma + frags[0].page_offset;
+			dma = page_pool_get_dma_addr(frags[0].page);
+			dma += frags[0].page_offset;
 			dma_sync_single_for_cpu(priv->ddev, dma, sizeof(*ethh),
 						DMA_FROM_DEVICE);
 
@@ -805,7 +788,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 			void *orig_data;
 			u32 act;
 
-			dma = frags[0].dma + frags[0].page_offset;
+			dma = page_pool_get_dma_addr(frags[0].page);
+			dma += frags[0].page_offset;
 			dma_sync_single_for_cpu(priv->ddev, dma,
 						priv->frag_info[0].frag_size,
 						DMA_FROM_DEVICE);
@@ -868,6 +852,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 		skb = napi_get_frags(&cq->napi);
 		if (unlikely(!skb))
 			goto next;
+		skb_mark_for_recycle(skb);
 
 		if (unlikely(ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL)) {
 			u64 timestamp = mlx4_en_get_cqe_ts(cqe);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index fe1378a689a1..87f35bcbeff8 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -44,6 +44,7 @@
 #include <linux/ipv6.h>
 #include <linux/indirect_call_wrapper.h>
 #include <net/ipv6.h>
+#include <net/page_pool/helpers.h>
 
 #include "mlx4_en.h"
 
@@ -350,9 +351,10 @@ u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv,
 			    int napi_mode)
 {
 	struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
+	struct page_pool *pool = ring->recycle_ring->pp;
 
-	dma_unmap_page(priv->ddev, tx_info->map0_dma, PAGE_SIZE, priv->dma_dir);
-	put_page(tx_info->page);
+	/* Note that napi_mode = 0 means ndo_close() path, not budget = 0 */
+	page_pool_put_full_page(pool, tx_info->page, !!napi_mode);
 
 	return tx_info->nr_txbb;
 }
@@ -1189,7 +1191,7 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
 	tx_desc = ring->buf + (index << LOG_TXBB_SIZE);
 	data = &tx_desc->data;
 
-	dma = frame->dma;
+	dma = page_pool_get_dma_addr(frame->page);
 
 	tx_info->page = frame->page;
 	frame->page = NULL;
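
The diff takes ring->pp for granted; the pool itself is created earlier in this series. For context, a rough sketch of the kind of per-RX-ring pool registration such a conversion relies on -- the field values below are illustrative assumptions, not copied from the series:

#include <net/page_pool/helpers.h>

/* Sketch under assumed parameters -- not the series' actual setup code.
 * PP_FLAG_DMA_MAP makes the pool own the DMA mapping, which is what
 * allows the patch to drop frag->dma and the dma_unmap_page() calls.
 */
static struct page_pool *rx_ring_create_pool(struct device *dev,
					     struct napi_struct *napi,
					     u32 pool_size, int nid,
					     enum dma_data_direction dma_dir)
{
	struct page_pool_params pp = {
		.flags		= PP_FLAG_DMA_MAP,
		.order		= 0,
		.pool_size	= pool_size,
		.nid		= nid,
		.dev		= dev,
		.napi		= napi,
		.dma_dir	= dma_dir,
	};

	return page_pool_create(&pp);
}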