diff mbox series

Revert "octeontx2-pf: Use the napi_alloc_frag() to alloc the pool buffers"

Message ID 1611118955-13146-1-git-send-email-sundeep.lkml@gmail.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series Revert "octeontx2-pf: Use the napi_alloc_frag() to alloc the pool buffers" | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Guessed tree name to be net-next
netdev/subject_prefix warning Target tree name not specified in the subject
netdev/cc_maintainers success CCed 7 of 7 maintainers
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 155 lines checked
netdev/build_allmodconfig_warn success Errors and warnings before: 14 this patch: 14
netdev/header_inline success Link
netdev/stable success Stable not CCed

Commit Message

sundeep subbaraya Jan. 20, 2021, 5:02 a.m. UTC
From: Subbaraya Sundeep <sbhatta@marvell.com>

Octeontx2 hardware needs buffer pointers which are 128-byte
aligned. napi_alloc_frag() sometimes returns buffer pointers
which are not 128-byte aligned: if napi_alloc_skb() is called
when a packet is received, then a subsequent napi_alloc_frag()
returns a buffer which is not 128-byte aligned, and such buffers
cannot be issued to the NPA hardware. Hence revert this commit.

Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com>
Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
---
 .../ethernet/marvell/octeontx2/nic/otx2_common.c   | 52 +++++++++++++---------
 .../ethernet/marvell/octeontx2/nic/otx2_common.h   | 15 ++++++-
 .../net/ethernet/marvell/octeontx2/nic/otx2_txrx.c |  3 +-
 .../net/ethernet/marvell/octeontx2/nic/otx2_txrx.h |  4 ++
 4 files changed, 50 insertions(+), 24 deletions(-)

Comments

Kevin Hao Jan. 21, 2021, 4:20 a.m. UTC | #1
On Wed, Jan 20, 2021 at 10:32:35AM +0530, sundeep.lkml@gmail.com wrote:
> From: Subbaraya Sundeep <sbhatta@marvell.com>
> 
> Octeontx2 hardware needs buffer pointers which are 128 byte
> aligned. napi_alloc_frag() returns buffer pointers which are
> not 128 byte aligned sometimes because if napi_alloc_skb() is
> called when a packet is received then subsequent napi_alloc_frag()
> returns buffer which is not 128 byte aligned and those buffers
> cannot be issued to NPA hardware. Hence reverting this commit.
> 
> Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com>
> Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
> ---
>  .../ethernet/marvell/octeontx2/nic/otx2_common.c   | 52 +++++++++++++---------
>  .../ethernet/marvell/octeontx2/nic/otx2_common.h   | 15 ++++++-
>  .../net/ethernet/marvell/octeontx2/nic/otx2_txrx.c |  3 +-
>  .../net/ethernet/marvell/octeontx2/nic/otx2_txrx.h |  4 ++
>  4 files changed, 50 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
> index bdfa2e2..921cd86 100644
> --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
> +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
> @@ -483,35 +483,40 @@ void otx2_config_irq_coalescing(struct otx2_nic *pfvf, int qidx)
>  		     (pfvf->hw.cq_ecount_wait - 1));
>  }
>  
> -dma_addr_t __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool)
> +dma_addr_t otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
> +			   gfp_t gfp)
>  {
>  	dma_addr_t iova;
> -	u8 *buf;
>  
> -	buf = napi_alloc_frag(pool->rbsize);
> -	if (unlikely(!buf))

Hmm, why not?
  buf = napi_alloc_frag(pool->rbsize + 128);
  buf = PTR_ALIGN(buf, 128);

Or we can teach the napi_alloc_frag to return an aligned addr, something like this (just build test):
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
index bdfa2e293531..a2a98a30d18e 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
@@ -488,7 +488,7 @@ dma_addr_t __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool)
 	dma_addr_t iova;
 	u8 *buf;
 
-	buf = napi_alloc_frag(pool->rbsize);
+	buf = napi_alloc_frag_align(pool->rbsize, 128);
 	if (unlikely(!buf))
 		return -ENOMEM;
 
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 53caa9846854..03eb8f88b714 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -583,6 +583,8 @@ extern void free_pages(unsigned long addr, unsigned int order);
 
 struct page_frag_cache;
 extern void __page_frag_cache_drain(struct page *page, unsigned int count);
+extern void *page_frag_alloc_align(struct page_frag_cache *nc,
+				   unsigned int fragsz, gfp_t gfp_mask, int align);
 extern void *page_frag_alloc(struct page_frag_cache *nc,
 			     unsigned int fragsz, gfp_t gfp_mask);
 extern void page_frag_free(void *addr);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c9568cf60c2a..6bd15956a39f 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2877,6 +2877,7 @@ static inline void skb_free_frag(void *addr)
 	page_frag_free(addr);
 }
 
+void *napi_alloc_frag_align(unsigned int fragsz, int align);
 void *napi_alloc_frag(unsigned int fragsz);
 struct sk_buff *__napi_alloc_skb(struct napi_struct *napi,
 				 unsigned int length, gfp_t gfp_mask);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b031a5ae0bd5..7e5ff9a7f00c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5137,8 +5137,8 @@ void __page_frag_cache_drain(struct page *page, unsigned int count)
 }
 EXPORT_SYMBOL(__page_frag_cache_drain);
 
-void *page_frag_alloc(struct page_frag_cache *nc,
-		      unsigned int fragsz, gfp_t gfp_mask)
+void *page_frag_alloc_align(struct page_frag_cache *nc,
+		      unsigned int fragsz, gfp_t gfp_mask, int align)
 {
 	unsigned int size = PAGE_SIZE;
 	struct page *page;
@@ -5190,10 +5190,17 @@ void *page_frag_alloc(struct page_frag_cache *nc,
 	}
 
 	nc->pagecnt_bias--;
-	nc->offset = offset;
+	nc->offset = ALIGN_DOWN(offset, align);
 
 	return nc->va + offset;
 }
+EXPORT_SYMBOL(page_frag_alloc_align);
+
+void *page_frag_alloc(struct page_frag_cache *nc,
+		      unsigned int fragsz, gfp_t gfp_mask)
+{
+	return page_frag_alloc_align(nc, fragsz, gfp_mask, 0);
+}
 EXPORT_SYMBOL(page_frag_alloc);
 
 /*
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index cce237d0ec2e..ee7e6a2bfe55 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -374,18 +374,24 @@ struct napi_alloc_cache {
 static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
 static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
 
-static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
+static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask, int align)
 {
 	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
 
-	return page_frag_alloc(&nc->page, fragsz, gfp_mask);
+	return page_frag_alloc_align(&nc->page, fragsz, gfp_mask, align);
 }
 
-void *napi_alloc_frag(unsigned int fragsz)
+void *napi_alloc_frag_align(unsigned int fragsz, int align)
 {
 	fragsz = SKB_DATA_ALIGN(fragsz);
 
-	return __napi_alloc_frag(fragsz, GFP_ATOMIC);
+	return __napi_alloc_frag(fragsz, GFP_ATOMIC, align);
+}
+EXPORT_SYMBOL(napi_alloc_frag_align);
+
+void *napi_alloc_frag(unsigned int fragsz)
+{
+	return __napi_alloc_frag(fragsz, GFP_ATOMIC, 0);
 }
 EXPORT_SYMBOL(napi_alloc_frag);
 
@@ -407,7 +413,7 @@ void *netdev_alloc_frag(unsigned int fragsz)
 		data = page_frag_alloc(nc, fragsz, GFP_ATOMIC);
 	} else {
 		local_bh_disable();
-		data = __napi_alloc_frag(fragsz, GFP_ATOMIC);
+		data = __napi_alloc_frag(fragsz, GFP_ATOMIC, 0);
 		local_bh_enable();
 	}
 	return data;
Thanks,
Kevin

> +	/* Check if request can be accommodated in previous allocated page */
> +	if (pool->page && ((pool->page_offset + pool->rbsize) <=
> +	    (PAGE_SIZE << pool->rbpage_order))) {
> +		pool->pageref++;
> +		goto ret;
> +	}
> +
> +	otx2_get_page(pool);
> +
> +	/* Allocate a new page */
> +	pool->page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN,
> +				 pool->rbpage_order);
> +	if (unlikely(!pool->page))
>  		return -ENOMEM;
>  
> -	iova = dma_map_single_attrs(pfvf->dev, buf, pool->rbsize,
> -				    DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
> -	if (unlikely(dma_mapping_error(pfvf->dev, iova))) {
> -		page_frag_free(buf);
> +	pool->page_offset = 0;
> +ret:
> +	iova = (u64)otx2_dma_map_page(pfvf, pool->page, pool->page_offset,
> +				      pool->rbsize, DMA_FROM_DEVICE);
> +	if (!iova) {
> +		if (!pool->page_offset)
> +			__free_pages(pool->page, pool->rbpage_order);
> +		pool->page = NULL;
>  		return -ENOMEM;
>  	}
> -
> +	pool->page_offset += pool->rbsize;
>  	return iova;
>  }
>  
> -static dma_addr_t otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool)
> -{
> -	dma_addr_t addr;
> -
> -	local_bh_disable();
> -	addr = __otx2_alloc_rbuf(pfvf, pool);
> -	local_bh_enable();
> -	return addr;
> -}
> -
>  void otx2_tx_timeout(struct net_device *netdev, unsigned int txq)
>  {
>  	struct otx2_nic *pfvf = netdev_priv(netdev);
> @@ -913,7 +918,7 @@ static void otx2_pool_refill_task(struct work_struct *work)
>  	free_ptrs = cq->pool_ptrs;
>  
>  	while (cq->pool_ptrs) {
> -		bufptr = otx2_alloc_rbuf(pfvf, rbpool);
> +		bufptr = otx2_alloc_rbuf(pfvf, rbpool, GFP_KERNEL);
>  		if (bufptr <= 0) {
>  			/* Schedule a WQ if we fails to free atleast half of the
>  			 * pointers else enable napi for this RQ.
> @@ -1172,6 +1177,7 @@ static int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id,
>  		return err;
>  
>  	pool->rbsize = buf_size;
> +	pool->rbpage_order = get_order(buf_size);
>  
>  	/* Initialize this pool's context via AF */
>  	aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox);
> @@ -1259,12 +1265,13 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf)
>  			return -ENOMEM;
>  
>  		for (ptr = 0; ptr < num_sqbs; ptr++) {
> -			bufptr = otx2_alloc_rbuf(pfvf, pool);
> +			bufptr = otx2_alloc_rbuf(pfvf, pool, GFP_KERNEL);
>  			if (bufptr <= 0)
>  				return bufptr;
>  			otx2_aura_freeptr(pfvf, pool_id, bufptr);
>  			sq->sqb_ptrs[sq->sqb_count++] = (u64)bufptr;
>  		}
> +		otx2_get_page(pool);
>  	}
>  
>  	return 0;
> @@ -1310,12 +1317,13 @@ int otx2_rq_aura_pool_init(struct otx2_nic *pfvf)
>  	for (pool_id = 0; pool_id < hw->rqpool_cnt; pool_id++) {
>  		pool = &pfvf->qset.pool[pool_id];
>  		for (ptr = 0; ptr < num_ptrs; ptr++) {
> -			bufptr = otx2_alloc_rbuf(pfvf, pool);
> +			bufptr = otx2_alloc_rbuf(pfvf, pool, GFP_KERNEL);
>  			if (bufptr <= 0)
>  				return bufptr;
>  			otx2_aura_freeptr(pfvf, pool_id,
>  					  bufptr + OTX2_HEAD_ROOM);
>  		}
> +		otx2_get_page(pool);
>  	}
>  
>  	return 0;
> diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
> index 143ae04..f670da9 100644
> --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
> +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
> @@ -491,6 +491,18 @@ static inline void otx2_aura_freeptr(struct otx2_nic *pfvf,
>  		      otx2_get_regaddr(pfvf, NPA_LF_AURA_OP_FREE0));
>  }
>  
> +/* Update page ref count */
> +static inline void otx2_get_page(struct otx2_pool *pool)
> +{
> +	if (!pool->page)
> +		return;
> +
> +	if (pool->pageref)
> +		page_ref_add(pool->page, pool->pageref);
> +	pool->pageref = 0;
> +	pool->page = NULL;
> +}
> +
>  static inline int otx2_get_pool_idx(struct otx2_nic *pfvf, int type, int idx)
>  {
>  	if (type == AURA_NIX_SQ)
> @@ -636,7 +648,8 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl);
>  int otx2_txsch_alloc(struct otx2_nic *pfvf);
>  int otx2_txschq_stop(struct otx2_nic *pfvf);
>  void otx2_sqb_flush(struct otx2_nic *pfvf);
> -dma_addr_t __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool);
> +dma_addr_t otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
> +			   gfp_t gfp);
>  int otx2_rxtx_enable(struct otx2_nic *pfvf, bool enable);
>  void otx2_ctx_disable(struct mbox *mbox, int type, bool npa);
>  int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable);
> diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
> index d0e2541..7774d9a 100644
> --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
> +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
> @@ -333,7 +333,7 @@ static int otx2_rx_napi_handler(struct otx2_nic *pfvf,
>  
>  	/* Refill pool with new buffers */
>  	while (cq->pool_ptrs) {
> -		bufptr = __otx2_alloc_rbuf(pfvf, cq->rbpool);
> +		bufptr = otx2_alloc_rbuf(pfvf, cq->rbpool, GFP_ATOMIC);
>  		if (unlikely(bufptr <= 0)) {
>  			struct refill_work *work;
>  			struct delayed_work *dwork;
> @@ -351,6 +351,7 @@ static int otx2_rx_napi_handler(struct otx2_nic *pfvf,
>  		otx2_aura_freeptr(pfvf, cq->cq_idx, bufptr + OTX2_HEAD_ROOM);
>  		cq->pool_ptrs--;
>  	}
> +	otx2_get_page(cq->rbpool);
>  
>  	return processed_cqe;
>  }
> diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
> index 73af156..9cd20c7 100644
> --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
> +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
> @@ -114,7 +114,11 @@ struct otx2_cq_poll {
>  struct otx2_pool {
>  	struct qmem		*stack;
>  	struct qmem		*fc_addr;
> +	u8			rbpage_order;
>  	u16			rbsize;
> +	u32			page_offset;
> +	u16			pageref;
> +	struct page		*page;
>  };
>  
>  struct otx2_cq_queue {
> -- 
> 2.7.4
>
Jakub Kicinski Jan. 21, 2021, 4:59 a.m. UTC | #2
On Thu, 21 Jan 2021 12:20:35 +0800 Kevin Hao wrote:
> Hmm, why not?
>   buf = napi_alloc_frag(pool->rbsize + 128);
>   buf = PTR_ALIGN(buf, 128);

I'd keep the aligning in the driver until there are more users
needing this but yes, I agree, aligning the page frag buffers 
seems like a much better fix.
Kevin Hao Jan. 21, 2021, 5:09 a.m. UTC | #3
On Wed, Jan 20, 2021 at 08:59:14PM -0800, Jakub Kicinski wrote:
> On Thu, 21 Jan 2021 12:20:35 +0800 Kevin Hao wrote:
> > Hmm, why not?
> >   buf = napi_alloc_frag(pool->rbsize + 128);
> >   buf = PTR_ALIGN(buf, 128);
> 
> I'd keep the aligning in the driver until there are more users
> needing this but yes, I agree, aligning the page frag buffers 
> seems like a much better fix.

It seems that the DPAA2 driver also needs this (drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c):
	/* Prepare the HW SGT structure */
	sgt_buf_size = priv->tx_data_offset +
		       sizeof(struct dpaa2_sg_entry) *  num_dma_bufs;
	sgt_buf = napi_alloc_frag(sgt_buf_size + DPAA2_ETH_TX_BUF_ALIGN);
	if (unlikely(!sgt_buf)) {
		err = -ENOMEM;
		goto sgt_buf_alloc_failed;
	}
	sgt_buf = PTR_ALIGN(sgt_buf, DPAA2_ETH_TX_BUF_ALIGN);

Thanks,
Kevin
Jakub Kicinski Jan. 21, 2021, 5:13 a.m. UTC | #4
On Thu, 21 Jan 2021 13:09:10 +0800 Kevin Hao wrote:
> On Wed, Jan 20, 2021 at 08:59:14PM -0800, Jakub Kicinski wrote:
> > On Thu, 21 Jan 2021 12:20:35 +0800 Kevin Hao wrote:  
> > > Hmm, why not?
> > >   buf = napi_alloc_frag(pool->rbsize + 128);
> > >   buf = PTR_ALIGN(buf, 128);  
> > 
> > I'd keep the aligning in the driver until there are more users
> > needing this but yes, I agree, aligning the page frag buffers 
> > seems like a much better fix.  
> 
> It seems that the DPAA2 driver also need this (drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c):
> 	/* Prepare the HW SGT structure */
> 	sgt_buf_size = priv->tx_data_offset +
> 		       sizeof(struct dpaa2_sg_entry) *  num_dma_bufs;
> 	sgt_buf = napi_alloc_frag(sgt_buf_size + DPAA2_ETH_TX_BUF_ALIGN);
> 	if (unlikely(!sgt_buf)) {
> 		err = -ENOMEM;
> 		goto sgt_buf_alloc_failed;
> 	}
> 	sgt_buf = PTR_ALIGN(sgt_buf, DPAA2_ETH_TX_BUF_ALIGN);

We can fix them both up as a follow up in net-next, then?

Let's keep the patch small and local for the fix.
Kevin Hao Jan. 21, 2021, 5:35 a.m. UTC | #5
On Wed, Jan 20, 2021 at 09:13:20PM -0800, Jakub Kicinski wrote:
> On Thu, 21 Jan 2021 13:09:10 +0800 Kevin Hao wrote:
> > On Wed, Jan 20, 2021 at 08:59:14PM -0800, Jakub Kicinski wrote:
> > > On Thu, 21 Jan 2021 12:20:35 +0800 Kevin Hao wrote:  
> > > > Hmm, why not?
> > > >   buf = napi_alloc_frag(pool->rbsize + 128);
> > > >   buf = PTR_ALIGN(buf, 128);  
> > > 
> > > I'd keep the aligning in the driver until there are more users
> > > needing this but yes, I agree, aligning the page frag buffers 
> > > seems like a much better fix.  
> > 
> > It seems that the DPAA2 driver also need this (drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c):
> > 	/* Prepare the HW SGT structure */
> > 	sgt_buf_size = priv->tx_data_offset +
> > 		       sizeof(struct dpaa2_sg_entry) *  num_dma_bufs;
> > 	sgt_buf = napi_alloc_frag(sgt_buf_size + DPAA2_ETH_TX_BUF_ALIGN);
> > 	if (unlikely(!sgt_buf)) {
> > 		err = -ENOMEM;
> > 		goto sgt_buf_alloc_failed;
> > 	}
> > 	sgt_buf = PTR_ALIGN(sgt_buf, DPAA2_ETH_TX_BUF_ALIGN);
> 
> We can fix them both up as a follow up in net-next, then?
> 
> Let's keep the patch small and local for the fix.

OK, I will send a patch to align the buffer in the octeontx2 driver and then
introduce napi_alloc_frag_align() for net-next.

Thanks,
Kevin
diff mbox series

Patch

diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
index bdfa2e2..921cd86 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
@@ -483,35 +483,40 @@  void otx2_config_irq_coalescing(struct otx2_nic *pfvf, int qidx)
 		     (pfvf->hw.cq_ecount_wait - 1));
 }
 
-dma_addr_t __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool)
+dma_addr_t otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
+			   gfp_t gfp)
 {
 	dma_addr_t iova;
-	u8 *buf;
 
-	buf = napi_alloc_frag(pool->rbsize);
-	if (unlikely(!buf))
+	/* Check if request can be accommodated in previous allocated page */
+	if (pool->page && ((pool->page_offset + pool->rbsize) <=
+	    (PAGE_SIZE << pool->rbpage_order))) {
+		pool->pageref++;
+		goto ret;
+	}
+
+	otx2_get_page(pool);
+
+	/* Allocate a new page */
+	pool->page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN,
+				 pool->rbpage_order);
+	if (unlikely(!pool->page))
 		return -ENOMEM;
 
-	iova = dma_map_single_attrs(pfvf->dev, buf, pool->rbsize,
-				    DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
-	if (unlikely(dma_mapping_error(pfvf->dev, iova))) {
-		page_frag_free(buf);
+	pool->page_offset = 0;
+ret:
+	iova = (u64)otx2_dma_map_page(pfvf, pool->page, pool->page_offset,
+				      pool->rbsize, DMA_FROM_DEVICE);
+	if (!iova) {
+		if (!pool->page_offset)
+			__free_pages(pool->page, pool->rbpage_order);
+		pool->page = NULL;
 		return -ENOMEM;
 	}
-
+	pool->page_offset += pool->rbsize;
 	return iova;
 }
 
-static dma_addr_t otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool)
-{
-	dma_addr_t addr;
-
-	local_bh_disable();
-	addr = __otx2_alloc_rbuf(pfvf, pool);
-	local_bh_enable();
-	return addr;
-}
-
 void otx2_tx_timeout(struct net_device *netdev, unsigned int txq)
 {
 	struct otx2_nic *pfvf = netdev_priv(netdev);
@@ -913,7 +918,7 @@  static void otx2_pool_refill_task(struct work_struct *work)
 	free_ptrs = cq->pool_ptrs;
 
 	while (cq->pool_ptrs) {
-		bufptr = otx2_alloc_rbuf(pfvf, rbpool);
+		bufptr = otx2_alloc_rbuf(pfvf, rbpool, GFP_KERNEL);
 		if (bufptr <= 0) {
 			/* Schedule a WQ if we fails to free atleast half of the
 			 * pointers else enable napi for this RQ.
@@ -1172,6 +1177,7 @@  static int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id,
 		return err;
 
 	pool->rbsize = buf_size;
+	pool->rbpage_order = get_order(buf_size);
 
 	/* Initialize this pool's context via AF */
 	aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox);
@@ -1259,12 +1265,13 @@  int otx2_sq_aura_pool_init(struct otx2_nic *pfvf)
 			return -ENOMEM;
 
 		for (ptr = 0; ptr < num_sqbs; ptr++) {
-			bufptr = otx2_alloc_rbuf(pfvf, pool);
+			bufptr = otx2_alloc_rbuf(pfvf, pool, GFP_KERNEL);
 			if (bufptr <= 0)
 				return bufptr;
 			otx2_aura_freeptr(pfvf, pool_id, bufptr);
 			sq->sqb_ptrs[sq->sqb_count++] = (u64)bufptr;
 		}
+		otx2_get_page(pool);
 	}
 
 	return 0;
@@ -1310,12 +1317,13 @@  int otx2_rq_aura_pool_init(struct otx2_nic *pfvf)
 	for (pool_id = 0; pool_id < hw->rqpool_cnt; pool_id++) {
 		pool = &pfvf->qset.pool[pool_id];
 		for (ptr = 0; ptr < num_ptrs; ptr++) {
-			bufptr = otx2_alloc_rbuf(pfvf, pool);
+			bufptr = otx2_alloc_rbuf(pfvf, pool, GFP_KERNEL);
 			if (bufptr <= 0)
 				return bufptr;
 			otx2_aura_freeptr(pfvf, pool_id,
 					  bufptr + OTX2_HEAD_ROOM);
 		}
+		otx2_get_page(pool);
 	}
 
 	return 0;
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
index 143ae04..f670da9 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
@@ -491,6 +491,18 @@  static inline void otx2_aura_freeptr(struct otx2_nic *pfvf,
 		      otx2_get_regaddr(pfvf, NPA_LF_AURA_OP_FREE0));
 }
 
+/* Update page ref count */
+static inline void otx2_get_page(struct otx2_pool *pool)
+{
+	if (!pool->page)
+		return;
+
+	if (pool->pageref)
+		page_ref_add(pool->page, pool->pageref);
+	pool->pageref = 0;
+	pool->page = NULL;
+}
+
 static inline int otx2_get_pool_idx(struct otx2_nic *pfvf, int type, int idx)
 {
 	if (type == AURA_NIX_SQ)
@@ -636,7 +648,8 @@  int otx2_txschq_config(struct otx2_nic *pfvf, int lvl);
 int otx2_txsch_alloc(struct otx2_nic *pfvf);
 int otx2_txschq_stop(struct otx2_nic *pfvf);
 void otx2_sqb_flush(struct otx2_nic *pfvf);
-dma_addr_t __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool);
+dma_addr_t otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
+			   gfp_t gfp);
 int otx2_rxtx_enable(struct otx2_nic *pfvf, bool enable);
 void otx2_ctx_disable(struct mbox *mbox, int type, bool npa);
 int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
index d0e2541..7774d9a 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
@@ -333,7 +333,7 @@  static int otx2_rx_napi_handler(struct otx2_nic *pfvf,
 
 	/* Refill pool with new buffers */
 	while (cq->pool_ptrs) {
-		bufptr = __otx2_alloc_rbuf(pfvf, cq->rbpool);
+		bufptr = otx2_alloc_rbuf(pfvf, cq->rbpool, GFP_ATOMIC);
 		if (unlikely(bufptr <= 0)) {
 			struct refill_work *work;
 			struct delayed_work *dwork;
@@ -351,6 +351,7 @@  static int otx2_rx_napi_handler(struct otx2_nic *pfvf,
 		otx2_aura_freeptr(pfvf, cq->cq_idx, bufptr + OTX2_HEAD_ROOM);
 		cq->pool_ptrs--;
 	}
+	otx2_get_page(cq->rbpool);
 
 	return processed_cqe;
 }
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
index 73af156..9cd20c7 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
@@ -114,7 +114,11 @@  struct otx2_cq_poll {
 struct otx2_pool {
 	struct qmem		*stack;
 	struct qmem		*fc_addr;
+	u8			rbpage_order;
 	u16			rbsize;
+	u32			page_offset;
+	u16			pageref;
+	struct page		*page;
 };
 
 struct otx2_cq_queue {