diff mbox series

[net,v2] net: xilinx: axienet: Fix packet counting

Message ID 20240906164227.505984-1-sean.anderson@linux.dev (mailing list archive)
State New, archived
Headers show
Series [net,v2] net: xilinx: axienet: Fix packet counting | expand

Commit Message

Sean Anderson Sept. 6, 2024, 4:42 p.m. UTC
axienet_free_tx_chain returns the number of DMA descriptors it's
handled. However, axienet_tx_poll treats the return as the number of
packets. When scatter-gather SKBs are enabled, a single packet may use
multiple DMA descriptors, which causes incorrect packet counts. Fix this
by explicitly keeping track of the number of packets processed as
separate from the DMA descriptors.

Fixes: 8a3b7a252dca ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver")
Signed-off-by: Sean Anderson <sean.anderson@linux.dev>
---

Changes in v2:
- Only call napi_consume_skb with non-zero budget when force is false

 .../net/ethernet/xilinx/xilinx_axienet_main.c | 31 +++++++++++--------
 1 file changed, 18 insertions(+), 13 deletions(-)

Comments

Radhey Shyam Pandey Sept. 6, 2024, 5:44 p.m. UTC | #1
> -----Original Message-----
> From: Sean Anderson <sean.anderson@linux.dev>
> Sent: Friday, September 6, 2024 10:12 PM
> To: Pandey, Radhey Shyam <radhey.shyam.pandey@amd.com>; David S .
> Miller <davem@davemloft.net>; Eric Dumazet <edumazet@google.com>;
> Jakub Kicinski <kuba@kernel.org>; Paolo Abeni <pabeni@redhat.com>;
> netdev@vger.kernel.org
> Cc: Simek, Michal <michal.simek@amd.com>; linux-arm-
> kernel@lists.infradead.org; linux-kernel@vger.kernel.org; Andy Chiu
> <andy.chiu@sifive.com>; Daniel Borkmann <daniel@iogearbox.net>; Sean
> Anderson <sean.anderson@linux.dev>
> Subject: [PATCH net v2] net: xilinx: axienet: Fix packet counting
> 
> axienet_free_tx_chain returns the number of DMA descriptors it's
> handled. However, axienet_tx_poll treats the return as the number of
> packets. When scatter-gather SKBs are enabled, a single packet may use
> multiple DMA descriptors, which causes incorrect packet counts. Fix this
> by explicitly keepting track of the number of packets processed as
> separate from the DMA descriptors.
> 
> Fixes: 8a3b7a252dca ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet

Isn't it Fixes: 9e2bc267e780 ("net: axienet: Use NAPI for TX completion path")?

> driver")
> Signed-off-by: Sean Anderson <sean.anderson@linux.dev>

+ Harini, Suraj to review and run this patch to ensure data path sanity.

> ---
> 
> Changes in v2:
> - Only call napi_consume_skb with non-zero budget when force is false
> 
>  .../net/ethernet/xilinx/xilinx_axienet_main.c | 31 +++++++++++--------
>  1 file changed, 18 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
> b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
> index 9aeb7b9f3ae4..556033849d55 100644
> --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
> +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
> @@ -670,21 +670,21 @@ static int axienet_device_reset(struct net_device
> *ndev)
>   * @force:	Whether to clean descriptors even if not complete
>   * @sizep:	Pointer to a u32 filled with the total sum of all bytes
>   *		in all cleaned-up descriptors. Ignored if NULL.
> - * @budget:	NAPI budget (use 0 when not called from NAPI poll)
> + * @budget:	NAPI budget (use INT_MAX when not called from NAPI poll)
>   *
>   * Would either be called after a successful transmit operation, or after
>   * there was an error when setting up the chain.
> - * Returns the number of descriptors handled.
> + * Returns the number of packets handled.
>   */
>  static int axienet_free_tx_chain(struct axienet_local *lp, u32 first_bd,
>  				 int nr_bds, bool force, u32 *sizep, int
> budget)
>  {
>  	struct axidma_bd *cur_p;
>  	unsigned int status;
> +	int i, packets = 0;
>  	dma_addr_t phys;
> -	int i;
> 
> -	for (i = 0; i < nr_bds; i++) {
> +	for (i = 0; i < nr_bds && packets < budget; i++) {
>  		cur_p = &lp->tx_bd_v[(first_bd + i) % lp->tx_bd_num];
>  		status = cur_p->status;
> 
> @@ -701,8 +701,10 @@ static int axienet_free_tx_chain(struct axienet_local
> *lp, u32 first_bd,
>  				 (cur_p->cntrl &
> XAXIDMA_BD_CTRL_LENGTH_MASK),
>  				 DMA_TO_DEVICE);
> 
> -		if (cur_p->skb && (status &
> XAXIDMA_BD_STS_COMPLETE_MASK))
> -			napi_consume_skb(cur_p->skb, budget);
> +		if (cur_p->skb && (status &
> XAXIDMA_BD_STS_COMPLETE_MASK)) {
> +			napi_consume_skb(cur_p->skb, force ? 0 : budget);
> +			packets++;
> +		}
> 
>  		cur_p->app0 = 0;
>  		cur_p->app1 = 0;
> @@ -718,7 +720,13 @@ static int axienet_free_tx_chain(struct axienet_local
> *lp, u32 first_bd,
>  			*sizep += status &
> XAXIDMA_BD_STS_ACTUAL_LEN_MASK;
>  	}
> 
> -	return i;
> +	if (!force) {

Is the tx_bd_ci increment dependent on the force state, i.e. not done if force == true?
> +		lp->tx_bd_ci += i;
> +		if (lp->tx_bd_ci >= lp->tx_bd_num)
> +			lp->tx_bd_ci %= lp->tx_bd_num;
> +	}
> +
> +	return packets;
>  }
> 
>  /**
> @@ -891,13 +899,10 @@ static int axienet_tx_poll(struct napi_struct *napi,
> int budget)
>  	u32 size = 0;
>  	int packets;
> 
> -	packets = axienet_free_tx_chain(lp, lp->tx_bd_ci, budget, false,
> &size, budget);
> +	packets = axienet_free_tx_chain(lp, lp->tx_bd_ci, lp->tx_bd_num,

Why do we need to pass tx_bd_num here? Is budget not sufficient?

> false,
> +					&size, budget);
> 
>  	if (packets) {
> -		lp->tx_bd_ci += packets;
> -		if (lp->tx_bd_ci >= lp->tx_bd_num)
> -			lp->tx_bd_ci %= lp->tx_bd_num;
> -
>  		u64_stats_update_begin(&lp->tx_stat_sync);
>  		u64_stats_add(&lp->tx_packets, packets);
>  		u64_stats_add(&lp->tx_bytes, size);
> @@ -1003,7 +1008,7 @@ axienet_start_xmit(struct sk_buff *skb, struct
> net_device *ndev)
>  				netdev_err(ndev, "TX DMA mapping
> error\n");
>  			ndev->stats.tx_dropped++;
>  			axienet_free_tx_chain(lp, orig_tail_ptr, ii + 1,
> -					      true, NULL, 0);
> +					      true, NULL, INT_MAX);
>  			return NETDEV_TX_OK;
>  		}
>  		desc_set_phys_addr(lp, phys, cur_p);
> --
> 2.35.1.1320.gc452695387.dirty
Sean Anderson Sept. 6, 2024, 5:49 p.m. UTC | #2
On 9/6/24 13:44, Pandey, Radhey Shyam wrote:
>> -----Original Message-----
>> From: Sean Anderson <sean.anderson@linux.dev>
>> Sent: Friday, September 6, 2024 10:12 PM
>> To: Pandey, Radhey Shyam <radhey.shyam.pandey@amd.com>; David S .
>> Miller <davem@davemloft.net>; Eric Dumazet <edumazet@google.com>;
>> Jakub Kicinski <kuba@kernel.org>; Paolo Abeni <pabeni@redhat.com>;
>> netdev@vger.kernel.org
>> Cc: Simek, Michal <michal.simek@amd.com>; linux-arm-
>> kernel@lists.infradead.org; linux-kernel@vger.kernel.org; Andy Chiu
>> <andy.chiu@sifive.com>; Daniel Borkmann <daniel@iogearbox.net>; Sean
>> Anderson <sean.anderson@linux.dev>
>> Subject: [PATCH net v2] net: xilinx: axienet: Fix packet counting
>> 
>> axienet_free_tx_chain returns the number of DMA descriptors it's
>> handled. However, axienet_tx_poll treats the return as the number of
>> packets. When scatter-gather SKBs are enabled, a single packet may use
>> multiple DMA descriptors, which causes incorrect packet counts. Fix this
>> by explicitly keepting track of the number of packets processed as
>> separate from the DMA descriptors.
>> 
>> Fixes: 8a3b7a252dca ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet
> 
> Isn't it Fixes: 9e2bc267e780 ("net: axienet: Use NAPI for TX completion path")?

No. The packet count is also used for statistics, and this confusion has
been present since the original commit.

>> driver")
>> Signed-off-by: Sean Anderson <sean.anderson@linux.dev>
> 
> + Harini, Suraj to review and run this patch to ensure data path sanity.
> 
>> ---
>> 
>> Changes in v2:
>> - Only call napi_consume_skb with non-zero budget when force is false
>> 
>>  .../net/ethernet/xilinx/xilinx_axienet_main.c | 31 +++++++++++--------
>>  1 file changed, 18 insertions(+), 13 deletions(-)
>> 
>> diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
>> b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
>> index 9aeb7b9f3ae4..556033849d55 100644
>> --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
>> +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
>> @@ -670,21 +670,21 @@ static int axienet_device_reset(struct net_device
>> *ndev)
>>   * @force:	Whether to clean descriptors even if not complete
>>   * @sizep:	Pointer to a u32 filled with the total sum of all bytes
>>   *		in all cleaned-up descriptors. Ignored if NULL.
>> - * @budget:	NAPI budget (use 0 when not called from NAPI poll)
>> + * @budget:	NAPI budget (use INT_MAX when not called from NAPI poll)
>>   *
>>   * Would either be called after a successful transmit operation, or after
>>   * there was an error when setting up the chain.
>> - * Returns the number of descriptors handled.
>> + * Returns the number of packets handled.
>>   */
>>  static int axienet_free_tx_chain(struct axienet_local *lp, u32 first_bd,
>>  				 int nr_bds, bool force, u32 *sizep, int
>> budget)
>>  {
>>  	struct axidma_bd *cur_p;
>>  	unsigned int status;
>> +	int i, packets = 0;
>>  	dma_addr_t phys;
>> -	int i;
>> 
>> -	for (i = 0; i < nr_bds; i++) {
>> +	for (i = 0; i < nr_bds && packets < budget; i++) {
>>  		cur_p = &lp->tx_bd_v[(first_bd + i) % lp->tx_bd_num];
>>  		status = cur_p->status;
>> 
>> @@ -701,8 +701,10 @@ static int axienet_free_tx_chain(struct axienet_local
>> *lp, u32 first_bd,
>>  				 (cur_p->cntrl &
>> XAXIDMA_BD_CTRL_LENGTH_MASK),
>>  				 DMA_TO_DEVICE);
>> 
>> -		if (cur_p->skb && (status &
>> XAXIDMA_BD_STS_COMPLETE_MASK))
>> -			napi_consume_skb(cur_p->skb, budget);
>> +		if (cur_p->skb && (status &
>> XAXIDMA_BD_STS_COMPLETE_MASK)) {
>> +			napi_consume_skb(cur_p->skb, force ? 0 : budget);
>> +			packets++;
>> +		}
>> 
>>  		cur_p->app0 = 0;
>>  		cur_p->app1 = 0;
>> @@ -718,7 +720,13 @@ static int axienet_free_tx_chain(struct axienet_local
>> *lp, u32 first_bd,
>>  			*sizep += status &
>> XAXIDMA_BD_STS_ACTUAL_LEN_MASK;
>>  	}
>> 
>> -	return i;
>> +	if (!force) {
> 
> Is tx_bd_ci increment dependent on force state and not done if force == true ?

No. "force" is used in the error path of axienet_start_xmit to free
unsent descriptors. So in that case, once everything is done, tx_bd_ci
will be the same as before axienet_start_xmit is called.

>> +		lp->tx_bd_ci += i;
>> +		if (lp->tx_bd_ci >= lp->tx_bd_num)
>> +			lp->tx_bd_ci %= lp->tx_bd_num;
>> +	}
>> +
>> +	return packets;
>>  }
>> 
>>  /**
>> @@ -891,13 +899,10 @@ static int axienet_tx_poll(struct napi_struct *napi,
>> int budget)
>>  	u32 size = 0;
>>  	int packets;
>> 
>> -	packets = axienet_free_tx_chain(lp, lp->tx_bd_ci, budget, false,
>> &size, budget);
>> +	packets = axienet_free_tx_chain(lp, lp->tx_bd_ci, lp->tx_bd_num,
> 
> Why do we need to pass tx_bd_num here? Is budget not sufficient?

Imagine if the user sets the TX ring to 1 packet. On completion, we will
consume the SKB. Then we will enter an infinite loop as we will still
have budget for more packets but we will keep checking the same
descriptor for more packets. By setting a maximum descriptor count we
ensure that we only go through the TX ring once.

--Sean

>> false,
>> +					&size, budget);
>> 
>>  	if (packets) {
>> -		lp->tx_bd_ci += packets;
>> -		if (lp->tx_bd_ci >= lp->tx_bd_num)
>> -			lp->tx_bd_ci %= lp->tx_bd_num;
>> -
>>  		u64_stats_update_begin(&lp->tx_stat_sync);
>>  		u64_stats_add(&lp->tx_packets, packets);
>>  		u64_stats_add(&lp->tx_bytes, size);
>> @@ -1003,7 +1008,7 @@ axienet_start_xmit(struct sk_buff *skb, struct
>> net_device *ndev)
>>  				netdev_err(ndev, "TX DMA mapping
>> error\n");
>>  			ndev->stats.tx_dropped++;
>>  			axienet_free_tx_chain(lp, orig_tail_ptr, ii + 1,
>> -					      true, NULL, 0);
>> +					      true, NULL, INT_MAX);
>>  			return NETDEV_TX_OK;
>>  		}
>>  		desc_set_phys_addr(lp, phys, cur_p);
>> --
>> 2.35.1.1320.gc452695387.dirty
>
Jakub Kicinski Sept. 10, 2024, 1 a.m. UTC | #3
On Fri,  6 Sep 2024 12:42:27 -0400 Sean Anderson wrote:
> axienet_free_tx_chain returns the number of DMA descriptors it's
> handled. However, axienet_tx_poll treats the return as the number of
> packets. When scatter-gather SKBs are enabled, a single packet may use
> multiple DMA descriptors, which causes incorrect packet counts. Fix this
> by explicitly keepting track of the number of packets processed as
> separate from the DMA descriptors.
> 
> Fixes: 8a3b7a252dca ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver")
> Signed-off-by: Sean Anderson <sean.anderson@linux.dev>

> diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
> index 9aeb7b9f3ae4..556033849d55 100644
> --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
> +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
> @@ -670,21 +670,21 @@ static int axienet_device_reset(struct net_device *ndev)
>   * @force:	Whether to clean descriptors even if not complete
>   * @sizep:	Pointer to a u32 filled with the total sum of all bytes
>   *		in all cleaned-up descriptors. Ignored if NULL.
> - * @budget:	NAPI budget (use 0 when not called from NAPI poll)
> + * @budget:	NAPI budget (use INT_MAX when not called from NAPI poll)

use INT_MAX and force=true when ... ?
To make sure the dependency is clear.
But actually...

>   *
>   * Would either be called after a successful transmit operation, or after
>   * there was an error when setting up the chain.
> - * Returns the number of descriptors handled.
> + * Returns the number of packets handled.
>   */
>  static int axienet_free_tx_chain(struct axienet_local *lp, u32 first_bd,
>  				 int nr_bds, bool force, u32 *sizep, int budget)
>  {
>  	struct axidma_bd *cur_p;
>  	unsigned int status;
> +	int i, packets = 0;
>  	dma_addr_t phys;
> -	int i;
>  
> -	for (i = 0; i < nr_bds; i++) {
> +	for (i = 0; i < nr_bds && packets < budget; i++) {

Why are you doing this? To make sure the driver doesn't complete more
than "budget" Tx skbs? The budget is really for Rx, for Tx you can
use a reasonable fixed value, independent of what budget core
passes in, e.g. 128. See:
https://www.kernel.org/doc/html/next/networking/napi.html#datapath-api

>  		cur_p = &lp->tx_bd_v[(first_bd + i) % lp->tx_bd_num];
>  		status = cur_p->status;
>  
> @@ -701,8 +701,10 @@ static int axienet_free_tx_chain(struct axienet_local *lp, u32 first_bd,
>  				 (cur_p->cntrl & XAXIDMA_BD_CTRL_LENGTH_MASK),
>  				 DMA_TO_DEVICE);
>  
> -		if (cur_p->skb && (status & XAXIDMA_BD_STS_COMPLETE_MASK))
> -			napi_consume_skb(cur_p->skb, budget);
> +		if (cur_p->skb && (status & XAXIDMA_BD_STS_COMPLETE_MASK)) {
> +			napi_consume_skb(cur_p->skb, force ? 0 : budget);
> +			packets++;
> +		}
>  
>  		cur_p->app0 = 0;
>  		cur_p->app1 = 0;
> @@ -718,7 +720,13 @@ static int axienet_free_tx_chain(struct axienet_local *lp, u32 first_bd,
>  			*sizep += status & XAXIDMA_BD_STS_ACTUAL_LEN_MASK;
>  	}
>  
> -	return i;
> +	if (!force) {
> +		lp->tx_bd_ci += i;
> +		if (lp->tx_bd_ci >= lp->tx_bd_num)
> +			lp->tx_bd_ci %= lp->tx_bd_num;
> +	}

Moving this chunk into axienet_free_tx_chain() is a noop, right?
Please avoid code cleanups in fixes.

> +	return packets;
>  }
>  
>  /**
Sean Anderson Sept. 10, 2024, 2:24 p.m. UTC | #4
On 9/9/24 21:00, Jakub Kicinski wrote:
> On Fri,  6 Sep 2024 12:42:27 -0400 Sean Anderson wrote:
>> axienet_free_tx_chain returns the number of DMA descriptors it's
>> handled. However, axienet_tx_poll treats the return as the number of
>> packets. When scatter-gather SKBs are enabled, a single packet may use
>> multiple DMA descriptors, which causes incorrect packet counts. Fix this
>> by explicitly keepting track of the number of packets processed as
>> separate from the DMA descriptors.
>> 
>> Fixes: 8a3b7a252dca ("drivers/net/ethernet/xilinx: added Xilinx AXI Ethernet driver")
>> Signed-off-by: Sean Anderson <sean.anderson@linux.dev>
> 
>> diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
>> index 9aeb7b9f3ae4..556033849d55 100644
>> --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
>> +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
>> @@ -670,21 +670,21 @@ static int axienet_device_reset(struct net_device *ndev)
>>   * @force:	Whether to clean descriptors even if not complete
>>   * @sizep:	Pointer to a u32 filled with the total sum of all bytes
>>   *		in all cleaned-up descriptors. Ignored if NULL.
>> - * @budget:	NAPI budget (use 0 when not called from NAPI poll)
>> + * @budget:	NAPI budget (use INT_MAX when not called from NAPI poll)
> 
> use INT_MAX and force=true when ... ?
> To make sure the dependency is clear.
> But actually...
> 
>>   *
>>   * Would either be called after a successful transmit operation, or after
>>   * there was an error when setting up the chain.
>> - * Returns the number of descriptors handled.
>> + * Returns the number of packets handled.
>>   */
>>  static int axienet_free_tx_chain(struct axienet_local *lp, u32 first_bd,
>>  				 int nr_bds, bool force, u32 *sizep, int budget)
>>  {
>>  	struct axidma_bd *cur_p;
>>  	unsigned int status;
>> +	int i, packets = 0;
>>  	dma_addr_t phys;
>> -	int i;
>>  
>> -	for (i = 0; i < nr_bds; i++) {
>> +	for (i = 0; i < nr_bds && packets < budget; i++) {
> 
> why are you doing this? To make sure drivers doesn't complete more 
> than "budget" Tx skbs? The budget is really for Rx, for Tx you can
> use a reasonable fixed value, independent of what budget core
> passes in, e.g. 128. See:
> https://www.kernel.org/doc/html/next/networking/napi.html#datapath-api

I read this but it was unclear to me because it seems oriented towards
"combined" NAPI instances, while we have separate instances for RX and
TX. So even for TX-only instances, we can ignore budget?

>>  		cur_p = &lp->tx_bd_v[(first_bd + i) % lp->tx_bd_num];
>>  		status = cur_p->status;
>>  
>> @@ -701,8 +701,10 @@ static int axienet_free_tx_chain(struct axienet_local *lp, u32 first_bd,
>>  				 (cur_p->cntrl & XAXIDMA_BD_CTRL_LENGTH_MASK),
>>  				 DMA_TO_DEVICE);
>>  
>> -		if (cur_p->skb && (status & XAXIDMA_BD_STS_COMPLETE_MASK))
>> -			napi_consume_skb(cur_p->skb, budget);
>> +		if (cur_p->skb && (status & XAXIDMA_BD_STS_COMPLETE_MASK)) {
>> +			napi_consume_skb(cur_p->skb, force ? 0 : budget);
>> +			packets++;
>> +		}
>>  
>>  		cur_p->app0 = 0;
>>  		cur_p->app1 = 0;
>> @@ -718,7 +720,13 @@ static int axienet_free_tx_chain(struct axienet_local *lp, u32 first_bd,
>>  			*sizep += status & XAXIDMA_BD_STS_ACTUAL_LEN_MASK;
>>  	}
>>  
>> -	return i;
>> +	if (!force) {
>> +		lp->tx_bd_ci += i;
>> +		if (lp->tx_bd_ci >= lp->tx_bd_num)
>> +			lp->tx_bd_ci %= lp->tx_bd_num;
>> +	}
> 
> Moving this chunk into axienet_free_tx_chain() is a noop, right?
> Please avoid code cleanups in fixes.

The relevant variable (number of descriptors handled) is no longer
returned to axienet_tx_poll, so it can't update the current descriptor
properly.

--Sean

>> +	return packets;
>>  }
>>  
>>  /**
Jakub Kicinski Sept. 10, 2024, 2:39 p.m. UTC | #5
On Tue, 10 Sep 2024 10:24:36 -0400 Sean Anderson wrote:
> > why are you doing this? To make sure drivers doesn't complete more 
> > than "budget" Tx skbs? The budget is really for Rx, for Tx you can
> > use a reasonable fixed value, independent of what budget core
> > passes in, e.g. 128. See:
> > https://www.kernel.org/doc/html/next/networking/napi.html#datapath-api  
> 
> I read this but it was unclear to me because it seems oriented towards
> "combined" NAPI instances, while we have separate instances for RX and
> TX. So even for TX-only instances, we can ignore budget?

Yes, combined or dedicated it should still reap completions regardless
of budget.

> > Moving this chunk into axienet_free_tx_chain() is a noop, right?
> > Please avoid code cleanups in fixes.  
> 
> The relevant variable (number of descriptors handled) is no longer
> returned to axienet_tx_poll, so it can't update the current descriptor
> properly.

Got it, worth mentioning in the commit msg.
diff mbox series

Patch

diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index 9aeb7b9f3ae4..556033849d55 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -670,21 +670,21 @@  static int axienet_device_reset(struct net_device *ndev)
  * @force:	Whether to clean descriptors even if not complete
  * @sizep:	Pointer to a u32 filled with the total sum of all bytes
  *		in all cleaned-up descriptors. Ignored if NULL.
- * @budget:	NAPI budget (use 0 when not called from NAPI poll)
+ * @budget:	NAPI budget (use INT_MAX when not called from NAPI poll)
  *
  * Would either be called after a successful transmit operation, or after
  * there was an error when setting up the chain.
- * Returns the number of descriptors handled.
+ * Returns the number of packets handled.
  */
 static int axienet_free_tx_chain(struct axienet_local *lp, u32 first_bd,
 				 int nr_bds, bool force, u32 *sizep, int budget)
 {
 	struct axidma_bd *cur_p;
 	unsigned int status;
+	int i, packets = 0;
 	dma_addr_t phys;
-	int i;
 
-	for (i = 0; i < nr_bds; i++) {
+	for (i = 0; i < nr_bds && packets < budget; i++) {
 		cur_p = &lp->tx_bd_v[(first_bd + i) % lp->tx_bd_num];
 		status = cur_p->status;
 
@@ -701,8 +701,10 @@  static int axienet_free_tx_chain(struct axienet_local *lp, u32 first_bd,
 				 (cur_p->cntrl & XAXIDMA_BD_CTRL_LENGTH_MASK),
 				 DMA_TO_DEVICE);
 
-		if (cur_p->skb && (status & XAXIDMA_BD_STS_COMPLETE_MASK))
-			napi_consume_skb(cur_p->skb, budget);
+		if (cur_p->skb && (status & XAXIDMA_BD_STS_COMPLETE_MASK)) {
+			napi_consume_skb(cur_p->skb, force ? 0 : budget);
+			packets++;
+		}
 
 		cur_p->app0 = 0;
 		cur_p->app1 = 0;
@@ -718,7 +720,13 @@  static int axienet_free_tx_chain(struct axienet_local *lp, u32 first_bd,
 			*sizep += status & XAXIDMA_BD_STS_ACTUAL_LEN_MASK;
 	}
 
-	return i;
+	if (!force) {
+		lp->tx_bd_ci += i;
+		if (lp->tx_bd_ci >= lp->tx_bd_num)
+			lp->tx_bd_ci %= lp->tx_bd_num;
+	}
+
+	return packets;
 }
 
 /**
@@ -891,13 +899,10 @@  static int axienet_tx_poll(struct napi_struct *napi, int budget)
 	u32 size = 0;
 	int packets;
 
-	packets = axienet_free_tx_chain(lp, lp->tx_bd_ci, budget, false, &size, budget);
+	packets = axienet_free_tx_chain(lp, lp->tx_bd_ci, lp->tx_bd_num, false,
+					&size, budget);
 
 	if (packets) {
-		lp->tx_bd_ci += packets;
-		if (lp->tx_bd_ci >= lp->tx_bd_num)
-			lp->tx_bd_ci %= lp->tx_bd_num;
-
 		u64_stats_update_begin(&lp->tx_stat_sync);
 		u64_stats_add(&lp->tx_packets, packets);
 		u64_stats_add(&lp->tx_bytes, size);
@@ -1003,7 +1008,7 @@  axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 				netdev_err(ndev, "TX DMA mapping error\n");
 			ndev->stats.tx_dropped++;
 			axienet_free_tx_chain(lp, orig_tail_ptr, ii + 1,
-					      true, NULL, 0);
+					      true, NULL, INT_MAX);
 			return NETDEV_TX_OK;
 		}
 		desc_set_phys_addr(lp, phys, cur_p);