diff mbox series

[net,v2] net: ethernet: ti: am65-cpsw: Fix NAPI registration sequence

Message ID 20250311130103.68971-1-s-vadapalli@ti.com (mailing list archive)
State New
Headers show
Series [net,v2] net: ethernet: ti: am65-cpsw: Fix NAPI registration sequence | expand

Commit Message

Siddharth Vadapalli March 11, 2025, 1:01 p.m. UTC
From: Vignesh Raghavendra <vigneshr@ti.com>

Registering the interrupts for TX or RX DMA Channels prior to registering
their respective NAPI callbacks can result in a NULL pointer dereference.
This is seen in practice as a random occurrence since it depends on the
randomness associated with the generation of traffic by Linux and the
reception of traffic from the wire.

Fixes: 681eb2beb3ef ("net: ethernet: ti: am65-cpsw: ensure proper channel cleanup in error path")
Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
Co-developed-by: Siddharth Vadapalli <s-vadapalli@ti.com>
Signed-off-by: Siddharth Vadapalli <s-vadapalli@ti.com>
---

Hello,

This patch is based on commit
4d872d51bc9d Merge tag 'x86-urgent-2025-03-10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
of Mainline Linux.

v1 of this patch is at:
https://lore.kernel.org/all/20250311061214.4111634-1-s-vadapalli@ti.com/
Changes since v1:
- Based on the feedback provided by Alexander Sverdlin <alexander.sverdlin@siemens.com>
  the patch has been updated to account for the cleanup path in terms of an imbalance
  between the number of successful netif_napi_add_tx/netif_napi_add calls and the
  number of successful devm_request_irq() calls. In the event of an error, we will
  always have one extra successful netif_napi_add_tx/netif_napi_add that needs to be
  cleaned up before we clean an equal number of netif_napi_add_tx/netif_napi_add and
  devm_request_irq.

Regards,
Siddharth.

 drivers/net/ethernet/ti/am65-cpsw-nuss.c | 28 +++++++++++++-----------
 1 file changed, 15 insertions(+), 13 deletions(-)

Comments

Sverdlin, Alexander March 11, 2025, 1:53 p.m. UTC | #1
Hi Siddharth!

On Tue, 2025-03-11 at 18:31 +0530, Siddharth Vadapalli wrote:
> From: Vignesh Raghavendra <vigneshr@ti.com>
> 
> Registering the interrupts for TX or RX DMA Channels prior to registering
> their respective NAPI callbacks can result in a NULL pointer dereference.
> This is seen in practice as a random occurrence since it depends on the
> randomness associated with the generation of traffic by Linux and the
> reception of traffic from the wire.
> 
> Fixes: 681eb2beb3ef ("net: ethernet: ti: am65-cpsw: ensure proper channel cleanup in error path")
> Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
> Co-developed-by: Siddharth Vadapalli <s-vadapalli@ti.com>
> Signed-off-by: Siddharth Vadapalli <s-vadapalli@ti.com>

...

> v1 of this patch is at:
> https://lore.kernel.org/all/20250311061214.4111634-1-s-vadapalli@ti.com/
> Changes since v1:
> - Based on the feedback provided by Alexander Sverdlin <alexander.sverdlin@siemens.com>
>   the patch has been updated to account for the cleanup path in terms of an imbalance
>   between the number of successful netif_napi_add_tx/netif_napi_add calls and the
>   number of successful devm_request_irq() calls. In the event of an error, we will
>   always have one extra successful netif_napi_add_tx/netif_napi_add that needs to be
>   cleaned up before we clean an equal number of netif_napi_add_tx/netif_napi_add and
>   devm_request_irq.

...

> --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
> +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
> @@ -2569,6 +2570,9 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common)
>  			     HRTIMER_MODE_REL_PINNED);
>  		flow->rx_hrtimer.function = &am65_cpsw_nuss_rx_timer_callback;
>  
> +		netif_napi_add(common->dma_ndev, &flow->napi_rx,
> +			       am65_cpsw_nuss_rx_poll);
> +
>  		ret = devm_request_irq(dev, flow->irq,
>  				       am65_cpsw_nuss_rx_irq,
>  				       IRQF_TRIGGER_HIGH,
> @@ -2579,9 +2583,6 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common)
>  			flow->irq = -EINVAL;
>  			goto err_flow;
>  		}
> -
> -		netif_napi_add(common->dma_ndev, &flow->napi_rx,
> -			       am65_cpsw_nuss_rx_poll);
>  	}
>  
>  	/* setup classifier to route priorities to flows */
> @@ -2590,10 +2591,11 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common)
>  	return 0;
>  
>  err_flow:
> -	for (--i; i >= 0 ; i--) {
> +	netif_napi_del(&flow->napi_rx);

There are totally 3 "goto err_flow;" instances, so if k3_udma_glue_rx_flow_init() or
k3_udma_glue_rx_get_irq() would fail on the first iteration, we would come here without
a single call to netif_napi_add().

> +	for (--i; i >= 0; i--) {
>  		flow = &rx_chn->flows[i];
> -		netif_napi_del(&flow->napi_rx);
>  		devm_free_irq(dev, flow->irq, flow);
> +		netif_napi_del(&flow->napi_rx);
>  	}
>  
>  err:
Siddharth Vadapalli March 11, 2025, 3:18 p.m. UTC | #2
On Tue, Mar 11, 2025 at 01:53:10PM +0000, Sverdlin, Alexander wrote:
> Hi Siddharth!
> 
> On Tue, 2025-03-11 at 18:31 +0530, Siddharth Vadapalli wrote:
> > From: Vignesh Raghavendra <vigneshr@ti.com>
> > 
> > Registering the interrupts for TX or RX DMA Channels prior to registering
> > their respective NAPI callbacks can result in a NULL pointer dereference.
> > This is seen in practice as a random occurrence since it depends on the
> > randomness associated with the generation of traffic by Linux and the
> > reception of traffic from the wire.
> > 
> > Fixes: 681eb2beb3ef ("net: ethernet: ti: am65-cpsw: ensure proper channel cleanup in error path")
> > Signed-off-by: Vignesh Raghavendra <vigneshr@ti.com>
> > Co-developed-by: Siddharth Vadapalli <s-vadapalli@ti.com>
> > Signed-off-by: Siddharth Vadapalli <s-vadapalli@ti.com>
> 
> ...
> 
> > v1 of this patch is at:
> > https://lore.kernel.org/all/20250311061214.4111634-1-s-vadapalli@ti.com/
> > Changes since v1:
> > - Based on the feedback provided by Alexander Sverdlin <alexander.sverdlin@siemens.com>
> >   the patch has been updated to account for the cleanup path in terms of an imbalance
> >   between the number of successful netif_napi_add_tx/netif_napi_add calls and the
> >   number of successful devm_request_irq() calls. In the event of an error, we will
> >   always have one extra successful netif_napi_add_tx/netif_napi_add that needs to be
> >   cleaned up before we clean an equal number of netif_napi_add_tx/netif_napi_add and
> >   devm_request_irq.
> 
> ...
> 
> > --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
> > +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
> > @@ -2569,6 +2570,9 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common)
> >  			     HRTIMER_MODE_REL_PINNED);
> >  		flow->rx_hrtimer.function = &am65_cpsw_nuss_rx_timer_callback;
> >  
> > +		netif_napi_add(common->dma_ndev, &flow->napi_rx,
> > +			       am65_cpsw_nuss_rx_poll);
> > +
> >  		ret = devm_request_irq(dev, flow->irq,
> >  				       am65_cpsw_nuss_rx_irq,
> >  				       IRQF_TRIGGER_HIGH,
> > @@ -2579,9 +2583,6 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common)
> >  			flow->irq = -EINVAL;
> >  			goto err_flow;
> >  		}
> > -
> > -		netif_napi_add(common->dma_ndev, &flow->napi_rx,
> > -			       am65_cpsw_nuss_rx_poll);
> >  	}
> >  
> >  	/* setup classifier to route priorities to flows */
> > @@ -2590,10 +2591,11 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common)
> >  	return 0;
> >  
> >  err_flow:
> > -	for (--i; i >= 0 ; i--) {
> > +	netif_napi_del(&flow->napi_rx);
> 
> There are totally 3 "goto err_flow;" instances, so if k3_udma_glue_rx_flow_init() or
> k3_udma_glue_rx_get_irq() would fail on the first iteration, we would come here without
> a single call to netif_napi_add().

The following should address this right?
------------------------------------------------------------------------------------------------
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index b88edf2dd8f4..bef734c6e5c2 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -2581,7 +2581,7 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common)
                        dev_err(dev, "failure requesting rx %d irq %u, %d\n",
                                i, flow->irq, ret);
                        flow->irq = -EINVAL;
-                       goto err_flow;
+                       goto err_request_irq;
                }
        }

@@ -2590,8 +2590,10 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common)

        return 0;

-err_flow:
+err_request_irq:
        netif_napi_del(&flow->napi_rx);
+
+err_flow:
        for (--i; i >= 0; i--) {
                flow = &rx_chn->flows[i];
                devm_free_irq(dev, flow->irq, flow);
------------------------------------------------------------------------------------------------

err_request_irq => We have an extra netif_napi_add() which needs to be
cleaned up.
err_flow => Equal count of netif_napi_add() and devm_request_irq() that
should be cleaned up.

Regards,
Siddharth.
diff mbox series

Patch

diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index 2806238629f8..b88edf2dd8f4 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -2306,14 +2306,18 @@  static void am65_cpsw_nuss_remove_tx_chns(struct am65_cpsw_common *common)
 static int am65_cpsw_nuss_ndev_add_tx_napi(struct am65_cpsw_common *common)
 {
 	struct device *dev = common->dev;
+	struct am65_cpsw_tx_chn *tx_chn;
 	int i, ret = 0;
 
 	for (i = 0; i < common->tx_ch_num; i++) {
-		struct am65_cpsw_tx_chn *tx_chn = &common->tx_chns[i];
+		tx_chn = &common->tx_chns[i];
 
 		hrtimer_init(&tx_chn->tx_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
 		tx_chn->tx_hrtimer.function = &am65_cpsw_nuss_tx_timer_callback;
 
+		netif_napi_add_tx(common->dma_ndev, &tx_chn->napi_tx,
+				  am65_cpsw_nuss_tx_poll);
+
 		ret = devm_request_irq(dev, tx_chn->irq,
 				       am65_cpsw_nuss_tx_irq,
 				       IRQF_TRIGGER_HIGH,
@@ -2323,19 +2327,16 @@  static int am65_cpsw_nuss_ndev_add_tx_napi(struct am65_cpsw_common *common)
 				tx_chn->id, tx_chn->irq, ret);
 			goto err;
 		}
-
-		netif_napi_add_tx(common->dma_ndev, &tx_chn->napi_tx,
-				  am65_cpsw_nuss_tx_poll);
 	}
 
 	return 0;
 
 err:
-	for (--i ; i >= 0 ; i--) {
-		struct am65_cpsw_tx_chn *tx_chn = &common->tx_chns[i];
-
-		netif_napi_del(&tx_chn->napi_tx);
+	netif_napi_del(&tx_chn->napi_tx);
+	for (--i; i >= 0; i--) {
+		tx_chn = &common->tx_chns[i];
 		devm_free_irq(dev, tx_chn->irq, tx_chn);
+		netif_napi_del(&tx_chn->napi_tx);
 	}
 
 	return ret;
@@ -2569,6 +2570,9 @@  static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common)
 			     HRTIMER_MODE_REL_PINNED);
 		flow->rx_hrtimer.function = &am65_cpsw_nuss_rx_timer_callback;
 
+		netif_napi_add(common->dma_ndev, &flow->napi_rx,
+			       am65_cpsw_nuss_rx_poll);
+
 		ret = devm_request_irq(dev, flow->irq,
 				       am65_cpsw_nuss_rx_irq,
 				       IRQF_TRIGGER_HIGH,
@@ -2579,9 +2583,6 @@  static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common)
 			flow->irq = -EINVAL;
 			goto err_flow;
 		}
-
-		netif_napi_add(common->dma_ndev, &flow->napi_rx,
-			       am65_cpsw_nuss_rx_poll);
 	}
 
 	/* setup classifier to route priorities to flows */
@@ -2590,10 +2591,11 @@  static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common)
 	return 0;
 
 err_flow:
-	for (--i; i >= 0 ; i--) {
+	netif_napi_del(&flow->napi_rx);
+	for (--i; i >= 0; i--) {
 		flow = &rx_chn->flows[i];
-		netif_napi_del(&flow->napi_rx);
 		devm_free_irq(dev, flow->irq, flow);
+		netif_napi_del(&flow->napi_rx);
 	}
 
 err: