diff mbox

[1/1] net: macb: remove BUG_ON() and reset the queue to handle RX errors

Message ID 1458830232-6159-1-git-send-email-cyrille.pitchen@atmel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Cyrille Pitchen March 24, 2016, 2:37 p.m. UTC
This patch removes two BUG_ON() used to notify about RX queue corruptions
on macb (not gem) hardware without actually handling the error.

The new code skips corrupted frames but still processes faultless frames.
Then it resets the RX queue before restarting the reception from a clean
state.

This patch is a rework of an older patch proposed by Neil Armstrong:
http://patchwork.ozlabs.org/patch/371525/

Signed-off-by: Cyrille Pitchen <cyrille.pitchen@atmel.com>
---
 drivers/net/ethernet/cadence/macb.c | 59 ++++++++++++++++++++++++++++++-------
 1 file changed, 49 insertions(+), 10 deletions(-)

Comments

Neil Armstrong March 24, 2016, 2:53 p.m. UTC | #1
On 03/24/2016 03:37 PM, Cyrille Pitchen wrote:
> This patch removes two BUG_ON() used to notify about RX queue corruptions
> on macb (not gem) hardware without actually handling the error.
> 
> The new code skips corrupted frames but still processes faultless frames.
> Then it resets the RX queue before restarting the reception from a clean
> state.
> 
> This patch is a rework of an older patch proposed by Neil Armstrong:
> http://patchwork.ozlabs.org/patch/371525/
> 
> Signed-off-by: Cyrille Pitchen <cyrille.pitchen@atmel.com>
> ---
>  drivers/net/ethernet/cadence/macb.c | 59 ++++++++++++++++++++++++++++++-------
>  1 file changed, 49 insertions(+), 10 deletions(-)

Hi Cyrille,

Thanks for the rework, we solved this situation by moving the descriptors to
an internal RAM with lower latencies, we suspected our AHB-AXI bridge + DDR Controller
to delay the descriptor writes while the interrupt was handled in the meantime.
But the error case was still not handled.

Acked-by: Neil Armstrong <narmstrong@baylibre.com>

Thanks,
Neil
Nicolas Ferre March 24, 2016, 3:42 p.m. UTC | #2
Le 24/03/2016 15:37, Cyrille Pitchen a écrit :
> This patch removes two BUG_ON() used to notify about RX queue corruptions
> on macb (not gem) hardware without actually handling the error.
> 
> The new code skips corrupted frames but still processes faultless frames.
> Then it resets the RX queue before restarting the reception from a clean
> state.
> 
> This patch is a rework of an older patch proposed by Neil Armstrong:
> http://patchwork.ozlabs.org/patch/371525/
> 
> Signed-off-by: Cyrille Pitchen <cyrille.pitchen@atmel.com>

Thanks for this rework of Neil's patch that was standing for a long time
in my backlog ;-).

Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>

Bye,

> ---
>  drivers/net/ethernet/cadence/macb.c | 59 ++++++++++++++++++++++++++++++-------
>  1 file changed, 49 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
> index 6619178ed77b..39447a337149 100644
> --- a/drivers/net/ethernet/cadence/macb.c
> +++ b/drivers/net/ethernet/cadence/macb.c
> @@ -917,7 +917,10 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag,
>  		unsigned int frag_len = bp->rx_buffer_size;
>  
>  		if (offset + frag_len > len) {
> -			BUG_ON(frag != last_frag);
> +			if (unlikely(frag != last_frag)) {
> +				dev_kfree_skb_any(skb);
> +				return -1;
> +			}
>  			frag_len = len - offset;
>  		}
>  		skb_copy_to_linear_data_offset(skb, offset,
> @@ -945,11 +948,26 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag,
>  	return 0;
>  }
>  
> +static inline void macb_init_rx_ring(struct macb *bp)
> +{
> +	int i;
> +	dma_addr_t addr;
> +
> +	addr = bp->rx_buffers_dma;
> +	for (i = 0; i < RX_RING_SIZE; i++) {
> +		bp->rx_ring[i].addr = addr;
> +		bp->rx_ring[i].ctrl = 0;
> +		addr += bp->rx_buffer_size;
> +	}
> +	bp->rx_ring[RX_RING_SIZE - 1].addr |= MACB_BIT(RX_WRAP);
> +}
> +
>  static int macb_rx(struct macb *bp, int budget)
>  {
>  	int received = 0;
>  	unsigned int tail;
>  	int first_frag = -1;
> +	int reset_rx_queue = 0;
>  
>  	for (tail = bp->rx_tail; budget > 0; tail++) {
>  		struct macb_dma_desc *desc = macb_rx_desc(bp, tail);
> @@ -972,10 +990,18 @@ static int macb_rx(struct macb *bp, int budget)
>  
>  		if (ctrl & MACB_BIT(RX_EOF)) {
>  			int dropped;
> -			BUG_ON(first_frag == -1);
> +
> +			if (unlikely(first_frag == -1)) {
> +				reset_rx_queue = 1;
> +				continue;
> +			}
>  
>  			dropped = macb_rx_frame(bp, first_frag, tail);
>  			first_frag = -1;
> +			if (unlikely(dropped < 0)) {
> +				reset_rx_queue = 1;
> +				continue;
> +			}
>  			if (!dropped) {
>  				received++;
>  				budget--;
> @@ -983,6 +1009,26 @@ static int macb_rx(struct macb *bp, int budget)
>  		}
>  	}
>  
> +	if (unlikely(reset_rx_queue)) {
> +		unsigned long flags;
> +		u32 ctrl;
> +
> +		netdev_err(bp->dev, "RX queue corruption: reset it\n");
> +
> +		spin_lock_irqsave(&bp->lock, flags);
> +
> +		ctrl = macb_readl(bp, NCR);
> +		macb_writel(bp, NCR, ctrl & ~MACB_BIT(RE));
> +
> +		macb_init_rx_ring(bp);
> +		macb_writel(bp, RBQP, bp->rx_ring_dma);
> +
> +		macb_writel(bp, NCR, ctrl | MACB_BIT(RE));
> +
> +		spin_unlock_irqrestore(&bp->lock, flags);
> +		return received;
> +	}
> +
>  	if (first_frag != -1)
>  		bp->rx_tail = first_frag;
>  	else
> @@ -1523,15 +1569,8 @@ static void gem_init_rings(struct macb *bp)
>  static void macb_init_rings(struct macb *bp)
>  {
>  	int i;
> -	dma_addr_t addr;
>  
> -	addr = bp->rx_buffers_dma;
> -	for (i = 0; i < RX_RING_SIZE; i++) {
> -		bp->rx_ring[i].addr = addr;
> -		bp->rx_ring[i].ctrl = 0;
> -		addr += bp->rx_buffer_size;
> -	}
> -	bp->rx_ring[RX_RING_SIZE - 1].addr |= MACB_BIT(RX_WRAP);
> +	macb_init_rx_ring(bp);
>  
>  	for (i = 0; i < TX_RING_SIZE; i++) {
>  		bp->queues[0].tx_ring[i].addr = 0;
>
Sergei Shtylyov March 24, 2016, 6:19 p.m. UTC | #3
Hello.

On 03/24/2016 05:37 PM, Cyrille Pitchen wrote:

> This patch removes two BUG_ON() used to notify about RX queue corruptions
> on macb (not gem) hardware without actually handling the error.
>
> The new code skips corrupted frames but still processes faultless frames.
> Then it resets the RX queue before restarting the reception from a clean
> state.
>
> This patch is a rework of an older patch proposed by Neil Armstrong:
> http://patchwork.ozlabs.org/patch/371525/
>
> Signed-off-by: Cyrille Pitchen <cyrille.pitchen@atmel.com>
> ---
>   drivers/net/ethernet/cadence/macb.c | 59 ++++++++++++++++++++++++++++++-------
>   1 file changed, 49 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
> index 6619178ed77b..39447a337149 100644
> --- a/drivers/net/ethernet/cadence/macb.c
> +++ b/drivers/net/ethernet/cadence/macb.c
[...]
> @@ -945,11 +948,26 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag,
>   	return 0;
>   }
>
> +static inline void macb_init_rx_ring(struct macb *bp)
> +{
> +	int i;
> +	dma_addr_t addr;

    DaveM prefers that longer declarations precede the shorter.

[...]
>   static int macb_rx(struct macb *bp, int budget)
>   {
>   	int received = 0;
>   	unsigned int tail;
>   	int first_frag = -1;
> +	int reset_rx_queue = 0;

    *bool*, please.

[...]

MBR, Sergei
diff mbox

Patch

diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index 6619178ed77b..39447a337149 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -917,7 +917,10 @@  static int macb_rx_frame(struct macb *bp, unsigned int first_frag,
 		unsigned int frag_len = bp->rx_buffer_size;
 
 		if (offset + frag_len > len) {
-			BUG_ON(frag != last_frag);
+			if (unlikely(frag != last_frag)) {
+				dev_kfree_skb_any(skb);
+				return -1;
+			}
 			frag_len = len - offset;
 		}
 		skb_copy_to_linear_data_offset(skb, offset,
@@ -945,11 +948,26 @@  static int macb_rx_frame(struct macb *bp, unsigned int first_frag,
 	return 0;
 }
 
+static inline void macb_init_rx_ring(struct macb *bp)
+{
+	int i;
+	dma_addr_t addr;
+
+	addr = bp->rx_buffers_dma;
+	for (i = 0; i < RX_RING_SIZE; i++) {
+		bp->rx_ring[i].addr = addr;
+		bp->rx_ring[i].ctrl = 0;
+		addr += bp->rx_buffer_size;
+	}
+	bp->rx_ring[RX_RING_SIZE - 1].addr |= MACB_BIT(RX_WRAP);
+}
+
 static int macb_rx(struct macb *bp, int budget)
 {
 	int received = 0;
 	unsigned int tail;
 	int first_frag = -1;
+	int reset_rx_queue = 0;
 
 	for (tail = bp->rx_tail; budget > 0; tail++) {
 		struct macb_dma_desc *desc = macb_rx_desc(bp, tail);
@@ -972,10 +990,18 @@  static int macb_rx(struct macb *bp, int budget)
 
 		if (ctrl & MACB_BIT(RX_EOF)) {
 			int dropped;
-			BUG_ON(first_frag == -1);
+
+			if (unlikely(first_frag == -1)) {
+				reset_rx_queue = 1;
+				continue;
+			}
 
 			dropped = macb_rx_frame(bp, first_frag, tail);
 			first_frag = -1;
+			if (unlikely(dropped < 0)) {
+				reset_rx_queue = 1;
+				continue;
+			}
 			if (!dropped) {
 				received++;
 				budget--;
@@ -983,6 +1009,26 @@  static int macb_rx(struct macb *bp, int budget)
 		}
 	}
 
+	if (unlikely(reset_rx_queue)) {
+		unsigned long flags;
+		u32 ctrl;
+
+		netdev_err(bp->dev, "RX queue corruption: reset it\n");
+
+		spin_lock_irqsave(&bp->lock, flags);
+
+		ctrl = macb_readl(bp, NCR);
+		macb_writel(bp, NCR, ctrl & ~MACB_BIT(RE));
+
+		macb_init_rx_ring(bp);
+		macb_writel(bp, RBQP, bp->rx_ring_dma);
+
+		macb_writel(bp, NCR, ctrl | MACB_BIT(RE));
+
+		spin_unlock_irqrestore(&bp->lock, flags);
+		return received;
+	}
+
 	if (first_frag != -1)
 		bp->rx_tail = first_frag;
 	else
@@ -1523,15 +1569,8 @@  static void gem_init_rings(struct macb *bp)
 static void macb_init_rings(struct macb *bp)
 {
 	int i;
-	dma_addr_t addr;
 
-	addr = bp->rx_buffers_dma;
-	for (i = 0; i < RX_RING_SIZE; i++) {
-		bp->rx_ring[i].addr = addr;
-		bp->rx_ring[i].ctrl = 0;
-		addr += bp->rx_buffer_size;
-	}
-	bp->rx_ring[RX_RING_SIZE - 1].addr |= MACB_BIT(RX_WRAP);
+	macb_init_rx_ring(bp);
 
 	for (i = 0; i < TX_RING_SIZE; i++) {
 		bp->queues[0].tx_ring[i].addr = 0;