diff mbox series

[net] net: stmmac: xgmac: fix safety error descriptions

Message ID 20240123085037.939471-1-0x1207@gmail.com (mailing list archive)
State New, archived
Headers show
Series [net] net: stmmac: xgmac: fix safety error descriptions | expand

Commit Message

Furong Xu Jan. 23, 2024, 8:50 a.m. UTC
Commit 56e58d6c8a56 ("net: stmmac: Implement Safety Features in
XGMAC core") prints safety error descriptions when safety error assert,
but missed some special errors, and mixed correctable errors and
uncorrectable errors together.
This patch complete the error code list and print the type of errors.

Fixes: 56e58d6c8a56 ("net: stmmac: Implement Safety Features in XGMAC core")
Signed-off-by: Furong Xu <0x1207@gmail.com>
---
 .../ethernet/stmicro/stmmac/dwxgmac2_core.c   | 36 +++++++++----------
 1 file changed, 18 insertions(+), 18 deletions(-)

Comments

Simon Horman Jan. 23, 2024, 7:50 p.m. UTC | #1
On Tue, Jan 23, 2024 at 04:50:37PM +0800, Furong Xu wrote:
> Commit 56e58d6c8a56 ("net: stmmac: Implement Safety Features in
> XGMAC core") prints safety error descriptions when safety error assert,
> but missed some special errors, and mixed correctable errors and
> uncorrectable errors together.
> This patch complete the error code list and print the type of errors.
> 
> Fixes: 56e58d6c8a56 ("net: stmmac: Implement Safety Features in XGMAC core")
> Signed-off-by: Furong Xu <0x1207@gmail.com>

I'm not entirely sure this is a fix rather than an enhancement.
But the code change itself looks good to me.

Reviewed-by: Simon Horman <horms@kernel.org>
Serge Semin Jan. 24, 2024, 2:25 p.m. UTC | #2
On Tue, Jan 23, 2024 at 04:50:37PM +0800, Furong Xu wrote:
> Commit 56e58d6c8a56 ("net: stmmac: Implement Safety Features in
> XGMAC core") prints safety error descriptions when safety error assert,
> but missed some special errors, and mixed correctable errors and
> uncorrectable errors together.
> This patch complete the error code list and print the type of errors.

The XGMAC ECC Safety code has likely been just copied from the DW GMAC
v5 (DW QoS Eth) part. So this change is partly relevant to that code too. I
can't confirm that the special errors support is relevant to the DW
QoS Eth too (it likely is though), so what about splitting this patch
up into two:
1. Elaborate the errors description for DW GMAC v5 and DW XGMAC.
2. Add new ECC safety errors support.
?

On the other hand if we were sure that both DW QoS Eth and XGMAC
safety features implementation match the ideal solution would be to
refactor out the common code into a dedicated module.

-Serge(y)

> 
> Fixes: 56e58d6c8a56 ("net: stmmac: Implement Safety Features in XGMAC core")
> Signed-off-by: Furong Xu <0x1207@gmail.com>
> ---
>  .../ethernet/stmicro/stmmac/dwxgmac2_core.c   | 36 +++++++++----------
>  1 file changed, 18 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
> index eb48211d9b0e..ad812484059e 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
> @@ -748,29 +748,29 @@ static void dwxgmac3_handle_mac_err(struct net_device *ndev,
>  }
>  
>  static const struct dwxgmac3_error_desc dwxgmac3_mtl_errors[32]= {
> -	{ true, "TXCES", "MTL TX Memory Error" },
> +	{ true, "TXCES", "MTL TX Memory Correctable Error" },
>  	{ true, "TXAMS", "MTL TX Memory Address Mismatch Error" },
> -	{ true, "TXUES", "MTL TX Memory Error" },
> +	{ true, "TXUES", "MTL TX Memory Uncorrectable Error" },
>  	{ false, "UNKNOWN", "Unknown Error" }, /* 3 */
> -	{ true, "RXCES", "MTL RX Memory Error" },
> +	{ true, "RXCES", "MTL RX Memory Correctable Error" },
>  	{ true, "RXAMS", "MTL RX Memory Address Mismatch Error" },
> -	{ true, "RXUES", "MTL RX Memory Error" },
> +	{ true, "RXUES", "MTL RX Memory Uncorrectable Error" },
>  	{ false, "UNKNOWN", "Unknown Error" }, /* 7 */
> -	{ true, "ECES", "MTL EST Memory Error" },
> +	{ true, "ECES", "MTL EST Memory Correctable Error" },
>  	{ true, "EAMS", "MTL EST Memory Address Mismatch Error" },
> -	{ true, "EUES", "MTL EST Memory Error" },
> +	{ true, "EUES", "MTL EST Memory Uncorrectable Error" },
>  	{ false, "UNKNOWN", "Unknown Error" }, /* 11 */
> -	{ true, "RPCES", "MTL RX Parser Memory Error" },
> +	{ true, "RPCES", "MTL RX Parser Memory Correctable Error" },
>  	{ true, "RPAMS", "MTL RX Parser Memory Address Mismatch Error" },
> -	{ true, "RPUES", "MTL RX Parser Memory Error" },
> +	{ true, "RPUES", "MTL RX Parser Memory Uncorrectable Error" },
>  	{ false, "UNKNOWN", "Unknown Error" }, /* 15 */
> -	{ false, "UNKNOWN", "Unknown Error" }, /* 16 */
> -	{ false, "UNKNOWN", "Unknown Error" }, /* 17 */
> -	{ false, "UNKNOWN", "Unknown Error" }, /* 18 */
> +	{ true, "SCES", "MTL SGF GCL Memory Correctable Error" },
> +	{ true, "SAMS", "MTL SGF GCL Memory Address Mismatch Error" },
> +	{ true, "SUES", "MTL SGF GCL Memory Uncorrectable Error" },
>  	{ false, "UNKNOWN", "Unknown Error" }, /* 19 */
> -	{ false, "UNKNOWN", "Unknown Error" }, /* 20 */
> -	{ false, "UNKNOWN", "Unknown Error" }, /* 21 */
> -	{ false, "UNKNOWN", "Unknown Error" }, /* 22 */
> +	{ true, "RXFCES", "MTL RXF Memory Correctable Error" },
> +	{ true, "RXFAMS", "MTL RXF Memory Address Mismatch Error" },
> +	{ true, "RXFUES", "MTL RXF Memory Uncorrectable Error" },
>  	{ false, "UNKNOWN", "Unknown Error" }, /* 23 */
>  	{ false, "UNKNOWN", "Unknown Error" }, /* 24 */
>  	{ false, "UNKNOWN", "Unknown Error" }, /* 25 */
> @@ -796,13 +796,13 @@ static void dwxgmac3_handle_mtl_err(struct net_device *ndev,
>  }
>  
>  static const struct dwxgmac3_error_desc dwxgmac3_dma_errors[32]= {
> -	{ true, "TCES", "DMA TSO Memory Error" },
> +	{ true, "TCES", "DMA TSO Memory Correctable Error" },
>  	{ true, "TAMS", "DMA TSO Memory Address Mismatch Error" },
> -	{ true, "TUES", "DMA TSO Memory Error" },
> +	{ true, "TUES", "DMA TSO Memory Uncorrectable Error" },
>  	{ false, "UNKNOWN", "Unknown Error" }, /* 3 */
> -	{ true, "DCES", "DMA DCACHE Memory Error" },
> +	{ true, "DCES", "DMA DCACHE Memory Correctable Error" },
>  	{ true, "DAMS", "DMA DCACHE Address Mismatch Error" },
> -	{ true, "DUES", "DMA DCACHE Memory Error" },
> +	{ true, "DUES", "DMA DCACHE Memory Uncorrectable Error" },
>  	{ false, "UNKNOWN", "Unknown Error" }, /* 7 */
>  	{ false, "UNKNOWN", "Unknown Error" }, /* 8 */
>  	{ false, "UNKNOWN", "Unknown Error" }, /* 9 */
> -- 
> 2.34.1
> 
>
Furong Xu Jan. 25, 2024, 2:34 a.m. UTC | #3
On Wed, 24 Jan 2024 17:25:27 +0300
Serge Semin <fancer.lancer@gmail.com> wrote:

> On Tue, Jan 23, 2024 at 04:50:37PM +0800, Furong Xu wrote:
> > Commit 56e58d6c8a56 ("net: stmmac: Implement Safety Features in
> > XGMAC core") prints safety error descriptions when safety error assert,
> > but missed some special errors, and mixed correctable errors and
> > uncorrectable errors together.
> > This patch complete the error code list and print the type of errors.  
> 
> The XGMAC ECC Safety code has likely been just copied from the DW GMAC
> v5 (DW QoS Eth) part. So this change is partly relevant to that code too. I
> can't confirm that the special errors support is relevant to the DW
> QoS Eth too (it likely is though), so what about splitting this patch
> up into two:
> 1. Elaborate the errors description for DW GMAC v5 and DW XGMAC.
> 2. Add new ECC safety errors support.
> ?
> 
> On the other hand if we were sure that both DW QoS Eth and XGMAC
> safety features implementation match the ideal solution would be to
> refactor out the common code into a dedicated module.
> 
> -Serge(y)
> 

Checked XGMAC Version 3.20a and DW QoS Eth Version 5.20a, the safety error
code definitions are not identical at all, they do have some differences,
about more than 20 bits of status register are different.
I think we should just leave them in individual implementations.

> > 
> > Fixes: 56e58d6c8a56 ("net: stmmac: Implement Safety Features in XGMAC core")
> > Signed-off-by: Furong Xu <0x1207@gmail.com>
> > ---
> >  .../ethernet/stmicro/stmmac/dwxgmac2_core.c   | 36 +++++++++----------
> >  1 file changed, 18 insertions(+), 18 deletions(-)
> > 
> > diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
> > index eb48211d9b0e..ad812484059e 100644
> > --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
> > +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
> > @@ -748,29 +748,29 @@ static void dwxgmac3_handle_mac_err(struct net_device *ndev,
> >  }
> >  
> >  static const struct dwxgmac3_error_desc dwxgmac3_mtl_errors[32]= {
> > -	{ true, "TXCES", "MTL TX Memory Error" },
> > +	{ true, "TXCES", "MTL TX Memory Correctable Error" },
> >  	{ true, "TXAMS", "MTL TX Memory Address Mismatch Error" },
> > -	{ true, "TXUES", "MTL TX Memory Error" },
> > +	{ true, "TXUES", "MTL TX Memory Uncorrectable Error" },
> >  	{ false, "UNKNOWN", "Unknown Error" }, /* 3 */
> > -	{ true, "RXCES", "MTL RX Memory Error" },
> > +	{ true, "RXCES", "MTL RX Memory Correctable Error" },
> >  	{ true, "RXAMS", "MTL RX Memory Address Mismatch Error" },
> > -	{ true, "RXUES", "MTL RX Memory Error" },
> > +	{ true, "RXUES", "MTL RX Memory Uncorrectable Error" },
> >  	{ false, "UNKNOWN", "Unknown Error" }, /* 7 */
> > -	{ true, "ECES", "MTL EST Memory Error" },
> > +	{ true, "ECES", "MTL EST Memory Correctable Error" },
> >  	{ true, "EAMS", "MTL EST Memory Address Mismatch Error" },
> > -	{ true, "EUES", "MTL EST Memory Error" },
> > +	{ true, "EUES", "MTL EST Memory Uncorrectable Error" },
> >  	{ false, "UNKNOWN", "Unknown Error" }, /* 11 */
> > -	{ true, "RPCES", "MTL RX Parser Memory Error" },
> > +	{ true, "RPCES", "MTL RX Parser Memory Correctable Error" },
> >  	{ true, "RPAMS", "MTL RX Parser Memory Address Mismatch Error" },
> > -	{ true, "RPUES", "MTL RX Parser Memory Error" },
> > +	{ true, "RPUES", "MTL RX Parser Memory Uncorrectable Error" },
> >  	{ false, "UNKNOWN", "Unknown Error" }, /* 15 */
> > -	{ false, "UNKNOWN", "Unknown Error" }, /* 16 */
> > -	{ false, "UNKNOWN", "Unknown Error" }, /* 17 */
> > -	{ false, "UNKNOWN", "Unknown Error" }, /* 18 */
> > +	{ true, "SCES", "MTL SGF GCL Memory Correctable Error" },
> > +	{ true, "SAMS", "MTL SGF GCL Memory Address Mismatch Error" },
> > +	{ true, "SUES", "MTL SGF GCL Memory Uncorrectable Error" },
> >  	{ false, "UNKNOWN", "Unknown Error" }, /* 19 */
> > -	{ false, "UNKNOWN", "Unknown Error" }, /* 20 */
> > -	{ false, "UNKNOWN", "Unknown Error" }, /* 21 */
> > -	{ false, "UNKNOWN", "Unknown Error" }, /* 22 */
> > +	{ true, "RXFCES", "MTL RXF Memory Correctable Error" },
> > +	{ true, "RXFAMS", "MTL RXF Memory Address Mismatch Error" },
> > +	{ true, "RXFUES", "MTL RXF Memory Uncorrectable Error" },
> >  	{ false, "UNKNOWN", "Unknown Error" }, /* 23 */
> >  	{ false, "UNKNOWN", "Unknown Error" }, /* 24 */
> >  	{ false, "UNKNOWN", "Unknown Error" }, /* 25 */
> > @@ -796,13 +796,13 @@ static void dwxgmac3_handle_mtl_err(struct net_device *ndev,
> >  }
> >  
> >  static const struct dwxgmac3_error_desc dwxgmac3_dma_errors[32]= {
> > -	{ true, "TCES", "DMA TSO Memory Error" },
> > +	{ true, "TCES", "DMA TSO Memory Correctable Error" },
> >  	{ true, "TAMS", "DMA TSO Memory Address Mismatch Error" },
> > -	{ true, "TUES", "DMA TSO Memory Error" },
> > +	{ true, "TUES", "DMA TSO Memory Uncorrectable Error" },
> >  	{ false, "UNKNOWN", "Unknown Error" }, /* 3 */
> > -	{ true, "DCES", "DMA DCACHE Memory Error" },
> > +	{ true, "DCES", "DMA DCACHE Memory Correctable Error" },
> >  	{ true, "DAMS", "DMA DCACHE Address Mismatch Error" },
> > -	{ true, "DUES", "DMA DCACHE Memory Error" },
> > +	{ true, "DUES", "DMA DCACHE Memory Uncorrectable Error" },
> >  	{ false, "UNKNOWN", "Unknown Error" }, /* 7 */
> >  	{ false, "UNKNOWN", "Unknown Error" }, /* 8 */
> >  	{ false, "UNKNOWN", "Unknown Error" }, /* 9 */
> > -- 
> > 2.34.1
> > 
> >
Paolo Abeni Jan. 25, 2024, 10:09 a.m. UTC | #4
On Thu, 2024-01-25 at 10:34 +0800, Furong Xu wrote:
> On Wed, 24 Jan 2024 17:25:27 +0300
> Serge Semin <fancer.lancer@gmail.com> wrote:
> 
> > On Tue, Jan 23, 2024 at 04:50:37PM +0800, Furong Xu wrote:
> > > Commit 56e58d6c8a56 ("net: stmmac: Implement Safety Features in
> > > XGMAC core") prints safety error descriptions when safety error assert,
> > > but missed some special errors, and mixed correctable errors and
> > > uncorrectable errors together.
> > > This patch complete the error code list and print the type of errors.  
> > 
> > The XGMAC ECC Safety code has likely been just copied from the DW GMAC
> > v5 (DW QoS Eth) part. So this change is partly relevant to that code too. I
> > can't confirm that the special errors support is relevant to the DW
> > QoS Eth too (it likely is though), so what about splitting this patch
> > up into two:
> > 1. Elaborate the errors description for DW GMAC v5 and DW XGMAC.
> > 2. Add new ECC safety errors support.
> > ?
> > 
> > On the other hand if we were sure that both DW QoS Eth and XGMAC
> > safety features implementation match the ideal solution would be to
> > refactor out the common code into a dedicated module.
> > 
> > -Serge(y)
> > 
> 
> Checked XGMAC Version 3.20a and DW QoS Eth Version 5.20a, the safety error
> code definitions are not identical at all, they do have some differences,
> about more than 20 bits of status register are different.
> I think we should just leave them in individual implementations.

@Serge: given the above, would you still advice for splitting this
patch into 2?

Thanks,

Paolo
Serge Semin Jan. 25, 2024, 11:01 a.m. UTC | #5
On Thu, Jan 25, 2024 at 11:09:06AM +0100, Paolo Abeni wrote:
> On Thu, 2024-01-25 at 10:34 +0800, Furong Xu wrote:
> > On Wed, 24 Jan 2024 17:25:27 +0300
> > Serge Semin <fancer.lancer@gmail.com> wrote:
> > 
> > > On Tue, Jan 23, 2024 at 04:50:37PM +0800, Furong Xu wrote:
> > > > Commit 56e58d6c8a56 ("net: stmmac: Implement Safety Features in
> > > > XGMAC core") prints safety error descriptions when safety error assert,
> > > > but missed some special errors, and mixed correctable errors and
> > > > uncorrectable errors together.
> > > > This patch complete the error code list and print the type of errors.  
> > > 
> > > The XGMAC ECC Safety code has likely been just copied from the DW GMAC
> > > v5 (DW QoS Eth) part. So this change is partly relevant to that code too. I
> > > can't confirm that the special errors support is relevant to the DW
> > > QoS Eth too (it likely is though), so what about splitting this patch
> > > up into two:
> > > 1. Elaborate the errors description for DW GMAC v5 and DW XGMAC.
> > > 2. Add new ECC safety errors support.
> > > ?
> > > 
> > > On the other hand if we were sure that both DW QoS Eth and XGMAC
> > > safety features implementation match the ideal solution would be to
> > > refactor out the common code into a dedicated module.
> > > 
> > > -Serge(y)
> > > 
> > 
> > Checked XGMAC Version 3.20a and DW QoS Eth Version 5.20a, the safety error
> > code definitions are not identical at all, they do have some differences,
> > about more than 20 bits of status register are different.
> > I think we should just leave them in individual implementations.
> 

> @Serge: given the above, would you still advice for splitting this
> patch into 2?

Preliminary I would still in insist on splitting up. I'll double check
the patch and the Safety feature implementations in both devices and
give more detailed response to Furong in an hour or so.

-Serge(y)

> 
> Thanks,
> 
> Paolo
>
Serge Semin Jan. 25, 2024, 1:48 p.m. UTC | #6
On Thu, Jan 25, 2024 at 10:34:54AM +0800, Furong Xu wrote:
> On Wed, 24 Jan 2024 17:25:27 +0300
> Serge Semin <fancer.lancer@gmail.com> wrote:
> 
> > On Tue, Jan 23, 2024 at 04:50:37PM +0800, Furong Xu wrote:
> > > Commit 56e58d6c8a56 ("net: stmmac: Implement Safety Features in
> > > XGMAC core") prints safety error descriptions when safety error assert,
> > > but missed some special errors, and mixed correctable errors and
> > > uncorrectable errors together.
> > > This patch complete the error code list and print the type of errors.  
> > 
> > The XGMAC ECC Safety code has likely been just copied from the DW GMAC
> > v5 (DW QoS Eth) part. So this change is partly relevant to that code too. I
> > can't confirm that the special errors support is relevant to the DW
> > QoS Eth too (it likely is though), so what about splitting this patch
> > up into two:
> > 1. Elaborate the errors description for DW GMAC v5 and DW XGMAC.
> > 2. Add new ECC safety errors support.
> > ?
> > 
> > On the other hand if we were sure that both DW QoS Eth and XGMAC
> > safety features implementation match the ideal solution would be to
> > refactor out the common code into a dedicated module.
> > 
> > -Serge(y)
> > 
> 

> Checked XGMAC Version 3.20a and DW QoS Eth Version 5.20a, the safety error
> code definitions are not identical at all, they do have some differences,
> about more than 20 bits of status register are different.
> I think we should just leave them in individual implementations.

For some reason you answered to the last part of my comment and
completely ignored the first part which was the main point of my
message.

Regarding the Safety Feature support implemented in QoS Eth and XGMAC
STMMAC modules. You were wrong in using the statement "at all". Except
the optional events enable/disable procedure introduced in the commit
5ac712dcdfef ("net: stmmac: enable platform specific safety
features"), there aren't many differences: at least the errors
handling and report are identical, MTL and DMA error flags match, even
MTL and DMA ECC/Safety IRQ flags match. The only difference is in the
MTL/MAC DPP (Data Parity Protection) part which can be easily factored
out based on the device ID should we attempt to refactor the safety
feature code. See the attached html-diff for more details of what
match and what is different.

Anyway I am not insisting on the refactoring. That was just a
proposal, a more preferred alternative to simultaneously patching two
parts of the drivers looking very much alike. Such refactoring would
improve the code maintainability. The main point of my comment was to
extend your patch for DW QoS Eth safety feature implementation too
since some of the changes you introduced were useful for it too, and
in splitting the patch up since your patch added new flags support
which was unrelated change.  Thus your patch would turned into the
two-patches patchset like this:
[Patch 1] would provide an elaborated errors description for both DW
QOS Eth (GMAC v5.x) and DW XGMAC.
[Patch 2] would introduce the new ECC safety errors support.

See my further comments about the respective changes.

> 
> > > 
> > > Fixes: 56e58d6c8a56 ("net: stmmac: Implement Safety Features in XGMAC core")
> > > Signed-off-by: Furong Xu <0x1207@gmail.com>
> > > ---
> > >  .../ethernet/stmicro/stmmac/dwxgmac2_core.c   | 36 +++++++++----------
> > >  1 file changed, 18 insertions(+), 18 deletions(-)
> > > 
> > > diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
> > > index eb48211d9b0e..ad812484059e 100644
> > > --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
> > > +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
> > > @@ -748,29 +748,29 @@ static void dwxgmac3_handle_mac_err(struct net_device *ndev,
> > >  }
> > >  
> > >  static const struct dwxgmac3_error_desc dwxgmac3_mtl_errors[32]= {

> > > -	{ true, "TXCES", "MTL TX Memory Error" },
> > > +	{ true, "TXCES", "MTL TX Memory Correctable Error" },

Applicable for both IP-cores
[Patch 1] +QoS, +XGMAC
please apply this change to dwmac5.c too.

> > >  	{ true, "TXAMS", "MTL TX Memory Address Mismatch Error" },

> > > -	{ true, "TXUES", "MTL TX Memory Error" },
> > > +	{ true, "TXUES", "MTL TX Memory Uncorrectable Error" },

[Patch 1] +QoS, +XGMAC
ditto

> > >  	{ false, "UNKNOWN", "Unknown Error" }, /* 3 */

> > > -	{ true, "RXCES", "MTL RX Memory Error" },
> > > +	{ true, "RXCES", "MTL RX Memory Correctable Error" },

[Patch 1] +QoS, +XGMAC
ditto

> > >  	{ true, "RXAMS", "MTL RX Memory Address Mismatch Error" },

> > > -	{ true, "RXUES", "MTL RX Memory Error" },
> > > +	{ true, "RXUES", "MTL RX Memory Uncorrectable Error" },

[Patch 1] +QoS, +XGMAC
ditto

> > >  	{ false, "UNKNOWN", "Unknown Error" }, /* 7 */

> > > -	{ true, "ECES", "MTL EST Memory Error" },
> > > +	{ true, "ECES", "MTL EST Memory Correctable Error" },

[Patch 1] +QoS, +XGMAC
ditto

> > >  	{ true, "EAMS", "MTL EST Memory Address Mismatch Error" },

> > > -	{ true, "EUES", "MTL EST Memory Error" },
> > > +	{ true, "EUES", "MTL EST Memory Uncorrectable Error" },

[Patch 1] +QoS, +XGMAC
ditto

> > >  	{ false, "UNKNOWN", "Unknown Error" }, /* 11 */

> > > -	{ true, "RPCES", "MTL RX Parser Memory Error" },
> > > +	{ true, "RPCES", "MTL RX Parser Memory Correctable Error" },

[Patch 1] +QoS, +XGMAC
ditto

> > >  	{ true, "RPAMS", "MTL RX Parser Memory Address Mismatch Error" },

> > > -	{ true, "RPUES", "MTL RX Parser Memory Error" },
> > > +	{ true, "RPUES", "MTL RX Parser Memory Uncorrectable Error" },

[Patch 1] +QoS, +XGMAC
ditto

> > >  	{ false, "UNKNOWN", "Unknown Error" }, /* 15 */

> > > -	{ false, "UNKNOWN", "Unknown Error" }, /* 16 */
> > > -	{ false, "UNKNOWN", "Unknown Error" }, /* 17 */
> > > -	{ false, "UNKNOWN", "Unknown Error" }, /* 18 */
> > > +	{ true, "SCES", "MTL SGF GCL Memory Correctable Error" },
> > > +	{ true, "SAMS", "MTL SGF GCL Memory Address Mismatch Error" },
> > > +	{ true, "SUES", "MTL SGF GCL Memory Uncorrectable Error" },
> > >  	{ false, "UNKNOWN", "Unknown Error" }, /* 19 */
> > > -	{ false, "UNKNOWN", "Unknown Error" }, /* 20 */
> > > -	{ false, "UNKNOWN", "Unknown Error" }, /* 21 */
> > > -	{ false, "UNKNOWN", "Unknown Error" }, /* 22 */
> > > +	{ true, "RXFCES", "MTL RXF Memory Correctable Error" },
> > > +	{ true, "RXFAMS", "MTL RXF Memory Address Mismatch Error" },
> > > +	{ true, "RXFUES", "MTL RXF Memory Uncorrectable Error" },

This introduces the new flags support. Please move this change into a
separate patch (Patch 2):
[Patch 2] +XGMAC only.

My DW QoS Eth v5.10a databook doesn't have these flags defined. If
your 5.20a HW-manual have them described then please add them for DW
QoS Eth too.

> > >  	{ false, "UNKNOWN", "Unknown Error" }, /* 23 */
> > >  	{ false, "UNKNOWN", "Unknown Error" }, /* 24 */
> > >  	{ false, "UNKNOWN", "Unknown Error" }, /* 25 */
> > > @@ -796,13 +796,13 @@ static void dwxgmac3_handle_mtl_err(struct net_device *ndev,
> > >  }
> > >  
> > >  static const struct dwxgmac3_error_desc dwxgmac3_dma_errors[32]= {

> > > -	{ true, "TCES", "DMA TSO Memory Error" },
> > > +	{ true, "TCES", "DMA TSO Memory Correctable Error" },

Applicable for both IP-cores
[Patch 1] +QoS, +XGMAC
please apply this change to dwmac5.c too.

> > >  	{ true, "TAMS", "DMA TSO Memory Address Mismatch Error" },

> > > -	{ true, "TUES", "DMA TSO Memory Error" },
> > > +	{ true, "TUES", "DMA TSO Memory Uncorrectable Error" },

[Patch 1] +QoS, +XGMAC
ditto

> > >  	{ false, "UNKNOWN", "Unknown Error" }, /* 3 */

> > > -	{ true, "DCES", "DMA DCACHE Memory Error" },
> > > +	{ true, "DCES", "DMA DCACHE Memory Correctable Error" },
> > >  	{ true, "DAMS", "DMA DCACHE Address Mismatch Error" },
> > > -	{ true, "DUES", "DMA DCACHE Memory Error" },
> > > +	{ true, "DUES", "DMA DCACHE Memory Uncorrectable Error" },

AFAICS applicable for XGMAC only
[Patch 1] +XGMAC only.
Once again, My DW QoS Eth v5.10a databook doesn't have these flags
defined. So if your DW QoS Eth 5.20a HW-manual do have them described
please add them for DW QoS Eth with the elaborated description in the
framework of the Patch 2.

-Serge(y)

> > >  	{ false, "UNKNOWN", "Unknown Error" }, /* 7 */
> > >  	{ false, "UNKNOWN", "Unknown Error" }, /* 8 */
> > >  	{ false, "UNKNOWN", "Unknown Error" }, /* 9 */
> > > -- 
> > > 2.34.1
> > > 
> > >   
>
Furong Xu Jan. 26, 2024, 2:08 a.m. UTC | #7
On Thu, 25 Jan 2024 16:48:23 +0300
Serge Semin <fancer.lancer@gmail.com> wrote:

> On Thu, Jan 25, 2024 at 10:34:54AM +0800, Furong Xu wrote:
> > On Wed, 24 Jan 2024 17:25:27 +0300
> > Serge Semin <fancer.lancer@gmail.com> wrote:
> >   
> > > On Tue, Jan 23, 2024 at 04:50:37PM +0800, Furong Xu wrote:  
> > > > Commit 56e58d6c8a56 ("net: stmmac: Implement Safety Features in
> > > > XGMAC core") prints safety error descriptions when safety error assert,
> > > > but missed some special errors, and mixed correctable errors and
> > > > uncorrectable errors together.
> > > > This patch complete the error code list and print the type of errors.    
> > > 
> > > The XGMAC ECC Safety code has likely been just copied from the DW GMAC
> > > v5 (DW QoS Eth) part. So this change is partly relevant to that code too. I
> > > can't confirm that the special errors support is relevant to the DW
> > > QoS Eth too (it likely is though), so what about splitting this patch
> > > up into two:
> > > 1. Elaborate the errors description for DW GMAC v5 and DW XGMAC.
> > > 2. Add new ECC safety errors support.
> > > ?
> > > 
> > > On the other hand if we were sure that both DW QoS Eth and XGMAC
> > > safety features implementation match the ideal solution would be to
> > > refactor out the common code into a dedicated module.
> > > 
> > > -Serge(y)
> > >   
> >   
> 
> > Checked XGMAC Version 3.20a and DW QoS Eth Version 5.20a, the safety error
> > code definitions are not identical at all, they do have some differences,
> > about more than 20 bits of status register are different.
> > I think we should just leave them in individual implementations.  
> 
> For some reason you answered to the last part of my comment and
> completely ignored the first part which was the main point of my
> message.
> 
> Regarding the Safety Feature support implemented in QoS Eth and XGMAC
> STMMAC modules. You were wrong in using the statement "at all". Except
> the optional events enable/disable procedure introduced in the commit
> 5ac712dcdfef ("net: stmmac: enable platform specific safety
> features"), there aren't many differences: at least the errors
> handling and report are identical, MTL and DMA error flags match, even
> MTL and DMA ECC/Safety IRQ flags match. The only difference is in the
> MTL/MAC DPP (Data Parity Protection) part which can be easily factored
> out based on the device ID should we attempt to refactor the safety
> feature code. See the attached html-diff for more details of what
> match and what is different.
> 
> Anyway I am not insisting on the refactoring. That was just a
> proposal, a more preferred alternative to simultaneously patching two
> parts of the drivers looking very much alike. Such refactoring would
> improve the code maintainability. The main point of my comment was to
> extend your patch for DW QoS Eth safety feature implementation too
> since some of the changes you introduced were useful for it too, and
> in splitting the patch up since your patch added new flags support
> which was unrelated change.  Thus your patch would turned into the
> two-patches patchset like this:
> [Patch 1] would provide an elaborated errors description for both DW
> QOS Eth (GMAC v5.x) and DW XGMAC.
> [Patch 2] would introduce the new ECC safety errors support.
> 
> See my further comments about the respective changes.
> 
> >   
> > > > 
> > > > Fixes: 56e58d6c8a56 ("net: stmmac: Implement Safety Features in XGMAC core")
> > > > Signed-off-by: Furong Xu <0x1207@gmail.com>
> > > > ---
> > > >  .../ethernet/stmicro/stmmac/dwxgmac2_core.c   | 36 +++++++++----------
> > > >  1 file changed, 18 insertions(+), 18 deletions(-)
> > > > 
> > > > diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
> > > > index eb48211d9b0e..ad812484059e 100644
> > > > --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
> > > > +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
> > > > @@ -748,29 +748,29 @@ static void dwxgmac3_handle_mac_err(struct net_device *ndev,
> > > >  }
> > > >  
> > > >  static const struct dwxgmac3_error_desc dwxgmac3_mtl_errors[32]= {  
> 
> > > > -	{ true, "TXCES", "MTL TX Memory Error" },
> > > > +	{ true, "TXCES", "MTL TX Memory Correctable Error" },  
> 
> Applicable for both IP-cores
> [Patch 1] +QoS, +XGMAC
> please apply this change to dwmac5.c too.
> 
> > > >  	{ true, "TXAMS", "MTL TX Memory Address Mismatch Error" },  
> 
> > > > -	{ true, "TXUES", "MTL TX Memory Error" },
> > > > +	{ true, "TXUES", "MTL TX Memory Uncorrectable Error" },  
> 
> [Patch 1] +QoS, +XGMAC
> ditto
> 
> > > >  	{ false, "UNKNOWN", "Unknown Error" }, /* 3 */  
> 
> > > > -	{ true, "RXCES", "MTL RX Memory Error" },
> > > > +	{ true, "RXCES", "MTL RX Memory Correctable Error" },  
> 
> [Patch 1] +QoS, +XGMAC
> ditto
> 
> > > >  	{ true, "RXAMS", "MTL RX Memory Address Mismatch Error" },  
> 
> > > > -	{ true, "RXUES", "MTL RX Memory Error" },
> > > > +	{ true, "RXUES", "MTL RX Memory Uncorrectable Error" },  
> 
> [Patch 1] +QoS, +XGMAC
> ditto
> 
> > > >  	{ false, "UNKNOWN", "Unknown Error" }, /* 7 */  
> 
> > > > -	{ true, "ECES", "MTL EST Memory Error" },
> > > > +	{ true, "ECES", "MTL EST Memory Correctable Error" },  
> 
> [Patch 1] +QoS, +XGMAC
> ditto
> 
> > > >  	{ true, "EAMS", "MTL EST Memory Address Mismatch Error" },  
> 
> > > > -	{ true, "EUES", "MTL EST Memory Error" },
> > > > +	{ true, "EUES", "MTL EST Memory Uncorrectable Error" },  
> 
> [Patch 1] +QoS, +XGMAC
> ditto
> 
> > > >  	{ false, "UNKNOWN", "Unknown Error" }, /* 11 */  
> 
> > > > -	{ true, "RPCES", "MTL RX Parser Memory Error" },
> > > > +	{ true, "RPCES", "MTL RX Parser Memory Correctable Error" },  
> 
> [Patch 1] +QoS, +XGMAC
> ditto
> 
> > > >  	{ true, "RPAMS", "MTL RX Parser Memory Address Mismatch Error" },  
> 
> > > > -	{ true, "RPUES", "MTL RX Parser Memory Error" },
> > > > +	{ true, "RPUES", "MTL RX Parser Memory Uncorrectable Error" },  
> 
> [Patch 1] +QoS, +XGMAC
> ditto
> 
> > > >  	{ false, "UNKNOWN", "Unknown Error" }, /* 15 */  
> 
> > > > -	{ false, "UNKNOWN", "Unknown Error" }, /* 16 */
> > > > -	{ false, "UNKNOWN", "Unknown Error" }, /* 17 */
> > > > -	{ false, "UNKNOWN", "Unknown Error" }, /* 18 */
> > > > +	{ true, "SCES", "MTL SGF GCL Memory Correctable Error" },
> > > > +	{ true, "SAMS", "MTL SGF GCL Memory Address Mismatch Error" },
> > > > +	{ true, "SUES", "MTL SGF GCL Memory Uncorrectable Error" },
> > > >  	{ false, "UNKNOWN", "Unknown Error" }, /* 19 */
> > > > -	{ false, "UNKNOWN", "Unknown Error" }, /* 20 */
> > > > -	{ false, "UNKNOWN", "Unknown Error" }, /* 21 */
> > > > -	{ false, "UNKNOWN", "Unknown Error" }, /* 22 */
> > > > +	{ true, "RXFCES", "MTL RXF Memory Correctable Error" },
> > > > +	{ true, "RXFAMS", "MTL RXF Memory Address Mismatch Error" },
> > > > +	{ true, "RXFUES", "MTL RXF Memory Uncorrectable Error" },  
> 
> This introduces the new flags support. Please move this change into a
> separate patch (Patch 2):
> [Patch 2] +XGMAC only.
> 
> My DW QoS Eth v5.10a databook doesn't have these flags defined. If
> your 5.20a HW-manual have them described then please add them for DW
> QoS Eth too.
> 
> > > >  	{ false, "UNKNOWN", "Unknown Error" }, /* 23 */
> > > >  	{ false, "UNKNOWN", "Unknown Error" }, /* 24 */
> > > >  	{ false, "UNKNOWN", "Unknown Error" }, /* 25 */
> > > > @@ -796,13 +796,13 @@ static void dwxgmac3_handle_mtl_err(struct net_device *ndev,
> > > >  }
> > > >  
> > > >  static const struct dwxgmac3_error_desc dwxgmac3_dma_errors[32]= {  
> 
> > > > -	{ true, "TCES", "DMA TSO Memory Error" },
> > > > +	{ true, "TCES", "DMA TSO Memory Correctable Error" },  
> 
> Applicable for both IP-cores
> [Patch 1] +QoS, +XGMAC
> please apply this change to dwmac5.c too.
> 
> > > >  	{ true, "TAMS", "DMA TSO Memory Address Mismatch Error" },  
> 
> > > > -	{ true, "TUES", "DMA TSO Memory Error" },
> > > > +	{ true, "TUES", "DMA TSO Memory Uncorrectable Error" },  
> 
> [Patch 1] +QoS, +XGMAC
> ditto
> 
> > > >  	{ false, "UNKNOWN", "Unknown Error" }, /* 3 */  
> 
> > > > -	{ true, "DCES", "DMA DCACHE Memory Error" },
> > > > +	{ true, "DCES", "DMA DCACHE Memory Correctable Error" },
> > > >  	{ true, "DAMS", "DMA DCACHE Address Mismatch Error" },
> > > > -	{ true, "DUES", "DMA DCACHE Memory Error" },
> > > > +	{ true, "DUES", "DMA DCACHE Memory Uncorrectable Error" },  
> 
> AFAICS applicable for XGMAC only
> [Patch 1] +XGMAC only.
> Once again, My DW QoS Eth v5.10a databook doesn't have these flags
> defined. So if your DW QoS Eth 5.20a HW-manual do have them described
> please add them for DW QoS Eth with the elaborated description in the
> framework of the Patch 2.
> 
> -Serge(y)
> 
> > > >  	{ false, "UNKNOWN", "Unknown Error" }, /* 7 */
> > > >  	{ false, "UNKNOWN", "Unknown Error" }, /* 8 */
> > > >  	{ false, "UNKNOWN", "Unknown Error" }, /* 9 */
> > > > -- 
> > > > 2.34.1
> > > > 
> > > >     
> >   

Hi Serge:
Thanks for your detailed explanation, new refactoring will be sent to net-next.
diff mbox series

Patch

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
index eb48211d9b0e..ad812484059e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
@@ -748,29 +748,29 @@  static void dwxgmac3_handle_mac_err(struct net_device *ndev,
 }
 
 static const struct dwxgmac3_error_desc dwxgmac3_mtl_errors[32]= {
-	{ true, "TXCES", "MTL TX Memory Error" },
+	{ true, "TXCES", "MTL TX Memory Correctable Error" },
 	{ true, "TXAMS", "MTL TX Memory Address Mismatch Error" },
-	{ true, "TXUES", "MTL TX Memory Error" },
+	{ true, "TXUES", "MTL TX Memory Uncorrectable Error" },
 	{ false, "UNKNOWN", "Unknown Error" }, /* 3 */
-	{ true, "RXCES", "MTL RX Memory Error" },
+	{ true, "RXCES", "MTL RX Memory Correctable Error" },
 	{ true, "RXAMS", "MTL RX Memory Address Mismatch Error" },
-	{ true, "RXUES", "MTL RX Memory Error" },
+	{ true, "RXUES", "MTL RX Memory Uncorrectable Error" },
 	{ false, "UNKNOWN", "Unknown Error" }, /* 7 */
-	{ true, "ECES", "MTL EST Memory Error" },
+	{ true, "ECES", "MTL EST Memory Correctable Error" },
 	{ true, "EAMS", "MTL EST Memory Address Mismatch Error" },
-	{ true, "EUES", "MTL EST Memory Error" },
+	{ true, "EUES", "MTL EST Memory Uncorrectable Error" },
 	{ false, "UNKNOWN", "Unknown Error" }, /* 11 */
-	{ true, "RPCES", "MTL RX Parser Memory Error" },
+	{ true, "RPCES", "MTL RX Parser Memory Correctable Error" },
 	{ true, "RPAMS", "MTL RX Parser Memory Address Mismatch Error" },
-	{ true, "RPUES", "MTL RX Parser Memory Error" },
+	{ true, "RPUES", "MTL RX Parser Memory Uncorrectable Error" },
 	{ false, "UNKNOWN", "Unknown Error" }, /* 15 */
-	{ false, "UNKNOWN", "Unknown Error" }, /* 16 */
-	{ false, "UNKNOWN", "Unknown Error" }, /* 17 */
-	{ false, "UNKNOWN", "Unknown Error" }, /* 18 */
+	{ true, "SCES", "MTL SGF GCL Memory Correctable Error" },
+	{ true, "SAMS", "MTL SGF GCL Memory Address Mismatch Error" },
+	{ true, "SUES", "MTL SGF GCL Memory Uncorrectable Error" },
 	{ false, "UNKNOWN", "Unknown Error" }, /* 19 */
-	{ false, "UNKNOWN", "Unknown Error" }, /* 20 */
-	{ false, "UNKNOWN", "Unknown Error" }, /* 21 */
-	{ false, "UNKNOWN", "Unknown Error" }, /* 22 */
+	{ true, "RXFCES", "MTL RXF Memory Correctable Error" },
+	{ true, "RXFAMS", "MTL RXF Memory Address Mismatch Error" },
+	{ true, "RXFUES", "MTL RXF Memory Uncorrectable Error" },
 	{ false, "UNKNOWN", "Unknown Error" }, /* 23 */
 	{ false, "UNKNOWN", "Unknown Error" }, /* 24 */
 	{ false, "UNKNOWN", "Unknown Error" }, /* 25 */
@@ -796,13 +796,13 @@  static void dwxgmac3_handle_mtl_err(struct net_device *ndev,
 }
 
 static const struct dwxgmac3_error_desc dwxgmac3_dma_errors[32]= {
-	{ true, "TCES", "DMA TSO Memory Error" },
+	{ true, "TCES", "DMA TSO Memory Correctable Error" },
 	{ true, "TAMS", "DMA TSO Memory Address Mismatch Error" },
-	{ true, "TUES", "DMA TSO Memory Error" },
+	{ true, "TUES", "DMA TSO Memory Uncorrectable Error" },
 	{ false, "UNKNOWN", "Unknown Error" }, /* 3 */
-	{ true, "DCES", "DMA DCACHE Memory Error" },
+	{ true, "DCES", "DMA DCACHE Memory Correctable Error" },
 	{ true, "DAMS", "DMA DCACHE Address Mismatch Error" },
-	{ true, "DUES", "DMA DCACHE Memory Error" },
+	{ true, "DUES", "DMA DCACHE Memory Uncorrectable Error" },
 	{ false, "UNKNOWN", "Unknown Error" }, /* 7 */
 	{ false, "UNKNOWN", "Unknown Error" }, /* 8 */
 	{ false, "UNKNOWN", "Unknown Error" }, /* 9 */