diff mbox series

[2/2] edac: synopsys: Fix the issue in reporting of the error count

Message ID 20210818072315.15149-2-shubhrajyoti.datta@xilinx.com (mailing list archive)
State New, archived
Headers show
Series [1/2] edac: synopsys: Fix the wrong value assignment for edac_mode | expand

Commit Message

Shubhrajyoti Datta Aug. 18, 2021, 7:23 a.m. UTC
Currently we are reading the error count from status register which
is not correct, this patch fixes the issue by reading the count from
error count register(ERRCNT). Currently we are not reporting the
errors cumulatively.
Also send the cumulative errors to the edac_mc_handle_error.

Signed-off-by: Shubhrajyoti Datta <shubhrajyoti.datta@xilinx.com>
---
 drivers/edac/synopsys_edac.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

Comments

Borislav Petkov Sept. 16, 2021, 9:47 a.m. UTC | #1
On Wed, Aug 18, 2021 at 12:53:15PM +0530, Shubhrajyoti Datta wrote:
> Currently we are reading the error count from status register which

Please use passive voice in your commit message: no "we" or "I", etc,
and describe your changes in imperative mood.

Also, pls read section "2) Describe your changes" in
Documentation/process/submitting-patches.rst for more details.

Bottom line is: personal pronouns are ambiguous in text, especially with
so many parties/companies/etc developing the kernel so let's avoid them
please.

> is not correct, this patch fixes the issue by reading the count from

Avoid having "This patch" or "This commit" in the commit message. It is
tautologically useless.

Also, do

$ git grep 'This patch' Documentation/process

for more details.

> error count register(ERRCNT). Currently we are not reporting the
> errors cumulatively.
> Also send the cumulative errors to the edac_mc_handle_error.
> 
> Signed-off-by: Shubhrajyoti Datta <shubhrajyoti.datta@xilinx.com>
> ---
>  drivers/edac/synopsys_edac.c | 19 +++++++++++++------
>  1 file changed, 13 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/edac/synopsys_edac.c b/drivers/edac/synopsys_edac.c
> index 7d08627e738b..38c03bdc2393 100644
> --- a/drivers/edac/synopsys_edac.c
> +++ b/drivers/edac/synopsys_edac.c
> @@ -163,6 +163,11 @@
>  #define ECC_STAT_CECNT_SHIFT		8
>  #define ECC_STAT_BITNUM_MASK		0x7F
>  
> +/* ECC error count register definitions */
> +#define ECC_ERRCNT_UECNT_MASK		0xFFFF0000
> +#define ECC_ERRCNT_UECNT_SHIFT		16
> +#define ECC_ERRCNT_CECNT_MASK		0xFFFF
> +
>  /* DDR QOS Interrupt register definitions */
>  #define DDR_QOS_IRQ_STAT_OFST		0x20200
>  #define DDR_QOSUE_MASK			0x4
> @@ -418,14 +423,16 @@ static int zynqmp_get_error_info(struct synps_edac_priv *priv)
>  	base = priv->baseaddr;
>  	p = &priv->stat;
>  
> +	regval = readl(base + ECC_ERRCNT_OFST);
> +	p->ce_cnt = regval & ECC_ERRCNT_CECNT_MASK;
> +	p->ue_cnt = (regval & ECC_ERRCNT_UECNT_MASK) >> ECC_ERRCNT_UECNT_SHIFT;
> +	if (!p->ce_cnt)
> +		goto ue_err;
> +
>  	regval = readl(base + ECC_STAT_OFST);
>  	if (!regval)
>  		return 1;
>  
> -	p->ce_cnt = (regval & ECC_STAT_CECNT_MASK) >> ECC_STAT_CECNT_SHIFT;
> -	p->ue_cnt = (regval & ECC_STAT_UECNT_MASK) >> ECC_STAT_UECNT_SHIFT;
> -	if (!p->ce_cnt)
> -		goto ue_err;
>  
>  	p->ceinfo.bitpos = (regval & ECC_STAT_BITNUM_MASK);
>  

That change looks correct.

> @@ -491,7 +498,7 @@ static void handle_error(struct mem_ctl_info *mci, struct synps_ecc_status *p)
>  		}
>  
>  		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
> -				     p->ce_cnt, 0, 0, 0, 0, 0, -1,
> +				     priv->ce_cnt, 0, 0, 0, 0, 0, -1,
>  				     priv->message, "");
>  	}
>  
> @@ -509,7 +516,7 @@ static void handle_error(struct mem_ctl_info *mci, struct synps_ecc_status *p)
>  		}
>  
>  		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
> -				     p->ue_cnt, 0, 0, 0, 0, 0, -1,
> +				     priv->ue_cnt, 0, 0, 0, 0, 0, -1,
>  				     priv->message, "");
>  	}
>  

That one doesn't. AFAICT, handle_error() is supposed to deal with the
errors currently logged: ->get_error_info() records them by putting the
counts into priv->stat, which then gets passed in.

The cumulative errors are dumped a little bit further down - grep for
"Total error count". Those are debug statements, though.

Also, edac_mc_handle_error() gets the *current* error counts which got
logged by the current ECC interrupt - not the cumulative!

HTH.
diff mbox series

Patch

diff --git a/drivers/edac/synopsys_edac.c b/drivers/edac/synopsys_edac.c
index 7d08627e738b..38c03bdc2393 100644
--- a/drivers/edac/synopsys_edac.c
+++ b/drivers/edac/synopsys_edac.c
@@ -163,6 +163,11 @@ 
 #define ECC_STAT_CECNT_SHIFT		8
 #define ECC_STAT_BITNUM_MASK		0x7F
 
+/* ECC error count register definitions */
+#define ECC_ERRCNT_UECNT_MASK		0xFFFF0000
+#define ECC_ERRCNT_UECNT_SHIFT		16
+#define ECC_ERRCNT_CECNT_MASK		0xFFFF
+
 /* DDR QOS Interrupt register definitions */
 #define DDR_QOS_IRQ_STAT_OFST		0x20200
 #define DDR_QOSUE_MASK			0x4
@@ -418,14 +423,16 @@  static int zynqmp_get_error_info(struct synps_edac_priv *priv)
 	base = priv->baseaddr;
 	p = &priv->stat;
 
+	regval = readl(base + ECC_ERRCNT_OFST);
+	p->ce_cnt = regval & ECC_ERRCNT_CECNT_MASK;
+	p->ue_cnt = (regval & ECC_ERRCNT_UECNT_MASK) >> ECC_ERRCNT_UECNT_SHIFT;
+	if (!p->ce_cnt)
+		goto ue_err;
+
 	regval = readl(base + ECC_STAT_OFST);
 	if (!regval)
 		return 1;
 
-	p->ce_cnt = (regval & ECC_STAT_CECNT_MASK) >> ECC_STAT_CECNT_SHIFT;
-	p->ue_cnt = (regval & ECC_STAT_UECNT_MASK) >> ECC_STAT_UECNT_SHIFT;
-	if (!p->ce_cnt)
-		goto ue_err;
 
 	p->ceinfo.bitpos = (regval & ECC_STAT_BITNUM_MASK);
 
@@ -491,7 +498,7 @@  static void handle_error(struct mem_ctl_info *mci, struct synps_ecc_status *p)
 		}
 
 		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
-				     p->ce_cnt, 0, 0, 0, 0, 0, -1,
+				     priv->ce_cnt, 0, 0, 0, 0, 0, -1,
 				     priv->message, "");
 	}
 
@@ -509,7 +516,7 @@  static void handle_error(struct mem_ctl_info *mci, struct synps_ecc_status *p)
 		}
 
 		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
-				     p->ue_cnt, 0, 0, 0, 0, 0, -1,
+				     priv->ue_cnt, 0, 0, 0, 0, 0, -1,
 				     priv->message, "");
 	}