diff mbox series

[V10,RESEND] EDAC: Add EDAC driver for loongson memory controller

Message ID 20241216013351.15432-1-zhaoqunqin@loongson.cn (mailing list archive)
State New
Headers show
Series [V10,RESEND] EDAC: Add EDAC driver for loongson memory controller | expand

Commit Message

Zhao Qunqin Dec. 16, 2024, 1:33 a.m. UTC
Add ECC support for Loongson SoC DDR controller. This
driver reports single bit errors (CE) only.

Only ACPI firmware is supported.

Signed-off-by: Zhao Qunqin <zhaoqunqin@loongson.cn>
Reviewed-by: Huacai Chen <chenhuacai@loongson.cn>
---
Changes in v10:
	- Changed acpi_device_id to "LOON0010"

Changes in v9:
	- Still using readq() and included "linux/io-64-nonatomic-lo-hi.h"
	  to avoid the compiler's waring.
	- Used alpha-betical order when selecting symbol and including
	  header file.

Changes in v8:
	- Used readl() instead of readq()
	- Used acpi_device_id instead of of_device_id, then removed
	  dt-bindings

Changes in v7:
	- Fixed sparse's "incorrect type in assignment"
	- Cleaned up coding style

Changes in v6:
	- Changed the Kconfig name to CONFIG_EDAC_LOONGSON

Changes in v5:
	- Dropepd the loongson_ prefix from all static functions.
	- Aligned function arguments on the opening brace.
	- Dropepd useless comments and useless wrapper. Dropped side
	  comments.
	- Reordered variable declarations.

Changes in v4:
	- None

Changes in v3:
	- Addressed review comments raised by Krzysztof and Huacai

Changes in v2:
	- Addressed review comments raised by Krzysztof

 MAINTAINERS                  |   6 ++
 arch/loongarch/Kconfig       |   1 +
 drivers/edac/Kconfig         |   8 ++
 drivers/edac/Makefile        |   1 +
 drivers/edac/loongson_edac.c | 156 +++++++++++++++++++++++++++++++++++
 5 files changed, 172 insertions(+)
 create mode 100644 drivers/edac/loongson_edac.c


base-commit: 584e09743d2f44905290b0dbf3215064d2a1888c

Comments

Borislav Petkov Dec. 16, 2024, 11:55 a.m. UTC | #1
On Mon, Dec 16, 2024 at 09:33:51AM +0800, Zhao Qunqin wrote:
> +LOONGSON EDAC DRIVER
> +M:	Zhao Qunqin <zhaoqunqin@loongson.cn>
> +L:	linux-edac@vger.kernel.org
> +S:	Maintained
> +F:	drivers/edac/loongson_edac.c

If you add yourself as a maintainer, I'd expect you to review and/or ack
patches for your driver so that I can pick them up.

> +config EDAC_LOONGSON
> +	tristate "Loongson Memory Controller"
> +	depends on (LOONGARCH && ACPI) || COMPILE_TEST

The COMPILE_TEST thing would mean that you'll make sure this driver always
builds on other arches and it doesn't break randconfig builds of people. If it
happens too often and no one is fixing it, I'll remove the COMPILE_TEST.

> +	help
> +	  Support for error detection and correction on the Loongson
> +	  family memory controller. This driver reports single bit
> +	  errors (CE) only. Loongson-3A5000/3C5000/3D5000/3A6000/3C6000
> +	  are compatible.
>  
>  endif # EDAC

> +static int read_ecc(struct mem_ctl_info *mci)
> +{
> +	struct loongson_edac_pvt *pvt = mci->pvt_info;
> +	u64 ecc;
> +	int cs;
> +
> +	if (!pvt->ecc_base)

When can that even happen? You're initializing it properly in pvt_init().

> +		return pvt->last_ce_count;
> +
> +	ecc = readq(pvt->ecc_base + ECC_CS_COUNT_REG);
> +	/* cs0 -- cs3 */
> +	cs = ecc & 0xff;
> +	cs += (ecc >> 8) & 0xff;
> +	cs += (ecc >> 16) & 0xff;
> +	cs += (ecc >> 24) & 0xff;
> +
> +	return cs;
> +}
> +
> +static void edac_check(struct mem_ctl_info *mci)
> +{
> +	struct loongson_edac_pvt *pvt = mci->pvt_info;
> +	int new, add;
> +
> +	new = read_ecc(mci);
> +	add = new - pvt->last_ce_count;
> +	pvt->last_ce_count = new;

That last_ce_count is just silly. Kill it.

> +	if (add <= 0)
> +		return;
> +
> +	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add,
> +			     0, 0, 0, 0, 0, -1, "error", "");
> +	edac_mc_printk(mci, KERN_INFO, "add: %d", add);

"add"? What are you adding? Error count?

No need.

> +static int edac_probe(struct platform_device *pdev)
> +{
> +	struct edac_mc_layer layers[2];
> +	struct mem_ctl_info *mci;
> +	void __iomem *vbase;
> +	int ret;
> +
> +	vbase = devm_platform_ioremap_resource(pdev, 0);
> +	if (IS_ERR(vbase))
> +		return PTR_ERR(vbase);
> +
> +	/* allocate a new MC control structure */
> +	layers[0].type = EDAC_MC_LAYER_CHANNEL;
> +	layers[0].size = 1;
> +	layers[0].is_virt_csrow = false;
> +	layers[1].type = EDAC_MC_LAYER_SLOT;
> +	layers[1].size = 1;
> +	layers[1].is_virt_csrow = true;
> +	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
> +			    sizeof(struct loongson_edac_pvt));
> +	if (mci == NULL)
> +		return -ENOMEM;
> +
> +	mci->mc_idx = edac_device_alloc_index();
> +	mci->mtype_cap = MEM_FLAG_RDDR4;
> +	mci->edac_ctl_cap = EDAC_FLAG_NONE;
> +	mci->edac_cap = EDAC_FLAG_NONE;
> +	mci->mod_name = "loongson_edac.c";
> +	mci->ctl_name = "loongson_edac_ctl";
> +	mci->dev_name = "loongson_edac_dev";
> +	mci->ctl_page_to_phys = NULL;
> +	mci->pdev = &pdev->dev;
> +	mci->error_desc.grain = 8;
> +	/* Set the function pointer to an actual operation function */

Remove that obvious comment.
Zhao Qunqin Dec. 17, 2024, 2:25 a.m. UTC | #2
在 2024/12/16 下午7:55, Borislav Petkov 写道:
> On Mon, Dec 16, 2024 at 09:33:51AM +0800, Zhao Qunqin wrote:
>> +LOONGSON EDAC DRIVER
>> +M:	Zhao Qunqin <zhaoqunqin@loongson.cn>
>> +L:	linux-edac@vger.kernel.org
>> +S:	Maintained
>> +F:	drivers/edac/loongson_edac.c
> If you add yourself as a maintainer, I'd expect you to review and/or ack
> patches for your driver so that I can pick them up.
OK. I can review the patches for this driver.
>
>> +config EDAC_LOONGSON
>> +	tristate "Loongson Memory Controller"
>> +	depends on (LOONGARCH && ACPI) || COMPILE_TEST
> The COMPILE_TEST thing would mean that you'll make sure this driver always
> builds on other arches and it doesn't break randconfig builds of people. If it
> happens too often and no one is fixing it, I'll remove the COMPILE_TEST.
There have indeed been build errors on other arches. So i will remove it.
>> +	help
>> +	  Support for error detection and correction on the Loongson
>> +	  family memory controller. This driver reports single bit
>> +	  errors (CE) only. Loongson-3A5000/3C5000/3D5000/3A6000/3C6000
>> +	  are compatible.
>>   
>>   endif # EDAC
>> +static int read_ecc(struct mem_ctl_info *mci)
>> +{
>> +	struct loongson_edac_pvt *pvt = mci->pvt_info;
>> +	u64 ecc;
>> +	int cs;
>> +
>> +	if (!pvt->ecc_base)
> When can that even happen? You're initializing it properly in pvt_init().
Will remove this check.
>> +		return pvt->last_ce_count;
>> +
>> +	ecc = readq(pvt->ecc_base + ECC_CS_COUNT_REG);
>> +	/* cs0 -- cs3 */
>> +	cs = ecc & 0xff;
>> +	cs += (ecc >> 8) & 0xff;
>> +	cs += (ecc >> 16) & 0xff;
>> +	cs += (ecc >> 24) & 0xff;
>> +
>> +	return cs;
>> +}
>> +
>> +static void edac_check(struct mem_ctl_info *mci)
>> +{
>> +	struct loongson_edac_pvt *pvt = mci->pvt_info;
>> +	int new, add;
>> +
>> +	new = read_ecc(mci);
>> +	add = new - pvt->last_ce_count;
>> +	pvt->last_ce_count = new;
> That last_ce_count is just silly. Kill it.

Then  I  can't calculate the error count added since the last check,  cause

what record in Loongson's ECC register is  the error count  from reset of

the memory controller.

>> +	if (add <= 0)
>> +		return;
>> +
>> +	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add,
>> +			     0, 0, 0, 0, 0, -1, "error", "");
>> +	edac_mc_printk(mci, KERN_INFO, "add: %d", add);
> "add"? What are you adding? Error count?
>
> No need.
Will remove this printk.
>> +static int edac_probe(struct platform_device *pdev)
>> +{
>> +	struct edac_mc_layer layers[2];
>> +	struct mem_ctl_info *mci;
>> +	void __iomem *vbase;
>> +	int ret;
>> +
>> +	vbase = devm_platform_ioremap_resource(pdev, 0);
>> +	if (IS_ERR(vbase))
>> +		return PTR_ERR(vbase);
>> +
>> +	/* allocate a new MC control structure */
>> +	layers[0].type = EDAC_MC_LAYER_CHANNEL;
>> +	layers[0].size = 1;
>> +	layers[0].is_virt_csrow = false;
>> +	layers[1].type = EDAC_MC_LAYER_SLOT;
>> +	layers[1].size = 1;
>> +	layers[1].is_virt_csrow = true;
>> +	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
>> +			    sizeof(struct loongson_edac_pvt));
>> +	if (mci == NULL)
>> +		return -ENOMEM;
>> +
>> +	mci->mc_idx = edac_device_alloc_index();
>> +	mci->mtype_cap = MEM_FLAG_RDDR4;
>> +	mci->edac_ctl_cap = EDAC_FLAG_NONE;
>> +	mci->edac_cap = EDAC_FLAG_NONE;
>> +	mci->mod_name = "loongson_edac.c";
>> +	mci->ctl_name = "loongson_edac_ctl";
>> +	mci->dev_name = "loongson_edac_dev";
>> +	mci->ctl_page_to_phys = NULL;
>> +	mci->pdev = &pdev->dev;
>> +	mci->error_desc.grain = 8;
>> +	/* Set the function pointer to an actual operation function */
> Remove that obvious comment.

OK

Thanks for taking the time to review.

Best regards

>
diff mbox series

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index e9659a5a7..b36a45051 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13397,6 +13397,12 @@  S:	Maintained
 F:	Documentation/devicetree/bindings/thermal/loongson,ls2k-thermal.yaml
 F:	drivers/thermal/loongson2_thermal.c
 
+LOONGSON EDAC DRIVER
+M:	Zhao Qunqin <zhaoqunqin@loongson.cn>
+L:	linux-edac@vger.kernel.org
+S:	Maintained
+F:	drivers/edac/loongson_edac.c
+
 LSILOGIC MPT FUSION DRIVERS (FC/SAS/SPI)
 M:	Sathya Prakash <sathya.prakash@broadcom.com>
 M:	Sreekanth Reddy <sreekanth.reddy@broadcom.com>
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index bb35c34f8..33052526b 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -79,6 +79,7 @@  config LOONGARCH
 	select BUILDTIME_TABLE_SORT
 	select COMMON_CLK
 	select CPU_PM
+	select EDAC_SUPPORT
 	select EFI
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_CMOS_UPDATE
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 81af6c344..433c33785 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -564,5 +564,13 @@  config EDAC_VERSAL
 	  Support injecting both correctable and uncorrectable errors
 	  for debugging purposes.
 
+config EDAC_LOONGSON
+	tristate "Loongson Memory Controller"
+	depends on (LOONGARCH && ACPI) || COMPILE_TEST
+	help
+	  Support for error detection and correction on the Loongson
+	  family memory controller. This driver reports single bit
+	  errors (CE) only. Loongson-3A5000/3C5000/3D5000/3A6000/3C6000
+	  are compatible.
 
 endif # EDAC
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index faf310eec..f8bdbc895 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -88,3 +88,4 @@  obj-$(CONFIG_EDAC_DMC520)		+= dmc520_edac.o
 obj-$(CONFIG_EDAC_NPCM)			+= npcm_edac.o
 obj-$(CONFIG_EDAC_ZYNQMP)		+= zynqmp_edac.o
 obj-$(CONFIG_EDAC_VERSAL)		+= versal_edac.o
+obj-$(CONFIG_EDAC_LOONGSON)		+= loongson_edac.o
diff --git a/drivers/edac/loongson_edac.c b/drivers/edac/loongson_edac.c
new file mode 100644
index 000000000..29607972f
--- /dev/null
+++ b/drivers/edac/loongson_edac.c
@@ -0,0 +1,156 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 Loongson Technology Corporation Limited.
+ */
+
+#include <linux/acpi.h>
+#include <linux/edac.h>
+#include <linux/init.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include "edac_module.h"
+
+#define ECC_CS_COUNT_REG	0x18
+
+struct loongson_edac_pvt {
+	void __iomem *ecc_base;
+	int last_ce_count;
+};
+
+static int read_ecc(struct mem_ctl_info *mci)
+{
+	struct loongson_edac_pvt *pvt = mci->pvt_info;
+	u64 ecc;
+	int cs;
+
+	if (!pvt->ecc_base)
+		return pvt->last_ce_count;
+
+	ecc = readq(pvt->ecc_base + ECC_CS_COUNT_REG);
+	/* cs0 -- cs3 */
+	cs = ecc & 0xff;
+	cs += (ecc >> 8) & 0xff;
+	cs += (ecc >> 16) & 0xff;
+	cs += (ecc >> 24) & 0xff;
+
+	return cs;
+}
+
+static void edac_check(struct mem_ctl_info *mci)
+{
+	struct loongson_edac_pvt *pvt = mci->pvt_info;
+	int new, add;
+
+	new = read_ecc(mci);
+	add = new - pvt->last_ce_count;
+	pvt->last_ce_count = new;
+	if (add <= 0)
+		return;
+
+	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add,
+			     0, 0, 0, 0, 0, -1, "error", "");
+	edac_mc_printk(mci, KERN_INFO, "add: %d", add);
+}
+
+static void dimm_config_init(struct mem_ctl_info *mci)
+{
+	struct dimm_info *dimm;
+	u32 size, npages;
+
+	/* size not used */
+	size = -1;
+	npages = MiB_TO_PAGES(size);
+
+	dimm = edac_get_dimm(mci, 0, 0, 0);
+	dimm->nr_pages = npages;
+	snprintf(dimm->label, sizeof(dimm->label),
+		 "MC#%uChannel#%u_DIMM#%u", mci->mc_idx, 0, 0);
+	dimm->grain = 8;
+}
+
+static void pvt_init(struct mem_ctl_info *mci, void __iomem *vbase)
+{
+	struct loongson_edac_pvt *pvt = mci->pvt_info;
+
+	pvt->ecc_base = vbase;
+	pvt->last_ce_count = read_ecc(mci);
+}
+
+static int edac_probe(struct platform_device *pdev)
+{
+	struct edac_mc_layer layers[2];
+	struct mem_ctl_info *mci;
+	void __iomem *vbase;
+	int ret;
+
+	vbase = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(vbase))
+		return PTR_ERR(vbase);
+
+	/* allocate a new MC control structure */
+	layers[0].type = EDAC_MC_LAYER_CHANNEL;
+	layers[0].size = 1;
+	layers[0].is_virt_csrow = false;
+	layers[1].type = EDAC_MC_LAYER_SLOT;
+	layers[1].size = 1;
+	layers[1].is_virt_csrow = true;
+	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
+			    sizeof(struct loongson_edac_pvt));
+	if (mci == NULL)
+		return -ENOMEM;
+
+	mci->mc_idx = edac_device_alloc_index();
+	mci->mtype_cap = MEM_FLAG_RDDR4;
+	mci->edac_ctl_cap = EDAC_FLAG_NONE;
+	mci->edac_cap = EDAC_FLAG_NONE;
+	mci->mod_name = "loongson_edac.c";
+	mci->ctl_name = "loongson_edac_ctl";
+	mci->dev_name = "loongson_edac_dev";
+	mci->ctl_page_to_phys = NULL;
+	mci->pdev = &pdev->dev;
+	mci->error_desc.grain = 8;
+	/* Set the function pointer to an actual operation function */
+	mci->edac_check = edac_check;
+
+	pvt_init(mci, vbase);
+	dimm_config_init(mci);
+
+	ret = edac_mc_add_mc(mci);
+	if (ret) {
+		edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
+		edac_mc_free(mci);
+		return ret;
+	}
+	edac_op_state = EDAC_OPSTATE_POLL;
+
+	return 0;
+}
+
+static void edac_remove(struct platform_device *pdev)
+{
+	struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev);
+
+	if (mci)
+		edac_mc_free(mci);
+}
+
+static const struct acpi_device_id loongson_edac_acpi_match[] = {
+	{"LOON0010", 0},
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, loongson_edac_acpi_match);
+
+static struct platform_driver loongson_edac_driver = {
+	.probe		= edac_probe,
+	.remove		= edac_remove,
+	.driver		= {
+		.name	= "loongson-mc-edac",
+		.acpi_match_table = loongson_edac_acpi_match,
+	},
+};
+module_platform_driver(loongson_edac_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Zhao Qunqin <zhaoqunqin@loongson.cn>");
+MODULE_DESCRIPTION("EDAC driver for loongson memory controller");