Message ID | 20241216013351.15432-1-zhaoqunqin@loongson.cn (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | [V10,RESEND] EDAC: Add EDAC driver for loongson memory controller | expand |
On Mon, Dec 16, 2024 at 09:33:51AM +0800, Zhao Qunqin wrote: > +LOONGSON EDAC DRIVER > +M: Zhao Qunqin <zhaoqunqin@loongson.cn> > +L: linux-edac@vger.kernel.org > +S: Maintained > +F: drivers/edac/loongson_edac.c If you add yourself as a maintainer, I'd expect you to review and/or ack patches for your driver so that I can pick them up. > +config EDAC_LOONGSON > + tristate "Loongson Memory Controller" > + depends on (LOONGARCH && ACPI) || COMPILE_TEST The COMPILE_TEST thing would mean that you'll make sure this driver always builds on other arches and it doesn't break randconfig builds of people. If it happens too often and no one is fixing it, I'll remove the COMPILE_TEST. > + help > + Support for error detection and correction on the Loongson > + family memory controller. This driver reports single bit > + errors (CE) only. Loongson-3A5000/3C5000/3D5000/3A6000/3C6000 > + are compatible. > > endif # EDAC > +static int read_ecc(struct mem_ctl_info *mci) > +{ > + struct loongson_edac_pvt *pvt = mci->pvt_info; > + u64 ecc; > + int cs; > + > + if (!pvt->ecc_base) When can that even happen? You're initializing it properly in pvt_init(). > + return pvt->last_ce_count; > + > + ecc = readq(pvt->ecc_base + ECC_CS_COUNT_REG); > + /* cs0 -- cs3 */ > + cs = ecc & 0xff; > + cs += (ecc >> 8) & 0xff; > + cs += (ecc >> 16) & 0xff; > + cs += (ecc >> 24) & 0xff; > + > + return cs; > +} > + > +static void edac_check(struct mem_ctl_info *mci) > +{ > + struct loongson_edac_pvt *pvt = mci->pvt_info; > + int new, add; > + > + new = read_ecc(mci); > + add = new - pvt->last_ce_count; > + pvt->last_ce_count = new; That last_ce_count is just silly. Kill it. > + if (add <= 0) > + return; > + > + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add, > + 0, 0, 0, 0, 0, -1, "error", ""); > + edac_mc_printk(mci, KERN_INFO, "add: %d", add); "add"? What are you adding? Error count? No need. > +static int edac_probe(struct platform_device *pdev) > +{ > + struct edac_mc_layer layers[2]; > + struct mem_ctl_info *mci; > + void __iomem *vbase; > + int ret; > + > + vbase = devm_platform_ioremap_resource(pdev, 0); > + if (IS_ERR(vbase)) > + return PTR_ERR(vbase); > + > + /* allocate a new MC control structure */ > + layers[0].type = EDAC_MC_LAYER_CHANNEL; > + layers[0].size = 1; > + layers[0].is_virt_csrow = false; > + layers[1].type = EDAC_MC_LAYER_SLOT; > + layers[1].size = 1; > + layers[1].is_virt_csrow = true; > + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, > + sizeof(struct loongson_edac_pvt)); > + if (mci == NULL) > + return -ENOMEM; > + > + mci->mc_idx = edac_device_alloc_index(); > + mci->mtype_cap = MEM_FLAG_RDDR4; > + mci->edac_ctl_cap = EDAC_FLAG_NONE; > + mci->edac_cap = EDAC_FLAG_NONE; > + mci->mod_name = "loongson_edac.c"; > + mci->ctl_name = "loongson_edac_ctl"; > + mci->dev_name = "loongson_edac_dev"; > + mci->ctl_page_to_phys = NULL; > + mci->pdev = &pdev->dev; > + mci->error_desc.grain = 8; > + /* Set the function pointer to an actual operation function */ Remove that obvious comment.
在 2024/12/16 下午7:55, Borislav Petkov 写道: > On Mon, Dec 16, 2024 at 09:33:51AM +0800, Zhao Qunqin wrote: >> +LOONGSON EDAC DRIVER >> +M: Zhao Qunqin <zhaoqunqin@loongson.cn> >> +L: linux-edac@vger.kernel.org >> +S: Maintained >> +F: drivers/edac/loongson_edac.c > If you add yourself as a maintainer, I'd expect you to review and/or ack > patches for your driver so that I can pick them up. OK. I can review the patches for this driver. > >> +config EDAC_LOONGSON >> + tristate "Loongson Memory Controller" >> + depends on (LOONGARCH && ACPI) || COMPILE_TEST > The COMPILE_TEST thing would mean that you'll make sure this driver always > builds on other arches and it doesn't break randconfig builds of people. If it > happens too often and no one is fixing it, I'll remove the COMPILE_TEST. There have indeed been build errors on other arches. So i will remove it. >> + help >> + Support for error detection and correction on the Loongson >> + family memory controller. This driver reports single bit >> + errors (CE) only. Loongson-3A5000/3C5000/3D5000/3A6000/3C6000 >> + are compatible. >> >> endif # EDAC >> +static int read_ecc(struct mem_ctl_info *mci) >> +{ >> + struct loongson_edac_pvt *pvt = mci->pvt_info; >> + u64 ecc; >> + int cs; >> + >> + if (!pvt->ecc_base) > When can that even happen? You're initializing it properly in pvt_init(). Will remove this check. >> + return pvt->last_ce_count; >> + >> + ecc = readq(pvt->ecc_base + ECC_CS_COUNT_REG); >> + /* cs0 -- cs3 */ >> + cs = ecc & 0xff; >> + cs += (ecc >> 8) & 0xff; >> + cs += (ecc >> 16) & 0xff; >> + cs += (ecc >> 24) & 0xff; >> + >> + return cs; >> +} >> + >> +static void edac_check(struct mem_ctl_info *mci) >> +{ >> + struct loongson_edac_pvt *pvt = mci->pvt_info; >> + int new, add; >> + >> + new = read_ecc(mci); >> + add = new - pvt->last_ce_count; >> + pvt->last_ce_count = new; > That last_ce_count is just silly. Kill it. Then I can't calculate the error count added since the last check, cause what record in Loongson's ECC register is the error count from reset of the memory controller. >> + if (add <= 0) >> + return; >> + >> + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add, >> + 0, 0, 0, 0, 0, -1, "error", ""); >> + edac_mc_printk(mci, KERN_INFO, "add: %d", add); > "add"? What are you adding? Error count? > > No need. Will remove this printk. >> +static int edac_probe(struct platform_device *pdev) >> +{ >> + struct edac_mc_layer layers[2]; >> + struct mem_ctl_info *mci; >> + void __iomem *vbase; >> + int ret; >> + >> + vbase = devm_platform_ioremap_resource(pdev, 0); >> + if (IS_ERR(vbase)) >> + return PTR_ERR(vbase); >> + >> + /* allocate a new MC control structure */ >> + layers[0].type = EDAC_MC_LAYER_CHANNEL; >> + layers[0].size = 1; >> + layers[0].is_virt_csrow = false; >> + layers[1].type = EDAC_MC_LAYER_SLOT; >> + layers[1].size = 1; >> + layers[1].is_virt_csrow = true; >> + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, >> + sizeof(struct loongson_edac_pvt)); >> + if (mci == NULL) >> + return -ENOMEM; >> + >> + mci->mc_idx = edac_device_alloc_index(); >> + mci->mtype_cap = MEM_FLAG_RDDR4; >> + mci->edac_ctl_cap = EDAC_FLAG_NONE; >> + mci->edac_cap = EDAC_FLAG_NONE; >> + mci->mod_name = "loongson_edac.c"; >> + mci->ctl_name = "loongson_edac_ctl"; >> + mci->dev_name = "loongson_edac_dev"; >> + mci->ctl_page_to_phys = NULL; >> + mci->pdev = &pdev->dev; >> + mci->error_desc.grain = 8; >> + /* Set the function pointer to an actual operation function */ > Remove that obvious comment. OK Thanks for taking the time to review. Best regards >
diff --git a/MAINTAINERS b/MAINTAINERS index e9659a5a7..b36a45051 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13397,6 +13397,12 @@ S: Maintained F: Documentation/devicetree/bindings/thermal/loongson,ls2k-thermal.yaml F: drivers/thermal/loongson2_thermal.c +LOONGSON EDAC DRIVER +M: Zhao Qunqin <zhaoqunqin@loongson.cn> +L: linux-edac@vger.kernel.org +S: Maintained +F: drivers/edac/loongson_edac.c + LSILOGIC MPT FUSION DRIVERS (FC/SAS/SPI) M: Sathya Prakash <sathya.prakash@broadcom.com> M: Sreekanth Reddy <sreekanth.reddy@broadcom.com> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index bb35c34f8..33052526b 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -79,6 +79,7 @@ config LOONGARCH select BUILDTIME_TABLE_SORT select COMMON_CLK select CPU_PM + select EDAC_SUPPORT select EFI select GENERIC_CLOCKEVENTS select GENERIC_CMOS_UPDATE diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 81af6c344..433c33785 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -564,5 +564,13 @@ config EDAC_VERSAL Support injecting both correctable and uncorrectable errors for debugging purposes. +config EDAC_LOONGSON + tristate "Loongson Memory Controller" + depends on (LOONGARCH && ACPI) || COMPILE_TEST + help + Support for error detection and correction on the Loongson + family memory controller. This driver reports single bit + errors (CE) only. Loongson-3A5000/3C5000/3D5000/3A6000/3C6000 + are compatible. endif # EDAC diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index faf310eec..f8bdbc895 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -88,3 +88,4 @@ obj-$(CONFIG_EDAC_DMC520) += dmc520_edac.o obj-$(CONFIG_EDAC_NPCM) += npcm_edac.o obj-$(CONFIG_EDAC_ZYNQMP) += zynqmp_edac.o obj-$(CONFIG_EDAC_VERSAL) += versal_edac.o +obj-$(CONFIG_EDAC_LOONGSON) += loongson_edac.o diff --git a/drivers/edac/loongson_edac.c b/drivers/edac/loongson_edac.c new file mode 100644 index 000000000..29607972f --- /dev/null +++ b/drivers/edac/loongson_edac.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Loongson Technology Corporation Limited. + */ + +#include <linux/acpi.h> +#include <linux/edac.h> +#include <linux/init.h> +#include <linux/io-64-nonatomic-lo-hi.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include "edac_module.h" + +#define ECC_CS_COUNT_REG 0x18 + +struct loongson_edac_pvt { + void __iomem *ecc_base; + int last_ce_count; +}; + +static int read_ecc(struct mem_ctl_info *mci) +{ + struct loongson_edac_pvt *pvt = mci->pvt_info; + u64 ecc; + int cs; + + if (!pvt->ecc_base) + return pvt->last_ce_count; + + ecc = readq(pvt->ecc_base + ECC_CS_COUNT_REG); + /* cs0 -- cs3 */ + cs = ecc & 0xff; + cs += (ecc >> 8) & 0xff; + cs += (ecc >> 16) & 0xff; + cs += (ecc >> 24) & 0xff; + + return cs; +} + +static void edac_check(struct mem_ctl_info *mci) +{ + struct loongson_edac_pvt *pvt = mci->pvt_info; + int new, add; + + new = read_ecc(mci); + add = new - pvt->last_ce_count; + pvt->last_ce_count = new; + if (add <= 0) + return; + + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add, + 0, 0, 0, 0, 0, -1, "error", ""); + edac_mc_printk(mci, KERN_INFO, "add: %d", add); +} + +static void dimm_config_init(struct mem_ctl_info *mci) +{ + struct dimm_info *dimm; + u32 size, npages; + + /* size not used */ + size = -1; + npages = MiB_TO_PAGES(size); + + dimm = edac_get_dimm(mci, 0, 0, 0); + dimm->nr_pages = npages; + snprintf(dimm->label, sizeof(dimm->label), + "MC#%uChannel#%u_DIMM#%u", mci->mc_idx, 0, 0); + dimm->grain = 8; +} + +static void pvt_init(struct mem_ctl_info *mci, void __iomem *vbase) +{ + struct loongson_edac_pvt *pvt = mci->pvt_info; + + pvt->ecc_base = vbase; + pvt->last_ce_count = read_ecc(mci); +} + +static int edac_probe(struct platform_device *pdev) +{ + struct edac_mc_layer layers[2]; + struct mem_ctl_info *mci; + void __iomem *vbase; + int ret; + + vbase = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(vbase)) + return PTR_ERR(vbase); + + /* allocate a new MC control structure */ + layers[0].type = EDAC_MC_LAYER_CHANNEL; + layers[0].size = 1; + layers[0].is_virt_csrow = false; + layers[1].type = EDAC_MC_LAYER_SLOT; + layers[1].size = 1; + layers[1].is_virt_csrow = true; + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, + sizeof(struct loongson_edac_pvt)); + if (mci == NULL) + return -ENOMEM; + + mci->mc_idx = edac_device_alloc_index(); + mci->mtype_cap = MEM_FLAG_RDDR4; + mci->edac_ctl_cap = EDAC_FLAG_NONE; + mci->edac_cap = EDAC_FLAG_NONE; + mci->mod_name = "loongson_edac.c"; + mci->ctl_name = "loongson_edac_ctl"; + mci->dev_name = "loongson_edac_dev"; + mci->ctl_page_to_phys = NULL; + mci->pdev = &pdev->dev; + mci->error_desc.grain = 8; + /* Set the function pointer to an actual operation function */ + mci->edac_check = edac_check; + + pvt_init(mci, vbase); + dimm_config_init(mci); + + ret = edac_mc_add_mc(mci); + if (ret) { + edac_dbg(0, "MC: failed edac_mc_add_mc()\n"); + edac_mc_free(mci); + return ret; + } + edac_op_state = EDAC_OPSTATE_POLL; + + return 0; +} + +static void edac_remove(struct platform_device *pdev) +{ + struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev); + + if (mci) + edac_mc_free(mci); +} + +static const struct acpi_device_id loongson_edac_acpi_match[] = { + {"LOON0010", 0}, + {} +}; +MODULE_DEVICE_TABLE(acpi, loongson_edac_acpi_match); + +static struct platform_driver loongson_edac_driver = { + .probe = edac_probe, + .remove = edac_remove, + .driver = { + .name = "loongson-mc-edac", + .acpi_match_table = loongson_edac_acpi_match, + }, +}; +module_platform_driver(loongson_edac_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Zhao Qunqin <zhaoqunqin@loongson.cn>"); +MODULE_DESCRIPTION("EDAC driver for loongson memory controller");