diff mbox series

[RESEND,v3,3/9] EDAC/ghes: Make ghes_edac a proper module to remove the dependency on ghes

Message ID 20220822154048.188253-4-justin.he@arm.com (mailing list archive)
State New, archived
Headers show
Series Make ghes_edac a proper module | expand

Commit Message

Jia He Aug. 22, 2022, 3:40 p.m. UTC
Commit dc4e8c07e9e2 ("ACPI: APEI: explicit init of HEST and GHES in
apci_init()") introduced a bug that ghes_edac_register() would be invoked
before edac_init(). Because at that time, the bus "edac" hadn't been even
registered, this created sysfs /devices/mc0 instead of
/sys/devices/system/edac/mc/mc0 on an Ampere eMag server.

To remove the dependency of ghes_edac on ghes, make it a proper module. Use
a list to save the probing devices in ghes_probe(), and defer the
ghes_edac_register() to module_init() of the new ghes_edac module by
iterating over the devices list.

Co-developed-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Jia He <justin.he@arm.com>
Fixes: dc4e8c07e9e2 ("ACPI: APEI: explicit init of HEST and GHES in apci_init()")
Cc: stable@kernel.org
Cc: Shuai Xue <xueshuai@linux.alibaba.com>
---
 drivers/acpi/apei/ghes.c | 35 ++++++++++++++++++++++--
 drivers/edac/Kconfig     |  4 +--
 drivers/edac/ghes_edac.c | 59 +++++++++++++++++++++++++---------------
 include/acpi/ghes.h      | 23 ++++------------
 4 files changed, 77 insertions(+), 44 deletions(-)

Comments

Borislav Petkov Aug. 24, 2022, 3:37 p.m. UTC | #1
On Mon, Aug 22, 2022 at 03:40:42PM +0000, Jia He wrote:
> Commit dc4e8c07e9e2 ("ACPI: APEI: explicit init of HEST and GHES in
> apci_init()") introduced a bug that ghes_edac_register() would be invoked
> before edac_init(). Because at that time, the bus "edac" hadn't been even
> registered, this created sysfs /devices/mc0 instead of
> /sys/devices/system/edac/mc/mc0 on an Ampere eMag server.
> 
> To remove the dependency of ghes_edac on ghes, make it a proper module. Use
> a list to save the probing devices in ghes_probe(), and defer the
> ghes_edac_register() to module_init() of the new ghes_edac module by
> iterating over the devices list.
> 
> Co-developed-by: Borislav Petkov <bp@alien8.de>
> Signed-off-by: Borislav Petkov <bp@alien8.de>
> Signed-off-by: Jia He <justin.he@arm.com>
> Fixes: dc4e8c07e9e2 ("ACPI: APEI: explicit init of HEST and GHES in apci_init()")
> Cc: stable@kernel.org

Why is this marked for stable?

The prerequisite patches are needed too. I guess this needs to be
communicated to stable folks somehow by doing

Cc: stable@kernel.org # needs commits X, Y, ...

but I guess the committer needs to do that because only at commit time
will X and Y be known...

So, is there any particular reason why this should be in stable?

> @@ -1442,7 +1449,9 @@ static int ghes_remove(struct platform_device *ghes_dev)
>  
>  	ghes_fini(ghes);
>  
> -	ghes_edac_unregister(ghes);
> +	mutex_lock(&ghes_devs_mutex);
> +	list_del_rcu(&ghes->elist);

Is that list RCU-protected?

> +	mutex_unlock(&ghes_devs_mutex);
>  
>  	kfree(ghes);

...

> @@ -566,3 +549,35 @@ void ghes_edac_unregister(struct ghes *ghes)
>  unlock:
>  	mutex_unlock(&ghes_reg_mutex);
>  }
> +
> +static int __init ghes_edac_init(void)
> +{
> +	struct ghes *g, *g_tmp;
> +
> +	if (!IS_ENABLED(CONFIG_X86))
> +		force_load = true;

No, this is not how this works.

> +	ghes_devs = ghes_get_devices(force_load);
> +	if (!ghes_devs)
> +		return -ENODEV;

You simply need to check force_load here.

> +	list_for_each_entry_safe(g, g_tmp, ghes_devs, elist) {
> +		ghes_edac_register(g->dev);
> +	}
> +
> +	return 0;
> +}
Jia He Aug. 25, 2022, 12:21 p.m. UTC | #2
Hi Borislav

> -----Original Message-----
> On Mon, Aug 22, 2022 at 03:40:42PM +0000, Jia He wrote:
> > Commit dc4e8c07e9e2 ("ACPI: APEI: explicit init of HEST and GHES in
> > apci_init()") introduced a bug that ghes_edac_register() would be
> > invoked before edac_init(). Because at that time, the bus "edac"
> > hadn't been even registered, this created sysfs /devices/mc0 instead
> > of
> > /sys/devices/system/edac/mc/mc0 on an Ampere eMag server.
> >
> > To remove the dependency of ghes_edac on ghes, make it a proper
> > module. Use a list to save the probing devices in ghes_probe(), and
> > defer the
> > ghes_edac_register() to module_init() of the new ghes_edac module by
> > iterating over the devices list.
> >
> > Co-developed-by: Borislav Petkov <bp@alien8.de>
> > Signed-off-by: Borislav Petkov <bp@alien8.de>
> > Signed-off-by: Jia He <justin.he@arm.com>
> > Fixes: dc4e8c07e9e2 ("ACPI: APEI: explicit init of HEST and GHES in
> > apci_init()")
> > Cc: stable@kernel.org
> 
> Why is this marked for stable?
> 
> The prerequisite patches are needed too. I guess this needs to be
> communicated to stable folks somehow by doing
> 
> Cc: stable@kernel.org # needs commits X, Y, ...
> 
> but I guess the committer needs to do that because only at commit time will X
> and Y be known...
> 
> So, is there any particular reason why this should be in stable?

Okay, I am fine with removing the stable line if dc4e8c07e9e2 will not be included in
any stable tree branch.

> 
> > @@ -1442,7 +1449,9 @@ static int ghes_remove(struct platform_device
> > *ghes_dev)
> >
> >  	ghes_fini(ghes);
> >
> > -	ghes_edac_unregister(ghes);
> > +	mutex_lock(&ghes_devs_mutex);
> > +	list_del_rcu(&ghes->elist);
> 
> Is that list RCU-protected?

No, I will remove the "rcu" suffix since I use list_add_tail.
 
> 
> > +	mutex_unlock(&ghes_devs_mutex);
> >
> >  	kfree(ghes);
> 
> ...
> 
> > @@ -566,3 +549,35 @@ void ghes_edac_unregister(struct ghes *ghes)
> >  unlock:
> >  	mutex_unlock(&ghes_reg_mutex);
> >  }
> > +
> > +static int __init ghes_edac_init(void) {
> > +	struct ghes *g, *g_tmp;
> > +
> > +	if (!IS_ENABLED(CONFIG_X86))
> > +		force_load = true;
> 
> No, this is not how this works.
> 
> > +	ghes_devs = ghes_get_devices(force_load);
> > +	if (!ghes_devs)
> > +		return -ENODEV;
> 
> You simply need to check force_load here.
> 

Okay, hence should I export the *ghes_devs* in ghes?


--
Cheers,
Justin (Jia He)
Kani, Toshi Aug. 26, 2022, 7:30 p.m. UTC | #3
On Thursday, August 25, 2022 6:21 AM, Justin He wrote:
> > > @@ -566,3 +549,35 @@ void ghes_edac_unregister(struct ghes *ghes)
> > >  unlock:
> > >  	mutex_unlock(&ghes_reg_mutex);
> > >  }
> > > +
> > > +static int __init ghes_edac_init(void) {
> > > +	struct ghes *g, *g_tmp;
> > > +
> > > +	if (!IS_ENABLED(CONFIG_X86))
> > > +		force_load = true;
> >
> > No, this is not how this works.
> >
> > > +	ghes_devs = ghes_get_devices(force_load);
> > > +	if (!ghes_devs)
> > > +		return -ENODEV;
> >
> > You simply need to check force_load here.
> >
> 
> Okay, hence should I export the *ghes_devs* in ghes?

It does not matter.  This series then moves the force_load check
to ghes_edac_preferred().  It is confusing for reviewers...
Please divide patches based on the final design. 

Toshi
Elliott, Robert (Servers) Aug. 26, 2022, 10:42 p.m. UTC | #4
> -----Original Message-----
> From: Jia He <justin.he@arm.com>
> Sent: Monday, August 22, 2022 10:41 AM
> Subject: [RESEND PATCH v3 3/9] EDAC/ghes: Make ghes_edac a proper module to
> remove the dependency on ghes

1. I suggest adding:
    MODULE_ALIAS("acpi*")

so udev will automatically load the module on any system with ACPI. 

> drivers/edac/Kconfig
> config EDAC_GHES
> +	tristate "Output ACPI APEI/GHES BIOS detected errors via EDAC"

2. I suggest:
    tristate "APEI (ACPI Platform Error Interfaces) GHES (Generic Hardware Error Source)"

That's in a menu of EDAC drivers, so no suffix is needed.

3. The Kconfig help text needs some updates, since the drivers are now ordering
themselves to avoid race conditions.

Current:
          Not all machines support hardware-driven error report. Some of those
          provide a BIOS-driven error report mechanism via ACPI, using the
          APEI/GHES driver. By enabling this option, the error reports provided
          by GHES are sent to userspace via the EDAC API.

          When this option is enabled, it will disable the hardware-driven
          mechanisms, if a GHES BIOS is detected, entering into the
          "Firmware First" mode.

          It should be noticed that keeping both GHES and a hardware-driven
          error mechanism won't work well, as BIOS will race with OS, while
          reading the error registers. So, if you want to not use "Firmware
          first" GHES error mechanism, you should disable GHES either at
          compilation time or by passing "ghes.disable=1" Kernel parameter
          at boot time.

          In doubt, say 'Y'.

Suggestion:
  Support for error detection and correction based on APEI (ACPI Platform
  Error Interfaces), which allows system firmware to report hardware errors 
  via the HEST (Hardware Error Source Table) using GHES (Generic Hardware
  Error Source) records. Some systems perform "firmware first" processing
  of errors before reporting them.

  This module is supported in systems supporting GHES. If the architecture
  is x86, the module only loads if the platform is listed in a known-good
  platform list (see drivers/edac/ghes_edac.c) or if ghes.force_load=1
  is specified on the kernel command line).


4. In the help text for each module that looks for GHES and refuses to load 
(e.g., EDAC_AMD64), add a sentence:

  This module does not load on a system supporting ACPI GHES.

> drivers/acpi/apei/ghes.c
> +MODULE_DESCRIPTION("Output ACPI APEI/GHES BIOS detected errors module via EDAC");

5. I suggest:
    MODULE_DESCRIPTION("APEI (ACPI Platform Error Interfaces) GHES (Generic Hardware Error Source) EDAC driver")
Borislav Petkov Aug. 27, 2022, 5:22 a.m. UTC | #5
On Fri, Aug 26, 2022 at 10:42:13PM +0000, Elliott, Robert (Servers) wrote:
> 4. In the help text for each module that looks for GHES and refuses to load 
> (e.g., EDAC_AMD64), add a sentence:
> 
>   This module does not load on a system supporting ACPI GHES.

It is not "system supporting ACPI GHES." - it is on a system which is
*known* to have a more or less tested GHES implementation. The notoriety
of firmware RAS brokenness is well known.

So please stop this - there's a world outside HP BIOS.

None of this is needed for this patchset.
Yazen Ghannam Aug. 29, 2022, 3:59 p.m. UTC | #6
On Sat, Aug 27, 2022 at 07:22:48AM +0200, Borislav Petkov wrote:
> On Fri, Aug 26, 2022 at 10:42:13PM +0000, Elliott, Robert (Servers) wrote:
> > 4. In the help text for each module that looks for GHES and refuses to load 
> > (e.g., EDAC_AMD64), add a sentence:
> > 
> >   This module does not load on a system supporting ACPI GHES.
> 
> It is not "system supporting ACPI GHES." - it is on a system which is
> *known* to have a more or less tested GHES implementation. The notoriety
> of firmware RAS brokenness is well known.
> 
> So please stop this - there's a world outside HP BIOS.
> 
> None of this is needed for this patchset.
>

GHES can be used for more than just memory errors. There are platforms where
memory errors are handled through the OS MCA, and PCIe AER errors are handled
through the FW, for example.

Is the HPE Server platform guaranteed to always provide memory errors through
GHES regardless of CPU vendor/architecture?

Thanks,
Yazen
Borislav Petkov Aug. 29, 2022, 8:39 p.m. UTC | #7
On Mon, Aug 29, 2022 at 03:59:28PM +0000, Yazen Ghannam wrote:
> GHES can be used for more than just memory errors. There are platforms where
> memory errors are handled through the OS MCA, and PCIe AER errors are handled
> through the FW, for example.
> 
> Is the HPE Server platform guaranteed to always provide memory errors through
> GHES regardless of CPU vendor/architecture?

/me looks in the direction of HPE folks...
Kani, Toshi Aug. 29, 2022, 9:37 p.m. UTC | #8
On Monday, August 29, 2022 2:39 PM, Borislav Petkov wrote:
> On Mon, Aug 29, 2022 at 03:59:28PM +0000, Yazen Ghannam wrote:
> > GHES can be used for more than just memory errors. There are platforms where
> > memory errors are handled through the OS MCA, and PCIe AER errors are handled
> > through the FW, for example.
> >
> > Is the HPE Server platform guaranteed to always provide memory errors through
> > GHES regardless of CPU vendor/architecture?
> 
> /me looks in the direction of HPE folks...

The HPE platforms enabled by the platform check are guaranteed to be operating
in FW First mode, which FW decides which error to report to the OS via GHES or 
other means.  This may include multiple CPU vendors/architecture.
On such platforms, for instance, FW does not report corrected errors to the OS 
since FW manages the threshold & FRU notification.  Chipset-specific edac drivers,
designed for OS First mode, is not necessary on such platforms.  Disabling such OS
First edac driver is achieved by enabling ghes_edac as well.

OS MCA is still used for uncorrected errors, such as SRAR (software recoverable
action required) which requires recovery action synchronous to the execution via
MCE signalling.

Toshi
diff mbox series

Patch

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 8cb65f757d06..9c52183e3ad9 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -120,6 +120,9 @@  module_param_named(disable, ghes_disable, bool, 0);
 static LIST_HEAD(ghes_hed);
 static DEFINE_MUTEX(ghes_list_mutex);
 
+static LIST_HEAD(ghes_devs);
+static DEFINE_MUTEX(ghes_devs_mutex);
+
 /*
  * Because the memory area used to transfer hardware error information
  * from BIOS to Linux can be determined only in NMI, IRQ or timer
@@ -1378,7 +1381,11 @@  static int ghes_probe(struct platform_device *ghes_dev)
 
 	platform_set_drvdata(ghes_dev, ghes);
 
-	ghes_edac_register(ghes, &ghes_dev->dev);
+	ghes->dev = &ghes_dev->dev;
+
+	mutex_lock(&ghes_devs_mutex);
+	list_add_tail(&ghes->elist, &ghes_devs);
+	mutex_unlock(&ghes_devs_mutex);
 
 	/* Handle any pending errors right away */
 	spin_lock_irqsave(&ghes_notify_lock_irq, flags);
@@ -1442,7 +1449,9 @@  static int ghes_remove(struct platform_device *ghes_dev)
 
 	ghes_fini(ghes);
 
-	ghes_edac_unregister(ghes);
+	mutex_lock(&ghes_devs_mutex);
+	list_del_rcu(&ghes->elist);
+	mutex_unlock(&ghes_devs_mutex);
 
 	kfree(ghes);
 
@@ -1500,6 +1509,28 @@  void __init acpi_ghes_init(void)
 		pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n");
 }
 
+/*
+ * Known x86 systems that prefer GHES error reporting:
+ */
+static struct acpi_platform_list plat_list[] = {
+	{"HPE   ", "Server  ", 0, ACPI_SIG_FADT, all_versions},
+	{ } /* End */
+};
+
+struct list_head *ghes_get_devices(bool force)
+{
+	int idx = -1;
+
+	if (IS_ENABLED(CONFIG_X86)) {
+		idx = acpi_match_platform_list(plat_list);
+		if (idx < 0 && !force)
+			return NULL;
+	}
+
+	return &ghes_devs;
+}
+EXPORT_SYMBOL_GPL(ghes_get_devices);
+
 void ghes_register_report_chain(struct notifier_block *nb)
 {
 	atomic_notifier_chain_register(&ghes_report_chain, nb);
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 17562cf1fe97..df45db81858b 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -53,8 +53,8 @@  config EDAC_DECODE_MCE
 	  has been initialized.
 
 config EDAC_GHES
-	bool "Output ACPI APEI/GHES BIOS detected errors via EDAC"
-	depends on ACPI_APEI_GHES && (EDAC=y)
+	tristate "Output ACPI APEI/GHES BIOS detected errors via EDAC"
+	depends on ACPI_APEI_GHES
 	select UEFI_CPER
 	help
 	  Not all machines support hardware-driven error report. Some of those
diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index 7b8d56a769f6..bb3ea42ba70b 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -60,6 +60,8 @@  module_param(force_load, bool, 0);
 
 static bool system_scanned;
 
+static struct list_head *ghes_devs;
+
 /* Memory Device - Type 17 of SMBIOS spec */
 struct memdev_dmi_entry {
 	u8 type;
@@ -387,34 +389,15 @@  static struct notifier_block ghes_edac_mem_err_nb = {
 	.priority	= 0,
 };
 
-/*
- * Known systems that are safe to enable this module.
- */
-static struct acpi_platform_list plat_list[] = {
-	{"HPE   ", "Server  ", 0, ACPI_SIG_FADT, all_versions},
-	{ } /* End */
-};
-
-int ghes_edac_register(struct ghes *ghes, struct device *dev)
+static int ghes_edac_register(struct device *dev)
 {
 	bool fake = false;
 	struct mem_ctl_info *mci;
 	struct ghes_pvt *pvt;
 	struct edac_mc_layer layers[1];
 	unsigned long flags;
-	int idx = -1;
 	int rc = 0;
 
-	if (IS_ENABLED(CONFIG_X86)) {
-		/* Check if safe to enable on this system */
-		idx = acpi_match_platform_list(plat_list);
-		if (!force_load && idx < 0)
-			return -ENODEV;
-	} else {
-		force_load = true;
-		idx = 0;
-	}
-
 	/* finish another registration/unregistration instance first */
 	mutex_lock(&ghes_reg_mutex);
 
@@ -458,7 +441,7 @@  int ghes_edac_register(struct ghes *ghes, struct device *dev)
 		pr_info("This system has a very crappy BIOS: It doesn't even list the DIMMS.\n");
 		pr_info("Its SMBIOS info is wrong. It is doubtful that the error report would\n");
 		pr_info("work on such system. Use this driver with caution\n");
-	} else if (idx < 0) {
+	} else if (force_load) {
 		pr_info("This EDAC driver relies on BIOS to enumerate memory and get error reports.\n");
 		pr_info("Unfortunately, not all BIOSes reflect the memory layout correctly.\n");
 		pr_info("So, the end result of using this driver varies from vendor to vendor.\n");
@@ -530,7 +513,7 @@  int ghes_edac_register(struct ghes *ghes, struct device *dev)
 	return rc;
 }
 
-void ghes_edac_unregister(struct ghes *ghes)
+static void ghes_edac_unregister(struct ghes *ghes)
 {
 	struct mem_ctl_info *mci;
 	unsigned long flags;
@@ -566,3 +549,35 @@  void ghes_edac_unregister(struct ghes *ghes)
 unlock:
 	mutex_unlock(&ghes_reg_mutex);
 }
+
+static int __init ghes_edac_init(void)
+{
+	struct ghes *g, *g_tmp;
+
+	if (!IS_ENABLED(CONFIG_X86))
+		force_load = true;
+
+	ghes_devs = ghes_get_devices(force_load);
+	if (!ghes_devs)
+		return -ENODEV;
+
+	list_for_each_entry_safe(g, g_tmp, ghes_devs, elist) {
+		ghes_edac_register(g->dev);
+	}
+
+	return 0;
+}
+module_init(ghes_edac_init);
+
+static void __exit ghes_edac_exit(void)
+{
+	struct ghes *g, *g_tmp;
+
+	list_for_each_entry_safe(g, g_tmp, ghes_devs, elist) {
+		ghes_edac_unregister(g);
+	}
+}
+module_exit(ghes_edac_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Output ACPI APEI/GHES BIOS detected errors module via EDAC");
diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
index 5cbd38b6e4e1..150c0b9500d6 100644
--- a/include/acpi/ghes.h
+++ b/include/acpi/ghes.h
@@ -27,6 +27,8 @@  struct ghes {
 		struct timer_list timer;
 		unsigned int irq;
 	};
+	struct device *dev;
+	struct list_head elist;
 };
 
 struct ghes_estatus_node {
@@ -69,28 +71,13 @@  int ghes_register_vendor_record_notifier(struct notifier_block *nb);
  * @nb: pointer to the notifier_block structure of the vendor record handler.
  */
 void ghes_unregister_vendor_record_notifier(struct notifier_block *nb);
+struct list_head *ghes_get_devices(bool force);
+#else
+static inline struct list_head *ghes_get_devices(bool force) { return NULL; }
 #endif
 
 int ghes_estatus_pool_init(int num_ghes);
 
-/* From drivers/edac/ghes_edac.c */
-
-#ifdef CONFIG_EDAC_GHES
-int ghes_edac_register(struct ghes *ghes, struct device *dev);
-
-void ghes_edac_unregister(struct ghes *ghes);
-
-#else
-static inline int ghes_edac_register(struct ghes *ghes, struct device *dev)
-{
-	return -ENODEV;
-}
-
-static inline void ghes_edac_unregister(struct ghes *ghes)
-{
-}
-#endif
-
 static inline int acpi_hest_get_version(struct acpi_hest_generic_data *gdata)
 {
 	return gdata->revision >> 8;