diff mbox series

[v2] ACPI / extlog: Handle multiple records

Message ID 20221010203423.24300-1-tony.luck@intel.com (mailing list archive)
State Mainlined, archived
Headers show
Series [v2] ACPI / extlog: Handle multiple records | expand

Commit Message

Tony Luck Oct. 10, 2022, 8:34 p.m. UTC
If there is no user space consumer of extlog_mem trace records, then
Linux properly handles multiple error records in an ELOG block:

	extlog_print()
	  print_extlog_rcd()
	    __print_extlog_rcd()
	      cper_estatus_print()
		apei_estatus_for_each_section()

But the other code path hard-codes looking at only the first record
when it outputs a trace record.

Fix by using the same apei_estatus_for_each_section() iterator
to step over all records.

Fixes: 2dfb7d51a61d ("trace, RAS: Add eMCA trace event interface")
Signed-off-by: Tony Luck <tony.luck@intel.com>
---

Changes since v1: Oops. Cut & pasted the apei_estatus_for_each_section()
line, but forgot to change the argument from "estatus" to "tmp". We need to
walk the *copy* that was made, not the original, because the BIOS is allowed
to overwrite the original as soon as it sees block_status has been cleared.

 drivers/acpi/acpi_extlog.c | 33 ++++++++++++++++++++-------------
 1 file changed, 20 insertions(+), 13 deletions(-)

Comments

Rafael J. Wysocki Oct. 13, 2022, 6:44 p.m. UTC | #1
On Mon, Oct 10, 2022 at 10:34 PM Tony Luck <tony.luck@intel.com> wrote:
>
> If there is no user space consumer of extlog_mem trace records, then
> Linux properly handles multiple error records in an ELOG block
>
>         extlog_print()
>           print_extlog_rcd()
>             __print_extlog_rcd()
>               cper_estatus_print()
>                 apei_estatus_for_each_section()
>
> But the other code path hard codes looking for a single record to
> output a trace record.
>
> Fix by using the same apei_estatus_for_each_section() iterator
> to step over all records.
>
> Fixes: 2dfb7d51a61d ("trace, RAS: Add eMCA trace event interface")
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> ---
>
> Changes since v1: Oops. Cut & pasted the apei_estatus_for_each_section()
> line, but forgot to change the argument from "estatus" to "tmp". Need to
> walk the *copy* that was made, not the original because BIOS is allowed to
> stomp on it as soon as it sees block_status has been cleared.
>
>  drivers/acpi/acpi_extlog.c | 33 ++++++++++++++++++++-------------
>  1 file changed, 20 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
> index 72f1fb77abcd..e648158368a7 100644
> --- a/drivers/acpi/acpi_extlog.c
> +++ b/drivers/acpi/acpi_extlog.c
> @@ -12,6 +12,7 @@
>  #include <linux/ratelimit.h>
>  #include <linux/edac.h>
>  #include <linux/ras.h>
> +#include <acpi/ghes.h>
>  #include <asm/cpu.h>
>  #include <asm/mce.h>
>
> @@ -138,8 +139,8 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
>         int     cpu = mce->extcpu;
>         struct acpi_hest_generic_status *estatus, *tmp;
>         struct acpi_hest_generic_data *gdata;
> -       const guid_t *fru_id = &guid_null;
> -       char *fru_text = "";
> +       const guid_t *fru_id;
> +       char *fru_text;
>         guid_t *sec_type;
>         static u32 err_seq;
>
> @@ -160,17 +161,23 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
>
>         /* log event via trace */
>         err_seq++;
> -       gdata = (struct acpi_hest_generic_data *)(tmp + 1);
> -       if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
> -               fru_id = (guid_t *)gdata->fru_id;
> -       if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
> -               fru_text = gdata->fru_text;
> -       sec_type = (guid_t *)gdata->section_type;
> -       if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
> -               struct cper_sec_mem_err *mem = (void *)(gdata + 1);
> -               if (gdata->error_data_length >= sizeof(*mem))
> -                       trace_extlog_mem_event(mem, err_seq, fru_id, fru_text,
> -                                              (u8)gdata->error_severity);
> +       apei_estatus_for_each_section(tmp, gdata) {
> +               if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
> +                       fru_id = (guid_t *)gdata->fru_id;
> +               else
> +                       fru_id = &guid_null;
> +               if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
> +                       fru_text = gdata->fru_text;
> +               else
> +                       fru_text = "";
> +               sec_type = (guid_t *)gdata->section_type;
> +               if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
> +                       struct cper_sec_mem_err *mem = (void *)(gdata + 1);
> +
> +                       if (gdata->error_data_length >= sizeof(*mem))
> +                               trace_extlog_mem_event(mem, err_seq, fru_id, fru_text,
> +                                                      (u8)gdata->error_severity);
> +               }
>         }
>
>  out:
> --

Applied as 6.1-rc material, thanks!
diff mbox series

Patch

diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
index 72f1fb77abcd..e648158368a7 100644
--- a/drivers/acpi/acpi_extlog.c
+++ b/drivers/acpi/acpi_extlog.c
@@ -12,6 +12,7 @@ 
 #include <linux/ratelimit.h>
 #include <linux/edac.h>
 #include <linux/ras.h>
+#include <acpi/ghes.h>
 #include <asm/cpu.h>
 #include <asm/mce.h>
 
@@ -138,8 +139,8 @@  static int extlog_print(struct notifier_block *nb, unsigned long val,
 	int	cpu = mce->extcpu;
 	struct acpi_hest_generic_status *estatus, *tmp;
 	struct acpi_hest_generic_data *gdata;
-	const guid_t *fru_id = &guid_null;
-	char *fru_text = "";
+	const guid_t *fru_id;
+	char *fru_text;
 	guid_t *sec_type;
 	static u32 err_seq;
 
@@ -160,17 +161,23 @@  static int extlog_print(struct notifier_block *nb, unsigned long val,
 
 	/* log event via trace */
 	err_seq++;
-	gdata = (struct acpi_hest_generic_data *)(tmp + 1);
-	if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
-		fru_id = (guid_t *)gdata->fru_id;
-	if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
-		fru_text = gdata->fru_text;
-	sec_type = (guid_t *)gdata->section_type;
-	if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
-		struct cper_sec_mem_err *mem = (void *)(gdata + 1);
-		if (gdata->error_data_length >= sizeof(*mem))
-			trace_extlog_mem_event(mem, err_seq, fru_id, fru_text,
-					       (u8)gdata->error_severity);
+	apei_estatus_for_each_section(tmp, gdata) {
+		if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
+			fru_id = (guid_t *)gdata->fru_id;
+		else
+			fru_id = &guid_null;
+		if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
+			fru_text = gdata->fru_text;
+		else
+			fru_text = "";
+		sec_type = (guid_t *)gdata->section_type;
+		if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
+			struct cper_sec_mem_err *mem = (void *)(gdata + 1);
+
+			if (gdata->error_data_length >= sizeof(*mem))
+				trace_extlog_mem_event(mem, err_seq, fru_id, fru_text,
+						       (u8)gdata->error_severity);
+		}
 	}
 
 out: