diff mbox

[V3,2/2] acpi: apei: call into AER handling regardless of severity

Message ID 1510168392-30114-3-git-send-email-tbaicar@codeaurora.org (mailing list archive)
State Changes Requested, archived
Headers show

Commit Message

Tyler Baicar Nov. 8, 2017, 7:13 p.m. UTC
Currently the GHES code only calls into the AER driver for
recoverable type errors. This is incorrect because errors of
other severities do not get logged by the AER driver and do not
get exposed to user space via the AER trace event. So, call
into the AER driver for PCIe errors regardless of the severity.

Signed-off-by: Tyler Baicar <tbaicar@codeaurora.org>
---
 drivers/acpi/apei/ghes.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

Comments

Borislav Petkov Nov. 9, 2017, 9:46 a.m. UTC | #1
On Wed, Nov 08, 2017 at 12:13:12PM -0700, Tyler Baicar wrote:
> Currently the GHES code only calls into the AER driver for
> recoverable type errors. This is incorrect because errors of
> other severities do not get logged by the AER driver and do not
> get exposed to user space via the AER trace event. So, call
> into the AER driver for PCIe errors regardless of the severity
> 
> Signed-off-by: Tyler Baicar <tbaicar@codeaurora.org>
> ---
>  drivers/acpi/apei/ghes.c | 8 +++-----
>  1 file changed, 3 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> index 839c3d5..bb65fa6 100644
> --- a/drivers/acpi/apei/ghes.c
> +++ b/drivers/acpi/apei/ghes.c
> @@ -458,14 +458,12 @@ static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int
>  #endif
>  }

Where did the explanatory comment go?

+/*
+ * PCIe AER errors need to be sent to the AER driver for reporting and
+ * recovery. The GHES severities map to the following AER severities and
+ * require the following handling:
+ *
+ * GHES_SEV_CORRECTABLE -> AER_CORRECTABLE
+ *     These need to be reported by the AER driver but no recovery is
+ *     necessary.
+ * GHES_SEV_RECOVERABLE -> AER_NONFATAL
+ * GHES_SEV_RECOVERABLE && CPER_SEC_RESET -> AER_FATAL
+ *     These both need to be reported and recovered from by the AER driver.
+ * GHES_SEV_PANIC does not make it to this handling since the kernel must
+ *     panic.
+ */

<--- ???

> -static void ghes_handle_aer(struct acpi_hest_generic_data *gdata, int sev, int sec_sev)
> +static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
>  {
>  #ifdef CONFIG_ACPI_APEI_PCIEAER
>  	struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);
>  
> -	if (sev == GHES_SEV_RECOVERABLE &&
> -	    sec_sev == GHES_SEV_RECOVERABLE &&
> -	    pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
> +	if (pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
>  	    pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
>  		unsigned int devfn;
>  		int aer_severity;
> @@ -519,7 +517,7 @@ static void ghes_do_proc(struct ghes *ghes,
>  			ghes_handle_memory_failure(gdata, sev);
>  		}
>  		else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
> -			ghes_handle_aer(gdata, sev, sec_sev);
> +			ghes_handle_aer(gdata);
>  		}
>  		else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
>  			struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);
> --
Tyler Baicar Nov. 9, 2017, 2:37 p.m. UTC | #2
On 11/9/2017 4:46 AM, Borislav Petkov wrote:
> On Wed, Nov 08, 2017 at 12:13:12PM -0700, Tyler Baicar wrote:
>> Currently the GHES code only calls into the AER driver for
>> recoverable type errors. This is incorrect because errors of
>> other severities do not get logged by the AER driver and do not
>> get exposed to user space via the AER trace event. So, call
>> into the AER driver for PCIe errors regardless of the severity
>>
>> Signed-off-by: Tyler Baicar <tbaicar@codeaurora.org>
>> ---
>>   drivers/acpi/apei/ghes.c | 8 +++-----
>>   1 file changed, 3 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
>> index 839c3d5..bb65fa6 100644
>> --- a/drivers/acpi/apei/ghes.c
>> +++ b/drivers/acpi/apei/ghes.c
>> @@ -458,14 +458,12 @@ static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int
>>   #endif
>>   }
> Where did the explanatory comment go?
Ah, forgot to put that back in. I'll send an update shortly.

Thanks,
Tyler
Borislav Petkov Nov. 9, 2017, 3:01 p.m. UTC | #3
On Thu, Nov 09, 2017 at 09:37:45AM -0500, Tyler Baicar wrote:
> Ah, forgot to put that back in. I'll send an update shortly.

Just the one patch which needs updating pls, as a reply to the
respective message.

Thx.
diff mbox

Patch

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 839c3d5..bb65fa6 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -458,14 +458,12 @@  static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int
 #endif
 }
 
-static void ghes_handle_aer(struct acpi_hest_generic_data *gdata, int sev, int sec_sev)
+static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
 {
 #ifdef CONFIG_ACPI_APEI_PCIEAER
 	struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);
 
-	if (sev == GHES_SEV_RECOVERABLE &&
-	    sec_sev == GHES_SEV_RECOVERABLE &&
-	    pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
+	if (pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
 	    pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
 		unsigned int devfn;
 		int aer_severity;
@@ -519,7 +517,7 @@  static void ghes_do_proc(struct ghes *ghes,
 			ghes_handle_memory_failure(gdata, sev);
 		}
 		else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
-			ghes_handle_aer(gdata, sev, sec_sev);
+			ghes_handle_aer(gdata);
 		}
 		else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
 			struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);