Message ID | 1459955578-24602-2-git-send-email-tbaicar@codeaurora.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 06/04/16 16:12, Tyler Baicar wrote: > A RAS (Reliability, Availability, Serviceability) controller > may be a separate processor running in parallel with OS > execution, and may generate error records for consumption by > the OS. If the RAS controller produces multiple error records, > then they may be overwritten before the OS has consumed them. > > The Generic Hardware Error Source (GHES) v2 structure > introduces the capability for the OS to acknowledge the > consumption of the error record generated by the RAS > controller. A RAS controller supporting GHESv2 shall wait for > the acknowledgment before writing a new error record, thus > eliminating the race condition. > > Signed-off-by: Jonathan (Zhixiong) Zhang <zjzhang@codeaurora.org> > Signed-off-by: Richard Ruigrok <rruigrok@codeaurora.org> > Signed-off-by: Tyler Baicar <tbaicar@codeaurora.org> > Signed-off-by: Naveen Kaje <nkaje@codeaurora.org> > return ERR_PTR(-ENOMEM); > + > + hest_hdr = (struct acpi_hest_header *)generic; > + if (hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR_V2) { > + ghes->generic_v2 = (struct acpi_hest_generic_v2 *)generic; > + rc = apei_map_generic_address( > + &ghes->generic_v2->read_ack_register); > + if (rc) > + goto err_unmap; > + } else > + ghes->generic_v2 = NULL; ... > err_unmap: > apei_unmap_generic_address(&generic->error_status_address); > + if (ghes->generic_v2) > + apei_unmap_generic_address( > + &ghes->generic_v2->read_ack_register); > err_free: > kfree(ghes); > return ERR_PTR(rc); > @@ -279,6 +295,9 @@ static void ghes_fini(struct ghes *ghes) > { > kfree(ghes->estatus); > apei_unmap_generic_address(&ghes->generic->error_status_address); > + if (ghes->generic_v2) > + apei_unmap_generic_address( > + &ghes->generic_v2->error_status_address); I am not familiar with the APEI code, but is this error_status_address or read_ack_register ? We don't seem to be mapping error_status_address in generic_v2 header which is introduced in this patch ? Am I missing something ? Suzuki
Hello Suzuki, On 4/6/2016 9:53 AM, Suzuki K Poulose wrote: > On 06/04/16 16:12, Tyler Baicar wrote: >> + hest_hdr = (struct acpi_hest_header *)generic; >> + if (hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR_V2) { >> + ghes->generic_v2 = (struct acpi_hest_generic_v2 *)generic; >> + rc = apei_map_generic_address( >> + &ghes->generic_v2->read_ack_register); >> + if (rc) >> + goto err_unmap; >> + } else >> + ghes->generic_v2 = NULL; > ... >> err_unmap: >> apei_unmap_generic_address(&generic->error_status_address); >> + if (ghes->generic_v2) >> + apei_unmap_generic_address( >> + &ghes->generic_v2->read_ack_register); >> err_free: >> kfree(ghes); >> return ERR_PTR(rc); >> @@ -279,6 +295,9 @@ static void ghes_fini(struct ghes *ghes) >> { >> kfree(ghes->estatus); >> apei_unmap_generic_address(&ghes->generic->error_status_address); >> + if (ghes->generic_v2) >> + apei_unmap_generic_address( >> + &ghes->generic_v2->error_status_address); > > I am not familiar with the APEI code, but is this error_status_address or > read_ack_register ? We don't seem to be mapping error_status_address > in generic_v2 header > which is introduced in this patch ? Am I missing something ? > > Suzuki Thank you for your feedback. This does look like an error; it should be &ghes->generic_v2->read_ack_register. The variable &ghes->generic_v2->error_status_address is the same as &ghes->generic->error_status_address which is unmapped on the line above the if statement here. Thanks, Tyler
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 60746ef..9b0543e 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -45,6 +45,7 @@ #include <linux/aer.h> #include <linux/nmi.h> +#include <acpi/actbl1.h> #include <acpi/ghes.h> #include <acpi/apei.h> #include <asm/tlbflush.h> @@ -244,10 +245,22 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic) struct ghes *ghes; unsigned int error_block_length; int rc; + struct acpi_hest_header *hest_hdr; ghes = kzalloc(sizeof(*ghes), GFP_KERNEL); if (!ghes) return ERR_PTR(-ENOMEM); + + hest_hdr = (struct acpi_hest_header *)generic; + if (hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR_V2) { + ghes->generic_v2 = (struct acpi_hest_generic_v2 *)generic; + rc = apei_map_generic_address( + &ghes->generic_v2->read_ack_register); + if (rc) + goto err_unmap; + } else + ghes->generic_v2 = NULL; + ghes->generic = generic; rc = apei_map_generic_address(&generic->error_status_address); if (rc) @@ -270,6 +283,9 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic) err_unmap: apei_unmap_generic_address(&generic->error_status_address); + if (ghes->generic_v2) + apei_unmap_generic_address( + &ghes->generic_v2->read_ack_register); err_free: kfree(ghes); return ERR_PTR(rc); @@ -279,6 +295,9 @@ static void ghes_fini(struct ghes *ghes) { kfree(ghes->estatus); apei_unmap_generic_address(&ghes->generic->error_status_address); + if (ghes->generic_v2) + apei_unmap_generic_address( + &ghes->generic_v2->error_status_address); } static inline int ghes_severity(int severity) @@ -648,6 +667,22 @@ static void ghes_estatus_cache_add( rcu_read_unlock(); } +static int ghes_do_read_ack(struct acpi_hest_generic_v2 *generic_v2) +{ + int rc; + u64 val = 0; + + rc = apei_read(&val, &generic_v2->read_ack_register); + if (rc) + return rc; + val &= generic_v2->read_ack_preserve << + generic_v2->read_ack_register.bit_offset; + val |= generic_v2->read_ack_write; + rc = apei_write(val, &generic_v2->read_ack_register); + + return rc; +} + static int ghes_proc(struct ghes *ghes) { int rc; @@ -660,6 +695,12 @@ static int ghes_proc(struct ghes *ghes) ghes_estatus_cache_add(ghes->generic, ghes->estatus); } ghes_do_proc(ghes, ghes->estatus); + + if (ghes->generic_v2) { + rc = ghes_do_read_ack(ghes->generic_v2); + if (rc) + return rc; + } out: ghes_clear_estatus(ghes); return 0; diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c index 792a0d9..ef725a9 100644 --- a/drivers/acpi/apei/hest.c +++ b/drivers/acpi/apei/hest.c @@ -52,6 +52,7 @@ static const int hest_esrc_len_tab[ACPI_HEST_TYPE_RESERVED] = { [ACPI_HEST_TYPE_AER_ENDPOINT] = sizeof(struct acpi_hest_aer), [ACPI_HEST_TYPE_AER_BRIDGE] = sizeof(struct acpi_hest_aer_bridge), [ACPI_HEST_TYPE_GENERIC_ERROR] = sizeof(struct acpi_hest_generic), + [ACPI_HEST_TYPE_GENERIC_ERROR_V2] = sizeof(struct acpi_hest_generic_v2), }; static int hest_esrc_len(struct acpi_hest_header *hest_hdr) @@ -146,7 +147,8 @@ static int __init hest_parse_ghes_count(struct acpi_hest_header *hest_hdr, void { int *count = data; - if (hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR) + if (hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR || + hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR_V2) (*count)++; return 0; } @@ -157,7 +159,8 @@ static int __init hest_parse_ghes(struct acpi_hest_header *hest_hdr, void *data) struct ghes_arr *ghes_arr = data; int rc, i; - if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR) + if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR && + hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR_V2) return 0; if (!((struct acpi_hest_generic *)hest_hdr)->enabled) diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index 720446c..d0108b6 100644 --- a/include/acpi/ghes.h +++ b/include/acpi/ghes.h @@ -14,6 +14,7 @@ struct ghes { struct acpi_hest_generic *generic; + struct acpi_hest_generic_v2 *generic_v2; struct acpi_hest_generic_status *estatus; u64 buffer_paddr; unsigned long flags;