Message ID | 1490729440-32591-2-git-send-email-tbaicar@codeaurora.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Tue, Mar 28, 2017 at 01:30:31PM -0600, Tyler Baicar wrote: > A RAS (Reliability, Availability, Serviceability) controller > may be a separate processor running in parallel with OS > execution, and may generate error records for consumption by > the OS. If the RAS controller produces multiple error records, > then they may be overwritten before the OS has consumed them. > > The Generic Hardware Error Source (GHES) v2 structure > introduces the capability for the OS to acknowledge the > consumption of the error record generated by the RAS > controller. A RAS controller supporting GHESv2 shall wait for > the acknowledgment before writing a new error record, thus > eliminating the race condition. > > Add support for parsing of GHESv2 sub-tables as well. > > Signed-off-by: Tyler Baicar <tbaicar@codeaurora.org> > CC: Jonathan (Zhixiong) Zhang <zjzhang@codeaurora.org> > Reviewed-by: James Morse <james.morse@arm.com> > --- > drivers/acpi/apei/ghes.c | 49 +++++++++++++++++++++++++++++++++++++++++++++--- > drivers/acpi/apei/hest.c | 7 +++++-- > include/acpi/ghes.h | 5 ++++- > 3 files changed, 55 insertions(+), 6 deletions(-) ... > @@ -249,10 +254,18 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic) > ghes = kzalloc(sizeof(*ghes), GFP_KERNEL); > if (!ghes) > return ERR_PTR(-ENOMEM); > + > ghes->generic = generic; > + if (IS_HEST_TYPE_GENERIC_V2(ghes)) { > + rc = apei_map_generic_address( > + &ghes->generic_v2->read_ack_register); Yeah, that linebreak just to keep the 80-cols rule makes the code ugly and hard to read. Please put that mapping and unmapping in wrappers called map_gen_v2(ghes) and unmap_gen_v2(ghes) or so, so that you can call them wherever needed. This should make the flow a bit more understandable what's going on and you won't have to repeat the unmapping lines in ghes_fini(). 
> @@ -649,6 +669,23 @@ static void ghes_estatus_cache_add( > rcu_read_unlock(); > } > > +static int ghes_ack_error(struct acpi_hest_generic_v2 *generic_v2) > +{ > + int rc; > + u64 val = 0; > + > + rc = apei_read(&val, &generic_v2->read_ack_register); > + if (rc) > + return rc; > + val &= generic_v2->read_ack_preserve << > + generic_v2->read_ack_register.bit_offset; > + val |= generic_v2->read_ack_write << > + generic_v2->read_ack_register.bit_offset; Yeah, let them stick out, it's more readable this way. Line spacing is helpful too: ... rc = apei_read(&val, &generic_v2->read_ack_register); if (rc) return rc; val &= generic_v2->read_ack_preserve << generic_v2->read_ack_register.bit_offset; val |= generic_v2->read_ack_write << generic_v2->read_ack_register.bit_offset; return apei_write(val, &generic_v2->read_ack_register); } > + rc = apei_write(val, &generic_v2->read_ack_register); > + > + return rc; > +} > + > static int ghes_proc(struct ghes *ghes) > { > int rc;
On 4/11/2017 11:15 AM, Borislav Petkov wrote: > On Tue, Mar 28, 2017 at 01:30:31PM -0600, Tyler Baicar wrote: >> A RAS (Reliability, Availability, Serviceability) controller >> may be a separate processor running in parallel with OS >> execution, and may generate error records for consumption by >> the OS. If the RAS controller produces multiple error records, >> then they may be overwritten before the OS has consumed them. >> >> The Generic Hardware Error Source (GHES) v2 structure >> introduces the capability for the OS to acknowledge the >> consumption of the error record generated by the RAS >> controller. A RAS controller supporting GHESv2 shall wait for >> the acknowledgment before writing a new error record, thus >> eliminating the race condition. >> >> Add support for parsing of GHESv2 sub-tables as well. >> >> Signed-off-by: Tyler Baicar <tbaicar@codeaurora.org> >> CC: Jonathan (Zhixiong) Zhang <zjzhang@codeaurora.org> >> Reviewed-by: James Morse <james.morse@arm.com> >> --- >> drivers/acpi/apei/ghes.c | 49 +++++++++++++++++++++++++++++++++++++++++++++--- >> drivers/acpi/apei/hest.c | 7 +++++-- >> include/acpi/ghes.h | 5 ++++- >> 3 files changed, 55 insertions(+), 6 deletions(-) > ... > >> @@ -249,10 +254,18 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic) >> ghes = kzalloc(sizeof(*ghes), GFP_KERNEL); >> if (!ghes) >> return ERR_PTR(-ENOMEM); >> + >> ghes->generic = generic; >> + if (IS_HEST_TYPE_GENERIC_V2(ghes)) { >> + rc = apei_map_generic_address( >> + &ghes->generic_v2->read_ack_register); > Yeah, that linebreak just to keep the 80-cols rule makes the code ugly > and hard to read. > > Please put that mapping and unmapping in wrappers called > map_gen_v2(ghes) and unmap_gen_v2(ghes) or so, so that you can call them > wherever needed. Thus should make the flow a bit more understandable > what's going on and you won't have to repeat the unmapping lines in > ghes_fini(). Hello Boris, Thank you for the feedback. 
I will make this change in the next version. >> @@ -649,6 +669,23 @@ static void ghes_estatus_cache_add( >> rcu_read_unlock(); >> } >> >> +static int ghes_ack_error(struct acpi_hest_generic_v2 *generic_v2) >> +{ >> + int rc; >> + u64 val = 0; >> + >> + rc = apei_read(&val, &generic_v2->read_ack_register); >> + if (rc) >> + return rc; >> + val &= generic_v2->read_ack_preserve << >> + generic_v2->read_ack_register.bit_offset; >> + val |= generic_v2->read_ack_write << >> + generic_v2->read_ack_register.bit_offset; > Yeah, let them stick out, it more readable this way. Line spacing is > helpful too: > > ... > rc = apei_read(&val, &generic_v2->read_ack_register); > if (rc) > return rc; > > val &= generic_v2->read_ack_preserve << generic_v2->read_ack_register.bit_offset; > val |= generic_v2->read_ack_write << generic_v2->read_ack_register.bit_offset; > > return apei_write(val, &generic_v2->read_ack_register); > } I will make this change in the next version. Thanks, Tyler >> + rc = apei_write(val, &generic_v2->read_ack_register); >> + >> + return rc; >> +} >> + >> static int ghes_proc(struct ghes *ghes) >> { >> int rc;
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index b192b42..0241e36 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -46,6 +46,7 @@ #include <linux/nmi.h> #include <linux/sched/clock.h> +#include <acpi/actbl1.h> #include <acpi/ghes.h> #include <acpi/apei.h> #include <asm/tlbflush.h> @@ -80,6 +81,10 @@ ((struct acpi_hest_generic_status *) \ ((struct ghes_estatus_node *)(estatus_node) + 1)) +#define IS_HEST_TYPE_GENERIC_V2(ghes) \ + ((struct acpi_hest_header *)ghes->generic)->type == \ + ACPI_HEST_TYPE_GENERIC_ERROR_V2 + /* * This driver isn't really modular, however for the time being, * continuing to use module_param is the easiest way to remain @@ -249,10 +254,18 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic) ghes = kzalloc(sizeof(*ghes), GFP_KERNEL); if (!ghes) return ERR_PTR(-ENOMEM); + ghes->generic = generic; + if (IS_HEST_TYPE_GENERIC_V2(ghes)) { + rc = apei_map_generic_address( + &ghes->generic_v2->read_ack_register); + if (rc) + goto err_free; + } + rc = apei_map_generic_address(&generic->error_status_address); if (rc) - goto err_free; + goto err_unmap_read_ack_addr; error_block_length = generic->error_block_length; if (error_block_length > GHES_ESTATUS_MAX_SIZE) { pr_warning(FW_WARN GHES_PFX @@ -264,13 +277,17 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic) ghes->estatus = kmalloc(error_block_length, GFP_KERNEL); if (!ghes->estatus) { rc = -ENOMEM; - goto err_unmap; + goto err_unmap_status_addr; } return ghes; -err_unmap: +err_unmap_status_addr: apei_unmap_generic_address(&generic->error_status_address); +err_unmap_read_ack_addr: + if (IS_HEST_TYPE_GENERIC_V2(ghes)) + apei_unmap_generic_address( + &ghes->generic_v2->read_ack_register); err_free: kfree(ghes); return ERR_PTR(rc); @@ -280,6 +297,9 @@ static void ghes_fini(struct ghes *ghes) { kfree(ghes->estatus); apei_unmap_generic_address(&ghes->generic->error_status_address); + if (IS_HEST_TYPE_GENERIC_V2(ghes)) + 
apei_unmap_generic_address( + &ghes->generic_v2->read_ack_register); } static inline int ghes_severity(int severity) @@ -649,6 +669,23 @@ static void ghes_estatus_cache_add( rcu_read_unlock(); } +static int ghes_ack_error(struct acpi_hest_generic_v2 *generic_v2) +{ + int rc; + u64 val = 0; + + rc = apei_read(&val, &generic_v2->read_ack_register); + if (rc) + return rc; + val &= generic_v2->read_ack_preserve << + generic_v2->read_ack_register.bit_offset; + val |= generic_v2->read_ack_write << + generic_v2->read_ack_register.bit_offset; + rc = apei_write(val, &generic_v2->read_ack_register); + + return rc; +} + static int ghes_proc(struct ghes *ghes) { int rc; @@ -661,6 +698,12 @@ static int ghes_proc(struct ghes *ghes) ghes_estatus_cache_add(ghes->generic, ghes->estatus); } ghes_do_proc(ghes, ghes->estatus); + + if (IS_HEST_TYPE_GENERIC_V2(ghes)) { + rc = ghes_ack_error(ghes->generic_v2); + if (rc) + return rc; + } out: ghes_clear_estatus(ghes); return rc; diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c index 8f2a98e..456b488 100644 --- a/drivers/acpi/apei/hest.c +++ b/drivers/acpi/apei/hest.c @@ -52,6 +52,7 @@ [ACPI_HEST_TYPE_AER_ENDPOINT] = sizeof(struct acpi_hest_aer), [ACPI_HEST_TYPE_AER_BRIDGE] = sizeof(struct acpi_hest_aer_bridge), [ACPI_HEST_TYPE_GENERIC_ERROR] = sizeof(struct acpi_hest_generic), + [ACPI_HEST_TYPE_GENERIC_ERROR_V2] = sizeof(struct acpi_hest_generic_v2), }; static int hest_esrc_len(struct acpi_hest_header *hest_hdr) @@ -141,7 +142,8 @@ static int __init hest_parse_ghes_count(struct acpi_hest_header *hest_hdr, void { int *count = data; - if (hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR) + if (hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR || + hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR_V2) (*count)++; return 0; } @@ -152,7 +154,8 @@ static int __init hest_parse_ghes(struct acpi_hest_header *hest_hdr, void *data) struct ghes_arr *ghes_arr = data; int rc, i; - if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR) + if 
(hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR && + hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR_V2) return 0; if (!((struct acpi_hest_generic *)hest_hdr)->enabled) diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index 720446c..68f088a 100644 --- a/include/acpi/ghes.h +++ b/include/acpi/ghes.h @@ -13,7 +13,10 @@ #define GHES_EXITING 0x0002 struct ghes { - struct acpi_hest_generic *generic; + union { + struct acpi_hest_generic *generic; + struct acpi_hest_generic_v2 *generic_v2; + }; struct acpi_hest_generic_status *estatus; u64 buffer_paddr; unsigned long flags;