Message ID | 1484244924-24786-6-git-send-email-tbaicar@codeaurora.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Hi Tyler, On 12/01/17 18:15, Tyler Baicar wrote: > ARM APEI extension proposal added SEA (Synchrounous External Nit: Synchronous > Abort) notification type for ARMv8. > Add a new GHES error source handling function for SEA. If an error > source's notification type is SEA, then this function can be registered > into the SEA exception handler. That way GHES will parse and report > SEA exceptions when they occur. > diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c > index 2acbc60..87efe26 100644 > --- a/drivers/acpi/apei/ghes.c > +++ b/drivers/acpi/apei/ghes.c > @@ -767,6 +772,62 @@ static struct notifier_block ghes_notifier_sci = { > .notifier_call = ghes_notify_sci, > }; > > +#ifdef CONFIG_HAVE_ACPI_APEI_SEA > +static LIST_HEAD(ghes_sea); > + > +static int ghes_notify_sea(struct notifier_block *this, > + unsigned long event, void *data) > +{ > + struct ghes *ghes; > + int ret = NOTIFY_DONE; > + > + nmi_enter(); Can we move this into the arch code? It's because we got here from a synchronous-exception that makes this nmi-like, I think it only makes sense for it to be called from under /arch/. Where did the rcu_read_lock() go? I can see it's missing from ghes_notify_nmi() too, but I don't know enough about RCU to know if that's safe! The second paragraph in the comment above rcu_read_lock() describes it as preventing call_rcu() during a read-side critical section that was running concurrently. Doesn't this mean we can race with ghes_sea_remove() on another CPU because we wait for the wrong grace period? The same comment talks about how these read-side critical sections can nest, so I think it's quite safe to make these 'lock' calls here. 
> + list_for_each_entry_rcu(ghes, &ghes_sea, list) { > + if (!ghes_proc(ghes)) > + ret = NOTIFY_OK; > + } > + nmi_exit(); > + > + return ret; > +} > + > +static struct notifier_block ghes_notifier_sea = { > + .notifier_call = ghes_notify_sea, > +}; > + > +static int ghes_sea_add(struct ghes *ghes) > +{ > + mutex_lock(&ghes_list_mutex); > + if (list_empty(&ghes_sea)) > + register_sea_notifier(&ghes_notifier_sea); > + list_add_rcu(&ghes->list, &ghes_sea); > + mutex_unlock(&ghes_list_mutex); > + return 0; > +} > + > +static void ghes_sea_remove(struct ghes *ghes) > +{ > + mutex_lock(&ghes_list_mutex); > + list_del_rcu(&ghes->list); > + if (list_empty(&ghes_sea)) > + unregister_sea_notifier(&ghes_notifier_sea); > + mutex_unlock(&ghes_list_mutex); ghes_nmi_remove() has: > /* > * To synchronize with NMI handler, ghes can only be > * freed after NMI handler finishes. > */ > synchronize_rcu() This 'waits until a grace period has elapsed'. This is because ghes_remove() goes and kfree()s the ghes object while another CPU may be holding that entry in the list in ghes_notify_sea(). > +} > +#else /* CONFIG_HAVE_ACPI_APEI_SEA */ > +static inline int ghes_sea_add(struct ghes *ghes) > +{ > + pr_err(GHES_PFX "ID: %d, trying to add SEA notification which is not supported\n", > + ghes->generic->header.source_id); > + return -ENOTSUPP; > +} > + > +static inline void ghes_sea_remove(struct ghes *ghes) > +{ > + pr_err(GHES_PFX "ID: %d, trying to remove SEA notification which is not supported\n", > + ghes->generic->header.source_id); > +} > +#endif /* CONFIG_HAVE_ACPI_APEI_SEA */ > + > #ifdef CONFIG_HAVE_ACPI_APEI_NMI > /* > * printk is not safe in NMI context. 
So in NMI handler, we allocate > @@ -1011,6 +1072,14 @@ static int ghes_probe(struct platform_device *ghes_dev) > case ACPI_HEST_NOTIFY_EXTERNAL: > case ACPI_HEST_NOTIFY_SCI: > break; > + case ACPI_HEST_NOTIFY_SEA: > + if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_SEA)) { > + pr_warn(GHES_PFX "Generic hardware error source: %d notified via SEA is not supported\n", > + generic->header.source_id); > + rc = -ENOTSUPP; > + goto err; > + } > + break; > case ACPI_HEST_NOTIFY_NMI: > if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) { > pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n", > @@ -1022,6 +1091,13 @@ static int ghes_probe(struct platform_device *ghes_dev) > pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n", > generic->header.source_id); > goto err; > + case ACPI_HEST_NOTIFY_GPIO: > + case ACPI_HEST_NOTIFY_SEI: > + case ACPI_HEST_NOTIFY_GSIV: These three weren't mentioned in the commit message. I guess they are drive-by cleanup? > + pr_warn(GHES_PFX "Generic hardware error source: %d notified via notification type %u is not supported\n", > + generic->header.source_id, generic->header.source_id); > + rc = -ENOTSUPP; > + goto err; > default: > pr_warning(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n", > generic->notify.type, generic->header.source_id); Thanks, James
Hello James, On 1/18/2017 7:50 AM, James Morse wrote: > Hi Tyler, > > On 12/01/17 18:15, Tyler Baicar wrote: >> ARM APEI extension proposal added SEA (Synchrounous External > Nit: Synchronous I'll fix that :) >> Abort) notification type for ARMv8. >> Add a new GHES error source handling function for SEA. If an error >> source's notification type is SEA, then this function can be registered >> into the SEA exception handler. That way GHES will parse and report >> SEA exceptions when they occur. >> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c >> index 2acbc60..87efe26 100644 >> --- a/drivers/acpi/apei/ghes.c >> +++ b/drivers/acpi/apei/ghes.c >> @@ -767,6 +772,62 @@ static struct notifier_block ghes_notifier_sci = { >> .notifier_call = ghes_notify_sci, >> }; >> >> +#ifdef CONFIG_HAVE_ACPI_APEI_SEA >> +static LIST_HEAD(ghes_sea); >> + >> +static int ghes_notify_sea(struct notifier_block *this, >> + unsigned long event, void *data) >> +{ >> + struct ghes *ghes; >> + int ret = NOTIFY_DONE; >> + >> + nmi_enter(); > Can we move this into the arch code? Its because we got here from a > synchronous-exception that makes this nmi-like, I think it only makes sense for > it be called from under /arch/. So move the nmi_enter/exit calls into do_sea of the previous patch? I can do that in the next patchset. > Where did the rcu_read_lock() go? I can see its missing from ghes_notify_nmi() > too, but I don't know enough about RCU to know if that's safe! > > The second paragraph in the comment above rcu_read_lock() describes it as > preventing call_rcu() during a read-side critical section that was running > concurrently. Doesn't this mean we can race with ghes_sea_remove() on another > CPU because we wait for the wrong grace period? > > The same comment talks about how these read-side critical sections can nest, so > I think its quite safe to make these 'lock' calls here. Sorry, I thought we wanted nmi_enter/exit instead of the rcu_read_lock/unlock. 
I guess the rcu locks will not cause the deadlock scenario you described in the previous patchset if we have the nmi_enter/exit wrapped around the rcu critical section. >> + list_for_each_entry_rcu(ghes, &ghes_sea, list) { >> + if (!ghes_proc(ghes)) >> + ret = NOTIFY_OK; >> + } >> + nmi_exit(); >> + >> + return ret; >> +} >> + >> +static struct notifier_block ghes_notifier_sea = { >> + .notifier_call = ghes_notify_sea, >> +}; >> + >> +static int ghes_sea_add(struct ghes *ghes) >> +{ >> + mutex_lock(&ghes_list_mutex); >> + if (list_empty(&ghes_sea)) >> + register_sea_notifier(&ghes_notifier_sea); >> + list_add_rcu(&ghes->list, &ghes_sea); >> + mutex_unlock(&ghes_list_mutex); >> + return 0; >> +} >> + >> +static void ghes_sea_remove(struct ghes *ghes) >> +{ >> + mutex_lock(&ghes_list_mutex); >> + list_del_rcu(&ghes->list); >> + if (list_empty(&ghes_sea)) >> + unregister_sea_notifier(&ghes_notifier_sea); >> + mutex_unlock(&ghes_list_mutex); > ghes_nmi_remove() has: >> /* >> * To synchronize with NMI handler, ghes can only be >> * freed after NMI handler finishes. >> */ >> synchronize_rcu() > This 'waits until a grace period has elapsed'. This is because ghes_remove() > goes and kfree()s the ghes object while another CPU may be holding that entry in > the list in ghes_notify_sea(). I will add synchronize_rcu() in the next patchset. >> +} >> +#else /* CONFIG_HAVE_ACPI_APEI_SEA */ >> +static inline int ghes_sea_add(struct ghes *ghes) >> +{ >> + pr_err(GHES_PFX "ID: %d, trying to add SEA notification which is not supported\n", >> + ghes->generic->header.source_id); >> + return -ENOTSUPP; >> +} >> + >> +static inline void ghes_sea_remove(struct ghes *ghes) >> +{ >> + pr_err(GHES_PFX "ID: %d, trying to remove SEA notification which is not supported\n", >> + ghes->generic->header.source_id); >> +} >> +#endif /* CONFIG_HAVE_ACPI_APEI_SEA */ >> + >> #ifdef CONFIG_HAVE_ACPI_APEI_NMI >> /* >> * printk is not safe in NMI context. 
So in NMI handler, we allocate >> @@ -1011,6 +1072,14 @@ static int ghes_probe(struct platform_device *ghes_dev) >> case ACPI_HEST_NOTIFY_EXTERNAL: >> case ACPI_HEST_NOTIFY_SCI: >> break; >> + case ACPI_HEST_NOTIFY_SEA: >> + if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_SEA)) { >> + pr_warn(GHES_PFX "Generic hardware error source: %d notified via SEA is not supported\n", >> + generic->header.source_id); >> + rc = -ENOTSUPP; >> + goto err; >> + } >> + break; >> case ACPI_HEST_NOTIFY_NMI: >> if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) { >> pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n", >> @@ -1022,6 +1091,13 @@ static int ghes_probe(struct platform_device *ghes_dev) >> pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n", >> generic->header.source_id); >> goto err; > >> + case ACPI_HEST_NOTIFY_GPIO: >> + case ACPI_HEST_NOTIFY_SEI: >> + case ACPI_HEST_NOTIFY_GSIV: > These three weren't mentioned in the commit message. I guess they are drive-by > cleanup? SEI and GSIV were also added in the ACPI 6.1 spec (18.3.2.9 Hardware Error Notification) and GPIO was missing, so I added all three. Thanks, Tyler >> + pr_warn(GHES_PFX "Generic hardware error source: %d notified via notification type %u is not supported\n", >> + generic->header.source_id, generic->header.source_id); >> + rc = -ENOTSUPP; >> + goto err; >> default: >> pr_warning(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n", >> generic->notify.type, generic->header.source_id); > > Thanks, > > James > >
Hi Tyler, On 18/01/17 23:51, Baicar, Tyler wrote: > On 1/18/2017 7:50 AM, James Morse wrote: >> On 12/01/17 18:15, Tyler Baicar wrote: >>> ARM APEI extension proposal added SEA (Synchrounous External >>> Abort) notification type for ARMv8. >>> Add a new GHES error source handling function for SEA. If an error >>> source's notification type is SEA, then this function can be registered >>> into the SEA exception handler. That way GHES will parse and report >>> SEA exceptions when they occur. >>> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c >>> index 2acbc60..87efe26 100644 >>> --- a/drivers/acpi/apei/ghes.c >>> +++ b/drivers/acpi/apei/ghes.c >>> @@ -767,6 +772,62 @@ static struct notifier_block ghes_notifier_sci = { >>> .notifier_call = ghes_notify_sci, >>> }; >>> +#ifdef CONFIG_HAVE_ACPI_APEI_SEA >>> +static LIST_HEAD(ghes_sea); >>> + >>> +static int ghes_notify_sea(struct notifier_block *this, >>> + unsigned long event, void *data) >>> +{ >>> + struct ghes *ghes; >>> + int ret = NOTIFY_DONE; >>> + >>> + nmi_enter(); >> Can we move this into the arch code? Its because we got here from a >> synchronous-exception that makes this nmi-like, I think it only makes sense for >> it be called from under /arch/. > So move the nmi_enter/exit calls into do_sea of the previous patch? I can do > that in the next patchset. >> Where did the rcu_read_lock() go? I can see its missing from ghes_notify_nmi() >> too, but I don't know enough about RCU to know if that's safe! >> >> The second paragraph in the comment above rcu_read_lock() describes it as >> preventing call_rcu() during a read-side critical section that was running >> concurrently. Doesn't this mean we can race with ghes_sea_remove() on another >> CPU because we wait for the wrong grace period? >> >> The same comment talks about how these read-side critical sections can nest, so >> I think its quite safe to make these 'lock' calls here. 
> Sorry, I thought we wanted nmi_enter/exit instead of the rcu_read_lock/unlock. I > guess the rcu locks > will not cause the deadlock scenario you described in the previous patchset if > we have the > nmi_enter/exit wrapped around the rcu critical section. Ah, not instead of, (well, not initially!). The nmi_enter()/nmi_exit() thing was to fix the APEI interrupting APEI problem. This is only a problem for notification types which can interrupt interrupts-masked code, of which SEA is one (and x86's NMI is the other). I think I've found the answer to why the rcu_read_lock() isn't needed. synchronize_sched() has: > * This means that all preempt_disable code sequences, including NMI and > * non-threaded hardware-interrupt handlers, in progress on entry will > * have completed before this primitive returns. synchronize_rcu() has the same innards, so I'm convinced it's safe not to have those calls in here. Could we have a comment along the lines of: > synchronize_rcu() will wait for nmi_exit(), so no need to rcu_read_lock(). (The more I learn about RCU the scarier it becomes!) There are two other things that need changing to make the in_nmi() code path work on arm64. Always reserve the virtual-address-space, forcing GHES_IOREMAP_PAGES to be 2 regardless of CONFIG_HAVE_ACPI_APEI_NMI. This is almost a revert of 594c7255dce7a13cac50cf2470cc56e2c3b0494e (but that did a few other things too). We also need to fix ghes_ioremap_pfn_nmi() to use arch_apei_get_mem_attribute() and not assume PAGE_KERNEL. Thanks, James
On 1/19/2017 10:57 AM, James Morse wrote: > Hi Tyler, > > On 18/01/17 23:51, Baicar, Tyler wrote: >> On 1/18/2017 7:50 AM, James Morse wrote: >>> On 12/01/17 18:15, Tyler Baicar wrote: >>>> ARM APEI extension proposal added SEA (Synchrounous External >>>> Abort) notification type for ARMv8. >>>> Add a new GHES error source handling function for SEA. If an error >>>> source's notification type is SEA, then this function can be registered >>>> into the SEA exception handler. That way GHES will parse and report >>>> SEA exceptions when they occur. >>>> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c >>>> index 2acbc60..87efe26 100644 >>>> --- a/drivers/acpi/apei/ghes.c >>>> +++ b/drivers/acpi/apei/ghes.c >>>> @@ -767,6 +772,62 @@ static struct notifier_block ghes_notifier_sci = { >>>> .notifier_call = ghes_notify_sci, >>>> }; >>>> +#ifdef CONFIG_HAVE_ACPI_APEI_SEA >>>> +static LIST_HEAD(ghes_sea); >>>> + >>>> +static int ghes_notify_sea(struct notifier_block *this, >>>> + unsigned long event, void *data) >>>> +{ >>>> + struct ghes *ghes; >>>> + int ret = NOTIFY_DONE; >>>> + >>>> + nmi_enter(); >>> Can we move this into the arch code? Its because we got here from a >>> synchronous-exception that makes this nmi-like, I think it only makes sense for >>> it be called from under /arch/. >> So move the nmi_enter/exit calls into do_sea of the previous patch? I can do >> that in the next patchset. >>> Where did the rcu_read_lock() go? I can see its missing from ghes_notify_nmi() >>> too, but I don't know enough about RCU to know if that's safe! >>> >>> The second paragraph in the comment above rcu_read_lock() describes it as >>> preventing call_rcu() during a read-side critical section that was running >>> concurrently. Doesn't this mean we can race with ghes_sea_remove() on another >>> CPU because we wait for the wrong grace period? 
>>> >>> The same comment talks about how these read-side critical sections can nest, so >>> I think its quite safe to make these 'lock' calls here. >> Sorry, I thought we wanted nmi_enter/exit instead of the rcu_read_lock/unlock. I >> guess the rcu locks >> will not cause the deadlock scenario you described in the previous patchset if >> we have the >> nmi_enter/exit wrapped around the rcu critical section. > Ah, not instead of, (well, not initially!). > The nmi_enter()/nmi_exit() thing was to fix the APEI interrupting APEI problem. > This is only a problem for notification types which can interrupt > interrupts-masked code, of which SEA is one. (and x86's NMI is the other). > > I think I've found the answer to why the rcu_read_lock() isn't needed. > synchronize_sched() has: >> * This means that all preempt_disable code sequences, including NMI and >> * non-threaded hardware-interrupt handlers, in progress on entry will >> * have completed before this primitive returns. > synchronize_rcu() has the same innards, so I'm convinced this its safe not to > have those calls in here. Could we have a comment along the lines of: >> synchronize_rcu() will wait for nmi_exit(), so no need to rcu_read_lock(). Okay, I'll add the comment in the next patchset. > (The more I learn about RCU the scarier it becomes!) > > > There are two other things that need changing to make the in_nmi() code path > work on arm64. > Always reserve the virtual-address-space forcing GHES_IOREMAP_PAGES to be 2 > regardless of CONFIG_HAVE_ACPI_APEI_NMI. This is almost revert of > 594c7255dce7a13cac50cf2470cc56e2c3b0494e (but that did a few other things too). Looks simple enough, should I force it to 2 in all cases, or add a check for CONFIG_HAVE_ACPI_APEI_SEA similar to the check for CONFIG_HAVE_ACPI_APEI_NMI? > We also need to fix ghes_ioremap_pfn_nmi() to use arch_apei_get_mem_attribute() > and not assume PAGE_KERNEL. 
So just change the call to ioremap_page_range to: ioremap_page_range(vaddr, vaddr + PAGE_SIZE, pfn << PAGE_SHIFT, arch_apei_get_mem_attribute()); Thanks, Tyler
Hi Tyler, On 20/01/17 20:58, Baicar, Tyler wrote: > On 1/19/2017 10:57 AM, James Morse wrote: >> On 18/01/17 23:51, Baicar, Tyler wrote: >>> On 1/18/2017 7:50 AM, James Morse wrote: >>>> On 12/01/17 18:15, Tyler Baicar wrote: >>>>> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c >> There are two other things that need changing to make the in_nmi() code path >> work on arm64. >> Always reserve the virtual-address-space forcing GHES_IOREMAP_PAGES to be 2 >> regardless of CONFIG_HAVE_ACPI_APEI_NMI. This is almost revert of >> 594c7255dce7a13cac50cf2470cc56e2c3b0494e (but that did a few other things too). > Looks simple enough, should I force it to 2 in all cases, or add a check for > CONFIG_HAVE_ACPI_APEI_SEA > similar to the check for CONFIG_HAVE_ACPI_APEI_NMI? It's just address space, not actual memory, that it is reserving, right? I think just reserve two pages all the time to save eye-sore #ifdefs! >> We also need to fix ghes_ioremap_pfn_nmi() to use arch_apei_get_mem_attribute() >> and not assume PAGE_KERNEL. > So just change the call to ioremap_page_range to: > > ioremap_page_range(vaddr, vaddr + PAGE_SIZE, pfn << PAGE_SHIFT, > arch_apei_get_mem_attribute()); (You need to give arch_apei_get_mem_attribute() the address...) Copying whatever ghes_ioremap_pfn_irq() does a few lines down is probably best. Thanks, James
On 1/24/2017 10:55 AM, James Morse wrote: > Hi Tyler, > > On 20/01/17 20:58, Baicar, Tyler wrote: >> On 1/19/2017 10:57 AM, James Morse wrote: >>> On 18/01/17 23:51, Baicar, Tyler wrote: >>>> On 1/18/2017 7:50 AM, James Morse wrote: >>>>> On 12/01/17 18:15, Tyler Baicar wrote: >>>>>> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c >>> There are two other things that need changing to make the in_nmi() code path >>> work on arm64. >>> Always reserve the virtual-address-space forcing GHES_IOREMAP_PAGES to be 2 >>> regardless of CONFIG_HAVE_ACPI_APEI_NMI. This is almost revert of >>> 594c7255dce7a13cac50cf2470cc56e2c3b0494e (but that did a few other things too). >> Looks simple enough, should I force it to 2 in all cases, or add a check for >> CONFIG_HAVE_ACPI_APEI_SEA >> similar to the check for CONFIG_HAVE_ACPI_APEI_NMI? > Its just address space not actual memory it is reserving right? I think just > reserve two pages all the time to save eye-sore #ifdefs! > Okay, will do! >>> We also need to fix ghes_ioremap_pfn_nmi() to use arch_apei_get_mem_attribute() >>> and not assume PAGE_KERNEL. >> So just change the call to ioremap_page_range to: >> >> ioremap_page_range(vaddr, vaddr + PAGE_SIZE, pfn << PAGE_SHIFT, >> arch_apei_get_mem_attribute()); > (you need to give arch_apei_get_mem_attribute() the address...) copying whatever > ghes_ioremap_pfn_irq() does a few lines down is probably best. Sounds good, I'll make the changes in my next patchset. Thanks, Tyler
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b380c87..0465601 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -53,6 +53,8 @@ config ARM64 select HANDLE_DOMAIN_IRQ select HARDIRQS_SW_RESEND select HAVE_ACPI_APEI if (ACPI && EFI) + select HAVE_ACPI_APEI_SEA if (ACPI && EFI) + select HAVE_NMI if HAVE_ACPI_APEI_SEA select HAVE_ALIGNED_STRUCT_PAGE if SLUB select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_BITREVERSE diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig index b0140c8..3786ff1 100644 --- a/drivers/acpi/apei/Kconfig +++ b/drivers/acpi/apei/Kconfig @@ -4,6 +4,20 @@ config HAVE_ACPI_APEI config HAVE_ACPI_APEI_NMI bool +config HAVE_ACPI_APEI_SEA + bool "APEI Synchronous External Abort logging/recovering support" + depends on ARM64 + help + This option should be enabled if the system supports + firmware first handling of SEA (Synchronous External Abort). + SEA happens with certain faults of data abort or instruction + abort synchronous exceptions on ARMv8 systems. If a system + supports firmware first handling of SEA, the platform analyzes + and handles hardware error notifications with SEA, and it may then + form a HW error record for the OS to parse and handle. This + option allows the OS to look for such HW error record, and + take appropriate action. 
+ config ACPI_APEI bool "ACPI Platform Error Interface (APEI)" select MISC_FILESYSTEMS diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 2acbc60..87efe26 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -44,12 +44,17 @@ #include <linux/pci.h> #include <linux/aer.h> #include <linux/nmi.h> +#include <linux/hardirq.h> #include <acpi/actbl1.h> #include <acpi/ghes.h> #include <acpi/apei.h> #include <asm/tlbflush.h> +#ifdef CONFIG_HAVE_ACPI_APEI_SEA +#include <asm/system_misc.h> +#endif + #include "apei-internal.h" #define GHES_PFX "GHES: " @@ -767,6 +772,62 @@ static struct notifier_block ghes_notifier_sci = { .notifier_call = ghes_notify_sci, }; +#ifdef CONFIG_HAVE_ACPI_APEI_SEA +static LIST_HEAD(ghes_sea); + +static int ghes_notify_sea(struct notifier_block *this, + unsigned long event, void *data) +{ + struct ghes *ghes; + int ret = NOTIFY_DONE; + + nmi_enter(); + list_for_each_entry_rcu(ghes, &ghes_sea, list) { + if (!ghes_proc(ghes)) + ret = NOTIFY_OK; + } + nmi_exit(); + + return ret; +} + +static struct notifier_block ghes_notifier_sea = { + .notifier_call = ghes_notify_sea, +}; + +static int ghes_sea_add(struct ghes *ghes) +{ + mutex_lock(&ghes_list_mutex); + if (list_empty(&ghes_sea)) + register_sea_notifier(&ghes_notifier_sea); + list_add_rcu(&ghes->list, &ghes_sea); + mutex_unlock(&ghes_list_mutex); + return 0; +} + +static void ghes_sea_remove(struct ghes *ghes) +{ + mutex_lock(&ghes_list_mutex); + list_del_rcu(&ghes->list); + if (list_empty(&ghes_sea)) + unregister_sea_notifier(&ghes_notifier_sea); + mutex_unlock(&ghes_list_mutex); +} +#else /* CONFIG_HAVE_ACPI_APEI_SEA */ +static inline int ghes_sea_add(struct ghes *ghes) +{ + pr_err(GHES_PFX "ID: %d, trying to add SEA notification which is not supported\n", + ghes->generic->header.source_id); + return -ENOTSUPP; +} + +static inline void ghes_sea_remove(struct ghes *ghes) +{ + pr_err(GHES_PFX "ID: %d, trying to remove SEA notification which is not supported\n", + 
ghes->generic->header.source_id); +} +#endif /* CONFIG_HAVE_ACPI_APEI_SEA */ + #ifdef CONFIG_HAVE_ACPI_APEI_NMI /* * printk is not safe in NMI context. So in NMI handler, we allocate @@ -1011,6 +1072,14 @@ static int ghes_probe(struct platform_device *ghes_dev) case ACPI_HEST_NOTIFY_EXTERNAL: case ACPI_HEST_NOTIFY_SCI: break; + case ACPI_HEST_NOTIFY_SEA: + if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_SEA)) { + pr_warn(GHES_PFX "Generic hardware error source: %d notified via SEA is not supported\n", + generic->header.source_id); + rc = -ENOTSUPP; + goto err; + } + break; case ACPI_HEST_NOTIFY_NMI: if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) { pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n", @@ -1022,6 +1091,13 @@ static int ghes_probe(struct platform_device *ghes_dev) pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n", generic->header.source_id); goto err; + case ACPI_HEST_NOTIFY_GPIO: + case ACPI_HEST_NOTIFY_SEI: + case ACPI_HEST_NOTIFY_GSIV: + pr_warn(GHES_PFX "Generic hardware error source: %d notified via notification type %u is not supported\n", + generic->header.source_id, generic->header.source_id); + rc = -ENOTSUPP; + goto err; default: pr_warning(FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware error source: %d\n", generic->notify.type, generic->header.source_id); @@ -1076,6 +1152,11 @@ static int ghes_probe(struct platform_device *ghes_dev) list_add_rcu(&ghes->list, &ghes_sci); mutex_unlock(&ghes_list_mutex); break; + case ACPI_HEST_NOTIFY_SEA: + rc = ghes_sea_add(ghes); + if (rc) + goto err_edac_unreg; + break; case ACPI_HEST_NOTIFY_NMI: ghes_nmi_add(ghes); break; @@ -1118,6 +1199,9 @@ static int ghes_remove(struct platform_device *ghes_dev) unregister_acpi_hed_notifier(&ghes_notifier_sci); mutex_unlock(&ghes_list_mutex); break; + case ACPI_HEST_NOTIFY_SEA: + ghes_sea_remove(ghes); + break; case ACPI_HEST_NOTIFY_NMI: 
ghes_nmi_remove(ghes); break;