@@ -504,6 +504,7 @@ int acpi_gsi_to_irq(u32 gsi, unsigned in
return 0;
}
+EXPORT_SYMBOL_GPL(acpi_gsi_to_irq);
int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi)
{
@@ -240,6 +240,7 @@ unsigned __kprobes long oops_begin(void)
bust_spinlocks(1);
return flags;
}
+EXPORT_SYMBOL_GPL(oops_begin);
void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
{
@@ -12,10 +12,6 @@
* For more information about Generic Hardware Error Source, please
* refer to ACPI Specification version 4.0, section 17.3.2.6
*
- * Now, only SCI notification type and memory errors are
- * supported. More notification type and hardware error type will be
- * added later.
- *
* Copyright 2010 Intel Corp.
* Author: Huang Ying <ying.huang@intel.com>
*
@@ -39,15 +35,18 @@
#include <linux/acpi.h>
#include <linux/io.h>
#include <linux/interrupt.h>
+#include <linux/timer.h>
#include <linux/cper.h>
#include <linux/kdebug.h>
#include <linux/platform_device.h>
#include <linux/mutex.h>
#include <linux/ratelimit.h>
+#include <linux/vmalloc.h>
#include <acpi/apei.h>
#include <acpi/atomicio.h>
#include <acpi/hed.h>
#include <asm/mce.h>
+#include <asm/tlbflush.h>
#include "apei-internal.h"
@@ -56,42 +55,132 @@
#define GHES_ESTATUS_MAX_SIZE 65536
/*
- * One struct ghes is created for each generic hardware error
- * source.
- *
+ * One struct ghes is created for each generic hardware error source.
* It provides the context for APEI hardware error timer/IRQ/SCI/NMI
- * handler. Handler for one generic hardware error source is only
- * triggered after the previous one is done. So handler can uses
- * struct ghes without locking.
+ * handler.
*
* estatus: memory buffer for error status block, allocated during
* HEST parsing.
*/
#define GHES_TO_CLEAR 0x0001
+#define GHES_EXITING 0x0002
struct ghes {
struct acpi_hest_generic *generic;
struct acpi_hest_generic_status *estatus;
- struct list_head list;
u64 buffer_paddr;
unsigned long flags;
+ union {
+ struct list_head list;
+ struct timer_list timer;
+ unsigned int irq;
+ };
};
+static int ghes_panic_timeout __read_mostly = 30;
+
/*
- * Error source lists, one list for each notification method. The
- * members in lists are struct ghes.
+ * All error sources notified with SCI shares one notifier function,
+ * so they need to be linked and checked one by one. This is applied
+ * to NMI too.
*
- * The list members are only added in HEST parsing and deleted during
- * module_exit, that is, single-threaded. So no lock is needed for
- * that.
- *
- * But the mutual exclusion is needed between members adding/deleting
- * and timer/IRQ/SCI/NMI handler, which may traverse the list. RCU is
- * used for that.
+ * RCU is used for these lists, so ghes_list_mutex is only used for
+ * list changing, not for traversing.
*/
static LIST_HEAD(ghes_sci);
+static LIST_HEAD(ghes_nmi);
static DEFINE_MUTEX(ghes_list_mutex);
+/*
+ * NMI may be triggered on any CPU, so ghes_nmi_lock is used for
+ * mutual exclusion.
+ */
+static DEFINE_RAW_SPINLOCK(ghes_nmi_lock);
+
+/*
+ * Because the memory area used to transfer hardware error information
+ * from BIOS to Linux can be determined only in NMI, IRQ or timer
+ * handler, but general ioremap can not be used in atomic context, so
+ * a special version of atomic ioremap is implemented for that.
+ */
+
+/*
+ * Two virtual pages are used, one for NMI context, the other for
+ * IRQ/PROCESS context
+ */
+#define GHES_IOREMAP_PAGES 2
+#define GHES_IOREMAP_NMI_PAGE(base) (base)
+#define GHES_IOREMAP_IRQ_PAGE(base) ((base) + PAGE_SIZE)
+
+/* virtual memory area for atomic ioremap */
+static struct vm_struct *ghes_ioremap_area;
+/*
+ * These 2 spinlock is used to prevent atomic ioremap virtual memory
+ * area from being mapped simultaneously.
+ */
+static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi);
+static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);
+
+static int ghes_ioremap_init(void)
+{
+ ghes_ioremap_area = __get_vm_area(PAGE_SIZE * GHES_IOREMAP_PAGES,
+ VM_IOREMAP, VMALLOC_START, VMALLOC_END);
+ if (!ghes_ioremap_area) {
+ pr_err(
+GHES_PFX "Failed to allocate virtual memory area for atomic ioremap.\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void ghes_ioremap_exit(void)
+{
+ free_vm_area(ghes_ioremap_area);
+}
+
+static void __iomem *ghes_ioremap_pfn_nmi(u64 pfn)
+{
+ unsigned long vaddr;
+
+ vaddr = (unsigned long)GHES_IOREMAP_NMI_PAGE(ghes_ioremap_area->addr);
+ ioremap_page_range(vaddr, vaddr + PAGE_SIZE,
+ pfn << PAGE_SHIFT, PAGE_KERNEL);
+
+ return (void __iomem *)vaddr;
+}
+
+static void __iomem *ghes_ioremap_pfn_irq(u64 pfn)
+{
+ unsigned long vaddr;
+
+ vaddr = (unsigned long)GHES_IOREMAP_IRQ_PAGE(ghes_ioremap_area->addr);
+ ioremap_page_range(vaddr, vaddr + PAGE_SIZE,
+ pfn << PAGE_SHIFT, PAGE_KERNEL);
+
+ return (void __iomem *)vaddr;
+}
+
+static void ghes_iounmap_nmi(void __iomem *vaddr_ptr)
+{
+ unsigned long vaddr = (unsigned long __force)vaddr_ptr;
+ void *base = ghes_ioremap_area->addr;
+
+ BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_NMI_PAGE(base));
+ unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
+ __flush_tlb_one(vaddr);
+}
+
+static void ghes_iounmap_irq(void __iomem *vaddr_ptr)
+{
+ unsigned long vaddr = (unsigned long __force)vaddr_ptr;
+ void *base = ghes_ioremap_area->addr;
+
+ BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_IRQ_PAGE(base));
+ unmap_kernel_range_noflush(vaddr, PAGE_SIZE);
+ __flush_tlb_one(vaddr);
+}
+
static struct ghes *ghes_new(struct acpi_hest_generic *generic)
{
struct ghes *ghes;
@@ -102,15 +191,14 @@ static struct ghes *ghes_new(struct acpi
if (!ghes)
return ERR_PTR(-ENOMEM);
ghes->generic = generic;
- INIT_LIST_HEAD(&ghes->list);
rc = acpi_pre_map_gar(&generic->error_status_address);
if (rc)
goto err_free;
error_block_length = generic->error_block_length;
if (error_block_length > GHES_ESTATUS_MAX_SIZE) {
- pr_warning(FW_WARN GHES_PFX
- "Error status block length is too long: %u for "
- "generic hardware error source: %d.\n",
+ pr_warning(
+FW_WARN GHES_PFX "Error status block length is too long: %u for \n"
+"generic hardware error source: %d.\n",
error_block_length, generic->header.source_id);
error_block_length = GHES_ESTATUS_MAX_SIZE;
}
@@ -159,22 +247,41 @@ static inline int ghes_severity(int seve
}
}
-/* SCI handler run in work queue, so ioremap can be used here */
-static int ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
- int from_phys)
+static void ghes_copy_tofrom_phys(void *buffer, u64 paddr, u32 len,
+ int from_phys)
{
- void *vaddr;
-
- vaddr = ioremap_cache(paddr, len);
- if (!vaddr)
- return -ENOMEM;
- if (from_phys)
- memcpy(buffer, vaddr, len);
- else
- memcpy(vaddr, buffer, len);
- iounmap(vaddr);
-
- return 0;
+ void __iomem *vaddr;
+ unsigned long flags = 0;
+ int in_nmi = in_nmi();
+ u64 offset;
+ u32 trunk;
+
+ while (len > 0) {
+ offset = paddr - (paddr & PAGE_MASK);
+ if (in_nmi) {
+ raw_spin_lock(&ghes_ioremap_lock_nmi);
+ vaddr = ghes_ioremap_pfn_nmi(paddr >> PAGE_SHIFT);
+ } else {
+ spin_lock_irqsave(&ghes_ioremap_lock_irq, flags);
+ vaddr = ghes_ioremap_pfn_irq(paddr >> PAGE_SHIFT);
+ }
+ trunk = PAGE_SIZE - offset;
+ trunk = min(trunk, len);
+ if (from_phys)
+ memcpy_fromio(buffer, vaddr + offset, trunk);
+ else
+ memcpy_toio(vaddr + offset, buffer, trunk);
+ len -= trunk;
+ paddr += trunk;
+ buffer += trunk;
+ if (in_nmi) {
+ ghes_iounmap_nmi(vaddr);
+ raw_spin_unlock(&ghes_ioremap_lock_nmi);
+ } else {
+ ghes_iounmap_irq(vaddr);
+ spin_unlock_irqrestore(&ghes_ioremap_lock_irq, flags);
+ }
+ }
}
static int ghes_read_estatus(struct ghes *ghes, int silent)
@@ -187,18 +294,17 @@ static int ghes_read_estatus(struct ghes
rc = acpi_atomic_read(&buf_paddr, &g->error_status_address);
if (rc) {
if (!silent && printk_ratelimit())
- pr_warning(FW_WARN GHES_PFX
-"Failed to read error status block address for hardware error source: %d.\n",
+ pr_warning(
+FW_WARN GHES_PFX "Failed to read error status block address for\n"
+"hardware error source: %d.\n",
g->header.source_id);
return -EIO;
}
if (!buf_paddr)
return -ENOENT;
- rc = ghes_copy_tofrom_phys(ghes->estatus, buf_paddr,
- sizeof(*ghes->estatus), 1);
- if (rc)
- return rc;
+ ghes_copy_tofrom_phys(ghes->estatus, buf_paddr,
+ sizeof(*ghes->estatus), 1);
if (!ghes->estatus->block_status)
return -ENOENT;
@@ -213,17 +319,15 @@ static int ghes_read_estatus(struct ghes
goto err_read_block;
if (apei_estatus_check_header(ghes->estatus))
goto err_read_block;
- rc = ghes_copy_tofrom_phys(ghes->estatus + 1,
- buf_paddr + sizeof(*ghes->estatus),
- len - sizeof(*ghes->estatus), 1);
- if (rc)
- return rc;
+ ghes_copy_tofrom_phys(ghes->estatus + 1,
+ buf_paddr + sizeof(*ghes->estatus),
+ len - sizeof(*ghes->estatus), 1);
if (apei_estatus_check(ghes->estatus))
goto err_read_block;
rc = 0;
err_read_block:
- if (rc && !silent)
+ if (rc && !silent && printk_ratelimit())
pr_warning(FW_WARN GHES_PFX
"Failed to read error status block!\n");
return rc;
@@ -293,6 +397,44 @@ out:
return 0;
}
+static void ghes_add_timer(struct ghes *ghes)
+{
+ struct acpi_hest_generic *g = ghes->generic;
+ unsigned long expire;
+
+ if (!g->notify.poll_interval) {
+ pr_warning(
+FW_WARN GHES_PFX "Poll interval is 0 for generic hardware\n"
+"error source: %d, disabled.",
+ g->header.source_id);
+ return;
+ }
+ expire = jiffies + msecs_to_jiffies(g->notify.poll_interval);
+ ghes->timer.expires = round_jiffies_relative(expire);
+ add_timer(&ghes->timer);
+}
+
+static void ghes_poll_func(unsigned long data)
+{
+ struct ghes *ghes = (void *)data;
+
+ ghes_proc(ghes);
+ if (!(ghes->flags & GHES_EXITING))
+ ghes_add_timer(ghes);
+}
+
+static irqreturn_t ghes_irq_func(int irq, void *data)
+{
+ struct ghes *ghes = data;
+ int rc;
+
+ rc = ghes_proc(ghes);
+ if (rc)
+ return IRQ_NONE;
+
+ return IRQ_HANDLED;
+}
+
static int ghes_notify_sci(struct notifier_block *this,
unsigned long event, void *data)
{
@@ -309,10 +451,63 @@ static int ghes_notify_sci(struct notifi
return ret;
}
+static int ghes_notify_nmi(struct notifier_block *this,
+ unsigned long cmd, void *data)
+{
+ struct ghes *ghes, *ghes_global = NULL;
+ int sev, sev_global = -1;
+ int ret = NOTIFY_DONE;
+
+ if (cmd != DIE_NMI && cmd != DIE_NMI_IPI)
+ return ret;
+
+ raw_spin_lock(&ghes_nmi_lock);
+ list_for_each_entry_rcu(ghes, &ghes_nmi, list) {
+ if (ghes_read_estatus(ghes, 1)) {
+ ghes_clear_estatus(ghes);
+ continue;
+ }
+ sev = ghes_severity(ghes->estatus->error_severity);
+ if (sev > sev_global) {
+ sev_global = sev;
+ ghes_global = ghes;
+ }
+ ret = NOTIFY_STOP;
+ }
+
+ if (ret == NOTIFY_DONE)
+ goto out;
+
+ if (sev_global >= GHES_SEV_PANIC) {
+ oops_begin();
+ ghes_print_estatus(KERN_EMERG HW_ERR, ghes_global);
+ /* reboot to log the error! */
+ if (panic_timeout == 0)
+ panic_timeout = ghes_panic_timeout;
+ panic("Fatal hardware error!");
+ }
+
+ list_for_each_entry_rcu(ghes, &ghes_nmi, list) {
+ if (!(ghes->flags & GHES_TO_CLEAR))
+ continue;
+ /* Do not print estatus because printk is not NMI safe */
+ ghes_do_proc(ghes);
+ ghes_clear_estatus(ghes);
+ }
+
+out:
+ raw_spin_unlock(&ghes_nmi_lock);
+ return ret;
+}
+
static struct notifier_block ghes_notifier_sci = {
.notifier_call = ghes_notify_sci,
};
+static struct notifier_block ghes_notifier_nmi = {
+ .notifier_call = ghes_notify_nmi,
+};
+
static int __devinit ghes_probe(struct platform_device *ghes_dev)
{
struct acpi_hest_generic *generic;
@@ -323,18 +518,33 @@ static int __devinit ghes_probe(struct p
if (!generic->enabled)
return -ENODEV;
- if (generic->error_block_length <
- sizeof(struct acpi_hest_generic_status)) {
- pr_warning(FW_BUG GHES_PFX
-"Invalid error block length: %u for generic hardware error source: %d\n",
- generic->error_block_length,
+ switch (generic->notify.type) {
+ case ACPI_HEST_NOTIFY_POLLED:
+ case ACPI_HEST_NOTIFY_EXTERNAL:
+ case ACPI_HEST_NOTIFY_SCI:
+ case ACPI_HEST_NOTIFY_NMI:
+ break;
+ case ACPI_HEST_NOTIFY_LOCAL:
+ pr_warning(
+GHES_PFX "Generic hardware error source: %d notified via local interrupt\n"
+"is not supported!\n",
generic->header.source_id);
goto err;
+ default:
+ pr_warning(
+FW_WARN GHES_PFX "Unknown notification type: %u for generic hardware\n"
+"error source: %d\n",
+ generic->notify.type, generic->header.source_id);
+ goto err;
}
- if (generic->records_to_preallocate == 0) {
- pr_warning(FW_BUG GHES_PFX
-"Invalid records to preallocate: %u for generic hardware error source: %d\n",
- generic->records_to_preallocate,
+
+ rc = -EIO;
+ if (generic->error_block_length <
+ sizeof(struct acpi_hest_generic_status)) {
+ pr_warning(
+FW_BUG GHES_PFX "Invalid error block length: %u for generic hardware\n"
+"error source: %d\n",
+ generic->error_block_length,
generic->header.source_id);
goto err;
}
@@ -344,38 +554,45 @@ static int __devinit ghes_probe(struct p
ghes = NULL;
goto err;
}
- if (generic->notify.type == ACPI_HEST_NOTIFY_SCI) {
+ switch (generic->notify.type) {
+ case ACPI_HEST_NOTIFY_POLLED:
+ ghes->timer.function = ghes_poll_func;
+ ghes->timer.data = (unsigned long)ghes;
+ init_timer_deferrable(&ghes->timer);
+ ghes_add_timer(ghes);
+ break;
+ case ACPI_HEST_NOTIFY_EXTERNAL:
+ /* External interrupt vector is GSI */
+ if (acpi_gsi_to_irq(generic->notify.vector, &ghes->irq)) {
+ pr_err(
+GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n",
+ generic->header.source_id);
+ goto err;
+ }
+ if (request_irq(ghes->irq, ghes_irq_func,
+ 0, "GHES IRQ", ghes)) {
+ pr_err(
+GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n",
+ generic->header.source_id);
+ goto err;
+ }
+ break;
+ case ACPI_HEST_NOTIFY_SCI:
mutex_lock(&ghes_list_mutex);
if (list_empty(&ghes_sci))
register_acpi_hed_notifier(&ghes_notifier_sci);
list_add_rcu(&ghes->list, &ghes_sci);
mutex_unlock(&ghes_list_mutex);
- } else {
- unsigned char *notify = NULL;
-
- switch (generic->notify.type) {
- case ACPI_HEST_NOTIFY_POLLED:
- notify = "POLL";
- break;
- case ACPI_HEST_NOTIFY_EXTERNAL:
- case ACPI_HEST_NOTIFY_LOCAL:
- notify = "IRQ";
- break;
- case ACPI_HEST_NOTIFY_NMI:
- notify = "NMI";
- break;
- }
- if (notify) {
- pr_warning(GHES_PFX
-"Generic hardware error source: %d notified via %s is not supported!\n",
- generic->header.source_id, notify);
- } else {
- pr_warning(FW_WARN GHES_PFX
-"Unknown notification type: %u for generic hardware error source: %d\n",
- generic->notify.type, generic->header.source_id);
- }
- rc = -ENODEV;
- goto err;
+ break;
+ case ACPI_HEST_NOTIFY_NMI:
+ mutex_lock(&ghes_list_mutex);
+ if (list_empty(&ghes_nmi))
+ register_die_notifier(&ghes_notifier_nmi);
+ list_add_rcu(&ghes->list, &ghes_nmi);
+ mutex_unlock(&ghes_list_mutex);
+ break;
+ default:
+ BUG();
}
platform_set_drvdata(ghes_dev, ghes);
@@ -396,7 +613,14 @@ static int __devexit ghes_remove(struct
ghes = platform_get_drvdata(ghes_dev);
generic = ghes->generic;
+ ghes->flags |= GHES_EXITING;
switch (generic->notify.type) {
+ case ACPI_HEST_NOTIFY_POLLED:
+ del_timer_sync(&ghes->timer);
+ break;
+ case ACPI_HEST_NOTIFY_EXTERNAL:
+ free_irq(ghes->irq, ghes);
+ break;
case ACPI_HEST_NOTIFY_SCI:
mutex_lock(&ghes_list_mutex);
list_del_rcu(&ghes->list);
@@ -404,12 +628,23 @@ static int __devexit ghes_remove(struct
unregister_acpi_hed_notifier(&ghes_notifier_sci);
mutex_unlock(&ghes_list_mutex);
break;
+ case ACPI_HEST_NOTIFY_NMI:
+ mutex_lock(&ghes_list_mutex);
+ list_del_rcu(&ghes->list);
+ if (list_empty(&ghes_nmi))
+ unregister_die_notifier(&ghes_notifier_nmi);
+ mutex_unlock(&ghes_list_mutex);
+ /*
+ * To synchronize with NMI handler, ghes can only be
+ * freed after NMI handler finishes.
+ */
+ synchronize_rcu();
+ break;
default:
BUG();
break;
}
- synchronize_rcu();
ghes_fini(ghes);
kfree(ghes);
@@ -429,6 +664,8 @@ static struct platform_driver ghes_platf
static int __init ghes_init(void)
{
+ int rc;
+
if (acpi_disabled)
return -ENODEV;
@@ -437,12 +674,25 @@ static int __init ghes_init(void)
return -EINVAL;
}
- return platform_driver_register(&ghes_platform_driver);
+ rc = ghes_ioremap_init();
+ if (rc)
+ goto err;
+
+ rc = platform_driver_register(&ghes_platform_driver);
+ if (rc)
+ goto err_ioremap_exit;
+
+ return 0;
+err_ioremap_exit:
+ ghes_ioremap_exit();
+err:
+ return rc;
}
static void __exit ghes_exit(void)
{
platform_driver_unregister(&ghes_platform_driver);
+ ghes_ioremap_exit();
}
module_init(ghes_init);
@@ -34,6 +34,7 @@ static int pause_on_oops_flag;
static DEFINE_SPINLOCK(pause_on_oops_lock);
int panic_timeout;
+EXPORT_SYMBOL_GPL(panic_timeout);
ATOMIC_NOTIFIER_HEAD(panic_notifier_list);
@@ -9,6 +9,7 @@
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/io.h>
+#include <linux/module.h>
#include <asm/cacheflush.h>
#include <asm/pgtable.h>
@@ -90,3 +91,4 @@ int ioremap_page_range(unsigned long add
return err;
}
+EXPORT_SYMBOL_GPL(ioremap_page_range);
@@ -1175,6 +1175,7 @@ void unmap_kernel_range_noflush(unsigned
{
vunmap_page_range(addr, addr + size);
}
+EXPORT_SYMBOL_GPL(unmap_kernel_range_noflush);
/**
* unmap_kernel_range - unmap kernel VM area and flush cache and TLB