diff mbox series

[RFC,4/4] trace, ras: add ARM RAS extension trace event

Message ID 1562086280-5351-5-git-send-email-baicar@os.amperecomputing.com (mailing list archive)
State RFC, archived
Headers show
Series ARM Error Source Table Support | expand

Commit Message

Tyler Baicar July 2, 2019, 4:52 p.m. UTC
Add a trace event for hardware errors reported by the ARMv8.2
RAS extension registers.

Signed-off-by: Tyler Baicar <baicar@os.amperecomputing.com>
---
 arch/arm64/kernel/ras.c   |  3 +++
 drivers/acpi/arm64/aest.c |  4 ++++
 include/ras/ras_event.h   | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 53 insertions(+)
diff mbox series

Patch

diff --git a/arch/arm64/kernel/ras.c b/arch/arm64/kernel/ras.c
index ca47efa..4e34d63 100644
--- a/arch/arm64/kernel/ras.c
+++ b/arch/arm64/kernel/ras.c
@@ -5,6 +5,7 @@ 
 #include <linux/smp.h>
 
 #include <asm/ras.h>
+#include <ras/ras_event.h>
 
 void arch_arm_ras_report_error(void)
 {
@@ -50,6 +51,8 @@  void arch_arm_ras_report_error(void)
 			       regs.err_misc1);
 		}
 
+		trace_arm_ras_ext_event(0, cpu_num, &regs);
+
 		/*
 		 * In the future, we will treat UER conditions as potentially
 		 * recoverable.
diff --git a/drivers/acpi/arm64/aest.c b/drivers/acpi/arm64/aest.c
index fd4f3b5..21ec583 100644
--- a/drivers/acpi/arm64/aest.c
+++ b/drivers/acpi/arm64/aest.c
@@ -13,6 +13,7 @@ 
 #include <linux/ratelimit.h>
 
 #include <asm/ras.h>
+#include <ras/ras_event.h>
 
 #undef pr_fmt
 #define pr_fmt(fmt) "ACPI AEST: " fmt
@@ -102,6 +103,9 @@  static void aest_proc(struct aest_node_data *data)
 
 		aest_print(data, regs, i);
 
+		trace_arm_ras_ext_event(data->node_type, data->data.proc.id,
+					&regs);
+
 		if (regs.err_status & ERR_STATUS_UE)
 			fatal = true;
 
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 36c5c5e..8b76cb1 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -339,6 +339,52 @@ 
 );
 
 /*
+ * ARM RAS Extension Events Report
+ *
+ * This event is generated when an error reported by the ARM RAS extension
+ * hardware is detected.
+ */
+
+#ifdef CONFIG_ARM64
+#include <asm/ras.h>
+TRACE_EVENT(arm_ras_ext_event,
+
+	TP_PROTO(u8 type, u32 id, struct ras_ext_regs *regs),
+
+	TP_ARGS(type, id, regs),
+
+	TP_STRUCT__entry(
+		__field(u8,  type)
+		__field(u32, id)
+		__field(u64, err_fr)
+		__field(u64, err_ctlr)
+		__field(u64, err_status)
+		__field(u64, err_addr)
+		__field(u64, err_misc0)
+		__field(u64, err_misc1)
+	),
+
+	TP_fast_assign(
+		__entry->type = type;
+		__entry->id = id;
+		__entry->err_fr = regs->err_fr;
+		__entry->err_ctlr = regs->err_ctlr;
+		__entry->err_status = regs->err_status;
+		__entry->err_addr = regs->err_addr;
+		__entry->err_misc0 = regs->err_misc0;
+		__entry->err_misc1 = regs->err_misc1;
+	),
+
+	TP_printk("type: %d; id: %u; ERR_FR: %llx; ERR_CTLR: %llx; "
+		  "ERR_STATUS: %llx; ERR_ADDR: %llx; ERR_MISC0: %llx; "
+		  "ERR_MISC1: %llx",
+		  __entry->type, __entry->id, __entry->err_fr,
+		  __entry->err_ctlr, __entry->err_status, __entry->err_addr,
+		  __entry->err_misc0, __entry->err_misc1)
+);
+#endif /* CONFIG_ARM64 */
+
+/*
  * memory-failure recovery action result event
  *
  * unsigned long pfn -	Page Frame Number of the corrupted page