Message ID | 20210205022229.313030-1-jason@os.amperecomputing.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v2] arm64/ras: Update code to trace out more data for ARM processor | expand |
Hi All, Can I get some comments from you for my patch? Thanks Jason -----Original Message----- From: Jason Tian OS <jason@os.amperecomputing.com> Sent: Friday, February 5, 2021 10:23 AM To: linux-kernel@vger.kernel.org; linux-edac@vger.kernel.org; linux-arm-kernel@lists.infradead.org; james.morse@arm.com; Tyler Baicar OS <baicar@os.amperecomputing.com> Cc: Frank Wang <zwang@amperecomputing.com>; Jason Tian OS <jason@os.amperecomputing.com> Subject: [PATCH v2] arm64/ras: Update code to trace out more data for ARM processor The original arm_event trace code only traces out ARM processor error information data. According to UEFI_2_8_A_Feb14 specification chapter N2.4.4, the ARM processor error section includes several ARM processor error information, several ARM processor context information and several vendor specific error information structures. Add code to trace out all ARM processor context information and vendor specific error information with raw hex format. Signed-off-by: Jason Tian <jason@os.amperecomputing.com> --- drivers/ras/ras.c | 22 +++++++++++++++++++++- include/ras/ras_event.h | 41 +++++++++++++++++++++++++++++++++++------ 2 files changed, 56 insertions(+), 7 deletions(-) diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c index 95540ea8dd9d..6f3269da9476 100644 --- a/drivers/ras/ras.c +++ b/drivers/ras/ras.c @@ -23,7 +23,27 @@ void log_non_standard_event(const guid_t *sec_type, const guid_t *fru_id, void log_arm_hw_error(struct cper_sec_proc_arm *err) { - trace_arm_event(err); + u32 pei_len; + u32 ctx_len; + u32 vsei_len; + u8 *pei_err; + u8 *ctx_err; + u8 *ven_err_data; + + pei_len = sizeof(struct cper_arm_err_info) * err->err_info_num; + pei_err = (u8 *) err + sizeof(struct cper_sec_proc_arm); + + ctx_len = sizeof(struct cper_arm_ctx_info) * err->context_info_num; + ctx_err = pei_err + sizeof(struct cper_arm_err_info) * + err->err_info_num; + + vsei_len = err->section_length - (sizeof(struct cper_sec_proc_arm) + + pei_len + ctx_len); + ven_err_data 
= ctx_err + sizeof(struct cper_arm_ctx_info) * + err->context_info_num; + + trace_arm_event(err, pei_err, pei_len, ctx_err, ctx_len, + ven_err_data, vsei_len); } static int __init ras_init(void) diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index 0bdbc0d17d2f..fd9201214be8 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -168,11 +168,22 @@ TRACE_EVENT(mc_event, * This event is generated when hardware detects an ARM processor error * has occurred. UEFI 2.6 spec section N.2.4.4. */ + #define APEIL "ARM Processor Err Info data len" + #define APEID "ARM Processor Err Info raw data" + #define APECIL "ARM Processor Err Context Info data len" + #define APECID "ARM Processor Err Context Info raw data" + #define VSEIL "Vendor Specific Err Info data len" + #define VSEID "Vendor Specific Err Info raw data" TRACE_EVENT(arm_event, - TP_PROTO(const struct cper_sec_proc_arm *proc), + TP_PROTO(const struct cper_sec_proc_arm *proc, const u8 *pei_err, + const u32 pei_len, + const u8 *ctx_err, + const u32 ctx_len, + const u8 *oem, + const u32 oem_len), - TP_ARGS(proc), + TP_ARGS(proc, pei_err, pei_len, ctx_err, ctx_len, oem, oem_len), TP_STRUCT__entry( __field(u64, mpidr) @@ -180,6 +191,12 @@ TRACE_EVENT(arm_event, __field(u32, running_state) __field(u32, psci_state) __field(u8, affinity) + __field(u32, pei_len) + __dynamic_array(u8, buf, pei_len) + __field(u32, ctx_len) + __dynamic_array(u8, buf1, ctx_len) + __field(u32, oem_len) + __dynamic_array(u8, buf2, oem_len) ), TP_fast_assign( @@ -199,12 +216,24 @@ TRACE_EVENT(arm_event, __entry->running_state = ~0; __entry->psci_state = ~0; } + __entry->pei_len = pei_len; + memcpy(__get_dynamic_array(buf), pei_err, pei_len); + __entry->ctx_len = ctx_len; + memcpy(__get_dynamic_array(buf1), ctx_err, ctx_len); + __entry->oem_len = oem_len; + memcpy(__get_dynamic_array(buf2), oem, oem_len); ), - TP_printk("affinity level: %d; MPIDR: %016llx; MIDR: %016llx; " - "running state: %d; PSCI state: %d", - 
__entry->affinity, __entry->mpidr, __entry->midr, - __entry->running_state, __entry->psci_state) + TP_printk("affinity level: %d; MPIDR: %016llx; MIDR: %016llx; running state: %d; " + "PSCI state: %d; %s: %d; %s: %s; %s: %d; %s: %s; %s: %d; %s: %s", + __entry->affinity, __entry->mpidr, __entry->midr, + __entry->running_state, __entry->psci_state, + APEIL, __entry->pei_len, APEID, + __print_hex(__get_dynamic_array(buf), __entry->pei_len), + APECIL, __entry->ctx_len, APECID, + __print_hex(__get_dynamic_array(buf1), __entry->ctx_len), + VSEIL, __entry->oem_len, VSEID, + __print_hex(__get_dynamic_array(buf2), __entry->oem_len)) ); /* -- 2.25.1
>-----Original Message----- >From: linux-arm-kernel [mailto:linux-arm-kernel-bounces@lists.infradead.org] >On Behalf Of Jason Tian >Sent: 05 February 2021 02:22 >To: linux-kernel@vger.kernel.org; linux-edac@vger.kernel.org; linux-arm- >kernel@lists.infradead.org; james.morse@arm.com; >baicar@os.amperecomputing.com >Cc: zwang@amperecomputing.com; jason@os.amperecomputing.com >Subject: [PATCH v2] arm64/ras: Update code to trace out more data for ARM >processor > >The original arm_event trace code only traces out ARM processor error >information data. According to UEFI_2_8_A_Feb14 specification chapter >N2.4.4, the ARM processor error section includes several ARM processor >error information, several ARM processor context information and several >vendor specific error information structures. > >Add code to trace out all ARM processor context information and vendor >specific error information with raw hex format. > >Signed-off-by: Jason Tian <jason@os.amperecomputing.com> >--- > drivers/ras/ras.c | 22 +++++++++++++++++++++- > include/ras/ras_event.h | 41 +++++++++++++++++++++++++++++++++++--- >--- > 2 files changed, 56 insertions(+), 7 deletions(-) > Tested-by: Shiju Jose <shiju.jose@huawei.com> >diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c index >95540ea8dd9d..6f3269da9476 100644 >--- a/drivers/ras/ras.c >+++ b/drivers/ras/ras.c >@@ -23,7 +23,27 @@ void log_non_standard_event(const guid_t >*sec_type, const guid_t *fru_id, > > void log_arm_hw_error(struct cper_sec_proc_arm *err) { >- trace_arm_event(err); >+ u32 pei_len; >+ u32 ctx_len; >+ u32 vsei_len; >+ u8 *pei_err; >+ u8 *ctx_err; >+ u8 *ven_err_data; >+ >+ pei_len = sizeof(struct cper_arm_err_info) * err->err_info_num; >+ pei_err = (u8 *) err + sizeof(struct cper_sec_proc_arm); >+ >+ ctx_len = sizeof(struct cper_arm_ctx_info) * err->context_info_num; >+ ctx_err = pei_err + sizeof(struct cper_arm_err_info) * >+ err->err_info_num; >+ >+ vsei_len = err->section_length - (sizeof(struct cper_sec_proc_arm) + >+ 
pei_len + ctx_len); >+ ven_err_data = ctx_err + sizeof(struct cper_arm_ctx_info) * >+ err->context_info_num; >+ >+ trace_arm_event(err, pei_err, pei_len, ctx_err, ctx_len, >+ ven_err_data, vsei_len); > } > > static int __init ras_init(void) >diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index >0bdbc0d17d2f..fd9201214be8 100644 >--- a/include/ras/ras_event.h >+++ b/include/ras/ras_event.h >@@ -168,11 +168,22 @@ TRACE_EVENT(mc_event, > * This event is generated when hardware detects an ARM processor error > * has occurred. UEFI 2.6 spec section N.2.4.4. > */ >+ #define APEIL "ARM Processor Err Info data len" >+ #define APEID "ARM Processor Err Info raw data" >+ #define APECIL "ARM Processor Err Context Info data len" >+ #define APECID "ARM Processor Err Context Info raw data" >+ #define VSEIL "Vendor Specific Err Info data len" >+ #define VSEID "Vendor Specific Err Info raw data" > TRACE_EVENT(arm_event, > >- TP_PROTO(const struct cper_sec_proc_arm *proc), >+ TP_PROTO(const struct cper_sec_proc_arm *proc, const u8 *pei_err, >+ const u32 pei_len, >+ const u8 *ctx_err, >+ const u32 ctx_len, >+ const u8 *oem, >+ const u32 oem_len), > >- TP_ARGS(proc), >+ TP_ARGS(proc, pei_err, pei_len, ctx_err, ctx_len, oem, oem_len), > > TP_STRUCT__entry( > __field(u64, mpidr) >@@ -180,6 +191,12 @@ TRACE_EVENT(arm_event, > __field(u32, running_state) > __field(u32, psci_state) > __field(u8, affinity) >+ __field(u32, pei_len) >+ __dynamic_array(u8, buf, pei_len) >+ __field(u32, ctx_len) >+ __dynamic_array(u8, buf1, ctx_len) >+ __field(u32, oem_len) >+ __dynamic_array(u8, buf2, oem_len) > ), > > TP_fast_assign( >@@ -199,12 +216,24 @@ TRACE_EVENT(arm_event, > __entry->running_state = ~0; > __entry->psci_state = ~0; > } >+ __entry->pei_len = pei_len; >+ memcpy(__get_dynamic_array(buf), pei_err, pei_len); >+ __entry->ctx_len = ctx_len; >+ memcpy(__get_dynamic_array(buf1), ctx_err, ctx_len); >+ __entry->oem_len = oem_len; >+ memcpy(__get_dynamic_array(buf2), oem, oem_len); > 
), > >- TP_printk("affinity level: %d; MPIDR: %016llx; MIDR: %016llx; " >- "running state: %d; PSCI state: %d", >- __entry->affinity, __entry->mpidr, __entry->midr, >- __entry->running_state, __entry->psci_state) >+ TP_printk("affinity level: %d; MPIDR: %016llx; MIDR: %016llx; >running state: %d; " >+ "PSCI state: %d; %s: %d; %s: %s; %s: %d; %s: %s; %s: %d; %s: >%s", >+ __entry->affinity, __entry->mpidr, __entry->midr, >+ __entry->running_state, __entry->psci_state, >+ APEIL, __entry->pei_len, APEID, >+ __print_hex(__get_dynamic_array(buf), __entry->pei_len), >+ APECIL, __entry->ctx_len, APECID, >+ __print_hex(__get_dynamic_array(buf1), __entry->ctx_len), >+ VSEIL, __entry->oem_len, VSEID, >+ __print_hex(__get_dynamic_array(buf2), __entry- >>oem_len)) > ); > > /* >-- >2.25.1 > > >_______________________________________________ >linux-arm-kernel mailing list >linux-arm-kernel@lists.infradead.org >http://lists.infradead.org/mailman/listinfo/linux-arm-kernel Thanks, Shiju
Hi All, May I know whether this patch can be merged or not? Thanks. Jason -----Original Message----- From: Shiju Jose <shiju.jose@huawei.com> Sent: Thursday, February 25, 2021 1:44 AM To: Jason Tian OS <jason@os.amperecomputing.com>; linux-kernel@vger.kernel.org; linux-edac@vger.kernel.org; linux-arm-kernel@lists.infradead.org; james.morse@arm.com; Tyler Baicar OS <baicar@os.amperecomputing.com> Cc: Frank Wang <zwang@amperecomputing.com> Subject: RE: [PATCH v2] arm64/ras: Update code to trace out more data for ARM processor >-----Original Message----- >From: linux-arm-kernel >[mailto:linux-arm-kernel-bounces@lists.infradead.org] >On Behalf Of Jason Tian >Sent: 05 February 2021 02:22 >To: linux-kernel@vger.kernel.org; linux-edac@vger.kernel.org; >linux-arm- kernel@lists.infradead.org; james.morse@arm.com; >baicar@os.amperecomputing.com >Cc: zwang@amperecomputing.com; jason@os.amperecomputing.com >Subject: [PATCH v2] arm64/ras: Update code to trace out more data for >ARM processor > >The original arm_event trace code only traces out ARM processor error >information data. According to UEFI_2_8_A_Feb14 specification chapter >N2.4.4, the ARM processor error section includes several ARM processor >error information, several ARM processor context information and >several vendor specific error information structures. > >Add code to trace out all ARM processor context information and vendor >specific error information with raw hex format. 
> >Signed-off-by: Jason Tian <jason@os.amperecomputing.com> >--- > drivers/ras/ras.c | 22 +++++++++++++++++++++- > include/ras/ras_event.h | 41 +++++++++++++++++++++++++++++++++++--- >--- > 2 files changed, 56 insertions(+), 7 deletions(-) > Tested-by: Shiju Jose <shiju.jose@huawei.com> >diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c index >95540ea8dd9d..6f3269da9476 100644 >--- a/drivers/ras/ras.c >+++ b/drivers/ras/ras.c >@@ -23,7 +23,27 @@ void log_non_standard_event(const guid_t *sec_type, >const guid_t *fru_id, > > void log_arm_hw_error(struct cper_sec_proc_arm *err) { >- trace_arm_event(err); >+ u32 pei_len; >+ u32 ctx_len; >+ u32 vsei_len; >+ u8 *pei_err; >+ u8 *ctx_err; >+ u8 *ven_err_data; >+ >+ pei_len = sizeof(struct cper_arm_err_info) * err->err_info_num; >+ pei_err = (u8 *) err + sizeof(struct cper_sec_proc_arm); >+ >+ ctx_len = sizeof(struct cper_arm_ctx_info) * err->context_info_num; >+ ctx_err = pei_err + sizeof(struct cper_arm_err_info) * >+ err->err_info_num; >+ >+ vsei_len = err->section_length - (sizeof(struct cper_sec_proc_arm) + >+ pei_len + ctx_len); >+ ven_err_data = ctx_err + sizeof(struct cper_arm_ctx_info) * >+ err->context_info_num; >+ >+ trace_arm_event(err, pei_err, pei_len, ctx_err, ctx_len, >+ ven_err_data, vsei_len); > } > > static int __init ras_init(void) >diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index >0bdbc0d17d2f..fd9201214be8 100644 >--- a/include/ras/ras_event.h >+++ b/include/ras/ras_event.h >@@ -168,11 +168,22 @@ TRACE_EVENT(mc_event, > * This event is generated when hardware detects an ARM processor >error > * has occurred. UEFI 2.6 spec section N.2.4.4. 
> */ >+ #define APEIL "ARM Processor Err Info data len" >+ #define APEID "ARM Processor Err Info raw data" >+ #define APECIL "ARM Processor Err Context Info data len" >+ #define APECID "ARM Processor Err Context Info raw data" >+ #define VSEIL "Vendor Specific Err Info data len" >+ #define VSEID "Vendor Specific Err Info raw data" > TRACE_EVENT(arm_event, > >- TP_PROTO(const struct cper_sec_proc_arm *proc), >+ TP_PROTO(const struct cper_sec_proc_arm *proc, const u8 *pei_err, >+ const u32 pei_len, >+ const u8 *ctx_err, >+ const u32 ctx_len, >+ const u8 *oem, >+ const u32 oem_len), > >- TP_ARGS(proc), >+ TP_ARGS(proc, pei_err, pei_len, ctx_err, ctx_len, oem, oem_len), > > TP_STRUCT__entry( > __field(u64, mpidr) >@@ -180,6 +191,12 @@ TRACE_EVENT(arm_event, > __field(u32, running_state) > __field(u32, psci_state) > __field(u8, affinity) >+ __field(u32, pei_len) >+ __dynamic_array(u8, buf, pei_len) >+ __field(u32, ctx_len) >+ __dynamic_array(u8, buf1, ctx_len) >+ __field(u32, oem_len) >+ __dynamic_array(u8, buf2, oem_len) > ), > > TP_fast_assign( >@@ -199,12 +216,24 @@ TRACE_EVENT(arm_event, > __entry->running_state = ~0; > __entry->psci_state = ~0; > } >+ __entry->pei_len = pei_len; >+ memcpy(__get_dynamic_array(buf), pei_err, pei_len); >+ __entry->ctx_len = ctx_len; >+ memcpy(__get_dynamic_array(buf1), ctx_err, ctx_len); >+ __entry->oem_len = oem_len; >+ memcpy(__get_dynamic_array(buf2), oem, oem_len); > ), > >- TP_printk("affinity level: %d; MPIDR: %016llx; MIDR: %016llx; " >- "running state: %d; PSCI state: %d", >- __entry->affinity, __entry->mpidr, __entry->midr, >- __entry->running_state, __entry->psci_state) >+ TP_printk("affinity level: %d; MPIDR: %016llx; MIDR: %016llx; >running state: %d; " >+ "PSCI state: %d; %s: %d; %s: %s; %s: %d; %s: %s; %s: %d; %s: >%s", >+ __entry->affinity, __entry->mpidr, __entry->midr, >+ __entry->running_state, __entry->psci_state, >+ APEIL, __entry->pei_len, APEID, >+ __print_hex(__get_dynamic_array(buf), __entry->pei_len), >+ 
APECIL, __entry->ctx_len, APECID, >+ __print_hex(__get_dynamic_array(buf1), __entry->ctx_len), >+ VSEIL, __entry->oem_len, VSEID, >+ __print_hex(__get_dynamic_array(buf2), __entry- >>oem_len)) > ); > > /* >-- >2.25.1 > > >_______________________________________________ >linux-arm-kernel mailing list >linux-arm-kernel@lists.infradead.org >http://lists.infradead.org/mailman/listinfo/linux-arm-kernel Thanks, Shiju
diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c index 95540ea8dd9d..6f3269da9476 100644 --- a/drivers/ras/ras.c +++ b/drivers/ras/ras.c @@ -23,7 +23,27 @@ void log_non_standard_event(const guid_t *sec_type, const guid_t *fru_id, void log_arm_hw_error(struct cper_sec_proc_arm *err) { - trace_arm_event(err); + u32 pei_len; + u32 ctx_len; + u32 vsei_len; + u8 *pei_err; + u8 *ctx_err; + u8 *ven_err_data; + + pei_len = sizeof(struct cper_arm_err_info) * err->err_info_num; + pei_err = (u8 *) err + sizeof(struct cper_sec_proc_arm); + + ctx_len = sizeof(struct cper_arm_ctx_info) * err->context_info_num; + ctx_err = pei_err + sizeof(struct cper_arm_err_info) * + err->err_info_num; + + vsei_len = err->section_length - (sizeof(struct cper_sec_proc_arm) + + pei_len + ctx_len); + ven_err_data = ctx_err + sizeof(struct cper_arm_ctx_info) * + err->context_info_num; + + trace_arm_event(err, pei_err, pei_len, ctx_err, ctx_len, + ven_err_data, vsei_len); } static int __init ras_init(void) diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index 0bdbc0d17d2f..fd9201214be8 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -168,11 +168,22 @@ TRACE_EVENT(mc_event, * This event is generated when hardware detects an ARM processor error * has occurred. UEFI 2.6 spec section N.2.4.4. 
*/ + #define APEIL "ARM Processor Err Info data len" + #define APEID "ARM Processor Err Info raw data" + #define APECIL "ARM Processor Err Context Info data len" + #define APECID "ARM Processor Err Context Info raw data" + #define VSEIL "Vendor Specific Err Info data len" + #define VSEID "Vendor Specific Err Info raw data" TRACE_EVENT(arm_event, - TP_PROTO(const struct cper_sec_proc_arm *proc), + TP_PROTO(const struct cper_sec_proc_arm *proc, const u8 *pei_err, + const u32 pei_len, + const u8 *ctx_err, + const u32 ctx_len, + const u8 *oem, + const u32 oem_len), - TP_ARGS(proc), + TP_ARGS(proc, pei_err, pei_len, ctx_err, ctx_len, oem, oem_len), TP_STRUCT__entry( __field(u64, mpidr) @@ -180,6 +191,12 @@ TRACE_EVENT(arm_event, __field(u32, running_state) __field(u32, psci_state) __field(u8, affinity) + __field(u32, pei_len) + __dynamic_array(u8, buf, pei_len) + __field(u32, ctx_len) + __dynamic_array(u8, buf1, ctx_len) + __field(u32, oem_len) + __dynamic_array(u8, buf2, oem_len) ), TP_fast_assign( @@ -199,12 +216,24 @@ TRACE_EVENT(arm_event, __entry->running_state = ~0; __entry->psci_state = ~0; } + __entry->pei_len = pei_len; + memcpy(__get_dynamic_array(buf), pei_err, pei_len); + __entry->ctx_len = ctx_len; + memcpy(__get_dynamic_array(buf1), ctx_err, ctx_len); + __entry->oem_len = oem_len; + memcpy(__get_dynamic_array(buf2), oem, oem_len); ), - TP_printk("affinity level: %d; MPIDR: %016llx; MIDR: %016llx; " - "running state: %d; PSCI state: %d", - __entry->affinity, __entry->mpidr, __entry->midr, - __entry->running_state, __entry->psci_state) + TP_printk("affinity level: %d; MPIDR: %016llx; MIDR: %016llx; running state: %d; " + "PSCI state: %d; %s: %d; %s: %s; %s: %d; %s: %s; %s: %d; %s: %s", + __entry->affinity, __entry->mpidr, __entry->midr, + __entry->running_state, __entry->psci_state, + APEIL, __entry->pei_len, APEID, + __print_hex(__get_dynamic_array(buf), __entry->pei_len), + APECIL, __entry->ctx_len, APECID, + __print_hex(__get_dynamic_array(buf1), 
__entry->ctx_len), + VSEIL, __entry->oem_len, VSEID, + __print_hex(__get_dynamic_array(buf2), __entry->oem_len)) ); /*
The original arm_event trace code only traces out ARM processor error information data. According to chapter N.2.4.4 of the UEFI_2_8_A_Feb14 specification, the ARM processor error section includes several ARM processor error information structures, several ARM processor context information structures, and several vendor-specific error information structures. Add code to trace out all ARM processor context information and vendor-specific error information in raw hex format. Signed-off-by: Jason Tian <jason@os.amperecomputing.com> --- drivers/ras/ras.c | 22 +++++++++++++++++++++- include/ras/ras_event.h | 41 +++++++++++++++++++++++++++++++++++------ 2 files changed, 56 insertions(+), 7 deletions(-)