Message ID | 20200825144710.23584-2-Smita.KoralahalliChannabasappa@amd.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Decode raw MSR values of MCA registers in BERT | expand |
Hi Smita, Thank you for the patch! Yet something to improve: [auto build test ERROR on tip/x86/core] [also build test ERROR on tip/auto-latest efi/next v5.9-rc2 next-20200825] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Smita-Koralahalli/Decode-raw-MSR-values-of-MCA-registers-in-BERT/20200825-224828 base: https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git ef2ff0f5d6008d325c9a068e20981c0d0acc4d6b config: x86_64-randconfig-r024-20200826 (attached as .config) compiler: gcc-9 (Debian 9.3.0-15) 9.3.0 reproduce (this is a W=1 build): # save the attached .config to linux build tree make W=1 ARCH=x86_64 If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> All errors (new ones prefixed by >>): ld: drivers/firmware/efi/cper-x86.o: in function `cper_print_proc_ia': >> drivers/firmware/efi/cper-x86.c:350: undefined reference to `arch_apei_report_x86_error' # https://github.com/0day-ci/linux/commit/40a67e64577bd2fee8aa3218efdbec2122cbe08d git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Smita-Koralahalli/Decode-raw-MSR-values-of-MCA-registers-in-BERT/20200825-224828 git checkout 40a67e64577bd2fee8aa3218efdbec2122cbe08d vim +350 drivers/firmware/efi/cper-x86.c 255 256 void cper_print_proc_ia(const char *pfx, const struct cper_sec_proc_ia *proc) 257 { 258 int i; 259 struct cper_ia_err_info *err_info; 260 struct cper_ia_proc_ctx *ctx_info; 261 char newpfx[64], infopfx[64]; 262 u8 err_type; 263 264 if (proc->validation_bits & VALID_LAPIC_ID) 265 printk("%sLocal APIC_ID: 0x%llx\n", pfx, proc->lapic_id); 266 267 if (proc->validation_bits & VALID_CPUID_INFO) { 268 printk("%sCPUID Info:\n", pfx); 269 print_hex_dump(pfx, "", DUMP_PREFIX_OFFSET, 16, 4, proc->cpuid, 270 sizeof(proc->cpuid), 0); 
271 } 272 273 snprintf(newpfx, sizeof(newpfx), "%s ", pfx); 274 275 err_info = (struct cper_ia_err_info *)(proc + 1); 276 for (i = 0; i < VALID_PROC_ERR_INFO_NUM(proc->validation_bits); i++) { 277 printk("%sError Information Structure %d:\n", pfx, i); 278 279 err_type = cper_get_err_type(&err_info->err_type); 280 printk("%sError Structure Type: %s\n", newpfx, 281 err_type < ARRAY_SIZE(cper_proc_error_type_strs) ? 282 cper_proc_error_type_strs[err_type] : "unknown"); 283 284 if (err_type >= N_ERR_TYPES) { 285 printk("%sError Structure Type: %pUl\n", newpfx, 286 &err_info->err_type); 287 } 288 289 if (err_info->validation_bits & INFO_VALID_CHECK_INFO) { 290 printk("%sCheck Information: 0x%016llx\n", newpfx, 291 err_info->check_info); 292 293 if (err_type < N_ERR_TYPES) { 294 snprintf(infopfx, sizeof(infopfx), "%s ", 295 newpfx); 296 297 print_err_info(infopfx, err_type, 298 err_info->check_info); 299 } 300 } 301 302 if (err_info->validation_bits & INFO_VALID_TARGET_ID) { 303 printk("%sTarget Identifier: 0x%016llx\n", 304 newpfx, err_info->target_id); 305 } 306 307 if (err_info->validation_bits & INFO_VALID_REQUESTOR_ID) { 308 printk("%sRequestor Identifier: 0x%016llx\n", 309 newpfx, err_info->requestor_id); 310 } 311 312 if (err_info->validation_bits & INFO_VALID_RESPONDER_ID) { 313 printk("%sResponder Identifier: 0x%016llx\n", 314 newpfx, err_info->responder_id); 315 } 316 317 if (err_info->validation_bits & INFO_VALID_IP) { 318 printk("%sInstruction Pointer: 0x%016llx\n", 319 newpfx, err_info->ip); 320 } 321 322 err_info++; 323 } 324 325 ctx_info = (struct cper_ia_proc_ctx *)err_info; 326 for (i = 0; i < VALID_PROC_CXT_INFO_NUM(proc->validation_bits); i++) { 327 int size = sizeof(*ctx_info) + ctx_info->reg_arr_size; 328 int groupsize = 4; 329 330 printk("%sContext Information Structure %d:\n", pfx, i); 331 332 printk("%sRegister Context Type: %s\n", newpfx, 333 ctx_info->reg_ctx_type < ARRAY_SIZE(ia_reg_ctx_strs) ? 
334 ia_reg_ctx_strs[ctx_info->reg_ctx_type] : "unknown"); 335 336 printk("%sRegister Array Size: 0x%04x\n", newpfx, 337 ctx_info->reg_arr_size); 338 339 if (ctx_info->reg_ctx_type == CTX_TYPE_MSR) { 340 groupsize = 8; /* MSRs are 8 bytes wide. */ 341 printk("%sMSR Address: 0x%08x\n", newpfx, 342 ctx_info->msr_addr); 343 } 344 345 if (ctx_info->reg_ctx_type == CTX_TYPE_MMREG) { 346 printk("%sMM Register Address: 0x%016llx\n", newpfx, 347 ctx_info->mm_reg_addr); 348 } 349 > 350 if (arch_apei_report_x86_error(proc, ctx_info)) { --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index cf503824529c..dbcb6bd80b7c 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -291,6 +291,11 @@ struct cper_sec_mem_err; extern void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err); +struct cper_ia_proc_ctx; +struct cper_sec_proc_ia; +int apei_mce_report_x86_error(const struct cper_sec_proc_ia *proc, + struct cper_ia_proc_ctx *ctx_info); + /* * Enumerate new IP types and HWID values in AMD processors which support * Scalable MCA. diff --git a/arch/x86/kernel/acpi/apei.c b/arch/x86/kernel/acpi/apei.c index c22fb55abcfd..053d6e994e87 100644 --- a/arch/x86/kernel/acpi/apei.c +++ b/arch/x86/kernel/acpi/apei.c @@ -43,3 +43,13 @@ void arch_apei_report_mem_error(int sev, struct cper_sec_mem_err *mem_err) apei_mce_report_mem_error(sev, mem_err); #endif } + +int arch_apei_report_x86_error(const struct cper_sec_proc_ia *proc, + struct cper_ia_proc_ctx *ctx_info) +{ + int err = -EINVAL; +#ifdef CONFIG_X86_MCE + err = apei_mce_report_x86_error(proc, ctx_info); +#endif + return err; +} diff --git a/arch/x86/kernel/cpu/mce/apei.c b/arch/x86/kernel/cpu/mce/apei.c index af8d37962586..d7f405e55d31 100644 --- a/arch/x86/kernel/cpu/mce/apei.c +++ b/arch/x86/kernel/cpu/mce/apei.c @@ -51,6 +51,44 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err) } EXPORT_SYMBOL_GPL(apei_mce_report_mem_error); +int apei_mce_report_x86_error(const struct cper_sec_proc_ia *proc, + struct cper_ia_proc_ctx *ctx_info) +{ + const u64 *i_mce = ((const void *) (ctx_info + 1)); + unsigned int cpu; + struct mce m; + + if (!boot_cpu_has(X86_FEATURE_SMCA)) + return -EINVAL; + + mce_setup(&m); + + m.extcpu = -1; + m.socketid = -1; + + for_each_possible_cpu(cpu) { + if (cpu_data(cpu).initial_apicid == proc->lapic_id) { + m.extcpu = cpu; + m.socketid = cpu_data(m.extcpu).phys_proc_id; + break; + } + } + + m.apicid = proc->lapic_id; + m.bank = (ctx_info->msr_addr 
>> 4) & 0xFF; + m.status = *i_mce; + m.addr = *(i_mce + 1); + m.misc = *(i_mce + 2); + /* Skipping MCA_CONFIG */ + m.ipid = *(i_mce + 4); + m.synd = *(i_mce + 5); + + mce_log(&m); + + return 0; +} +EXPORT_SYMBOL_GPL(apei_mce_report_x86_error); + #define CPER_CREATOR_MCE \ GUID_INIT(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \ 0x64, 0x90, 0xb8, 0x9d) diff --git a/drivers/firmware/efi/cper-x86.c b/drivers/firmware/efi/cper-x86.c index 2531de49f56c..6622e9824416 100644 --- a/drivers/firmware/efi/cper-x86.c +++ b/drivers/firmware/efi/cper-x86.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (C) 2018, Advanced Micro Devices, Inc. -#include <linux/cper.h> +#include <acpi/apei.h> /* * We don't need a "CPER_IA" prefix since these are all locally defined. @@ -347,9 +347,11 @@ void cper_print_proc_ia(const char *pfx, const struct cper_sec_proc_ia *proc) ctx_info->mm_reg_addr); } - printk("%sRegister Array:\n", newpfx); - print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, groupsize, - (ctx_info + 1), ctx_info->reg_arr_size, 0); + if (arch_apei_report_x86_error(proc, ctx_info)) { + printk("%sRegister Array:\n", newpfx); + print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, groupsize, + (ctx_info + 1), ctx_info->reg_arr_size, 0); + } ctx_info = (struct cper_ia_proc_ctx *)((long)ctx_info + size); } diff --git a/include/acpi/apei.h b/include/acpi/apei.h index 680f80960c3d..e8ab17395887 100644 --- a/include/acpi/apei.h +++ b/include/acpi/apei.h @@ -51,6 +51,8 @@ int erst_clear(u64 record_id); int arch_apei_enable_cmcff(struct acpi_hest_header *hest_hdr, void *data); void arch_apei_report_mem_error(int sev, struct cper_sec_mem_err *mem_err); +int arch_apei_report_x86_error(const struct cper_sec_proc_ia *proc, + struct cper_ia_proc_ctx *ctx_info); #endif #endif
The Linux kernel uses the ACPI Boot Error Record Table (BERT) to report fatal errors that occurred in a previous boot. The MCA errors in the BERT are reported using the x86 Processor Error Common Platform Error Record (CPER) format. Currently, the record prints out the raw MSR values and AMD relies on the raw record to provide MCA information. Extract the raw MSR values of MCA registers from the BERT and feed them into the standard mce_log() function through the existing x86/MCA RAS infrastructure. This will result in better decoding from the EDAC MCE decoder or the default notifier. The implementation is SMCA-specific as the raw MCA register values are given in the register offset order of the MCAX address space. Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Tony Luck <tony.luck@intel.com> Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net> Cc: Len Brown <len.brown@intel.com> Cc: Ard Biesheuvel <ardb@kernel.org> Cc: Yazen Ghannam <yazen.ghannam@amd.com> Cc: x86@kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-pm@vger.kernel.org Cc: linux-edac@vger.kernel.org Cc: linux-efi@vger.kernel.org Cc: linux-acpi@vger.kernel.org Cc: devel@acpica.org --- arch/x86/include/asm/mce.h | 5 +++++ arch/x86/kernel/acpi/apei.c | 10 +++++++++ arch/x86/kernel/cpu/mce/apei.c | 38 +++++++++++++++++++++++++++++++++ drivers/firmware/efi/cper-x86.c | 10 +++++---- include/acpi/apei.h | 2 ++ 5 files changed, 61 insertions(+), 4 deletions(-)