Message ID | 20220412154038.261750-3-Smita.KoralahalliChannabasappa@amd.com (mailing list archive) |
---|---|
State | New, archived |
Series | x86/mce: Support extended MCA_ADDR address on SMCA systems |
On Tue, Apr 12, 2022 at 10:40:38AM -0500, Smita Koralahalli wrote:
> diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
> index f809eacac523..4f2744324d9b 100644
> --- a/arch/x86/kernel/cpu/mce/amd.c
> +++ b/arch/x86/kernel/cpu/mce/amd.c
> @@ -722,6 +722,17 @@ bool amd_mce_is_memory_error(struct mce *m)
>  	return m->bank == 4 && xec == 0x8;
>  }
>  
> +void smca_feature_init(void)
> +{
> +	unsigned int bank;
> +	u64 mca_cfg;
> +
> +	for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
> +		rdmsrl(MSR_AMD64_SMCA_MCx_CONFIG(bank), mca_cfg);
> +		this_cpu_ptr(mce_banks_array)[bank].lsb_in_status = !!(mca_cfg & BIT(8));
> +	}
> +}

We have smca_configure() for SMCA banks init and there it even reads
MCx_CONFIG.

Do you guys not see this?

Or integrating new stuff into the existing code doesn't really matter -
just bolt it on wherever it works?!

> diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
> index 64dbae6b8a09..0f4934fb3d93 100644
> --- a/arch/x86/kernel/cpu/mce/internal.h
> +++ b/arch/x86/kernel/cpu/mce/internal.h
> @@ -177,6 +177,22 @@ struct mce_vendor_flags {
>  
>  extern struct mce_vendor_flags mce_flags;
>  
> +struct mce_bank {
> +	u64 ctl;		/* subevents to enable */
> +
> +	__u64 init	: 1,	/* initialise bank? */
> +
> +	/*
> +	 * (AMD) MCA_CONFIG[McaLsbInStatusSupported]: This bit indicates
> +	 * the LSB field is found in MCA_STATUS, when set.
> +	 */
> +	lsb_in_status	: 1,
> +
> +	__reserved_1	: 62;
> +};

Put those comments over the members, while at it:

diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index 0f4934fb3d93..770a31120fd2 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -178,17 +178,18 @@ struct mce_vendor_flags {
 extern struct mce_vendor_flags mce_flags;
 
 struct mce_bank {
-	u64 ctl;		/* subevents to enable */
+	/* subevents to enable */
+	u64 ctl;
 
-	__u64 init	: 1,	/* initialise bank? */
+	/* initialise bank? */
+	__u64 init	: 1,
 
 	/*
 	 * (AMD) MCA_CONFIG[McaLsbInStatusSupported]: This bit indicates
 	 * the LSB field is found in MCA_STATUS, when set.
 	 */
-	lsb_in_status	: 1,
-
-	__reserved_1	: 62;
+	lsb_in_status	: 1,
+	__reserved_1	: 62;
 };
 
> +DECLARE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array);
> +
>  enum mca_msr {
>  	MCA_CTL,
>  	MCA_STATUS,
> @@ -190,7 +206,9 @@ extern bool filter_mce(struct mce *m);
>  #ifdef CONFIG_X86_MCE_AMD
>  extern bool amd_filter_mce(struct mce *m);
>  
> -/* Extract [55:<lsb>] where lsb is the LS-*valid* bit of the address bits. */
> +/* If MCA_CONFIG[McaLsbInStatusSupported] is set, extract ErrAddr in bits
> + * [56:0], else in bits [55:0] of MCA_ADDR.
> + */

verify_comment_style: Warning: Multi-line comment needs to start text on the second line:
  [+/* If MCA_CONFIG[McaLsbInStatusSupported] is set, extract ErrAddr in bits]

Documentation/process/maintainer-tip.rst, Section "Comment style".
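[For illustration, a rough sketch, not taken from any patch in this thread, of what folding the McaLsbInStatusSupported detection into smca_configure() could look like. The function signature and its existing rdmsr_safe() read of MCA_CONFIG are assumed to match the kernel version under review; only the lsb_in_status assignment is new here.]

/*
 * Sketch only: detect McaLsbInStatusSupported from the MCA_CONFIG read
 * that smca_configure() already does per bank, instead of adding a
 * separate smca_feature_init() loop.
 */
static void smca_configure(unsigned int bank, unsigned int cpu)
{
	u32 low = 0, high = 0;
	u32 smca_config = MSR_AMD64_SMCA_MCx_CONFIG(bank);

	if (rdmsr_safe(smca_config, &low, &high))
		return;

	/* BIT(8) of MCA_CONFIG: McaLsbInStatusSupported. */
	this_cpu_ptr(mce_banks_array)[bank].lsb_in_status = !!(low & BIT(8));

	/* ... existing deferred-error and threshold setup continues here ... */
}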
On Wed, Apr 13, 2022 at 12:21:41PM +0200, Borislav Petkov wrote:
> On Tue, Apr 12, 2022 at 10:40:38AM -0500, Smita Koralahalli wrote:
> > diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
> > index f809eacac523..4f2744324d9b 100644
> > --- a/arch/x86/kernel/cpu/mce/amd.c
> > +++ b/arch/x86/kernel/cpu/mce/amd.c
> > @@ -722,6 +722,17 @@ bool amd_mce_is_memory_error(struct mce *m)
> >  	return m->bank == 4 && xec == 0x8;
> >  }
> >  
> > +void smca_feature_init(void)
> > +{
> > +	unsigned int bank;
> > +	u64 mca_cfg;
> > +
> > +	for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
> > +		rdmsrl(MSR_AMD64_SMCA_MCx_CONFIG(bank), mca_cfg);
> > +		this_cpu_ptr(mce_banks_array)[bank].lsb_in_status = !!(mca_cfg & BIT(8));
> > +	}
> > +}
> 
> We have smca_configure() for SMCA banks init and there it even reads
> MCx_CONFIG.
> 
> Do you guys not see this?
> 
> Or integrating new stuff into the existing code doesn't really matter -
> just bolt it on wherever it works?!
> 

This function gets called from __mcheck_cpu_init_early() so that the info is
available before the MCA banks are polled in __mcheck_cpu_init_generic().

Maybe we can avoid some confusion by renaming this new function? It doesn't
do any feature initialization, so maybe smca_feature_detect_early() would be
better? The goal is to get just enough info that's needed before the bulk of
generic and vendor MCA initialization happens.

Or do you recommend unifying this with smca_configure()?

Thanks,
Yazen
On Wed, Apr 13, 2022 at 02:10:30PM +0000, Yazen Ghannam wrote:
> This function gets called from __mcheck_cpu_init_early() so that the info is
> available before the MCA banks are polled in __mcheck_cpu_init_generic().

Would that work?

I've moved first bank polling into __mcheck_cpu_init_clear_banks() because,
well, this function is clearing the banks so it might as well poll them
first. First bank polling in a init_generic function doesn't make too much
sense anyway.

And __mcheck_cpu_check_banks() functionality is moved into
__mcheck_cpu_init_clear_banks() because, well, silly.

On a quick scan, I don't see problems with such move but the devil is in the
detail.

Hmm?

---
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 99e3ff9607a3..345e068215c4 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1732,21 +1732,8 @@ static void __mcheck_cpu_cap_init(void)
 
 static void __mcheck_cpu_init_generic(void)
 {
-	enum mcp_flags m_fl = 0;
-	mce_banks_t all_banks;
 	u64 cap;
 
-	if (!mca_cfg.bootlog)
-		m_fl = MCP_DONTLOG;
-
-	/*
-	 * Log the machine checks left over from the previous reset. Log them
-	 * only, do not start processing them. That will happen in mcheck_late_init()
-	 * when all consumers have been registered on the notifier chain.
-	 */
-	bitmap_fill(all_banks, MAX_NR_BANKS);
-	machine_check_poll(MCP_UC | MCP_QUEUE_LOG | m_fl, &all_banks);
-
 	cr4_set_bits(X86_CR4_MCE);
 
 	rdmsrl(MSR_IA32_MCG_CAP, cap);
@@ -1757,33 +1744,21 @@ static void __mcheck_cpu_init_generic(void)
 
 static void __mcheck_cpu_init_clear_banks(void)
 {
 	struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
+	enum mcp_flags m_fl = 0;
+	mce_banks_t all_banks;
+	u64 msrval;
 	int i;
 
-	for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
-		struct mce_bank *b = &mce_banks[i];
-
-		if (!b->init)
-			continue;
-		wrmsrl(mca_msr_reg(i, MCA_CTL), b->ctl);
-		wrmsrl(mca_msr_reg(i, MCA_STATUS), 0);
-	}
-}
+	if (!mca_cfg.bootlog)
+		m_fl = MCP_DONTLOG;
 
-/*
- * Do a final check to see if there are any unused/RAZ banks.
- *
- * This must be done after the banks have been initialized and any quirks have
- * been applied.
- *
- * Do not call this from any user-initiated flows, e.g. CPU hotplug or sysfs.
- * Otherwise, a user who disables a bank will not be able to re-enable it
- * without a system reboot.
- */
-static void __mcheck_cpu_check_banks(void)
-{
-	struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
-	u64 msrval;
-	int i;
+	/*
+	 * Log the machine checks left over from the previous reset. Log them
+	 * only, do not start processing them. That will happen in mcheck_late_init()
+	 * when all consumers have been registered on the notifier chain.
+	 */
+	bitmap_fill(all_banks, MAX_NR_BANKS);
+	machine_check_poll(MCP_UC | MCP_QUEUE_LOG | m_fl, &all_banks);
 
 	for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
 		struct mce_bank *b = &mce_banks[i];
@@ -1791,6 +1766,9 @@ static void __mcheck_cpu_check_banks(void)
 		if (!b->init)
 			continue;
 
+		wrmsrl(mca_msr_reg(i, MCA_CTL), b->ctl);
+		wrmsrl(mca_msr_reg(i, MCA_STATUS), 0);
+
 		rdmsrl(mca_msr_reg(i, MCA_CTL), msrval);
 		b->init = !!msrval;
 	}
@@ -2159,7 +2137,6 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
 	__mcheck_cpu_init_generic();
 	__mcheck_cpu_init_vendor(c);
 	__mcheck_cpu_init_clear_banks();
-	__mcheck_cpu_check_banks();
 
 	__mcheck_cpu_setup_timer();
 }
On Wed, Apr 13, 2022 at 04:54:00PM +0200, Borislav Petkov wrote:
> +	if (!mca_cfg.bootlog)
> +		m_fl = MCP_DONTLOG;
> 
> -/*
> - * Do a final check to see if there are any unused/RAZ banks.
> - *
> - * This must be done after the banks have been initialized and any quirks have
> - * been applied.
> - *
> - * Do not call this from any user-initiated flows, e.g. CPU hotplug or sysfs.
> - * Otherwise, a user who disables a bank will not be able to re-enable it
> - * without a system reboot.
> - */
> -static void __mcheck_cpu_check_banks(void)
> -{
> -	struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
> -	u64 msrval;
> -	int i;
> +	/*
> +	 * Log the machine checks left over from the previous reset. Log them
> +	 * only, do not start processing them. That will happen in mcheck_late_init()
> +	 * when all consumers have been registered on the notifier chain.
> +	 */
> +	bitmap_fill(all_banks, MAX_NR_BANKS);
> +	machine_check_poll(MCP_UC | MCP_QUEUE_LOG | m_fl, &all_banks);

If MCP_DONTLOG bit is set, then this does little. It will find
banks with valid records, NOT log them, clear them. But then we
loop and clear all banks.

So maybe do:

	if (mca_cfg.bootlog) {
		bitmap_fill(all_banks, MAX_NR_BANKS);
		machine_check_poll(MCP_UC | MCP_QUEUE_LOG, &all_banks);
	}

> 	__mcheck_cpu_init_clear_banks();

This will need a new name to indicate that it is logging as well as init & clear.

Otherwise seems fine.

-Tony
On Wed, Apr 13, 2022 at 08:59:41AM -0700, Luck, Tony wrote:
> If MCP_DONTLOG bit is set, then this does little. It will find
> banks with valid records, NOT log them, clear them. But then we
> loop and clear all banks.
> 
> So maybe do:
> 
> 	if (mca_cfg.bootlog) {
> 		bitmap_fill(all_banks, MAX_NR_BANKS);
> 		machine_check_poll(MCP_UC | MCP_QUEUE_LOG, &all_banks);
> 	}

Ack, thx.

> This will need a new name to indicate that it is logging as well as init & clear.

Ok.

> Otherwise seems fine.

Thanks, here it is (untested yet).

---
From: Borislav Petkov <bp@suse.de>
Date: Wed, 13 Apr 2022 18:11:01 +0200
Subject: [PATCH] x86/mce: Cleanup bank processing on init

Unify the bank preparation into __mcheck_cpu_init_clear_banks(), rename
that function to what it does now - prepares banks. Do this so that
generic and vendor banks init goes first so that settings done during
that init can take effect before the first bank polling takes place.

Move __mcheck_cpu_check_banks() into __mcheck_cpu_init_prepare_banks()
as it already loops over the banks.

Signed-off-by: Borislav Petkov <bp@suse.de>
---
 arch/x86/include/asm/mce.h     |  3 +-
 arch/x86/kernel/cpu/mce/core.c | 64 ++++++++++------------------------
 2 files changed, 19 insertions(+), 48 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index cc73061e7255..5450df861ec5 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -252,8 +252,7 @@ DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
 enum mcp_flags {
 	MCP_TIMESTAMP	= BIT(0),	/* log time stamp */
 	MCP_UC		= BIT(1),	/* log uncorrected errors */
-	MCP_DONTLOG	= BIT(2),	/* only clear, don't log */
-	MCP_QUEUE_LOG	= BIT(3),	/* only queue to genpool */
+	MCP_QUEUE_LOG	= BIT(2),	/* only queue to genpool */
 };
 
 bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 99e3ff9607a3..6e49dda09a2a 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -724,9 +724,6 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 log_it:
 		error_seen = true;
 
-		if (flags & MCP_DONTLOG)
-			goto clear_it;
-
 		mce_read_aux(&m, i);
 		m.severity = mce_severity(&m, NULL, NULL, false);
 		/*
@@ -1693,7 +1690,7 @@ static void __mcheck_cpu_mce_banks_init(void)
 		/*
 		 * Init them all, __mcheck_cpu_apply_quirks() is going to apply
 		 * the required vendor quirks before
-		 * __mcheck_cpu_init_clear_banks() does the final bank setup.
+		 * __mcheck_cpu_init_prepare_banks() does the final bank setup.
 		 */
 		b->ctl = -1ULL;
 		b->init = true;
@@ -1732,21 +1729,8 @@ static void __mcheck_cpu_cap_init(void)
 
 static void __mcheck_cpu_init_generic(void)
 {
-	enum mcp_flags m_fl = 0;
-	mce_banks_t all_banks;
 	u64 cap;
 
-	if (!mca_cfg.bootlog)
-		m_fl = MCP_DONTLOG;
-
-	/*
-	 * Log the machine checks left over from the previous reset. Log them
-	 * only, do not start processing them. That will happen in mcheck_late_init()
-	 * when all consumers have been registered on the notifier chain.
-	 */
-	bitmap_fill(all_banks, MAX_NR_BANKS);
-	machine_check_poll(MCP_UC | MCP_QUEUE_LOG | m_fl, &all_banks);
-
 	cr4_set_bits(X86_CR4_MCE);
 
 	rdmsrl(MSR_IA32_MCG_CAP, cap);
@@ -1754,36 +1738,22 @@ static void __mcheck_cpu_init_generic(void)
 		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
 }
 
-static void __mcheck_cpu_init_clear_banks(void)
+static void __mcheck_cpu_init_prepare_banks(void)
 {
 	struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
+	mce_banks_t all_banks;
+	u64 msrval;
 	int i;
 
-	for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
-		struct mce_bank *b = &mce_banks[i];
-
-		if (!b->init)
-			continue;
-		wrmsrl(mca_msr_reg(i, MCA_CTL), b->ctl);
-		wrmsrl(mca_msr_reg(i, MCA_STATUS), 0);
+	/*
+	 * Log the machine checks left over from the previous reset. Log them
+	 * only, do not start processing them. That will happen in mcheck_late_init()
+	 * when all consumers have been registered on the notifier chain.
+	 */
+	if (mca_cfg.bootlog) {
+		bitmap_fill(all_banks, MAX_NR_BANKS);
+		machine_check_poll(MCP_UC | MCP_QUEUE_LOG, &all_banks);
 	}
-}
-
-/*
- * Do a final check to see if there are any unused/RAZ banks.
- *
- * This must be done after the banks have been initialized and any quirks have
- * been applied.
- *
- * Do not call this from any user-initiated flows, e.g. CPU hotplug or sysfs.
- * Otherwise, a user who disables a bank will not be able to re-enable it
- * without a system reboot.
- */
-static void __mcheck_cpu_check_banks(void)
-{
-	struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
-	u64 msrval;
-	int i;
 
 	for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
 		struct mce_bank *b = &mce_banks[i];
@@ -1791,6 +1761,9 @@ static void __mcheck_cpu_check_banks(void)
 		if (!b->init)
 			continue;
 
+		wrmsrl(mca_msr_reg(i, MCA_CTL), b->ctl);
+		wrmsrl(mca_msr_reg(i, MCA_STATUS), 0);
+
 		rdmsrl(mca_msr_reg(i, MCA_CTL), msrval);
 		b->init = !!msrval;
 	}
@@ -2158,8 +2131,7 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
 	__mcheck_cpu_init_early(c);
 	__mcheck_cpu_init_generic();
 	__mcheck_cpu_init_vendor(c);
-	__mcheck_cpu_init_clear_banks();
-	__mcheck_cpu_check_banks();
+	__mcheck_cpu_init_prepare_banks();
 
 	__mcheck_cpu_setup_timer();
 }
@@ -2327,7 +2299,7 @@ static void mce_syscore_resume(void)
 {
 	__mcheck_cpu_init_generic();
 	__mcheck_cpu_init_vendor(raw_cpu_ptr(&cpu_info));
-	__mcheck_cpu_init_clear_banks();
+	__mcheck_cpu_init_prepare_banks();
 }
 
 static struct syscore_ops mce_syscore_ops = {
@@ -2345,7 +2317,7 @@ static void mce_cpu_restart(void *data)
 {
 	if (!mce_available(raw_cpu_ptr(&cpu_info)))
 		return;
 	__mcheck_cpu_init_generic();
-	__mcheck_cpu_init_clear_banks();
+	__mcheck_cpu_init_prepare_banks();
 	__mcheck_cpu_init_timer();
 }
On Wed, Apr 13, 2022 at 06:19:11PM +0200, Borislav Petkov wrote:

...

>  static void __mcheck_cpu_init_generic(void)
>  {
> -	enum mcp_flags m_fl = 0;
> -	mce_banks_t all_banks;
>  	u64 cap;
>  
> -	if (!mca_cfg.bootlog)
> -		m_fl = MCP_DONTLOG;
> -
> -	/*
> -	 * Log the machine checks left over from the previous reset. Log them
> -	 * only, do not start processing them. That will happen in mcheck_late_init()
> -	 * when all consumers have been registered on the notifier chain.
> -	 */
> -	bitmap_fill(all_banks, MAX_NR_BANKS);
> -	machine_check_poll(MCP_UC | MCP_QUEUE_LOG | m_fl, &all_banks);
> -
>  	cr4_set_bits(X86_CR4_MCE);

I think the init logic breaks here. MCE now gets enabled before clearing old
errors. So it's possible that the old errors get overwritten by new ones.

>  
>  	rdmsrl(MSR_IA32_MCG_CAP, cap);
> @@ -1754,36 +1738,22 @@ static void __mcheck_cpu_init_generic(void)
>  	wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
>  }
>  
> -static void __mcheck_cpu_init_clear_banks(void)
> +static void __mcheck_cpu_init_prepare_banks(void)
>  {
>  	struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
> +	mce_banks_t all_banks;
> +	u64 msrval;
>  	int i;
>  
> -	for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
> -		struct mce_bank *b = &mce_banks[i];
> -
> -		if (!b->init)
> -			continue;
> -		wrmsrl(mca_msr_reg(i, MCA_CTL), b->ctl);
> -		wrmsrl(mca_msr_reg(i, MCA_STATUS), 0);
> +	/*
> +	 * Log the machine checks left over from the previous reset. Log them
> +	 * only, do not start processing them. That will happen in mcheck_late_init()
> +	 * when all consumers have been registered on the notifier chain.
> +	 */
> +	if (mca_cfg.bootlog) {
> +		bitmap_fill(all_banks, MAX_NR_BANKS);
> +		machine_check_poll(MCP_UC | MCP_QUEUE_LOG, &all_banks);
> 	}
> -}
> -
> -/*
> - * Do a final check to see if there are any unused/RAZ banks.
> - *
> - * This must be done after the banks have been initialized and any quirks have
> - * been applied.
> - *
> - * Do not call this from any user-initiated flows, e.g. CPU hotplug or sysfs.
> - * Otherwise, a user who disables a bank will not be able to re-enable it
> - * without a system reboot.
> - */
> -static void __mcheck_cpu_check_banks(void)
> -{
> -	struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
> -	u64 msrval;
> -	int i;
>  
>  	for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
>  		struct mce_bank *b = &mce_banks[i];
> @@ -1791,6 +1761,9 @@ static void __mcheck_cpu_check_banks(void)
>  		if (!b->init)
>  			continue;
>  
> +		wrmsrl(mca_msr_reg(i, MCA_CTL), b->ctl);
> +		wrmsrl(mca_msr_reg(i, MCA_STATUS), 0);

Same idea here. STATUS should be cleared before turning on reporting in a bank
using MCA_CTL.

> +
>  		rdmsrl(mca_msr_reg(i, MCA_CTL), msrval);
>  		b->init = !!msrval;
>  	}
> @@ -2158,8 +2131,7 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
>  	__mcheck_cpu_init_early(c);
>  	__mcheck_cpu_init_generic();
>  	__mcheck_cpu_init_vendor(c);
> -	__mcheck_cpu_init_clear_banks();
> -	__mcheck_cpu_check_banks();
> +	__mcheck_cpu_init_prepare_banks();

I think moving __mcheck_cpu_init_generic() after
__mcheck_cpu_init_prepare_banks() and swapping the MCA_STATUS and MCA_CTL
writes above would be correct.

In summary:
1) __mcheck_cpu_init_vendor()
2) __mcheck_cpu_init_prepare_banks()
	a) Read and clear MCA_STATUS.
	b) Initialize MCA_CTL.
3) __mcheck_cpu_init_generic()

I realize this is still different than the original flow. But it seems to
maintain the goal of this patch. And it actually matches the AMD documentation
more closely than the original flow.

One downside though is that the system goes longer with CR4.MCE cleared. So
there's greater risk of encountering a shutdown due to a machine check error.

Thanks,
Yazen
On Wed, Apr 13, 2022 at 07:40:39PM +0000, Yazen Ghannam wrote:
> I think the init logic breaks here. MCE now gets enabled before clearing old
> errors. So it's possible that the old errors get overwritten by new ones.

Err, I don't understand. CR4.MCE bit description has:

"Regardless of whether machine-check exceptions are enabled, the
processor records enabled-errors when they occur."

I'm guessing enabled errors are those for which the respective bits in
the MCi_CTL banks are set. And I think the CPU comes out of reset with
those bits set.

So the overwriting will happen regardless.

The only difference here is that "[s]etting MCE to 1 enables the
machine-check exception mechanism." So you'll get a #MC raised vs
shutdown on a fatal error.

Or am I missing an angle?

> > @@ -1791,6 +1761,9 @@ static void __mcheck_cpu_check_banks(void)
> >  		if (!b->init)
> >  			continue;
> >  
> > +		wrmsrl(mca_msr_reg(i, MCA_CTL), b->ctl);
> > +		wrmsrl(mca_msr_reg(i, MCA_STATUS), 0);
> 
> Same idea here. STATUS should be cleared before turning on reporting in a bank
> using MCA_CTL.

Look at the current code. Called in this order:

__mcheck_cpu_init_clear_banks:

	wrmsrl(mca_msr_reg(i, MCA_CTL), b->ctl);
	wrmsrl(mca_msr_reg(i, MCA_STATUS), 0);

__mcheck_cpu_check_banks

	rdmsrl(mca_msr_reg(i, MCA_CTL), msrval);
	b->init = !!msrval;

STATUS *is* cleared after MCA_CTL now too.

If this ordering is wrong - and it sounds like it is - then this needs
to be a separate patch and Cc: <stable@vger.kernel.org> and needs to go
in now.

> One downside though is that the system goes longer with CR4.MCE cleared. So
> there's greater risk of encountering a shutdown due to a machine check error.

Yeah, I don't think the couple of msecs matter.

Thx.
On Thu, Apr 14, 2022 at 11:11:01AM +0200, Borislav Petkov wrote:
> On Wed, Apr 13, 2022 at 07:40:39PM +0000, Yazen Ghannam wrote:
> > I think the init logic breaks here. MCE now gets enabled before clearing old
> > errors. So it's possible that the old errors get overwritten by new ones.
> 
> Err, I don't understand. CR4.MCE bit description has:
> 
> "Regardless of whether machine-check exceptions are enabled, the
> processor records enabled-errors when they occur."
> 
> I'm guessing enabled errors are those for which the respective bits in
> the MCi_CTL banks are set. And I think the CPU comes out of reset with
> those bits set.
> 
> So the overwriting will happen regardless.
> 
> The only difference here is that "[s]etting MCE to 1 enables the
> machine-check exception mechanism." So you'll get a #MC raised vs
> shutdown on a fatal error.
> 
> Or am I missing an angle?
> 

I agree with you about the CR4.MCE statement. But MCi_CTL needs to be set by
system software. The reset value is '0' at least on AMD systems.

Here's an example scenario.

1) OS has fully booted.
	a) MCi_CTL, MCG_CTL, CR4.MCE are all enabled.
2) Fatal MCA error occurs causing hardware-initialized reset. No OS handling.
	a) MCA state info is warm reset persistent.
	b) MCi_STATUS, MCi_ADDR, etc. have valid info.
	c) MCi_CTL, MCG_CTL, CR4.MCE are all set to reset value: '0'.
3) OS, or optionally BIOS, polls MCA banks and logs any valid errors.
	a) Since MCi_CTL, etc. are cleared due to reset, any errors detected are
	from before the reset.
4) MCi_STATUS is cleared to discard old error information.
5) MCA is initialized (MCi_CTL, MCG_CTL, CR4.MCE, etc.). Any error detected
now is a new error from this session.

> > > @@ -1791,6 +1761,9 @@ static void __mcheck_cpu_check_banks(void)
> > >  		if (!b->init)
> > >  			continue;
> > >  
> > > +		wrmsrl(mca_msr_reg(i, MCA_CTL), b->ctl);
> > > +		wrmsrl(mca_msr_reg(i, MCA_STATUS), 0);
> > 
> > Same idea here. STATUS should be cleared before turning on reporting in a bank
> > using MCA_CTL.
> 
> Look at the current code. Called in this order:
> 
> __mcheck_cpu_init_clear_banks:
> 
> 	wrmsrl(mca_msr_reg(i, MCA_CTL), b->ctl);
> 	wrmsrl(mca_msr_reg(i, MCA_STATUS), 0);
> 
> __mcheck_cpu_check_banks
> 
> 	rdmsrl(mca_msr_reg(i, MCA_CTL), msrval);
> 	b->init = !!msrval;
> 
> STATUS *is* cleared after MCA_CTL now too.
> 
> If this ordering is wrong - and it sounds like it is - then this needs
> to be a separate patch and Cc: <stable@vger.kernel.org> and needs to go
> in now.
> 

I agree. The Intel SDM and AMD APM have the following procedure, in summary.

1) Set MCG_CTL
2) Set MCi_CTL for all banks
3) Read MCi_STATUS and log valid errors.
4) Clear MCi_STATUS
5) Set CR4.MCE

I don't know of a reason why STATUS needs to be cleared after MCi_CTL is set.
The only thing I can think of is that enabling MCi_CTL may cause spurious info
logged in MCi_STATUS, and that needs to be cleared out. I'm asking AMD folks
about it.

Of course, this contradicts the flow I outlined above, and also the flow given
in the AMD Processor Programming Reference (PPR). I wonder if the
architectural documents have gotten stale compared to current guidelines. I'm
asking about this too.

Tony,
Do you have any thoughts on this?

> > One downside though is that the system goes longer with CR4.MCE cleared. So
> > there's greater risk of encountering a shutdown due to a machine check error.
> 
> Yeah, I don't think the couple of msecs matter.
> 

Okay, yeah then maybe there should be another small patch to bring the init
flow closer to x86 documentation.

Thanks,
Yazen
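[For reference, a minimal sketch of the five-step SDM/APM order summarized above, written with the helpers used elsewhere in this thread (mca_msr_reg(), mce_num_banks, MCI_STATUS_VAL). It illustrates the documented sequence only; it is not the flow any patch in this thread implements.]

/*
 * Sketch of the documented bring-up order: global enable, per-bank enable,
 * log and clear leftovers, then enable machine-check exceptions.
 */
static void mca_init_documented_order(void)
{
	u64 status;
	int i;

	/* 1) Enable the global MCA control register. */
	wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);

	for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
		/* 2) Enable all error reporting subevents in the bank. */
		wrmsrl(mca_msr_reg(i, MCA_CTL), -1ULL);

		/* 3) Read any leftover record so it can be logged... */
		rdmsrl(mca_msr_reg(i, MCA_STATUS), status);
		if (status & MCI_STATUS_VAL)
			pr_info("mce: leftover record in bank %d\n", i);

		/* 4) ...then clear the status register. */
		wrmsrl(mca_msr_reg(i, MCA_STATUS), 0);
	}

	/* 5) Finally, enable the machine-check exception mechanism. */
	cr4_set_bits(X86_CR4_MCE);
}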
On Fri, Apr 15, 2022 at 02:56:54PM +0000, Yazen Ghannam wrote:
> 3) OS, or optionally BIOS, polls MCA banks and logs any valid errors.
> 	a) Since MCi_CTL, etc. are cleared due to reset, any errors detected are
> 	from before the reset.

On Intel not quite any error. H/w can still log to a bank but MCi_STATUS.EN bit
will be zero. We've also had some BIOS code that did things that logged errors
and then left them for the OS to find during boot.

But this sequence does give more confidence that errors found in banks during
boot are "old".

> I agree. The Intel SDM and AMD APM have the following procedure, in summary.
> 
> 1) Set MCG_CTL
> 2) Set MCi_CTL for all banks
> 3) Read MCi_STATUS and log valid errors.
> 4) Clear MCi_STATUS
> 5) Set CR4.MCE

Yes. That's what the pseudo-code in Intel SDM Example 15-1 says :-(

> I don't know of a reason why STATUS needs to be cleared after MCi_CTL is set.
> The only thing I can think of is that enabling MCi_CTL may cause spurious info
> logged in MCi_STATUS, and that needs to be cleared out. I'm asking AMD folks
> about it.
> 
> Of course, this contradicts the flow I outlined above, and also the flow given
> in the AMD Processor Programming Reference (PPR). I wonder if the
> architectural documents have gotten stale compared to current guidelines. I'm
> asking about this too.

I will ask architects about this sequence too.

-Tony
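[Illustration only, not part of any patch in this thread: a hypothetical helper showing the EN-bit distinction Tony describes above. A record found at boot with VAL set but EN clear was logged while the bank's MCi_CTL enable bit was still clear, i.e. before this boot's MCA setup programmed the bank.]

/* Hypothetical check: was this record logged while the bank was not yet enabled? */
static bool status_logged_before_bank_enable(u64 status)
{
	return (status & MCI_STATUS_VAL) && !(status & MCI_STATUS_EN);
}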
On Fri, Apr 15, 2022 at 09:37:57AM -0700, Luck, Tony wrote:
> On Fri, Apr 15, 2022 at 02:56:54PM +0000, Yazen Ghannam wrote:
> > 3) OS, or optionally BIOS, polls MCA banks and logs any valid errors.
> > 	a) Since MCi_CTL, etc. are cleared due to reset, any errors detected are
> > 	from before the reset.
> 
> On Intel not quite any error. H/w can still log to a bank but MCi_STATUS.EN bit
> will be zero. We've also had some BIOS code that did things that logged errors
> and then left them for the OS to find during boot.
> 
> But this sequence does give more confidence that errors found in banks during
> boot are "old".
> 
> > I agree. The Intel SDM and AMD APM have the following procedure, in summary.
> > 
> > 1) Set MCG_CTL
> > 2) Set MCi_CTL for all banks
> > 3) Read MCi_STATUS and log valid errors.
> > 4) Clear MCi_STATUS
> > 5) Set CR4.MCE
> 
> Yes. That's what the pseudo-code in Intel SDM Example 15-1 says :-(
> 
> > I don't know of a reason why STATUS needs to be cleared after MCi_CTL is set.
> > The only thing I can think of is that enabling MCi_CTL may cause spurious info
> > logged in MCi_STATUS, and that needs to be cleared out. I'm asking AMD folks
> > about it.
> > 
> > Of course, this contradicts the flow I outlined above, and also the flow given
> > in the AMD Processor Programming Reference (PPR). I wonder if the
> > architectural documents have gotten stale compared to current guidelines. I'm
> > asking about this too.
> 
> I will ask architects about this sequence too.
> 

Hi everyone,

It looks like the discrepancy between the Linux code and the x86 documents
isn't a major concern for AMD systems. However, it is highly recommended that
the banks are polled before enabling MCA to find any errors from before OS
boot. It is possible that BIOS may enable MCA before the OS on some systems,
but this isn't always the case.

Tony,
Did you get any feedback regarding the sequence above?

Also, please see the patch below which is based on Boris' patch from earlier
in this thread.

Thanks,
Yazen

-------

From dc4f5b862080daae1aae22f1ec460d9c4c8b6d20 Mon Sep 17 00:00:00 2001
From: Yazen Ghannam <yazen.ghannam@amd.com>
Date: Thu, 19 May 2022 17:25:47 +0000
Subject: [PATCH] x86/mce: Remove __mcheck_cpu_init_early()

The __mcheck_cpu_init_early() function was introduced so that some
vendor-specific features are detected before the first MCA polling event
done in __mcheck_cpu_init_generic().

Currently, __mcheck_cpu_init_early() is only used on AMD-based systems and
additional code will be needed to support various system configurations.
However, the current and future vendor-specific code should be done during
vendor init. This keeps all the vendor code in a common location and
simplifies the generic init flow.

Move all the __mcheck_cpu_init_early() code into mce_amd_feature_init().
Also, move __mcheck_cpu_init_generic() after
__mcheck_cpu_init_prepare_banks() so that MCA is enabled after the first
MCA polling event.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
---
 arch/x86/kernel/cpu/mce/amd.c  |  4 ++++
 arch/x86/kernel/cpu/mce/core.c | 20 +++-----------------
 2 files changed, 7 insertions(+), 17 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 1c87501e0fa3..f65224a2b02d 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -681,6 +681,10 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 	u32 low = 0, high = 0, address = 0;
 	int offset = -1;
 
+	mce_flags.overflow_recov = !!cpu_has(c, X86_FEATURE_OVERFLOW_RECOV);
+	mce_flags.succor = !!cpu_has(c, X86_FEATURE_SUCCOR);
+	mce_flags.smca = !!cpu_has(c, X86_FEATURE_SMCA);
+	mce_flags.amd_threshold = 1;
+
 	for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
 		if (mce_flags.smca)
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 5f406d135d32..9efd6d010e2d 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1906,19 +1906,6 @@ static int __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
 	return 0;
 }
 
-/*
- * Init basic CPU features needed for early decoding of MCEs.
- */
-static void __mcheck_cpu_init_early(struct cpuinfo_x86 *c)
-{
-	if (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) {
-		mce_flags.overflow_recov = !!cpu_has(c, X86_FEATURE_OVERFLOW_RECOV);
-		mce_flags.succor = !!cpu_has(c, X86_FEATURE_SUCCOR);
-		mce_flags.smca = !!cpu_has(c, X86_FEATURE_SMCA);
-		mce_flags.amd_threshold = 1;
-	}
-}
-
 static void mce_centaur_feature_init(struct cpuinfo_x86 *c)
 {
 	struct mca_config *cfg = &mca_cfg;
@@ -2139,10 +2126,9 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
 
 	mca_cfg.initialized = 1;
 
-	__mcheck_cpu_init_early(c);
-	__mcheck_cpu_init_generic();
 	__mcheck_cpu_init_vendor(c);
 	__mcheck_cpu_init_prepare_banks();
+	__mcheck_cpu_init_generic();
 
 	__mcheck_cpu_setup_timer();
 }
@@ -2308,9 +2294,9 @@ static void mce_syscore_shutdown(void)
  */
 static void mce_syscore_resume(void)
 {
-	__mcheck_cpu_init_generic();
 	__mcheck_cpu_init_vendor(raw_cpu_ptr(&cpu_info));
 	__mcheck_cpu_init_prepare_banks();
+	__mcheck_cpu_init_generic();
 }
 
 static struct syscore_ops mce_syscore_ops = {
@@ -2327,8 +2313,8 @@ static void mce_cpu_restart(void *data)
 {
 	if (!mce_available(raw_cpu_ptr(&cpu_info)))
 		return;
-	__mcheck_cpu_init_generic();
 	__mcheck_cpu_init_prepare_banks();
+	__mcheck_cpu_init_generic();
 	__mcheck_cpu_init_timer();
 }
On Thu, Jun 09, 2022 at 07:19:29PM +0000, Yazen Ghannam wrote:
> Also, please see the patch below which is based on Boris' patch from earlier
> in this thread.

Looks good to me. You can send me one against latest tip:ras/core
whenever you can.
On Mon, Jun 27, 2022 at 05:56:34PM +0200, Borislav Petkov wrote:
> On Thu, Jun 09, 2022 at 07:19:29PM +0000, Yazen Ghannam wrote:
> > Also, please see the patch below which is based on Boris' patch from earlier
> > in this thread.
> 
> Looks good to me. You can send me one against latest tip:ras/core
> whenever you can.
> 

Thanks. Do you want a set with yours, mine, and Smita's patches all together?

-Yazen
On Tue, Jul 12, 2022 at 01:51:25PM +0000, Yazen Ghannam wrote:
> Thanks. Do you want a set with yours, mine, and Smita's patches all together?
Sure, whatever you have ready.
No hurry, though, we'll probably have merge window next week...
Thx.
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index f809eacac523..4f2744324d9b 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -722,6 +722,17 @@ bool amd_mce_is_memory_error(struct mce *m)
 	return m->bank == 4 && xec == 0x8;
 }
 
+void smca_feature_init(void)
+{
+	unsigned int bank;
+	u64 mca_cfg;
+
+	for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
+		rdmsrl(MSR_AMD64_SMCA_MCx_CONFIG(bank), mca_cfg);
+		this_cpu_ptr(mce_banks_array)[bank].lsb_in_status = !!(mca_cfg & BIT(8));
+	}
+}
+
 static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
 {
 	struct mce m;
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 39614c19da25..99e3ff9607a3 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -67,13 +67,7 @@ DEFINE_PER_CPU(unsigned, mce_exception_count);
 
 DEFINE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks);
 
-struct mce_bank {
-	u64 ctl;		/* subevents to enable */
-
-	__u64 init	: 1,	/* initialise bank? */
-	__reserved_1	: 63;
-};
-static DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array);
+DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array);
 
 #define ATTR_LEN               16
 /* One object for each MCE bank, shared by all CPUs */
@@ -1935,6 +1929,9 @@ static void __mcheck_cpu_init_early(struct cpuinfo_x86 *c)
 		mce_flags.succor = !!cpu_has(c, X86_FEATURE_SUCCOR);
 		mce_flags.smca = !!cpu_has(c, X86_FEATURE_SMCA);
 		mce_flags.amd_threshold = 1;
+
+		if (mce_flags.smca)
+			smca_feature_init();
 	}
 }
 
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index 64dbae6b8a09..0f4934fb3d93 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -177,6 +177,22 @@ struct mce_vendor_flags {
 
 extern struct mce_vendor_flags mce_flags;
 
+struct mce_bank {
+	u64 ctl;		/* subevents to enable */
+
+	__u64 init	: 1,	/* initialise bank? */
+
+	/*
+	 * (AMD) MCA_CONFIG[McaLsbInStatusSupported]: This bit indicates
+	 * the LSB field is found in MCA_STATUS, when set.
+	 */
+	lsb_in_status	: 1,
+
+	__reserved_1	: 62;
+};
+
+DECLARE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array);
+
 enum mca_msr {
 	MCA_CTL,
 	MCA_STATUS,
@@ -190,7 +206,9 @@ extern bool filter_mce(struct mce *m);
 #ifdef CONFIG_X86_MCE_AMD
 extern bool amd_filter_mce(struct mce *m);
 
-/* Extract [55:<lsb>] where lsb is the LS-*valid* bit of the address bits. */
+/* If MCA_CONFIG[McaLsbInStatusSupported] is set, extract ErrAddr in bits
+ * [56:0], else in bits [55:0] of MCA_ADDR.
+ */
 static __always_inline void smca_extract_err_addr(struct mce *m)
 {
 	u8 lsb;
@@ -198,14 +216,22 @@ static __always_inline void smca_extract_err_addr(struct mce *m)
 	if (!mce_flags.smca)
 		return;
 
-	lsb = (m->addr >> 56) & 0x3f;
+	if (this_cpu_ptr(mce_banks_array)[m->bank].lsb_in_status) {
+		lsb = (m->status >> 24) & 0x3f;
 
-	m->addr &= GENMASK_ULL(55, lsb);
+		m->addr &= GENMASK_ULL(56, lsb);
+	} else {
+		lsb = (m->addr >> 56) & 0x3f;
+
+		m->addr &= GENMASK_ULL(55, lsb);
+	}
 }
 
+void smca_feature_init(void);
 #else
 static inline bool amd_filter_mce(struct mce *m) { return false; }
 static inline void smca_extract_err_addr(struct mce *m) { }
+static inline void smca_feature_init(void) { }
 #endif
 
 #ifdef CONFIG_X86_ANCIENT_MCE
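[For reference, a worked sketch, not part of the patch above, of what smca_extract_err_addr() keeps in the two cases for a hypothetical LSB value of 12, i.e. a 4K-granular error address.]

/* Hypothetical example of the ErrAddr masking done by the patch above. */
static inline u64 smca_addr_example(u64 addr, bool lsb_in_status)
{
	u8 lsb = 12;	/* hypothetical: least significant valid address bit */

	/* LSB field in MCA_STATUS: ErrAddr spans bits [56:0] of MCA_ADDR. */
	if (lsb_in_status)
		return addr & GENMASK_ULL(56, lsb);

	/* Otherwise ErrAddr spans bits [55:0] of MCA_ADDR. */
	return addr & GENMASK_ULL(55, lsb);
}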