diff mbox series

[7/9] x86/mce: Unify AMD DFR handler with MCA Polling

Message ID 20240523155641.2805411-8-yazen.ghannam@amd.com (mailing list archive)
State New, archived
Headers show
Series AMD MCA interrupts rework | expand

Commit Message

Yazen Ghannam May 23, 2024, 3:56 p.m. UTC
AMD systems optionally support a deferred error interrupt. The interrupt
should be used as another signal to trigger MCA polling. This is similar
to how other MCA interrupts are handled.

Deferred errors do not require any special handling related to the
interrupt, e.g. resetting or rearming the interrupt, etc.

However, Scalable MCA systems include a pair of registers, MCA_DESTAT
and MCA_DEADDR, that should be checked for valid errors. This check
should be done whenever MCA registers are polled. Currently, the
deferred error interrupt does this check, but the MCA polling function
does not.

Call the MCA polling function when handling the deferred error
interrupt. This keeps all "polling" cases in a common function.

Call the polling function only for banks that have the deferred error
interrupt enabled.

Add an SMCA status check helper. This will do the same status check and
register clearing that the interrupt handler has done. And it extends
the common polling flow to find AMD deferred errors.

Remove old code whose functionality is already covered in the common MCA
code.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
---
 arch/x86/kernel/cpu/mce/amd.c  | 99 ++--------------------------------
 arch/x86/kernel/cpu/mce/core.c | 46 ++++++++++++++--
 2 files changed, 46 insertions(+), 99 deletions(-)

Comments

Borislav Petkov June 4, 2024, 11:05 a.m. UTC | #1
On Thu, May 23, 2024 at 10:56:39AM -0500, Yazen Ghannam wrote:
> +static bool smca_log_poll_error(struct mce *m, u32 *status_reg)

That handing of *status_reg back'n'forth just to clear it in the end is
not nice. Let's get rid of it:

---
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 0a9cff329487..a0ba82fe6de3 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -669,7 +669,7 @@ static void reset_thr_limit(unsigned int bank)
 
 DEFINE_PER_CPU(unsigned, mce_poll_count);
 
-static bool smca_log_poll_error(struct mce *m, u32 *status_reg)
+static bool smca_log_poll_error(struct mce *m, u32 status_reg)
 {
 	/*
 	 * If this is a deferred error found in MCA_STATUS, then clear
@@ -686,8 +686,8 @@ static bool smca_log_poll_error(struct mce *m, u32 *status_reg)
 	 * If the MCA_DESTAT register has valid data, then use
 	 * it as the status register.
 	 */
-	*status_reg = MSR_AMD64_SMCA_MCx_DESTAT(m->bank);
-	m->status = mce_rdmsrl(*status_reg);
+	status_reg = MSR_AMD64_SMCA_MCx_DESTAT(m->bank);
+	m->status = mce_rdmsrl(status_reg);
 
 	if (!(m->status & MCI_STATUS_VAL))
 		return false;
@@ -695,6 +695,8 @@ static bool smca_log_poll_error(struct mce *m, u32 *status_reg)
 	if (m->status & MCI_STATUS_ADDRV)
 		m->addr = mce_rdmsrl(MSR_AMD64_SMCA_MCx_DEADDR(m->bank));
 
+	mce_wrmsrl(status_reg, 0);
+
 	return true;
 }
 
@@ -714,7 +716,7 @@ static bool ser_log_poll_error(struct mce *m)
 	return false;
 }
 
-static bool log_poll_error(enum mcp_flags flags, struct mce *m, u32 *status_reg)
+static bool log_poll_error(enum mcp_flags flags, struct mce *m, u32 status_reg)
 {
 	if (mce_flags.smca)
 		return smca_log_poll_error(m, status_reg);
@@ -789,7 +791,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		if (!mca_cfg.cmci_disabled)
 			mce_track_storm(&m);
 
-		if (!log_poll_error(flags, &m, &status_reg))
+		if (!log_poll_error(flags, &m, status_reg))
 			continue;
 
 		if (flags & MCP_DONTLOG)
Borislav Petkov June 4, 2024, 11:18 a.m. UTC | #2
On Thu, May 23, 2024 at 10:56:39AM -0500, Yazen Ghannam wrote:
> -/*
> - * We have three scenarios for checking for Deferred errors:
> - *
> - * 1) Non-SMCA systems check MCA_STATUS and log error if found.
> - * 2) SMCA systems check MCA_STATUS. If error is found then log it and also
> - *    clear MCA_DESTAT.
> - * 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and
> - *    log it.
> - */

I don't like it when you're killing those written down rules. Are they
not true anymore?

Because smca_log_poll_error() still does exactly that.
Yazen Ghannam Aug. 16, 2024, 2:08 p.m. UTC | #3
On Tue, Jun 04, 2024 at 01:05:28PM +0200, Borislav Petkov wrote:
> On Thu, May 23, 2024 at 10:56:39AM -0500, Yazen Ghannam wrote:
> > +static bool smca_log_poll_error(struct mce *m, u32 *status_reg)
> 
> That handing of *status_reg back'n'forth just to clear it in the end is
> not nice. Let's get rid of it:
> 
> ---
> diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
> index 0a9cff329487..a0ba82fe6de3 100644
> --- a/arch/x86/kernel/cpu/mce/core.c
> +++ b/arch/x86/kernel/cpu/mce/core.c
> @@ -669,7 +669,7 @@ static void reset_thr_limit(unsigned int bank)
>  
>  DEFINE_PER_CPU(unsigned, mce_poll_count);
>  
> -static bool smca_log_poll_error(struct mce *m, u32 *status_reg)
> +static bool smca_log_poll_error(struct mce *m, u32 status_reg)
>  {
>  	/*
>  	 * If this is a deferred error found in MCA_STATUS, then clear
> @@ -686,8 +686,8 @@ static bool smca_log_poll_error(struct mce *m, u32 *status_reg)
>  	 * If the MCA_DESTAT register has valid data, then use
>  	 * it as the status register.
>  	 */
> -	*status_reg = MSR_AMD64_SMCA_MCx_DESTAT(m->bank);
> -	m->status = mce_rdmsrl(*status_reg);
> +	status_reg = MSR_AMD64_SMCA_MCx_DESTAT(m->bank);
> +	m->status = mce_rdmsrl(status_reg);
>  
>  	if (!(m->status & MCI_STATUS_VAL))
>  		return false;
> @@ -695,6 +695,8 @@ static bool smca_log_poll_error(struct mce *m, u32 *status_reg)
>  	if (m->status & MCI_STATUS_ADDRV)
>  		m->addr = mce_rdmsrl(MSR_AMD64_SMCA_MCx_DEADDR(m->bank));
>  
> +	mce_wrmsrl(status_reg, 0);
> +

I had to think on this for a while. The reason to clear the status
register at the very end is to make sure another error doesn't come in
and overwrite all the "aux" registers before we grab them.

***BUT*** the reason we are going down this path is because another
(higher priority) error *did* overwrite everything. And we're trying to
gather any leftover data. So all the "aux" registers are already
out-of-sync.

I don't think we can solve this in software. We'd need all the state
registers to be duplicated in hardware. We have status and address which
seem to be enough.

I'll see if this can be simplified even further.

Thanks,
Yazen
diff mbox series

Patch

diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 1ac445a0dc12..c6594da95340 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -57,6 +57,7 @@ 
 
 static bool thresholding_irq_en;
 static DEFINE_PER_CPU_READ_MOSTLY(mce_banks_t, mce_thr_intr_banks);
+static DEFINE_PER_CPU_READ_MOSTLY(mce_banks_t, mce_dfr_intr_banks);
 
 static const char * const th_names[] = {
 	"load_store",
@@ -296,8 +297,10 @@  static void smca_configure(unsigned int bank, unsigned int cpu)
 		 * APIC based interrupt. First, check that no interrupt has been
 		 * set.
 		 */
-		if ((low & BIT(5)) && !((high >> 5) & 0x3))
+		if ((low & BIT(5)) && !((high >> 5) & 0x3)) {
+			__set_bit(bank, this_cpu_ptr(mce_dfr_intr_banks));
 			high |= BIT(5);
+		}
 
 		this_cpu_ptr(mce_banks_array)[bank].lsb_in_status = !!(low & BIT(8));
 
@@ -778,33 +781,6 @@  bool amd_mce_usable_address(struct mce *m)
 	return false;
 }
 
-static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
-{
-	struct mce m;
-
-	mce_setup(&m);
-
-	m.status = status;
-	m.misc   = misc;
-	m.bank   = bank;
-	m.tsc	 = rdtsc();
-
-	if (m.status & MCI_STATUS_ADDRV) {
-		m.addr = addr;
-
-		smca_extract_err_addr(&m);
-	}
-
-	if (mce_flags.smca) {
-		rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m.ipid);
-
-		if (m.status & MCI_STATUS_SYNDV)
-			rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd);
-	}
-
-	mce_log(&m);
-}
-
 DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error)
 {
 	trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
@@ -814,75 +790,10 @@  DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error)
 	apic_eoi();
 }
 
-/*
- * Returns true if the logged error is deferred. False, otherwise.
- */
-static inline bool
-_log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc)
-{
-	u64 status, addr = 0;
-
-	rdmsrl(msr_stat, status);
-	if (!(status & MCI_STATUS_VAL))
-		return false;
-
-	if (status & MCI_STATUS_ADDRV)
-		rdmsrl(msr_addr, addr);
-
-	__log_error(bank, status, addr, misc);
-
-	wrmsrl(msr_stat, 0);
-
-	return status & MCI_STATUS_DEFERRED;
-}
-
-static bool _log_error_deferred(unsigned int bank, u32 misc)
-{
-	if (!_log_error_bank(bank, mca_msr_reg(bank, MCA_STATUS),
-			     mca_msr_reg(bank, MCA_ADDR), misc))
-		return false;
-
-	/*
-	 * Non-SMCA systems don't have MCA_DESTAT/MCA_DEADDR registers.
-	 * Return true here to avoid accessing these registers.
-	 */
-	if (!mce_flags.smca)
-		return true;
-
-	/* Clear MCA_DESTAT if the deferred error was logged from MCA_STATUS. */
-	wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0);
-	return true;
-}
-
-/*
- * We have three scenarios for checking for Deferred errors:
- *
- * 1) Non-SMCA systems check MCA_STATUS and log error if found.
- * 2) SMCA systems check MCA_STATUS. If error is found then log it and also
- *    clear MCA_DESTAT.
- * 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and
- *    log it.
- */
-static void log_error_deferred(unsigned int bank)
-{
-	if (_log_error_deferred(bank, 0))
-		return;
-
-	/*
-	 * Only deferred errors are logged in MCA_DE{STAT,ADDR} so just check
-	 * for a valid error.
-	 */
-	_log_error_bank(bank, MSR_AMD64_SMCA_MCx_DESTAT(bank),
-			      MSR_AMD64_SMCA_MCx_DEADDR(bank), 0);
-}
-
 /* APIC interrupt handler for deferred errors */
 static void amd_deferred_error_interrupt(void)
 {
-	unsigned int bank;
-
-	for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank)
-		log_error_deferred(bank);
+	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_dfr_intr_banks));
 }
 
 static void reset_block(struct threshold_block *block)
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index d6517b93c903..16c999b2cc1f 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -637,7 +637,8 @@  static noinstr void mce_read_aux(struct mce *m, int i)
 	if (m->status & MCI_STATUS_MISCV)
 		m->misc = mce_rdmsrl(mca_msr_reg(i, MCA_MISC));
 
-	if (m->status & MCI_STATUS_ADDRV) {
+	/* Don't overwrite an address value that was saved earlier. */
+	if (m->status & MCI_STATUS_ADDRV && !m->addr) {
 		m->addr = mce_rdmsrl(mca_msr_reg(i, MCA_ADDR));
 
 		/*
@@ -668,6 +669,35 @@  static void reset_thr_limit(unsigned int bank)
 
 DEFINE_PER_CPU(unsigned, mce_poll_count);
 
+static bool smca_log_poll_error(struct mce *m, u32 *status_reg)
+{
+	/*
+	 * If this is a deferred error found in MCA_STATUS, then clear
+	 * the redundant data from the MCA_DESTAT register.
+	 */
+	if (m->status & MCI_STATUS_VAL) {
+		if (m->status & MCI_STATUS_DEFERRED)
+			mce_wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(m->bank), 0);
+
+		return true;
+	}
+
+	/*
+	 * If the MCA_DESTAT register has valid data, then use
+	 * it as the status register.
+	 */
+	*status_reg = MSR_AMD64_SMCA_MCx_DESTAT(m->bank);
+	m->status = mce_rdmsrl(*status_reg);
+
+	if (!(m->status & MCI_STATUS_VAL))
+		return false;
+
+	if (m->status & MCI_STATUS_ADDRV)
+		m->addr = mce_rdmsrl(MSR_AMD64_SMCA_MCx_DEADDR(m->bank));
+
+	return true;
+}
+
 static bool ser_log_poll_error(struct mce *m)
 {
 	/* Log "not enabled" (speculative) errors */
@@ -684,8 +714,11 @@  static bool ser_log_poll_error(struct mce *m)
 	return false;
 }
 
-static bool log_poll_error(enum mcp_flags flags, struct mce *m)
+static bool log_poll_error(enum mcp_flags flags, struct mce *m, u32 *status_reg)
 {
+	if (mce_flags.smca)
+		return smca_log_poll_error(m, status_reg);
+
 	/* If this entry is not valid, ignore it. */
 	if (!(m->status & MCI_STATUS_VAL))
 		return false;
@@ -724,6 +757,7 @@  static bool log_poll_error(enum mcp_flags flags, struct mce *m)
 void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 {
 	struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
+	u32 status_reg;
 	struct mce m;
 	int i;
 
@@ -736,12 +770,14 @@  void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		if (!mce_banks[i].ctl || !test_bit(i, *b))
 			continue;
 
+		status_reg = mca_msr_reg(i, MCA_STATUS);
+
 		m.misc = 0;
 		m.addr = 0;
 		m.bank = i;
 
 		barrier();
-		m.status = mce_rdmsrl(mca_msr_reg(i, MCA_STATUS));
+		m.status = mce_rdmsrl(status_reg);
 
 		/*
 		 * Update storm tracking here, before checking for the
@@ -753,7 +789,7 @@  void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		if (!mca_cfg.cmci_disabled)
 			mce_track_storm(&m);
 
-		if (!log_poll_error(flags, &m))
+		if (!log_poll_error(flags, &m, &status_reg))
 			continue;
 
 		if (flags & MCP_DONTLOG)
@@ -780,7 +816,7 @@  void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		/*
 		 * Clear state for this bank.
 		 */
-		mce_wrmsrl(mca_msr_reg(i, MCA_STATUS), 0);
+		mce_wrmsrl(status_reg, 0);
 	}
 
 	/*