@@ -478,6 +478,129 @@ static void mce_threshold_block_init(struct threshold_block *b, int offset)
threshold_restart_bank(&tr);
};
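+/*
+ * Storm handling parameters, mirroring the Intel CMCI storm logic: more
+ * than MCI_STORM_THRESHOLD threshold interrupts within MCI_STORM_INTERVAL
+ * jiffies on a CPU are treated as a storm.
+ */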
+#define MCI_STORM_INTERVAL (HZ)
+#define MCI_STORM_THRESHOLD 15
+
+enum {
+ MCI_STORM_NONE,
+ MCI_STORM_ACTIVE,
+};
+
+static DEFINE_PER_CPU(unsigned long, mci_time_stamp);
+static DEFINE_PER_CPU(unsigned int, mci_storm_cnt);
+static DEFINE_PER_CPU(unsigned int, mci_storm_state);
+
+static DEFINE_PER_CPU(int, mci_backoff_cnt);
+
+static void _reset_block(struct threshold_block *block)
+{
+ struct thresh_restart tr;
+
+ memset(&tr, 0, sizeof(tr));
+ tr.b = block;
+ threshold_restart_bank(&tr);
+}
+
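+/* Flip the interrupt enable bit of one threshold block and reprogram it. */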
+static void toggle_interrupt_reset_block(struct threshold_block *block, bool on)
+{
+ if (!block)
+ return;
+
+ block->interrupt_enable = !!on;
+ _reset_block(block);
+}
+
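+/*
+ * Switch all threshold blocks on this CPU between interrupt and polling
+ * mode by toggling their interrupt enable bits.
+ */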
+static void mci_toggle_interrupt_mode(bool on)
+{
+ struct threshold_block *first_block = NULL, *block = NULL, *tmp = NULL;
+ struct threshold_bank **bp = this_cpu_read(threshold_banks);
+ unsigned long flags;
+ unsigned int bank;
+
+ if (!bp)
+ return;
+
+ local_irq_save(flags);
+
+ for (bank = 0; bank < this_cpu_read(mce_num_banks); bank++) {
+ if (!(this_cpu_read(bank_map) & (1 << bank)))
+ continue;
+
+ first_block = bp[bank]->blocks;
+ if (!first_block)
+ continue;
+
+ toggle_interrupt_reset_block(first_block, on);
+
+ list_for_each_entry_safe(block, tmp, &first_block->miscj, miscj)
+ toggle_interrupt_reset_block(block, on);
+ }
+
+ local_irq_restore(flags);
+}
+
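+/*
+ * Called from the poll path (mce_timer_fn()) via the mce_mci_poll hook.
+ * Returns false when no storm is active. During a storm, reset the backoff
+ * counter if the last poll logged an error, else count down towards leaving
+ * storm mode.
+ */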
+bool mce_amd_mci_poll(bool err_seen)
+{
+ if (__this_cpu_read(mci_storm_state) == MCI_STORM_NONE)
+ return false;
+
+ if (err_seen)
+ this_cpu_write(mci_backoff_cnt, INITIAL_CHECK_INTERVAL);
+ else
+ this_cpu_dec(mci_backoff_cnt);
+
+ return true;
+}
+
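+/*
+ * While the storm lasts, keep polling every MCI_STORM_INTERVAL. Once the
+ * backoff counter runs out, declare the storm over and switch the threshold
+ * blocks back to interrupt mode.
+ */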
+unsigned long mci_amd_adjust_timer(unsigned long interval)
+{
+ if (__this_cpu_read(mci_storm_state) == MCI_STORM_ACTIVE) {
+ if (this_cpu_read(mci_backoff_cnt) > 0) {
+ mce_notify_irq();
+ return MCI_STORM_INTERVAL;
+ }
+
+ __this_cpu_write(mci_storm_state, MCI_STORM_NONE);
+ pr_notice("Storm subsided on CPU %d: switching to interrupt mode\n",
+ smp_processor_id());
+ mci_toggle_interrupt_mode(true);
+ }
+
+ return interval;
+}
+
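+/*
+ * Track the threshold interrupt rate on this CPU: count interrupts within an
+ * MCI_STORM_INTERVAL window and, once the count exceeds MCI_STORM_THRESHOLD,
+ * disable threshold interrupts and switch to polling. Returns true if a storm
+ * is already active or has just been detected.
+ */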
+static bool storm_detect(void)
+{
+ unsigned int cnt = this_cpu_read(mci_storm_cnt);
+ unsigned long ts = this_cpu_read(mci_time_stamp);
+ unsigned long now = jiffies;
+
+ if (__this_cpu_read(mci_storm_state) != MCI_STORM_NONE)
+ return true;
+
+ if (time_before_eq(now, ts + MCI_STORM_INTERVAL)) {
+ cnt++;
+ } else {
+ cnt = 1;
+ this_cpu_write(mci_time_stamp, now);
+ }
+
+ this_cpu_write(mci_storm_cnt, cnt);
+
+ if (cnt <= MCI_STORM_THRESHOLD)
+ return false;
+
+ mci_toggle_interrupt_mode(false);
+ __this_cpu_write(mci_storm_state, MCI_STORM_ACTIVE);
+ mce_timer_kick(MCI_STORM_INTERVAL);
+ this_cpu_write(mci_backoff_cnt, INITIAL_CHECK_INTERVAL);
+
+ pr_notice("Storm detected on CPU %d: switching to poll mode\n",
+ smp_processor_id());
+
+ return true;
+}
+
static int setup_APIC_mce_threshold(int reserved, int new)
{
if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
@@ -868,6 +991,9 @@ static void amd_threshold_interrupt(void)
struct threshold_bank **bp = this_cpu_read(threshold_banks);
unsigned int bank, cpu = smp_processor_id();
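+	/* During a storm, bank processing is left to the poll timer. */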
+ if (storm_detect())
+ return;
+
/*
* Validate that the threshold bank has been initialized already. The
* handler is installed at boot time, but on a hotplug event the
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1554,6 +1554,13 @@ static unsigned long mce_adjust_timer_default(unsigned long interval)
static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default;
+static bool mce_mci_poll_default(bool err_seen)
+{
+ return false;
+}
+
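+/*
+ * Vendor hook called from mce_timer_fn() after polling the banks. Returning
+ * true means a storm is being handled and the poll interval is adjusted.
+ */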
+static bool (*mce_mci_poll)(bool err_seen) = mce_mci_poll_default;
+
static void __start_timer(struct timer_list *t, unsigned long interval)
{
unsigned long when = jiffies + interval;
@@ -1577,9 +1584,11 @@ static void mce_timer_fn(struct timer_list *t)
iv = __this_cpu_read(mce_next_interval);
if (mce_available(this_cpu_ptr(&cpu_info))) {
- machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
+ bool err_seen;
+
+ err_seen = machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
 
- if (mce_intel_cmci_poll()) {
+ if (mce_mci_poll(err_seen)) {
iv = mce_adjust_timer(iv);
goto done;
}
@@ -1938,10 +1947,13 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
case X86_VENDOR_INTEL:
mce_intel_feature_init(c);
mce_adjust_timer = cmci_intel_adjust_timer;
+ mce_mci_poll = mce_intel_cmci_poll;
break;
case X86_VENDOR_AMD: {
mce_amd_feature_init(c);
+ mce_adjust_timer = mci_amd_adjust_timer;
+ mce_mci_poll = mce_amd_mci_poll;
break;
}
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -127,7 +127,7 @@ static bool lmce_supported(void)
return tmp & FEAT_CTL_LMCE_ENABLED;
}
-bool mce_intel_cmci_poll(void)
+bool mce_intel_cmci_poll(bool err_seen)
{
if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
return false;
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -42,7 +42,7 @@ extern mce_banks_t mce_banks_ce_disabled;
#ifdef CONFIG_X86_MCE_INTEL
unsigned long cmci_intel_adjust_timer(unsigned long interval);
-bool mce_intel_cmci_poll(void);
+bool mce_intel_cmci_poll(bool err_seen);
void mce_intel_hcpu_update(unsigned long cpu);
void cmci_disable_bank(int bank);
void intel_init_cmci(void);
@@ -51,7 +51,7 @@ void intel_clear_lmce(void);
bool intel_filter_mce(struct mce *m);
#else
# define cmci_intel_adjust_timer mce_adjust_timer_default
-static inline bool mce_intel_cmci_poll(void) { return false; }
+# define mce_intel_cmci_poll mce_mci_poll_default
static inline void mce_intel_hcpu_update(unsigned long cpu) { }
static inline void cmci_disable_bank(int bank) { }
static inline void intel_init_cmci(void) { }
@@ -186,8 +186,12 @@ enum mca_msr {
extern bool filter_mce(struct mce *m);
#ifdef CONFIG_X86_MCE_AMD
+unsigned long mci_amd_adjust_timer(unsigned long interval);
extern bool amd_filter_mce(struct mce *m);
+bool mce_amd_mci_poll(bool err_seen);
#else
+# define mci_amd_adjust_timer mce_adjust_timer_default
+# define mce_amd_mci_poll mce_mci_poll_default
static inline bool amd_filter_mce(struct mce *m) { return false; }
#endif
Extend the logic of handling CMCI storms to AMD threshold interrupts.

Similar to CMCI storm handling, keep track of the rate at which each
processor sees interrupts. If it exceeds the threshold, disable interrupts
and switch to polling of machine check banks.

But unlike CMCI, re-enable threshold interrupts per CPU, because MCA
exceptions and interrupts are directed to a single CPU on AMD systems.
As the threshold interrupts are per CPU, a global counter is not required
to keep the count of all CPUs in the storm.

Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
---
This patch mostly inherits the existing Intel CMCI storm logic and is not
a per-bank based approach. Commit 7bee1ef01f38395 ("x86/mce: Add per-bank
CMCI storm mitigation") in Tony Luck's tree makes the existing CMCI storm
handling more fine grained and adds a hook into machine_check_poll() to
keep track of per-CPU, per-bank corrected error logs.
---
 arch/x86/kernel/cpu/mce/amd.c      | 126 +++++++++++++++++++++++++++++
 arch/x86/kernel/cpu/mce/core.c     |  16 +++-
 arch/x86/kernel/cpu/mce/intel.c    |   2 +-
 arch/x86/kernel/cpu/mce/internal.h |   8 +-
 4 files changed, 147 insertions(+), 5 deletions(-)