Message-ID: <20220217141609.119453-2-Smita.KoralahalliChannabasappa@amd.com>
Date:   Thu, 17 Feb 2022 08:16:08 -0600
From:   Smita Koralahalli <Smita.KoralahalliChannabasappa@....com>
To:     <x86@...nel.org>, <linux-edac@...r.kernel.org>,
        <linux-kernel@...r.kernel.org>
CC:     Tony Luck <tony.luck@...el.com>, "H . Peter Anvin" <hpa@...or.com>,
        "Dave Hansen" <dave.hansen@...ux.intel.com>,
        Yazen Ghannam <yazen.ghannam@....com>,
        Smita Koralahalli <Smita.KoralahalliChannabasappa@....com>
Subject: [RFC PATCH 1/2] x86/mce: Handle AMD threshold interrupt storms

Extend the logic of handling CMCI storms to AMD threshold interrupts.

Similar to CMCI storm handling, keep track of the rate at which each
processor sees interrupts. If it exceeds the threshold, disable
interrupts and switch to polling of the machine check banks.

But unlike CMCI, re-enable threshold interrupts per CPU, because MCA
exceptions and interrupts are directed to a single CPU on AMD systems.
Since the threshold interrupts are per CPU, no global counter is needed
to track how many CPUs are in the storm.

Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@....com>
---
This patch mostly inherits the existing Intel CMCI storm logic and is not
a per-bank approach. Commit 7bee1ef01f38395 ("x86/mce: Add per-bank
CMCI storm mitigation") in Tony Luck's Linux tree makes the existing
CMCI storm handling more fine grained and adds a hook into
machine_check_poll() to track per-CPU, per-bank corrected error logs.
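
For reference, the per-CPU rate tracking described above can be sketched in
plain user space as follows: count events inside a one-second window and
declare a storm once the count exceeds the threshold. This is only an
illustrative sketch, not code from the patch; the constants and names
(storm_track_event(), the loop in main()) are made up for the example.

	#include <stdbool.h>
	#include <stdio.h>
	#include <time.h>

	#define STORM_INTERVAL_SEC	1
	#define STORM_THRESHOLD		15

	static unsigned int storm_cnt;
	static time_t time_stamp;
	static bool storm_active;

	/* Called once per threshold event; returns true while a storm is active. */
	static bool storm_track_event(time_t now)
	{
		if (storm_active)
			return true;

		/* Count events that fall inside the current one-second window. */
		if (now <= time_stamp + STORM_INTERVAL_SEC) {
			storm_cnt++;
		} else {
			storm_cnt = 1;
			time_stamp = now;
		}

		if (storm_cnt <= STORM_THRESHOLD)
			return false;

		/* Threshold exceeded within the window: declare a storm. */
		storm_active = true;
		printf("storm detected: switch to polling\n");
		return true;
	}

	int main(void)
	{
		time_t now = time(NULL);
		int i;

		/* 20 events in the same one-second window: crosses the threshold. */
		for (i = 0; i < 20; i++)
			storm_track_event(now);

		return 0;
	}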
---
 arch/x86/kernel/cpu/mce/amd.c      | 126 +++++++++++++++++++++++++++++
 arch/x86/kernel/cpu/mce/core.c     |  16 +++-
 arch/x86/kernel/cpu/mce/intel.c    |   2 +-
 arch/x86/kernel/cpu/mce/internal.h |   8 +-
 4 files changed, 147 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 1940d305db1c..53d9320d1470 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -478,6 +478,129 @@ static void mce_threshold_block_init(struct threshold_block *b, int offset)
 	threshold_restart_bank(&tr);
 };
 
+#define MCI_STORM_INTERVAL	(HZ)
+#define MCI_STORM_THRESHOLD	15
+
+enum {
+	MCI_STORM_NONE,
+	MCI_STORM_ACTIVE,
+};
+
+static DEFINE_PER_CPU(unsigned long, mci_time_stamp);
+static DEFINE_PER_CPU(unsigned int, mci_storm_cnt);
+static DEFINE_PER_CPU(unsigned int, mci_storm_state);
+
+static DEFINE_PER_CPU(int, mci_backoff_cnt);
+
+static void _reset_block(struct threshold_block *block)
+{
+	struct thresh_restart tr;
+
+	memset(&tr, 0, sizeof(tr));
+	tr.b = block;
+	threshold_restart_bank(&tr);
+}
+
+static void toggle_interrupt_reset_block(struct threshold_block *block, bool on)
+{
+	if (!block)
+		return;
+
+	block->interrupt_enable = !!on;
+	_reset_block(block);
+}
+
+static void mci_toggle_interrupt_mode(bool on)
+{
+	struct threshold_block *first_block = NULL, *block = NULL, *tmp = NULL;
+	struct threshold_bank **bp = this_cpu_read(threshold_banks);
+	unsigned long flags;
+	unsigned int bank;
+
+	if (!bp)
+		return;
+
+	local_irq_save(flags);
+
+	for (bank = 0; bank < this_cpu_read(mce_num_banks); bank++) {
+		if (!(this_cpu_read(bank_map) & (1 << bank)))
+			continue;
+
+		first_block = bp[bank]->blocks;
+		if (!first_block)
+			continue;
+
+		toggle_interrupt_reset_block(first_block, on);
+
+		list_for_each_entry_safe(block, tmp, &first_block->miscj, miscj)
+			toggle_interrupt_reset_block(block, on);
+	}
+
+	local_irq_restore(flags);
+}
+
+bool mce_amd_mci_poll(bool err_seen)
+{
+	if (__this_cpu_read(mci_storm_state) == MCI_STORM_NONE)
+		return false;
+
+	if (err_seen)
+		this_cpu_write(mci_backoff_cnt, INITIAL_CHECK_INTERVAL);
+	else
+		this_cpu_dec(mci_backoff_cnt);
+
+	return true;
+}
+
+unsigned long mci_amd_adjust_timer(unsigned long interval)
+{
+	if (__this_cpu_read(mci_storm_state) == MCI_STORM_ACTIVE) {
+		if (this_cpu_read(mci_backoff_cnt) > 0) {
+			mce_notify_irq();
+			return MCI_STORM_INTERVAL;
+		}
+
+		__this_cpu_write(mci_storm_state, MCI_STORM_NONE);
+		pr_notice("Storm subsided on CPU %d: switching to interrupt mode\n",
+			  smp_processor_id());
+		mci_toggle_interrupt_mode(true);
+	}
+
+	return interval;
+}
+
+static bool storm_detect(void)
+{
+	unsigned int cnt = this_cpu_read(mci_storm_cnt);
+	unsigned long ts = this_cpu_read(mci_time_stamp);
+	unsigned long now = jiffies;
+
+	if (__this_cpu_read(mci_storm_state) != MCI_STORM_NONE)
+		return true;
+
+	if (time_before_eq(now, ts + MCI_STORM_INTERVAL)) {
+		cnt++;
+	} else {
+		cnt = 1;
+		this_cpu_write(mci_time_stamp, now);
+	}
+
+	this_cpu_write(mci_storm_cnt, cnt);
+
+	if (cnt <= MCI_STORM_THRESHOLD)
+		return false;
+
+	mci_toggle_interrupt_mode(false);
+	__this_cpu_write(mci_storm_state, MCI_STORM_ACTIVE);
+	mce_timer_kick(MCI_STORM_INTERVAL);
+	this_cpu_write(mci_backoff_cnt, INITIAL_CHECK_INTERVAL);
+
+	pr_notice("Storm detected on CPU %d: switching to poll mode\n",
+		  smp_processor_id());
+
+	return true;
+}
+
 static int setup_APIC_mce_threshold(int reserved, int new)
 {
 	if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
@@ -868,6 +991,9 @@ static void amd_threshold_interrupt(void)
 	struct threshold_bank **bp = this_cpu_read(threshold_banks);
 	unsigned int bank, cpu = smp_processor_id();
 
+	if (storm_detect())
+		return;
+
 	/*
 	 * Validate that the threshold bank has been initialized already. The
 	 * handler is installed at boot time, but on a hotplug event the
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 4c31656503bd..ec89b1115889 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1554,6 +1554,13 @@ static unsigned long mce_adjust_timer_default(unsigned long interval)
 
 static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default;
 
+static bool mce_mci_poll_default(bool err_seen)
+{
+	return false;
+}
+
+static bool (*mce_mci_poll)(bool err_seen) = mce_mci_poll_default;
+
 static void __start_timer(struct timer_list *t, unsigned long interval)
 {
 	unsigned long when = jiffies + interval;
@@ -1577,9 +1584,11 @@ static void mce_timer_fn(struct timer_list *t)
 	iv = __this_cpu_read(mce_next_interval);
 
 	if (mce_available(this_cpu_ptr(&cpu_info))) {
-		machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
+		bool err_seen;
+
+		err_seen = machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
 
-		if (mce_intel_cmci_poll()) {
+		if (mce_mci_poll(err_seen)) {
 			iv = mce_adjust_timer(iv);
 			goto done;
 		}
@@ -1938,10 +1947,13 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 	case X86_VENDOR_INTEL:
 		mce_intel_feature_init(c);
 		mce_adjust_timer = cmci_intel_adjust_timer;
+		mce_mci_poll = mce_intel_cmci_poll;
 		break;
 
 	case X86_VENDOR_AMD: {
 		mce_amd_feature_init(c);
+		mce_adjust_timer = mci_amd_adjust_timer;
+		mce_mci_poll = mce_amd_mci_poll;
 		break;
 		}
 
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index 95275a5e57e0..6f8006d9620d 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -127,7 +127,7 @@ static bool lmce_supported(void)
 	return tmp & FEAT_CTL_LMCE_ENABLED;
 }
 
-bool mce_intel_cmci_poll(void)
+bool mce_intel_cmci_poll(bool err_seen)
 {
 	if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
 		return false;
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index a04b61e27827..aa03107a72b5 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -42,7 +42,7 @@ extern mce_banks_t mce_banks_ce_disabled;
 
 #ifdef CONFIG_X86_MCE_INTEL
 unsigned long cmci_intel_adjust_timer(unsigned long interval);
-bool mce_intel_cmci_poll(void);
+bool mce_intel_cmci_poll(bool err_seen);
 void mce_intel_hcpu_update(unsigned long cpu);
 void cmci_disable_bank(int bank);
 void intel_init_cmci(void);
@@ -51,7 +51,7 @@ void intel_clear_lmce(void);
 bool intel_filter_mce(struct mce *m);
 #else
 # define cmci_intel_adjust_timer mce_adjust_timer_default
-static inline bool mce_intel_cmci_poll(void) { return false; }
+# define mce_intel_cmci_poll mce_mci_poll_default
 static inline void mce_intel_hcpu_update(unsigned long cpu) { }
 static inline void cmci_disable_bank(int bank) { }
 static inline void intel_init_cmci(void) { }
@@ -186,8 +186,12 @@ enum mca_msr {
 extern bool filter_mce(struct mce *m);
 
 #ifdef CONFIG_X86_MCE_AMD
+unsigned long mci_amd_adjust_timer(unsigned long interval);
 extern bool amd_filter_mce(struct mce *m);
+bool mce_amd_mci_poll(bool err_seen);
 #else
+# define mci_amd_adjust_timer mce_adjust_timer_default
+# define mce_amd_mci_poll mce_mci_poll_default
 static inline bool amd_filter_mce(struct mce *m) { return false; }
 #endif
 
-- 
2.17.1
