lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <fc9a95163b055235b1a5007753a131a7250a409b.1748594841.git.libo.gcs85@bytedance.com>
Date: Fri, 30 May 2025 17:28:00 +0800
From: Bo Li <libo.gcs85@...edance.com>
To: tglx@...utronix.de,
	mingo@...hat.com,
	bp@...en8.de,
	dave.hansen@...ux.intel.com,
	x86@...nel.org,
	luto@...nel.org,
	kees@...nel.org,
	akpm@...ux-foundation.org,
	david@...hat.com,
	juri.lelli@...hat.com,
	vincent.guittot@...aro.org,
	peterz@...radead.org
Cc: dietmar.eggemann@....com,
	hpa@...or.com,
	acme@...nel.org,
	namhyung@...nel.org,
	mark.rutland@....com,
	alexander.shishkin@...ux.intel.com,
	jolsa@...nel.org,
	irogers@...gle.com,
	adrian.hunter@...el.com,
	kan.liang@...ux.intel.com,
	viro@...iv.linux.org.uk,
	brauner@...nel.org,
	jack@...e.cz,
	lorenzo.stoakes@...cle.com,
	Liam.Howlett@...cle.com,
	vbabka@...e.cz,
	rppt@...nel.org,
	surenb@...gle.com,
	mhocko@...e.com,
	rostedt@...dmis.org,
	bsegall@...gle.com,
	mgorman@...e.de,
	vschneid@...hat.com,
	jannh@...gle.com,
	pfalcato@...e.de,
	riel@...riel.com,
	harry.yoo@...cle.com,
	linux-kernel@...r.kernel.org,
	linux-perf-users@...r.kernel.org,
	linux-fsdevel@...r.kernel.org,
	linux-mm@...ck.org,
	duanxiongchun@...edance.com,
	yinhongbo@...edance.com,
	dengliang.1214@...edance.com,
	xieyongji@...edance.com,
	chaiwen.cc@...edance.com,
	songmuchun@...edance.com,
	yuanzhu@...edance.com,
	chengguozhu@...edance.com,
	sunjiadong.lff@...edance.com,
	Bo Li <libo.gcs85@...edance.com>
Subject: [RFC v2 32/35] RPAL: fix unknown nmi on AMD CPU

During a lazy switch, event_sched_out() is called. This function removes
the perf event of the task being scheduled out, which clears the event's
bit in the active_mask of cpu_hw_events. In AMD's NMI handler, if the bit
in active_mask is not set, the NMI is not handled, which ultimately
triggers an unknown NMI error. Additionally, event_sched_out() may call
amd_pmu_wait_on_overflow(), leading to a busy wait of up to 50us during
the lazy switch.

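This patch adds two per-CPU variables. rpal_nmi_handle is set when a lazy
switch is performed from the NMI path; default_do_nmi() consumes the flag
and, if the NMI would otherwise be reported as unknown, skips the report.
rpal_nmi is set before the lazy switch and cleared after it, so that
amd_pmu_check_overflow() and amd_pmu_disable_event() return early and the
busy wait in amd_pmu_wait_on_overflow() is avoided.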

Signed-off-by: Bo Li <libo.gcs85@...edance.com>
---
 arch/x86/events/amd/core.c | 14 ++++++++++++++
 arch/x86/kernel/nmi.c      | 20 ++++++++++++++++++++
 arch/x86/rpal/core.c       | 17 ++++++++++++++++-
 3 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index b20661b8621d..633a9ac4e77c 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -719,6 +719,10 @@ static void amd_pmu_wait_on_overflow(int idx)
 	}
 }
 
+#ifdef CONFIG_RPAL
+DEFINE_PER_CPU(bool, rpal_nmi);
+#endif
+
 static void amd_pmu_check_overflow(void)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -732,6 +736,11 @@ static void amd_pmu_check_overflow(void)
 	if (in_nmi())
 		return;
 
+#ifdef CONFIG_RPAL
+	if (this_cpu_read(rpal_nmi))
+		return;
+#endif
+
 	/*
 	 * Check each counter for overflow and wait for it to be reset by the
 	 * NMI if it has overflowed. This relies on the fact that all active
@@ -807,6 +816,11 @@ static void amd_pmu_disable_event(struct perf_event *event)
 	if (in_nmi())
 		return;
 
+#ifdef CONFIG_RPAL
+	if (this_cpu_read(rpal_nmi))
+		return;
+#endif
+
 	amd_pmu_wait_on_overflow(event->hw.idx);
 }
 
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index be93ec7255bf..dd72b6d1c7f9 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -351,12 +351,23 @@ NOKPROBE_SYMBOL(unknown_nmi_error);
 
 static DEFINE_PER_CPU(bool, swallow_nmi);
 static DEFINE_PER_CPU(unsigned long, last_nmi_rip);
+#ifdef CONFIG_RPAL
+DEFINE_PER_CPU(bool, rpal_nmi_handle);
+#endif
 
 static noinstr void default_do_nmi(struct pt_regs *regs)
 {
 	unsigned char reason = 0;
 	int handled;
 	bool b2b = false;
+#ifdef CONFIG_RPAL
+	bool rpal_handle = false;
+
+	if (__this_cpu_read(rpal_nmi_handle)) {
+		__this_cpu_write(rpal_nmi_handle, false);
+		rpal_handle = true;
+	}
+#endif
 
 	/*
 	 * Back-to-back NMIs are detected by comparing the RIP of the
@@ -471,6 +482,15 @@ static noinstr void default_do_nmi(struct pt_regs *regs)
 	 */
 	if (b2b && __this_cpu_read(swallow_nmi))
 		__this_cpu_add(nmi_stats.swallow, 1);
+#ifdef CONFIG_RPAL
+	/*
+	 * A lazy switch may clear the event's bit in active_mask, so
+	 * the NMI goes unhandled and would be reported as an unknown
+	 * NMI. Skip the report in that case.
+	 */
+	else if (rpal_handle)
+		goto out;
+#endif
 	else
 		unknown_nmi_error(reason, regs);
 
diff --git a/arch/x86/rpal/core.c b/arch/x86/rpal/core.c
index 6a22b9faa100..92281b557a6c 100644
--- a/arch/x86/rpal/core.c
+++ b/arch/x86/rpal/core.c
@@ -376,11 +376,26 @@ rpal_exception_context_switch(struct pt_regs *regs)
 	return next;
 }
 
+DECLARE_PER_CPU(bool, rpal_nmi_handle);
+DECLARE_PER_CPU(bool, rpal_nmi);
 __visible struct task_struct *rpal_nmi_context_switch(struct pt_regs *regs)
 {
 	struct task_struct *next;
 
-	next = rpal_kernel_context_switch(regs);
+	if (rpal_test_current_thread_flag(RPAL_LAZY_SWITCHED_BIT))
+		rpal_update_fsbase(regs);
+
+	next = rpal_misidentify();
+	if (unlikely(next != NULL)) {
+		next = rpal_fix_critical_section(next, regs);
+		if (next) {
+			__this_cpu_write(rpal_nmi_handle, true);
+			/* avoid wait in amd_pmu_check_overflow */
+			__this_cpu_write(rpal_nmi, true);
+			next = rpal_do_kernel_context_switch(next, regs);
+			__this_cpu_write(rpal_nmi, false);
+		}
+	}
 
 	return next;
 }
-- 
2.20.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ