Message-Id: <fc9a95163b055235b1a5007753a131a7250a409b.1748594841.git.libo.gcs85@bytedance.com>
Date: Fri, 30 May 2025 17:28:00 +0800
From: Bo Li <libo.gcs85@...edance.com>
To: tglx@...utronix.de,
mingo@...hat.com,
bp@...en8.de,
dave.hansen@...ux.intel.com,
x86@...nel.org,
luto@...nel.org,
kees@...nel.org,
akpm@...ux-foundation.org,
david@...hat.com,
juri.lelli@...hat.com,
vincent.guittot@...aro.org,
peterz@...radead.org
Cc: dietmar.eggemann@....com,
hpa@...or.com,
acme@...nel.org,
namhyung@...nel.org,
mark.rutland@....com,
alexander.shishkin@...ux.intel.com,
jolsa@...nel.org,
irogers@...gle.com,
adrian.hunter@...el.com,
kan.liang@...ux.intel.com,
viro@...iv.linux.org.uk,
brauner@...nel.org,
jack@...e.cz,
lorenzo.stoakes@...cle.com,
Liam.Howlett@...cle.com,
vbabka@...e.cz,
rppt@...nel.org,
surenb@...gle.com,
mhocko@...e.com,
rostedt@...dmis.org,
bsegall@...gle.com,
mgorman@...e.de,
vschneid@...hat.com,
jannh@...gle.com,
pfalcato@...e.de,
riel@...riel.com,
harry.yoo@...cle.com,
linux-kernel@...r.kernel.org,
linux-perf-users@...r.kernel.org,
linux-fsdevel@...r.kernel.org,
linux-mm@...ck.org,
duanxiongchun@...edance.com,
yinhongbo@...edance.com,
dengliang.1214@...edance.com,
xieyongji@...edance.com,
chaiwen.cc@...edance.com,
songmuchun@...edance.com,
yuanzhu@...edance.com,
chengguozhu@...edance.com,
sunjiadong.lff@...edance.com,
Bo Li <libo.gcs85@...edance.com>
Subject: [RFC v2 32/35] RPAL: fix unknown nmi on AMD CPU

During a lazy switch, event_sched_out() is called. It removes the perf
events of the task being scheduled out and thereby clears the
corresponding bits in active_mask of cpu_hw_events. In AMD's NMI
handler, a counter whose active_mask bit is cleared is not serviced, so
the NMI is left unclaimed and is ultimately reported as an unknown NMI.
In addition, event_sched_out() may call amd_pmu_wait_on_overflow(),
which busy waits for up to 50us during the lazy switch.
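
To make the failure mode concrete, here is a standalone toy model (all
names here, e.g. handle_pmu_nmi() and NUM_COUNTERS, are simplified
stand-ins rather than the real perf/NMI code) of how a cleared active
bit ends up as an unknown NMI:

/* Toy model only: simplified stand-in for the AMD PMU NMI path. */
#include <stdbool.h>
#include <stdio.h>

#define NUM_COUNTERS 6

static bool active_mask[NUM_COUNTERS];

/* Only counters still marked active are serviced by the NMI handler. */
static int handle_pmu_nmi(void)
{
	int idx, handled = 0;

	for (idx = 0; idx < NUM_COUNTERS; idx++) {
		if (!active_mask[idx])
			continue;	/* bit cleared by event_sched_out() */
		handled++;		/* the real handler resets the counter */
	}
	return handled;
}

int main(void)
{
	active_mask[0] = true;	/* one active perf event, NMI on its way */
	active_mask[0] = false;	/* lazy switch runs event_sched_out() */

	if (handle_pmu_nmi() == 0)
		printf("NMI not claimed -> reported as unknown NMI\n");
	return 0;
}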

This patch adds two per-CPU variables. rpal_nmi_handle is set when an
NMI triggers a lazy switch; if that NMI would otherwise be reported as
unknown, it is skipped instead. rpal_nmi is set before the lazy switch
and cleared after it, so amd_pmu_wait_on_overflow() does not busy wait
during the switch.
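
The intended lifecycle of the two flags, again as a standalone toy
model rather than the kernel code (the *_sketch() functions below are
illustrative only; only rpal_nmi and rpal_nmi_handle come from the
patch itself):

/* Toy model of the per-CPU flag lifecycle around a lazy switch. */
#include <stdbool.h>
#include <stdio.h>

static bool rpal_nmi;		/* suppresses the overflow busy wait */
static bool rpal_nmi_handle;	/* lets one unclaimed NMI be swallowed */

static void check_overflow_sketch(void)
{
	if (rpal_nmi)
		return;		/* skip the up-to-50us busy wait */
	/* otherwise: wait for overflowed counters to be reset by the NMI */
}

static void nmi_sketch(void)
{
	if (rpal_nmi_handle) {
		rpal_nmi_handle = false;
		printf("unclaimed NMI swallowed\n");
		return;
	}
	printf("unknown NMI reported\n");
}

static void lazy_switch_sketch(void)
{
	rpal_nmi_handle = true;		/* the in-flight NMI may look unknown */
	rpal_nmi = true;
	check_overflow_sketch();	/* event_sched_out() path, no busy wait */
	rpal_nmi = false;
}

int main(void)
{
	lazy_switch_sketch();
	nmi_sketch();	/* swallowed once */
	nmi_sketch();	/* any later unclaimed NMI is still reported */
	return 0;
}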

Signed-off-by: Bo Li <libo.gcs85@...edance.com>
---
 arch/x86/events/amd/core.c | 14 ++++++++++++++
 arch/x86/kernel/nmi.c      | 20 ++++++++++++++++++++
 arch/x86/rpal/core.c       | 17 ++++++++++++++++-
 3 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index b20661b8621d..633a9ac4e77c 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -719,6 +719,10 @@ static void amd_pmu_wait_on_overflow(int idx)
 	}
 }
 
+#ifdef CONFIG_RPAL
+DEFINE_PER_CPU(bool, rpal_nmi);
+#endif
+
 static void amd_pmu_check_overflow(void)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -732,6 +736,11 @@ static void amd_pmu_check_overflow(void)
 	if (in_nmi())
 		return;
 
+#ifdef CONFIG_RPAL
+	if (this_cpu_read(rpal_nmi))
+		return;
+#endif
+
 	/*
 	 * Check each counter for overflow and wait for it to be reset by the
 	 * NMI if it has overflowed. This relies on the fact that all active
@@ -807,6 +816,11 @@ static void amd_pmu_disable_event(struct perf_event *event)
 	if (in_nmi())
 		return;
 
+#ifdef CONFIG_RPAL
+	if (this_cpu_read(rpal_nmi))
+		return;
+#endif
+
 	amd_pmu_wait_on_overflow(event->hw.idx);
 }
 
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index be93ec7255bf..dd72b6d1c7f9 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -351,12 +351,23 @@ NOKPROBE_SYMBOL(unknown_nmi_error);
 
 static DEFINE_PER_CPU(bool, swallow_nmi);
 static DEFINE_PER_CPU(unsigned long, last_nmi_rip);
+#ifdef CONFIG_RPAL
+DEFINE_PER_CPU(bool, rpal_nmi_handle);
+#endif
 
 static noinstr void default_do_nmi(struct pt_regs *regs)
 {
 	unsigned char reason = 0;
 	int handled;
 	bool b2b = false;
+#ifdef CONFIG_RPAL
+	bool rpal_handle = false;
+
+	if (__this_cpu_read(rpal_nmi_handle)) {
+		__this_cpu_write(rpal_nmi_handle, false);
+		rpal_handle = true;
+	}
+#endif
 
 	/*
 	 * Back-to-back NMIs are detected by comparing the RIP of the
@@ -471,6 +482,15 @@ static noinstr void default_do_nmi(struct pt_regs *regs)
 	 */
 	if (b2b && __this_cpu_read(swallow_nmi))
 		__this_cpu_add(nmi_stats.swallow, 1);
+#ifdef CONFIG_RPAL
+	/*
+	 * A lazy switch may have cleared the bit in active_mask, so this
+	 * NMI was not handled and would be reported as an unknown NMI.
+	 * Skip it instead.
+	 */
+	else if (rpal_handle)
+		goto out;
+#endif
 	else
 		unknown_nmi_error(reason, regs);
 
diff --git a/arch/x86/rpal/core.c b/arch/x86/rpal/core.c
index 6a22b9faa100..92281b557a6c 100644
--- a/arch/x86/rpal/core.c
+++ b/arch/x86/rpal/core.c
@@ -376,11 +376,26 @@ rpal_exception_context_switch(struct pt_regs *regs)
 	return next;
 }
 
+DECLARE_PER_CPU(bool, rpal_nmi_handle);
+DECLARE_PER_CPU(bool, rpal_nmi);
 __visible struct task_struct *rpal_nmi_context_switch(struct pt_regs *regs)
 {
 	struct task_struct *next;
 
-	next = rpal_kernel_context_switch(regs);
+	if (rpal_test_current_thread_flag(RPAL_LAZY_SWITCHED_BIT))
+		rpal_update_fsbase(regs);
+
+	next = rpal_misidentify();
+	if (unlikely(next != NULL)) {
+		next = rpal_fix_critical_section(next, regs);
+		if (next) {
+			__this_cpu_write(rpal_nmi_handle, true);
+			/* avoid wait in amd_pmu_check_overflow */
+			__this_cpu_write(rpal_nmi, true);
+			next = rpal_do_kernel_context_switch(next, regs);
+			__this_cpu_write(rpal_nmi, false);
+		}
+	}
 
 	return next;
 }
--
2.20.1