[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20251118025015.42491-6-cuiyunhui@bytedance.com>
Date: Tue, 18 Nov 2025 10:50:12 +0800
From: Yunhui Cui <cuiyunhui@...edance.com>
To: conor@...nel.org,
paul.walmsley@...ive.com,
palmer@...belt.com,
aou@...s.berkeley.edu,
alex@...ti.fr,
cuiyunhui@...edance.com,
luxu.kernel@...edance.com,
linux-kernel@...r.kernel.org,
linux-riscv@...ts.infradead.org,
jassisinghbrar@...il.com,
conor.dooley@...rochip.com,
valentina.fernandezalanis@...rochip.com,
catalin.marinas@....com,
will@...nel.org,
maz@...nel.org,
timothy.hayes@....com,
lpieralisi@...nel.org,
arnd@...db.de,
kees@...nel.org,
tglx@...utronix.de,
viresh.kumar@...aro.org,
boqun.feng@...il.com,
linux-arm-kernel@...ts.infradead.org,
cleger@...osinc.com,
atishp@...osinc.com,
ajones@...tanamicro.com
Subject: [PATCH v2 5/8] riscv: smp: retry CPU stop with NMI if IPI fails
If stopping the other CPUs with an IPI fails and RISC-V SSE NMI is
supported, retry the CPU stop with an NMI. The implementation is
borrowed from arm64.
Signed-off-by: Yunhui Cui <cuiyunhui@...edance.com>
---
arch/riscv/include/asm/smp.h | 2 ++
arch/riscv/kernel/smp.c | 23 +++++++++++++++++++----
drivers/firmware/riscv/riscv_sse_nmi.c | 1 +
3 files changed, 22 insertions(+), 4 deletions(-)
diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h
index f53f1f0e7aa9e..e01ea962adfc4 100644
--- a/arch/riscv/include/asm/smp.h
+++ b/arch/riscv/include/asm/smp.h
@@ -63,6 +63,8 @@ static inline void cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
}
#endif
+void cpu_stop(void);
+
/* Secondary hart entry */
asmlinkage void smp_callin(void);
diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c
index 07ccc28f52172..aa1cfc344a2c6 100644
--- a/arch/riscv/kernel/smp.c
+++ b/arch/riscv/kernel/smp.c
@@ -69,7 +69,7 @@ int riscv_hartid_to_cpuid(unsigned long hartid)
return -ENOENT;
}
-static void ipi_stop(void)
+void cpu_stop(void)
{
set_cpu_online(smp_processor_id(), false);
while (1)
@@ -127,7 +127,7 @@ static irqreturn_t handle_IPI(int irq, void *data)
generic_smp_call_function_interrupt();
break;
case IPI_CPU_STOP:
- ipi_stop();
+ cpu_stop();
break;
case IPI_CPU_CRASH_STOP:
cpu_crash_stop(cpu, get_irq_regs());
@@ -250,10 +250,9 @@ void tick_broadcast(const struct cpumask *mask)
void smp_send_stop(void)
{
unsigned long timeout;
+ cpumask_t mask;
if (num_online_cpus() > 1) {
- cpumask_t mask;
-
cpumask_copy(&mask, cpu_online_mask);
cpumask_clear_cpu(smp_processor_id(), &mask);
@@ -267,6 +266,22 @@ void smp_send_stop(void)
while (num_online_cpus() > 1 && timeout--)
udelay(1);
+ /*
+ * If CPUs are still online, try an NMI. There's no excuse for this to
+ * be slow, so we only give them an extra 10 ms to respond.
+ */
+ if (num_other_online_cpus()) {
+ smp_rmb();
+ cpumask_copy(&mask, cpu_online_mask);
+ cpumask_clear_cpu(smp_processor_id(), &mask);
+ pr_info("SMP: retry stop with NMI for CPUs %*pbl\n",
+ cpumask_pr_args(&mask));
+ send_nmi_mask(&mask, LOCAL_NMI_STOP);
+ timeout = USEC_PER_MSEC * 10;
+ while (num_other_online_cpus() && timeout--)
+ udelay(1);
+ }
+
if (num_online_cpus() > 1)
pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
cpumask_pr_args(cpu_online_mask));
diff --git a/drivers/firmware/riscv/riscv_sse_nmi.c b/drivers/firmware/riscv/riscv_sse_nmi.c
index e4c20dce40f9a..0ff0bda53608a 100644
--- a/drivers/firmware/riscv/riscv_sse_nmi.c
+++ b/drivers/firmware/riscv/riscv_sse_nmi.c
@@ -55,6 +55,7 @@ static int local_nmi_handler(u32 evt, void *arg, struct pt_regs *regs)
unsigned int cpu = smp_processor_id();
NMI_HANDLE(LOCAL_NMI_CRASH, cpu_crash_stop, cpu, regs);
+ NMI_HANDLE(LOCAL_NMI_STOP, cpu_stop);
atomic_set(&local_nmi_arg, LOCAL_NMI_NONE);
--
2.39.5
Powered by blists - more mailing lists