[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20251027133431.15321-4-cuiyunhui@bytedance.com>
Date: Mon, 27 Oct 2025 21:34:31 +0800
From: Yunhui Cui <cuiyunhui@...edance.com>
To: paul.walmsley@...ive.com,
palmer@...belt.com,
aou@...s.berkeley.edu,
alex@...ti.fr,
conor@...nel.org,
cuiyunhui@...edance.com,
luxu.kernel@...edance.com,
atishp@...osinc.com,
cleger@...osinc.com,
ajones@...tanamicro.com,
apatel@...tanamicro.com,
linux-kernel@...r.kernel.org,
linux-riscv@...ts.infradead.org,
songshuaishuai@...ylab.org,
bjorn@...osinc.com,
charlie@...osinc.com,
masahiroy@...nel.org,
valentina.fernandezalanis@...rochip.com,
jassisinghbrar@...il.com,
conor.dooley@...rochip.com
Subject: [PATCH 3/3] riscv: crash: use NMI to stop the CPU
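An NMI is more robust than an IPI for stopping CPUs during a crash,
especially when the target CPUs have interrupts disabled. Add the
SBI_SSE_EVENT_LOCAL_CRASH_NMI event ID and use it to deliver an NMI that
stops the other CPUs, falling back to the IPI if the NMI cannot be sent.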
Signed-off-by: Yunhui Cui <cuiyunhui@...edance.com>
---
arch/riscv/include/asm/crash.h | 1 +
arch/riscv/include/asm/sbi.h | 1 +
arch/riscv/kernel/crash.c | 31 +++++++++++++-
drivers/firmware/riscv/sse_nmi.c | 71 +++++++++++++++++++++++++++++++-
include/linux/sse_nmi.h | 8 ++++
5 files changed, 109 insertions(+), 3 deletions(-)
create mode 100644 include/linux/sse_nmi.h
diff --git a/arch/riscv/include/asm/crash.h b/arch/riscv/include/asm/crash.h
index b64df919277d4..5076f297cbc15 100644
--- a/arch/riscv/include/asm/crash.h
+++ b/arch/riscv/include/asm/crash.h
@@ -5,6 +5,7 @@
#ifdef CONFIG_KEXEC_CORE
void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs);
+void cpu_crash_stop(unsigned int cpu, struct pt_regs *regs);
#else
static inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
{
diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
index 52d3fdf2d4cc1..65cce85237879 100644
--- a/arch/riscv/include/asm/sbi.h
+++ b/arch/riscv/include/asm/sbi.h
@@ -487,6 +487,7 @@ enum sbi_sse_attr_id {
#define SBI_SSE_EVENT_GLOBAL_LOW_PRIO_RAS 0x00108000
#define SBI_SSE_EVENT_LOCAL_SOFTWARE_INJECTED 0xffff0000
#define SBI_SSE_EVENT_LOCAL_UNKNOWN_NMI 0xffff0001
+#define SBI_SSE_EVENT_LOCAL_CRASH_NMI 0xffff0002
#define SBI_SSE_EVENT_GLOBAL_SOFTWARE_INJECTED 0xffff8000
#define SBI_SSE_EVENT_PLATFORM BIT(14)
diff --git a/arch/riscv/kernel/crash.c b/arch/riscv/kernel/crash.c
index 12598bbc2df04..9f3f0becfdd95 100644
--- a/arch/riscv/kernel/crash.c
+++ b/arch/riscv/kernel/crash.c
@@ -3,14 +3,16 @@
#include <linux/cpu.h>
#include <linux/delay.h>
#include <linux/kexec.h>
+#include <linux/sse_nmi.h>
#include <linux/smp.h>
#include <linux/sched.h>
+#include <asm/crash.h>
#include <asm/cpu_ops.h>
static atomic_t waiting_for_crash_ipi = ATOMIC_INIT(0);
-inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
+void cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
{
crash_save_cpu(regs, cpu);
@@ -27,6 +29,11 @@ inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
wait_for_interrupt();
}
+inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
+{
+ cpu_crash_stop(cpu, regs);
+}
+
/*
* The number of CPUs online, not counting this CPU (which may not be
* fully online and so not counted in num_online_cpus()).
@@ -38,6 +45,24 @@ static inline unsigned int num_other_online_cpus(void)
return num_online_cpus() - this_cpu_online;
}
+#ifdef CONFIG_RISCV_SSE_NMI
+static int send_nmi_stop_cpu(cpumask_t *mask)
+{
+ unsigned int cpu;
+ int ret = 0;
+
+ for_each_cpu(cpu, mask)
+ ret += carsh_nmi_stop_cpu(cpu);
+
+ return ret;
+}
+#else
+static inline int send_nmi_stop_cpu(cpumask_t *mask)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
void crash_smp_send_stop(void)
{
static int cpus_stopped;
@@ -66,7 +91,9 @@ void crash_smp_send_stop(void)
atomic_set(&waiting_for_crash_ipi, num_other_online_cpus());
pr_crit("SMP: stopping secondary CPUs\n");
- send_ipi_mask(&mask, IPI_CPU_CRASH_STOP);
+
+ if (send_nmi_stop_cpu(&mask))
+ send_ipi_mask(&mask, IPI_CPU_CRASH_STOP);
/* Wait up to one second for other CPUs to stop */
timeout = USEC_PER_SEC;
diff --git a/drivers/firmware/riscv/sse_nmi.c b/drivers/firmware/riscv/sse_nmi.c
index 2c1eaea2bbabc..152d787075345 100644
--- a/drivers/firmware/riscv/sse_nmi.c
+++ b/drivers/firmware/riscv/sse_nmi.c
@@ -4,13 +4,16 @@
#include <linux/nmi.h>
#include <linux/riscv_sbi_sse.h>
+#include <linux/sse_nmi.h>
#include <linux/sysctl.h>
+#include <asm/crash.h>
#include <asm/irq_regs.h>
#include <asm/sbi.h>
int unknown_nmi_panic;
static struct sse_event *unknown_nmi_evt;
+static struct sse_event *crash_nmi_evt;
static struct ctl_table_header *unknown_nmi_sysctl_header;
static int __init setup_unknown_nmi_panic(char *str)
@@ -32,6 +35,12 @@ const struct ctl_table unknown_nmi_table[] = {
},
};
+static inline struct sbiret sbi_sse_ecall(int fid, unsigned long arg0,
+ unsigned long arg1)
+{
+ return sbi_ecall(SBI_EXT_SSE, fid, arg0, arg1, 0, 0, 0, 0);
+}
+
static int unknown_nmi_handler(u32 evt, void *arg, struct pt_regs *regs)
{
pr_emerg("NMI received for unknown on CPU %d.\n", smp_processor_id());
@@ -73,9 +82,69 @@ static int unknown_nmi_init(void)
return ret;
}
+#ifdef CONFIG_KEXEC_CORE
+int carsh_nmi_stop_cpu(unsigned int cpu)
+{
+ unsigned int hart_id = cpuid_to_hartid_map(cpu);
+ u32 evt = SBI_SSE_EVENT_LOCAL_CRASH_NMI;
+ struct sbiret ret;
+
+ ret = sbi_sse_ecall(SBI_SSE_EVENT_INJECT, evt, hart_id);
+ if (ret.error) {
+ pr_err("Failed to signal event %x, error %ld\n", evt, ret.error);
+ return sbi_err_map_linux_errno(ret.error);
+ }
+
+ return 0;
+}
+
+static int crash_nmi_handler(u32 evt, void *arg, struct pt_regs *regs)
+{
+ cpu_crash_stop(smp_processor_id(), regs);
+
+ return 0;
+}
+
+static int crash_nmi_init(void)
+{
+ int ret;
+
+ crash_nmi_evt = sse_event_register(SBI_SSE_EVENT_LOCAL_CRASH_NMI, 0,
+ crash_nmi_handler, NULL);
+ if (IS_ERR(crash_nmi_evt))
+ return PTR_ERR(crash_nmi_evt);
+
+ ret = sse_event_enable(crash_nmi_evt);
+ if (ret) {
+ sse_event_unregister(crash_nmi_evt);
+ return ret;
+ }
+
+ pr_info("Using SSE for crash NMI event delivery\n");
+
+ return 0;
+}
+#endif
+
static int __init sse_nmi_init(void)
{
- return unknown_nmi_init();
+ int ret;
+
+ ret = unknown_nmi_init();
+ if (ret) {
+ pr_err("Unknown_nmi_init failed with error %d\n", ret);
+ return ret;
+ }
+
+#ifdef CONFIG_KEXEC_CORE
+ ret = crash_nmi_init();
+ if (ret) {
+ pr_err("Crash_nmi_init failed with error %d\n", ret);
+ return ret;
+ }
+#endif
+
+ return 0;
}
late_initcall(sse_nmi_init);
diff --git a/include/linux/sse_nmi.h b/include/linux/sse_nmi.h
new file mode 100644
index 0000000000000..548a348ac0a46
--- /dev/null
+++ b/include/linux/sse_nmi.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __LINUX_RISCV_SSE_NMI_H
+#define __LINUX_RISCV_SSE_NMI_H
+
+int carsh_nmi_stop_cpu(unsigned int cpu);
+
+#endif
--
2.39.5
Powered by blists - more mailing lists