lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20251027133431.15321-4-cuiyunhui@bytedance.com>
Date: Mon, 27 Oct 2025 21:34:31 +0800
From: Yunhui Cui <cuiyunhui@...edance.com>
To: paul.walmsley@...ive.com,
	palmer@...belt.com,
	aou@...s.berkeley.edu,
	alex@...ti.fr,
	conor@...nel.org,
	cuiyunhui@...edance.com,
	luxu.kernel@...edance.com,
	atishp@...osinc.com,
	cleger@...osinc.com,
	ajones@...tanamicro.com,
	apatel@...tanamicro.com,
	linux-kernel@...r.kernel.org,
	linux-riscv@...ts.infradead.org,
	songshuaishuai@...ylab.org,
	bjorn@...osinc.com,
	charlie@...osinc.com,
	masahiroy@...nel.org,
	valentina.fernandezalanis@...rochip.com,
	jassisinghbrar@...il.com,
	conor.dooley@...rochip.com
Subject: [PATCH 3/3] riscv: crash: use NMI to stop the CPU

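An NMI is more robust than an IPI for stopping CPUs during a crash,
especially when the target CPU is running with interrupts disabled.
Add the SBI_SSE_EVENT_LOCAL_CRASH_NMI event ID and use it to deliver
an NMI that stops the other CPUs, falling back to the existing
IPI_CPU_CRASH_STOP IPI if the NMI cannot be sent.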

Signed-off-by: Yunhui Cui <cuiyunhui@...edance.com>
---
 arch/riscv/include/asm/crash.h   |  1 +
 arch/riscv/include/asm/sbi.h     |  1 +
 arch/riscv/kernel/crash.c        | 31 +++++++++++++-
 drivers/firmware/riscv/sse_nmi.c | 71 +++++++++++++++++++++++++++++++-
 include/linux/sse_nmi.h          |  8 ++++
 5 files changed, 109 insertions(+), 3 deletions(-)
 create mode 100644 include/linux/sse_nmi.h

diff --git a/arch/riscv/include/asm/crash.h b/arch/riscv/include/asm/crash.h
index b64df919277d4..5076f297cbc15 100644
--- a/arch/riscv/include/asm/crash.h
+++ b/arch/riscv/include/asm/crash.h
@@ -5,6 +5,7 @@
 
 #ifdef CONFIG_KEXEC_CORE
 void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs);
+void cpu_crash_stop(unsigned int cpu, struct pt_regs *regs);
 #else
 static inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
 {
diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
index 52d3fdf2d4cc1..65cce85237879 100644
--- a/arch/riscv/include/asm/sbi.h
+++ b/arch/riscv/include/asm/sbi.h
@@ -487,6 +487,7 @@ enum sbi_sse_attr_id {
 #define SBI_SSE_EVENT_GLOBAL_LOW_PRIO_RAS	0x00108000
 #define SBI_SSE_EVENT_LOCAL_SOFTWARE_INJECTED	0xffff0000
 #define SBI_SSE_EVENT_LOCAL_UNKNOWN_NMI		0xffff0001
+#define SBI_SSE_EVENT_LOCAL_CRASH_NMI		0xffff0002
 #define SBI_SSE_EVENT_GLOBAL_SOFTWARE_INJECTED	0xffff8000
 
 #define SBI_SSE_EVENT_PLATFORM		BIT(14)
diff --git a/arch/riscv/kernel/crash.c b/arch/riscv/kernel/crash.c
index 12598bbc2df04..9f3f0becfdd95 100644
--- a/arch/riscv/kernel/crash.c
+++ b/arch/riscv/kernel/crash.c
@@ -3,14 +3,16 @@
 #include <linux/cpu.h>
 #include <linux/delay.h>
 #include <linux/kexec.h>
+#include <linux/sse_nmi.h>
 #include <linux/smp.h>
 #include <linux/sched.h>
 
+#include <asm/crash.h>
 #include <asm/cpu_ops.h>
 
 static atomic_t waiting_for_crash_ipi = ATOMIC_INIT(0);
 
-inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
+void cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
 {
 	crash_save_cpu(regs, cpu);
 
@@ -27,6 +29,11 @@ inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
 		wait_for_interrupt();
 }
 
+inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
+{
+	cpu_crash_stop(cpu, regs);
+}
+
 /*
  * The number of CPUs online, not counting this CPU (which may not be
  * fully online and so not counted in num_online_cpus()).
@@ -38,6 +45,24 @@ static inline unsigned int num_other_online_cpus(void)
 	return num_online_cpus() - this_cpu_online;
 }
 
+#ifdef CONFIG_RISCV_SSE_NMI
+static int send_nmi_stop_cpu(cpumask_t *mask)
+{
+	unsigned int cpu;
+	int ret = 0;
+
+	for_each_cpu(cpu, mask)
+		ret += carsh_nmi_stop_cpu(cpu);
+
+	return ret;
+}
+#else
+static inline int send_nmi_stop_cpu(cpumask_t *mask)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+
 void crash_smp_send_stop(void)
 {
 	static int cpus_stopped;
@@ -66,7 +91,9 @@ void crash_smp_send_stop(void)
 	atomic_set(&waiting_for_crash_ipi, num_other_online_cpus());
 
 	pr_crit("SMP: stopping secondary CPUs\n");
-	send_ipi_mask(&mask, IPI_CPU_CRASH_STOP);
+
+	if (send_nmi_stop_cpu(&mask))
+		send_ipi_mask(&mask, IPI_CPU_CRASH_STOP);
 
 	/* Wait up to one second for other CPUs to stop */
 	timeout = USEC_PER_SEC;
diff --git a/drivers/firmware/riscv/sse_nmi.c b/drivers/firmware/riscv/sse_nmi.c
index 2c1eaea2bbabc..152d787075345 100644
--- a/drivers/firmware/riscv/sse_nmi.c
+++ b/drivers/firmware/riscv/sse_nmi.c
@@ -4,13 +4,16 @@
 
 #include <linux/nmi.h>
 #include <linux/riscv_sbi_sse.h>
+#include <linux/sse_nmi.h>
 #include <linux/sysctl.h>
 
+#include <asm/crash.h>
 #include <asm/irq_regs.h>
 #include <asm/sbi.h>
 
 int unknown_nmi_panic;
 static struct sse_event *unknown_nmi_evt;
+static struct sse_event *crash_nmi_evt;
 static struct ctl_table_header *unknown_nmi_sysctl_header;
 
 static int __init setup_unknown_nmi_panic(char *str)
@@ -32,6 +35,12 @@ const struct ctl_table unknown_nmi_table[] = {
 	},
 };
 
+static inline struct sbiret sbi_sse_ecall(int fid, unsigned long arg0,
+					  unsigned long arg1)
+{
+	return sbi_ecall(SBI_EXT_SSE, fid, arg0, arg1, 0, 0, 0, 0);
+}
+
 static int unknown_nmi_handler(u32 evt, void *arg, struct pt_regs *regs)
 {
 	pr_emerg("NMI received for unknown on CPU %d.\n", smp_processor_id());
@@ -73,9 +82,69 @@ static int unknown_nmi_init(void)
 	return ret;
 }
 
+#ifdef CONFIG_KEXEC_CORE
+int carsh_nmi_stop_cpu(unsigned int cpu)
+{
+	unsigned int hart_id = cpuid_to_hartid_map(cpu);
+	u32 evt = SBI_SSE_EVENT_LOCAL_CRASH_NMI;
+	struct sbiret ret;
+
+	ret = sbi_sse_ecall(SBI_SSE_EVENT_INJECT, evt, hart_id);
+	if (ret.error) {
+		pr_err("Failed to signal event %x, error %ld\n", evt, ret.error);
+		return sbi_err_map_linux_errno(ret.error);
+	}
+
+	return 0;
+}
+
+static int crash_nmi_handler(u32 evt, void *arg, struct pt_regs *regs)
+{
+	cpu_crash_stop(smp_processor_id(), regs);
+
+	return 0;
+}
+
+static int crash_nmi_init(void)
+{
+	int ret;
+
+	crash_nmi_evt = sse_event_register(SBI_SSE_EVENT_LOCAL_CRASH_NMI, 0,
+				 crash_nmi_handler, NULL);
+	if (IS_ERR(crash_nmi_evt))
+		return PTR_ERR(crash_nmi_evt);
+
+	ret = sse_event_enable(crash_nmi_evt);
+	if (ret) {
+		sse_event_unregister(crash_nmi_evt);
+		return ret;
+	}
+
+	pr_info("Using SSE for crash NMI event delivery\n");
+
+	return 0;
+}
+#endif
+
 static int __init sse_nmi_init(void)
 {
-	return unknown_nmi_init();
+	int ret;
+
+	ret = unknown_nmi_init();
+	if (ret) {
+		pr_err("Unknown_nmi_init failed with error %d\n", ret);
+		return ret;
+	}
+
+#ifdef CONFIG_KEXEC_CORE
+	ret = crash_nmi_init();
+	if (ret) {
+		pr_err("Crash_nmi_init failed with error %d\n", ret);
+		return ret;
+	}
+#endif
+
+	return 0;
 }
 
 late_initcall(sse_nmi_init);
diff --git a/include/linux/sse_nmi.h b/include/linux/sse_nmi.h
new file mode 100644
index 0000000000000..548a348ac0a46
--- /dev/null
+++ b/include/linux/sse_nmi.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __LINUX_RISCV_SSE_NMI_H
+#define __LINUX_RISCV_SSE_NMI_H
+
+int carsh_nmi_stop_cpu(unsigned int cpu);
+
+#endif
-- 
2.39.5


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ