[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CAEEQ3wkpWExOg+xLEy18Le+89kowgu6LOj0HMCVcqJKG+7Wdhg@mail.gmail.com>
Date: Thu, 27 Nov 2025 18:13:51 +0800
From: yunhui cui <cuiyunhui@...edance.com>
To: conor@...nel.org, paul.walmsley@...ive.com, palmer@...belt.com,
aou@...s.berkeley.edu, alex@...ti.fr, cuiyunhui@...edance.com,
luxu.kernel@...edance.com, linux-kernel@...r.kernel.org,
linux-riscv@...ts.infradead.org, jassisinghbrar@...il.com,
conor.dooley@...rochip.com, valentina.fernandezalanis@...rochip.com,
catalin.marinas@....com, will@...nel.org, maz@...nel.org,
timothy.hayes@....com, lpieralisi@...nel.org, arnd@...db.de, kees@...nel.org,
tglx@...utronix.de, viresh.kumar@...aro.org, boqun.feng@...il.com,
linux-arm-kernel@...ts.infradead.org, cleger@...osinc.com,
atishp@...osinc.com, ajones@...tanamicro.com
Subject: Re: [PATCH v2 4/8] riscv: smp: use NMI for crash stop
Hi All,
On Tue, Nov 18, 2025 at 10:51 AM Yunhui Cui <cuiyunhui@...edance.com> wrote:
>
> Use NMI instead of IPI for crash stop if RISC-V SSE NMI is supported.
>
> Signed-off-by: Yunhui Cui <cuiyunhui@...edance.com>
> ---
> arch/riscv/kernel/smp.c | 14 +++++++++++++-
> drivers/firmware/riscv/riscv_sse_nmi.c | 10 ++++++++++
> 2 files changed, 23 insertions(+), 1 deletion(-)
>
> diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c
> index 669325e68a21a..07ccc28f52172 100644
> --- a/arch/riscv/kernel/smp.c
> +++ b/arch/riscv/kernel/smp.c
> @@ -16,6 +16,7 @@
> #include <linux/kgdb.h>
> #include <linux/percpu.h>
> #include <linux/profile.h>
> +#include <linux/riscv_sse_nmi.h>
> #include <linux/smp.h>
> #include <linux/sched.h>
> #include <linux/seq_file.h>
> @@ -300,7 +301,18 @@ void crash_smp_send_stop(void)
> atomic_set(&waiting_for_crash_ipi, num_other_online_cpus());
>
> pr_crit("SMP: stopping secondary CPUs\n");
> - send_ipi_mask(&mask, IPI_CPU_CRASH_STOP);
> +
> + /*
> + * IPI performs better than NMI, but attempting IPI first and
> + * falling back to NMI on failure requires recording CPUs that failed
> + * to stop. This adds complexity to cpu_crash_stop(). Since this operation
> + * is rare and typically in the final phase, directly replace IPI
> + * with NMI.
> + */
> + if (!nmi_support())
> + send_ipi_mask(&mask, IPI_CPU_CRASH_STOP);
> + else
> + send_nmi_mask(&mask, LOCAL_NMI_CRASH);
>
> /* Wait up to one second for other CPUs to stop */
> timeout = USEC_PER_SEC;
> diff --git a/drivers/firmware/riscv/riscv_sse_nmi.c b/drivers/firmware/riscv/riscv_sse_nmi.c
> index 1763f43961ab6..e4c20dce40f9a 100644
> --- a/drivers/firmware/riscv/riscv_sse_nmi.c
> +++ b/drivers/firmware/riscv/riscv_sse_nmi.c
> @@ -10,6 +10,9 @@
> #include <asm/sbi.h>
> #include <asm/smp.h>
>
> +#define NMI_HANDLE(mask, func, ...) \
> + do { if (type & (mask)) func(__VA_ARGS__); } while (0)
> +
> bool nmi_available;
> static struct sse_event *local_nmi_evt;
> static atomic_t local_nmi_arg = ATOMIC_INIT(LOCAL_NMI_NONE);
> @@ -48,6 +51,13 @@ void send_nmi_mask(cpumask_t *mask, enum local_nmi_type type)
>
> static int local_nmi_handler(u32 evt, void *arg, struct pt_regs *regs)
> {
> + enum local_nmi_type type = atomic_read((atomic_t *)arg);
> + unsigned int cpu = smp_processor_id();
> +
> + NMI_HANDLE(LOCAL_NMI_CRASH, cpu_crash_stop, cpu, regs);
> +
> + atomic_set(&local_nmi_arg, LOCAL_NMI_NONE);
Do not reset local_nmi_arg to LOCAL_NMI_NONE outright here; instead,
clear only the bits that have actually been handled. Otherwise, pending
requests for other NMI types may be lost without ever being processed.
> +
> return 0;
> }
>
> --
> 2.39.5
>
Thanks,
Yunhui
Powered by blists - more mailing lists