[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <mhng-60581B88-6FC7-4349-96B6-730D908ABF4A@palmerdabbelt-mac>
Date: Mon, 23 Jun 2025 15:17:19 -0700 (PDT)
From: Palmer Dabbelt <palmer@...belt.com>
To: namcao@...utronix.de
CC: Paul Walmsley <paul.walmsley@...ive.com>, aou@...s.berkeley.edu,
Alexandre Ghiti <alex@...ti.fr>, bigeasy@...utronix.de, clrkwllms@...nel.org, rostedt@...dmis.org,
linux-rt-devel@...ts.linux.dev, linux-riscv@...ts.infradead.org, linux-kernel@...r.kernel.org,
namcao@...utronix.de
Subject: Re: [PATCH] riscv: Enable interrupt during exception handling
On Fri, 20 Jun 2025 04:43:46 PDT (-0700), namcao@...utronix.de wrote:
> force_sig_fault() takes a spinlock, which is a sleeping lock with
> CONFIG_PREEMPT_RT=y. However, exception handling calls force_sig_fault()
> with interrupt disabled, causing a sleeping in atomic context warning.
>
> This can be reproduced using userspace programs such as:
> int main() { asm ("ebreak"); }
> or
> int main() { asm ("unimp"); }
>
> There is no reason that interrupt must be disabled during exception
> handling.
Looks like they used to and we lost it as of f0bddf50586d ("riscv:
entry: Convert to generic entry"), where we'd check the PIE bit and
quickly re-enable interrupts if they were enabled before.
This way of doing it also seems fine, and it's kind of nice that we can
just read the code to figure out how the interrupt contexts are managed.
So I think it's fine to keep it this way, even though it's more code.
I'm kind of split on a Fixes tag here. One could argue it's a
regression, as having interrupts disabled during exceptions is going to
cause all sorts of performance issues for users. Seems a bit risk to
just backport, though...
That said, if nobody noticed then it's probably a good sign nobody is
really paying attention and we should just backport it before anyone
notices...
> Considering the previous struggle with a similar bug [1][2], fix
> this problem once for all by enabling interrupt during exception handling
> whenever possible:
> - If exception comes from user (interrupt handling was for sure enabled)
> - If exception comes from kernel and interrupt handling was enabled
>
> This also has the added benefit of avoiding unnecessary delays in interrupt
> handling.
>
> This patch mimics x86's implementation.
>
> Link: https://lore.kernel.org/linux-riscv/20250411073850.3699180-3-nylon.chen@sifive.com [1]
> Link: https://lore.kernel.org/linux-riscv/20250422162324.956065-3-cleger@rivosinc.com [2]
> Suggested-by: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
> Signed-off-by: Nam Cao <namcao@...utronix.de>
> ---
> arch/riscv/kernel/traps.c | 36 ++++++++++++++++++++++++++++++------
> arch/riscv/mm/fault.c | 4 ----
> 2 files changed, 30 insertions(+), 10 deletions(-)
>
> diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
> index 9c83848797a78..f7d2372dc0eb6 100644
> --- a/arch/riscv/kernel/traps.c
> +++ b/arch/riscv/kernel/traps.c
> @@ -6,6 +6,7 @@
> #include <linux/cpu.h>
> #include <linux/kernel.h>
> #include <linux/init.h>
> +#include <linux/irqflags.h>
> #include <linux/randomize_kstack.h>
> #include <linux/sched.h>
> #include <linux/sched/debug.h>
> @@ -72,6 +73,18 @@ static void dump_instr(const char *loglvl, struct pt_regs *regs)
> printk("%sCode: %s\n", loglvl, str);
> }
>
> +static void cond_local_irq_enable(struct pt_regs *regs)
> +{
> + if (!regs_irqs_disabled(regs))
> + local_irq_enable();
> +}
> +
> +static void cond_local_irq_disable(struct pt_regs *regs)
> +{
> + if (!regs_irqs_disabled(regs))
> + local_irq_disable();
> +}
> +
> void die(struct pt_regs *regs, const char *str)
> {
> static int die_counter;
> @@ -151,11 +164,15 @@ asmlinkage __visible __trap_section void name(struct pt_regs *regs) \
> { \
> if (user_mode(regs)) { \
> irqentry_enter_from_user_mode(regs); \
> + local_irq_enable(); \
> do_trap_error(regs, signo, code, regs->epc, "Oops - " str); \
> + local_irq_disable(); \
> irqentry_exit_to_user_mode(regs); \
> } else { \
> irqentry_state_t state = irqentry_nmi_enter(regs); \
> + cond_local_irq_enable(regs); \
> do_trap_error(regs, signo, code, regs->epc, "Oops - " str); \
> + cond_local_irq_disable(regs); \
> irqentry_nmi_exit(regs, state); \
> } \
> }
> @@ -173,24 +190,23 @@ asmlinkage __visible __trap_section void do_trap_insn_illegal(struct pt_regs *re
>
> if (user_mode(regs)) {
> irqentry_enter_from_user_mode(regs);
> -
> local_irq_enable();
>
> handled = riscv_v_first_use_handler(regs);
> -
> - local_irq_disable();
> -
> if (!handled)
> do_trap_error(regs, SIGILL, ILL_ILLOPC, regs->epc,
> "Oops - illegal instruction");
>
> + local_irq_disable();
> irqentry_exit_to_user_mode(regs);
> } else {
> irqentry_state_t state = irqentry_nmi_enter(regs);
> + cond_local_irq_enable(regs);
>
> do_trap_error(regs, SIGILL, ILL_ILLOPC, regs->epc,
> "Oops - illegal instruction");
>
> + cond_local_irq_disable(regs);
> irqentry_nmi_exit(regs, state);
> }
> }
> @@ -225,6 +241,7 @@ static void do_trap_misaligned(struct pt_regs *regs, enum misaligned_access_type
> local_irq_enable();
> } else {
> state = irqentry_nmi_enter(regs);
> + cond_local_irq_enable(regs);
> }
>
> if (misaligned_handler[type].handler(regs))
> @@ -235,6 +252,7 @@ static void do_trap_misaligned(struct pt_regs *regs, enum misaligned_access_type
> local_irq_disable();
> irqentry_exit_to_user_mode(regs);
> } else {
> + cond_local_irq_disable(regs);
> irqentry_nmi_exit(regs, state);
> }
> }
> @@ -308,15 +326,19 @@ asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs)
> {
> if (user_mode(regs)) {
> irqentry_enter_from_user_mode(regs);
> + local_irq_enable();
>
> handle_break(regs);
>
> + local_irq_disable();
> irqentry_exit_to_user_mode(regs);
> } else {
> irqentry_state_t state = irqentry_nmi_enter(regs);
> + cond_local_irq_enable(regs);
>
> handle_break(regs);
>
> + cond_local_irq_disable(regs);
> irqentry_nmi_exit(regs, state);
> }
> }
> @@ -355,10 +377,12 @@ void do_trap_ecall_u(struct pt_regs *regs)
> syscall_exit_to_user_mode(regs);
> } else {
> irqentry_state_t state = irqentry_nmi_enter(regs);
> + cond_local_irq_enable(regs);
>
> do_trap_error(regs, SIGILL, ILL_ILLTRP, regs->epc,
> "Oops - environment call from U-mode");
>
> + cond_local_irq_disable(regs);
> irqentry_nmi_exit(regs, state);
> }
>
> @@ -368,11 +392,11 @@ void do_trap_ecall_u(struct pt_regs *regs)
> asmlinkage __visible noinstr void do_page_fault(struct pt_regs *regs)
> {
> irqentry_state_t state = irqentry_enter(regs);
> + cond_local_irq_enable(regs);
>
> handle_page_fault(regs);
>
> - local_irq_disable();
> -
> + cond_local_irq_disable(regs);
> irqentry_exit(regs, state);
> }
> #endif
> diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
> index 0194324a0c506..6d23ed0ce8a28 100644
> --- a/arch/riscv/mm/fault.c
> +++ b/arch/riscv/mm/fault.c
> @@ -306,10 +306,6 @@ void handle_page_fault(struct pt_regs *regs)
> return;
> }
>
> - /* Enable interrupts if they were enabled in the parent context. */
> - if (!regs_irqs_disabled(regs))
> - local_irq_enable();
> -
> /*
> * If we're in an interrupt, have no user context, or are running
> * in an atomic region, then we must not take the fault.
> --
> 2.39.5
Powered by blists - more mailing lists