lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <mhng-60581B88-6FC7-4349-96B6-730D908ABF4A@palmerdabbelt-mac>
Date: Mon, 23 Jun 2025 15:17:19 -0700 (PDT)
From: Palmer Dabbelt <palmer@...belt.com>
To: namcao@...utronix.de
CC: Paul Walmsley <paul.walmsley@...ive.com>, aou@...s.berkeley.edu,
  Alexandre Ghiti <alex@...ti.fr>, bigeasy@...utronix.de, clrkwllms@...nel.org, rostedt@...dmis.org,
  linux-rt-devel@...ts.linux.dev, linux-riscv@...ts.infradead.org, linux-kernel@...r.kernel.org,
  namcao@...utronix.de
Subject:     Re: [PATCH] riscv: Enable interrupt during exception handling

On Fri, 20 Jun 2025 04:43:46 PDT (-0700), namcao@...utronix.de wrote:
> force_sig_fault() takes a spinlock, which is a sleeping lock with
> CONFIG_PREEMPT_RT=y. However, exception handling calls force_sig_fault()
> with interrupt disabled, causing a sleeping in atomic context warning.
>
> This can be reproduced using userspace programs such as:
>     int main() { asm ("ebreak"); }
> or
>     int main() { asm ("unimp"); }
>
> There is no reason that interrupt must be disabled during exception
> handling.

Looks like they used to and we lost it as of f0bddf50586d ("riscv: 
entry: Convert to generic entry"), where we'd check the PIE bit and 
quickly re-enable interrupts if they were enabled before.

This way of doing it also seems fine, and it's kind of nice that we can 
just read the code to figure out how the interrupt contexts are managed.  
So I think it's fine to keep it this way, even though it's more code.

I'm kind of split on a Fixes tag here.  One could argue it's a 
regression, as having interrupts disabled during exceptions is going to 
cause all sorts of performance issues for users.  Seems a bit risk to 
just backport, though...

That said, if nobody noticed then it's probably a good sign nobody is 
really paying attention and we should just backport it before anyone 
notices...

> Considering the previous struggle with a similar bug [1][2], fix
> this problem once for all by enabling interrupt during exception handling
> whenever possible:
>   - If exception comes from user (interrupt handling was for sure enabled)
>   - If exception comes from kernel and interrupt handling was enabled
>
> This also has the added benefit of avoiding unnecessary delays in interrupt
> handling.
>
> This patch mimics x86's implementation.
>
> Link: https://lore.kernel.org/linux-riscv/20250411073850.3699180-3-nylon.chen@sifive.com [1]
> Link: https://lore.kernel.org/linux-riscv/20250422162324.956065-3-cleger@rivosinc.com [2]
> Suggested-by: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
> Signed-off-by: Nam Cao <namcao@...utronix.de>
> ---
>  arch/riscv/kernel/traps.c | 36 ++++++++++++++++++++++++++++++------
>  arch/riscv/mm/fault.c     |  4 ----
>  2 files changed, 30 insertions(+), 10 deletions(-)
>
> diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
> index 9c83848797a78..f7d2372dc0eb6 100644
> --- a/arch/riscv/kernel/traps.c
> +++ b/arch/riscv/kernel/traps.c
> @@ -6,6 +6,7 @@
>  #include <linux/cpu.h>
>  #include <linux/kernel.h>
>  #include <linux/init.h>
> +#include <linux/irqflags.h>
>  #include <linux/randomize_kstack.h>
>  #include <linux/sched.h>
>  #include <linux/sched/debug.h>
> @@ -72,6 +73,18 @@ static void dump_instr(const char *loglvl, struct pt_regs *regs)
>  	printk("%sCode: %s\n", loglvl, str);
>  }
>  
> +static void cond_local_irq_enable(struct pt_regs *regs)
> +{
> +	if (!regs_irqs_disabled(regs))
> +		local_irq_enable();
> +}
> +
> +static void cond_local_irq_disable(struct pt_regs *regs)
> +{
> +	if (!regs_irqs_disabled(regs))
> +		local_irq_disable();
> +}
> +
>  void die(struct pt_regs *regs, const char *str)
>  {
>  	static int die_counter;
> @@ -151,11 +164,15 @@ asmlinkage __visible __trap_section void name(struct pt_regs *regs)		\
>  {										\
>  	if (user_mode(regs)) {							\
>  		irqentry_enter_from_user_mode(regs);				\
> +		local_irq_enable();						\
>  		do_trap_error(regs, signo, code, regs->epc, "Oops - " str);	\
> +		local_irq_disable();						\
>  		irqentry_exit_to_user_mode(regs);				\
>  	} else {								\
>  		irqentry_state_t state = irqentry_nmi_enter(regs);		\
> +		cond_local_irq_enable(regs);					\
>  		do_trap_error(regs, signo, code, regs->epc, "Oops - " str);	\
> +		cond_local_irq_disable(regs);					\
>  		irqentry_nmi_exit(regs, state);					\
>  	}									\
>  }
> @@ -173,24 +190,23 @@ asmlinkage __visible __trap_section void do_trap_insn_illegal(struct pt_regs *re
>  
>  	if (user_mode(regs)) {
>  		irqentry_enter_from_user_mode(regs);
> -
>  		local_irq_enable();
>  
>  		handled = riscv_v_first_use_handler(regs);
> -
> -		local_irq_disable();
> -
>  		if (!handled)
>  			do_trap_error(regs, SIGILL, ILL_ILLOPC, regs->epc,
>  				      "Oops - illegal instruction");
>  
> +		local_irq_disable();
>  		irqentry_exit_to_user_mode(regs);
>  	} else {
>  		irqentry_state_t state = irqentry_nmi_enter(regs);
> +		cond_local_irq_enable(regs);
>  
>  		do_trap_error(regs, SIGILL, ILL_ILLOPC, regs->epc,
>  			      "Oops - illegal instruction");
>  
> +		cond_local_irq_disable(regs);
>  		irqentry_nmi_exit(regs, state);
>  	}
>  }
> @@ -225,6 +241,7 @@ static void do_trap_misaligned(struct pt_regs *regs, enum misaligned_access_type
>  		local_irq_enable();
>  	} else {
>  		state = irqentry_nmi_enter(regs);
> +		cond_local_irq_enable(regs);
>  	}
>  
>  	if (misaligned_handler[type].handler(regs))
> @@ -235,6 +252,7 @@ static void do_trap_misaligned(struct pt_regs *regs, enum misaligned_access_type
>  		local_irq_disable();
>  		irqentry_exit_to_user_mode(regs);
>  	} else {
> +		cond_local_irq_disable(regs);
>  		irqentry_nmi_exit(regs, state);
>  	}
>  }
> @@ -308,15 +326,19 @@ asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs)
>  {
>  	if (user_mode(regs)) {
>  		irqentry_enter_from_user_mode(regs);
> +		local_irq_enable();
>  
>  		handle_break(regs);
>  
> +		local_irq_disable();
>  		irqentry_exit_to_user_mode(regs);
>  	} else {
>  		irqentry_state_t state = irqentry_nmi_enter(regs);
> +		cond_local_irq_enable(regs);
>  
>  		handle_break(regs);
>  
> +		cond_local_irq_disable(regs);
>  		irqentry_nmi_exit(regs, state);
>  	}
>  }
> @@ -355,10 +377,12 @@ void do_trap_ecall_u(struct pt_regs *regs)
>  		syscall_exit_to_user_mode(regs);
>  	} else {
>  		irqentry_state_t state = irqentry_nmi_enter(regs);
> +		cond_local_irq_enable(regs);
>  
>  		do_trap_error(regs, SIGILL, ILL_ILLTRP, regs->epc,
>  			"Oops - environment call from U-mode");
>  
> +		cond_local_irq_disable(regs);
>  		irqentry_nmi_exit(regs, state);
>  	}
>  
> @@ -368,11 +392,11 @@ void do_trap_ecall_u(struct pt_regs *regs)
>  asmlinkage __visible noinstr void do_page_fault(struct pt_regs *regs)
>  {
>  	irqentry_state_t state = irqentry_enter(regs);
> +	cond_local_irq_enable(regs);
>  
>  	handle_page_fault(regs);
>  
> -	local_irq_disable();
> -
> +	cond_local_irq_disable(regs);
>  	irqentry_exit(regs, state);
>  }
>  #endif
> diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
> index 0194324a0c506..6d23ed0ce8a28 100644
> --- a/arch/riscv/mm/fault.c
> +++ b/arch/riscv/mm/fault.c
> @@ -306,10 +306,6 @@ void handle_page_fault(struct pt_regs *regs)
>  		return;
>  	}
>  
> -	/* Enable interrupts if they were enabled in the parent context. */
> -	if (!regs_irqs_disabled(regs))
> -		local_irq_enable();
> -
>  	/*
>  	 * If we're in an interrupt, have no user context, or are running
>  	 * in an atomic region, then we must not take the fault.
> -- 
> 2.39.5

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ