lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <89a25fd0-2d49-4062-a194-55f21350e5a8@kernel.org>
Date: Tue, 16 Dec 2025 10:46:28 +0100
From: "Christophe Leroy (CS GROUP)" <chleroy@...nel.org>
To: Mukesh Kumar Chaurasiya <mkchauras@...ux.ibm.com>, maddy@...ux.ibm.com,
 mpe@...erman.id.au, npiggin@...il.com, oleg@...hat.com, kees@...nel.org,
 luto@...capital.net, wad@...omium.org, mchauras@...ux.ibm.com,
 thuth@...hat.com, sshegde@...ux.ibm.com, charlie@...osinc.com,
 macro@...am.me.uk, akpm@...ux-foundation.org, ldv@...ace.io, deller@....de,
 ankur.a.arora@...cle.com, segher@...nel.crashing.org, tglx@...utronix.de,
 thomas.weissschuh@...utronix.de, peterz@...radead.org,
 menglong8.dong@...il.com, bigeasy@...utronix.de, namcao@...utronix.de,
 kan.liang@...ux.intel.com, mingo@...nel.org, atrajeev@...ux.vnet.ibm.com,
 mark.barnett@....com, linuxppc-dev@...ts.ozlabs.org,
 linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2 4/8] powerpc: Introduce syscall exit arch functions



Le 14/12/2025 à 14:02, Mukesh Kumar Chaurasiya a écrit :
> From: Mukesh Kumar Chaurasiya <mchauras@...ux.ibm.com>
> 
> Add PowerPC-specific implementations of the generic syscall exit hooks
> used by the generic entry/exit framework:
> 
>   - arch_exit_to_user_mode_work_prepare()
>   - arch_exit_to_user_mode_work()
> 
> These helpers handle user state restoration when returning from the
> kernel to userspace, including FPU/VMX/VSX state, transactional memory,
> KUAP restore, and per-CPU accounting.
> 
> Additionally, move check_return_regs_valid() from interrupt.c to
> interrupt.h so it can be shared by the new entry/exit logic, and add
> arch_do_signal_or_restart() for use with the generic entry flow.
> 
> No functional change is intended with this patch.
> 
> Signed-off-by: Mukesh Kumar Chaurasiya <mchauras@...ux.ibm.com>
> ---
>   arch/powerpc/include/asm/entry-common.h | 49 +++++++++++++++
>   arch/powerpc/include/asm/interrupt.h    | 82 +++++++++++++++++++++++++
>   arch/powerpc/kernel/interrupt.c         | 81 ------------------------
>   arch/powerpc/kernel/signal.c            | 14 +++++
>   4 files changed, 145 insertions(+), 81 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/entry-common.h b/arch/powerpc/include/asm/entry-common.h
> index 093ece06ef79..e8ebd42a4e6d 100644
> --- a/arch/powerpc/include/asm/entry-common.h
> +++ b/arch/powerpc/include/asm/entry-common.h
> @@ -8,6 +8,7 @@
>   #include <asm/cputime.h>
>   #include <asm/interrupt.h>
>   #include <asm/stacktrace.h>
> +#include <asm/switch_to.h>
>   #include <asm/tm.h>
>   
>   static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
> @@ -104,5 +105,53 @@ static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
>   
>   #define arch_enter_from_user_mode arch_enter_from_user_mode
>   
> +static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
> +						  unsigned long ti_work)
> +{
> +	unsigned long mathflags;
> +
> +	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) {
> +		if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
> +		    unlikely((ti_work & _TIF_RESTORE_TM))) {
> +			restore_tm_state(regs);
> +		} else {
> +			mathflags = MSR_FP;
> +
> +			if (cpu_has_feature(CPU_FTR_VSX))
> +				mathflags |= MSR_VEC | MSR_VSX;
> +			else if (cpu_has_feature(CPU_FTR_ALTIVEC))
> +				mathflags |= MSR_VEC;
> +
> +			/*
> +			 * If userspace MSR has all available FP bits set,
> +			 * then they are live and no need to restore. If not,
> +			 * it means the regs were given up and restore_math
> +			 * may decide to restore them (to avoid taking an FP
> +			 * fault).
> +			 */
> +			if ((regs->msr & mathflags) != mathflags)
> +				restore_math(regs);
> +		}
> +	}
> +
> +	check_return_regs_valid(regs);
> +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
> +	local_paca->tm_scratch = regs->msr;
> +#endif
> +	/* Restore user access locks last */
> +	kuap_user_restore(regs);
> +}
> +
> +#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
> +
> +static __always_inline void arch_exit_to_user_mode(void)
> +{
> +	booke_load_dbcr0();
> +
> +	account_cpu_user_exit();
> +}
> +
> +#define arch_exit_to_user_mode arch_exit_to_user_mode
> +
>   #endif /* CONFIG_GENERIC_IRQ_ENTRY */
>   #endif /* _ASM_PPC_ENTRY_COMMON_H */
> diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
> index ca8a2cda9400..77ff8e33f8cd 100644
> --- a/arch/powerpc/include/asm/interrupt.h
> +++ b/arch/powerpc/include/asm/interrupt.h
> @@ -68,6 +68,8 @@
>   
>   #include <linux/context_tracking.h>
>   #include <linux/hardirq.h>
> +#include <linux/sched/debug.h> /* for show_regs */
> +
>   #include <asm/cputime.h>
>   #include <asm/firmware.h>
>   #include <asm/ftrace.h>
> @@ -172,6 +174,86 @@ static inline void booke_restore_dbcr0(void)
>   #endif
>   }
>   
> +static inline void check_return_regs_valid(struct pt_regs *regs)

This was previously a notrace function. Should it be marked 
__always_inline instead of just inline ?

> +{
> +#ifdef CONFIG_PPC_BOOK3S_64
> +	unsigned long trap, srr0, srr1;
> +	static bool warned;
> +	u8 *validp;
> +	char *h;
> +
> +	if (trap_is_scv(regs))
> +		return;
> +
> +	trap = TRAP(regs);
> +	// EE in HV mode sets HSRRs like 0xea0
> +	if (cpu_has_feature(CPU_FTR_HVMODE) && trap == INTERRUPT_EXTERNAL)
> +		trap = 0xea0;
> +
> +	switch (trap) {
> +	case 0x980:
> +	case INTERRUPT_H_DATA_STORAGE:
> +	case 0xe20:
> +	case 0xe40:
> +	case INTERRUPT_HMI:
> +	case 0xe80:
> +	case 0xea0:
> +	case INTERRUPT_H_FAC_UNAVAIL:
> +	case 0x1200:
> +	case 0x1500:
> +	case 0x1600:
> +	case 0x1800:
> +		validp = &local_paca->hsrr_valid;
> +		if (!READ_ONCE(*validp))
> +			return;
> +
> +		srr0 = mfspr(SPRN_HSRR0);
> +		srr1 = mfspr(SPRN_HSRR1);
> +		h = "H";
> +
> +		break;
> +	default:
> +		validp = &local_paca->srr_valid;
> +		if (!READ_ONCE(*validp))
> +			return;
> +
> +		srr0 = mfspr(SPRN_SRR0);
> +		srr1 = mfspr(SPRN_SRR1);
> +		h = "";
> +		break;
> +	}
> +
> +	if (srr0 == regs->nip && srr1 == regs->msr)
> +		return;
> +
> +	/*
> +	 * A NMI / soft-NMI interrupt may have come in after we found
> +	 * srr_valid and before the SRRs are loaded. The interrupt then
> +	 * comes in and clobbers SRRs and clears srr_valid. Then we load
> +	 * the SRRs here and test them above and find they don't match.
> +	 *
> +	 * Test validity again after that, to catch such false positives.
> +	 *
> +	 * This test in general will have some window for false negatives
> +	 * and may not catch and fix all such cases if an NMI comes in
> +	 * later and clobbers SRRs without clearing srr_valid, but hopefully
> +	 * such things will get caught most of the time, statistically
> +	 * enough to be able to get a warning out.
> +	 */
> +	if (!READ_ONCE(*validp))
> +		return;
> +
> +	if (!data_race(warned)) {
> +		data_race(warned = true);
> +		pr_warn("%sSRR0 was: %lx should be: %lx\n", h, srr0, regs->nip);
> +		pr_warn("%sSRR1 was: %lx should be: %lx\n", h, srr1, regs->msr);
> +		show_regs(regs);
> +	}
> +
> +	WRITE_ONCE(*validp, 0); /* fixup */
> +#endif
> +}
> +
>   static inline void interrupt_enter_prepare(struct pt_regs *regs)
>   {
>   #ifdef CONFIG_PPC64
> diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
> index 2a09ac5dabd6..f53d432f6087 100644
> --- a/arch/powerpc/kernel/interrupt.c
> +++ b/arch/powerpc/kernel/interrupt.c
> @@ -4,7 +4,6 @@
>   #include <linux/err.h>
>   #include <linux/compat.h>
>   #include <linux/rseq.h>
> -#include <linux/sched/debug.h> /* for show_regs */
>   
>   #include <asm/kup.h>
>   #include <asm/cputime.h>
> @@ -78,86 +77,6 @@ static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable)
>   	return true;
>   }
>   
> -static notrace void check_return_regs_valid(struct pt_regs *regs)
> -{
> -#ifdef CONFIG_PPC_BOOK3S_64
> -	unsigned long trap, srr0, srr1;
> -	static bool warned;
> -	u8 *validp;
> -	char *h;
> -
> -	if (trap_is_scv(regs))
> -		return;
> -
> -	trap = TRAP(regs);
> -	// EE in HV mode sets HSRRs like 0xea0
> -	if (cpu_has_feature(CPU_FTR_HVMODE) && trap == INTERRUPT_EXTERNAL)
> -		trap = 0xea0;
> -
> -	switch (trap) {
> -	case 0x980:
> -	case INTERRUPT_H_DATA_STORAGE:
> -	case 0xe20:
> -	case 0xe40:
> -	case INTERRUPT_HMI:
> -	case 0xe80:
> -	case 0xea0:
> -	case INTERRUPT_H_FAC_UNAVAIL:
> -	case 0x1200:
> -	case 0x1500:
> -	case 0x1600:
> -	case 0x1800:
> -		validp = &local_paca->hsrr_valid;
> -		if (!READ_ONCE(*validp))
> -			return;
> -
> -		srr0 = mfspr(SPRN_HSRR0);
> -		srr1 = mfspr(SPRN_HSRR1);
> -		h = "H";
> -
> -		break;
> -	default:
> -		validp = &local_paca->srr_valid;
> -		if (!READ_ONCE(*validp))
> -			return;
> -
> -		srr0 = mfspr(SPRN_SRR0);
> -		srr1 = mfspr(SPRN_SRR1);
> -		h = "";
> -		break;
> -	}
> -
> -	if (srr0 == regs->nip && srr1 == regs->msr)
> -		return;
> -
> -	/*
> -	 * A NMI / soft-NMI interrupt may have come in after we found
> -	 * srr_valid and before the SRRs are loaded. The interrupt then
> -	 * comes in and clobbers SRRs and clears srr_valid. Then we load
> -	 * the SRRs here and test them above and find they don't match.
> -	 *
> -	 * Test validity again after that, to catch such false positives.
> -	 *
> -	 * This test in general will have some window for false negatives
> -	 * and may not catch and fix all such cases if an NMI comes in
> -	 * later and clobbers SRRs without clearing srr_valid, but hopefully
> -	 * such things will get caught most of the time, statistically
> -	 * enough to be able to get a warning out.
> -	 */
> -	if (!READ_ONCE(*validp))
> -		return;
> -
> -	if (!data_race(warned)) {
> -		data_race(warned = true);
> -		printk("%sSRR0 was: %lx should be: %lx\n", h, srr0, regs->nip);
> -		printk("%sSRR1 was: %lx should be: %lx\n", h, srr1, regs->msr);
> -		show_regs(regs);
> -	}
> -
> -	WRITE_ONCE(*validp, 0); /* fixup */
> -#endif
> -}
> -
>   static notrace unsigned long
>   interrupt_exit_user_prepare_main(unsigned long ret, struct pt_regs *regs)
>   {
> diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
> index aa17e62f3754..719930cf4ae1 100644
> --- a/arch/powerpc/kernel/signal.c
> +++ b/arch/powerpc/kernel/signal.c
> @@ -22,6 +22,11 @@
>   
>   #include "signal.h"
>   
> +/* This will be removed */
> +#ifdef CONFIG_GENERIC_ENTRY

Is this #ifdef really needed ?

> +#include <linux/entry-common.h>
> +#endif /* CONFIG_GENERIC_ENTRY */
> +
>   #ifdef CONFIG_VSX
>   unsigned long copy_fpr_to_user(void __user *to,
>   			       struct task_struct *task)
> @@ -368,3 +373,12 @@ void signal_fault(struct task_struct *tsk, struct pt_regs *regs,
>   		printk_ratelimited(regs->msr & MSR_64BIT ? fm64 : fm32, tsk->comm,
>   				   task_pid_nr(tsk), where, ptr, regs->nip, regs->link);
>   }
> +
> +#ifdef CONFIG_GENERIC_ENTRY

Why is this #ifdef needed ?

> +void arch_do_signal_or_restart(struct pt_regs *regs)
> +{
> +	BUG_ON(regs != current->thread.regs);

Is this BUG_ON() needed ? Can't we use something smoother ?

> +	local_paca->generic_fw_flags |= GFW_RESTORE_ALL;
> +	do_signal(current);
> +}
> +#endif /* CONFIG_GENERIC_ENTRY */


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ