lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <91d280af-fb41-4c05-8b96-113717ecd64a@kernel.org>
Date: Tue, 16 Dec 2025 10:58:16 +0100
From: "Christophe Leroy (CS GROUP)" <chleroy@...nel.org>
To: Mukesh Kumar Chaurasiya <mkchauras@...ux.ibm.com>, maddy@...ux.ibm.com,
 mpe@...erman.id.au, npiggin@...il.com, oleg@...hat.com, kees@...nel.org,
 luto@...capital.net, wad@...omium.org, mchauras@...ux.ibm.com,
 thuth@...hat.com, sshegde@...ux.ibm.com, charlie@...osinc.com,
 macro@...am.me.uk, akpm@...ux-foundation.org, ldv@...ace.io, deller@....de,
 ankur.a.arora@...cle.com, segher@...nel.crashing.org, tglx@...utronix.de,
 thomas.weissschuh@...utronix.de, peterz@...radead.org,
 menglong8.dong@...il.com, bigeasy@...utronix.de, namcao@...utronix.de,
 kan.liang@...ux.intel.com, mingo@...nel.org, atrajeev@...ux.vnet.ibm.com,
 mark.barnett@....com, linuxppc-dev@...ts.ozlabs.org,
 linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2 6/8] powerpc: Prepare for IRQ entry exit



Le 14/12/2025 à 14:02, Mukesh Kumar Chaurasiya a écrit :
> From: Mukesh Kumar Chaurasiya <mchauras@...ux.ibm.com>
> 
> Move interrupt entry and exit helper routines from interrupt.h into the
> PowerPC-specific entry-common.h header as a preparatory step for enabling
> the generic entry/exit framework.
> 
> This consolidation places all PowerPC interrupt entry/exit handling in a
> single common header, aligning with the generic entry infrastructure.
> The helpers provide architecture-specific handling for interrupt and NMI
> entry/exit sequences, including:
> 
>   - arch_interrupt_enter/exit_prepare()
>   - arch_interrupt_async_enter/exit_prepare()
>   - arch_interrupt_nmi_enter/exit_prepare()
>   - Supporting helpers such as nap_adjust_return(), check_return_regs_valid(),
>     debug register maintenance, and soft mask handling.
> 
> The functions are copied verbatim from interrupt.h to avoid functional
> changes at this stage. Subsequent patches will integrate these routines
> into the generic entry/exit flow.

Can we move them instead of duplicating them ?

> 
> No functional change intended.
> 
> Signed-off-by: Mukesh Kumar Chaurasiya <mchauras@...ux.ibm.com>
> ---
>   arch/powerpc/include/asm/entry-common.h | 422 ++++++++++++++++++++++++
>   1 file changed, 422 insertions(+)
> 
> diff --git a/arch/powerpc/include/asm/entry-common.h b/arch/powerpc/include/asm/entry-common.h
> index e8ebd42a4e6d..e8bde4c67eaf 100644
> --- a/arch/powerpc/include/asm/entry-common.h
> +++ b/arch/powerpc/include/asm/entry-common.h
> @@ -7,10 +7,432 @@
>   
>   #include <asm/cputime.h>
>   #include <asm/interrupt.h>
> +#include <asm/runlatch.h>
>   #include <asm/stacktrace.h>
>   #include <asm/switch_to.h>
>   #include <asm/tm.h>
>   
> +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
> +/*
> + * WARN/BUG is handled with a program interrupt so minimise checks here to
> + * avoid recursion and maximise the chance of getting the first oops handled.
> + */
> +#define INT_SOFT_MASK_BUG_ON(regs, cond)				\
> +do {									\
> +	if ((user_mode(regs) || (TRAP(regs) != INTERRUPT_PROGRAM)))	\
> +		BUG_ON(cond);						\
> +} while (0)
> +#else
> +#define INT_SOFT_MASK_BUG_ON(regs, cond)
> +#endif
> +
> +#ifdef CONFIG_PPC_BOOK3S_64
> +extern char __end_soft_masked[];
> +bool search_kernel_soft_mask_table(unsigned long addr);
> +unsigned long search_kernel_restart_table(unsigned long addr);
> +
> +DECLARE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant);
> +
> +static inline bool is_implicit_soft_masked(struct pt_regs *regs)
> +{
> +	if (user_mode(regs))
> +		return false;
> +
> +	if (regs->nip >= (unsigned long)__end_soft_masked)
> +		return false;
> +
> +	return search_kernel_soft_mask_table(regs->nip);
> +}
> +
> +static inline void srr_regs_clobbered(void)
> +{
> +	local_paca->srr_valid = 0;
> +	local_paca->hsrr_valid = 0;
> +}
> +#else
> +static inline unsigned long search_kernel_restart_table(unsigned long addr)
> +{
> +	return 0;
> +}
> +
> +static inline bool is_implicit_soft_masked(struct pt_regs *regs)
> +{
> +	return false;
> +}
> +
> +static inline void srr_regs_clobbered(void)
> +{
> +}
> +#endif
> +
> +static inline void nap_adjust_return(struct pt_regs *regs)
> +{
> +#ifdef CONFIG_PPC_970_NAP
> +	if (unlikely(test_thread_local_flags(_TLF_NAPPING))) {
> +		/* Can avoid a test-and-clear because NMIs do not call this */
> +		clear_thread_local_flags(_TLF_NAPPING);
> +		regs_set_return_ip(regs, (unsigned long)power4_idle_nap_return);
> +	}
> +#endif
> +}
> +
> +static inline void booke_load_dbcr0(void)
> +{
> +#ifdef CONFIG_PPC_ADV_DEBUG_REGS
> +	unsigned long dbcr0 = current->thread.debug.dbcr0;
> +
> +	if (likely(!(dbcr0 & DBCR0_IDM)))
> +		return;
> +
> +	/*
> +	 * Check to see if the dbcr0 register is set up to debug.
> +	 * Use the internal debug mode bit to do this.
> +	 */
> +	mtmsr(mfmsr() & ~MSR_DE);
> +	if (IS_ENABLED(CONFIG_PPC32)) {
> +		isync();
> +		global_dbcr0[smp_processor_id()] = mfspr(SPRN_DBCR0);
> +	}
> +	mtspr(SPRN_DBCR0, dbcr0);
> +	mtspr(SPRN_DBSR, -1);
> +#endif
> +}
> +
> +static inline void booke_restore_dbcr0(void)
> +{
> +#ifdef CONFIG_PPC_ADV_DEBUG_REGS
> +	unsigned long dbcr0 = current->thread.debug.dbcr0;
> +
> +	if (IS_ENABLED(CONFIG_PPC32) && unlikely(dbcr0 & DBCR0_IDM)) {
> +		mtspr(SPRN_DBSR, -1);
> +		mtspr(SPRN_DBCR0, global_dbcr0[smp_processor_id()]);
> +	}
> +#endif
> +}
> +
> +static inline void check_return_regs_valid(struct pt_regs *regs)
> +{
> +#ifdef CONFIG_PPC_BOOK3S_64
> +	unsigned long trap, srr0, srr1;
> +	static bool warned;
> +	u8 *validp;
> +	char *h;
> +
> +	if (trap_is_scv(regs))
> +		return;
> +
> +	trap = TRAP(regs);
> +	// EE in HV mode sets HSRRs like 0xea0
> +	if (cpu_has_feature(CPU_FTR_HVMODE) && trap == INTERRUPT_EXTERNAL)
> +		trap = 0xea0;
> +
> +	switch (trap) {
> +	case 0x980:
> +	case INTERRUPT_H_DATA_STORAGE:
> +	case 0xe20:
> +	case 0xe40:
> +	case INTERRUPT_HMI:
> +	case 0xe80:
> +	case 0xea0:
> +	case INTERRUPT_H_FAC_UNAVAIL:
> +	case 0x1200:
> +	case 0x1500:
> +	case 0x1600:
> +	case 0x1800:
> +		validp = &local_paca->hsrr_valid;
> +		if (!READ_ONCE(*validp))
> +			return;
> +
> +		srr0 = mfspr(SPRN_HSRR0);
> +		srr1 = mfspr(SPRN_HSRR1);
> +		h = "H";
> +
> +		break;
> +	default:
> +		validp = &local_paca->srr_valid;
> +		if (!READ_ONCE(*validp))
> +			return;
> +
> +		srr0 = mfspr(SPRN_SRR0);
> +		srr1 = mfspr(SPRN_SRR1);
> +		h = "";
> +		break;
> +	}
> +
> +	if (srr0 == regs->nip && srr1 == regs->msr)
> +		return;
> +
> +	/*
> +	 * A NMI / soft-NMI interrupt may have come in after we found
> +	 * srr_valid and before the SRRs are loaded. The interrupt then
> +	 * comes in and clobbers SRRs and clears srr_valid. Then we load
> +	 * the SRRs here and test them above and find they don't match.
> +	 *
> +	 * Test validity again after that, to catch such false positives.
> +	 *
> +	 * This test in general will have some window for false negatives
> +	 * and may not catch and fix all such cases if an NMI comes in
> +	 * later and clobbers SRRs without clearing srr_valid, but hopefully
> +	 * such things will get caught most of the time, statistically
> +	 * enough to be able to get a warning out.
> +	 */
> +	if (!READ_ONCE(*validp))
> +		return;
> +
> +	if (!data_race(warned)) {
> +		data_race(warned = true);
> +		pr_warn("%sSRR0 was: %lx should be: %lx\n", h, srr0, regs->nip);
> +		pr_warn("%sSRR1 was: %lx should be: %lx\n", h, srr1, regs->msr);
> +		show_regs(regs);
> +	}
> +
> +	WRITE_ONCE(*validp, 0); /* fixup */
> +#endif
> +}
> +
> +static inline void arch_interrupt_enter_prepare(struct pt_regs *regs)
> +{
> +#ifdef CONFIG_PPC64
> +	irq_soft_mask_set(IRQS_ALL_DISABLED);
> +
> +	/*
> +	 * If the interrupt was taken with HARD_DIS clear, then enable MSR[EE].
> +	 * Asynchronous interrupts get here with HARD_DIS set (see below), so
> +	 * this enables MSR[EE] for synchronous interrupts. IRQs remain
> +	 * soft-masked. The interrupt handler may later call
> +	 * interrupt_cond_local_irq_enable() to achieve a regular process
> +	 * context.
> +	 */
> +	if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) {
> +		INT_SOFT_MASK_BUG_ON(regs, !(regs->msr & MSR_EE));
> +		__hard_irq_enable();
> +	} else {
> +		__hard_RI_enable();
> +	}
> +	/* Enable MSR[RI] early, to support kernel SLB and hash faults */
> +#endif
> +
> +	if (!regs_irqs_disabled(regs))
> +		trace_hardirqs_off();
> +
> +	if (user_mode(regs)) {
> +		kuap_lock();
> +		CT_WARN_ON(ct_state() != CT_STATE_USER);
> +		user_exit_irqoff();
> +
> +		account_cpu_user_entry();
> +		account_stolen_time();
> +	} else {
> +		kuap_save_and_lock(regs);
> +		/*
> +		 * CT_WARN_ON comes here via program_check_exception,
> +		 * so avoid recursion.
> +		 */
> +		if (TRAP(regs) != INTERRUPT_PROGRAM)
> +			CT_WARN_ON(ct_state() != CT_STATE_KERNEL &&
> +				   ct_state() != CT_STATE_IDLE);
> +		INT_SOFT_MASK_BUG_ON(regs, is_implicit_soft_masked(regs));
> +		INT_SOFT_MASK_BUG_ON(regs, regs_irqs_disabled(regs) &&
> +				     search_kernel_restart_table(regs->nip));
> +	}
> +	INT_SOFT_MASK_BUG_ON(regs, !regs_irqs_disabled(regs) &&
> +			     !(regs->msr & MSR_EE));
> +
> +	booke_restore_dbcr0();
> +}
> +
> +/*
> + * Care should be taken to note that arch_interrupt_exit_prepare and
> + * arch_interrupt_async_exit_prepare do not necessarily return immediately to
> + * regs context (e.g., if regs is usermode, we don't necessarily return to
> + * user mode). Other interrupts might be taken between here and return,
> + * context switch / preemption may occur in the exit path after this, or a
> + * signal may be delivered, etc.
> + *
> + * The real interrupt exit code is platform specific, e.g.,
> + * interrupt_exit_user_prepare / interrupt_exit_kernel_prepare for 64s.
> + *
> + * However arch_interrupt_nmi_exit_prepare does return directly to regs, because
> + * NMIs do not do "exit work" or replay soft-masked interrupts.
> + */
> +static inline void arch_interrupt_exit_prepare(struct pt_regs *regs)
> +{
> +}
> +
> +static inline void arch_interrupt_async_enter_prepare(struct pt_regs *regs)
> +{
> +#ifdef CONFIG_PPC64
> +	/* Ensure arch_interrupt_enter_prepare does not enable MSR[EE] */
> +	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
> +#endif
> +	arch_interrupt_enter_prepare(regs);
> +#ifdef CONFIG_PPC_BOOK3S_64
> +	/*
> +	 * RI=1 is set by arch_interrupt_enter_prepare, so this thread flags access
> +	 * has to come afterward (it can cause SLB faults).
> +	 */
> +	if (cpu_has_feature(CPU_FTR_CTRL) &&
> +	    !test_thread_local_flags(_TLF_RUNLATCH))
> +		__ppc64_runlatch_on();
> +#endif
> +	irq_enter();
> +}
> +
> +static inline void arch_interrupt_async_exit_prepare(struct pt_regs *regs)
> +{
> +	/*
> +	 * Adjust at exit so the main handler sees the true NIA. This must
> +	 * come before irq_exit() because irq_exit can enable interrupts, and
> +	 * if another interrupt is taken before nap_adjust_return has run
> +	 * here, then that interrupt would return directly to idle nap return.
> +	 */
> +	nap_adjust_return(regs);
> +
> +	irq_exit();
> +	arch_interrupt_exit_prepare(regs);
> +}
> +
> +struct interrupt_nmi_state {
> +#ifdef CONFIG_PPC64
> +	u8 irq_soft_mask;
> +	u8 irq_happened;
> +	u8 ftrace_enabled;
> +	u64 softe;
> +#endif
> +};
> +
> +static inline bool nmi_disables_ftrace(struct pt_regs *regs)
> +{
> +	/* Allow DEC and PMI to be traced when they are soft-NMI */
> +	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) {
> +		if (TRAP(regs) == INTERRUPT_DECREMENTER)
> +			return false;
> +		if (TRAP(regs) == INTERRUPT_PERFMON)
> +			return false;
> +	}
> +	if (IS_ENABLED(CONFIG_PPC_BOOK3E_64)) {
> +		if (TRAP(regs) == INTERRUPT_PERFMON)
> +			return false;
> +	}
> +
> +	return true;
> +}
> +
> +static inline void arch_interrupt_nmi_enter_prepare(struct pt_regs *regs,
> +					       struct interrupt_nmi_state *state)

CHECK: Alignment should match open parenthesis
#354: FILE: arch/powerpc/include/asm/entry-common.h:322:
+static inline void arch_interrupt_nmi_enter_prepare(struct pt_regs *regs,
+					       struct interrupt_nmi_state *state)


> +{
> +#ifdef CONFIG_PPC64
> +	state->irq_soft_mask = local_paca->irq_soft_mask;
> +	state->irq_happened = local_paca->irq_happened;
> +	state->softe = regs->softe;
> +
> +	/*
> +	 * Set IRQS_ALL_DISABLED unconditionally so irqs_disabled() does
> +	 * the right thing, and set IRQ_HARD_DIS. We do not want to reconcile
> +	 * because that goes through irq tracing which we don't want in NMI.
> +	 */
> +	local_paca->irq_soft_mask = IRQS_ALL_DISABLED;
> +	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
> +
> +	if (!(regs->msr & MSR_EE) || is_implicit_soft_masked(regs)) {
> +		/*
> +		 * Adjust regs->softe to be soft-masked if it had not been
> +		 * reconcied (e.g., interrupt entry with MSR[EE]=0 but softe
> +		 * not yet set disabled), or if it was in an implicit soft
> +		 * masked state. This makes regs_irqs_disabled(regs)
> +		 * behave as expected.
> +		 */
> +		regs->softe = IRQS_ALL_DISABLED;
> +	}
> +
> +	__hard_RI_enable();
> +
> +	/* Don't do any per-CPU operations until interrupt state is fixed */
> +
> +	if (nmi_disables_ftrace(regs)) {
> +		state->ftrace_enabled = this_cpu_get_ftrace_enabled();
> +		this_cpu_set_ftrace_enabled(0);
> +	}
> +#endif
> +
> +	/* If data relocations are enabled, it's safe to use nmi_enter() */
> +	if (mfmsr() & MSR_DR) {
> +		nmi_enter();
> +		return;
> +	}
> +
> +	/*
> +	 * But do not use nmi_enter() for pseries hash guest taking a real-mode
> +	 * NMI because not everything it touches is within the RMA limit.
> +	 */
> +	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) &&
> +	    firmware_has_feature(FW_FEATURE_LPAR) &&
> +	    !radix_enabled())
> +		return;
> +
> +	/*
> +	 * Likewise, don't use it if we have some form of instrumentation (like
> +	 * KASAN shadow) that is not safe to access in real mode (even on radix)
> +	 */
> +	if (IS_ENABLED(CONFIG_KASAN))
> +		return;
> +
> +	/*
> +	 * Likewise, do not use it in real mode if percpu first chunk is not
> +	 * embedded. With CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK enabled there
> +	 * are chances where percpu allocation can come from vmalloc area.
> +	 */
> +	if (percpu_first_chunk_is_paged)
> +		return;
> +
> +	/* Otherwise, it should be safe to call it */
> +	nmi_enter();
> +}
> +
> +static inline void arch_interrupt_nmi_exit_prepare(struct pt_regs *regs,
> +					      struct interrupt_nmi_state *state)
> +{

CHECK: Alignment should match open parenthesis
#425: FILE: arch/powerpc/include/asm/entry-common.h:393:
+static inline void arch_interrupt_nmi_exit_prepare(struct pt_regs *regs,
+					      struct interrupt_nmi_state *state)

> +	if (mfmsr() & MSR_DR) {
> +		// nmi_exit if relocations are on
> +		nmi_exit();
> +	} else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) &&
> +		   firmware_has_feature(FW_FEATURE_LPAR) &&
> +		   !radix_enabled()) {
> +		// no nmi_exit for a pseries hash guest taking a real mode exception
> +	} else if (IS_ENABLED(CONFIG_KASAN)) {
> +		// no nmi_exit for KASAN in real mode
> +	} else if (percpu_first_chunk_is_paged) {
> +		// no nmi_exit if percpu first chunk is not embedded
> +	} else {
> +		nmi_exit();
> +	}
> +
> +	/*
> +	 * nmi does not call nap_adjust_return because nmi should not create
> +	 * new work to do (must use irq_work for that).
> +	 */
> +
> +#ifdef CONFIG_PPC64
> +#ifdef CONFIG_PPC_BOOK3S
> +	if (regs_irqs_disabled(regs)) {
> +		unsigned long rst = search_kernel_restart_table(regs->nip);
> +
> +		if (rst)
> +			regs_set_return_ip(regs, rst);
> +	}
> +#endif
> +
> +	if (nmi_disables_ftrace(regs))
> +		this_cpu_set_ftrace_enabled(state->ftrace_enabled);
> +
> +	/* Check we didn't change the pending interrupt mask. */
> +	WARN_ON_ONCE((state->irq_happened | PACA_IRQ_HARD_DIS) != local_paca->irq_happened);
> +	regs->softe = state->softe;
> +	local_paca->irq_happened = state->irq_happened;
> +	local_paca->irq_soft_mask = state->irq_soft_mask;
> +#endif
> +}
> +
>   static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
>   {
>   	if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ