Message-ID: <d255d8a9-8e45-485e-9853-80c343bbb73b@suse.com>
Date: Fri, 31 Oct 2025 12:22:58 +0200
From: Nikolay Borisov <nik.borisov@...e.com>
To: David Kaplan <david.kaplan@....com>, Thomas Gleixner
 <tglx@...utronix.de>, Borislav Petkov <bp@...en8.de>,
 Peter Zijlstra <peterz@...radead.org>, Josh Poimboeuf <jpoimboe@...nel.org>,
 Pawan Gupta <pawan.kumar.gupta@...ux.intel.com>,
 Ingo Molnar <mingo@...hat.com>, Dave Hansen <dave.hansen@...ux.intel.com>,
 x86@...nel.org, "H . Peter Anvin" <hpa@...or.com>
Cc: Alexander Graf <graf@...zon.com>,
 Boris Ostrovsky <boris.ostrovsky@...cle.com>, linux-kernel@...r.kernel.org
Subject: Re: [RFC PATCH 50/56] x86/alternative: Add re-patch support



On 10/13/25 17:34, David Kaplan wrote:
> Updating alternatives is done under the biggest hammers possible.  The
> freezer is used to freeze all processes and kernel threads at safe
> points to ensure they are not in the middle of a sequence we're about to
> patch.  Then stop_machine_nmi() synchronizes all CPUs and puts them into
> a tight spin loop while re-patching occurs.  The actual patching is done
> using simple memcpy, just like during boot.
> 
> Signed-off-by: David Kaplan <david.kaplan@....com>
> ---
>   arch/x86/include/asm/alternative.h |   6 ++
>   arch/x86/kernel/alternative.c      | 131 +++++++++++++++++++++++++++++
>   2 files changed, 137 insertions(+)
> 
> diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
> index 61ce8a4b1aa6..f0b863292c3c 100644
> --- a/arch/x86/include/asm/alternative.h
> +++ b/arch/x86/include/asm/alternative.h
> @@ -19,6 +19,7 @@
>   #ifndef __ASSEMBLER__
>   
>   #include <linux/stddef.h>
> +#include <linux/static_call_types.h>
>   
>   /*
>    * Alternative inline assembly for SMP.
> @@ -89,6 +90,9 @@ extern s32 __cfi_sites[],	__cfi_sites_end[];
>   extern s32 __ibt_endbr_seal[],	__ibt_endbr_seal_end[];
>   extern s32 __smp_locks[],	__smp_locks_end[];
>   
> +extern struct static_call_site __start_static_call_sites[],
> +			       __stop_static_call_sites[];
> +
>   /*
>    * Debug flag that can be tested to see whether alternative
>    * instructions were patched in already:
> @@ -98,6 +102,8 @@ extern int alternatives_patched;
>   struct module;
>   
>   #ifdef CONFIG_DYNAMIC_MITIGATIONS
> +extern void cpu_update_alternatives(void);
> +extern void cpu_prepare_repatch_alternatives(void);
>   extern void reset_retpolines(s32 *start, s32 *end, struct module *mod);
>   extern void reset_returns(s32 *start, s32 *end, struct module *mod);
>   extern void reset_alternatives(struct alt_instr *start, struct alt_instr *end,
> diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
> index 23bb3386ec5e..613cb645bd9f 100644
> --- a/arch/x86/kernel/alternative.c
> +++ b/arch/x86/kernel/alternative.c
> @@ -6,12 +6,15 @@
>   #include <linux/vmalloc.h>
>   #include <linux/memory.h>
>   #include <linux/execmem.h>
> +#include <linux/stop_machine.h>
> +#include <linux/freezer.h>
>   
>   #include <asm/text-patching.h>
>   #include <asm/insn.h>
>   #include <asm/ibt.h>
>   #include <asm/set_memory.h>
>   #include <asm/nmi.h>
> +#include <asm/bugs.h>
>   
>   int __read_mostly alternatives_patched;
>   
> @@ -3468,4 +3471,132 @@ void its_free_all(struct module *mod)
>   	its_page = NULL;
>   }
>   #endif
> +static atomic_t thread_ack;
> +
> +/*
> + * This function is called by ALL online CPUs but only CPU0 will do the
> + * re-patching.  It is important that all other cores spin in the tight loop
> + * below (and not in multi_cpu_stop) because they cannot safely do return
> + * instructions while returns are being patched.  Therefore, spin them here
> + * (with interrupts disabled) until CPU0 has finished its work.
> + */
> +static int __cpu_update_alternatives(void *__unused)
> +{
> +	if (smp_processor_id()) {
> +		atomic_dec(&thread_ack);
> +		while (!READ_ONCE(alternatives_patched))
> +			cpu_relax();
> +
> +		cpu_bugs_update_speculation_msrs();
> +	} else {
> +		repatch_in_progress = true;
> +
> +		/* Wait for all cores to enter this function. */
> +		while (atomic_read(&thread_ack))
> +			cpu_relax();
> +
> +		/* These must be un-done in the opposite order in which they were applied. */
> +		reset_alternatives(__alt_instructions, __alt_instructions_end, NULL);
> +		reset_builtin_callthunks();
> +		reset_returns(__return_sites, __return_sites_end, NULL);
> +		reset_retpolines(__retpoline_sites, __retpoline_sites_end, NULL);
> +
> +		apply_retpolines(__retpoline_sites, __retpoline_sites_end, NULL);
> +		apply_returns(__return_sites, __return_sites_end, NULL);

This triggers the following splat:

[  363.467469] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:575
[  363.467472] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 18, name: migration/0
[  363.467472] preempt_count: 110001, expected: 0
[  363.467473] RCU nest depth: 0, expected: 0
[  363.467474] no locks held by migration/0/18.
[  363.467474] irq event stamp: 1280
[  363.467475] hardirqs last  enabled at (1279): [<ffffffff91fd1444>] _raw_spin_unlock_irq+0x24/0x50
[  363.467479] hardirqs last disabled at (1280): [<ffffffff913c98f9>] multi_cpu_stop+0x119/0x170
[  363.467482] softirqs last  enabled at (0): [<ffffffff9129eaab>] copy_process+0x7fb/0x1990
[  363.467484] softirqs last disabled at (0): [<0000000000000000>] 0x0
[  363.467485] Preemption disabled at:
[  363.467486] [<ffffffff913c8e63>] cpu_stopper_thread+0x93/0x150
[  363.467488] CPU: 0 UID: 0 PID: 18 Comm: migration/0 Not tainted 6.18.0-rc1-default+ #9 PREEMPT(none)
[  363.467490] Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
[  363.467491] Stopper: multi_cpu_stop+0x0/0x170 <- __stop_cpus.constprop.0+0x77/0xb0
[  363.467493] Call Trace:
[  363.467494]  <NMI>
[  363.467496]  dump_stack_lvl+0x62/0x90
[  363.467498]  __might_resched+0x19f/0x2b0
[  363.467501]  ? its_return_thunk+0x10/0x10
[  363.467503]  __mutex_lock+0x67/0x1060
[  363.467504]  ? look_up_lock_class+0x59/0x130
[  363.467506]  ? look_up_lock_class+0x59/0x130
[  363.467508]  ? __static_call_fixup+0x4f/0xa0
[  363.467510]  ? insn_get_prefixes+0x1a4/0x3f0
[  363.467512]  ? __SCT__tp_func_emulate_vsyscall+0x8/0x8
[  363.467513]  ? its_return_thunk+0x10/0x10
[  363.467514]  ? its_return_thunk+0x10/0x10
[  363.467516]  ? __static_call_fixup+0x4f/0xa0
[  363.467517]  __static_call_fixup+0x4f/0xa0
[  363.467518]  ? __SCT__tp_func_emulate_vsyscall+0x8/0x8
[  363.467519]  apply_returns+0x13e/0x370
[  363.467523]  ? __SCT__tp_func_emulate_vsyscall+0x8/0x8
[  363.467524]  ? __SCT__x86_pmu_disable_all+0x7/0x8
[  363.467525]  ? __SCT__x86_pmu_handle_irq+0x5/0x8
[  363.467527]  ? __copy_user_flushcache+0xf3/0x100
[  363.467528]  ? its_return_thunk+0x10/0x10
[  363.467529]  __cpu_update_alternatives+0x1e3/0x240
[  363.467531]  ? x2apic_send_IPI+0x40/0x60
[  363.467533]  stop_machine_nmi_handler+0x29/0x40
[  363.467534]  default_do_nmi+0x137/0x1a0
[  363.467536]  exc_nmi+0xef/0x120
[  363.467538]  end_repeat_nmi+0xf/0x53

[  363.467578] ================================
[  363.467578] WARNING: inconsistent lock state
[  363.467578] 6.18.0-rc1-default+ #9 Tainted: G        W
[  363.467579] --------------------------------
[  363.467579] inconsistent {INITIAL USE} -> {IN-NMI} usage.
[  363.467580] migration/0/18 [HC1[1]:SC0[0]:HE0:SE1] takes:
[  363.467581] ffffffff92668c28 (text_mutex){+.+.}-{4:4}, at: __static_call_fixup+0x4f/0xa0
[  363.467583] {INITIAL USE} state was registered at:
[  363.467584] irq event stamp: 1280
[  363.467584] hardirqs last  enabled at (1279): [<ffffffff91fd1444>] _raw_spin_unlock_irq+0x24/0x50
[  363.467586] hardirqs last disabled at (1280): [<ffffffff913c98f9>] multi_cpu_stop+0x119/0x170
[  363.467587] softirqs last  enabled at (0): [<ffffffff9129eaab>] copy_process+0x7fb/0x1990
[  363.467588] softirqs last disabled at (0): [<0000000000000000>] 0x0
[  363.467589]
                other info that might help us debug this:
[  363.467590]  Possible unsafe locking scenario:

[  363.467590]        CPU0
[  363.467590]        ----
[  363.467590]   lock(text_mutex);
[  363.467591]   <Interrupt>
[  363.467591]     lock(text_mutex);
[  363.467592]
                 *** DEADLOCK ***

[  363.467592] no locks held by migration/0/18.
[  363.467592]
                stack backtrace:
[  363.467593] CPU: 0 UID: 0 PID: 18 Comm: migration/0 Tainted: G        W           6.18.0-rc1-default+ #9 PREEMPT(none)
[  363.467594] Tainted: [W]=WARN
[  363.467595] Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
[  363.467595] Stopper: multi_cpu_stop+0x0/0x170 <- __stop_cpus.constprop.0+0x77/0xb0
[  363.467597] Call Trace:
[  363.467598]  <NMI>
[  363.467598]  dump_stack_lvl+0x62/0x90
[  363.467600]  print_usage_bug.part.0+0x22c/0x2c0
[  363.467602]  lock_acquire+0x208/0x2d0
[  363.467604]  ? __static_call_fixup+0x4f/0xa0
[  363.467605]  ? its_return_thunk+0x10/0x10
[  363.467607]  __mutex_lock+0xb3/0x1060
[  363.467607]  ? __static_call_fixup+0x4f/0xa0
[  363.467608]  ? look_up_lock_class+0x59/0x130
[  363.467610]  ? look_up_lock_class+0x59/0x130
[  363.467611]  ? __static_call_fixup+0x4f/0xa0
[  363.467613]  ? insn_get_prefixes+0x1a4/0x3f0
[  363.467614]  ? __SCT__tp_func_emulate_vsyscall+0x8/0x8
[  363.467615]  ? its_return_thunk+0x10/0x10
[  363.467617]  ? its_return_thunk+0x10/0x10
[  363.467618]  ? __static_call_fixup+0x4f/0xa0
[  363.467619]  __static_call_fixup+0x4f/0xa0
[  363.467619]  ? __SCT__tp_func_emulate_vsyscall+0x8/0x8
[  363.467621]  apply_returns+0x13e/0x370
[  363.467624]  ? __SCT__tp_func_emulate_vsyscall+0x8/0x8
[  363.467625]  ? __SCT__x86_pmu_disable_all+0x7/0x8
[  363.467626]  ? __SCT__x86_pmu_handle_irq+0x5/0x8
[  363.467627]  ? __copy_user_flushcache+0xf3/0x100
[  363.467628]  ? its_return_thunk+0x10/0x10
[  363.467630]  __cpu_update_alternatives+0x1e3/0x240
[  363.467631]  ? x2apic_send_IPI+0x40/0x60
[  363.467633]  stop_machine_nmi_handler+0x29/0x40
[  363.467634]  default_do_nmi+0x137/0x1a0
[  363.467635]  exc_nmi+0xef/0x120
[  363.467637]  end_repeat_nmi+0xf/0x53


The reason is that apply_returns() -> __static_call_fixup() tries to acquire text_mutex, and on this path apply_returns() runs from NMI context (stop_machine_nmi_handler() -> __cpu_update_alternatives()) with interrupts disabled, so taking a sleeping lock is not allowed there.
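
Not a proposed fix, just to illustrate the shape of one possible direction
(completely untested sketch): since all other CPUs are already quiesced with
interrupts off while CPU0 re-patches, __static_call_fixup() could skip the
sleeping lock on that path, e.g. gated on the repatch_in_progress flag this
series introduces (whether that is the right condition is an open question):

	/* in __static_call_fixup(), sketch only */
	if (!repatch_in_progress)
		mutex_lock(&text_mutex);
	/* ... rewrite the trampoline as before ... */
	if (!repatch_in_progress)
		mutex_unlock(&text_mutex);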


<snip>


