Message-ID: <20260102224432.172b1247@pumpkin>
Date: Fri, 2 Jan 2026 22:44:32 +0000
From: David Laight <david.laight.linux@...il.com>
To: Ryan Roberts <ryan.roberts@....com>
Cc: Catalin Marinas <catalin.marinas@....com>, Will Deacon
<will@...nel.org>, Huacai Chen <chenhuacai@...nel.org>, Madhavan Srinivasan
<maddy@...ux.ibm.com>, Michael Ellerman <mpe@...erman.id.au>, Paul Walmsley
<pjw@...nel.org>, Palmer Dabbelt <palmer@...belt.com>, Albert Ou
<aou@...s.berkeley.edu>, Heiko Carstens <hca@...ux.ibm.com>, Vasily Gorbik
<gor@...ux.ibm.com>, Alexander Gordeev <agordeev@...ux.ibm.com>, Thomas
Gleixner <tglx@...utronix.de>, Ingo Molnar <mingo@...hat.com>, Borislav
Petkov <bp@...en8.de>, Dave Hansen <dave.hansen@...ux.intel.com>, Kees Cook
<kees@...nel.org>, "Gustavo A. R. Silva" <gustavoars@...nel.org>, Arnd
Bergmann <arnd@...db.de>, Mark Rutland <mark.rutland@....com>, "Jason A.
Donenfeld" <Jason@...c4.com>, Ard Biesheuvel <ardb@...nel.org>, Jeremy
Linton <jeremy.linton@....com>, linux-kernel@...r.kernel.org,
linux-arm-kernel@...ts.infradead.org, loongarch@...ts.linux.dev,
linuxppc-dev@...ts.ozlabs.org, linux-riscv@...ts.infradead.org,
linux-s390@...r.kernel.org, linux-hardening@...r.kernel.org,
stable@...r.kernel.org
Subject: Re: [PATCH v3 1/3] randomize_kstack: Maintain kstack_offset per
task
On Fri, 2 Jan 2026 13:11:52 +0000
Ryan Roberts <ryan.roberts@....com> wrote:
> kstack_offset was previously maintained per-cpu, but this caused a
> couple of issues. So let's instead make it per-task.
>
> Issue 1: add_random_kstack_offset() and choose_random_kstack_offset()
> were expected and required to be called with interrupts and preemption
> disabled so that they could manipulate per-cpu state. But arm64,
> loongarch and riscv are calling them with interrupts and preemption
> enabled. I
> don't _think_ this causes any functional issues, but it's certainly
> unexpected and could lead to manipulating the wrong cpu's state, which
> could cause a minor performance degradation due to bouncing the cache
> lines. By maintaining the state per-task those functions can safely be
> called in preemptible context.
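>
> For illustration, the hazard with per-cpu state in preemptible context
> is roughly this interleaving (a simplified sketch, not the literal
> macro bodies):
>
>	u32 offset = raw_cpu_read(kstack_offset);  /* reads CPU 0's copy  */
>	/* ... task is preempted and migrated to CPU 1 ... */
>	raw_cpu_write(kstack_offset, offset);      /* writes CPU 1's copy */
>
> With per-task storage, both accesses go through current->kstack_offset,
> which follows the task across any migration.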
>
> Issue 2: add_random_kstack_offset() is called before executing the
> syscall and expands the stack using a previously chosen random offset.
<>
David
> choose_random_kstack_offset() is called after executing the syscall and
> chooses and stores a new random offset for the next syscall. With
> per-cpu storage for this offset, an attacker could force cpu migration
> during the execution of the syscall and prevent the offset from being
> updated for the original cpu such that it is predictable for the next
> syscall on that cpu. By maintaining the state per-task, this problem
> goes away because the per-task random offset is updated after the
> syscall regardless of which cpu it is executing on.
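>
> Concretely, the attack against the old per-cpu scheme looked roughly
> like this (an illustrative timeline, not literal code):
>
>	CPU 0: add_random_kstack_offset();        /* consume CPU 0's offset */
>	       /* attacker forces migration mid-syscall */
>	CPU 1: choose_random_kstack_offset(rand); /* refresh CPU 1's offset */
>	/* CPU 0's offset is never refreshed, so the next syscall that
>	 * enters on CPU 0 reuses a value the attacker can predict. */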
>
> Fixes: 39218ff4c625 ("stack: Optionally randomize kernel stack offset each syscall")
> Closes: https://lore.kernel.org/all/dd8c37bc-795f-4c7a-9086-69e584d8ab24@arm.com/
> Cc: stable@...r.kernel.org
> Signed-off-by: Ryan Roberts <ryan.roberts@....com>
> ---
> include/linux/randomize_kstack.h | 26 +++++++++++++++-----------
> include/linux/sched.h | 4 ++++
> init/main.c | 1 -
> kernel/fork.c | 2 ++
> 4 files changed, 21 insertions(+), 12 deletions(-)
>
> diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h
> index 1d982dbdd0d0..5d3916ca747c 100644
> --- a/include/linux/randomize_kstack.h
> +++ b/include/linux/randomize_kstack.h
> @@ -9,7 +9,6 @@
>
> DECLARE_STATIC_KEY_MAYBE(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,
> randomize_kstack_offset);
> -DECLARE_PER_CPU(u32, kstack_offset);
>
> /*
> * Do not use this anywhere else in the kernel. This is used here because
> @@ -50,15 +49,14 @@ DECLARE_PER_CPU(u32, kstack_offset);
> * add_random_kstack_offset - Increase stack utilization by previously
> * chosen random offset
> *
> - * This should be used in the syscall entry path when interrupts and
> - * preempt are disabled, and after user registers have been stored to
> - * the stack. For testing the resulting entropy, please see:
> - * tools/testing/selftests/lkdtm/stack-entropy.sh
> + * This should be used in the syscall entry path after user registers have been
> + * stored to the stack. Preemption may be enabled. For testing the resulting
> + * entropy, please see: tools/testing/selftests/lkdtm/stack-entropy.sh
> */
> #define add_random_kstack_offset() do { \
> if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \
> &randomize_kstack_offset)) { \
> - u32 offset = raw_cpu_read(kstack_offset); \
> + u32 offset = current->kstack_offset; \
> u8 *ptr = __kstack_alloca(KSTACK_OFFSET_MAX(offset)); \
> /* Keep allocation even after "ptr" loses scope. */ \
> asm volatile("" :: "r"(ptr) : "memory"); \
> @@ -69,9 +67,9 @@ DECLARE_PER_CPU(u32, kstack_offset);
> * choose_random_kstack_offset - Choose the random offset for the next
> * add_random_kstack_offset()
> *
> - * This should only be used during syscall exit when interrupts and
> - * preempt are disabled. This position in the syscall flow is done to
> - * frustrate attacks from userspace attempting to learn the next offset:
> + * This should only be used during syscall exit. Preemption may be enabled. This
> + * position in the syscall flow is done to frustrate attacks from userspace
> + * attempting to learn the next offset:
> * - Maximize the timing uncertainty visible from userspace: if the
> * offset is chosen at syscall entry, userspace has much more control
> * over the timing between choosing offsets. "How long will we be in
> @@ -85,14 +83,20 @@ DECLARE_PER_CPU(u32, kstack_offset);
> #define choose_random_kstack_offset(rand) do { \
> if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \
> &randomize_kstack_offset)) { \
> - u32 offset = raw_cpu_read(kstack_offset); \
> + u32 offset = current->kstack_offset; \
> offset = ror32(offset, 5) ^ (rand); \
> - raw_cpu_write(kstack_offset, offset); \
> + current->kstack_offset = offset; \
> } \
> } while (0)
> +
> +static inline void random_kstack_task_init(struct task_struct *tsk)
> +{
> + tsk->kstack_offset = 0;
> +}
> #else /* CONFIG_RANDOMIZE_KSTACK_OFFSET */
> #define add_random_kstack_offset() do { } while (0)
> #define choose_random_kstack_offset(rand) do { } while (0)
> +#define random_kstack_task_init(tsk) do { } while (0)
> #endif /* CONFIG_RANDOMIZE_KSTACK_OFFSET */
>
> #endif
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index d395f2810fac..9e0080ed1484 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1591,6 +1591,10 @@ struct task_struct {
> unsigned long prev_lowest_stack;
> #endif
>
> +#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
> + u32 kstack_offset;
> +#endif
> +
> #ifdef CONFIG_X86_MCE
> void __user *mce_vaddr;
> __u64 mce_kflags;
> diff --git a/init/main.c b/init/main.c
> index b84818ad9685..27fcbbde933e 100644
> --- a/init/main.c
> +++ b/init/main.c
> @@ -830,7 +830,6 @@ static inline void initcall_debug_enable(void)
> #ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
> DEFINE_STATIC_KEY_MAYBE_RO(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,
> randomize_kstack_offset);
> -DEFINE_PER_CPU(u32, kstack_offset);
>
> static int __init early_randomize_kstack_offset(char *buf)
> {
> diff --git a/kernel/fork.c b/kernel/fork.c
> index b1f3915d5f8e..b061e1edbc43 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -95,6 +95,7 @@
> #include <linux/thread_info.h>
> #include <linux/kstack_erase.h>
> #include <linux/kasan.h>
> +#include <linux/randomize_kstack.h>
> #include <linux/scs.h>
> #include <linux/io_uring.h>
> #include <linux/bpf.h>
> @@ -2231,6 +2232,7 @@ __latent_entropy struct task_struct *copy_process(
> if (retval)
> goto bad_fork_cleanup_io;
>
> + random_kstack_task_init(p);
> stackleak_task_init(p);
>
> if (pid != &init_struct_pid) {
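
For reference, the call pattern these macros expect from an arch syscall
path is roughly as follows. This is a sketch only: invoke_syscall()
stands in for the arch's real dispatch code, and get_random_u16() is one
plausible entropy source rather than a requirement of the API.

	/* Sketch of macro placement in a generic syscall path. */
	static void do_syscall(struct pt_regs *regs)
	{
		/* Grow the stack by the offset chosen at the last exit. */
		add_random_kstack_offset();

		invoke_syscall(regs);

		/* Pick and store the offset for the *next* entry; with
		 * this patch it lands in current->kstack_offset. */
		choose_random_kstack_offset(get_random_u16());
	}

The only thing the patch changes about that pattern is where the offset
is stored between the two calls.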