Message-ID: <CAMj1kXGV1xeZXF7adHwbUsg6+JpLyueWaiS89pS7XFm3fKuw6A@mail.gmail.com>
Date: Fri, 31 Oct 2025 15:16:03 +0100
From: Ard Biesheuvel <ardb@...nel.org>
To: Ard Biesheuvel <ardb+git@...gle.com>
Cc: linux-arm-kernel@...ts.infradead.org, linux-kernel@...r.kernel.org, 
	linux-crypto@...r.kernel.org, herbert@...dor.apana.org.au, 
	ebiggers@...nel.org
Subject: Re: [PATCH v4 21/21] arm64/fpsimd: Allocate kernel mode FP/SIMD
 buffers on the stack

On Fri, 31 Oct 2025 at 11:40, Ard Biesheuvel <ardb+git@...gle.com> wrote:
>
> From: Ard Biesheuvel <ardb@...nel.org>
>
> Commit aefbab8e77eb16b5
>
>   ("arm64: fpsimd: Preserve/restore kernel mode NEON at context switch")
>
> added a 'kernel_fpsimd_state' field to struct thread_struct, which is
> the arch-specific portion of struct task_struct, and is allocated for
> each task in the system. This field is 528 bytes in size, which bloats
> task_struct non-negligibly, and the memory overhead may impact
> performance on systems with many processes.
>
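For readers following along: the 528 bytes are simply
sizeof(struct user_fpsimd_state), i.e. the 32 128-bit V registers plus
the FP status/control registers (layout as in
arch/arm64/include/uapi/asm/ptrace.h):

	struct user_fpsimd_state {
		__uint128_t	vregs[32];	/* 32 x 16 = 512 bytes */
		__u32		fpsr;
		__u32		fpcr;
		__u32		__reserved[2];	/* 512 + 4 + 4 + 8 = 528 */
	};

Replacing the embedded struct with a pointer (see the asm/processor.h
hunk below) shrinks thread_struct by 520 bytes per task.
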
> This allocation is only used if the task is scheduled out or interrupted
> by a softirq while using the FP/SIMD unit in kernel mode, and so it is
> possible to transparently allocate this buffer on the caller's stack
> instead.
>
> So tweak the 'ksimd' scoped guard implementation so that a stack buffer
> is allocated and passed to both kernel_neon_begin() and
> kernel_neon_end(), which either record it in the task struct, or use it
> directly to preserve the task's kernel mode FP/SIMD state when running
> in softirq context. Passing the address to both functions and checking
> the two addresses for consistency ensures that callers of the updated
> bare begin/end API use it in a manner that is consistent with the new
> context switch semantics.
>
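To make the new calling convention concrete, a bare user of the
updated API looks roughly like this (sketch only; do_simd_work() is a
made-up example, not from this series):

	static void do_simd_work(u8 *dst, const u8 *src, int len)
	{
		struct user_fpsimd_state buf;	/* lives on this stack frame */

		kernel_neon_begin(&buf);	/* may record &buf in current->thread */
		/* ... FP/SIMD instructions ... */
		kernel_neon_end(&buf);		/* must be passed the same address */
	}

or equivalently via the scoped guard, which allocates the buffer
itself:

	static void do_simd_work(u8 *dst, const u8 *src, int len)
	{
		scoped_ksimd() {
			/* ... FP/SIMD instructions ... */
		}
	}
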
> Signed-off-by: Ard Biesheuvel <ardb@...nel.org>
> ---
>  arch/arm64/include/asm/fpu.h       |  4 +-
>  arch/arm64/include/asm/neon.h      |  4 +-
>  arch/arm64/include/asm/processor.h |  7 ++-
>  arch/arm64/include/asm/simd.h      |  7 ++-
>  arch/arm64/kernel/fpsimd.c         | 53 ++++++++++++++------
>  5 files changed, 54 insertions(+), 21 deletions(-)
>
> diff --git a/arch/arm64/include/asm/fpu.h b/arch/arm64/include/asm/fpu.h
> index bdc4c6304c6a..751e88a96734 100644
> --- a/arch/arm64/include/asm/fpu.h
> +++ b/arch/arm64/include/asm/fpu.h
> @@ -15,12 +15,12 @@ static inline void kernel_fpu_begin(void)
>  {
>         BUG_ON(!in_task());
>         preempt_disable();
> -       kernel_neon_begin();
> +       kernel_neon_begin(NULL);
>  }
>
>  static inline void kernel_fpu_end(void)
>  {
> -       kernel_neon_end();
> +       kernel_neon_end(NULL);
>         preempt_enable();
>  }
>
> diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h
> index d4b1d172a79b..acebee4605b5 100644
> --- a/arch/arm64/include/asm/neon.h
> +++ b/arch/arm64/include/asm/neon.h
> @@ -13,7 +13,7 @@
>
>  #define cpu_has_neon()         system_supports_fpsimd()
>
> -void kernel_neon_begin(void);
> -void kernel_neon_end(void);
> +void kernel_neon_begin(struct user_fpsimd_state *);
> +void kernel_neon_end(struct user_fpsimd_state *);
>
>  #endif /* ! __ASM_NEON_H */
> diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
> index 61d62bfd5a7b..de3c3b65461d 100644
> --- a/arch/arm64/include/asm/processor.h
> +++ b/arch/arm64/include/asm/processor.h
> @@ -172,7 +172,12 @@ struct thread_struct {
>         unsigned long           fault_code;     /* ESR_EL1 value */
>         struct debug_info       debug;          /* debugging */
>
> -       struct user_fpsimd_state        kernel_fpsimd_state;
> +       /*
> +        * Set [cleared] by kernel_neon_begin() [kernel_neon_end()] to the
> +        * address of a caller provided buffer that will be used to preserve a
> +        * task's kernel mode FPSIMD state while it is scheduled out.
> +        */
> +       struct user_fpsimd_state        *kernel_fpsimd_state;
>         unsigned int                    kernel_fpsimd_cpu;
>  #ifdef CONFIG_ARM64_PTR_AUTH
>         struct ptrauth_keys_user        keys_user;
> diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
> index d9f83c478736..7ddb25df5c98 100644
> --- a/arch/arm64/include/asm/simd.h
> +++ b/arch/arm64/include/asm/simd.h
> @@ -43,8 +43,11 @@ static __must_check inline bool may_use_simd(void) {
>
>  #endif /* ! CONFIG_KERNEL_MODE_NEON */
>
> -DEFINE_LOCK_GUARD_0(ksimd, kernel_neon_begin(), kernel_neon_end())
> +DEFINE_LOCK_GUARD_1(ksimd,
> +                   struct user_fpsimd_state,
> +                   kernel_neon_begin(_T->lock),
> +                   kernel_neon_end(_T->lock))
>
> -#define scoped_ksimd() scoped_guard(ksimd)
> +#define scoped_ksimd() scoped_guard(ksimd, &(struct user_fpsimd_state){})
>
>  #endif
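
The bit that makes this work is the compound literal in
scoped_ksimd(): '&(struct user_fpsimd_state){}' allocates an unnamed,
zero-initialised object on the caller's stack whose lifetime is the
enclosing block, so it outlives the guarded region.
DEFINE_LOCK_GUARD_1() (from <linux/cleanup.h>) stashes that pointer in
_T->lock and arranges for kernel_neon_end() to run automatically on
scope exit. A standalone userspace illustration of the lifetime rule
(not kernel code):

	#include <stdio.h>

	struct buf { unsigned char b[528]; };

	static void begin(struct buf *p) { printf("begin %p\n", (void *)p); }
	static void end(struct buf *p)   { printf("end   %p\n", (void *)p); }

	int main(void)
	{
		/* unnamed object, zero-initialised, on this stack frame */
		struct buf *p = &(struct buf){};

		begin(p);
		/* ... guarded region ... */
		end(p);		/* same address both times */
		return 0;
	}
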
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index e3f8f51748bc..1c652ce4d40d 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -1489,21 +1489,23 @@ static void fpsimd_load_kernel_state(struct task_struct *task)
>          * Elide the load if this CPU holds the most recent kernel mode
>          * FPSIMD context of the current task.
>          */
> -       if (last->st == &task->thread.kernel_fpsimd_state &&
> +       if (last->st == task->thread.kernel_fpsimd_state &&
>             task->thread.kernel_fpsimd_cpu == smp_processor_id())
>                 return;
>
> -       fpsimd_load_state(&task->thread.kernel_fpsimd_state);
> +       fpsimd_load_state(task->thread.kernel_fpsimd_state);
>  }
>
>  static void fpsimd_save_kernel_state(struct task_struct *task)
>  {
>         struct cpu_fp_state cpu_fp_state = {
> -               .st             = &task->thread.kernel_fpsimd_state,
> +               .st             = task->thread.kernel_fpsimd_state,
>                 .to_save        = FP_STATE_FPSIMD,
>         };
>
> -       fpsimd_save_state(&task->thread.kernel_fpsimd_state);
> +       BUG_ON(!cpu_fp_state.st);
> +
> +       fpsimd_save_state(task->thread.kernel_fpsimd_state);
>         fpsimd_bind_state_to_cpu(&cpu_fp_state);
>
>         task->thread.kernel_fpsimd_cpu = smp_processor_id();
> @@ -1774,6 +1776,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
>  void fpsimd_flush_task_state(struct task_struct *t)
>  {
>         t->thread.fpsimd_cpu = NR_CPUS;
> +       t->thread.kernel_fpsimd_state = NULL;
>         /*
>          * If we don't support fpsimd, bail out after we have
>          * reset the fpsimd_cpu for this task and clear the
> @@ -1833,8 +1836,13 @@ void fpsimd_save_and_flush_cpu_state(void)
>   *
>   * The caller may freely use the FPSIMD registers until kernel_neon_end() is
>   * called.
> + *
> + * Unless called from non-preemptible task context, @state must point to a
> + * caller provided buffer that will be used to preserve the task's kernel mode
> + * FPSIMD context when it is scheduled out, or if it is interrupted by kernel
> + * mode FPSIMD occurring in softirq context. May be %NULL otherwise.
>   */
> -void kernel_neon_begin(void)
> +void kernel_neon_begin(struct user_fpsimd_state *state)
>  {
>         if (WARN_ON(!system_supports_fpsimd()))
>                 return;
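
Under this contract, NULL is only acceptable when the buffer can never
be needed, i.e. when the begin/end pair runs in task context with
preemption disabled, which is exactly what the
kernel_fpu_begin()/kernel_fpu_end() wrappers in the asm/fpu.h hunk
above guarantee:

	preempt_disable();
	kernel_neon_begin(NULL);	/* cannot be scheduled out, */
	/* ... FP/SIMD ... */
	kernel_neon_end(NULL);		/* so no preserve buffer is needed */
	preempt_enable();

A softirq that interrupts such a region preserves and restores the
task's state via its own caller-provided buffer instead.
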
> @@ -1846,7 +1854,7 @@ void kernel_neon_begin(void)
>         /* Save unsaved fpsimd state, if any: */
>         if (test_thread_flag(TIF_KERNEL_FPSTATE)) {
>                 BUG_ON(IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq());
> -               fpsimd_save_kernel_state(current);
> +               fpsimd_save_state(state);
>         } else {
>                 fpsimd_save_user_state();
>
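
So in the nested case, the interrupted task's kernel mode state now
goes straight into the interrupting caller's buffer rather than into
the interrupted task's thread_struct. Roughly (sketch; the matching
restore belongs in kernel_neon_end(), in a part of the patch not
visible in the quoted hunks):

	/* task context */		/* softirq on the same CPU */
	kernel_neon_begin(&a);
	/* ...FP/SIMD... */
					kernel_neon_begin(&b);
					/* task's state saved into b */
					/* ...FP/SIMD... */
					kernel_neon_end(&b);
					/* task's state reloaded from b */
	/* ...FP/SIMD... */
	kernel_neon_end(&a);
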
> @@ -1867,8 +1875,17 @@ void kernel_neon_begin(void)
>                  * mode in task context. So in this case, setting the flag here
>                  * is always appropriate.
>                  */
> -               if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq())
> +               if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq()) {
> +                       /*
> +                        * Record the caller provided buffer as the kernel mode
> +                        * FP/SIMD buffer for this task, so that the state can
> +                        * be preserved and restored on a context switch.
> +                        */
> +                       WARN_ON(current->thread.kernel_fpsimd_state != NULL);
> +                       WARN_ON(preemptible() && !state);

This is in the wrong place: we are never preemptible here, even when
called from preemptible context, since preemption has already been
disabled by the time we get this far.

Will fix for the next rev.
