[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CAMj1kXGV1xeZXF7adHwbUsg6+JpLyueWaiS89pS7XFm3fKuw6A@mail.gmail.com>
Date: Fri, 31 Oct 2025 15:16:03 +0100
From: Ard Biesheuvel <ardb@...nel.org>
To: Ard Biesheuvel <ardb+git@...gle.com>
Cc: linux-arm-kernel@...ts.infradead.org, linux-kernel@...r.kernel.org,
linux-crypto@...r.kernel.org, herbert@...dor.apana.org.au,
ebiggers@...nel.org
Subject: Re: [PATCH v4 21/21] arm64/fpsimd: Allocate kernel mode FP/SIMD
buffers on the stack
On Fri, 31 Oct 2025 at 11:40, Ard Biesheuvel <ardb+git@...gle.com> wrote:
>
> From: Ard Biesheuvel <ardb@...nel.org>
>
> Commit aefbab8e77eb16b5
>
> ("arm64: fpsimd: Preserve/restore kernel mode NEON at context switch")
>
> added a 'kernel_fpsimd_state' field to struct thread_struct, which is
> the arch-specific portion of struct task_struct, and is allocated for
> each task in the system. The size of this field is 528 bytes, resulting
> in non-negligible bloat of task_struct, and the resulting memory
> overhead may impact performance on systems with many processes.
>
> This allocation is only used if the task is scheduled out or interrupted
> by a softirq while using the FP/SIMD unit in kernel mode, and so it is
> possible to transparently allocate this buffer on the caller's stack
> instead.
>
> So tweak the 'ksimd' scoped guard implementation so that a stack buffer
> is allocated and passed to both kernel_neon_begin() and
> kernel_neon_end(), and either record it in the task struct, or use it
> directly to preserve the task's kernel mode FP/SIMD state when running in
> softirq context. Passing the address to both functions, and checking the
> addresses for consistency ensures that callers of the updated bare
> begin/end API use it in a manner that is consistent with the new context
> switch semantics.
>
> Signed-off-by: Ard Biesheuvel <ardb@...nel.org>
> ---
> arch/arm64/include/asm/fpu.h | 4 +-
> arch/arm64/include/asm/neon.h | 4 +-
> arch/arm64/include/asm/processor.h | 7 ++-
> arch/arm64/include/asm/simd.h | 7 ++-
> arch/arm64/kernel/fpsimd.c | 53 ++++++++++++++------
> 5 files changed, 54 insertions(+), 21 deletions(-)
>
> diff --git a/arch/arm64/include/asm/fpu.h b/arch/arm64/include/asm/fpu.h
> index bdc4c6304c6a..751e88a96734 100644
> --- a/arch/arm64/include/asm/fpu.h
> +++ b/arch/arm64/include/asm/fpu.h
> @@ -15,12 +15,12 @@ static inline void kernel_fpu_begin(void)
> {
> BUG_ON(!in_task());
> preempt_disable();
> - kernel_neon_begin();
> + kernel_neon_begin(NULL);
> }
>
> static inline void kernel_fpu_end(void)
> {
> - kernel_neon_end();
> + kernel_neon_end(NULL);
> preempt_enable();
> }
>
> diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h
> index d4b1d172a79b..acebee4605b5 100644
> --- a/arch/arm64/include/asm/neon.h
> +++ b/arch/arm64/include/asm/neon.h
> @@ -13,7 +13,7 @@
>
> #define cpu_has_neon() system_supports_fpsimd()
>
> -void kernel_neon_begin(void);
> -void kernel_neon_end(void);
> +void kernel_neon_begin(struct user_fpsimd_state *);
> +void kernel_neon_end(struct user_fpsimd_state *);
>
> #endif /* ! __ASM_NEON_H */
> diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
> index 61d62bfd5a7b..de3c3b65461d 100644
> --- a/arch/arm64/include/asm/processor.h
> +++ b/arch/arm64/include/asm/processor.h
> @@ -172,7 +172,12 @@ struct thread_struct {
> unsigned long fault_code; /* ESR_EL1 value */
> struct debug_info debug; /* debugging */
>
> - struct user_fpsimd_state kernel_fpsimd_state;
> + /*
> + * Set [cleared] by kernel_neon_begin() [kernel_neon_end()] to the
> + * address of a caller provided buffer that will be used to preserve a
> + * task's kernel mode FPSIMD state while it is scheduled out.
> + */
> + struct user_fpsimd_state *kernel_fpsimd_state;
> unsigned int kernel_fpsimd_cpu;
> #ifdef CONFIG_ARM64_PTR_AUTH
> struct ptrauth_keys_user keys_user;
> diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
> index d9f83c478736..7ddb25df5c98 100644
> --- a/arch/arm64/include/asm/simd.h
> +++ b/arch/arm64/include/asm/simd.h
> @@ -43,8 +43,11 @@ static __must_check inline bool may_use_simd(void) {
>
> #endif /* ! CONFIG_KERNEL_MODE_NEON */
>
> -DEFINE_LOCK_GUARD_0(ksimd, kernel_neon_begin(), kernel_neon_end())
> +DEFINE_LOCK_GUARD_1(ksimd,
> + struct user_fpsimd_state,
> + kernel_neon_begin(_T->lock),
> + kernel_neon_end(_T->lock))
>
> -#define scoped_ksimd() scoped_guard(ksimd)
> +#define scoped_ksimd() scoped_guard(ksimd, &(struct user_fpsimd_state){})
>
> #endif
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index e3f8f51748bc..1c652ce4d40d 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -1489,21 +1489,23 @@ static void fpsimd_load_kernel_state(struct task_struct *task)
> * Elide the load if this CPU holds the most recent kernel mode
> * FPSIMD context of the current task.
> */
> - if (last->st == &task->thread.kernel_fpsimd_state &&
> + if (last->st == task->thread.kernel_fpsimd_state &&
> task->thread.kernel_fpsimd_cpu == smp_processor_id())
> return;
>
> - fpsimd_load_state(&task->thread.kernel_fpsimd_state);
> + fpsimd_load_state(task->thread.kernel_fpsimd_state);
> }
>
> static void fpsimd_save_kernel_state(struct task_struct *task)
> {
> struct cpu_fp_state cpu_fp_state = {
> - .st = &task->thread.kernel_fpsimd_state,
> + .st = task->thread.kernel_fpsimd_state,
> .to_save = FP_STATE_FPSIMD,
> };
>
> - fpsimd_save_state(&task->thread.kernel_fpsimd_state);
> + BUG_ON(!cpu_fp_state.st);
> +
> + fpsimd_save_state(task->thread.kernel_fpsimd_state);
> fpsimd_bind_state_to_cpu(&cpu_fp_state);
>
> task->thread.kernel_fpsimd_cpu = smp_processor_id();
> @@ -1774,6 +1776,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
> void fpsimd_flush_task_state(struct task_struct *t)
> {
> t->thread.fpsimd_cpu = NR_CPUS;
> + t->thread.kernel_fpsimd_state = NULL;
> /*
> * If we don't support fpsimd, bail out after we have
> * reset the fpsimd_cpu for this task and clear the
> @@ -1833,8 +1836,13 @@ void fpsimd_save_and_flush_cpu_state(void)
> *
> * The caller may freely use the FPSIMD registers until kernel_neon_end() is
> * called.
> + *
> + * Unless called from non-preemptible task context, @state must point to a
> + * caller provided buffer that will be used to preserve the task's kernel mode
> + * FPSIMD context when it is scheduled out, or if it is interrupted by kernel
> + * mode FPSIMD occurring in softirq context. May be %NULL otherwise.
> */
> -void kernel_neon_begin(void)
> +void kernel_neon_begin(struct user_fpsimd_state *state)
> {
> if (WARN_ON(!system_supports_fpsimd()))
> return;
> @@ -1846,7 +1854,7 @@ void kernel_neon_begin(void)
> /* Save unsaved fpsimd state, if any: */
> if (test_thread_flag(TIF_KERNEL_FPSTATE)) {
> BUG_ON(IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq());
> - fpsimd_save_kernel_state(current);
> + fpsimd_save_state(state);
> } else {
> fpsimd_save_user_state();
>
> @@ -1867,8 +1875,17 @@ void kernel_neon_begin(void)
> * mode in task context. So in this case, setting the flag here
> * is always appropriate.
> */
> - if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq())
> + if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq()) {
> + /*
> + * Record the caller provided buffer as the kernel mode
> + * FP/SIMD buffer for this task, so that the state can
> + * be preserved and restored on a context switch.
> + */
> + WARN_ON(current->thread.kernel_fpsimd_state != NULL);
> + WARN_ON(preemptible() && !state);
This is in the wrong place: we are never preemptible here, even when
called from preemptible context.
Will fix for the next rev.
Powered by blists - more mailing lists