Message-ID: <20251008154533.3089255-44-ardb+git@google.com>
Date: Wed, 8 Oct 2025 17:45:55 +0200
From: Ard Biesheuvel <ardb+git@...gle.com>
To: linux-arm-kernel@...ts.infradead.org
Cc: linux-kernel@...r.kernel.org, linux-crypto@...r.kernel.org,
herbert@...dor.apana.org.au, ebiggers@...nel.org,
Ard Biesheuvel <ardb@...nel.org>
Subject: [PATCH v3 21/21] arm64/fpsimd: Allocate kernel mode FP/SIMD buffers
on the stack
From: Ard Biesheuvel <ardb@...nel.org>
Commit aefbab8e77eb16b5
("arm64: fpsimd: Preserve/restore kernel mode NEON at context switch")
added a 'kernel_fpsimd_state' field to struct thread_struct, which is
the arch-specific portion of struct task_struct, and is allocated for
each task in the system. This field is 528 bytes in size, which bloats
task_struct non-negligibly, and the associated memory overhead may
impact performance on systems with many processes.
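For reference, the 528 bytes follow directly from the UAPI layout of
struct user_fpsimd_state in arch/arm64/include/uapi/asm/ptrace.h, which
this field embeds; the size arithmetic, annotated for illustration:

    struct user_fpsimd_state {
            __uint128_t     vregs[32];      /* 32 x 16 = 512 bytes */
            __u32           fpsr;           /* + 4     -> 516      */
            __u32           fpcr;           /* + 4     -> 520      */
            __u32           __reserved[2];  /* + 8     -> 528      */
    };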
This allocation is only used if the task is scheduled out or interrupted
by a softirq while using the FP/SIMD unit in kernel mode, and so it is
possible to transparently allocate this buffer on the caller's stack
instead.
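To illustrate, here is a hypothetical caller of the bare API after this
change (not part of the patch): the buffer only needs to live across
the begin/end window, so a local variable on the caller's stack is
sufficient:

    static void stack_buffer_example(void)      /* hypothetical */
    {
            struct user_fpsimd_state regs = { };

            kernel_neon_begin(&regs);
            /* the FP/SIMD registers may be used here */
            kernel_neon_end(&regs);
    }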
So tweak the 'ksimd' scoped guard implementation so that a stack buffer
is allocated and passed to both kernel_neon_begin() and
kernel_neon_end(), which either record it in the task struct, or use it
directly to preserve the task's kernel mode FP/SIMD state when running
in softirq context. Passing the address to both functions, and checking
the addresses for consistency, ensures that callers of the updated bare
begin/end API use it in a manner that is consistent with the new
context switch semantics.
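For comparison with the bare API example above, the equivalent using
the scoped guard, which now places an anonymous, zero-initialized
struct user_fpsimd_state compound literal on the caller's stack and
hands its address to both kernel_neon_begin() and kernel_neon_end()
(again a hypothetical caller, not part of the patch):

    static void scoped_ksimd_example(void)      /* hypothetical */
    {
            scoped_ksimd() {
                    /* the FP/SIMD registers may be used here */
            }
    }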
Signed-off-by: Ard Biesheuvel <ardb@...nel.org>
---
 arch/arm64/include/asm/fpu.h       |  4 +-
 arch/arm64/include/asm/neon.h      |  4 +-
 arch/arm64/include/asm/processor.h |  2 +-
 arch/arm64/include/asm/simd.h      |  7 +++-
 arch/arm64/kernel/fpsimd.c         | 44 +++++++++++++-------
 5 files changed, 40 insertions(+), 21 deletions(-)
diff --git a/arch/arm64/include/asm/fpu.h b/arch/arm64/include/asm/fpu.h
index 52a1c513bdf3..e252c518de05 100644
--- a/arch/arm64/include/asm/fpu.h
+++ b/arch/arm64/include/asm/fpu.h
@@ -15,12 +15,12 @@ static inline void kernel_fpu_begin(void)
{
BUG_ON(!in_task());
preempt_disable();
- kernel_neon_begin();
+ kernel_neon_begin(NULL);
}
static inline void kernel_fpu_end(void)
{
- kernel_neon_end();
+ kernel_neon_end(NULL);
preempt_enable();
}
diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h
index d4b1d172a79b..acebee4605b5 100644
--- a/arch/arm64/include/asm/neon.h
+++ b/arch/arm64/include/asm/neon.h
@@ -13,7 +13,7 @@
#define cpu_has_neon() system_supports_fpsimd()
-void kernel_neon_begin(void);
-void kernel_neon_end(void);
+void kernel_neon_begin(struct user_fpsimd_state *);
+void kernel_neon_end(struct user_fpsimd_state *);
#endif /* ! __ASM_NEON_H */
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 61d62bfd5a7b..226e635c53d9 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -172,7 +172,7 @@ struct thread_struct {
unsigned long fault_code; /* ESR_EL1 value */
struct debug_info debug; /* debugging */
- struct user_fpsimd_state kernel_fpsimd_state;
+ struct user_fpsimd_state *kernel_fpsimd_state;
unsigned int kernel_fpsimd_cpu;
#ifdef CONFIG_ARM64_PTR_AUTH
struct ptrauth_keys_user keys_user;
diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
index d9f83c478736..7ddb25df5c98 100644
--- a/arch/arm64/include/asm/simd.h
+++ b/arch/arm64/include/asm/simd.h
@@ -43,8 +43,11 @@ static __must_check inline bool may_use_simd(void) {
#endif /* ! CONFIG_KERNEL_MODE_NEON */
-DEFINE_LOCK_GUARD_0(ksimd, kernel_neon_begin(), kernel_neon_end())
+DEFINE_LOCK_GUARD_1(ksimd,
+ struct user_fpsimd_state,
+ kernel_neon_begin(_T->lock),
+ kernel_neon_end(_T->lock))
-#define scoped_ksimd() scoped_guard(ksimd)
+#define scoped_ksimd() scoped_guard(ksimd, &(struct user_fpsimd_state){})
#endif
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index e3f8f51748bc..cab866d52bb7 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1489,21 +1489,23 @@ static void fpsimd_load_kernel_state(struct task_struct *task)
* Elide the load if this CPU holds the most recent kernel mode
* FPSIMD context of the current task.
*/
- if (last->st == &task->thread.kernel_fpsimd_state &&
+ if (last->st == task->thread.kernel_fpsimd_state &&
task->thread.kernel_fpsimd_cpu == smp_processor_id())
return;
- fpsimd_load_state(&task->thread.kernel_fpsimd_state);
+ fpsimd_load_state(task->thread.kernel_fpsimd_state);
}
static void fpsimd_save_kernel_state(struct task_struct *task)
{
struct cpu_fp_state cpu_fp_state = {
- .st = &task->thread.kernel_fpsimd_state,
+ .st = task->thread.kernel_fpsimd_state,
.to_save = FP_STATE_FPSIMD,
};
- fpsimd_save_state(&task->thread.kernel_fpsimd_state);
+ BUG_ON(!cpu_fp_state.st);
+
+ fpsimd_save_state(task->thread.kernel_fpsimd_state);
fpsimd_bind_state_to_cpu(&cpu_fp_state);
task->thread.kernel_fpsimd_cpu = smp_processor_id();
@@ -1774,6 +1776,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
void fpsimd_flush_task_state(struct task_struct *t)
{
t->thread.fpsimd_cpu = NR_CPUS;
+ t->thread.kernel_fpsimd_state = NULL;
/*
* If we don't support fpsimd, bail out after we have
* reset the fpsimd_cpu for this task and clear the
@@ -1834,7 +1837,7 @@ void fpsimd_save_and_flush_cpu_state(void)
* The caller may freely use the FPSIMD registers until kernel_neon_end() is
* called.
*/
-void kernel_neon_begin(void)
+void kernel_neon_begin(struct user_fpsimd_state *state)
{
if (WARN_ON(!system_supports_fpsimd()))
return;
@@ -1846,7 +1849,7 @@ void kernel_neon_begin(void)
/* Save unsaved fpsimd state, if any: */
if (test_thread_flag(TIF_KERNEL_FPSTATE)) {
BUG_ON(IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq());
- fpsimd_save_kernel_state(current);
+ fpsimd_save_state(state);
} else {
fpsimd_save_user_state();
@@ -1867,8 +1870,16 @@ void kernel_neon_begin(void)
* mode in task context. So in this case, setting the flag here
* is always appropriate.
*/
- if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq())
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq()) {
+ /*
+ * Record the caller provided buffer as the kernel mode
+ * FP/SIMD buffer for this task, so that the state can
+ * be preserved and restored on a context switch.
+ */
+ WARN_ON(current->thread.kernel_fpsimd_state != NULL);
+ current->thread.kernel_fpsimd_state = state;
set_thread_flag(TIF_KERNEL_FPSTATE);
+ }
}
/* Invalidate any task state remaining in the fpsimd regs: */
@@ -1887,21 +1898,26 @@ EXPORT_SYMBOL_GPL(kernel_neon_begin);
* The caller must not use the FPSIMD registers after this function is called,
* unless kernel_neon_begin() is called again in the meantime.
*/
-void kernel_neon_end(void)
+void kernel_neon_end(struct user_fpsimd_state *state)
{
if (!system_supports_fpsimd())
return;
+ if (!test_thread_flag(TIF_KERNEL_FPSTATE))
+ return;
+
/*
* If we are returning from a nested use of kernel mode FPSIMD, restore
* the task context kernel mode FPSIMD state. This can only happen when
* running in softirq context on non-PREEMPT_RT.
*/
- if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq() &&
- test_thread_flag(TIF_KERNEL_FPSTATE))
- fpsimd_load_kernel_state(current);
- else
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq()) {
+ fpsimd_load_state(state);
+ } else {
clear_thread_flag(TIF_KERNEL_FPSTATE);
+ WARN_ON(current->thread.kernel_fpsimd_state != state);
+ current->thread.kernel_fpsimd_state = NULL;
+ }
}
EXPORT_SYMBOL_GPL(kernel_neon_end);
@@ -1937,7 +1953,7 @@ void __efi_fpsimd_begin(void)
WARN_ON(preemptible());
if (may_use_simd()) {
- kernel_neon_begin();
+ kernel_neon_begin(&efi_fpsimd_state);
} else {
/*
* If !efi_sve_state, SVE can't be in use yet and doesn't need
@@ -1986,7 +2002,7 @@ void __efi_fpsimd_end(void)
return;
if (!efi_fpsimd_state_used) {
- kernel_neon_end();
+ kernel_neon_end(&efi_fpsimd_state);
} else {
if (system_supports_sve() && efi_sve_state_used) {
bool ffr = true;
--
2.51.0.710.ga91ca5db03-goog