[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250918063539.2640512-12-ardb+git@google.com>
Date: Thu, 18 Sep 2025 08:35:45 +0200
From: Ard Biesheuvel <ardb+git@...gle.com>
To: linux-arm-kernel@...ts.infradead.org
Cc: linux-crypto@...r.kernel.org, linux-kernel@...r.kernel.org,
herbert@...dor.apana.org.au, ebiggers@...nel.org,
Ard Biesheuvel <ardb@...nel.org>, Marc Zyngier <maz@...nel.org>, Will Deacon <will@...nel.org>,
Mark Rutland <mark.rutland@....com>, Kees Cook <keescook@...omium.org>,
Catalin Marinas <catalin.marinas@....com>, Mark Brown <broonie@...nel.org>
Subject: [PATCH 5/5] arm64/fpsimd: Allocate kernel mode FP/SIMD buffers on the stack
From: Ard Biesheuvel <ardb@...nel.org>
Commit aefbab8e77eb16b5
("arm64: fpsimd: Preserve/restore kernel mode NEON at context switch")
added a 'kernel_fpsimd_state' field to struct thread_struct, which is
the arch-specific portion of struct task_struct, and is allocated for
each task in the system. The size of this field is 528 bytes, resulting
in non-trivial bloat of task_struct, and the associated memory overhead
may impact performance on systems with many processes.
This allocation is only used if the task is scheduled out or interrupted
by a softirq while using the FP/SIMD unit in kernel mode, and given that
calls to kernel_neon_begin() and kernel_neon_end() are now guaranteed to
originate from the same lexical scope, it is possible to transparently
allocate this buffer on the caller's stack instead.
Signed-off-by: Ard Biesheuvel <ardb@...nel.org>
---
arch/arm64/include/asm/neon.h | 4 +--
arch/arm64/include/asm/processor.h | 2 +-
arch/arm64/kernel/fpsimd.c | 26 ++++++++++++++------
3 files changed, 21 insertions(+), 11 deletions(-)
diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h
index 4e24f1058b55..acaac98ff449 100644
--- a/arch/arm64/include/asm/neon.h
+++ b/arch/arm64/include/asm/neon.h
@@ -13,10 +13,10 @@
#define cpu_has_neon() system_supports_fpsimd()
-void __kernel_neon_begin(void);
+void __kernel_neon_begin(struct user_fpsimd_state *);
void __kernel_neon_end(void);
-#define kernel_neon_begin() do { __kernel_neon_begin()
+#define kernel_neon_begin() do { __kernel_neon_begin(&(struct user_fpsimd_state){})
#define kernel_neon_end() __kernel_neon_end(); } while (0)
#endif /* ! __ASM_NEON_H */
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 61d62bfd5a7b..226e635c53d9 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -172,7 +172,7 @@ struct thread_struct {
unsigned long fault_code; /* ESR_EL1 value */
struct debug_info debug; /* debugging */
- struct user_fpsimd_state kernel_fpsimd_state;
+ struct user_fpsimd_state *kernel_fpsimd_state;
unsigned int kernel_fpsimd_cpu;
#ifdef CONFIG_ARM64_PTR_AUTH
struct ptrauth_keys_user keys_user;
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index d7eb073d1366..919c53a26484 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1488,21 +1488,23 @@ static void fpsimd_load_kernel_state(struct task_struct *task)
* Elide the load if this CPU holds the most recent kernel mode
* FPSIMD context of the current task.
*/
- if (last->st == &task->thread.kernel_fpsimd_state &&
+ if (last->st == task->thread.kernel_fpsimd_state &&
task->thread.kernel_fpsimd_cpu == smp_processor_id())
return;
- fpsimd_load_state(&task->thread.kernel_fpsimd_state);
+ fpsimd_load_state(task->thread.kernel_fpsimd_state);
}
static void fpsimd_save_kernel_state(struct task_struct *task)
{
struct cpu_fp_state cpu_fp_state = {
- .st = &task->thread.kernel_fpsimd_state,
+ .st = task->thread.kernel_fpsimd_state,
.to_save = FP_STATE_FPSIMD,
};
- fpsimd_save_state(&task->thread.kernel_fpsimd_state);
+ BUG_ON(!cpu_fp_state.st);
+
+ fpsimd_save_state(task->thread.kernel_fpsimd_state);
fpsimd_bind_state_to_cpu(&cpu_fp_state);
task->thread.kernel_fpsimd_cpu = smp_processor_id();
@@ -1773,6 +1775,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
void fpsimd_flush_task_state(struct task_struct *t)
{
t->thread.fpsimd_cpu = NR_CPUS;
+ t->thread.kernel_fpsimd_state = NULL;
/*
* If we don't support fpsimd, bail out after we have
* reset the fpsimd_cpu for this task and clear the
@@ -1833,7 +1836,7 @@ void fpsimd_save_and_flush_cpu_state(void)
* The caller may freely use the FPSIMD registers until kernel_neon_end() is
* called.
*/
-void __kernel_neon_begin(void)
+void __kernel_neon_begin(struct user_fpsimd_state *s)
{
if (WARN_ON(!system_supports_fpsimd()))
return;
@@ -1849,6 +1852,13 @@ void __kernel_neon_begin(void)
} else {
fpsimd_save_user_state();
+ /*
+ * Record the caller provided buffer as the kernel mode FP/SIMD
+ * buffer for this task, so that the state can be preserved and
+ * restored on a context switch.
+ */
+ current->thread.kernel_fpsimd_state = s;
+
/*
* Set the thread flag so that the kernel mode FPSIMD state
* will be context switched along with the rest of the task
@@ -1899,8 +1909,8 @@ void __kernel_neon_end(void)
if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq() &&
test_thread_flag(TIF_KERNEL_FPSTATE))
fpsimd_load_kernel_state(current);
- else
- clear_thread_flag(TIF_KERNEL_FPSTATE);
+ else if (test_and_clear_thread_flag(TIF_KERNEL_FPSTATE))
+ current->thread.kernel_fpsimd_state = NULL;
}
EXPORT_SYMBOL_GPL(__kernel_neon_end);
@@ -1936,7 +1946,7 @@ void __efi_fpsimd_begin(void)
WARN_ON(preemptible());
if (may_use_simd()) {
- __kernel_neon_begin();
+ __kernel_neon_begin(&efi_fpsimd_state);
} else {
/*
* If !efi_sve_state, SVE can't be in use yet and doesn't need
--
2.51.0.384.g4c02a37b29-goog
Powered by blists - more mailing lists