lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250918063539.2640512-12-ardb+git@google.com>
Date: Thu, 18 Sep 2025 08:35:45 +0200
From: Ard Biesheuvel <ardb+git@...gle.com>
To: linux-arm-kernel@...ts.infradead.org
Cc: linux-crypto@...r.kernel.org, linux-kernel@...r.kernel.org, 
	herbert@...dor.apana.org.au, ebiggers@...nel.org, 
	Ard Biesheuvel <ardb@...nel.org>, Marc Zyngier <maz@...nel.org>, Will Deacon <will@...nel.org>, 
	Mark Rutland <mark.rutland@....com>, Kees Cook <keescook@...omium.org>, 
	Catalin Marinas <catalin.marinas@....com>, Mark Brown <broonie@...nel.org>
Subject: [PATCH 5/5] arm64/fpsimd: Allocate kernel mode FP/SIMD buffers on the stack

From: Ard Biesheuvel <ardb@...nel.org>

Commit aefbab8e77eb16b5

  ("arm64: fpsimd: Preserve/restore kernel mode NEON at context switch")

added a 'kernel_fpsimd_state' field to struct thread_struct, which is
the arch-specific portion of struct task_struct, and is allocated for
each task in the system. The size of this field is 528 bytes, which
bloats task_struct non-trivially, and the added memory overhead may
impact performance on systems with many processes.

This allocation is only used if the task is scheduled out or interrupted
by a softirq while using the FP/SIMD unit in kernel mode. Given that
calls to kernel_neon_begin() and kernel_neon_end() are now guaranteed to
originate from the same lexical scope, the buffer can instead be
allocated transparently on the caller's stack, where it remains live
until the matching kernel_neon_end().

Signed-off-by: Ard Biesheuvel <ardb@...nel.org>
---
 arch/arm64/include/asm/neon.h      |  4 +--
 arch/arm64/include/asm/processor.h |  2 +-
 arch/arm64/kernel/fpsimd.c         | 26 ++++++++++++++------
 3 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h
index 4e24f1058b55..acaac98ff449 100644
--- a/arch/arm64/include/asm/neon.h
+++ b/arch/arm64/include/asm/neon.h
@@ -13,10 +13,10 @@
 
 #define cpu_has_neon()		system_supports_fpsimd()
 
-void __kernel_neon_begin(void);
+void __kernel_neon_begin(struct user_fpsimd_state *);
 void __kernel_neon_end(void);
 
-#define kernel_neon_begin()	do { __kernel_neon_begin()
+#define kernel_neon_begin()	do { __kernel_neon_begin(&(struct user_fpsimd_state){})
 #define kernel_neon_end()	__kernel_neon_end(); } while (0)
 
 #endif /* ! __ASM_NEON_H */
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 61d62bfd5a7b..226e635c53d9 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -172,7 +172,7 @@ struct thread_struct {
 	unsigned long		fault_code;	/* ESR_EL1 value */
 	struct debug_info	debug;		/* debugging */
 
-	struct user_fpsimd_state	kernel_fpsimd_state;
+	struct user_fpsimd_state	*kernel_fpsimd_state;
 	unsigned int			kernel_fpsimd_cpu;
 #ifdef CONFIG_ARM64_PTR_AUTH
 	struct ptrauth_keys_user	keys_user;
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index d7eb073d1366..919c53a26484 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1488,21 +1488,23 @@ static void fpsimd_load_kernel_state(struct task_struct *task)
 	 * Elide the load if this CPU holds the most recent kernel mode
 	 * FPSIMD context of the current task.
 	 */
-	if (last->st == &task->thread.kernel_fpsimd_state &&
+	if (last->st == task->thread.kernel_fpsimd_state &&
 	    task->thread.kernel_fpsimd_cpu == smp_processor_id())
 		return;
 
-	fpsimd_load_state(&task->thread.kernel_fpsimd_state);
+	fpsimd_load_state(task->thread.kernel_fpsimd_state);
 }
 
 static void fpsimd_save_kernel_state(struct task_struct *task)
 {
 	struct cpu_fp_state cpu_fp_state = {
-		.st		= &task->thread.kernel_fpsimd_state,
+		.st		= task->thread.kernel_fpsimd_state,
 		.to_save	= FP_STATE_FPSIMD,
 	};
 
-	fpsimd_save_state(&task->thread.kernel_fpsimd_state);
+	BUG_ON(!cpu_fp_state.st);
+
+	fpsimd_save_state(task->thread.kernel_fpsimd_state);
 	fpsimd_bind_state_to_cpu(&cpu_fp_state);
 
 	task->thread.kernel_fpsimd_cpu = smp_processor_id();
@@ -1773,6 +1775,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
 void fpsimd_flush_task_state(struct task_struct *t)
 {
 	t->thread.fpsimd_cpu = NR_CPUS;
+	t->thread.kernel_fpsimd_state = NULL;
 	/*
 	 * If we don't support fpsimd, bail out after we have
 	 * reset the fpsimd_cpu for this task and clear the
@@ -1833,7 +1836,7 @@ void fpsimd_save_and_flush_cpu_state(void)
  * The caller may freely use the FPSIMD registers until kernel_neon_end() is
  * called.
  */
-void __kernel_neon_begin(void)
+void __kernel_neon_begin(struct user_fpsimd_state *s)
 {
 	if (WARN_ON(!system_supports_fpsimd()))
 		return;
@@ -1849,6 +1852,13 @@ void __kernel_neon_begin(void)
 	} else {
 		fpsimd_save_user_state();
 
+		/*
+		 * Record the caller provided buffer as the kernel mode FP/SIMD
+		 * buffer for this task, so that the state can be preserved and
+		 * restored on a context switch.
+		 */
+		current->thread.kernel_fpsimd_state = s;
+
 		/*
 		 * Set the thread flag so that the kernel mode FPSIMD state
 		 * will be context switched along with the rest of the task
@@ -1899,8 +1909,8 @@ void __kernel_neon_end(void)
 	if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq() &&
 	    test_thread_flag(TIF_KERNEL_FPSTATE))
 		fpsimd_load_kernel_state(current);
-	else
-		clear_thread_flag(TIF_KERNEL_FPSTATE);
+	else if (test_and_clear_thread_flag(TIF_KERNEL_FPSTATE))
+		current->thread.kernel_fpsimd_state = NULL;
 }
 EXPORT_SYMBOL_GPL(__kernel_neon_end);
 
@@ -1936,7 +1946,7 @@ void __efi_fpsimd_begin(void)
 	WARN_ON(preemptible());
 
 	if (may_use_simd()) {
-		__kernel_neon_begin();
+		__kernel_neon_begin(&efi_fpsimd_state);
 	} else {
 		/*
 		 * If !efi_sve_state, SVE can't be in use yet and doesn't need
-- 
2.51.0.384.g4c02a37b29-goog


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ