---

 b/arch/x86/include/asm/processor.h |    7 ++++++-
 b/arch/x86/kernel/fpu/core.c       |   11 +++--------
 b/arch/x86/kernel/fpu/init.c       |    2 +-
 3 files changed, 10 insertions(+), 10 deletions(-)

diff -puN arch/x86/kernel/fpu/core.c~debug-32bit-fpregs arch/x86/kernel/fpu/core.c
--- a/arch/x86/kernel/fpu/core.c~debug-32bit-fpregs	2024-06-13 10:39:30.819589794 -0700
+++ b/arch/x86/kernel/fpu/core.c	2024-06-13 12:37:46.166450575 -0700
@@ -57,7 +57,7 @@ struct fpu *x86_task_fpu(struct task_str
 	if (WARN_ON_ONCE(task->flags & PF_KTHREAD))
 		return NULL;
 
-	return (void *)task + sizeof(*task);
+	return __x86_task_fpu(task);
 }
 #endif
 
@@ -594,13 +594,8 @@ static int update_fpu_shstk(struct task_
 int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal,
 	      unsigned long ssp)
 {
-	/*
-	 * We allocate the new FPU structure right after the end of the task struct.
-	 * task allocation size already took this into account.
-	 *
-	 * This is safe because task_struct size is a multiple of cacheline size.
-	 */
-	struct fpu *dst_fpu = (void *)dst + sizeof(*dst);
+	/* 'dst' is partially built, disable debugging:  */
+	struct fpu *dst_fpu = __x86_task_fpu(dst);
 
 	BUILD_BUG_ON(sizeof(*dst) % SMP_CACHE_BYTES != 0);
 
diff -puN arch/x86/include/asm/processor.h~debug-32bit-fpregs arch/x86/include/asm/processor.h
--- a/arch/x86/include/asm/processor.h~debug-32bit-fpregs	2024-06-13 11:09:19.508181013 -0700
+++ b/arch/x86/include/asm/processor.h	2024-06-13 12:36:24.480086965 -0700
@@ -504,10 +504,15 @@ struct thread_struct {
 #endif
 };
 
+/* XSAVE requrires 64-byte alignment: */
+#define fpu_align(x) ALIGN(x, 64)
+
+#define __x86_task_fpu(task)	((struct fpu *)((void *)(task) + fpu_align(sizeof(*(task)))))
+
 #ifdef CONFIG_X86_DEBUG_FPU
 extern struct fpu *x86_task_fpu(struct task_struct *task);
 #else
-# define x86_task_fpu(task)	((struct fpu *)((void *)(task) + sizeof(*(task))))
+#define x86_task_fpu(task)	__x86_task_fpu(task)
 #endif
 
 /*
diff -puN arch/x86/kernel/fpu/init.c~debug-32bit-fpregs arch/x86/kernel/fpu/init.c
--- a/arch/x86/kernel/fpu/init.c~debug-32bit-fpregs	2024-06-13 11:55:22.597114454 -0700
+++ b/arch/x86/kernel/fpu/init.c	2024-06-13 12:39:15.924492560 -0700
@@ -158,7 +158,7 @@ static void __init fpu__init_task_struct
 {
 	int task_size = sizeof(struct task_struct);
 
-	task_size += sizeof(struct fpu);
+	task_size += fpu_align(sizeof(struct fpu));
 
 	/*
 	 * Subtract off the static size of the register state.
_