[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20240616105550.GA18292@redhat.com>
Date: Sun, 16 Jun 2024 12:55:50 +0200
From: Oleg Nesterov <oleg@...hat.com>
To: Ingo Molnar <mingo@...nel.org>
Cc: Nathan Chancellor <nathan@...nel.org>, linux-kernel@...r.kernel.org,
Andy Lutomirski <luto@...capital.net>,
Andrew Morton <akpm@...ux-foundation.org>,
Dave Hansen <dave@...1.net>, Peter Zijlstra <peterz@...radead.org>,
Borislav Petkov <bp@...en8.de>, Brian Gerst <brgerst@...il.com>,
"H . Peter Anvin" <hpa@...or.com>,
Linus Torvalds <torvalds@...ux-foundation.org>,
Thomas Gleixner <tglx@...utronix.de>,
Uros Bizjak <ubizjak@...il.com>
Subject: Re: [PATCH 10/9] x86/fpu: Fix 'struct fpu' misalignment on 32-bit
kernels
On 06/15, Oleg Nesterov wrote:
>
> So perhaps we can (later) change x86_task_fpu(), fpu_clone(), and
> fpu__init_task_struct_size() to use
>
> ALIGN(sizeof(struct task_struct), 64)
>
> and remove the alignment attribute in sched.h?
On second thought, perhaps this makes sense from the very beginning?
See the patch below, up to you.
> Or use ARCH_MIN_TASKALIGN == __alignof__(union fpregs_state) which is
> also used in fork_init()->kmem_cache_create().
Either way, I hope that CONFIG_X86_VSMP can't define ARCH_MIN_TASKALIGN
to be less than __alignof__(union fpregs_state).
Oleg.
---
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 64509c7f26c8..7887e9493330 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -507,6 +507,9 @@ struct thread_struct {
struct fpu *fpu;
};
+#define X86_TASK_SIZE \
+ ALIGN(sizeof(struct task_struct), __alignof__(union fpregs_state))
+
#define x86_task_fpu(task) ((task)->thread.fpu)
/*
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index f0c4367804b3..613198372764 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -591,7 +591,7 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal,
* This is safe because task_struct size is a multiple of cacheline size.
*/
struct fpu *src_fpu = x86_task_fpu(current);
- struct fpu *dst_fpu = (void *)dst + sizeof(*dst);
+ struct fpu *dst_fpu = (void *)dst + X86_TASK_SIZE;
BUILD_BUG_ON(sizeof(*dst) % SMP_CACHE_BYTES != 0);
BUG_ON(!src_fpu);
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 4e8d37b5a90b..8b43c83b82c7 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -71,16 +71,14 @@ static bool __init fpu__probe_without_cpuid(void)
return fsw == 0 && (fcw & 0x103f) == 0x003f;
}
-static struct fpu x86_init_fpu __read_mostly;
+static struct fpu x86_init_fpu __aligned(64) __read_mostly;
static void __init fpu__init_system_early_generic(void)
{
- int this_cpu = smp_processor_id();
-
fpstate_reset(&x86_init_fpu);
current->thread.fpu = &x86_init_fpu;
- per_cpu(fpu_fpregs_owner_ctx, this_cpu) = &x86_init_fpu;
- x86_init_fpu.last_cpu = this_cpu;
+ set_thread_flag(TIF_NEED_FPU_LOAD);
+ x86_init_fpu.last_cpu = -1;
if (!boot_cpu_has(X86_FEATURE_CPUID) &&
!test_bit(X86_FEATURE_FPU, (unsigned long *)cpu_caps_cleared)) {
@@ -157,7 +155,7 @@ static void __init fpu__init_system_generic(void)
*/
static void __init fpu__init_task_struct_size(void)
{
- int task_size = sizeof(struct task_struct);
+ int task_size = X86_TASK_SIZE;
task_size += sizeof(struct fpu);
Powered by blists - more mailing lists