Date:   Thu, 26 Jan 2017 12:26:23 +0100
From:   Ingo Molnar <mingo@...nel.org>
To:     linux-kernel@...r.kernel.org
Cc:     Andrew Morton <akpm@...ux-foundation.org>,
        Andy Lutomirski <luto@...capital.net>,
        Borislav Petkov <bp@...en8.de>,
        Dave Hansen <dave.hansen@...ux.intel.com>,
        Fenghua Yu <fenghua.yu@...el.com>,
        "H . Peter Anvin" <hpa@...or.com>,
        Linus Torvalds <torvalds@...ux-foundation.org>,
        Oleg Nesterov <oleg@...hat.com>,
        Peter Zijlstra <peterz@...radead.org>,
        Rik van Riel <riel@...hat.com>,
        Thomas Gleixner <tglx@...utronix.de>,
        Yu-cheng Yu <yu-cheng.yu@...el.com>
Subject: [PATCH 1/7] x86/fpu: Simplify the fpu->last_cpu logic and rename it to fpu->fpregs_cached

fpu->last_cpu records the last CPU a given FPU context structure was used on.
This enables an important optimization: if a task schedules out to a kernel
thread and then gets scheduled back after only FPU-inactive kernel threads
executed, the FPU state in the registers is still intact and the FPU restore
can be skipped - speeding up the context switch.
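
For illustration, the fast path this enables looks roughly like the sketch
below. (The helper name and signature are illustrative only;
fpregs_state_valid() and copy_kernel_to_fpregs() are the real primitives.)

	/*
	 * Illustrative sketch of the switch-in fast path: skip the costly
	 * FPU register restore when the CPU still holds this context.
	 */
	static inline void fpu_switch_in(struct fpu *new_fpu, int cpu)
	{
		if (fpregs_state_valid(new_fpu, cpu))
			return;	/* in-register state still intact: nothing to do */

		copy_kernel_to_fpregs(&new_fpu->state);
	}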

The same logic can be implemented in a slightly simpler way, using a single
boolean flag: fpu->fpregs_cached tells us whether the context's FPU registers
are still cached in the CPU.

The only difference is that this flag has to be invalidated when a task is
migrated away from its CPU - but that is a slow path compared to context
switches.
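
Putting it together, the intended lifecycle of the new flag is (a simplified
summary for review convenience, derived from the hunks below):

	/*
	 * fpu->fpregs_cached lifecycle (simplified):
	 *
	 *   switch-out, fpregs saved and left intact  -> fpregs_cached = 1
	 *   switch-out, save clobbered the registers  -> fpregs_cached = 0
	 *   fpu__copy() of a newly forked task        -> fpregs_cached = 0
	 *   task migrated to another CPU              -> fpregs_cached = 0
	 *
	 * On switch-in the restore is skipped iff fpregs_cached is set and
	 * fpu_fpregs_owner_ctx still points to this context.
	 */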

Cc: Andy Lutomirski <luto@...nel.org>
Cc: Borislav Petkov <bp@...en8.de>
Cc: Dave Hansen <dave.hansen@...ux.intel.com>
Cc: Fenghua Yu <fenghua.yu@...el.com>
Cc: H. Peter Anvin <hpa@...or.com>
Cc: Linus Torvalds <torvalds@...ux-foundation.org>
Cc: Oleg Nesterov <oleg@...hat.com>
Cc: Peter Zijlstra <peterz@...radead.org>
Cc: Rik van Riel <riel@...hat.com>
Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: Yu-cheng Yu <yu-cheng.yu@...el.com>
Signed-off-by: Ingo Molnar <mingo@...nel.org>
---
 arch/x86/include/asm/fpu/internal.h | 15 ++++++++-------
 arch/x86/include/asm/fpu/types.h    | 24 ++++++++++--------------
 arch/x86/include/asm/switch_to.h    | 10 ++++++++++
 arch/x86/kernel/fpu/core.c          |  2 +-
 kernel/sched/core.c                 |  2 ++
 kernel/sched/sched.h                |  8 ++++++++
 6 files changed, 39 insertions(+), 22 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 255645f60ca2..2eaf93cf11cc 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -490,7 +490,7 @@ DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
 
 /*
  * The in-register FPU state for an FPU context on a CPU is assumed to be
- * valid if the fpu->last_cpu matches the CPU, and the fpu_fpregs_owner_ctx
+ * valid if fpu->fpregs_cached is still set, and if the fpu_fpregs_owner_ctx
  * matches the FPU.
  *
  * If the FPU register state is valid, the kernel can skip restoring the
@@ -512,12 +512,12 @@ static inline void __cpu_invalidate_fpregs_state(void)
 
 static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu)
 {
-	fpu->last_cpu = -1;
+	fpu->fpregs_cached = 0;
 }
 
 static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
 {
-	return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
+	return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && fpu->fpregs_cached;
 }
 
 /*
@@ -573,15 +573,16 @@ switch_fpu_prepare(struct fpu *old_fpu, int cpu)
 {
 	if (old_fpu->fpregs_active) {
 		if (!copy_fpregs_to_fpstate(old_fpu))
-			old_fpu->last_cpu = -1;
+			old_fpu->fpregs_cached = 0;
 		else
-			old_fpu->last_cpu = cpu;
+			old_fpu->fpregs_cached = 1;
 
 		/* But leave fpu_fpregs_owner_ctx! */
 		old_fpu->fpregs_active = 0;
 		trace_x86_fpu_regs_deactivated(old_fpu);
-	} else
-		old_fpu->last_cpu = -1;
+	} else {
+		old_fpu->fpregs_cached = 0;
+	}
 }
 
 /*
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 3c80f5b9c09d..3090b0d7b232 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -276,20 +276,6 @@ union fpregs_state {
  */
 struct fpu {
 	/*
-	 * @last_cpu:
-	 *
-	 * Records the last CPU on which this context was loaded into
-	 * FPU registers. (In the lazy-restore case we might be
-	 * able to reuse FPU registers across multiple context switches
-	 * this way, if no intermediate task used the FPU.)
-	 *
-	 * A value of -1 is used to indicate that the FPU state in context
-	 * memory is newer than the FPU state in registers, and that the
-	 * FPU state should be reloaded next time the task is run.
-	 */
-	unsigned int			last_cpu;
-
-	/*
 	 * @fpstate_active:
 	 *
 	 * This flag indicates whether this context is active: if the task
@@ -322,6 +308,16 @@ struct fpu {
 	unsigned char			fpregs_active;
 
 	/*
+	 * @fpregs_cached:
+	 *
+	 * This flag tells us whether this context is loaded into a CPU
+	 * right now.
+	 *
+	 * This is set to 0 if a task is migrated to another CPU.
+	 */
+	unsigned char			fpregs_cached;
+
+	/*
 	 * @state:
 	 *
 	 * In-memory copy of all FPU registers that we save/restore
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index fcc5cd387fd1..a7146dadb31d 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -72,4 +72,14 @@ do {									\
 	((last) = __switch_to_asm((prev), (next)));			\
 } while (0)
 
+
+/*
+ * The task-migration arch callback clears the FPU registers cache:
+ */
+static inline void arch_task_migrate(struct task_struct *p)
+{
+	p->thread.fpu.fpregs_cached = 0;
+}
+#define arch_task_migrate arch_task_migrate
+
 #endif /* _ASM_X86_SWITCH_TO_H */
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index e1114f070c2d..287f1cb32b59 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -190,7 +190,7 @@ EXPORT_SYMBOL_GPL(fpstate_init);
 int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 {
 	dst_fpu->fpregs_active = 0;
-	dst_fpu->last_cpu = -1;
+	dst_fpu->fpregs_cached = 0;
 
 	if (!src_fpu->fpstate_active || !static_cpu_has(X86_FEATURE_FPU))
 		return 0;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c56fb57f2991..7eb2f3041fde 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1253,6 +1253,8 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 			p->sched_class->migrate_task_rq(p);
 		p->se.nr_migrations++;
 		perf_event_task_migrate(p);
+
+		arch_task_migrate(p);
 	}
 
 	__set_task_cpu(p, new_cpu);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 7b34c7826ca5..ff8a894132e4 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1824,3 +1824,11 @@ static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) {}
 #else /* arch_scale_freq_capacity */
 #define arch_scale_freq_invariant()	(false)
 #endif
+
+/*
+ * Default task-migration arch callback:
+ */
+#ifndef arch_task_migrate
+static inline void arch_task_migrate(struct task_struct *p) { }
+#endif
+
-- 
2.7.4
