Message-Id: <1476734984-13839-4-git-send-email-riel@redhat.com>
Date:   Mon, 17 Oct 2016 16:09:44 -0400
From:   riel@...hat.com
To:     linux-kernel@...r.kernel.org
Cc:     mingo@...nel.org, bp@...en8.de, torvalds@...ux-foundation.org,
        luto@...nel.org, dave.hansen@...el.linux.com, tglx@...utronix.de,
        hpa@...or.com
Subject: [PATCH RFC 3/3] x86/fpu: defer FPU state load until return to userspace

From: Rik van Riel <riel@...hat.com>

Defer loading of FPU state until return to userspace. This allows
the kernel to skip loading FPU state for tasks that stay in kernel
mode, or for tasks that end up with repeated invocations of
kernel_fpu_begin().

It also increases the chances that a task's FPU state will remain
valid in the FPU registers until it is scheduled back in, allowing
us to skip restoring that task's FPU state altogether.

This also lays the groundwork for not having to restore qemu
userspace FPU state in KVM VCPU threads when merely returning to the
host kernel because the guest went idle or is running a kernel
thread. That functionality will come in a later patch.
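
For illustration only (not part of the patch): a minimal, self-contained
C sketch of the deferral pattern described above. All names here are
made up for the example (struct task, TIF_LOAD_FPU_BIT,
load_fpu_registers); the point is that the context switch only marks the
task, and the costly register load happens once, on the way back to
userspace, and only if the task actually returns there.

#include <stdbool.h>
#include <stdio.h>

#define TIF_LOAD_FPU_BIT  (1u << 0)     /* illustrative "load FPU" flag bit */

struct task {
	unsigned int tif_flags;         /* per-task work flags */
	bool fpstate_active;            /* task has FPU state worth loading */
};

/* Stand-in for the real register restore (the expensive operation). */
static void load_fpu_registers(struct task *t)
{
	printf("loading FPU registers for task %p\n", (void *)t);
}

/* Context switch: just mark the task instead of loading eagerly. */
static void switch_fpu_finish(struct task *next)
{
	next->tif_flags |= TIF_LOAD_FPU_BIT;
}

/* Exit-to-usermode path: perform the deferred load if it is still pending. */
static void prepare_exit_to_usermode(struct task *t)
{
	if (t->tif_flags & TIF_LOAD_FPU_BIT) {
		t->tif_flags &= ~TIF_LOAD_FPU_BIT;
		if (t->fpstate_active)
			load_fpu_registers(t);
	}
}

int main(void)
{
	struct task t = { .tif_flags = 0, .fpstate_active = true };

	switch_fpu_finish(&t);          /* scheduled in: only set the flag */
	/* ... task may run in kernel mode, reschedule, etc. ... */
	prepare_exit_to_usermode(&t);   /* the load happens only here */
	return 0;
}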

Signed-off-by: Rik van Riel <riel@...hat.com>
---
 arch/x86/entry/common.c             |  9 +++++++++
 arch/x86/include/asm/fpu/api.h      |  5 +++++
 arch/x86/include/asm/fpu/internal.h | 13 +++++--------
 arch/x86/include/asm/thread_info.h  |  4 +++-
 arch/x86/kernel/fpu/core.c          | 28 ++++++++++++++++++++++++----
 arch/x86/kernel/process_32.c        |  5 ++---
 arch/x86/kernel/process_64.c        |  5 ++---
 7 files changed, 50 insertions(+), 19 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index bdd9cc59d20f..0c11ee22f90b 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -27,6 +27,7 @@
 #include <asm/vdso.h>
 #include <asm/uaccess.h>
 #include <asm/cpufeature.h>
+#include <asm/fpu/api.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/syscalls.h>
@@ -189,6 +190,14 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
 	if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
 		exit_to_usermode_loop(regs, cached_flags);
 
+	/* Reload ti->flags; we may have rescheduled above. */
+	cached_flags = READ_ONCE(ti->flags);
+
+	if (unlikely(cached_flags & _TIF_LOAD_FPU)) {
+		clear_thread_flag(TIF_LOAD_FPU);
+		switch_fpu_return();
+	}
+
 #ifdef CONFIG_COMPAT
 	/*
 	 * Compat syscalls set TS_COMPAT.  Make sure we clear it before
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index 1429a7c736db..d7ef49a03b51 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -37,6 +37,11 @@ extern int  irq_ts_save(void);
 extern void irq_ts_restore(int TS_state);
 
 /*
+ * Load the task FPU state before returning to userspace.
+ */
+extern void switch_fpu_return(void);
+
+/*
  * Query the presence of one or more xfeatures. Works on any legacy CPU as well.
  *
  * If 'feature_name' is set then put a human-readable description of
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index d40deb337807..cccc0c059b41 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -601,8 +601,8 @@ static inline void make_fpregs_active_loadnew(void)
  *  - switch_fpu_prepare() saves the old state.
  *    This is done within the context of the old process.
  *
- *  - switch_fpu_finish() restores the new state as
- *    necessary.
+ *  - switch_fpu_finish() sets TIF_LOAD_FPU; the floating point state
+ *    will get loaded on return to userspace, or when the kernel needs it.
  */
 static inline void
 switch_fpu_prepare(struct fpu *old_fpu, int cpu)
@@ -628,13 +628,10 @@ switch_fpu_prepare(struct fpu *old_fpu, int cpu)
  * Set up the userspace FPU context for the new task, if the task
  * has used the FPU.
  */
-static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu)
+static inline void switch_fpu_finish(void)
 {
-	bool preload = static_cpu_has(X86_FEATURE_FPU) &&
-		       new_fpu->fpstate_active;
-
-	if (preload)
-		__make_fpregs_active(new_fpu, cpu);
+	if (static_cpu_has(X86_FEATURE_FPU))
+		set_thread_flag(TIF_LOAD_FPU);
 }
 
 /*
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 2aaca53c0974..9941d118f2cc 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -90,6 +90,7 @@ struct task_struct;
 #define TIF_SYSCALL_TRACEPOINT	28	/* syscall tracepoint instrumentation */
 #define TIF_ADDR32		29	/* 32-bit address space on 64 bits */
 #define TIF_X32			30	/* 32-bit native x86-64 binary */
+#define TIF_LOAD_FPU		31	/* load FPU on return to userspace */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
@@ -112,6 +113,7 @@ struct task_struct;
 #define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
 #define _TIF_ADDR32		(1 << TIF_ADDR32)
 #define _TIF_X32		(1 << TIF_X32)
+#define _TIF_LOAD_FPU		(1 << TIF_LOAD_FPU)
 
 /*
  * work to do in syscall_trace_enter().  Also includes TIF_NOHZ for
@@ -125,7 +127,7 @@ struct task_struct;
 /* work to do on any return to user space */
 #define _TIF_ALLWORK_MASK						\
 	((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT |	\
-	_TIF_NOHZ)
+	_TIF_NOHZ | _TIF_LOAD_FPU)
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW							\
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 34ba9d47c20f..09c4254a6e26 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -99,14 +99,14 @@ void __kernel_fpu_begin(void)
 
 	kernel_fpu_disable();
 
+	__cpu_invalidate_fpregs_state();
+
 	if (fpu->fpregs_active) {
 		/*
 		 * Ignore return value -- we don't care if reg state
 		 * is clobbered.
 		 */
 		copy_fpregs_to_fpstate(fpu);
-	} else {
-		__cpu_invalidate_fpregs_state();
 	}
 }
 EXPORT_SYMBOL(__kernel_fpu_begin);
@@ -115,8 +115,10 @@ void __kernel_fpu_end(void)
 {
 	struct fpu *fpu = &current->thread.fpu;
 
-	if (fpu->fpregs_active)
-		copy_kernel_to_fpregs(&fpu->state);
+	if (fpu->fpregs_active) {
+		switch_fpu_finish();
+		fpu->fpregs_active = 0;
+	}
 
 	kernel_fpu_enable();
 }
@@ -501,6 +503,24 @@ void fpu__clear(struct fpu *fpu)
 }
 
 /*
+ * Load FPU context before returning to userspace.
+ */
+void switch_fpu_return(void)
+{
+	if (!static_cpu_has(X86_FEATURE_FPU))
+		return;
+
+	/*
+	 * We should never return to user space without the task's
+	 * own FPU contents loaded into the registers. That makes it
+	 * a bug to not have the task's FPU state set up.
+	 */
+	WARN_ON_FPU(!fpstate_active());
+
+	make_fpregs_active();
+}
+
+/*
  * x87 math exception handling:
  */
 
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 7dc8c9c3d801..9103871f80f5 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -229,7 +229,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	struct thread_struct *prev = &prev_p->thread,
 			     *next = &next_p->thread;
 	struct fpu *prev_fpu = &prev->fpu;
-	struct fpu *next_fpu = &next->fpu;
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
 
@@ -294,9 +293,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	if (prev->gs | next->gs)
 		lazy_load_gs(next->gs);
 
-	switch_fpu_finish(next_fpu, cpu);
-
 	this_cpu_write(current_task, next_p);
 
+	switch_fpu_finish();
+
 	return prev_p;
 }
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 705669efb762..4b228e1e4423 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -260,7 +260,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	struct thread_struct *prev = &prev_p->thread;
 	struct thread_struct *next = &next_p->thread;
 	struct fpu *prev_fpu = &prev->fpu;
-	struct fpu *next_fpu = &next->fpu;
 	int cpu = smp_processor_id();
 	struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
 	unsigned prev_fsindex, prev_gsindex;
@@ -415,8 +414,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 		prev->gsbase = 0;
 	prev->gsindex = prev_gsindex;
 
-	switch_fpu_finish(next_fpu, cpu);
-
 	/*
 	 * Switch the PDA and FPU contexts.
 	 */
@@ -425,6 +422,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	/* Reload esp0 and ss1.  This changes current_thread_info(). */
 	load_sp0(tss, next);
 
+	switch_fpu_finish();
+
 	/*
 	 * Now maybe reload the debug registers and handle I/O bitmaps
 	 */
-- 
2.7.4
