lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-ID: <20160725153327.GA5468@gmail.com>
Date:	Mon, 25 Jul 2016 17:33:27 +0200
From:	Ingo Molnar <mingo@...nel.org>
To:	Linus Torvalds <torvalds@...ux-foundation.org>
Cc:	linux-kernel@...r.kernel.org, Thomas Gleixner <tglx@...utronix.de>,
	"H. Peter Anvin" <hpa@...or.com>,
	Peter Zijlstra <a.p.zijlstra@...llo.nl>,
	Borislav Petkov <bp@...en8.de>,
	Andrew Morton <akpm@...ux-foundation.org>
Subject: [GIT PULL] x86/fpu changes for v4.8

Linus,

Please pull the latest x86-fpu-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-fpu-for-linus

   # HEAD: ec3ed4a2104b8d1ab8da2db5b1221b2ba8a7a6e1 x86/fpu: Do not BUG_ON() in early FPU code

The main x86 FPU changes in this cycle were:

 - A large series of cleanups, fixes and enhancements to re-enable the XSAVES 
   instruction on Intel CPUs - which is the most advanced instruction to do FPU
   context switches. (Yu-cheng Yu, Fenghua Yu)

 - Add FPU tracepoints for the FPU state machine (Dave Hansen)

 Thanks,

	Ingo

------------------>
Dave Hansen (2):
      x86/fpu: Add tracepoints to dump FPU state at key points
      x86/fpu: Do not BUG_ON() in early FPU code

Fenghua Yu (3):
      x86/fpu/xstate: Define and use 'fpu_user_xstate_size'
      x86/fpu/xstate: Rename 'xstate_size' to 'fpu_kernel_xstate_size', to distinguish it from 'fpu_user_xstate_size'
      x86/fpu/xstate: Keep init_fpstate.xsave.header.xfeatures as zero for init optimization

Yu-cheng Yu (10):
      x86/fpu/xstate: Copy xstate registers directly to the signal frame when compacted format is in use
      x86/fpu/xstate: Align xstate components according to CPUID
      x86/fpu/xstate: Fix supervisor xstate component offset
      x86/fpu/xstate: Fix PTRACE frames for XSAVES
      x86/fpu/xstate: Fix XSTATE component offset print out
      x86/fpu/xstate: Fix xstate_offsets, xstate_sizes for non-extended xstates
      x86/fpu/xstate: Fix __fpu_restore_sig() for XSAVES
      x86/fpu/xstate: Return NULL for disabled xstate component address
      x86/fpu/xstate: Fix fpstate_init() for XRSTORS
      x86/fpu/xstate: Re-enable XSAVES


 arch/x86/include/asm/fpu/internal.h |   5 +
 arch/x86/include/asm/fpu/types.h    |   7 +
 arch/x86/include/asm/fpu/xstate.h   |  10 +-
 arch/x86/include/asm/processor.h    |   3 +-
 arch/x86/include/asm/trace/fpu.h    | 119 ++++++++++
 arch/x86/kernel/fpu/core.c          |  33 ++-
 arch/x86/kernel/fpu/init.c          |  34 +--
 arch/x86/kernel/fpu/regset.c        |  52 +++--
 arch/x86/kernel/fpu/signal.c        |  46 +++-
 arch/x86/kernel/fpu/xstate.c        | 451 ++++++++++++++++++++++++++++--------
 arch/x86/kvm/x86.c                  |   6 +-
 11 files changed, 608 insertions(+), 158 deletions(-)
 create mode 100644 arch/x86/include/asm/trace/fpu.h

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 31ac8e6d9f36..116b58347501 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -18,6 +18,7 @@
 #include <asm/fpu/api.h>
 #include <asm/fpu/xstate.h>
 #include <asm/cpufeature.h>
+#include <asm/trace/fpu.h>
 
 /*
  * High level FPU state handling functions:
@@ -524,6 +525,7 @@ static inline void __fpregs_deactivate(struct fpu *fpu)
 
 	fpu->fpregs_active = 0;
 	this_cpu_write(fpu_fpregs_owner_ctx, NULL);
+	trace_x86_fpu_regs_deactivated(fpu);
 }
 
 /* Must be paired with a 'clts' (fpregs_activate_hw()) before! */
@@ -533,6 +535,7 @@ static inline void __fpregs_activate(struct fpu *fpu)
 
 	fpu->fpregs_active = 1;
 	this_cpu_write(fpu_fpregs_owner_ctx, fpu);
+	trace_x86_fpu_regs_activated(fpu);
 }
 
 /*
@@ -604,11 +607,13 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
 
 		/* But leave fpu_fpregs_owner_ctx! */
 		old_fpu->fpregs_active = 0;
+		trace_x86_fpu_regs_deactivated(old_fpu);
 
 		/* Don't change CR0.TS if we just switch! */
 		if (fpu.preload) {
 			new_fpu->counter++;
 			__fpregs_activate(new_fpu);
+			trace_x86_fpu_regs_activated(new_fpu);
 			prefetch(&new_fpu->state);
 		} else {
 			__fpregs_deactivate_hw();
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 36b90bbfc69f..48df486b02f9 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -122,6 +122,7 @@ enum xfeature {
 #define XFEATURE_MASK_OPMASK		(1 << XFEATURE_OPMASK)
 #define XFEATURE_MASK_ZMM_Hi256		(1 << XFEATURE_ZMM_Hi256)
 #define XFEATURE_MASK_Hi16_ZMM		(1 << XFEATURE_Hi16_ZMM)
+#define XFEATURE_MASK_PT		(1 << XFEATURE_PT_UNIMPLEMENTED_SO_FAR)
 #define XFEATURE_MASK_PKRU		(1 << XFEATURE_PKRU)
 
 #define XFEATURE_MASK_FPSSE		(XFEATURE_MASK_FP | XFEATURE_MASK_SSE)
@@ -231,6 +232,12 @@ struct xstate_header {
 } __attribute__((packed));
 
 /*
+ * xstate_header.xcomp_bv[63] indicates that the extended_state_area
+ * is in compacted format.
+ */
+#define XCOMP_BV_COMPACTED_FORMAT ((u64)1 << 63)
+
+/*
  * This is our most modern FPU state format, as saved by the XSAVE
  * and restored by the XRSTOR instructions.
  *
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 38951b0fcc5a..ae55a43e09c0 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -18,6 +18,9 @@
 #define XSAVE_YMM_SIZE	    256
 #define XSAVE_YMM_OFFSET    (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
 
+/* Supervisor features */
+#define XFEATURE_MASK_SUPERVISOR (XFEATURE_MASK_PT)
+
 /* Supported features which support lazy state saving */
 #define XFEATURE_MASK_LAZY	(XFEATURE_MASK_FP | \
 				 XFEATURE_MASK_SSE | \
@@ -39,7 +42,6 @@
 #define REX_PREFIX
 #endif
 
-extern unsigned int xstate_size;
 extern u64 xfeatures_mask;
 extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
 
@@ -48,5 +50,9 @@ extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
 void fpu__xstate_clear_all_cpu_caps(void);
 void *get_xsave_addr(struct xregs_state *xsave, int xstate);
 const void *get_xsave_field_ptr(int xstate_field);
-
+int using_compacted_format(void);
+int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
+			void __user *ubuf, struct xregs_state *xsave);
+int copyin_to_xsaves(const void *kbuf, const void __user *ubuf,
+		     struct xregs_state *xsave);
 #endif
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 62c6cc3cc5d3..965c5d212c31 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -367,7 +367,8 @@ DECLARE_PER_CPU(struct irq_stack *, hardirq_stack);
 DECLARE_PER_CPU(struct irq_stack *, softirq_stack);
 #endif	/* X86_64 */
 
-extern unsigned int xstate_size;
+extern unsigned int fpu_kernel_xstate_size;
+extern unsigned int fpu_user_xstate_size;
 
 struct perf_event;
 
diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
new file mode 100644
index 000000000000..9217ab1f5bf6
--- /dev/null
+++ b/arch/x86/include/asm/trace/fpu.h
@@ -0,0 +1,119 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM x86_fpu
+
+#if !defined(_TRACE_FPU_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_FPU_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(x86_fpu,
+	TP_PROTO(struct fpu *fpu),
+	TP_ARGS(fpu),
+
+	TP_STRUCT__entry(
+		__field(struct fpu *, fpu)
+		__field(bool, fpregs_active)
+		__field(bool, fpstate_active)
+		__field(int, counter)
+		__field(u64, xfeatures)
+		__field(u64, xcomp_bv)
+		),
+
+	TP_fast_assign(
+		__entry->fpu		= fpu;
+		__entry->fpregs_active	= fpu->fpregs_active;
+		__entry->fpstate_active	= fpu->fpstate_active;
+		__entry->counter	= fpu->counter;
+		if (boot_cpu_has(X86_FEATURE_OSXSAVE)) {
+			__entry->xfeatures = fpu->state.xsave.header.xfeatures;
+			__entry->xcomp_bv  = fpu->state.xsave.header.xcomp_bv;
+		}
+	),
+	TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d counter: %d xfeatures: %llx xcomp_bv: %llx",
+			__entry->fpu,
+			__entry->fpregs_active,
+			__entry->fpstate_active,
+			__entry->counter,
+			__entry->xfeatures,
+			__entry->xcomp_bv
+	)
+);
+
+DEFINE_EVENT(x86_fpu, x86_fpu_state,
+	TP_PROTO(struct fpu *fpu),
+	TP_ARGS(fpu)
+);
+
+DEFINE_EVENT(x86_fpu, x86_fpu_before_save,
+	TP_PROTO(struct fpu *fpu),
+	TP_ARGS(fpu)
+);
+
+DEFINE_EVENT(x86_fpu, x86_fpu_after_save,
+	TP_PROTO(struct fpu *fpu),
+	TP_ARGS(fpu)
+);
+
+DEFINE_EVENT(x86_fpu, x86_fpu_before_restore,
+	TP_PROTO(struct fpu *fpu),
+	TP_ARGS(fpu)
+);
+
+DEFINE_EVENT(x86_fpu, x86_fpu_after_restore,
+	TP_PROTO(struct fpu *fpu),
+	TP_ARGS(fpu)
+);
+
+DEFINE_EVENT(x86_fpu, x86_fpu_regs_activated,
+	TP_PROTO(struct fpu *fpu),
+	TP_ARGS(fpu)
+);
+
+DEFINE_EVENT(x86_fpu, x86_fpu_regs_deactivated,
+	TP_PROTO(struct fpu *fpu),
+	TP_ARGS(fpu)
+);
+
+DEFINE_EVENT(x86_fpu, x86_fpu_activate_state,
+	TP_PROTO(struct fpu *fpu),
+	TP_ARGS(fpu)
+);
+
+DEFINE_EVENT(x86_fpu, x86_fpu_deactivate_state,
+	TP_PROTO(struct fpu *fpu),
+	TP_ARGS(fpu)
+);
+
+DEFINE_EVENT(x86_fpu, x86_fpu_init_state,
+	TP_PROTO(struct fpu *fpu),
+	TP_ARGS(fpu)
+);
+
+DEFINE_EVENT(x86_fpu, x86_fpu_dropped,
+	TP_PROTO(struct fpu *fpu),
+	TP_ARGS(fpu)
+);
+
+DEFINE_EVENT(x86_fpu, x86_fpu_copy_src,
+	TP_PROTO(struct fpu *fpu),
+	TP_ARGS(fpu)
+);
+
+DEFINE_EVENT(x86_fpu, x86_fpu_copy_dst,
+	TP_PROTO(struct fpu *fpu),
+	TP_ARGS(fpu)
+);
+
+DEFINE_EVENT(x86_fpu, x86_fpu_xstate_check_failed,
+	TP_PROTO(struct fpu *fpu),
+	TP_ARGS(fpu)
+);
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH asm/trace/
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE fpu
+#endif /* _TRACE_FPU_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 97027545a72d..3fc03a09a93b 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -8,10 +8,14 @@
 #include <asm/fpu/internal.h>
 #include <asm/fpu/regset.h>
 #include <asm/fpu/signal.h>
+#include <asm/fpu/types.h>
 #include <asm/traps.h>
 
 #include <linux/hardirq.h>
 
+#define CREATE_TRACE_POINTS
+#include <asm/trace/fpu.h>
+
 /*
  * Represents the initial FPU state. It's mostly (but not completely) zeroes,
  * depending on the FPU hardware format:
@@ -192,6 +196,7 @@ void fpu__save(struct fpu *fpu)
 	WARN_ON_FPU(fpu != &current->thread.fpu);
 
 	preempt_disable();
+	trace_x86_fpu_before_save(fpu);
 	if (fpu->fpregs_active) {
 		if (!copy_fpregs_to_fpstate(fpu)) {
 			if (use_eager_fpu())
@@ -200,6 +205,7 @@ void fpu__save(struct fpu *fpu)
 				fpregs_deactivate(fpu);
 		}
 	}
+	trace_x86_fpu_after_save(fpu);
 	preempt_enable();
 }
 EXPORT_SYMBOL_GPL(fpu__save);
@@ -222,7 +228,14 @@ void fpstate_init(union fpregs_state *state)
 		return;
 	}
 
-	memset(state, 0, xstate_size);
+	memset(state, 0, fpu_kernel_xstate_size);
+
+	/*
+	 * XRSTORS requires that this bit is set in xcomp_bv, or
+	 * it will #GP. Make sure it is replaced after the memset().
+	 */
+	if (static_cpu_has(X86_FEATURE_XSAVES))
+		state->xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT;
 
 	if (static_cpu_has(X86_FEATURE_FXSR))
 		fpstate_init_fxstate(&state->fxsave);
@@ -247,7 +260,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 	 * leak into the child task:
 	 */
 	if (use_eager_fpu())
-		memset(&dst_fpu->state.xsave, 0, xstate_size);
+		memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
 
 	/*
 	 * Save current FPU registers directly into the child
@@ -266,7 +279,8 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 	 */
 	preempt_disable();
 	if (!copy_fpregs_to_fpstate(dst_fpu)) {
-		memcpy(&src_fpu->state, &dst_fpu->state, xstate_size);
+		memcpy(&src_fpu->state, &dst_fpu->state,
+		       fpu_kernel_xstate_size);
 
 		if (use_eager_fpu())
 			copy_kernel_to_fpregs(&src_fpu->state);
@@ -275,6 +289,9 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 	}
 	preempt_enable();
 
+	trace_x86_fpu_copy_src(src_fpu);
+	trace_x86_fpu_copy_dst(dst_fpu);
+
 	return 0;
 }
 
@@ -288,7 +305,9 @@ void fpu__activate_curr(struct fpu *fpu)
 
 	if (!fpu->fpstate_active) {
 		fpstate_init(&fpu->state);
+		trace_x86_fpu_init_state(fpu);
 
+		trace_x86_fpu_activate_state(fpu);
 		/* Safe to do for the current task: */
 		fpu->fpstate_active = 1;
 	}
@@ -314,7 +333,9 @@ void fpu__activate_fpstate_read(struct fpu *fpu)
 	} else {
 		if (!fpu->fpstate_active) {
 			fpstate_init(&fpu->state);
+			trace_x86_fpu_init_state(fpu);
 
+			trace_x86_fpu_activate_state(fpu);
 			/* Safe to do for current and for stopped child tasks: */
 			fpu->fpstate_active = 1;
 		}
@@ -347,7 +368,9 @@ void fpu__activate_fpstate_write(struct fpu *fpu)
 		fpu->last_cpu = -1;
 	} else {
 		fpstate_init(&fpu->state);
+		trace_x86_fpu_init_state(fpu);
 
+		trace_x86_fpu_activate_state(fpu);
 		/* Safe to do for stopped child tasks: */
 		fpu->fpstate_active = 1;
 	}
@@ -432,9 +455,11 @@ void fpu__restore(struct fpu *fpu)
 
 	/* Avoid __kernel_fpu_begin() right after fpregs_activate() */
 	kernel_fpu_disable();
+	trace_x86_fpu_before_restore(fpu);
 	fpregs_activate(fpu);
 	copy_kernel_to_fpregs(&fpu->state);
 	fpu->counter++;
+	trace_x86_fpu_after_restore(fpu);
 	kernel_fpu_enable();
 }
 EXPORT_SYMBOL_GPL(fpu__restore);
@@ -463,6 +488,8 @@ void fpu__drop(struct fpu *fpu)
 
 	fpu->fpstate_active = 0;
 
+	trace_x86_fpu_dropped(fpu);
+
 	preempt_enable();
 }
 
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index aacfd7a82cec..93982aebb398 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -145,8 +145,8 @@ static void __init fpu__init_system_generic(void)
  * This is inherent to the XSAVE architecture which puts all state
  * components into a single, continuous memory block:
  */
-unsigned int xstate_size;
-EXPORT_SYMBOL_GPL(xstate_size);
+unsigned int fpu_kernel_xstate_size;
+EXPORT_SYMBOL_GPL(fpu_kernel_xstate_size);
 
 /* Get alignment of the TYPE. */
 #define TYPE_ALIGN(TYPE) offsetof(struct { char x; TYPE test; }, test)
@@ -178,7 +178,7 @@ static void __init fpu__init_task_struct_size(void)
 	 * Add back the dynamically-calculated register state
 	 * size.
 	 */
-	task_size += xstate_size;
+	task_size += fpu_kernel_xstate_size;
 
 	/*
 	 * We dynamically size 'struct fpu', so we require that
@@ -195,7 +195,7 @@ static void __init fpu__init_task_struct_size(void)
 }
 
 /*
- * Set up the xstate_size based on the legacy FPU context size.
+ * Set up the user and kernel xstate sizes based on the legacy FPU context size.
  *
  * We set this up first, and later it will be overwritten by
  * fpu__init_system_xstate() if the CPU knows about xstates.
@@ -208,7 +208,7 @@ static void __init fpu__init_system_xstate_size_legacy(void)
 	on_boot_cpu = 0;
 
 	/*
-	 * Note that xstate_size might be overwriten later during
+	 * Note that xstate sizes might be overwritten later during
 	 * fpu__init_system_xstate().
 	 */
 
@@ -219,27 +219,17 @@ static void __init fpu__init_system_xstate_size_legacy(void)
 		 */
 		setup_clear_cpu_cap(X86_FEATURE_XSAVE);
 		setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
-		xstate_size = sizeof(struct swregs_state);
+		fpu_kernel_xstate_size = sizeof(struct swregs_state);
 	} else {
 		if (boot_cpu_has(X86_FEATURE_FXSR))
-			xstate_size = sizeof(struct fxregs_state);
+			fpu_kernel_xstate_size =
+				sizeof(struct fxregs_state);
 		else
-			xstate_size = sizeof(struct fregs_state);
+			fpu_kernel_xstate_size =
+				sizeof(struct fregs_state);
 	}
-	/*
-	 * Quirk: we don't yet handle the XSAVES* instructions
-	 * correctly, as we don't correctly convert between
-	 * standard and compacted format when interfacing
-	 * with user-space - so disable it for now.
-	 *
-	 * The difference is small: with recent CPUs the
-	 * compacted format is only marginally smaller than
-	 * the standard FPU state format.
-	 *
-	 * ( This is easy to backport while we are fixing
-	 *   XSAVES* support. )
-	 */
-	setup_clear_cpu_cap(X86_FEATURE_XSAVES);
+
+	fpu_user_xstate_size = fpu_kernel_xstate_size;
 }
 
 /*
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 81422dfb152b..c114b132d121 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -4,6 +4,7 @@
 #include <asm/fpu/internal.h>
 #include <asm/fpu/signal.h>
 #include <asm/fpu/regset.h>
+#include <asm/fpu/xstate.h>
 
 /*
  * The xstateregs_active() routine is the same as the regset_fpregs_active() routine,
@@ -85,21 +86,26 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
 	if (!boot_cpu_has(X86_FEATURE_XSAVE))
 		return -ENODEV;
 
-	fpu__activate_fpstate_read(fpu);
-
 	xsave = &fpu->state.xsave;
 
-	/*
-	 * Copy the 48bytes defined by the software first into the xstate
-	 * memory layout in the thread struct, so that we can copy the entire
-	 * xstateregs to the user using one user_regset_copyout().
-	 */
-	memcpy(&xsave->i387.sw_reserved,
-		xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));
-	/*
-	 * Copy the xstate memory layout.
-	 */
-	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
+	fpu__activate_fpstate_read(fpu);
+
+	if (using_compacted_format()) {
+		ret = copyout_from_xsaves(pos, count, kbuf, ubuf, xsave);
+	} else {
+		fpstate_sanitize_xstate(fpu);
+		/*
+		 * Copy the 48 bytes defined by the software into the xsave
+		 * area in the thread struct, so that we can copy the whole
+		 * area to user using one user_regset_copyout().
+		 */
+		memcpy(&xsave->i387.sw_reserved, xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));
+
+		/*
+		 * Copy the xstate memory layout.
+		 */
+		ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
+	}
 	return ret;
 }
 
@@ -114,11 +120,27 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
 	if (!boot_cpu_has(X86_FEATURE_XSAVE))
 		return -ENODEV;
 
-	fpu__activate_fpstate_write(fpu);
+	/*
+	 * A whole standard-format XSAVE buffer is needed:
+	 */
+	if ((pos != 0) || (count < fpu_user_xstate_size))
+		return -EFAULT;
 
 	xsave = &fpu->state.xsave;
 
-	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
+	fpu__activate_fpstate_write(fpu);
+
+	if (boot_cpu_has(X86_FEATURE_XSAVES))
+		ret = copyin_to_xsaves(kbuf, ubuf, xsave);
+	else
+		ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
+
+	/*
+	 * In case of failure, mark all states as init:
+	 */
+	if (ret)
+		fpstate_init(&fpu->state);
+
 	/*
 	 * mxcsr reserved bits must be masked to zero for security reasons.
 	 */
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 31c6a60505e6..9e231d88bb33 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -8,8 +8,10 @@
 #include <asm/fpu/internal.h>
 #include <asm/fpu/signal.h>
 #include <asm/fpu/regset.h>
+#include <asm/fpu/xstate.h>
 
 #include <asm/sigframe.h>
+#include <asm/trace/fpu.h>
 
 static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32;
 
@@ -31,7 +33,7 @@ static inline int check_for_xstate(struct fxregs_state __user *buf,
 	/* Check for the first magic field and other error scenarios. */
 	if (fx_sw->magic1 != FP_XSTATE_MAGIC1 ||
 	    fx_sw->xstate_size < min_xstate_size ||
-	    fx_sw->xstate_size > xstate_size ||
+	    fx_sw->xstate_size > fpu_user_xstate_size ||
 	    fx_sw->xstate_size > fx_sw->extended_size)
 		return -1;
 
@@ -88,7 +90,8 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame)
 	if (!use_xsave())
 		return err;
 
-	err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size));
+	err |= __put_user(FP_XSTATE_MAGIC2,
+			  (__u32 *)(buf + fpu_user_xstate_size));
 
 	/*
 	 * Read the xfeatures which we copied (directly from the cpu or
@@ -125,7 +128,7 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
 	else
 		err = copy_fregs_to_user((struct fregs_state __user *) buf);
 
-	if (unlikely(err) && __clear_user(buf, xstate_size))
+	if (unlikely(err) && __clear_user(buf, fpu_user_xstate_size))
 		err = -EFAULT;
 	return err;
 }
@@ -167,7 +170,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
 			sizeof(struct user_i387_ia32_struct), NULL,
 			(struct _fpstate_32 __user *) buf) ? -1 : 1;
 
-	if (fpregs_active()) {
+	if (fpregs_active() || using_compacted_format()) {
 		/* Save the live register state to the user directly. */
 		if (copy_fpregs_to_sigframe(buf_fx))
 			return -1;
@@ -175,8 +178,19 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
 		if (ia32_fxstate)
 			copy_fxregs_to_kernel(&tsk->thread.fpu);
 	} else {
+		/*
+		 * It is a *bug* if kernel uses compacted-format for xsave
+		 * area and we copy it out directly to a signal frame. It
+		 * should have been handled above by saving the registers
+		 * directly.
+		 */
+		if (boot_cpu_has(X86_FEATURE_XSAVES)) {
+			WARN_ONCE(1, "x86/fpu: saving compacted-format xsave area to a signal frame!\n");
+			return -1;
+		}
+
 		fpstate_sanitize_xstate(&tsk->thread.fpu);
-		if (__copy_to_user(buf_fx, xsave, xstate_size))
+		if (__copy_to_user(buf_fx, xsave, fpu_user_xstate_size))
 			return -1;
 	}
 
@@ -250,7 +264,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 	int ia32_fxstate = (buf != buf_fx);
 	struct task_struct *tsk = current;
 	struct fpu *fpu = &tsk->thread.fpu;
-	int state_size = xstate_size;
+	int state_size = fpu_kernel_xstate_size;
 	u64 xfeatures = 0;
 	int fx_only = 0;
 
@@ -282,6 +296,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 			 */
 			state_size = sizeof(struct fxregs_state);
 			fx_only = 1;
+			trace_x86_fpu_xstate_check_failed(fpu);
 		} else {
 			state_size = fx_sw_user.xstate_size;
 			xfeatures = fx_sw_user.xfeatures;
@@ -308,9 +323,17 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 		 */
 		fpu__drop(fpu);
 
-		if (__copy_from_user(&fpu->state.xsave, buf_fx, state_size) ||
-		    __copy_from_user(&env, buf, sizeof(env))) {
+		if (using_compacted_format()) {
+			err = copyin_to_xsaves(NULL, buf_fx,
+					       &fpu->state.xsave);
+		} else {
+			err = __copy_from_user(&fpu->state.xsave,
+					       buf_fx, state_size);
+		}
+
+		if (err || __copy_from_user(&env, buf, sizeof(env))) {
 			fpstate_init(&fpu->state);
+			trace_x86_fpu_init_state(fpu);
 			err = -1;
 		} else {
 			sanitize_restored_xstate(tsk, &env, xfeatures, fx_only);
@@ -341,7 +364,8 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 
 static inline int xstate_sigframe_size(void)
 {
-	return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size;
+	return use_xsave() ? fpu_user_xstate_size + FP_XSTATE_MAGIC2_SIZE :
+			fpu_user_xstate_size;
 }
 
 /*
@@ -385,12 +409,12 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame,
  */
 void fpu__init_prepare_fx_sw_frame(void)
 {
-	int size = xstate_size + FP_XSTATE_MAGIC2_SIZE;
+	int size = fpu_user_xstate_size + FP_XSTATE_MAGIC2_SIZE;
 
 	fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
 	fx_sw_reserved.extended_size = size;
 	fx_sw_reserved.xfeatures = xfeatures_mask;
-	fx_sw_reserved.xstate_size = xstate_size;
+	fx_sw_reserved.xstate_size = fpu_user_xstate_size;
 
 	if (config_enabled(CONFIG_IA32_EMULATION) ||
 	    config_enabled(CONFIG_X86_32)) {
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 4ea2a59483c7..680049aa4593 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -11,6 +11,7 @@
 #include <asm/fpu/internal.h>
 #include <asm/fpu/signal.h>
 #include <asm/fpu/regset.h>
+#include <asm/fpu/xstate.h>
 
 #include <asm/tlbflush.h>
 
@@ -44,6 +45,13 @@ static unsigned int xstate_sizes[XFEATURE_MAX]   = { [ 0 ... XFEATURE_MAX - 1] =
 static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8];
 
 /*
+ * The XSAVE area of kernel can be in standard or compacted format;
+ * it is always in standard format for user mode. This is the user
+ * mode standard format size used for signal and ptrace frames.
+ */
+unsigned int fpu_user_xstate_size;
+
+/*
  * Clear all of the X86_FEATURE_* bits that are unavailable
  * when the CPU has no XSAVE support.
  */
@@ -105,6 +113,27 @@ int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
 }
 EXPORT_SYMBOL_GPL(cpu_has_xfeatures);
 
+static int xfeature_is_supervisor(int xfeature_nr)
+{
+	/*
+	 * We currently do not support supervisor states, but if
+	 * we did, we could find out like this.
+	 *
+	 * SDM says: If state component 'i' is a user state component,
+	 * ECX[0] return 0; if state component i is a supervisor
+	 * state component, ECX[0] returns 1.
+	 */
+	u32 eax, ebx, ecx, edx;
+
+	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
+	return !!(ecx & 1);
+}
+
+static int xfeature_is_user(int xfeature_nr)
+{
+	return !xfeature_is_supervisor(xfeature_nr);
+}
+
 /*
  * When executing XSAVEOPT (or other optimized XSAVE instructions), if
  * a processor implementation detects that an FPU state component is still
@@ -171,7 +200,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu)
 	 */
 	while (xfeatures) {
 		if (xfeatures & 0x1) {
-			int offset = xstate_offsets[feature_bit];
+			int offset = xstate_comp_offsets[feature_bit];
 			int size = xstate_sizes[feature_bit];
 
 			memcpy((void *)fx + offset,
@@ -192,6 +221,15 @@ void fpu__init_cpu_xstate(void)
 {
 	if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask)
 		return;
+	/*
+	 * Make it clear that XSAVES supervisor states are not yet
+	 * implemented should anyone expect it to work by changing
+	 * bits in XFEATURE_MASK_* macros and XCR0.
+	 */
+	WARN_ONCE((xfeatures_mask & XFEATURE_MASK_SUPERVISOR),
+		"x86/fpu: XSAVES supervisor states are not yet implemented.\n");
+
+	xfeatures_mask &= ~XFEATURE_MASK_SUPERVISOR;
 
 	cr4_set_bits(X86_CR4_OSXSAVE);
 	xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
@@ -217,13 +255,29 @@ static void __init setup_xstate_features(void)
 	/* start at the beginnning of the "extended state" */
 	unsigned int last_good_offset = offsetof(struct xregs_state,
 						 extended_state_area);
+	/*
+	 * The FP xstates and SSE xstates are legacy states. They are always
+	 * in the fixed offsets in the xsave area in either compacted form
+	 * or standard form.
+	 */
+	xstate_offsets[0] = 0;
+	xstate_sizes[0] = offsetof(struct fxregs_state, xmm_space);
+	xstate_offsets[1] = xstate_sizes[0];
+	xstate_sizes[1] = FIELD_SIZEOF(struct fxregs_state, xmm_space);
 
 	for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
 		if (!xfeature_enabled(i))
 			continue;
 
 		cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
-		xstate_offsets[i] = ebx;
+
+		/*
+		 * If an xfeature is supervisor state, the offset
+		 * in EBX is invalid. We leave it to -1.
+		 */
+		if (xfeature_is_user(i))
+			xstate_offsets[i] = ebx;
+
 		xstate_sizes[i] = eax;
 		/*
 		 * In our xstate size checks, we assume that the
@@ -233,8 +287,6 @@ static void __init setup_xstate_features(void)
 		WARN_ONCE(last_good_offset > xstate_offsets[i],
 			"x86/fpu: misordered xstate at %d\n", last_good_offset);
 		last_good_offset = xstate_offsets[i];
-
-		printk(KERN_INFO "x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n", i, ebx, i, eax);
 	}
 }
 
@@ -263,6 +315,33 @@ static void __init print_xstate_features(void)
 }
 
 /*
+ * This check is important because it is easy to get XSTATE_*
+ * confused with XSTATE_BIT_*.
+ */
+#define CHECK_XFEATURE(nr) do {		\
+	WARN_ON(nr < FIRST_EXTENDED_XFEATURE);	\
+	WARN_ON(nr >= XFEATURE_MAX);	\
+} while (0)
+
+/*
+ * We could cache this like xstate_size[], but we only use
+ * it here, so it would be a waste of space.
+ */
+static int xfeature_is_aligned(int xfeature_nr)
+{
+	u32 eax, ebx, ecx, edx;
+
+	CHECK_XFEATURE(xfeature_nr);
+	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
+	/*
+	 * The value returned by ECX[1] indicates the alignment
+	 * of state component 'i' when the compacted format
+	 * of the extended region of an XSAVE area is used:
+	 */
+	return !!(ecx & 2);
+}
+
+/*
  * This function sets up offsets and sizes of all extended states in
  * xsave area. This supports both standard format and compacted format
  * of the xsave aread.
@@ -299,10 +378,29 @@ static void __init setup_xstate_comp(void)
 		else
 			xstate_comp_sizes[i] = 0;
 
-		if (i > FIRST_EXTENDED_XFEATURE)
+		if (i > FIRST_EXTENDED_XFEATURE) {
 			xstate_comp_offsets[i] = xstate_comp_offsets[i-1]
 					+ xstate_comp_sizes[i-1];
 
+			if (xfeature_is_aligned(i))
+				xstate_comp_offsets[i] =
+					ALIGN(xstate_comp_offsets[i], 64);
+		}
+	}
+}
+
+/*
+ * Print out xstate component offsets and sizes
+ */
+static void __init print_xstate_offset_size(void)
+{
+	int i;
+
+	for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
+		if (!xfeature_enabled(i))
+			continue;
+		pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n",
+			 i, xstate_comp_offsets[i], i, xstate_sizes[i]);
 	}
 }
 
@@ -322,13 +420,11 @@ static void __init setup_init_fpu_buf(void)
 	setup_xstate_features();
 	print_xstate_features();
 
-	if (boot_cpu_has(X86_FEATURE_XSAVES)) {
+	if (boot_cpu_has(X86_FEATURE_XSAVES))
 		init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask;
-		init_fpstate.xsave.header.xfeatures = xfeatures_mask;
-	}
 
 	/*
-	 * Init all the features state with header_bv being 0x0
+	 * Init all the features state with header.xfeatures being 0x0
 	 */
 	copy_kernel_to_xregs_booting(&init_fpstate.xsave);
 
@@ -339,58 +435,19 @@ static void __init setup_init_fpu_buf(void)
 	copy_xregs_to_kernel_booting(&init_fpstate.xsave);
 }
 
-static int xfeature_is_supervisor(int xfeature_nr)
-{
-	/*
-	 * We currently do not support supervisor states, but if
-	 * we did, we could find out like this.
-	 *
-	 * SDM says: If state component i is a user state component,
-	 * ECX[0] return 0; if state component i is a supervisor
-	 * state component, ECX[0] returns 1.
-	u32 eax, ebx, ecx, edx;
-	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx;
-	return !!(ecx & 1);
-	*/
-	return 0;
-}
-/*
-static int xfeature_is_user(int xfeature_nr)
-{
-	return !xfeature_is_supervisor(xfeature_nr);
-}
-*/
-
-/*
- * This check is important because it is easy to get XSTATE_*
- * confused with XSTATE_BIT_*.
- */
-#define CHECK_XFEATURE(nr) do {		\
-	WARN_ON(nr < FIRST_EXTENDED_XFEATURE);	\
-	WARN_ON(nr >= XFEATURE_MAX);	\
-} while (0)
-
-/*
- * We could cache this like xstate_size[], but we only use
- * it here, so it would be a waste of space.
- */
-static int xfeature_is_aligned(int xfeature_nr)
+static int xfeature_uncompacted_offset(int xfeature_nr)
 {
 	u32 eax, ebx, ecx, edx;
 
-	CHECK_XFEATURE(xfeature_nr);
-	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
 	/*
-	 * The value returned by ECX[1] indicates the alignment
-	 * of state component i when the compacted format
-	 * of the extended region of an XSAVE area is used
+	 * Only XSAVES supports supervisor states and it uses compacted
+	 * format. Checking a supervisor state's uncompacted offset is
+	 * an error.
 	 */
-	return !!(ecx & 2);
-}
-
-static int xfeature_uncompacted_offset(int xfeature_nr)
-{
-	u32 eax, ebx, ecx, edx;
+	if (XFEATURE_MASK_SUPERVISOR & (1 << xfeature_nr)) {
+		WARN_ONCE(1, "No fixed offset for xstate %d\n", xfeature_nr);
+		return -1;
+	}
 
 	CHECK_XFEATURE(xfeature_nr);
 	cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
@@ -415,7 +472,7 @@ static int xfeature_size(int xfeature_nr)
  * that it is obvious which aspect of 'XSAVES' is being handled
  * by the calling code.
  */
-static int using_compacted_format(void)
+int using_compacted_format(void)
 {
 	return boot_cpu_has(X86_FEATURE_XSAVES);
 }
@@ -530,11 +587,12 @@ static void do_extra_xstate_size_checks(void)
 		 */
 		paranoid_xstate_size += xfeature_size(i);
 	}
-	XSTATE_WARN_ON(paranoid_xstate_size != xstate_size);
+	XSTATE_WARN_ON(paranoid_xstate_size != fpu_kernel_xstate_size);
 }
 
+
 /*
- * Calculate total size of enabled xstates in XCR0/xfeatures_mask.
+ * Get total size of enabled xstates in XCR0/xfeatures_mask.
  *
  * Note the SDM's wording here.  "sub-function 0" only enumerates
  * the size of the *user* states.  If we use it to size a buffer
@@ -544,34 +602,33 @@ static void do_extra_xstate_size_checks(void)
  * Note that we do not currently set any bits on IA32_XSS so
  * 'XCR0 | IA32_XSS == XCR0' for now.
  */
-static unsigned int __init calculate_xstate_size(void)
+static unsigned int __init get_xsaves_size(void)
 {
 	unsigned int eax, ebx, ecx, edx;
-	unsigned int calculated_xstate_size;
+	/*
+	 * - CPUID function 0DH, sub-function 1:
+	 *    EBX enumerates the size (in bytes) required by
+	 *    the XSAVES instruction for an XSAVE area
+	 *    containing all the state components
+	 *    corresponding to bits currently set in
+	 *    XCR0 | IA32_XSS.
+	 */
+	cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
+	return ebx;
+}
 
-	if (!boot_cpu_has(X86_FEATURE_XSAVES)) {
-		/*
-		 * - CPUID function 0DH, sub-function 0:
-		 *    EBX enumerates the size (in bytes) required by
-		 *    the XSAVE instruction for an XSAVE area
-		 *    containing all the *user* state components
-		 *    corresponding to bits currently set in XCR0.
-		 */
-		cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
-		calculated_xstate_size = ebx;
-	} else {
-		/*
-		 * - CPUID function 0DH, sub-function 1:
-		 *    EBX enumerates the size (in bytes) required by
-		 *    the XSAVES instruction for an XSAVE area
-		 *    containing all the state components
-		 *    corresponding to bits currently set in
-		 *    XCR0 | IA32_XSS.
-		 */
-		cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
-		calculated_xstate_size = ebx;
-	}
-	return calculated_xstate_size;
+static unsigned int __init get_xsave_size(void)
+{
+	unsigned int eax, ebx, ecx, edx;
+	/*
+	 * - CPUID function 0DH, sub-function 0:
+	 *    EBX enumerates the size (in bytes) required by
+	 *    the XSAVE instruction for an XSAVE area
+	 *    containing all the *user* state components
+	 *    corresponding to bits currently set in XCR0.
+	 */
+	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
+	return ebx;
 }
 
 /*
@@ -591,7 +648,15 @@ static bool is_supported_xstate_size(unsigned int test_xstate_size)
 static int init_xstate_size(void)
 {
 	/* Recompute the context size for enabled features: */
-	unsigned int possible_xstate_size = calculate_xstate_size();
+	unsigned int possible_xstate_size;
+	unsigned int xsave_size;
+
+	xsave_size = get_xsave_size();
+
+	if (boot_cpu_has(X86_FEATURE_XSAVES))
+		possible_xstate_size = get_xsaves_size();
+	else
+		possible_xstate_size = xsave_size;
 
 	/* Ensure we have the space to store all enabled: */
 	if (!is_supported_xstate_size(possible_xstate_size))
@@ -601,8 +666,13 @@ static int init_xstate_size(void)
 	 * The size is OK, we are definitely going to use xsave,
 	 * make it known to the world that we need more space.
 	 */
-	xstate_size = possible_xstate_size;
+	fpu_kernel_xstate_size = possible_xstate_size;
 	do_extra_xstate_size_checks();
+
+	/*
+	 * User space is always in standard format.
+	 */
+	fpu_user_xstate_size = xsave_size;
 	return 0;
 }
 
@@ -644,8 +714,13 @@ void __init fpu__init_system_xstate(void)
 	xfeatures_mask = eax + ((u64)edx << 32);
 
 	if ((xfeatures_mask & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
+		/*
+		 * This indicates that something really unexpected happened
+		 * with the enumeration.  Disable XSAVE and try to continue
+		 * booting without it.  This is too early to BUG().
+		 */
 		pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", xfeatures_mask);
-		BUG();
+		goto out_disable;
 	}
 
 	xfeatures_mask &= fpu__get_supported_xfeatures_mask();
@@ -653,21 +728,29 @@ void __init fpu__init_system_xstate(void)
 	/* Enable xstate instructions to be able to continue with initialization: */
 	fpu__init_cpu_xstate();
 	err = init_xstate_size();
-	if (err) {
-		/* something went wrong, boot without any XSAVE support */
-		fpu__init_disable_system_xstate();
-		return;
-	}
+	if (err)
+		goto out_disable;
+
+	/*
+	 * Update info used for ptrace frames; use standard-format size and no
+	 * supervisor xstates:
+	 */
+	update_regset_xstate_info(fpu_user_xstate_size,	xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR);
 
-	update_regset_xstate_info(xstate_size, xfeatures_mask);
 	fpu__init_prepare_fx_sw_frame();
 	setup_init_fpu_buf();
 	setup_xstate_comp();
+	print_xstate_offset_size();
 
 	pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
 		xfeatures_mask,
-		xstate_size,
+		fpu_kernel_xstate_size,
 		boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard");
+	return;
+
+out_disable:
+	/* something went wrong, try to boot without any XSAVE support */
+	fpu__init_disable_system_xstate();
 }
 
 /*
@@ -693,6 +776,11 @@ void *__raw_xsave_addr(struct xregs_state *xsave, int xstate_feature_mask)
 {
 	int feature_nr = fls64(xstate_feature_mask) - 1;
 
+	if (!xfeature_enabled(feature_nr)) {
+		WARN_ON_FPU(1);
+		return NULL;
+	}
+
 	return (void *)xsave + xstate_comp_offsets[feature_nr];
 }
 /*
@@ -887,16 +975,16 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 	if (!boot_cpu_has(X86_FEATURE_OSPKE))
 		return -EINVAL;
 
-	/* Set the bits we need in PKRU  */
+	/* Set the bits we need in PKRU:  */
 	if (init_val & PKEY_DISABLE_ACCESS)
 		new_pkru_bits |= PKRU_AD_BIT;
 	if (init_val & PKEY_DISABLE_WRITE)
 		new_pkru_bits |= PKRU_WD_BIT;
 
-	/* Shift the bits in to the correct place in PKRU for pkey. */
+	/* Shift the bits in to the correct place in PKRU for pkey: */
 	new_pkru_bits <<= pkey_shift;
 
-	/* Locate old copy of the state in the xsave buffer */
+	/* Locate old copy of the state in the xsave buffer: */
 	old_pkru_state = get_xsave_addr(xsave, XFEATURE_MASK_PKRU);
 
 	/*
@@ -909,9 +997,10 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 	else
 		new_pkru_state.pkru = old_pkru_state->pkru;
 
-	/* mask off any old bits in place */
+	/* Mask off any old bits in place: */
 	new_pkru_state.pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
-	/* Set the newly-requested bits */
+
+	/* Set the newly-requested bits: */
 	new_pkru_state.pkru |= new_pkru_bits;
 
 	/*
@@ -925,8 +1014,168 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 	 */
 	new_pkru_state.pad = 0;
 
-	fpu__xfeature_set_state(XFEATURE_MASK_PKRU, &new_pkru_state,
-			sizeof(new_pkru_state));
+	fpu__xfeature_set_state(XFEATURE_MASK_PKRU, &new_pkru_state, sizeof(new_pkru_state));
+
+	return 0;
+}
+
+/*
+ * This is similar to user_regset_copyout(), but will not add offset to
+ * the source data pointer or increment pos, count, kbuf, and ubuf.
+ */
+static inline int xstate_copyout(unsigned int pos, unsigned int count,
+				 void *kbuf, void __user *ubuf,
+				 const void *data, const int start_pos,
+				 const int end_pos)
+{
+	if ((count == 0) || (pos < start_pos))
+		return 0;
+
+	if (end_pos < 0 || pos < end_pos) {
+		unsigned int copy = (end_pos < 0 ? count : min(count, end_pos - pos));
+
+		if (kbuf) {
+			memcpy(kbuf + pos, data, copy);
+		} else {
+			if (__copy_to_user(ubuf + pos, data, copy))
+				return -EFAULT;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Convert from kernel XSAVES compacted format to standard format and copy
+ * to a ptrace buffer. It supports partial copy but pos always starts from
+ * zero. This is called from xstateregs_get() and there we check the CPU
+ * has XSAVES.
+ */
+int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf,
+			void __user *ubuf, struct xregs_state *xsave)
+{
+	unsigned int offset, size;
+	int ret, i;
+	struct xstate_header header;
+
+	/*
+	 * Currently copy_regset_to_user() starts from pos 0:
+	 */
+	if (unlikely(pos != 0))
+		return -EFAULT;
+
+	/*
+	 * The destination is a ptrace buffer; we put in only user xstates:
+	 */
+	memset(&header, 0, sizeof(header));
+	header.xfeatures = xsave->header.xfeatures;
+	header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR;
+
+	/*
+	 * Copy xregs_state->header:
+	 */
+	offset = offsetof(struct xregs_state, header);
+	size = sizeof(header);
+
+	ret = xstate_copyout(offset, size, kbuf, ubuf, &header, 0, count);
+
+	if (ret)
+		return ret;
+
+	for (i = 0; i < XFEATURE_MAX; i++) {
+		/*
+		 * Copy only in-use xstates:
+		 */
+		if ((header.xfeatures >> i) & 1) {
+			void *src = __raw_xsave_addr(xsave, 1 << i);
+
+			offset = xstate_offsets[i];
+			size = xstate_sizes[i];
+
+			ret = xstate_copyout(offset, size, kbuf, ubuf, src, 0, count);
+
+			if (ret)
+				return ret;
+
+			if (offset + size >= count)
+				break;
+		}
+
+	}
+
+	/*
+	 * Fill xsave->i387.sw_reserved value for ptrace frame:
+	 */
+	offset = offsetof(struct fxregs_state, sw_reserved);
+	size = sizeof(xstate_fx_sw_bytes);
+
+	ret = xstate_copyout(offset, size, kbuf, ubuf, xstate_fx_sw_bytes, 0, count);
+
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+/*
+ * Convert from a ptrace standard-format buffer to kernel XSAVES format
+ * and copy to the target thread. This is called from xstateregs_set() and
+ * there we check the CPU has XSAVES and a whole standard-sized buffer
+ * exists.
+ */
+int copyin_to_xsaves(const void *kbuf, const void __user *ubuf,
+		     struct xregs_state *xsave)
+{
+	unsigned int offset, size;
+	int i;
+	u64 xfeatures;
+	u64 allowed_features;
+
+	offset = offsetof(struct xregs_state, header);
+	size = sizeof(xfeatures);
+
+	if (kbuf) {
+		memcpy(&xfeatures, kbuf + offset, size);
+	} else {
+		if (__copy_from_user(&xfeatures, ubuf + offset, size))
+			return -EFAULT;
+	}
+
+	/*
+	 * Reject if the user sets any disabled or supervisor features:
+	 */
+	allowed_features = xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR;
+
+	if (xfeatures & ~allowed_features)
+		return -EINVAL;
+
+	for (i = 0; i < XFEATURE_MAX; i++) {
+		u64 mask = ((u64)1 << i);
+
+		if (xfeatures & mask) {
+			void *dst = __raw_xsave_addr(xsave, 1 << i);
+
+			offset = xstate_offsets[i];
+			size = xstate_sizes[i];
+
+			if (kbuf) {
+				memcpy(dst, kbuf + offset, size);
+			} else {
+				if (__copy_from_user(dst, ubuf + offset, size))
+					return -EFAULT;
+			}
+		}
+	}
+
+	/*
+	 * The state that came in from userspace was user-state only.
+	 * Mask all the user states out of 'xfeatures':
+	 */
+	xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR;
+
+	/*
+	 * Add back in the features that came in from userspace:
+	 */
+	xsave->header.xfeatures |= xfeatures;
 
 	return 0;
 }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7da5dd2057a9..b2766723c951 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -55,9 +55,6 @@
 #include <linux/irqbypass.h>
 #include <trace/events/kvm.h>
 
-#define CREATE_TRACE_POINTS
-#include "trace.h"
-
 #include <asm/debugreg.h>
 #include <asm/msr.h>
 #include <asm/desc.h>
@@ -68,6 +65,9 @@
 #include <asm/div64.h>
 #include <asm/irq_remapping.h>
 
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
 #define MAX_IO_MSRS 256
 #define KVM_MAX_MCE_BANKS 32
 #define KVM_MCE_CAP_SUPPORTED (MCG_CTL_P | MCG_SER_P)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ