Date:   Thu, 31 May 2018 10:58:42 -0700
From:   "Chang S. Bae" <chang.seok.bae@...el.com>
To:     Andy Lutomirski <luto@...nel.org>,
        "H . Peter Anvin" <hpa@...or.com>,
        Thomas Gleixner <tglx@...utronix.de>,
        Ingo Molnar <mingo@...nel.org>
Cc:     Andi Kleen <ak@...ux.intel.com>,
        Dave Hansen <dave.hansen@...ux.intel.com>,
        Markus T Metzger <markus.t.metzger@...el.com>,
        "Ravi V . Shankar" <ravi.v.shankar@...el.com>,
        "Chang S . Bae" <chang.seok.bae@...el.com>,
        linux-kernel@...r.kernel.org
Subject: [PATCH V2 12/15] x86/fsgsbase/64: Use per-CPU base as GS base on paranoid_entry

FSGSBASE allows fast access to the GS base. With it, the
per-CPU base is always copied to the GS base on paranoid
entry, and the original GS base value is restored on exit.
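
As a rough illustration only (not part of the patch), the new
flow amounts to the following C-level pseudocode; the helper
names here are made up, and the real code is the assembly added
to entry_64.S below:

    /* Illustrative sketch, not real kernel code. */
    void paranoid_entry_fsgsbase(unsigned long *saved_gsbase)
    {
            *saved_gsbase = rdgsbase();           /* may hold an arbitrary user value */
            wrgsbase(find_percpu_base());         /* this CPU's per-CPU base */
    }

    void paranoid_exit_fsgsbase(unsigned long saved_gsbase)
    {
            wrgsbase(saved_gsbase);               /* restore whatever was saved */
    }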

Currently, userspace can't modify the GS base, and the
kernel's convention is that a negative GS base is a kernel
value while a positive GS base is a user value. With
FSGSBASE enabled, however, userspace can put arbitrary data
there; that behavior is unchanged by this patch.
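
Without FSGSBASE, that convention reduces to a sign test on the
GS base value, roughly (illustrative C only, not the patch's code):

    /* Kernel GS base values live in the upper (negative) half of the
     * canonical address space, so the sign bit tells them apart. */
    static inline int gsbase_is_kernel(unsigned long gsbase)
    {
            return (long)gsbase < 0;        /* negative -> kernel value */
    }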

The per-CPU base can be found in the per_cpu_offset table,
indexed by CPU number, which is read from the (per-CPU)
segment limit or obtained with the RDPID instruction.
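
A C-level sketch of that lookup (illustrative only; the real
implementation is the FIND_PERCPU_BASE assembly macro added to
fsgsbase.h below, assuming the 12-bit CPU field described in that
macro's comment):

    extern unsigned long __per_cpu_offset[];   /* kernel per-CPU offsets (SMP) */

    static unsigned long find_percpu_base(void)
    {
            unsigned long p;

            /* RDPID (or LSL on the per-CPU GDT entry) yields cpu | (node << 12) */
            asm volatile ("rdpid %0" : "=r" (p));
            p &= 0xfff;                        /* keep the 12-bit CPU number */

            return __per_cpu_offset[p];        /* kernel GS base for this CPU */
    }

The rdpid mnemonic above assumes a recent assembler; the
hand-encoded macro added to inst.h avoids that requirement.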

A GAS-compatible RDPID macro is included.

Suggested-by: H. Peter Anvin <hpa@...or.com>
Signed-off-by: Chang S. Bae <chang.seok.bae@...el.com>
Cc: Andi Kleen <ak@...ux.intel.com>
Cc: Andy Lutomirski <luto@...nel.org>
Cc: Dave Hansen <dave.hansen@...ux.intel.com>
Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: Ingo Molnar <mingo@...nel.org>
---
 arch/x86/entry/entry_64.S       | 74 +++++++++++++++++++++++++++++++++--------
 arch/x86/include/asm/fsgsbase.h | 57 +++++++++++++++++++++++++++++++
 arch/x86/include/asm/inst.h     | 15 +++++++++
 3 files changed, 132 insertions(+), 14 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 3166b96..cfac4c0 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -38,6 +38,8 @@
 #include <asm/export.h>
 #include <asm/frame.h>
 #include <asm/nospec-branch.h>
+#include <asm/vdso.h>
+#include <asm/fsgsbase.h>
 #include <linux/err.h>
 
 #include "calling.h"
@@ -954,10 +956,14 @@ ENTRY(\sym)
 	addq	$EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
 	.endif
 
-	/* these procedures expect "no swapgs" flag in ebx */
 	.if \paranoid
+	/*
+	 * With FSGSBASE, original GS base is stored in rbx
+	 * Without FSGSBASE, expect "no swapgs" flag in ebx
+	 */
 	jmp	paranoid_exit
 	.else
+	/* expect "no swapgs" flag in ebx */
 	jmp	error_exit
 	.endif
 
@@ -1168,26 +1174,57 @@ idtentry machine_check		do_mce			has_error_code=0	paranoid=1
 #endif
 
 /*
- * Save all registers in pt_regs, and switch gs if needed.
- * Use slow, but surefire "are we in kernel?" check.
- * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
+ * Save all registers in pt_regs.
+ *
+ * When FSGSBASE is enabled, the current GS base is always copied to rbx.
+ *
+ * Without FSGSBASE, SWAPGS is needed when entering from userspace.
+ * A positive GS base means it is a user value and a negative GS
+ * base means it is a kernel value.
+ *
+ * Return:
+ * 	With FSGSBASE, rbx has current GS base.
+ * 	Without that,
+ *		ebx=0: need SWAPGS on exit, ebx=1: otherwise
  */
 ENTRY(paranoid_entry)
 	UNWIND_HINT_FUNC
 	cld
 	PUSH_AND_CLEAR_REGS save_ret=1
 	ENCODE_FRAME_POINTER 8
-	movl	$1, %ebx
-	movl	$MSR_GS_BASE, %ecx
-	rdmsr
-	testl	%edx, %edx
-	js	1f				/* negative -> in kernel */
-	SWAPGS
-	xorl	%ebx, %ebx
 
-1:
+	/*
+	 * Do the CR3 switch early, because FIND_PERCPU_BASE below
+	 * references data mapped only in kernel space. This is safe as
+	 * long as the PTI macro itself doesn't depend on kernel GS base.
+	 */
 	SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
 
+	/*
+	 * Read GS base by RDGSBASE. Kernel GS base is found
+	 * from the per-CPU offset table with CPU number.
+	 */
+	ALTERNATIVE "jmp .Lparanoid_entry_no_fsgsbase",	"",\
+		X86_FEATURE_FSGSBASE
+	RDGSBASE	%rbx
+	FIND_PERCPU_BASE	%rax
+	WRGSBASE	%rax
+	ret
+
+.Lparanoid_entry_no_fsgsbase:
+	movl	$1, %ebx
+	/*
+	 * FSGSBASE is not in use, so depend on the kernel-enforced
+	 * convention that a negative GS base indicates a kernel value.
+	 */
+	READ_MSR_GSBASE save_reg=%edx
+	testl	%edx, %edx	/* negative -> in kernel */
+	jns	.Lparanoid_entry_swapgs
+	ret
+
+.Lparanoid_entry_swapgs:
+	SWAPGS
+	xorl	%ebx, %ebx
 	ret
 END(paranoid_entry)
 
@@ -1201,12 +1238,21 @@ END(paranoid_entry)
  * be complicated.  Fortunately, there's no good reason
  * to try to handle preemption here.
  *
- * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it)
+ * On entry,
+ *	With FSGSBASE,
+ *		rbx is the original GS base that needs to be restored on exit
+ *	Without that,
+ * 		ebx is "no swapgs" flag (1: don't need swapgs, 0: need it)
  */
 ENTRY(paranoid_exit)
 	UNWIND_HINT_REGS
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF_DEBUG
+	ALTERNATIVE "jmp .Lparanoid_exit_no_fsgsbase",	"nop",\
+		X86_FEATURE_FSGSBASE
+	WRGSBASE	%rbx
+	jmp	.Lparanoid_exit_no_swapgs
+.Lparanoid_exit_no_fsgsbase:
 	testl	%ebx, %ebx			/* swapgs needed? */
 	jnz	.Lparanoid_exit_no_swapgs
 	TRACE_IRQS_IRETQ
@@ -1217,7 +1263,7 @@ ENTRY(paranoid_exit)
 	TRACE_IRQS_IRETQ_DEBUG
 	RESTORE_CR3	scratch_reg=%rbx save_reg=%r14
 .Lparanoid_exit_restore:
-	jmp restore_regs_and_return_to_kernel
+	jmp	restore_regs_and_return_to_kernel
 END(paranoid_exit)
 
 /*
diff --git a/arch/x86/include/asm/fsgsbase.h b/arch/x86/include/asm/fsgsbase.h
index 903c7a0..3a5e1ec 100644
--- a/arch/x86/include/asm/fsgsbase.h
+++ b/arch/x86/include/asm/fsgsbase.h
@@ -107,6 +107,63 @@ void  write_inactive_gsbase(unsigned long gsbase);
 	MODRM 0xd0 wrgsbase_opd 1
 .endm
 
+#ifdef CONFIG_SMP
+
+/*
+ * Fetch the per-CPU GSBASE value for this processor and put it in @reg.
+ * We normally use %GS for accessing per-CPU data, but we are setting up
+ * %GS here and obviously can not use %GS itself to access per-CPU data.
+ */
+.macro FIND_PERCPU_BASE_RDPID reg:req
+	RDPID	\reg
+
+	/*
+	 * CPU number is written before IST initialization. Later,
+	 * processor id is (also) written during vDSO initialization,
+	 * with 12 bits for the CPU and 8 bits for the node.
+	 */
+	andq	$PERCPU_CPU_MASK, \reg
+	/*
+	 * Kernel GS base is looked up from the __per_cpu_offset list with
+	 * the CPU number (processor id).
+	 */
+	movq	__per_cpu_offset(, \reg, 8), \reg
+.endm
+
+.macro FIND_PERCPU_BASE_SEG_LIMIT reg:req
+	/* CPU number is found from the limit of PER_CPU entry in GDT */
+	movq	$__PER_CPU_SEG, \reg
+	lsl	\reg, \reg
+
+	/* Same as FIND_PERCPU_BASE_RDPID */
+	andq	$PERCPU_CPU_MASK, \reg
+	movq	__per_cpu_offset(, \reg, 8), \reg
+.endm
+
+.macro FIND_PERCPU_BASE reg:req
+	ALTERNATIVE \
+		"FIND_PERCPU_BASE_SEG_LIMIT \reg", \
+		"FIND_PERCPU_BASE_RDPID \reg", \
+		X86_FEATURE_RDPID
+.endm
+
+#else
+
+.macro FIND_PERCPU_BASE reg:req
+	/* Tracking the base offset value */
+	movq	pcpu_unit_offsets(%rip), \reg
+.endm
+
+#endif /* CONFIG_SMP */
+
+.macro READ_MSR_GSBASE save_reg:req
+	movl	$MSR_GS_BASE, %ecx
+	/* Read MSR specified by %ecx into %edx:%eax */
+	rdmsr
+	.ifnc \save_reg, %edx
+	movl	%edx, \save_reg
+	.endif
+.endm
 #endif /* CONFIG_X86_64 */
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h
index f5a796d..d063841 100644
--- a/arch/x86/include/asm/inst.h
+++ b/arch/x86/include/asm/inst.h
@@ -306,6 +306,21 @@
 	.endif
 	MODRM 0xc0 movq_r64_xmm_opd1 movq_r64_xmm_opd2
 	.endm
+
+.macro RDPID opd
+	REG_TYPE rdpid_opd_type \opd
+	.if rdpid_opd_type == REG_TYPE_R64
+	R64_NUM rdpid_opd \opd
+	.else
+	R32_NUM rdpid_opd \opd
+	.endif
+	.byte 0xf3
+	.if rdpid_opd > 7
+	PFX_REX rdpid_opd 0
+	.endif
+	.byte 0x0f, 0xc7
+	MODRM 0xc0 rdpid_opd 0x7
+.endm
 #endif
 
 #endif
-- 
2.7.4
