lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <9ab6ab7aae8c24985e93fda3fcdafea64160298d.1515204614.git.tim.c.chen@linux.intel.com>
Date:   Fri,  5 Jan 2018 18:12:18 -0800
From:   Tim Chen <tim.c.chen@...ux.intel.com>
To:     Thomas Gleixner <tglx@...utronix.de>,
        Andy Lutomirski <luto@...nel.org>,
        Linus Torvalds <torvalds@...ux-foundation.org>,
        Greg KH <gregkh@...uxfoundation.org>
Cc:     Tim Chen <tim.c.chen@...ux.intel.com>,
        Dave Hansen <dave.hansen@...el.com>,
        Andrea Arcangeli <aarcange@...hat.com>,
        Andi Kleen <ak@...ux.intel.com>,
        Arjan Van De Ven <arjan.van.de.ven@...el.com>,
        David Woodhouse <dwmw@...zon.co.uk>,
        linux-kernel@...r.kernel.org
Subject: [PATCH v2 3/8] x86/enter: Use IBRS on syscall and interrupts

From: Andrea Arcangeli <aarcange@...hat.com>

Set IBRS upon kernel entrance via syscall and interrupts. Clear it
upon exit.  IBRS protects against unsafe indirect branching predictions
in the kernel.

The NMI interrupt save/restore of IBRS state was based on Andrea
Arcangeli's implementation.
Here's an explanation by Dave Hansen on why we save IBRS state for NMI.

The normal interrupt code uses the 'error_entry' path which uses the
Code Segment (CS) of the instruction that was interrupted to tell
whether it interrupted the kernel or userspace and thus has to switch
IBRS, or leave it alone.

The NMI code is different.  It uses 'paranoid_entry' because it can
interrupt the kernel while it is running with a userspace IBRS (and %GS
and CR3) value, but has a kernel CS.  If we used the same approach as
the normal interrupt code, we might do the following;

	SYSENTER_entry
<-------------- NMI HERE
	IBRS=1
		do_something()
	IBRS=0
	SYSRET

The NMI code might notice that we are running in the kernel and decide
that it is OK to skip the IBRS=1.  This would leave it running
unprotected with IBRS=0, which is bad.

However, if we unconditionally set IBRS=1, in the NMI, we might get the
following case:

	SYSENTER_entry
	IBRS=1
		do_something()
	IBRS=0
<-------------- NMI HERE (set IBRS=1)
	SYSRET

and we would return to userspace with IBRS=1.  Userspace would run
slowly until we entered and exited the kernel again.

Instead of those two approaches, we chose a third one where we simply
save the IBRS value in a scratch register (%r13) and then restore that
value, verbatim.

Signed-off-by: Andrea Arcangeli <aarcange@...hat.com>
Signed-off-by: Tim Chen <tim.c.chen@...ux.intel.com>
---
 arch/x86/entry/entry_64.S        | 23 +++++++++++++++++++++++
 arch/x86/entry/entry_64_compat.S |  8 ++++++++
 2 files changed, 31 insertions(+)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index f048e38..a4031c9 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -174,6 +174,8 @@ ENTRY(entry_SYSCALL_64_trampoline)
 
 	/* Load the top of the task stack into RSP */
 	movq	CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
+	/* Stack is usable, use the non-clobbering IBRS enable: */
+	ENABLE_IBRS
 
 	/* Start building the simulated IRET frame. */
 	pushq	$__USER_DS			/* pt_regs->ss */
@@ -217,6 +219,8 @@ ENTRY(entry_SYSCALL_64)
 	 */
 	movq	%rsp, PER_CPU_VAR(rsp_scratch)
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+	/* Stack is usable, use the non-clobbering IBRS enable: */
+	ENABLE_IBRS
 
 	/* Construct struct pt_regs on stack */
 	pushq	$__USER_DS			/* pt_regs->ss */
@@ -411,6 +415,7 @@ syscall_return_via_sysret:
 	 * We are on the trampoline stack.  All regs except RDI are live.
 	 * We can do future final exit work right here.
 	 */
+	DISABLE_IBRS
 	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
 
 	popq	%rdi
@@ -749,6 +754,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
 	 * We can do future final exit work right here.
 	 */
 
+	DISABLE_IBRS
 	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
 
 	/* Restore RDI. */
@@ -836,6 +842,14 @@ native_irq_return_ldt:
 	SWAPGS					/* to kernel GS */
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi	/* to kernel CR3 */
 
+	/*
+	 * Normally we enable IBRS when we switch to kernel's CR3.
+	 * But we are going to switch back to user CR3 immediately
+	 * in this routine after fixing ESPFIX stack.  There is
+	 * no vulnerable code branching for IBRS to protect.
+	 * We don't toggle IBRS to avoid the cost of two MSR writes.
+	 */
+
 	movq	PER_CPU_VAR(espfix_waddr), %rdi
 	movq	%rax, (0*8)(%rdi)		/* user RAX */
 	movq	(1*8)(%rsp), %rax		/* user RIP */
@@ -969,6 +983,8 @@ ENTRY(switch_to_thread_stack)
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
 	movq	%rsp, %rdi
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+	/* Stack is usable, use the non-clobbering IBRS enable: */
+	ENABLE_IBRS
 	UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
 
 	pushq	7*8(%rdi)		/* regs->ss */
@@ -1271,6 +1287,7 @@ ENTRY(paranoid_entry)
 
 1:
 	SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
+	ENABLE_IBRS_SAVE_AND_CLOBBER save_reg=%r13d
 
 	ret
 END(paranoid_entry)
@@ -1294,6 +1311,7 @@ ENTRY(paranoid_exit)
 	testl	%ebx, %ebx			/* swapgs needed? */
 	jnz	.Lparanoid_exit_no_swapgs
 	TRACE_IRQS_IRETQ
+	RESTORE_IBRS_CLOBBER save_reg=%r13d
 	RESTORE_CR3	scratch_reg=%rbx save_reg=%r14
 	SWAPGS_UNSAFE_STACK
 	jmp	.Lparanoid_exit_restore
@@ -1324,6 +1342,7 @@ ENTRY(error_entry)
 	SWAPGS
 	/* We have user CR3.  Change to kernel CR3. */
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+	ENABLE_IBRS_CLOBBER
 
 .Lerror_entry_from_usermode_after_swapgs:
 	/* Put us onto the real thread stack. */
@@ -1371,6 +1390,7 @@ ENTRY(error_entry)
 	 */
 	SWAPGS
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+	ENABLE_IBRS_CLOBBER
 	jmp .Lerror_entry_done
 
 .Lbstep_iret:
@@ -1385,6 +1405,7 @@ ENTRY(error_entry)
 	 */
 	SWAPGS
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+	ENABLE_IBRS
 
 	/*
 	 * Pretend that the exception came from user mode: set up pt_regs
@@ -1486,6 +1507,7 @@ ENTRY(nmi)
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
 	movq	%rsp, %rdx
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+	ENABLE_IBRS
 	UNWIND_HINT_IRET_REGS base=%rdx offset=8
 	pushq	5*8(%rdx)	/* pt_regs->ss */
 	pushq	4*8(%rdx)	/* pt_regs->rsp */
@@ -1736,6 +1758,7 @@ end_repeat_nmi:
 	movq	$-1, %rsi
 	call	do_nmi
 
+	RESTORE_IBRS_CLOBBER save_reg=%r13d
 	RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
 
 	testl	%ebx, %ebx			/* swapgs needed? */
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 40f1700..2074555 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -54,6 +54,7 @@ ENTRY(entry_SYSENTER_compat)
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
 
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+	ENABLE_IBRS
 
 	/*
 	 * User tracing code (ptrace or signal handlers) might assume that
@@ -224,6 +225,7 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
 	 * preserved during the C calls inside TRACE_IRQS_OFF anyway.
 	 */
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
+	ENABLE_IBRS_CLOBBER /* clobbers %rax, %rcx, %rdx */
 
 	/*
 	 * User mode is traced as though IRQs are on, and SYSENTER
@@ -240,6 +242,12 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
 	/* Opportunistic SYSRET */
 sysret32_from_system_call:
 	TRACE_IRQS_ON			/* User mode traces as IRQs on. */
+	/*
+	 * Clobber of %rax, %rcx, %rdx is OK before register restoring.
+	 * This is safe to do here because we have no indirect branches
+	 * between here and the return to userspace (sysretl).
+	 */
+	DISABLE_IBRS_CLOBBER
 	movq	RBX(%rsp), %rbx		/* pt_regs->rbx */
 	movq	RBP(%rsp), %rbp		/* pt_regs->rbp */
 	movq	EFLAGS(%rsp), %r11	/* pt_regs->flags (in r11) */
-- 
2.9.4

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ