lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <1427993219-7291-1-git-send-email-dvlasenk@redhat.com>
Date:	Thu,  2 Apr 2015 18:46:59 +0200
From:	Denys Vlasenko <dvlasenk@...hat.com>
To:	Ingo Molnar <mingo@...nel.org>
Cc:	Denys Vlasenko <dvlasenk@...hat.com>,
	Linus Torvalds <torvalds@...ux-foundation.org>,
	Steven Rostedt <rostedt@...dmis.org>,
	Borislav Petkov <bp@...en8.de>,
	"H. Peter Anvin" <hpa@...or.com>,
	Andy Lutomirski <luto@...capital.net>,
	Oleg Nesterov <oleg@...hat.com>,
	Frederic Weisbecker <fweisbec@...il.com>,
	Alexei Starovoitov <ast@...mgrid.com>,
	Will Drewry <wad@...omium.org>,
	Kees Cook <keescook@...omium.org>, x86@...nel.org,
	linux-kernel@...r.kernel.org
Subject: [PATCH] x86/asm/entry/64: move opportunistic sysret code to syscall code path

This change does two things:

Copy-pastes "retint_swapgs:" code into syscall handling code,
the copy is under "syscall_return:" label. The code is unchanged
apart from some label renames.

Removes "opportunistic sysret" code from "retint_swapgs:" code block,
since now it won't be reached by syscall return. This in fact removes
most of the code in question.

   text	   data	    bss	    dec	    hex	filename
  12530	      0	      0	  12530	   30f2	entry_64.o.before
  12562	      0	      0	  12562	   3112	entry_64.o

Run-tested.

Signed-off-by: Denys Vlasenko <dvlasenk@...hat.com>
CC: Linus Torvalds <torvalds@...ux-foundation.org>
CC: Steven Rostedt <rostedt@...dmis.org>
CC: Ingo Molnar <mingo@...nel.org>
CC: Borislav Petkov <bp@...en8.de>
CC: "H. Peter Anvin" <hpa@...or.com>
CC: Andy Lutomirski <luto@...capital.net>
CC: Oleg Nesterov <oleg@...hat.com>
CC: Frederic Weisbecker <fweisbec@...il.com>
CC: Alexei Starovoitov <ast@...mgrid.com>
CC: Will Drewry <wad@...omium.org>
CC: Kees Cook <keescook@...omium.org>
CC: x86@...nel.org
CC: linux-kernel@...r.kernel.org
---
 arch/x86/kernel/entry_64.S | 158 ++++++++++++++++++++++++---------------------
 1 file changed, 86 insertions(+), 72 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 7bc097a..aed8a51 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -354,8 +354,8 @@ GLOBAL(int_with_check)
 	movl TI_flags(%rcx),%edx
 	andl %edi,%edx
 	jnz   int_careful
-	andl    $~TS_COMPAT,TI_status(%rcx)
-	jmp   retint_swapgs
+	andl	$~TS_COMPAT,TI_status(%rcx)
+	jmp	syscall_return
 
 	/* Either reschedule or signal or syscall exit tracking needed. */
 	/* First do a reschedule test. */
@@ -399,9 +399,86 @@ int_restore_rest:
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
 	jmp int_with_check
+
+syscall_return:
+	/* The iretq could re-enable interrupts: */
+	DISABLE_INTERRUPTS(CLBR_ANY)
+	TRACE_IRQS_IRETQ
+
+	/*
+	 * Try to use SYSRET instead of IRET if we're returning to
+	 * a completely clean 64-bit userspace context.
+	 */
+	movq RCX(%rsp),%rcx
+	cmpq %rcx,RIP(%rsp)		/* RCX == RIP */
+	jne opportunistic_sysret_failed
+
+	/*
+	 * On Intel CPUs, sysret with non-canonical RCX/RIP will #GP
+	 * in kernel space.  This essentially lets the user take over
+	 * the kernel, since userspace controls RSP.  It's not worth
+	 * testing for canonicalness exactly -- this check detects any
+	 * of the 17 high bits set, which is true for non-canonical
+	 * or kernel addresses.  (This will pessimize vsyscall=native.
+	 * Big deal.)
+	 *
+	 * If virtual addresses ever become wider, this will need
+	 * to be updated to remain correct on both old and new CPUs.
+	 */
+	.ifne __VIRTUAL_MASK_SHIFT - 47
+	.error "virtual address width changed -- sysret checks need update"
+	.endif
+	shr $__VIRTUAL_MASK_SHIFT, %rcx
+	jnz opportunistic_sysret_failed
+
+	cmpq $__USER_CS,CS(%rsp)	/* CS must match SYSRET */
+	jne opportunistic_sysret_failed
+
+	movq R11(%rsp),%r11
+	cmpq %r11,EFLAGS(%rsp)		/* R11 == RFLAGS */
+	jne opportunistic_sysret_failed
+
+	/*
+	 * SYSRET can't restore RF.  SYSRET can restore TF, but unlike IRET,
+	 * restoring TF results in a trap from userspace immediately after
+	 * SYSRET.  This would cause an infinite loop whenever #DB happens
+	 * with register state that satisfies the opportunistic SYSRET
+	 * conditions.  For example, single-stepping this user code:
+	 *
+	 *           movq $stuck_here,%rcx
+	 *           pushfq
+	 *           popq %r11
+	 *   stuck_here:
+	 *
+	 * would never get past 'stuck_here'.
+	 */
+	testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
+	jnz opportunistic_sysret_failed
+
+	/* nothing to check for RSP */
+
+	cmpq $__USER_DS,SS(%rsp)	/* SS must match SYSRET */
+	jne opportunistic_sysret_failed
+
+	/*
+	 * We win!  This label is here just for ease of understanding
+	 * perf profiles.  Nothing jumps here.
+	 */
+syscall_return_via_sysret:
+	CFI_REMEMBER_STATE
+	/* r11 is already restored (see code above) */
+	RESTORE_C_REGS_EXCEPT_R11
+	movq RSP(%rsp),%rsp
+	USERGS_SYSRET64
+	CFI_RESTORE_STATE
+
+opportunistic_sysret_failed:
+	SWAPGS
+	jmp	restore_c_regs_and_iret
 	CFI_ENDPROC
 END(system_call)
 
+
 	.macro FORK_LIKE func
 ENTRY(stub_\func)
 	CFI_STARTPROC
@@ -672,76 +749,8 @@ retint_swapgs:		/* return to user-space */
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_IRETQ
 
-	/*
-	 * Try to use SYSRET instead of IRET if we're returning to
-	 * a completely clean 64-bit userspace context.
-	 */
-	movq RCX(%rsp),%rcx
-	cmpq %rcx,RIP(%rsp)		/* RCX == RIP */
-	jne opportunistic_sysret_failed
-
-	/*
-	 * On Intel CPUs, sysret with non-canonical RCX/RIP will #GP
-	 * in kernel space.  This essentially lets the user take over
-	 * the kernel, since userspace controls RSP.  It's not worth
-	 * testing for canonicalness exactly -- this check detects any
-	 * of the 17 high bits set, which is true for non-canonical
-	 * or kernel addresses.  (This will pessimize vsyscall=native.
-	 * Big deal.)
-	 *
-	 * If virtual addresses ever become wider, this will need
-	 * to be updated to remain correct on both old and new CPUs.
-	 */
-	.ifne __VIRTUAL_MASK_SHIFT - 47
-	.error "virtual address width changed -- sysret checks need update"
-	.endif
-	shr $__VIRTUAL_MASK_SHIFT, %rcx
-	jnz opportunistic_sysret_failed
-
-	cmpq $__USER_CS,CS(%rsp)	/* CS must match SYSRET */
-	jne opportunistic_sysret_failed
-
-	movq R11(%rsp),%r11
-	cmpq %r11,EFLAGS(%rsp)		/* R11 == RFLAGS */
-	jne opportunistic_sysret_failed
-
-	/*
-	 * SYSRET can't restore RF.  SYSRET can restore TF, but unlike IRET,
-	 * restoring TF results in a trap from userspace immediately after
-	 * SYSRET.  This would cause an infinite loop whenever #DB happens
-	 * with register state that satisfies the opportunistic SYSRET
-	 * conditions.  For example, single-stepping this user code:
-	 *
-	 *           movq $stuck_here,%rcx
-	 *           pushfq
-	 *           popq %r11
-	 *   stuck_here:
-	 *
-	 * would never get past 'stuck_here'.
-	 */
-	testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
-	jnz opportunistic_sysret_failed
-
-	/* nothing to check for RSP */
-
-	cmpq $__USER_DS,SS(%rsp)	/* SS must match SYSRET */
-	jne opportunistic_sysret_failed
-
-	/*
-	 * We win!  This label is here just for ease of understanding
-	 * perf profiles.  Nothing jumps here.
-	 */
-irq_return_via_sysret:
-	CFI_REMEMBER_STATE
-	/* r11 is already restored (see code above) */
-	RESTORE_C_REGS_EXCEPT_R11
-	movq RSP(%rsp),%rsp
-	USERGS_SYSRET64
-	CFI_RESTORE_STATE
-
-opportunistic_sysret_failed:
 	SWAPGS
-	jmp restore_args
+	jmp	restore_c_regs_and_iret
 
 /* Returning to kernel space */
 retint_kernel:
@@ -760,7 +769,12 @@ retint_kernel:
 	 * The iretq could re-enable interrupts:
 	 */
 	TRACE_IRQS_IRETQ
-restore_args:
+
+/*
+ * At this label, code paths which return to kernel and to user,
+ * which come from interrupts/exception and from syscalls, merge.
+ */
+restore_c_regs_and_iret:
 	RESTORE_C_REGS
 	REMOVE_PT_GPREGS_FROM_STACK 8
 
-- 
1.8.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ