linux-kernel - Re: [RFC v2 PATCH 6/7] x86/entry: get rid of ALLOC_PT_GPREGS_ON_STACK and SAVE_AND_CLEAR

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20180207212915.GA4633@isilmar-4.linta.de>
Date:   Wed, 7 Feb 2018 22:29:15 +0100
From:   Dominik Brodowski <linux@...inikbrodowski.net>
To:     Linus Torvalds <torvalds@...ux-foundation.org>
Cc:     Linux Kernel Mailing List <linux-kernel@...r.kernel.org>,
        Ingo Molnar <mingo@...nel.org>,
        the arch/x86 maintainers <x86@...nel.org>,
        Dan Williams <dan.j.williams@...el.com>,
        Thomas Gleixner <tglx@...utronix.de>,
        Andi Kleen <ak@...ux.intel.com>,
        Andrew Lutomirski <luto@...nel.org>
Subject: Re: [RFC v2 PATCH 6/7] x86/entry: get rid of
 ALLOC_PT_GPREGS_ON_STACK and SAVE_AND_CLEAR_REGS

On Wed, Feb 07, 2018 at 12:44:41PM -0800, Linus Torvalds wrote:
> On Wed, Feb 7, 2018 at 12:15 PM, Dominik Brodowski
> <linux@...inikbrodowski.net> wrote:
> >
> > Note: The testb $3, CS(%rsp) instruction in idtentry() does not need
> > modification. Previously %rsp was manually decreased by 15*8; with
> > this patch, %rsp is decreased by 15 pushq instructions. Moreover,
> > error_entry did and does the exact same test (with offset=8) after
> > the registers have been moved/pushed and cleared.
> 
> So this has the problem that now those save/clear instructions will
> all be done in that idtentry macro.
> 
> So now that code will be duplicated for all the users of that macro.
> 
> The old code did the saving in the common error_entry and
> paranoid_entry routines, in order to be able to share all the code,
> and making the duplicated stub functions generated by the idtentry
> macro smaller.
> 
> Now, admittedly the new push sequence is much smaller than the old
> movq sequence, so the duplication doesn't hurt as much, but it's still
> likely quite noticeable.
> 
> So this removes lines of asm code, but it adds a lot of instructions
> to the end result thanks to the macro, I think.

Indeed, that is the case (see below). However, if we want to switch to
PUSH instructions and do this in a routine which is call'ed and which
ret'urns, %rsp needs to be moved around even more often than the old
ALLOC_PT_GPREGS_ON_STACK macro did (which you wanted to get rid of,
IIUYC). Or do I miss something?

   text	   data	    bss	    dec	    hex	filename
  19500	      0	      0	  19500	   4c2c	arch/x86/entry/entry_64.o-orig
  19510	      0	      0	  19510	   4c36	arch/x86/entry/entry_64.o-3_of_7
  21105	      0	      0	  21105	   5271	arch/x86/entry/entry_64.o-5_of_7
  24307	      0	      0	  24307	   5ef3	arch/x86/entry/entry_64.o-7_of_7

In any case, here's a v2.1 for this patch 6/7, which silences an objtool
warning in error_entry.

Thanks,
	Dominik

----------------------------------------------------
From: Dominik Brodowski <linux@...inikbrodowski.net>
Date: Wed, 7 Feb 2018 20:56:13 +0100
Subject: [PATCH] x86/entry: get rid of ALLOC_PT_GPREGS_ON_STACK and SAVE_AND_CLEAR_REGS

Previously, error_entry() and paranoid_entry() saved the GP registers
onto stack space previously allocated by its callers. Combine these two
steps in the callers, and use the generic PUSH_AND_CLEAR_REGS macro
for that.

Note: The testb $3, CS(%rsp) instruction in idtentry() does not need
modification. Previously %rsp was manually decreased by 15*8; with
this patch, %rsp is decreased by 15 pushq instructions. Moreover,
error_entry did and does the exact same test (with offset=8) after
the registers have been moved/pushed and cleared.

Suggested-by: Linus Torvalds <torvalds@...ux-foundation.org>
Signed-off-by: Dominik Brodowski <linux@...inikbrodowski.net>

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index d6a97e2945ee..59675010c9a0 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -97,46 +97,6 @@ For 32-bit we have the following conventions - kernel is built with
 
 #define SIZEOF_PTREGS	21*8
 
-	.macro ALLOC_PT_GPREGS_ON_STACK
-	addq	$-(15*8), %rsp
-	.endm
-
-	.macro SAVE_AND_CLEAR_REGS offset=0
-	/*
-	 * Save registers and sanitize registers of values that a
-	 * speculation attack might otherwise want to exploit. The
-	 * lower registers are likely clobbered well before they
-	 * could be put to use in a speculative execution gadget.
-	 * Interleave XOR with PUSH for better uop scheduling:
-	 */
-	movq %rdi, 14*8+\offset(%rsp)
-	movq %rsi, 13*8+\offset(%rsp)
-	movq %rdx, 12*8+\offset(%rsp)
-	movq %rcx, 11*8+\offset(%rsp)
-	movq %rax, 10*8+\offset(%rsp)
-	movq %r8,  9*8+\offset(%rsp)
-	xorq %r8, %r8				/* nospec r8 */
-	movq %r9,  8*8+\offset(%rsp)
-	xorq %r9, %r9				/* nospec r9 */
-	movq %r10, 7*8+\offset(%rsp)
-	xorq %r10, %r10				/* nospec r10 */
-	movq %r11, 6*8+\offset(%rsp)
-	xorq %r11, %r11				/* nospec r11 */
-	movq %rbx, 5*8+\offset(%rsp)
-	xorl %ebx, %ebx				/* nospec rbx */
-	movq %rbp, 4*8+\offset(%rsp)
-	xorl %ebp, %ebp				/* nospec rbp */
-	movq %r12, 3*8+\offset(%rsp)
-	xorq %r12, %r12				/* nospec r12 */
-	movq %r13, 2*8+\offset(%rsp)
-	xorq %r13, %r13				/* nospec r13 */
-	movq %r14, 1*8+\offset(%rsp)
-	xorq %r14, %r14				/* nospec r14 */
-	movq %r15, 0*8+\offset(%rsp)
-	xorq %r15, %r15				/* nospec r15 */
-	UNWIND_HINT_REGS offset=\offset
-	.endm
-
 	.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax
 	/*
 	 * Push registers and sanitize registers of values that a
@@ -211,7 +171,7 @@ For 32-bit we have the following conventions - kernel is built with
  * is just setting the LSB, which makes it an invalid stack address and is also
  * a signal to the unwinder that it's a pt_regs pointer in disguise.
  *
- * NOTE: This macro must be used *after* SAVE_AND_CLEAR_REGS because it corrupts
+ * NOTE: This macro must be used *after* PUSH_AND_CLEAR_REGS because it corrupts
  * the original rbp.
  */
 .macro ENCODE_FRAME_POINTER ptregs_offset=0
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 9c4fe360db42..c6e5d44eb322 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -871,7 +871,9 @@ ENTRY(\sym)
 	pushq	$-1				/* ORIG_RAX: no syscall to restart */
 	.endif
 
-	ALLOC_PT_GPREGS_ON_STACK
+	/* Save all registers in pt_regs */
+	PUSH_AND_CLEAR_REGS
+	ENCODE_FRAME_POINTER
 
 	.if \paranoid < 2
 	testb	$3, CS(%rsp)			/* If coming from userspace, switch stacks */
@@ -1121,15 +1123,13 @@ idtentry machine_check		do_mce			has_error_code=0	paranoid=1
 #endif
 
 /*
- * Save all registers in pt_regs, and switch gs if needed.
+ * Switch gs if needed.
  * Use slow, but surefire "are we in kernel?" check.
  * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
  */
 ENTRY(paranoid_entry)
 	UNWIND_HINT_FUNC
 	cld
-	SAVE_AND_CLEAR_REGS 8
-	ENCODE_FRAME_POINTER 8
 	movl	$1, %ebx
 	movl	$MSR_GS_BASE, %ecx
 	rdmsr
@@ -1173,14 +1173,13 @@ ENTRY(paranoid_exit)
 END(paranoid_exit)
 
 /*
- * Save all registers in pt_regs, and switch gs if needed.
+ * Switch gs if needed.
  * Return: EBX=0: came from user mode; EBX=1: otherwise
  */
 ENTRY(error_entry)
 	UNWIND_HINT_FUNC
+	UNWIND_HINT_REGS offset=8
 	cld
-	SAVE_AND_CLEAR_REGS 8
-	ENCODE_FRAME_POINTER 8
 	testb	$3, CS+8(%rsp)
 	jz	.Lerror_kernelspace
 
@@ -1571,7 +1570,8 @@ end_repeat_nmi:
 	 * frame to point back to repeat_nmi.
 	 */
 	pushq	$-1				/* ORIG_RAX: no syscall to restart */
-	ALLOC_PT_GPREGS_ON_STACK
+	PUSH_AND_CLEAR_REGS
+	ENCODE_FRAME_POINTER
 
 	/*
 	 * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit