lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20201116144757.1920077-8-alexandre.chartre@oracle.com>
Date:   Mon, 16 Nov 2020 15:47:43 +0100
From:   Alexandre Chartre <alexandre.chartre@...cle.com>
To:     tglx@...utronix.de, mingo@...hat.com, bp@...en8.de, hpa@...or.com,
        x86@...nel.org, dave.hansen@...ux.intel.com, luto@...nel.org,
        peterz@...radead.org, linux-kernel@...r.kernel.org,
        thomas.lendacky@....com, jroedel@...e.de
Cc:     konrad.wilk@...cle.com, jan.setjeeilers@...cle.com,
        junaids@...gle.com, oweisse@...gle.com, rppt@...ux.vnet.ibm.com,
        graf@...zon.de, mgross@...ux.intel.com, kuzuno@...il.com,
        alexandre.chartre@...cle.com
Subject: [RFC][PATCH v2 07/21] x86/entry: Fill ESPFIX stack using C code

The ESPFIX stack is filled using assembly code. Move this code to a C
function so that it is easier to read and modify.

Signed-off-by: Alexandre Chartre <alexandre.chartre@...cle.com>
---
 arch/x86/entry/entry_64.S   | 62 ++++++++++++++++++-------------------
 arch/x86/kernel/espfix_64.c | 41 ++++++++++++++++++++++++
 2 files changed, 72 insertions(+), 31 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 73e9cd47dc83..6e0b5b010e0b 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -684,8 +684,10 @@ native_irq_return_ldt:
 	 * long (see ESPFIX_STACK_SIZE).  espfix_waddr points to the bottom
 	 * of the ESPFIX stack.
 	 *
-	 * We clobber RAX and RDI in this code.  We stash RDI on the
-	 * normal stack and RAX on the ESPFIX stack.
+	 * We call into C code to fill the ESPFIX stack. We stash registers
+	 * that the C function can clobber on the normal stack. The user RAX
+	 * is stashed first so that it is adjacent to the iret frame which
+	 * will be copied to the ESPFIX stack.
 	 *
 	 * The ESPFIX stack layout we set up looks like this:
 	 *
@@ -699,39 +701,37 @@ native_irq_return_ldt:
 	 * --- bottom of ESPFIX stack ---
 	 */
 
-	pushq	%rdi				/* Stash user RDI */
-	SWAPGS					/* to kernel GS */
-	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi	/* to kernel CR3 */
-
-	movq	PER_CPU_VAR(espfix_waddr), %rdi
-	movq	%rax, (0*8)(%rdi)		/* user RAX */
-	movq	(1*8)(%rsp), %rax		/* user RIP */
-	movq	%rax, (1*8)(%rdi)
-	movq	(2*8)(%rsp), %rax		/* user CS */
-	movq	%rax, (2*8)(%rdi)
-	movq	(3*8)(%rsp), %rax		/* user RFLAGS */
-	movq	%rax, (3*8)(%rdi)
-	movq	(5*8)(%rsp), %rax		/* user SS */
-	movq	%rax, (5*8)(%rdi)
-	movq	(4*8)(%rsp), %rax		/* user RSP */
-	movq	%rax, (4*8)(%rdi)
-	/* Now RAX == RSP. */
-
-	andl	$0xffff0000, %eax		/* RAX = (RSP & 0xffff0000) */
+	/* save registers */
+	pushq	%rax
+	pushq	%rdi
+	pushq	%rsi
+	pushq	%rdx
+	pushq	%rcx
+	pushq	%r8
+	pushq	%r9
+	pushq	%r10
+	pushq	%r11
 
 	/*
-	 * espfix_stack[31:16] == 0.  The page tables are set up such that
-	 * (espfix_stack | (X & 0xffff0000)) points to a read-only alias of
-	 * espfix_waddr for any X.  That is, there are 65536 RO aliases of
-	 * the same page.  Set up RSP so that RSP[31:16] contains the
-	 * respective 16 bits of the /userspace/ RSP and RSP nonetheless
-	 * still points to an RO alias of the ESPFIX stack.
+	 * fill_espfix_stack will copy the iret+rax frame to the ESPFIX
+	 * stack and return with RAX containing a pointer to the ESPFIX
+	 * stack.
 	 */
-	orq	PER_CPU_VAR(espfix_stack), %rax
+	leaq	8*8(%rsp), %rdi		/* points to the iret+rax frame */
+	call	fill_espfix_stack
 
-	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
-	SWAPGS					/* to user GS */
-	popq	%rdi				/* Restore user RDI */
+	/*
+	 * RAX contains a pointer to the ESPFIX, so restore registers but
+	 * RAX. RAX will be restored from the ESPFIX stack.
+	 */
+	popq	%r11
+	popq	%r10
+	popq	%r9
+	popq	%r8
+	popq	%rcx
+	popq	%rdx
+	popq	%rsi
+	popq	%rdi
 
 	movq	%rax, %rsp
 	UNWIND_HINT_IRET_REGS offset=8
diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index 4fe7af58cfe1..ff4b5160b39c 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -33,6 +33,7 @@
 #include <asm/pgalloc.h>
 #include <asm/setup.h>
 #include <asm/espfix.h>
+#include <asm/entry-common.h>
 
 /*
  * Note: we only need 6*8 = 48 bytes for the espfix stack, but round
@@ -205,3 +206,43 @@ void init_espfix_ap(int cpu)
 	per_cpu(espfix_waddr, cpu) = (unsigned long)stack_page
 				      + (addr & ~PAGE_MASK);
 }
+
+/*
+ * iret frame with an additional user_rax register.
+ */
+struct iret_rax_frame {
+	unsigned long user_rax;
+	unsigned long rip;
+	unsigned long cs;
+	unsigned long rflags;
+	unsigned long rsp;
+	unsigned long ss;
+};
+
+noinstr unsigned long fill_espfix_stack(struct iret_rax_frame *frame)
+{
+	struct iret_rax_frame *espfix_frame;
+	unsigned long rsp;
+
+	native_swapgs();
+	user_pagetable_exit();
+
+	espfix_frame = (struct iret_rax_frame *)this_cpu_read(espfix_waddr);
+	*espfix_frame = *frame;
+
+	/*
+	 * espfix_stack[31:16] == 0.  The page tables are set up such that
+	 * (espfix_stack | (X & 0xffff0000)) points to a read-only alias of
+	 * espfix_waddr for any X.  That is, there are 65536 RO aliases of
+	 * the same page.  Set up RSP so that RSP[31:16] contains the
+	 * respective 16 bits of the /userspace/ RSP and RSP nonetheless
+	 * still points to an RO alias of the ESPFIX stack.
+	 */
+	rsp = ((unsigned long)espfix_frame) & 0xffff0000;
+	rsp |= this_cpu_read(espfix_stack);
+
+	user_pagetable_enter();
+	native_swapgs();
+
+	return rsp;
+}
-- 
2.18.4

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ