Message-Id: <20180108201531.6782-1-andi@firstfloor.org>
Date: Mon, 8 Jan 2018 12:15:31 -0800
From: Andi Kleen <andi@...stfloor.org>
To: dwmw@...zon.co.uk
Cc: pjt@...gle.com, linux-kernel@...r.kernel.org,
gregkh@...ux-foundation.org, tim.c.chen@...ux.intel.com,
dave.hansen@...el.com, tglx@...utronix.de, luto@...capital.net,
Andi Kleen <ak@...ux.intel.com>
Subject: [PATCH] x86/retpoline: Avoid return buffer underflows on context switch
From: Andi Kleen <ak@...ux.intel.com>
[This is on top of David's retpoline branch, as of 08-01 this morning]

This patch further hardens retpoline.
CPUs have return buffers which store return addresses used by RET to
predict function returns. Some CPUs (Skylake, some Broadwells) can fall
back to indirect branch prediction on return buffer underflow.

With retpoline we want to avoid uncontrolled indirect branches, which
could be poisoned by ring 3, so we need to avoid uncontrolled return
buffer underflows in the kernel.
This can happen when we context switch from a shallower to a deeper
kernel stack. The deeper kernel stack would eventually underflow the
return buffer, which would again fall back to the indirect branch
predictor.

To guard against this, fill the return buffer with controlled content
during context switch. This prevents any underflows.
We always fill the buffer with 30 entries: 32 minus 2 for at
least one call from entry_{64,32}.S to C code and another into
the function doing the filling.
That's pessimistic because we likely did more controlled kernel calls,
so in principle we could fill fewer entries. However it's hard to
maintain such an invariant, and it may be broken by more aggressive
compilers. So err on the side of safety and always fill 30.
Signed-off-by: Andi Kleen <ak@...ux.intel.com>
---
arch/x86/entry/entry_32.S | 15 +++++++++++++++
arch/x86/entry/entry_64.S | 15 +++++++++++++++
arch/x86/include/asm/nospec-branch.h | 29 +++++++++++++++++++++++++++++
3 files changed, 59 insertions(+)
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index cf9ef33d299b..5404a9b2197c 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -250,6 +250,21 @@ ENTRY(__switch_to_asm)
popl %ebx
popl %ebp
+ /*
+ * When we switch from a shallower to a deeper kernel call stack,
+ * the return buffer can underflow in the new task.
+ * This could cause the CPU to fall back to indirect branch
+ * prediction, which may be poisoned.
+ *
+ * To guard against that always fill the return stack with
+ * known values.
+ *
+ * We do this in assembler because it needs to be before
+ * any calls on the new stack, and this can be difficult to
+ * ensure in a complex C function like __switch_to.
+ */
+ ALTERNATIVE "jmp __switch_to", "", X86_FEATURE_RETPOLINE
+ FILL_RETURN_BUFFER
jmp __switch_to
END(__switch_to_asm)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 9bce6ed03353..0f28d0ea57e8 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -495,6 +495,21 @@ ENTRY(__switch_to_asm)
popq %rbx
popq %rbp
+ /*
+ * When we switch from a shallower to a deeper kernel call stack,
+ * the return buffer can underflow in the new task.
+ * This could cause the CPU to fall back to indirect branch
+ * prediction, which may be poisoned.
+ *
+ * To guard against that always fill the return stack with
+ * known values.
+ *
+ * We do this in assembler because it needs to be before
+ * any calls on the new stack, and this can be difficult to
+ * ensure in a complex C function like __switch_to.
+ */
+ ALTERNATIVE "jmp __switch_to", "", X86_FEATURE_RETPOLINE
+ FILL_RETURN_BUFFER
jmp __switch_to
END(__switch_to_asm)
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index b8c8eeacb4be..e84e231248c2 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -53,6 +53,35 @@
#endif
.endm
+/*
+ * We use 32-N: 32 is the max return buffer size,
+ * but there should have been at a minimum two
+ * controlled calls already: one into the kernel
+ * from entry*.S and another into the function
+ * containing this macro. So N=2, thus 30.
+ */
+#define NUM_BRANCHES_TO_FILL 30
+
+/*
+ * Fill the CPU return branch buffer to prevent
+ * indirect branch prediction on underflow.
+ * Caller should check for X86_FEATURE_SMEP and X86_FEATURE_RETPOLINE.
+ */
+.macro FILL_RETURN_BUFFER
+#ifdef CONFIG_RETPOLINE
+ .rept NUM_BRANCHES_TO_FILL
+ call 1221f
+ pause /* stop speculation */
+1221:
+ .endr
+#ifdef CONFIG_64BIT
+ addq $8*NUM_BRANCHES_TO_FILL, %rsp
+#else
+ addl $4*NUM_BRANCHES_TO_FILL, %esp
+#endif
+#endif
+.endm
+
#else /* __ASSEMBLY__ */
#if defined(CONFIG_X86_64) && defined(RETPOLINE)
--
2.14.3