lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230821112723.3995187-5-andrew.cooper3@citrix.com>
Date:   Mon, 21 Aug 2023 12:27:23 +0100
From:   Andrew Cooper <andrew.cooper3@...rix.com>
To:     LKML <linux-kernel@...r.kernel.org>
CC:     Andrew Cooper <andrew.cooper3@...rix.com>, <x86@...nel.org>,
        "Borislav Petkov" <bp@...en8.de>,
        Peter Zijlstra <peterz@...radead.org>,
        Josh Poimboeuf <jpoimboe@...nel.org>,
        Babu Moger <babu.moger@....com>, <David.Kaplan@....com>,
        Nikolay Borisov <nik.borisov@...e.com>,
        <gregkh@...uxfoundation.org>, Thomas Gleixner <tglx@...utronix.de>
Subject: [PATCH RFC 4/4] x86/srso: Use CALL-based return thunks to reduce overhead

The SRSO safety depends on having a CALL to an {ADD,LEA}/RET sequence which
has been made safe in the BTB.  Specifically, there needs to be no perturbation
of the RAS between a correctly predicted CALL and the subsequent RET.

Use the new infrastructure to CALL to a return thunk.  Remove the
srso_fam1?_safe_ret() symbols and point srso_fam1?_return_thunk() directly
at the LEA/RET sequences instead.

This removes one taken branch from every function return, which will reduce
the overhead of the mitigation.  It also removes one of three moving pieces
from the SRSO mess.

Signed-off-by: Andrew Cooper <andrew.cooper3@...rix.com>
---
CC: x86@...nel.org
CC: linux-kernel@...r.kernel.org
CC: Borislav Petkov <bp@...en8.de>
CC: Peter Zijlstra <peterz@...radead.org>
CC: Josh Poimboeuf <jpoimboe@...nel.org>
CC: Babu Moger <babu.moger@....com>
CC: David.Kaplan@....com
CC: Nikolay Borisov <nik.borisov@...e.com>
CC: gregkh@...uxfoundation.org
CC: Thomas Gleixner <tglx@...utronix.de>

RFC:

  vmlinux.o: warning: objtool: srso_fam17_return_thunk(): can't find starting instruction

Any objtool whisperers know what's going on, and particularly why
srso_fam19_return_thunk() appears to be happy?

Also, depends on the resolution of the RFC in the previous patch.
---
 arch/x86/kernel/cpu/bugs.c    |  4 ++-
 arch/x86/kernel/vmlinux.lds.S |  6 ++---
 arch/x86/lib/retpoline.S      | 47 ++++++++++++++---------------------
 3 files changed, 25 insertions(+), 32 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index de2f84aa526f..c4d580b485a7 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -2458,8 +2458,10 @@ static void __init srso_select_mitigation(void)
 		if (IS_ENABLED(CONFIG_CPU_SRSO)) {
 			/*
 			 * Enable the return thunk for generated code
-			 * like ftrace, static_call, etc.
+			 * like ftrace, static_call, etc.  These
+			 * ret-thunks need to call to their target.
 			 */
+			x86_return_thunk_use_call = true;
 			setup_force_cpu_cap(X86_FEATURE_RETHUNK);
 			setup_force_cpu_cap(X86_FEATURE_UNRET);
 
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 127ccdbf6d95..ed7d4020c2b4 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -522,7 +522,7 @@ INIT_PER_CPU(irq_stack_backing_store);
 
 #ifdef CONFIG_RETHUNK
 . = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned");
-. = ASSERT((srso_fam17_safe_ret & 0x3f) == 0, "srso_fam17_safe_ret not cacheline-aligned");
+. = ASSERT((srso_fam17_return_thunk & 0x3f) == 0, "srso_fam17_return_thunk not cacheline-aligned");
 #endif
 
 #ifdef CONFIG_CPU_SRSO
@@ -536,8 +536,8 @@ INIT_PER_CPU(irq_stack_backing_store);
  * Instead do: (A | B) - (A & B) in order to compute the XOR
  * of the two function addresses:
  */
-. = ASSERT(((ABSOLUTE(srso_fam19_untrain_ret) | srso_fam19_safe_ret) -
-		(ABSOLUTE(srso_fam19_untrain_ret) & srso_fam19_safe_ret)) == ((1 << 2) | (1 << 8) | (1 << 14) | (1 << 20)),
+. = ASSERT(((ABSOLUTE(srso_fam19_untrain_ret) | srso_fam19_return_thunk) -
+		(ABSOLUTE(srso_fam19_untrain_ret) & srso_fam19_return_thunk)) == ((1 << 2) | (1 << 8) | (1 << 14) | (1 << 20)),
 		"SRSO function pair won't alias");
 #endif
 
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index d8732ae21122..2b1c92632158 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -133,11 +133,11 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array)
 #ifdef CONFIG_RETHUNK
 
 /*
- * srso_fam19_untrain_ret() and srso_fam19_safe_ret() are placed at
+ * srso_fam19_untrain_ret() and srso_fam19_return_thunk() are placed at
  * special addresses:
  *
  * - srso_fam19_untrain_ret() is 2M aligned
- * - srso_fam19_safe_ret() is also in the same 2M page but bits 2, 8, 14
+ * - srso_fam19_return_thunk() is also in the same 2M page but bits 2, 8, 14
  * and 20 in its virtual address are set (while those bits in the
  * srso_fam19_untrain_ret() function are cleared).
  *
@@ -145,7 +145,7 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array)
  * target buffer of Zen3/4 generations, leading to any potential
  * poisoned entries at that BTB slot to get evicted.
  *
- * As a result, srso_fam19_safe_ret() becomes a safe return.
+ * As a result, srso_fam19_return_thunk() becomes a safe return.
  */
 #ifdef CONFIG_CPU_SRSO
 	.section .text..__x86.rethunk_untrain
@@ -155,7 +155,8 @@ SYM_START(srso_fam19_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
 	ANNOTATE_NOENDBR
 	ASM_NOP2
 	lfence
-	jmp srso_fam19_return_thunk
+	call srso_fam19_return_thunk
+	ud2
 SYM_FUNC_END(srso_fam19_untrain_ret)
 __EXPORT_THUNK(srso_fam19_untrain_ret)
 
@@ -169,23 +170,17 @@ SYM_START(srso_fam19_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
 SYM_FUNC_END(srso_fam19_untrain_ret)
 #endif
 
-SYM_START(srso_fam19_safe_ret, SYM_L_GLOBAL, SYM_A_NONE)
-	lea 8(%_ASM_SP), %_ASM_SP
+SYM_START(srso_fam19_return_thunk, SYM_L_GLOBAL, SYM_A_NONE)
 	UNWIND_HINT_FUNC
+	ANNOTATE_NOENDBR
+	lea 8(%_ASM_SP), %_ASM_SP
 	ANNOTATE_UNRET_SAFE
 	ret
 	int3
-SYM_FUNC_END(srso_fam19_safe_ret)
+SYM_FUNC_END(srso_fam19_return_thunk)
 
 	.section .text..__x86.return_thunk
 
-SYM_CODE_START(srso_fam19_return_thunk)
-	UNWIND_HINT_FUNC
-	ANNOTATE_NOENDBR
-	call srso_fam19_safe_ret
-	ud2
-SYM_CODE_END(srso_fam19_return_thunk)
-
 /*
  * Some generic notes on the untraining sequences:
  *
@@ -194,13 +189,13 @@ SYM_CODE_END(srso_fam19_return_thunk)
  *
  * The SRSO Zen1/2 (MOVABS) untraining sequence is longer than the
  * Retbleed sequence because the return sequence done there
- * (srso_fam17_safe_ret()) is longer and the return sequence must fully nest
+ * (srso_fam17_return_thunk()) is longer and the return sequence must fully nest
  * (end before) the untraining sequence. Therefore, the untraining
  * sequence must fully overlap the return sequence.
  *
  * Regarding alignment - the instructions which need to be untrained,
  * must all start at a cacheline boundary for Zen1/2 generations. That
- * is, instruction sequences starting at srso_fam17_safe_ret() and
+ * is, instruction sequences starting at srso_fam17_return_thunk() and
  * the respective instruction sequences at retbleed_return_thunk()
  * must start at a cacheline boundary.
  */
@@ -272,12 +267,12 @@ __EXPORT_THUNK(retbleed_untrain_ret)
  *
  * movabs $0xccccc30824648d48,%rax
  *
- * and when the return thunk executes the inner label srso_fam17_safe_ret()
+ * and when the return thunk executes the inner label srso_fam17_return_thunk()
  * later, it is a stack manipulation and a RET which is mispredicted and
  * thus a "safe" one to use.
  */
 	.align 64
-	.skip 64 - (srso_fam17_safe_ret - srso_fam17_untrain_ret), 0xcc
+	.skip 64 - (srso_fam17_return_thunk - srso_fam17_untrain_ret), 0xcc
 SYM_START(srso_fam17_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
 	ANNOTATE_NOENDBR
 	.byte 0x48, 0xb8
@@ -288,26 +283,22 @@ SYM_START(srso_fam17_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
  * and execution will continue at the return site read from the top of
  * the stack.
  */
-SYM_INNER_LABEL(srso_fam17_safe_ret, SYM_L_GLOBAL)
+SYM_INNER_LABEL(srso_fam17_return_thunk, SYM_L_GLOBAL)
+	UNWIND_HINT_FUNC
+	ANNOTATE_NOENDBR
 	lea 8(%_ASM_SP), %_ASM_SP
+	ANNOTATE_UNRET_SAFE
 	ret
 	int3
 	int3
 	/* end of movabs */
 	lfence
-	call srso_fam17_safe_ret
+	call srso_fam17_return_thunk
 	ud2
-SYM_CODE_END(srso_fam17_safe_ret)
+SYM_CODE_END(srso_fam17_return_thunk)
 SYM_FUNC_END(srso_fam17_untrain_ret)
 __EXPORT_THUNK(srso_fam17_untrain_ret)
 
-SYM_CODE_START(srso_fam17_return_thunk)
-	UNWIND_HINT_FUNC
-	ANNOTATE_NOENDBR
-	call srso_fam17_safe_ret
-	ud2
-SYM_CODE_END(srso_fam17_return_thunk)
-
 SYM_FUNC_START(entry_untrain_ret)
 	ALTERNATIVE_2 "jmp retbleed_untrain_ret", \
 		      "jmp srso_fam17_untrain_ret", X86_FEATURE_SRSO, \
-- 
2.30.2

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ