lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20210831175025.27570-23-jiangshanlai@gmail.com>
Date:   Wed,  1 Sep 2021 01:50:23 +0800
From:   Lai Jiangshan <jiangshanlai@...il.com>
To:     linux-kernel@...r.kernel.org
Cc:     Lai Jiangshan <laijs@...ux.alibaba.com>,
        Andy Lutomirski <luto@...nel.org>,
        Thomas Gleixner <tglx@...utronix.de>,
        Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
        x86@...nel.org, "H. Peter Anvin" <hpa@...or.com>,
        Joerg Roedel <jroedel@...e.de>,
        Youquan Song <youquan.song@...el.com>,
        Tony Luck <tony.luck@...el.com>
Subject: [PATCH 22/24] x86/entry: Implement and use do_paranoid_entry() and paranoid_exit()

From: Lai Jiangshan <laijs@...ux.alibaba.com>

All the facilities are set in traps.c, so we can implement the major body
of paranoid_entry() in C as do_paranoid_entry() and the whole
paranoid_exit() in C.

paranoid_entry() needs to save two values which are added into the
struct ist_regs.  And paranoid_exit() use them after the interrupt
is handled.

No functional change intended.

Signed-off-by: Lai Jiangshan <laijs@...ux.alibaba.com>
---
 arch/x86/entry/entry_64.S    | 128 +++--------------------------------
 arch/x86/entry/traps.c       |  62 +++++++++++++++++
 arch/x86/include/asm/traps.h |  22 ++++++
 3 files changed, 92 insertions(+), 120 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 1ae10ca351f4..8b2e19e6c9e1 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -437,6 +437,7 @@ SYM_CODE_START(\asmsym)
 
 	call	\cfunc
 
+	movq	%rsp, %rdi		/* ist_regs pointer */
 	call	paranoid_exit
 	addq	$IST_pt_regs, %rsp	/* put %rsp back to pt_regs */
 	jmp	restore_regs_and_return_to_kernel
@@ -516,6 +517,7 @@ SYM_CODE_START(\asmsym)
 	 * identical to the stack in the IRET frame or the VC fall-back stack,
 	 * so it is definitely mapped even with PTI enabled.
 	 */
+	movq	%rsp, %rdi		/* ist_regs pointer */
 	call	paranoid_exit
 	addq	$IST_pt_regs, %rsp	/* put %rsp back to pt_regs */
 	jmp	restore_regs_and_return_to_kernel
@@ -548,6 +550,7 @@ SYM_CODE_START(\asmsym)
 	movq	$-1, ORIG_RAX(%rdi)	/* no syscall to restart */
 	call	\cfunc
 
+	movq	%rsp, %rdi		/* ist_regs pointer */
 	call	paranoid_exit
 	addq	$IST_pt_regs, %rsp	/* put %rsp back to pt_regs */
 	jmp	restore_regs_and_return_to_kernel
@@ -840,14 +843,8 @@ SYM_CODE_END(xen_failsafe_callback)
 #endif /* CONFIG_XEN_PV */
 
 /*
- * Save all registers in pt_regs. Return GSBASE related information
- * in EBX depending on the availability of the FSGSBASE instructions:
- *
- * FSGSBASE	R/EBX
- *     N        0 -> SWAPGS on exit
- *              1 -> no SWAPGS on exit
- *
- *     Y        GSBASE value at entry, must be restored in paranoid_exit
+ * Save all registers and addtional info in ist_regs.
+ * Switch CR3 and gsbase if needed.
  */
 SYM_CODE_START_LOCAL(paranoid_entry)
 	UNWIND_HINT_FUNC
@@ -856,124 +853,14 @@ SYM_CODE_START_LOCAL(paranoid_entry)
 	movq	RDI(%rsp), %rsi	/* temporarily store the return address in %rsi */
 	movq	%rdi, RDI(%rsp) /* put %rdi onto pt_regs */
 	subq	$IST_pt_regs, %rsp /* reserve room for ist_regs */
+	movq	%rsp, %rdi	/* ist_regs pointer */
 	pushq	%rsi		/* put the return address onto the stack */
 	ENCODE_FRAME_POINTER 8+IST_pt_regs
 
-	/*
-	 * Always stash CR3 in %r14.  This value will be restored,
-	 * verbatim, at exit.  Needed if paranoid_entry interrupted
-	 * another entry that already switched to the user CR3 value
-	 * but has not yet returned to userspace.
-	 *
-	 * This is also why CS (stashed in the "iret frame" by the
-	 * hardware at entry) can not be used: this may be a return
-	 * to kernel code, but with a user CR3 value.
-	 *
-	 * Switching CR3 does not depend on kernel GSBASE so it can
-	 * be done before switching to the kernel GSBASE. This is
-	 * required for FSGSBASE because the kernel GSBASE has to
-	 * be retrieved from a kernel internal table.
-	 */
-	SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
-
-	/*
-	 * Handling GSBASE depends on the availability of FSGSBASE.
-	 *
-	 * Without FSGSBASE the kernel enforces that negative GSBASE
-	 * values indicate kernel GSBASE. With FSGSBASE no assumptions
-	 * can be made about the GSBASE value when entering from user
-	 * space.
-	 */
-	ALTERNATIVE "jmp .Lparanoid_entry_checkgs", "", X86_FEATURE_FSGSBASE
-
-	/*
-	 * Read the current GSBASE and store it in %rbx unconditionally,
-	 * retrieve and set the current CPUs kernel GSBASE. The stored value
-	 * has to be restored in paranoid_exit unconditionally.
-	 *
-	 * The unconditional write to GS base below ensures that no subsequent
-	 * loads based on a mispredicted GS base can happen, therefore no LFENCE
-	 * is needed here.
-	 */
-	SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx
-	ret
-
-.Lparanoid_entry_checkgs:
-	/* EBX = 1 -> kernel GSBASE active, no restore required */
-	movl	$1, %ebx
-	/*
-	 * The kernel-enforced convention is a negative GSBASE indicates
-	 * a kernel value. No SWAPGS needed on entry and exit.
-	 */
-	movl	$MSR_GS_BASE, %ecx
-	rdmsr
-	testl	%edx, %edx
-	jns	.Lparanoid_entry_swapgs
-	ret
-
-.Lparanoid_entry_swapgs:
-	swapgs
-
-	/*
-	 * The above SAVE_AND_SWITCH_TO_KERNEL_CR3 macro doesn't do an
-	 * unconditional CR3 write, even in the PTI case.  So do an lfence
-	 * to prevent GS speculation, regardless of whether PTI is enabled.
-	 */
-	FENCE_SWAPGS_KERNEL_ENTRY
-
-	/* EBX = 0 -> SWAPGS required on exit */
-	xorl	%ebx, %ebx
+	call	do_paranoid_entry
 	ret
 SYM_CODE_END(paranoid_entry)
 
-/*
- * "Paranoid" exit path from exception stack.  This is invoked
- * only on return from IST interrupts that came from kernel space.
- *
- * We may be returning to very strange contexts (e.g. very early
- * in syscall entry), so checking for preemption here would
- * be complicated.  Fortunately, there's no good reason to try
- * to handle preemption here.
- *
- * R/EBX contains the GSBASE related information depending on the
- * availability of the FSGSBASE instructions:
- *
- * FSGSBASE	R/EBX
- *     N        0 -> SWAPGS on exit
- *              1 -> no SWAPGS on exit
- *
- *     Y        User space GSBASE, must be restored unconditionally
- */
-SYM_CODE_START_LOCAL(paranoid_exit)
-	UNWIND_HINT_REGS offset=8
-	/*
-	 * The order of operations is important. RESTORE_CR3 requires
-	 * kernel GSBASE.
-	 *
-	 * NB to anyone to try to optimize this code: this code does
-	 * not execute at all for exceptions from user mode. Those
-	 * exceptions go through error_exit instead.
-	 */
-	RESTORE_CR3	scratch_reg=%rax save_reg=%r14
-
-	/* Handle the three GSBASE cases */
-	ALTERNATIVE "jmp .Lparanoid_exit_checkgs", "", X86_FEATURE_FSGSBASE
-
-	/* With FSGSBASE enabled, unconditionally restore GSBASE */
-	wrgsbase	%rbx
-	ret
-
-.Lparanoid_exit_checkgs:
-	/* On non-FSGSBASE systems, conditionally do SWAPGS */
-	testl		%ebx, %ebx
-	jnz		.Lparanoid_exit_done
-
-	/* We are returning to a context with user GSBASE */
-	swapgs
-.Lparanoid_exit_done:
-	ret
-SYM_CODE_END(paranoid_exit)
-
 /*
  * Save all registers in pt_regs, and switch GS if needed.
  */
@@ -1308,6 +1195,7 @@ end_repeat_nmi:
 	 * Use paranoid_exit to handle SWAPGS and CR3, but no need to use
 	 * restore_regs_and_return_to_kernel as we must handle nested NMI.
 	 */
+	movq	%rsp, %rdi			/* ist_regs pointer */
 	call	paranoid_exit
 	addq	$IST_pt_regs, %rsp		/* put %rsp back to pt_regs */
 
diff --git a/arch/x86/entry/traps.c b/arch/x86/entry/traps.c
index b5c92b4e0cb5..52511db6baa6 100644
--- a/arch/x86/entry/traps.c
+++ b/arch/x86/entry/traps.c
@@ -1029,6 +1029,68 @@ static __always_inline unsigned long ist_switch_to_kernel_gsbase(void)
 	/* SWAPGS required on exit */
 	return 0;
 }
+
+asmlinkage __visible __entry_text
+void do_paranoid_entry(struct ist_regs *ist)
+{
+	/*
+	 * Always stash CR3 in ist->cr3.  This value will be restored,
+	 * verbatim, at exit.  Needed if paranoid_entry interrupted
+	 * another entry that already switched to the user CR3 value
+	 * but has not yet returned to userspace.
+	 *
+	 * This is also why CS (stashed in the "iret frame" by the
+	 * hardware at entry) can not be used: this may be a return
+	 * to kernel code, but with a user CR3 value.
+	 *
+	 * Switching CR3 does not depend on kernel GSBASE so it can
+	 * be done before switching to the kernel GSBASE. This is
+	 * required for FSGSBASE because the kernel GSBASE has to
+	 * be retrieved from a kernel internal table.
+	 */
+	ist->cr3 = ist_switch_to_kernel_cr3();
+
+	/* Handle GSBASE, store the return value in ist_regs for exit. */
+	ist->gsbase = ist_switch_to_kernel_gsbase();
+}
+
+/*
+ * "Paranoid" exit path from exception stack.  This is invoked
+ * only on return from IST interrupts that came from kernel space.
+ *
+ * We may be returning to very strange contexts (e.g. very early
+ * in syscall entry), so checking for preemption here would
+ * be complicated.  Fortunately, there's no good reason to try
+ * to handle preemption here.
+ */
+asmlinkage __visible __entry_text
+void paranoid_exit(struct ist_regs *ist)
+{
+	/*
+	 * Restore cr3 at first, it can use kernel GSBASE.
+	 */
+	ist_restore_cr3(ist->cr3);
+
+	/*
+	 * Handle the three GSBASE cases.
+	 *
+	 * ist->gsbase contains the GSBASE related information depending
+	 * on the availability of the FSGSBASE instructions:
+	 *
+	 * FSGSBASE	ist->gsbase
+	 *     N        0 -> SWAPGS on exit
+	 *              1 -> no SWAPGS on exit
+	 *
+	 *     Y        User space GSBASE, must be restored unconditionally
+	 */
+	if (static_cpu_has(X86_FEATURE_FSGSBASE)) {
+		wrgsbase(ist->gsbase);
+		return;
+	}
+
+	if (!ist->gsbase)
+		native_swapgs();
+}
 #endif
 
 static bool is_sysenter_singlestep(struct pt_regs *regs)
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index e24c63bbc30a..0bc7117a01cd 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -12,6 +12,26 @@
 
 #ifdef CONFIG_X86_64
 struct ist_regs {
+	/*
+	 * Always stash CR3 in cr3.  This value will be restored,
+	 * verbatim, at exit.  Needed if paranoid_entry interrupted
+	 * another entry that already switched to the user CR3 value
+	 * but has not yet returned to userspace.
+	 */
+	unsigned long cr3;
+
+	/*
+	 * gsbase contains the GSBASE related information depending on the
+	 * availability of the FSGSBASE instructions:
+	 *
+	 * FSGSBASE	gsbase
+	 *     N        0 -> SWAPGS on exit
+	 *              1 -> no SWAPGS on exit
+	 *
+	 *     Y        User space GSBASE, must be restored unconditionally
+	 */
+	unsigned long gsbase;
+
 	/*
 	 * ist specific fields must be defined before pt_regs
 	 * and they are located below pt_regs on the stacks.
@@ -20,6 +40,8 @@ struct ist_regs {
 };
 
 asmlinkage __visible notrace struct pt_regs *do_error_entry(struct pt_regs *eregs);
+asmlinkage __visible notrace void do_paranoid_entry(struct ist_regs *ist);
+asmlinkage __visible notrace void paranoid_exit(struct ist_regs *ist);
 void __init trap_init(void);
 asmlinkage __visible noinstr struct ist_regs *vc_switch_off_ist(struct ist_regs *ist);
 #endif
-- 
2.19.1.6.gb485710b

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ