lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Mon,  8 Jan 2018 17:12:19 +0100
From:   Willy Tarreau <w@....eu>
To:     linux-kernel@...r.kernel.org, x86@...nel.org
Cc:     tglx@...utronix.de, gnomes@...rguk.ukuu.org.uk,
        torvalds@...ux-foundation.org, Willy Tarreau <w@....eu>
Subject: [PATCH RFC 4/4] x86/entry/pti: don't switch PGD on tasks holding flag TIF_NOPTI

If a task has the TIF_NOPTI flag set, it has opted out of page table
isolation. In this case, returns from kernel to user space do not
switch CR3, leaving it pointing to the kernel PGD, which already maps
both user and kernel pages. Upon entry into the kernel, we can't check
this flag, so we simply check whether CR3 already points to the kernel
PGD, which indicates that no switch was done on the previous exit, and
in this case we don't change it.

Thanks to these changes, haproxy running under KVM went back up from
12400 conn/s to 21000 conn/s under load after calling prctl().

Signed-off-by: Willy Tarreau <w@....eu>
---
 arch/x86/entry/calling.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 45a63e0..054b8b7 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -1,5 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #include <linux/jump_label.h>
+#include <asm/thread_info.h>
 #include <asm/unwind_hints.h>
 #include <asm/cpufeatures.h>
 #include <asm/page_types.h>
@@ -214,6 +215,11 @@
 .macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
 	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
 	mov	%cr3, \scratch_reg
+
+	/* if we're already on the kernel PGD, we don't switch */
+	testq $(PTI_SWITCH_PGTABLES_MASK), \scratch_reg
+	jz .Lend_\@
+
 	ADJUST_KERNEL_CR3 \scratch_reg
 	mov	\scratch_reg, %cr3
 .Lend_\@:
@@ -224,6 +230,12 @@
 
 .macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
 	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+
+	/* "NOPTI" taskflag avoids the switch */
+	movq	PER_CPU_VAR(current_task), \scratch_reg
+	btq	$TIF_NOPTI, TASK_TI_flags(\scratch_reg)
+	jc	.Lend_\@
+
 	mov	%cr3, \scratch_reg
 
 	ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
@@ -262,6 +274,13 @@
 	ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI
 	movq	%cr3, \scratch_reg
 	movq	\scratch_reg, \save_reg
+
+	/* if we're already on the kernel PGD, we don't switch,
+	 * we just save the current cr3.
+	 */
+	testq $(PTI_SWITCH_PGTABLES_MASK), \scratch_reg
+	jz .Ldone_\@
+
 	/*
 	 * Is the "switch mask" all zero?  That means that both of
 	 * these are zero:
@@ -284,6 +303,10 @@
 .macro RESTORE_CR3 scratch_reg:req save_reg:req
 	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
 
+	/* if we saved a kernel context, we didn't switch so we don't switch */
+	testq $(PTI_SWITCH_PGTABLES_MASK), \save_reg
+	jz .Lend_\@
+
 	ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
 
 	/*
-- 
1.7.12.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ