lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1428681033-1549-9-git-send-email-andi@firstfloor.org>
Date:	Fri, 10 Apr 2015 08:50:33 -0700
From:	Andi Kleen <andi@...stfloor.org>
To:	x86@...nel.org
Cc:	luto@...nel.org, linux-kernel@...r.kernel.org,
	Andi Kleen <ak@...ux.intel.com>
Subject: [PATCH 8/8] x86: Use rd/wr fs/gs base in arch_prctl

From: Andi Kleen <ak@...ux.intel.com>

Convert arch_prctl to use the new RD/WR FS/GS BASE instructions to
change the fs/gs base if available, instead of using MSRs.

This is merely a small performance optimization,
no new functionality.

With the new instructions the syscall is really obsolete,
as everything can be set directly in ring 3. But the syscall
is widely used by existing software, so we still support it.

The syscall still enforces that the addresses are not
in kernel space, even though that is no longer needed.
This is mainly so that the programs written for new CPUs
do not suddenly fail on old CPUs.

With the new instructions available, the kernel prefers to use
them in the context switch, instead of using the old
"use GDT segment rewrite" trick.

v2: Make kprobes safe
Signed-off-by: Andi Kleen <ak@...ux.intel.com>
---
 arch/x86/kernel/process_64.c | 48 ++++++++++++++++++++++++++++++++++++--------
 1 file changed, 40 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3019c51..1fe4d79 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -534,20 +534,38 @@ unsigned long get_wchan(struct task_struct *p)
 	return 0;
 }
 
+static noinline __kprobes void reload_user_gs(unsigned long addr)
+{
+	local_irq_disable();
+	swapgs();
+	loadsegment(gs, 0);
+	wrgsbase(addr);
+	swapgs();
+	local_irq_enable();
+}
+
 long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 {
 	int ret = 0;
 	int doit = task == current;
 	int cpu;
+	int fast_seg = boot_cpu_has(X86_FEATURE_FSGSBASE);
 
 	switch (code) {
 	case ARCH_SET_GS:
+		/*
+		 * With fast_seg we don't need that check anymore,
+		 * but keep it so that programs do not suddenly
+		 * start failing when run on older CPUs.
+		 * If you really want to set an address in kernel space
+		 * use WRGSBASE directly.
+		 */
 		if (addr >= TASK_SIZE_OF(task))
 			return -EPERM;
 		cpu = get_cpu();
 		/* handle small bases via the GDT because that's faster to
 		   switch. */
-		if (addr <= 0xffffffff) {
+		if (addr <= 0xffffffff && !fast_seg) {
 			set_32bit_tls(task, GS_TLS, addr);
 			if (doit) {
 				load_TLS(&task->thread, cpu);
@@ -559,8 +577,12 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 			task->thread.gsindex = 0;
 			task->thread.gs = addr;
 			if (doit) {
-				load_gs_index(0);
-				ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
+				if (fast_seg) {
+					reload_user_gs(addr);
+				} else {
+					load_gs_index(0);
+					ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
+				}
 			}
 		}
 		put_cpu();
@@ -573,7 +595,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 		cpu = get_cpu();
 		/* handle small bases via the GDT because that's faster to
 		   switch. */
-		if (addr <= 0xffffffff) {
+		if (addr <= 0xffffffff && !fast_seg) {
 			set_32bit_tls(task, FS_TLS, addr);
 			if (doit) {
 				load_TLS(&task->thread, cpu);
@@ -588,7 +610,10 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 				/* set the selector to 0 to not confuse
 				   __switch_to */
 				loadsegment(fs, 0);
-				ret = wrmsrl_safe(MSR_FS_BASE, addr);
+				if (fast_seg)
+					wrfsbase(addr);
+				else
+					ret = wrmsrl_safe(MSR_FS_BASE, addr);
 			}
 		}
 		put_cpu();
@@ -597,6 +622,8 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 		unsigned long base;
 		if (task->thread.fsindex == FS_TLS_SEL)
 			base = read_32bit_tls(task, FS_TLS);
+		else if (doit && fast_seg)
+			base = rdfsbase();
 		else if (doit)
 			rdmsrl(MSR_FS_BASE, base);
 		else
@@ -611,9 +638,14 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 			base = read_32bit_tls(task, GS_TLS);
 		else if (doit) {
 			savesegment(gs, gsindex);
-			if (gsindex)
-				rdmsrl(MSR_KERNEL_GS_BASE, base);
-			else
+			if (gsindex) {
+				if (fast_seg) {
+					local_irq_disable();
+					base = read_user_gs();
+					local_irq_enable();
+				} else
+					rdmsrl(MSR_KERNEL_GS_BASE, base);
+			} else
 				base = task->thread.gs;
 		} else
 			base = task->thread.gs;
-- 
1.9.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ