lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Fri,  5 Aug 2011 23:01:21 -0400
From:	Andy Lutomirski <luto@....EDU>
To:	"H. Peter Anvin" <hpa@...or.com>
Cc:	Andi Kleen <andi@...stfloor.org>, x86@...nel.org,
	linux-kernel@...r.kernel.org, torvalds@...ux-foundation.org,
	lueckintel@...oo.com, kimwooyoung@...il.com,
	Suresh Siddha <suresh.b.siddha@...el.com>,
	Andy Lutomirski <luto@....edu>
Subject: [RFC] x86-64: Allow emulated vsyscalls from user addresses

A few dynamic recompilation tools are too clever for their own good.
They trace control flow through the vsyscall page and recompile that
code somewhere else.  Then they expect it to work.  DynamoRIO
(http://dynamorio.org/) and Pin (http://www.pintool.org/) are
affected.  They crash when tracing programs that use vsyscalls.
Valgrind is smart enough not to cause problems.  It crashes on the
getcpu vsyscall, but that has nothing to do with emulation.

This patch makes each of the three vsyscall entries use a different
vector so that they can work when relocated.  It assumes that the
code that relocates them is okay with the int instruction acting
like ret.  DynamoRIO at least appears to work.

We print an obnoxious (rate-limited) message to the log when this
happens.  Hopefully it will inspire the JIT tools to learn not to
trace into kernel address space.

Signed-off-by: Andy Lutomirski <luto@....edu>
---

This uses vectors 0x40, 0x41, and 0x42 for now.  Those byte values
are REX prefixes in 64-bit code, so jumping to the second byte of one
of these int instructions will decode as 'rex.? int3', which will
trap.

 arch/x86/kernel/vsyscall_64.c     |   75 ++++++++++--------------------------
 arch/x86/kernel/vsyscall_emu_64.S |    6 +-
 2 files changed, 24 insertions(+), 57 deletions(-)

diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index f785f5b..a33ad02 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -105,22 +105,8 @@ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
 	       regs->sp, regs->ax, regs->si, regs->di);
 }
 
-static int addr_to_vsyscall_nr(unsigned long addr)
-{
-	int nr;
-
-	if ((addr & ~0xC00UL) != VSYSCALL_START)
-		return -EINVAL;
-
-	nr = (addr & 0xC00UL) >> 10;
-	if (nr >= 3)
-		return -EINVAL;
-
-	return nr;
-}
-
-void emulate_vsyscall(struct pt_regs *regs, int nr,
-		      long (*vsys)(struct pt_regs *))
+static void emulate_vsyscall(struct pt_regs *regs, int nr,
+			     long (*vsys)(struct pt_regs *))
 {
 	struct task_struct *tsk;
 	unsigned long caller;
@@ -128,6 +114,8 @@ void emulate_vsyscall(struct pt_regs *regs, int nr,
 
 	local_irq_enable();
 
+	trace_emulate_vsyscall(nr);
+
 	if (!user_64bit_mode(regs)) {
 		/*
 		 * If we trapped from kernel mode, we might as well OOPS now
@@ -138,50 +126,29 @@ void emulate_vsyscall(struct pt_regs *regs, int nr,
 
 		/* Compat mode and non-compat 32-bit CS should both segfault. */
 		warn_bad_vsyscall(KERN_WARNING, regs,
-				  "illegal int 0xcc from 32-bit mode");
+				  "illegal emulated vsyscall from 32-bit mode");
 		goto sigsegv;
 	}
 
-	/*
-	 * x86-ism here: regs->ip points to the instruction after the int 0xcc,
-	 * and int 0xcc is two bytes long.
-	 */
-	vsyscall_nr = addr_to_vsyscall_nr(regs->ip - 2);
-
-	trace_emulate_vsyscall(vsyscall_nr);
-
-	if (vsyscall_nr < 0) {
-		warn_bad_vsyscall(KERN_WARNING, regs,
-				  "illegal int 0xcc (exploit attempt?)");
-		goto sigsegv;
-	}
+	tsk = current;
+	if (seccomp_mode(&tsk->seccomp))
+		do_exit(SIGKILL);
 
 	if (get_user(caller, (unsigned long __user *)regs->sp) != 0) {
 		warn_bad_vsyscall(KERN_WARNING, regs, "int 0xcc with bad stack (exploit attempt?)");
 		goto sigsegv;
 	}
 
-	tsk = current;
-	if (seccomp_mode(&tsk->seccomp))
-		do_exit(SIGKILL);
+	/*
+	 * x86-ism here: regs->ip points to the instruction after the int 0xcc,
+	 * and int 0xcc is two bytes long.
+	 */
+	if (((regs->ip - 2) & ~0xfff) != VSYSCALL_START)
+		warn_bad_vsyscall(KERN_WARNING, regs,
+				  "emulated vsyscall from bogus address -- "
+				  "fix your code");
 
-	switch (vsyscall_nr) {
-	case 0:
-		ret = sys_gettimeofday(
-			(struct timeval __user *)regs->di,
-			(struct timezone __user *)regs->si);
-		break;
-
-	case 1:
-		ret = sys_time((time_t __user *)regs->di);
-		break;
-
-	case 2:
-		ret = sys_getcpu((unsigned __user *)regs->di,
-				 (unsigned __user *)regs->si,
-				 0);
-		break;
-	}
+	ret = vsys(regs);
 
 	if (ret == -EFAULT) {
 		/*
@@ -223,9 +190,9 @@ static long vsys_gettimeofday(struct pt_regs *regs)
 		(struct timezone __user *)regs->si);
 }
 
-void dotraplinkage emulate_vsyscall0(struct pt_regs *regs, long error_code)
+void dotraplinkage do_emulate_vsyscall0(struct pt_regs *regs, long error_code)
 {
-	emulate_vsyscall(regs, vsys_gettimeofday);
+	emulate_vsyscall(regs, 0, vsys_gettimeofday);
 }
 
 static long vsys_time(struct pt_regs *regs)
@@ -233,7 +200,7 @@ static long vsys_time(struct pt_regs *regs)
 	return sys_time((time_t __user *)regs->di);
 }
 
-void dotraplinkage emulate_vsyscall1(struct pt_regs *regs, long error_code)
+void dotraplinkage do_emulate_vsyscall1(struct pt_regs *regs, long error_code)
 {
 	emulate_vsyscall(regs, 1, vsys_time);
 }
@@ -245,7 +212,7 @@ static long vsys_getcpu(struct pt_regs *regs)
 			  0);
 }
 
-void dotraplinkage emulate_vsyscall2(struct pt_regs *regs, long error_code)
+void dotraplinkage do_emulate_vsyscall2(struct pt_regs *regs, long error_code)
 {
 	emulate_vsyscall(regs, 2, vsys_getcpu);
 }
diff --git a/arch/x86/kernel/vsyscall_emu_64.S b/arch/x86/kernel/vsyscall_emu_64.S
index ffa845e..a4f02a3 100644
--- a/arch/x86/kernel/vsyscall_emu_64.S
+++ b/arch/x86/kernel/vsyscall_emu_64.S
@@ -13,15 +13,15 @@
 
 .section .vsyscall_0, "a"
 ENTRY(vsyscall_0)
-	int $VSYSCALL_EMU_VECTOR
+	int $VSYSCALL0_EMU_VECTOR
 END(vsyscall_0)
 
 .section .vsyscall_1, "a"
 ENTRY(vsyscall_1)
-	int $VSYSCALL_EMU_VECTOR
+	int $VSYSCALL1_EMU_VECTOR
 END(vsyscall_1)
 
 .section .vsyscall_2, "a"
 ENTRY(vsyscall_2)
-	int $VSYSCALL_EMU_VECTOR
+	int $VSYSCALL2_EMU_VECTOR
 END(vsyscall_2)
-- 
1.7.6

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ