lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1459960170-4454-2-git-send-email-dsafonov@virtuozzo.com>
Date:	Wed, 6 Apr 2016 19:29:29 +0300
From:	Dmitry Safonov <dsafonov@...tuozzo.com>
To:	<linux-kernel@...r.kernel.org>
CC:	<luto@...capital.net>, <tglx@...utronix.de>, <mingo@...hat.com>,
	<hpa@...or.com>, <x86@...nel.org>, <shuahkh@....samsung.com>,
	<bp@...en8.de>, <akpm@...ux-foundation.org>,
	<linux-kselftest@...r.kernel.org>, <gorcunov@...nvz.org>,
	<xemul@...tuozzo.com>, <khorenko@...tuozzo.com>,
	<0x7f454c46@...il.com>, Dmitry Safonov <dsafonov@...tuozzo.com>
Subject: [PATCH 1/2] x86/arch_prctl: add ARCH_SET_{COMPAT,NATIVE} to change compatible mode

Now each process that runs natively on x86_64 may execute 32-bit code
by proper setting it's CS selector: either from LDT or reuse Linux's
USER32_CS. The vice-versa is also valid: running 64-bit code in
compatible task is also possible by choosing USER_CS.
So we may switch between 32 and 64 bit code execution in any process.
Linux will choose the right syscall numbers in entries for those
processes. But it still will consider them native/compat by the
personality, that elf loader set on launch. This affects i.e., ptrace
syscall on those tasks: PTRACE_GETREGSET will return 64/32-bit regset
according to process's mode (that's how strace detect task's
personality from 4.8 version).

This patch adds arch_prctl calls for x86 that make possible to tell
Linux kernel in which mode the application is running currently.
Mainly, this is needed for CRIU: restoring compatible & native
applications both from 64-bit restorer. By that reason I wrapped all
the code in CONFIG_CHECKPOINT_RESTORE.
This patch solves also a problem for running 64-bit code in 32-bit elf
(and reverse), that you have only 32-bit elf vdso for fast syscalls.
When switching between native <-> compat mode by arch_prctl, it will
remap needed vdso binary blob for target mode.

Cc: Cyrill Gorcunov <gorcunov@...nvz.org>
Cc: Pavel Emelyanov <xemul@...tuozzo.com>
Cc: Konstantin Khorenko <khorenko@...tuozzo.com>
CC: Dmitry Safonov <0x7f454c46@...il.com>
Signed-off-by: Dmitry Safonov <dsafonov@...tuozzo.com>
---
 arch/x86/entry/vdso/vma.c         | 76 ++++++++++++++++++++++++++--------
 arch/x86/include/asm/vdso.h       |  5 +++
 arch/x86/include/uapi/asm/prctl.h |  6 +++
 arch/x86/kernel/process_64.c      | 87 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 157 insertions(+), 17 deletions(-)

diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 10f704584922..9a1561da0bad 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -156,22 +156,21 @@ static int vvar_fault(const struct vm_special_mapping *sm,
 	return VM_FAULT_SIGBUS;
 }
 
-static int map_vdso(const struct vdso_image *image, bool calculate_addr)
+static int do_map_vdso(const struct vdso_image *image, bool calculate_addr,
+		unsigned long addr)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
-	unsigned long addr, text_start;
+	unsigned long text_start;
 	int ret = 0;
 	static const struct vm_special_mapping vvar_mapping = {
 		.name = "[vvar]",
 		.fault = vvar_fault,
 	};
 
-	if (calculate_addr) {
+	if (calculate_addr && !addr) {
 		addr = vdso_addr(current->mm->start_stack,
 				 image->size - image->sym_vvar_start);
-	} else {
-		addr = 0;
 	}
 
 	down_write(&mm->mmap_sem);
@@ -209,11 +208,11 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
 				       VM_PFNMAP,
 				       &vvar_mapping);
 
-	if (IS_ERR(vma)) {
+	if (IS_ERR(vma))
 		ret = PTR_ERR(vma);
-		goto up_fail;
-	}
 
+	if (ret)
+		do_munmap(mm, addr, image->size - image->sym_vvar_start);
 up_fail:
 	if (ret)
 		current->mm->context.vdso = NULL;
@@ -223,24 +222,28 @@ up_fail:
 }
 
 #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
-static int load_vdso32(void)
+static int load_vdso32(unsigned long addr)
 {
 	if (vdso32_enabled != 1)  /* Other values all mean "disabled" */
 		return 0;
 
-	return map_vdso(&vdso_image_32, false);
+	return do_map_vdso(&vdso_image_32, false, addr);
 }
 #endif
 
 #ifdef CONFIG_X86_64
-int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+static int load_vdso64(unsigned long addr)
 {
 	if (!vdso64_enabled)
 		return 0;
 
-	return map_vdso(&vdso_image_64, true);
+	return do_map_vdso(&vdso_image_64, true, addr);
 }
 
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+	return load_vdso64(0);
+}
 #ifdef CONFIG_COMPAT
 int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
 				       int uses_interp)
@@ -250,20 +253,59 @@ int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
 		if (!vdso64_enabled)
 			return 0;
 
-		return map_vdso(&vdso_image_x32, true);
+		return do_map_vdso(&vdso_image_x32, true, 0);
 	}
 #endif
 #ifdef CONFIG_IA32_EMULATION
-	return load_vdso32();
+	return load_vdso32(0);
 #else
 	return 0;
 #endif
 }
-#endif
-#else
+#endif /* CONFIG_COMPAT */
+
+#if defined(CONFIG_IA32_EMULATION) && defined(CONFIG_CHECKPOINT_RESTORE)
+unsigned long unmap_vdso(void)
+{
+	struct vm_area_struct *vma;
+	unsigned long addr = (unsigned long)current->mm->context.vdso;
+
+	if (!addr)
+		return 0;
+
+	/* vvar pages */
+	vma = find_vma(current->mm, addr - 1);
+	if (vma)
+		vm_munmap(vma->vm_start, vma->vm_end - vma->vm_start);
+
+	/* vdso pages */
+	vma = find_vma(current->mm, addr);
+	if (vma)
+		vm_munmap(vma->vm_start, vma->vm_end - vma->vm_start);
+
+	current->mm->context.vdso = NULL;
+
+	return addr;
+}
+/*
+ * Maps needed vdso type: vdso_image_32/vdso_image_64
+ * @compatible - true for compatible, false for native vdso image
+ * @addr - specify addr for vdso mapping (0 for random/searching)
+ * NOTE: be sure to set/clear thread-specific flags before
+ * calling this function.
+ */
+int map_vdso(bool compatible, unsigned long addr)
+{
+	if (compatible)
+		return load_vdso32(addr);
+	else
+		return load_vdso64(addr);
+}
+#endif /* CONFIG_IA32_EMULATION && CONFIG_CHECKPOINT_RESTORE */
+#else /* !CONFIG_X86_64 */
 int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 {
-	return load_vdso32();
+	return load_vdso32(0);
 }
 #endif
 
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 43dc55be524e..3ead7cc48a68 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -39,6 +39,11 @@ extern const struct vdso_image vdso_image_x32;
 extern const struct vdso_image vdso_image_32;
 #endif
 
+#if defined(CONFIG_IA32_EMULATION) && defined(CONFIG_CHECKPOINT_RESTORE)
+extern int map_vdso(bool to_compat, unsigned long addr);
+extern unsigned long unmap_vdso(void);
+#endif
+
 extern void __init init_vdso_image(const struct vdso_image *image);
 
 #endif /* __ASSEMBLER__ */
diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
index 3ac5032fae09..455844f06485 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -6,4 +6,10 @@
 #define ARCH_GET_FS 0x1003
 #define ARCH_GET_GS 0x1004
 
+#if defined(CONFIG_IA32_EMULATION) && defined(CONFIG_CHECKPOINT_RESTORE)
+#define ARCH_SET_COMPAT		0x2001
+#define ARCH_SET_NATIVE		0x2002
+#define ARCH_GET_PERSONALITY	0x2003
+#endif
+
 #endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 6cbab31ac23a..e50660d59530 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -49,6 +49,7 @@
 #include <asm/debugreg.h>
 #include <asm/switch_to.h>
 #include <asm/xen/hypervisor.h>
+#include <asm/vdso.h>
 
 asmlinkage extern void ret_from_fork(void);
 
@@ -505,6 +506,83 @@ void set_personality_ia32(bool x32)
 }
 EXPORT_SYMBOL_GPL(set_personality_ia32);
 
+#if defined(CONFIG_IA32_EMULATION) && defined(CONFIG_CHECKPOINT_RESTORE)
+/*
+ * Check if there are still some vmas (except vdso) for current,
+ * which placed above compatible TASK_SIZE.
+ * Check also code, data, stack, args and env placements.
+ * Returns true if all mappings are compatible.
+ */
+static bool task_mappings_compatible(void)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long top_addr = IA32_PAGE_OFFSET;
+	struct vm_area_struct *vma = find_vma(mm, top_addr);
+
+	if (mm->end_code	> top_addr ||
+	    mm->end_data	> top_addr ||
+	    mm->start_stack	> top_addr ||
+	    mm->brk		> top_addr ||
+	    mm->arg_end		> top_addr ||
+	    mm->env_end		> top_addr)
+		return false;
+
+	while (vma) {
+		if ((vma->vm_start != (unsigned long)mm->context.vdso) &&
+		    (vma->vm_end != (unsigned long)mm->context.vdso))
+			return false;
+
+		top_addr = vma->vm_end;
+		vma = find_vma(mm, top_addr);
+	}
+
+	return true;
+}
+
+static int do_set_personality(bool compat, unsigned long addr)
+{
+	int ret;
+	unsigned long old_vdso_base;
+	unsigned long old_mmap_base = current->mm->mmap_base;
+
+	if (test_thread_flag(TIF_IA32) == compat) /* nothing to do */
+		return 0;
+
+	if (compat && !task_mappings_compatible())
+		return -EFAULT;
+
+	/*
+	 * We can't just remap vdso to needed location:
+	 * vdso compatible and native images differs
+	 */
+	old_vdso_base = unmap_vdso();
+
+	if (compat)
+		set_personality_ia32(false);
+	else
+		set_personality_64bit();
+
+	/*
+	 * Update mmap_base & get_unmapped_area helper, side effect:
+	 * one may change get_unmapped_area or mmap_base with personality()
+	 * or switching to and fro compatible mode
+	 */
+	arch_pick_mmap_layout(current->mm);
+
+	ret = map_vdso(compat, addr);
+	if (ret) {
+		current->mm->mmap_base = old_mmap_base;
+		if (compat)
+			set_personality_64bit();
+		else
+			set_personality_ia32(false);
+		WARN_ON(map_vdso(!compat, old_vdso_base));
+	}
+
+	return ret;
+}
+#endif
+
 long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 {
 	int ret = 0;
@@ -592,6 +670,15 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 		break;
 	}
 
+#if defined(CONFIG_IA32_EMULATION) && defined(CONFIG_CHECKPOINT_RESTORE)
+	case ARCH_SET_COMPAT:
+		return do_set_personality(true, addr);
+	case ARCH_SET_NATIVE:
+		return do_set_personality(false, addr);
+	case ARCH_GET_PERSONALITY:
+		return test_thread_flag(TIF_IA32);
+#endif
+
 	default:
 		ret = -EINVAL;
 		break;
-- 
2.7.4

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ