lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20221003222133.20948-9-aliraza@bu.edu>
Date:   Mon,  3 Oct 2022 18:21:31 -0400
From:   Ali Raza <aliraza@...edu>
To:     linux-kernel@...r.kernel.org
Cc:     corbet@....net, masahiroy@...nel.org, michal.lkml@...kovi.net,
        ndesaulniers@...gle.com, tglx@...utronix.de, mingo@...hat.com,
        bp@...en8.de, dave.hansen@...ux.intel.com, hpa@...or.com,
        luto@...nel.org, ebiederm@...ssion.com, keescook@...omium.org,
        peterz@...radead.org, viro@...iv.linux.org.uk, arnd@...db.de,
        juri.lelli@...hat.com, vincent.guittot@...aro.org,
        dietmar.eggemann@....com, rostedt@...dmis.org, bsegall@...gle.com,
        mgorman@...e.de, bristot@...hat.com, vschneid@...hat.com,
        pbonzini@...hat.com, jpoimboe@...nel.org,
        linux-doc@...r.kernel.org, linux-kbuild@...r.kernel.org,
        linux-mm@...ck.org, linux-fsdevel@...r.kernel.org,
        linux-arch@...r.kernel.org, x86@...nel.org, rjones@...hat.com,
        munsoner@...edu, tommyu@...edu, drepper@...hat.com,
        lwoodman@...hat.com, mboydmcse@...il.com, okrieg@...edu,
        rmancuso@...edu, Ali Raza <aliraza@...edu>
Subject: [RFC UKL 08/10] exec: Make exec path for starting UKL application

The UKL application still relies on much of the setup done to start a
standard user space process, so we still need to use much of that path.
There are several steps that the UKL application doesn't need or want, so
we bypass them in the case of UKL. These are: ELF loading, because the
application is already part of the kernel image; and segment register
value initialization. We still need to record a starting location for the
application heap; normally this is the end of the ELF binary once it has
been loaded. Because there is no binary to load, we choose an arbitrary
low address instead. We also hardcode the application's entry point to
ukl__start, which is glibc's entry point (_start) with the 'ukl_' prefix
added.

Cc: Jonathan Corbet <corbet@....net>
Cc: Masahiro Yamada <masahiroy@...nel.org>
Cc: Michal Marek <michal.lkml@...kovi.net>
Cc: Nick Desaulniers <ndesaulniers@...gle.com>
Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: Ingo Molnar <mingo@...hat.com>
Cc: Borislav Petkov <bp@...en8.de>
Cc: Dave Hansen <dave.hansen@...ux.intel.com>
Cc: "H. Peter Anvin" <hpa@...or.com>
Cc: Andy Lutomirski <luto@...nel.org>
Cc: Eric Biederman <ebiederm@...ssion.com>
Cc: Kees Cook <keescook@...omium.org>
Cc: Peter Zijlstra <peterz@...radead.org>
Cc: Alexander Viro <viro@...iv.linux.org.uk>
Cc: Arnd Bergmann <arnd@...db.de>
Cc: Juri Lelli <juri.lelli@...hat.com>
Cc: Vincent Guittot <vincent.guittot@...aro.org>
Cc: Dietmar Eggemann <dietmar.eggemann@....com>
Cc: Steven Rostedt <rostedt@...dmis.org>
Cc: Ben Segall <bsegall@...gle.com>
Cc: Mel Gorman <mgorman@...e.de>
Cc: Daniel Bristot de Oliveira <bristot@...hat.com>
Cc: Valentin Schneider <vschneid@...hat.com>
Cc: Paolo Bonzini <pbonzini@...hat.com>
Cc: Josh Poimboeuf <jpoimboe@...nel.org>

Suggested-by: Thomas Unger <tommyu@...edu>
Signed-off-by: Ali Raza <aliraza@...edu>
---
 arch/x86/include/asm/elf.h   |  9 ++++--
 arch/x86/kernel/process.c    | 13 +++++++++
 arch/x86/kernel/process_64.c | 27 ++++++++++--------
 fs/binfmt_elf.c              | 28 ++++++++++++++++++
 fs/exec.c                    | 55 ++++++++++++++++++++++++++----------
 5 files changed, 103 insertions(+), 29 deletions(-)

diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index cb0ff1055ab1..91b6efafb46f 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -6,6 +6,7 @@
  * ELF register definitions..
  */
 #include <linux/thread_info.h>
+#include <linux/sched.h>
 
 #include <asm/ptrace.h>
 #include <asm/user.h>
@@ -164,9 +165,11 @@ static inline void elf_common_init(struct thread_struct *t,
 	regs->si = regs->di = regs->bp = 0;
 	regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0;
 	regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;
-	t->fsbase = t->gsbase = 0;
-	t->fsindex = t->gsindex = 0;
-	t->ds = t->es = ds;
+	if (!is_ukl_thread()) {
+		t->fsbase = t->gsbase = 0;
+		t->fsindex = t->gsindex = 0;
+		t->ds = t->es = ds;
+	}
 }
 
 #define ELF_PLAT_INIT(_r, load_addr)			\
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 58a6ea472db9..8395fc0c3398 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -192,6 +192,19 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 	frame->bx = 0;
 	*childregs = *current_pt_regs();
 	childregs->ax = 0;
+
+#ifdef CONFIG_UNIKERNEL_LINUX
+	/*
+	 * UKL leaves return address and flags on user stack. This works
+	 * fine for clone (i.e., VM shared) but not for 'fork' style
+	 * clone (i.e., VM not shared). This is where we clean those extra
+	 * elements from user stack.
+	 */
+	if (is_ukl_thread() & !(clone_flags & CLONE_VM)) {
+		childregs->sp += 2*(sizeof(long));
+	}
+#endif
+
 	if (sp)
 		childregs->sp = sp;
 
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index e9e4a2946452..cf007b95d684 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -530,21 +530,26 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
 {
 	WARN_ON_ONCE(regs != current_pt_regs());
 
-	if (static_cpu_has(X86_BUG_NULL_SEG)) {
-		/* Loading zero below won't clear the base. */
-		loadsegment(fs, __USER_DS);
-		load_gs_index(__USER_DS);
-	}
+	if (!is_ukl_thread()) {
+		if (static_cpu_has(X86_BUG_NULL_SEG)) {
+			/* Loading zero below won't clear the base. */
+			loadsegment(fs, __USER_DS);
+			load_gs_index(__USER_DS);
+		}
 
-	loadsegment(fs, 0);
-	loadsegment(es, _ds);
-	loadsegment(ds, _ds);
-	load_gs_index(0);
+		loadsegment(fs, 0);
+		loadsegment(es, _ds);
+		loadsegment(ds, _ds);
+		load_gs_index(0);
 
+		regs->cs		= _cs;
+		regs->ss		= _ss;
+	} else {
+		regs->cs		= __KERNEL_CS;
+		regs->ss		= __KERNEL_DS;
+	}
 	regs->ip		= new_ip;
 	regs->sp		= new_sp;
-	regs->cs		= _cs;
-	regs->ss		= _ss;
 	regs->flags		= X86_EFLAGS_IF;
 }
 
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 63c7ebb0da89..1c91f1179398 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -845,6 +845,10 @@ static int load_elf_binary(struct linux_binprm *bprm)
 	struct pt_regs *regs;
 
 	retval = -ENOEXEC;
+
+	if (is_ukl_thread())
+		goto UKL_SKIP_READING_ELF;
+
 	/* First of all, some simple consistency checks */
 	if (memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
 		goto out;
@@ -998,6 +1002,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
 	if (retval)
 		goto out_free_dentry;
 
+UKL_SKIP_READING_ELF:
 	/* Flush all traces of the currently running executable */
 	retval = begin_new_exec(bprm);
 	if (retval)
@@ -1029,6 +1034,17 @@ static int load_elf_binary(struct linux_binprm *bprm)
 	start_data = 0;
 	end_data = 0;
 
+	if (is_ukl_thread()) {
+		/*
+		 * load_bias needs to ensure that we push the heap start
+		 * past the end of the executable, but in this case, it is
+		 * already mapped with the kernel text.  So we select an
+		 * address that is "high enough"
+		 */
+		load_bias = 0x405000;
+		goto UKL_SKIP_LOADING_ELF;
+	}
+
 	/* Now we do a little grungy work by mmapping the ELF image into
 	   the correct location in memory. */
 	for(i = 0, elf_ppnt = elf_phdata;
@@ -1224,6 +1240,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
 		}
 	}
 
+UKL_SKIP_LOADING_ELF:
 	e_entry = elf_ex->e_entry + load_bias;
 	phdr_addr += load_bias;
 	elf_bss += load_bias;
@@ -1246,6 +1263,16 @@ static int load_elf_binary(struct linux_binprm *bprm)
 		goto out_free_dentry;
 	}
 
+	if (is_ukl_thread()) {
+		/*
+		 * We know that this symbol exists and that it is the entry
+		 * point for the linked application.
+		 */
+		extern void ukl__start(void);
+		elf_entry = (unsigned long) ukl__start;
+		goto UKL_SKIP_FINDING_ELF_ENTRY;
+	}
+
 	if (interpreter) {
 		elf_entry = load_elf_interp(interp_elf_ex,
 					    interpreter,
@@ -1283,6 +1310,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
 
 	set_binfmt(&elf_format);
 
+UKL_SKIP_FINDING_ELF_ENTRY:
 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
 	retval = ARCH_SETUP_ADDITIONAL_PAGES(bprm, elf_ex, !!interpreter);
 	if (retval < 0)
diff --git a/fs/exec.c b/fs/exec.c
index d046dbb9cbd0..4ae06fcf7436 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1246,9 +1246,11 @@ int begin_new_exec(struct linux_binprm * bprm)
 	int retval;
 
 	/* Once we are committed compute the creds */
-	retval = bprm_creds_from_file(bprm);
-	if (retval)
-		return retval;
+	if (!is_ukl_thread()) {
+		retval = bprm_creds_from_file(bprm);
+		if (retval)
+			return retval;
+	}
 
 	/*
 	 * Ensure all future errors are fatal.
@@ -1282,9 +1284,11 @@ int begin_new_exec(struct linux_binprm * bprm)
 		goto out;
 
 	/* If the binary is not readable then enforce mm->dumpable=0 */
-	would_dump(bprm, bprm->file);
-	if (bprm->have_execfd)
-		would_dump(bprm, bprm->executable);
+	if (!is_ukl_thread()) {
+		would_dump(bprm, bprm->file);
+		if (bprm->have_execfd)
+			would_dump(bprm, bprm->executable);
+	}
 
 	/*
 	 * Release all of the old mmap stuff
@@ -1509,6 +1513,11 @@ static struct linux_binprm *alloc_bprm(int fd, struct filename *filename)
 	if (!bprm)
 		goto out;
 
+	if (is_ukl_thread()) {
+		bprm->filename = "UKL";
+		goto out_ukl;
+	}
+
 	if (fd == AT_FDCWD || filename->name[0] == '/') {
 		bprm->filename = filename->name;
 	} else {
@@ -1522,6 +1531,8 @@ static struct linux_binprm *alloc_bprm(int fd, struct filename *filename)
 
 		bprm->filename = bprm->fdpath;
 	}
+
+out_ukl:
 	bprm->interp = bprm->filename;
 
 	retval = bprm_mm_init(bprm);
@@ -1708,6 +1719,15 @@ static int search_binary_handler(struct linux_binprm *bprm)
 	struct linux_binfmt *fmt;
 	int retval;
 
+	if (is_ukl_thread()) {
+		list_for_each_entry(fmt, &formats, lh) {
+			retval = fmt->load_binary(bprm);
+			if (retval == 0)
+				return retval;
+		}
+		goto out_ukl;
+	}
+
 	retval = prepare_binprm(bprm);
 	if (retval < 0)
 		return retval;
@@ -1717,7 +1737,7 @@ static int search_binary_handler(struct linux_binprm *bprm)
 		return retval;
 
 	retval = -ENOENT;
- retry:
+retry:
 	read_lock(&binfmt_lock);
 	list_for_each_entry(fmt, &formats, lh) {
 		if (!try_module_get(fmt->module))
@@ -1745,6 +1765,7 @@ static int search_binary_handler(struct linux_binprm *bprm)
 		goto retry;
 	}
 
+out_ukl:
 	return retval;
 }
 
@@ -1799,7 +1820,7 @@ static int exec_binprm(struct linux_binprm *bprm)
 static int bprm_execve(struct linux_binprm *bprm,
 		       int fd, struct filename *filename, int flags)
 {
-	struct file *file;
+	struct file *file = NULL;
 	int retval;
 
 	retval = prepare_bprm_creds(bprm);
@@ -1809,10 +1830,12 @@ static int bprm_execve(struct linux_binprm *bprm,
 	check_unsafe_exec(bprm);
 	current->in_execve = 1;
 
-	file = do_open_execat(fd, filename, flags);
-	retval = PTR_ERR(file);
-	if (IS_ERR(file))
-		goto out_unmark;
+	if (!is_ukl_thread()) {
+		file = do_open_execat(fd, filename, flags);
+		retval = PTR_ERR(file);
+		if (IS_ERR(file))
+			goto out_unmark;
+	}
 
 	sched_exec();
 
@@ -1830,9 +1853,11 @@ static int bprm_execve(struct linux_binprm *bprm,
 		bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE;
 
 	/* Set the unchanging part of bprm->cred */
-	retval = security_bprm_creds_for_exec(bprm);
-	if (retval)
-		goto out;
+	if (!is_ukl_thread()) {
+		retval = security_bprm_creds_for_exec(bprm);
+		if (retval)
+			goto out;
+	}
 
 	retval = exec_binprm(bprm);
 	if (retval < 0)
-- 
2.21.3

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ