/*
 * Minimal sandbox bootstrap helper.
 *
 * Clones into new user, mount and pid namespaces, builds a small root
 * file system on a tmpfs (see the `create` table), pivots into it and
 * finally execs "sh".
 */

#ifndef _GNU_SOURCE
#define _GNU_SOURCE /* Required for CLONE_NEWNS */
#endif

#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <sched.h>
#include <signal.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

/* Flip the `#if 0` to `#if 1` to get debug traces on stdout. */
#if 0
#define __debug__(x) printf x
#else
#define __debug__(x)
#endif

#define N_ELEMENTS(arr) (sizeof (arr) / sizeof ((arr)[0]))

#define TRUE 1
#define FALSE 0
typedef int bool;

#define READ_END 0
#define WRITE_END 1

static void die_with_error (const char *format, ...)
  __attribute__ ((__noreturn__, __format__ (__printf__, 1, 2)));
static void die (const char *format, ...)
  __attribute__ ((__noreturn__, __format__ (__printf__, 1, 2)));

/*
 * Print the formatted message followed by ": <strerror(errno)>" and a
 * newline to stderr, then exit with status 1.
 *
 * errno is captured on entry so the formatting calls cannot clobber it.
 * The message itself must not end in a newline.
 */
static void
die_with_error (const char *format, ...)
{
  va_list args;
  int errsv;

  errsv = errno;

  va_start (args, format);
  vfprintf (stderr, format, args);
  va_end (args);

  fprintf (stderr, ": %s\n", strerror (errsv));

  exit (1);
}

/*
 * Print the formatted message and a newline to stderr, then exit with
 * status 1.  The message itself must not end in a newline.
 */
static void
die (const char *format, ...)
{
  va_list args;

  va_start (args, format);
  vfprintf (stderr, format, args);
  va_end (args);

  fprintf (stderr, "\n");

  exit (1);
}

/* malloc() wrapper that terminates the process on allocation failure. */
static void *
xmalloc (size_t size)
{
  void *res = malloc (size);

  if (res == NULL)
    die ("oom");
  return res;
}

/*
 * Return a newly allocated concatenation of s1 and s2.
 * Either argument may be NULL, in which case it is treated as empty.
 * The caller owns the result and must free() it.
 */
char *
strconcat (const char *s1,
           const char *s2)
{
  size_t len = 0;
  char *res;

  if (s1)
    len += strlen (s1);
  if (s2)
    len += strlen (s2);

  res = xmalloc (len + 1);
  *res = 0;
  if (s1)
    strcat (res, s1);
  if (s2)
    strcat (res, s2);

  return res;
}

/*
 * Return a newly allocated concatenation of s1, s2 and s3.
 * Any argument may be NULL, in which case it is treated as empty.
 * The caller owns the result and must free() it.
 */
char *
strconcat3 (const char *s1,
            const char *s2,
            const char *s3)
{
  size_t len = 0;
  char *res;

  if (s1)
    len += strlen (s1);
  if (s2)
    len += strlen (s2);
  if (s3)
    len += strlen (s3);

  res = xmalloc (len + 1);
  *res = 0;
  if (s1)
    strcat (res, s1);
  if (s2)
    strcat (res, s2);
  if (s3)
    strcat (res, s3);

  return res;
}

/*
 * Return a newly allocated string made of s1 followed by at most s2_len
 * bytes of s2.  Either string may be NULL (treated as empty).
 * The caller owns the result and must free() it.
 */
char *
strconcat_len (const char *s1,
               const char *s2,
               size_t      s2_len)
{
  size_t len = 0;
  char *res;

  if (s1)
    len += strlen (s1);
  if (s2)
    len += s2_len;

  res = xmalloc (len + 1);
  *res = 0;
  if (s1)
    strcat (res, s1);
  if (s2)
    strncat (res, s2, s2_len);

  return res;
}

/*
 * printf-style formatting into a newly allocated string.
 * Terminates the process if the string cannot be produced.
 * The caller owns the result and must free() it.
 */
char*
strdup_printf (const char *format,
               ...)
{
  char *buffer = NULL;
  va_list args;
  int res;

  va_start (args, format);
  res = vasprintf (&buffer, format, args);
  va_end (args);

  /* On failure vasprintf() returns -1 and leaves the contents of
   * `buffer` undefined, so the return value is the only reliable
   * failure indicator. */
  if (res < 0 || buffer == NULL)
    die ("oom");

  return buffer;
}

/*
 * Invoke the raw clone system call.  Returns the child's pid in the
 * parent, 0 in the child and -1 (with errno set) on failure.
 */
static inline int
raw_clone (unsigned long flags,
           void         *child_stack)
{
#if defined(__s390__) || defined(__CRIS__)
  /* On s390 and cris the order of the first and second arguments
   * of the raw clone() system call is reversed. */
  return (int) syscall(__NR_clone, child_stack, flags);
#else
  return (int) syscall(__NR_clone, flags, child_stack);
#endif
}

/*
 * Thin wrapper around the pivot_root system call.  When the syscall
 * number is not known at build time it fails with ENOSYS.
 */
static int
pivot_root (const char * new_root, const char * put_old)
{
#ifdef __NR_pivot_root
  return syscall(__NR_pivot_root, new_root, put_old);
#else
  errno = ENOSYS;
  return -1;
#endif
}

/* Kind of action create_files() performs for a table entry. */
typedef enum {
  FILE_TYPE_REGULAR,        /* create an empty regular file */
  FILE_TYPE_DIR,            /* mkdir */
  FILE_TYPE_SYMLINK,        /* symlink name -> data */
  FILE_TYPE_SYSTEM_SYMLINK, /* if /<name> exists on the host, mkdir + read-only bind of it */
  FILE_TYPE_BIND,           /* bind mount data on name */
  FILE_TYPE_BIND_RO,        /* read-only bind mount data on name */
  FILE_TYPE_MOUNT,          /* mount the mount_table entry whose `where` equals name */
  FILE_TYPE_REMOUNT,        /* remount name, using `mode` as extra mount flags */
  FILE_TYPE_DEVICE,         /* create file and bind mount /<name> on it */
} file_type_t;

/* Per-entry modifiers interpreted by create_files(). */
typedef enum {
  FILE_FLAGS_NONE = 0,
  FILE_FLAGS_NON_FATAL = 1 << 0,      /* a failure of the initial bind is tolerated */
  FILE_FLAGS_IF_LAST_FAILED = 1 << 1, /* only run if the previous entry set last_failed */
  FILE_FLAGS_DEVICES = 1 << 2,        /* bind with BIND_DEVICES */
  FILE_FLAGS_NOREMOUNT = 1 << 3,      /* bind with BIND_NOREMOUNT */
} file_flags_t;

/* One step of the root file system construction. */
typedef struct {
  file_type_t type;
  const char *name;   /* path relative to the new root; used as a printf format taking the uid */
  mode_t mode;
  const char *data;   /* type specific (link target / bind source); also a printf format */
  file_flags_t flags;
  int *option;        /* when non-NULL, the entry is skipped if *option is 0 */
} create_table_t;

/* Arguments for one mount(2) call, looked up by `where`. */
typedef struct {
  const char *what;
  const char *where;
  const char *type;
  const char *options;
  unsigned long flags;
} mount_table_t;

/* Locale independent test for an ASCII decimal digit. */
int
ascii_isdigit (char c)
{
  return c >= '0' && c <= '9';
}

/* Steps executed in order, relative to the new root, by create_files(). */
static const create_table_t create[] = {
  { FILE_TYPE_DIR, ".oldroot", 0755 },
  { FILE_TYPE_DIR, "usr", 0755 },
  { FILE_TYPE_BIND_RO, "usr", 0755, "/usr"},
  { FILE_TYPE_DIR, "tmp", 01777 },
  { FILE_TYPE_DIR, "run", 0755},
  { FILE_TYPE_DIR, "var", 0755},
  { FILE_TYPE_SYMLINK, "var/tmp", 0755, "/tmp"},
  { FILE_TYPE_SYMLINK, "var/run", 0755, "/run"},
  { FILE_TYPE_SYSTEM_SYMLINK, "lib32", 0755, "usr/lib32"},
  { FILE_TYPE_SYSTEM_SYMLINK, "lib64", 0755, "usr/lib64"},
  { FILE_TYPE_SYSTEM_SYMLINK, "lib", 0755, "usr/lib"},
  { FILE_TYPE_SYSTEM_SYMLINK, "bin", 0755, "usr/bin" },
  { FILE_TYPE_SYSTEM_SYMLINK, "sbin", 0755, "usr/sbin"},
  { FILE_TYPE_DIR, "etc", 0755, NULL, 0},
  { FILE_TYPE_BIND, "etc", 0755, "/etc", FILE_FLAGS_NOREMOUNT},
  { FILE_TYPE_DIR, "proc", 0755},
  { FILE_TYPE_MOUNT, "proc"},
  { FILE_TYPE_DIR, "dev", 0755},
  { FILE_TYPE_MOUNT, "dev"},
  { FILE_TYPE_DIR, "dev/shm", 0755},
  { FILE_TYPE_MOUNT, "dev/shm"},
  { FILE_TYPE_DEVICE, "dev/null", 0666},
  { FILE_TYPE_DEVICE, "dev/zero", 0666},
  { FILE_TYPE_DEVICE, "dev/full", 0666},
  { FILE_TYPE_DEVICE, "dev/random", 0666},
  { FILE_TYPE_DEVICE, "dev/urandom", 0666},
  { FILE_TYPE_DEVICE, "dev/tty", 0666},
};

/* File systems that FILE_TYPE_MOUNT entries refer to by `where`. */
static mount_table_t mount_table[] = {
  { "proc",      "proc",     "proc",  NULL,        MS_NOSUID|MS_NOEXEC|MS_NODEV           },
  { "tmpfs",     "dev",      "tmpfs", "mode=755",  MS_NOSUID|MS_STRICTATIME               },
  { "tmpfs",     "dev/shm",  "tmpfs", "mode=1777", MS_NOSUID|MS_NODEV|MS_STRICTATIME      },
};

/* Options accepted by bind_mount(). */
typedef enum {
  BIND_READONLY = (1<<0),
  BIND_PRIVATE = (1<<1),
  BIND_DEVICES = (1<<2),
  BIND_RECURSIVE = (1<<3),
  BIND_NOREMOUNT = (1<<4),
} bind_option_t;

/*
 * Bind mount src on dest.
 *
 * After the initial bind the mount is optionally made recursively
 * private (BIND_PRIVATE) and, unless BIND_NOREMOUNT is given, remounted
 * with MS_NOSUID, plus MS_NODEV unless BIND_DEVICES and MS_RDONLY if
 * BIND_READONLY.
 *
 * Returns 0 on success, or the number of the step that failed:
 * 1 = bind, 2 = make private, 3 = remount.  errno is left as set by
 * the failing mount() call.
 */
static int
bind_mount (const char *src, const char *dest, bind_option_t options)
{
  bool readonly = (options & BIND_READONLY) != 0;
  bool private = (options & BIND_PRIVATE) != 0;
  bool devices = (options & BIND_DEVICES) != 0;
  bool noremount = (options & BIND_NOREMOUNT) != 0;
  bool recursive = (options & BIND_RECURSIVE) != 0;

  if (mount (src, dest, NULL, MS_MGC_VAL|MS_BIND|(recursive?MS_REC:0), NULL) != 0)
    return 1;

  if (private)
    {
      if (mount ("none", dest,
                 NULL, MS_REC|MS_PRIVATE, NULL) != 0)
        return 2;
    }

  if (!noremount)
    {
      if (mount ("none", dest,
                 NULL, MS_MGC_VAL|MS_BIND|MS_REMOUNT|(devices?0:MS_NODEV)|MS_NOSUID|(readonly?MS_RDONLY:0), NULL) != 0)
        return 3;
    }

  return 0;
}

/*
 * Write the whole NUL-terminated string `content` to fd, retrying on
 * EINTR and on short writes.  Returns 0 on success, -1 on failure.
 */
static int
write_to_file (int fd, const char *content)
{
  ssize_t len = strlen (content);
  ssize_t res;

  while (len > 0)
    {
      res = write (fd, content, len);
      if (res < 0 && errno == EINTR)
        continue;
      /* A zero-length write would loop forever, treat it as an error. */
      if (res <= 0)
        return -1;
      len -= res;
      content += res;
    }

  return 0;
}

/*
 * Open the existing file `path` and write `content` to it (nothing is
 * written when content is NULL).  Returns 0 on success, -1 on failure.
 */
static int
write_file (const char *path, const char *content)
{
  int fd;
  int res;

  fd = open (path, O_RDWR | O_CLOEXEC, 0);
  if (fd == -1)
    return -1;

  res = 0;
  if (content)
    res = write_to_file (fd, content);

  close (fd);

  return res;
}

/*
 * Create (or truncate) `path` with the given mode and write `content`
 * to it (nothing is written when content is NULL).
 * Returns 0 on success, -1 on failure.
 */
static int
create_file (const char *path, mode_t mode, const char *content)
{
  int fd;
  int res;

  fd = creat (path, mode);
  if (fd == -1)
    return -1;

  res = 0;
  if (content)
    res = write_to_file (fd, content);

  close (fd);

  return res;
}

/*
 * Execute the first n_create entries of the `create` table in order.
 *
 * Paths in the table are used as given, so the caller is expected to
 * have changed into the directory the entries are relative to.  Any
 * failure that is not explicitly tolerated terminates the process.
 */
static void
create_files (const create_table_t *create, int n_create)
{
  bool last_failed = FALSE;
  int i;

  for (i = 0; i < n_create; i++)
    {
      char *name;
      char *data = NULL;
      mode_t mode = create[i].mode;
      file_flags_t flags = create[i].flags;
      int *option = create[i].option;
      char *in_root;
      size_t k;
      bool found;
      int res;

      if ((flags & FILE_FLAGS_IF_LAST_FAILED) &&
          !last_failed)
        continue;

      if (option && !*option)
        continue;

      /* Table strings double as printf formats that receive the uid. */
      name = strdup_printf (create[i].name, getuid());
      if (create[i].data)
        data = strdup_printf (create[i].data, getuid());

      last_failed = FALSE;

      switch (create[i].type)
        {
        case FILE_TYPE_DIR:
          if (mkdir (name, mode) != 0)
            die_with_error ("creating dir %s", name);
          break;

        case FILE_TYPE_REGULAR:
          if (create_file (name, mode, NULL))
            die_with_error ("creating file %s", name);
          break;

        case FILE_TYPE_SYSTEM_SYMLINK:
          {
            struct stat buf;

            /* Only mirror the directory if the host has it at /<name>. */
            in_root = strconcat ("/", name);
            if (stat (in_root, &buf) == 0)
              {
                if (mkdir (name, mode) != 0)
                  die_with_error ("creating dir %s", name);

                if (bind_mount (in_root, name, BIND_PRIVATE | BIND_READONLY))
                  die_with_error ("mount %s", name);
              }

            free (in_root);

            break;
          }

        case FILE_TYPE_SYMLINK:
          if (data == NULL)
            die ("missing symlink target for %s", name);
          if (symlink (data, name) != 0)
            die_with_error ("creating symlink %s", name);
          break;

        case FILE_TYPE_BIND:
        case FILE_TYPE_BIND_RO:
          if (data == NULL)
            die ("missing bind source for %s", name);
          if ((res = bind_mount (data, name,
                                 0 |
                                 ((create[i].type == FILE_TYPE_BIND_RO) ? BIND_READONLY : 0) |
                                 ((flags & FILE_FLAGS_DEVICES) ? BIND_DEVICES : 0) |
                                 ((flags & FILE_FLAGS_NOREMOUNT) ? BIND_NOREMOUNT : 0)
                                 )))
            {
              /* Only a failure of the initial bind (res == 1) can be
               * tolerated; later steps leave a half configured mount. */
              if (res > 1 || (flags & FILE_FLAGS_NON_FATAL) == 0)
                die_with_error ("mounting bindmount %s", name);
              last_failed = TRUE;
            }

          break;

        case FILE_TYPE_MOUNT:
          found = FALSE;
          for (k = 0; k < N_ELEMENTS(mount_table); k++)
            {
              if (strcmp (mount_table[k].where, name) == 0)
                {
                  if (mount(mount_table[k].what,
                            mount_table[k].where,
                            mount_table[k].type,
                            mount_table[k].flags,
                            mount_table[k].options) < 0)
                    die_with_error ("Mounting %s", name);
                  found = TRUE;
                }
            }

          if (!found)
            die ("Unable to find mount %s", name);

          break;

        case FILE_TYPE_REMOUNT:
          /* For this entry type `mode` carries additional mount flags. */
          if (mount ("none", name,
                     NULL, MS_MGC_VAL|MS_REMOUNT|mode, NULL) != 0)
            die_with_error ("Unable to remount %s", name);

          break;

        case FILE_TYPE_DEVICE:
          /* Create a mount point file, then bind the host node on it. */
          if (create_file (name, mode, NULL))
            die_with_error ("creating file %s", name);

          in_root = strconcat ("/", name);
          if ((res = bind_mount (in_root, name,
                                 BIND_DEVICES)))
            {
              if (res > 1 || (flags & FILE_FLAGS_NON_FATAL) == 0)
                die_with_error ("binding device %s", name);
            }
          free (in_root);

          break;

        default:
          die ("Unknown create type %d", create[i].type);
        }

      free (name);
      free (data);
    }
}

/*
 * Entry point: enter new namespaces, build the sandbox root, pivot into
 * it and exec "sh".  Command line arguments are not used.
 */
int
main (int argc,
      char **argv)
{
  mode_t old_umask;
  char *newroot;
  char *args[] = {"sh", NULL};
  char old_cwd[PATH_MAX];
  char *uid_map, *gid_map;
  uid_t uid;
  gid_t gid;
  pid_t pid;

  (void) argc;
  (void) argv;

  newroot = "/tmp/.xdg-app-root";
  if (mkdir (newroot, 0755) && errno != EEXIST)
    die_with_error ("Creating xdg-app-root failed");

  __debug__(("creating new namespace\n"));

  /* Record the ids before entering the user namespace; they are needed
   * to write the identity mappings below. */
  uid = getuid ();
  gid = getgid ();

  pid = raw_clone (SIGCHLD | CLONE_NEWNS | CLONE_NEWPID | CLONE_NEWUSER,
                   NULL);
  if (pid == -1)
    die_with_error ("Creating new namespace failed");

  /* The parent exits right away; everything below runs in the child. */
  if (pid != 0)
    exit (0);

  /* uid_t/gid_t are unsigned, so format them as such. */
  uid_map = strdup_printf ("%u %u 1\n", (unsigned) uid, (unsigned) uid);
  if (write_file ("/proc/self/uid_map", uid_map) < 0)
    die_with_error ("setting up uid map");
  free (uid_map);

  /* setgroups is written before gid_map, in that order. */
  if (write_file("/proc/self/setgroups", "deny\n") < 0)
    die_with_error ("error writing to setgroups");

  gid_map = strdup_printf ("%u %u 1\n", (unsigned) gid, (unsigned) gid);
  if (write_file ("/proc/self/gid_map", gid_map) < 0)
    die_with_error ("setting up gid map");
  free (gid_map);

  /* Let the table modes apply verbatim while building the root. */
  old_umask = umask (0);

  if (mount (NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) < 0)
    die_with_error ("Failed to make / slave");

  if (mount ("", newroot, "tmpfs", MS_NODEV|MS_NOEXEC|MS_NOSUID, NULL) != 0)
    die_with_error ("Failed to mount tmpfs");

  /* Remember where we were; if that cannot be determined fall back to
   * "/" rather than using an uninitialised buffer later on. */
  if (getcwd (old_cwd, sizeof (old_cwd)) == NULL)
    strcpy (old_cwd, "/");

  if (chdir (newroot) != 0)
      die_with_error ("chdir");

  create_files (create, N_ELEMENTS (create));

  if (pivot_root (newroot, ".oldroot"))
    die_with_error ("pivot_root");

  if (chdir ("/") != 0)
    die_with_error ("chdir /");

  /* The old root better be rprivate or we will send unmount events to the parent namespace */
  if (mount (".oldroot", ".oldroot", NULL, MS_REC|MS_PRIVATE, NULL) != 0)
    die_with_error ("Failed to make old root rprivate");

  if (umount2 (".oldroot", MNT_DETACH))
    die_with_error ("unmount oldroot");

  umask (old_umask);

  /* Best effort: the previous working directory need not exist inside
   * the new root, in which case we simply stay in "/". */
  if (chdir (old_cwd) != 0)
    {
      __debug__(("cannot return to %s, staying in /\n", old_cwd));
    }

  __debug__(("forking for child\n"));

  if (execvp (args[0], args) == -1)
    die_with_error ("execvp %s", args[0]);

  return 0;
}