linux-kernel - [PATCH 1/2] fs/exec: Explicitly unshare fs

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20221006082735.1321612-2-keescook@chromium.org>
Date:   Thu,  6 Oct 2022 01:27:34 -0700
From:   Kees Cook <keescook@...omium.org>
To:     Eric Biederman <ebiederm@...ssion.com>
Cc:     Kees Cook <keescook@...omium.org>,
        Jorge Merlino <jorge.merlino@...onical.com>,
        Alexander Viro <viro@...iv.linux.org.uk>,
        "Christian Brauner (Microsoft)" <brauner@...nel.org>,
        Thomas Gleixner <tglx@...utronix.de>,
        Andy Lutomirski <luto@...nel.org>,
        Sebastian Andrzej Siewior <bigeasy@...utronix.de>,
        Andrew Morton <akpm@...ux-foundation.org>, linux-mm@...ck.org,
        linux-fsdevel@...r.kernel.org,
        John Johansen <john.johansen@...onical.com>,
        Paul Moore <paul@...l-moore.com>,
        James Morris <jmorris@...ei.org>,
        "Serge E. Hallyn" <serge@...lyn.com>,
        Stephen Smalley <stephen.smalley.work@...il.com>,
        Eric Paris <eparis@...isplace.org>,
        Richard Haines <richard_c_haines@...nternet.com>,
        Casey Schaufler <casey@...aufler-ca.com>,
        Xin Long <lucien.xin@...il.com>,
        "David S. Miller" <davem@...emloft.net>,
        Todd Kjos <tkjos@...gle.com>,
        Ondrej Mosnacek <omosnace@...hat.com>,
        Prashanth Prahlad <pprahlad@...hat.com>,
        Micah Morton <mortonm@...omium.org>,
        Fenghua Yu <fenghua.yu@...el.com>,
        Andrei Vagin <avagin@...il.com>, linux-kernel@...r.kernel.org,
        apparmor@...ts.ubuntu.com, linux-security-module@...r.kernel.org,
        selinux@...r.kernel.org, linux-hardening@...r.kernel.org
Subject: [PATCH 1/2] fs/exec: Explicitly unshare fs_struct on exec

The check_unsafe_exec() counting of n_fs would not add up under a heavily
threaded process trying to perform a suid exec, causing the suid portion
to fail. This counting error appears to be unneeded, but to catch any
possible conditions, explicitly unshare fs_struct on exec, if it ends up
needing to happen. This means fs->in_exec must be removed (since it would
never get cleared in the old copy), and is specifically no longer needed.

See also commit 498052bba55e ("New locking/refcounting for fs_struct").

This additionally allows the "in_exec" member to be dropped, showing an
8 bytes savings per fs_struct on 64-bit. Before:

struct fs_struct {
        int                        users;                /*     0     4 */
        spinlock_t                 lock;                 /*     4     4 */
        seqcount_spinlock_t        seq;                  /*     8     4 */
        int                        umask;                /*    12     4 */
        int                        in_exec;              /*    16     4 */

        /* XXX 4 bytes hole, try to pack */

        struct path                root;                 /*    24    16 */
        struct path                pwd;                  /*    40    16 */

        /* size: 56, cachelines: 1, members: 7 */
        /* sum members: 52, holes: 1, sum holes: 4 */
        /* last cacheline: 56 bytes */
};

After:

struct fs_struct {
        int                        users;                /*     0     4 */
        spinlock_t                 lock;                 /*     4     4 */
        seqcount_spinlock_t        seq;                  /*     8     4 */
        int                        umask;                /*    12     4 */
        struct path                root;                 /*    16    16 */
        struct path                pwd;                  /*    32    16 */

        /* size: 48, cachelines: 1, members: 6 */
        /* last cacheline: 48 bytes */
};

Reported-by: Jorge Merlino <jorge.merlino@...onical.com>
Link: https://lore.kernel.org/lkml/20220910211215.140270-1-jorge.merlino@canonical.com/
Cc: Eric Biederman <ebiederm@...ssion.com>
Cc: Alexander Viro <viro@...iv.linux.org.uk>
Cc: "Christian Brauner (Microsoft)" <brauner@...nel.org>
Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: Andy Lutomirski <luto@...nel.org>
Cc: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
Cc: Andrew Morton <akpm@...ux-foundation.org>
Cc: linux-mm@...ck.org
Cc: linux-fsdevel@...r.kernel.org
Signed-off-by: Kees Cook <keescook@...omium.org>
---
 fs/exec.c                 |  9 +++---
 fs/fs_struct.c            |  1 -
 include/linux/fdtable.h   |  1 +
 include/linux/fs_struct.h |  1 -
 kernel/fork.c             | 62 ++++++++++++++++++++++++++-------------
 5 files changed, 48 insertions(+), 26 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 902bce45b116..7d5f63f03c58 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1272,6 +1272,11 @@ int begin_new_exec(struct linux_binprm * bprm)
 	if (retval)
 		goto out;
 
+	/* Ensure the fs_struct is not shared. */
+	retval = unshare_fs();
+	if (retval)
+		goto out;
+
 	/*
 	 * Must be called _before_ exec_mmap() as bprm->mm is
 	 * not visible until then. This also enables the update
@@ -1583,8 +1588,6 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
 
 	if (p->fs->users > n_fs)
 		bprm->unsafe |= LSM_UNSAFE_SHARE;
-	else
-		p->fs->in_exec = 1;
 	spin_unlock(&p->fs->lock);
 }
 
@@ -1843,7 +1846,6 @@ static int bprm_execve(struct linux_binprm *bprm,
 		goto out;
 
 	/* execve succeeded */
-	current->fs->in_exec = 0;
 	current->in_execve = 0;
 	rseq_execve(current);
 	acct_update_integrals(current);
@@ -1861,7 +1863,6 @@ static int bprm_execve(struct linux_binprm *bprm,
 		force_fatal_sig(SIGSEGV);
 
 out_unmark:
-	current->fs->in_exec = 0;
 	current->in_execve = 0;
 
 	return retval;
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index 04b3f5b9c629..c27c67023d01 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -115,7 +115,6 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
 	/* We don't need to lock fs - think why ;-) */
 	if (fs) {
 		fs->users = 1;
-		fs->in_exec = 0;
 		spin_lock_init(&fs->lock);
 		seqcount_spinlock_init(&fs->seq, &fs->lock);
 		fs->umask = old->umask;
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index e066816f3519..fbfb89ee3bda 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -117,6 +117,7 @@ struct task_struct;
 
 void put_files_struct(struct files_struct *fs);
 int unshare_files(void);
+int unshare_fs(void);
 struct files_struct *dup_fd(struct files_struct *, unsigned, int *) __latent_entropy;
 void do_close_on_exec(struct files_struct *);
 int iterate_fd(struct files_struct *, unsigned,
diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h
index 783b48dedb72..08b82a90ceae 100644
--- a/include/linux/fs_struct.h
+++ b/include/linux/fs_struct.h
@@ -11,7 +11,6 @@ struct fs_struct {
 	spinlock_t lock;
 	seqcount_spinlock_t seq;
 	int umask;
-	int in_exec;
 	struct path root, pwd;
 } __randomize_layout;
 
diff --git a/kernel/fork.c b/kernel/fork.c
index b4a799d9c50f..53b7248f7a4b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1589,10 +1589,6 @@ static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
 	if (clone_flags & CLONE_FS) {
 		/* tsk->fs is already what we want */
 		spin_lock(&fs->lock);
-		if (fs->in_exec) {
-			spin_unlock(&fs->lock);
-			return -EAGAIN;
-		}
 		fs->users++;
 		spin_unlock(&fs->lock);
 		return 0;
@@ -3070,10 +3066,9 @@ static int check_unshare_flags(unsigned long unshare_flags)
 	return 0;
 }
 
-/*
- * Unshare the filesystem structure if it is being shared
- */
-static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
+/* Allocate an fs_struct if it is currently shared and CLONE_FS requested. */
+static int unshare_fs_alloc(unsigned long unshare_flags,
+			    struct fs_struct **new_fsp)
 {
 	struct fs_struct *fs = current->fs;
 
@@ -3091,6 +3086,41 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
 	return 0;
 }
 
+/* Swap out fs_struct, returning old fs if it needs freeing. */
+static void unshare_fs_finalize(struct fs_struct **new_fsp)
+{
+	struct task_struct *task = current;
+	struct fs_struct *fs = task->fs;
+
+	spin_lock(&fs->lock);
+	task->fs = *new_fsp;
+	if (--fs->users)
+		*new_fsp = NULL;
+	else
+		*new_fsp = fs;
+	spin_unlock(&fs->lock);
+}
+
+/*
+ * Unshare the filesystem structure if it is being shared
+ */
+int unshare_fs(void)
+{
+	struct fs_struct *new_fs = NULL;
+	int error;
+
+	error = unshare_fs_alloc(CLONE_FS, &new_fs);
+	if (error || !new_fs)
+		return error;
+
+	unshare_fs_finalize(&new_fs);
+
+	if (new_fs)
+		free_fs_struct(new_fs);
+
+	return 0;
+}
+
 /*
  * Unshare file descriptor table if it is being shared
  */
@@ -3120,7 +3150,7 @@ int unshare_fd(unsigned long unshare_flags, unsigned int max_fds,
  */
 int ksys_unshare(unsigned long unshare_flags)
 {
-	struct fs_struct *fs, *new_fs = NULL;
+	struct fs_struct *new_fs = NULL;
 	struct files_struct *new_fd = NULL;
 	struct cred *new_cred = NULL;
 	struct nsproxy *new_nsproxy = NULL;
@@ -3159,7 +3189,7 @@ int ksys_unshare(unsigned long unshare_flags)
 	 */
 	if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM))
 		do_sysvsem = 1;
-	err = unshare_fs(unshare_flags, &new_fs);
+	err = unshare_fs_alloc(unshare_flags, &new_fs);
 	if (err)
 		goto bad_unshare_out;
 	err = unshare_fd(unshare_flags, NR_OPEN_MAX, &new_fd);
@@ -3197,16 +3227,8 @@ int ksys_unshare(unsigned long unshare_flags)
 
 		task_lock(current);
 
-		if (new_fs) {
-			fs = current->fs;
-			spin_lock(&fs->lock);
-			current->fs = new_fs;
-			if (--fs->users)
-				new_fs = NULL;
-			else
-				new_fs = fs;
-			spin_unlock(&fs->lock);
-		}
+		if (new_fs)
+			unshare_fs_finalize(&new_fs);
 
 		if (new_fd)
 			swap(current->files, new_fd);
-- 
2.34.1