[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220207121800.5079-3-mkoutny@suse.com>
Date: Mon, 7 Feb 2022 13:17:56 +0100
From: Michal Koutný <mkoutny@...e.com>
To: Eric Biederman <ebiederm@...ssion.com>,
Alexey Gladkov <legion@...nel.org>
Cc: Kees Cook <keescook@...omium.org>, Shuah Khan <shuah@...nel.org>,
Christian Brauner <brauner@...nel.org>,
Solar Designer <solar@...nwall.com>,
Ran Xiaokai <ran.xiaokai@....com.cn>,
linux-kernel@...r.kernel.org, linux-kselftest@...r.kernel.org,
Linux Containers <containers@...ts.linux-foundation.org>
Subject: [RFC PATCH 2/6] set*uid: Check RLIMIT_PROC against new credentials
The generic idea is that not even root or capable user can force an
unprivileged user's limit breach. (For historical and security reasons
this check is postponed from set*uid to execve.) During the switch the
resource consumption of target the user has to be checked. The commits
905ae01c4ae2 ("Add a reference to ucounts for each cred") and
21d1c5e386bc ("Reimplement RLIMIT_NPROC on top of ucounts") made the
check in set_user() look at the old user's consumption.
This version of the fix simply moves the check to the place where the
actual switch of the accounting structure happens -- set_cred_ucounts().
The other callers are kept without the check but with the per-userns
accounting they may be newly subject to the check too.
The set_cred_ucounts() becomes inconsistent since task->flags are
passed by the caller but task_rlimit() is implicitly `current`'s, this
patch is meant to illustrate the issue, nicer implementation is
possible.
Fixes: 21d1c5e386bc ("Reimplement RLIMIT_NPROC on top of ucounts")
Signed-off-by: Michal Koutný <mkoutny@...e.com>
---
fs/exec.c | 2 +-
include/linux/cred.h | 2 +-
kernel/cred.c | 24 +++++++++++++++++++++---
kernel/fork.c | 2 +-
kernel/sys.c | 21 +++------------------
kernel/user_namespace.c | 2 +-
6 files changed, 28 insertions(+), 25 deletions(-)
diff --git a/fs/exec.c b/fs/exec.c
index fc598c2652b2..e759e42c61da 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1363,7 +1363,7 @@ int begin_new_exec(struct linux_binprm * bprm)
WRITE_ONCE(me->self_exec_id, me->self_exec_id + 1);
flush_signal_handlers(me, 0);
- retval = set_cred_ucounts(bprm->cred);
+ retval = set_cred_ucounts(bprm->cred, NULL);
if (retval < 0)
goto out_unlock;
diff --git a/include/linux/cred.h b/include/linux/cred.h
index fcbc6885cc09..455525ab380d 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -170,7 +170,7 @@ extern int set_security_override_from_ctx(struct cred *, const char *);
extern int set_create_files_as(struct cred *, struct inode *);
extern int cred_fscmp(const struct cred *, const struct cred *);
extern void __init cred_init(void);
-extern int set_cred_ucounts(struct cred *);
+extern int set_cred_ucounts(struct cred *, unsigned int *);
/*
* check for validity of credentials
diff --git a/kernel/cred.c b/kernel/cred.c
index 473d17c431f3..791cab70b764 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -370,7 +370,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
ret = create_user_ns(new);
if (ret < 0)
goto error_put;
- ret = set_cred_ucounts(new);
+ ret = set_cred_ucounts(new, NULL);
if (ret < 0)
goto error_put;
}
@@ -492,7 +492,7 @@ int commit_creds(struct cred *new)
/* do it
* RLIMIT_NPROC limits on user->processes have already been checked
- * in set_user().
+ * in set_cred_ucounts().
*/
alter_cred_subscribers(new, 2);
if (new->user != old->user || new->user_ns != old->user_ns)
@@ -663,7 +663,7 @@ int cred_fscmp(const struct cred *a, const struct cred *b)
}
EXPORT_SYMBOL(cred_fscmp);
-int set_cred_ucounts(struct cred *new)
+int set_cred_ucounts(struct cred *new, unsigned int *nproc_flags)
{
struct task_struct *task = current;
const struct cred *old = task->real_cred;
@@ -685,6 +685,24 @@ int set_cred_ucounts(struct cred *new)
new->ucounts = new_ucounts;
put_ucounts(old_ucounts);
+ if (!nproc_flags)
+ return 0;
+
+ /*
+ * We don't fail in case of NPROC limit excess here because too many
+ * poorly written programs don't check set*uid() return code, assuming
+ * it never fails if called by root. We may still enforce NPROC limit
+ * for programs doing set*uid()+execve() by harmlessly deferring the
+ * failure to the execve() stage.
+ */
+ if (ucounts_limit_cmp(new->ucounts, UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC)) >= 0 &&
+ new->user != INIT_USER &&
+ !security_capable(new, &init_user_ns, CAP_SYS_RESOURCE, CAP_OPT_NONE) &&
+ !security_capable(new, &init_user_ns, CAP_SYS_ADMIN, CAP_OPT_NONE))
+ *nproc_flags |= PF_NPROC_EXCEEDED;
+ else
+ *nproc_flags &= ~PF_NPROC_EXCEEDED;
+
return 0;
}
diff --git a/kernel/fork.c b/kernel/fork.c
index 7cb21a70737d..a4005c679d29 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -3051,7 +3051,7 @@ int ksys_unshare(unsigned long unshare_flags)
goto bad_unshare_cleanup_cred;
if (new_cred) {
- err = set_cred_ucounts(new_cred);
+ err = set_cred_ucounts(new_cred, NULL);
if (err)
goto bad_unshare_cleanup_cred;
}
diff --git a/kernel/sys.c b/kernel/sys.c
index 48c90dcceff3..4e4eea30e235 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -472,21 +472,6 @@ static int set_user(struct cred *new)
if (!new_user)
return -EAGAIN;
- /*
- * We don't fail in case of NPROC limit excess here because too many
- * poorly written programs don't check set*uid() return code, assuming
- * it never fails if called by root. We may still enforce NPROC limit
- * for programs doing set*uid()+execve() by harmlessly deferring the
- * failure to the execve() stage.
- */
- if (ucounts_limit_cmp(new->ucounts, UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC)) >= 0 &&
- new_user != INIT_USER &&
- !security_capable(new, &init_user_ns, CAP_SYS_RESOURCE, CAP_OPT_NONE) &&
- !security_capable(new, &init_user_ns, CAP_SYS_ADMIN, CAP_OPT_NONE))
- current->flags |= PF_NPROC_EXCEEDED;
- else
- current->flags &= ~PF_NPROC_EXCEEDED;
-
free_uid(new->user);
new->user = new_user;
return 0;
@@ -560,7 +545,7 @@ long __sys_setreuid(uid_t ruid, uid_t euid)
if (retval < 0)
goto error;
- retval = set_cred_ucounts(new);
+ retval = set_cred_ucounts(new, ¤t->flags);
if (retval < 0)
goto error;
@@ -622,7 +607,7 @@ long __sys_setuid(uid_t uid)
if (retval < 0)
goto error;
- retval = set_cred_ucounts(new);
+ retval = set_cred_ucounts(new, ¤t->flags);
if (retval < 0)
goto error;
@@ -701,7 +686,7 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
if (retval < 0)
goto error;
- retval = set_cred_ucounts(new);
+ retval = set_cred_ucounts(new, ¤t->flags);
if (retval < 0)
goto error;
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 6b2e3ca7ee99..f7eec0b0233b 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -1344,7 +1344,7 @@ static int userns_install(struct nsset *nsset, struct ns_common *ns)
put_user_ns(cred->user_ns);
set_cred_user_ns(cred, get_user_ns(user_ns));
- if (set_cred_ucounts(cred) < 0)
+ if (set_cred_ucounts(cred, NULL) < 0)
return -EINVAL;
return 0;
--
2.34.1
Powered by blists - more mailing lists