[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1454057651-23959-7-git-send-email-serge.hallyn@ubuntu.com>
Date: Fri, 29 Jan 2016 02:54:09 -0600
From: serge.hallyn@...ntu.com
To: linux-kernel@...r.kernel.org
Cc: adityakali@...gle.com, tj@...nel.org, linux-api@...r.kernel.org,
containers@...ts.linux-foundation.org, cgroups@...r.kernel.org,
lxc-devel@...ts.linuxcontainers.org, akpm@...ux-foundation.org,
ebiederm@...ssion.com, gregkh@...uxfoundation.org,
lizefan@...wei.com, hannes@...xchg.org,
Serge Hallyn <serge.hallyn@...ntu.com>,
Serge Hallyn <serge.hallyn@...onical.com>
Subject: [PATCH 6/8] cgroup: mount cgroupns-root when inside non-init cgroupns
From: Serge Hallyn <serge.hallyn@...ntu.com>
This patch enables cgroup mounting inside userns when a process
has appropriate privileges. The cgroup filesystem mounted is
rooted at the cgroupns-root. Thus, in a container setup, only
the hierarchy under the cgroupns-root is exposed inside the container.
This allows container management tools to run inside the containers
without depending on any global state.
Signed-off-by: Serge Hallyn <serge.hallyn@...onical.com>
---
Changelog:
20151116 - Don't allow user namespaces to bind new subsystems
20151118 - postpone the FS_USERNS_MOUNT flag until the
last patch, until we can convince ourselves it
is safe.
20151207 - Switch to walking up the kernfs path from kn root.
- Group initialized variables
- Explain the capable(CAP_SYS_ADMIN) check
- Style fixes
20160104 - kernfs_node_dentry: lock inode for lookup_one_len()
20160128 - grab needed lock in mount
Signed-off-by: Serge Hallyn <serge.hallyn@...ntu.com>
---
kernel/cgroup.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 47 insertions(+), 1 deletion(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 96e3dab..3e04df0 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1983,6 +1983,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
{
bool is_v2 = fs_type == &cgroup2_fs_type;
struct super_block *pinned_sb = NULL;
+ struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
struct cgroup_subsys *ss;
struct cgroup_root *root;
struct cgroup_sb_opts opts;
@@ -1991,6 +1992,14 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
int i;
bool new_sb;
+ get_cgroup_ns(ns);
+
+ /* Check if the caller has permission to mount. */
+ if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) {
+ put_cgroup_ns(ns);
+ return ERR_PTR(-EPERM);
+ }
+
/*
* The first time anyone tries to mount a cgroup, enable the list
* linking each css_set to its tasks and fix up all existing tasks.
@@ -2001,6 +2010,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
if (is_v2) {
if (data) {
pr_err("cgroup2: unknown option \"%s\"\n", (char *)data);
+ put_cgroup_ns(ns);
return ERR_PTR(-EINVAL);
}
cgrp_dfl_root_visible = true;
@@ -2106,6 +2116,16 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
goto out_unlock;
}
+ /*
+ * We know this subsystem has not yet been bound. Users in a non-init
+ * user namespace may only mount hierarchies with no bound subsystems,
+ * i.e. 'none,name=user1'
+ */
+ if (!opts.none && !capable(CAP_SYS_ADMIN)) {
+ ret = -EPERM;
+ goto out_unlock;
+ }
+
root = kzalloc(sizeof(*root), GFP_KERNEL);
if (!root) {
ret = -ENOMEM;
@@ -2124,12 +2144,37 @@ out_free:
kfree(opts.release_agent);
kfree(opts.name);
- if (ret)
+ if (ret) {
+ put_cgroup_ns(ns);
return ERR_PTR(ret);
+ }
out_mount:
dentry = kernfs_mount(fs_type, flags, root->kf_root,
is_v2 ? CGROUP2_SUPER_MAGIC : CGROUP_SUPER_MAGIC,
&new_sb);
+
+ /*
+ * In non-init cgroup namespace, instead of root cgroup's
+ * dentry, we return the dentry corresponding to the
+ * cgroupns->root_cgrp.
+ */
+ if (!IS_ERR(dentry) && ns != &init_cgroup_ns) {
+ struct dentry *nsdentry;
+ struct cgroup *cgrp;
+
+ mutex_lock(&cgroup_mutex);
+ spin_lock_bh(&css_set_lock);
+
+ cgrp = cset_cgroup_from_root(ns->root_cset, root);
+
+ spin_unlock_bh(&css_set_lock);
+ mutex_unlock(&cgroup_mutex);
+
+ nsdentry = kernfs_node_dentry(cgrp->kn, dentry->d_sb);
+ dput(dentry);
+ dentry = nsdentry;
+ }
+
if (IS_ERR(dentry) || !new_sb)
cgroup_put(&root->cgrp);
@@ -2142,6 +2187,7 @@ out_mount:
deactivate_super(pinned_sb);
}
+ put_cgroup_ns(ns);
return dentry;
}
--
1.7.9.5
Powered by blists - more mailing lists