Add cgroup wrapper for safely calling can_attach on all threads in a threadgroup

From: Ben Blum

This patch adds a function, cgroup_can_attach_per_thread(), which handles
iterating over each thread in a threadgroup safely with respect to the
invariants that will be used in cgroup_attach_proc(). Subsystems whose
can_attach callbacks require per-thread validation are converted to use
the wrapper, so that cgroup-internal iteration code is not duplicated in
each subsystem.

This is a pre-patch for cgroup-procs-writable.patch.

Signed-off-by: Ben Blum

---
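[Note for reviewers, not for the changelog: with the wrapper in place, a
converted can_attach ends up with roughly the shape sketched below. This is
illustrative only; the "example" names, including the
example_task_may_attach() predicate, are placeholders invented for this
note and are not part of the patch.]

	static int example_can_attach_cb(struct cgroup *cgrp,
					 struct task_struct *task)
	{
		/*
		 * Per-thread check. May be called under rcu_read_lock(),
		 * so it must not sleep.
		 */
		if (!example_task_may_attach(task))	/* placeholder predicate */
			return -EBUSY;
		return 0;
	}

	static int example_can_attach(struct cgroup_subsys *ss,
				      struct cgroup *cgrp,
				      struct task_struct *task,
				      bool threadgroup)
	{
		/* Checks the leader once, or every thread if 'threadgroup'. */
		return cgroup_can_attach_per_thread(cgrp, task,
						    example_can_attach_cb,
						    threadgroup);
	}
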
 include/linux/cgroup.h  |   12 ++++++++++++
 kernel/cgroup.c         |   35 +++++++++++++++++++++++++++++++++++
 kernel/cgroup_freezer.c |   27 ++++++++++++---------------
 kernel/cpuset.c         |   20 +++++++-------------
 kernel/ns_cgroup.c      |   27 +++++++++++++--------------
 kernel/sched.c          |   21 ++++++---------------
 6 files changed, 85 insertions(+), 57 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index e3d00fd..f040d66 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -580,6 +580,18 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan);
 int cgroup_attach_task(struct cgroup *, struct task_struct *);
 
 /*
+ * For use in subsystems whose can_attach functions need to run an operation
+ * on every task in the threadgroup. Calls the given callback once if the
+ * 'threadgroup' flag is false, or once per thread in the group if true.
+ * The callback should return 0/-ERR; this will return 0/-ERR.
+ * The callback will run within an rcu_read section, so must not sleep.
+ */
+int cgroup_can_attach_per_thread(struct cgroup *cgrp, struct task_struct *task,
+                                 int (*cb)(struct cgroup *cgrp,
+                                           struct task_struct *task),
+                                 bool threadgroup);
+
+/*
  * CSS ID is ID for cgroup_subsys_state structs under subsys. This only works
  * if cgroup_subsys.use_id == true. It can be used for looking up and scanning.
  * CSS ID is assigned at cgroup allocation (create) automatically
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index f91d7dd..e8b8f71 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1688,6 +1688,41 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
 }
 EXPORT_SYMBOL_GPL(cgroup_path);
 
+int cgroup_can_attach_per_thread(struct cgroup *cgrp, struct task_struct *task,
+                                 int (*cb)(struct cgroup *cgrp,
+                                           struct task_struct *task),
+                                 bool threadgroup)
+{
+        /* Start by running on the leader, in all cases. */
+        int ret = cb(cgrp, task);
+        if (ret < 0)
+                return ret;
+
+        if (threadgroup) {
+                /* Run on each task in the threadgroup. */
+                struct task_struct *c;
+                rcu_read_lock();
+                /*
+                 * It is necessary for the given task to still be the leader
+                 * to safely traverse thread_group. See cgroup_attach_proc.
+                 */
+                if (!thread_group_leader(task)) {
+                        rcu_read_unlock();
+                        return -EAGAIN;
+                }
+                list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
+                        ret = cb(cgrp, c);
+                        if (ret < 0) {
+                                rcu_read_unlock();
+                                return ret;
+                        }
+                }
+                rcu_read_unlock();
+        }
+        return 0;
+}
+EXPORT_SYMBOL_GPL(cgroup_can_attach_per_thread);
+
 /**
  * cgroup_attach_task - attach task 'tsk' to cgroup 'cgrp'
  * @cgrp: the cgroup the task is attaching to
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index ce71ed5..677b24e 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -161,6 +161,13 @@ static bool is_task_frozen_enough(struct task_struct *task)
                 (task_is_stopped_or_traced(task) && freezing(task));
 }
 
+static int freezer_can_attach_cb(struct cgroup *cgrp, struct task_struct *task)
+{
+        if (is_task_frozen_enough(task))
+                return -EBUSY;
+        return 0;
+}
+
 /*
  * The call to cgroup_lock() in the freezer.state write method prevents
  * a write to that file racing against an attach, and hence the
@@ -171,6 +178,7 @@ static int freezer_can_attach(struct cgroup_subsys *ss,
                               struct task_struct *task, bool threadgroup)
 {
         struct freezer *freezer;
+        int ret;
 
         /*
          * Anything frozen can't move or be moved to/from.
@@ -179,26 +187,15 @@ static int freezer_can_attach(struct cgroup_subsys *ss,
          * frozen, so it's sufficient to check the latter condition.
          */
 
-        if (is_task_frozen_enough(task))
-                return -EBUSY;
+        ret = cgroup_can_attach_per_thread(new_cgroup, task,
+                                           freezer_can_attach_cb, threadgroup);
+        if (ret < 0)
+                return ret;
 
         freezer = cgroup_freezer(new_cgroup);
         if (freezer->state == CGROUP_FROZEN)
                 return -EBUSY;
 
-        if (threadgroup) {
-                struct task_struct *c;
-
-                rcu_read_lock();
-                list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
-                        if (is_task_frozen_enough(c)) {
-                                rcu_read_unlock();
-                                return -EBUSY;
-                        }
-                }
-                rcu_read_unlock();
-        }
-
         return 0;
 }
 
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index b23c097..cc4b1f7 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1376,6 +1376,11 @@ static int fmeter_getrate(struct fmeter *fmp)
 /* Protected by cgroup_lock */
 static cpumask_var_t cpus_attach;
 
+static int cpuset_can_attach_cb(struct cgroup *cgrp, struct task_struct *task)
+{
+        return security_task_setscheduler(task, 0, NULL);
+}
+
 /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */
 static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont,
                              struct task_struct *tsk, bool threadgroup)
@@ -1397,22 +1402,11 @@ static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont,
         if (tsk->flags & PF_THREAD_BOUND)
                 return -EINVAL;
 
-        ret = security_task_setscheduler(tsk, 0, NULL);
+        ret = cgroup_can_attach_per_thread(cont, tsk, cpuset_can_attach_cb,
+                                           threadgroup);
         if (ret)
                 return ret;
 
-        if (threadgroup) {
-                struct task_struct *c;
-                rcu_read_lock();
-                list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
-                        ret = security_task_setscheduler(c, 0, NULL);
-                        if (ret) {
-                                rcu_read_unlock();
-                                return ret;
-                        }
-                }
-                rcu_read_unlock();
-        }
         return 0;
 }
 
diff --git a/kernel/ns_cgroup.c b/kernel/ns_cgroup.c
index 2a5dfec..af0accf 100644
--- a/kernel/ns_cgroup.c
+++ b/kernel/ns_cgroup.c
@@ -42,9 +42,18 @@ int ns_cgroup_clone(struct task_struct *task, struct pid *pid)
  *       (hence either you are in the same cgroup as task, or in an
  *        ancestor cgroup thereof)
  */
+static int ns_can_attach_cb(struct cgroup *new_cgroup, struct task_struct *task)
+{
+        if (!cgroup_is_descendant(new_cgroup, task))
+                return -EPERM;
+        return 0;
+}
+
 static int ns_can_attach(struct cgroup_subsys *ss, struct cgroup *new_cgroup,
                          struct task_struct *task, bool threadgroup)
 {
+        int ret;
+
         if (current != task) {
                 if (!capable(CAP_SYS_ADMIN))
                         return -EPERM;
@@ -53,20 +62,10 @@ static int ns_can_attach(struct cgroup_subsys *ss, struct cgroup *new_cgroup,
                         return -EPERM;
         }
 
-        if (!cgroup_is_descendant(new_cgroup, task))
-                return -EPERM;
-
-        if (threadgroup) {
-                struct task_struct *c;
-                rcu_read_lock();
-                list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
-                        if (!cgroup_is_descendant(new_cgroup, c)) {
-                                rcu_read_unlock();
-                                return -EPERM;
-                        }
-                }
-                rcu_read_unlock();
-        }
+        ret = cgroup_can_attach_per_thread(new_cgroup, task, ns_can_attach_cb,
+                                           threadgroup);
+        if (ret < 0)
+                return ret;
 
         return 0;
 }
diff --git a/kernel/sched.c b/kernel/sched.c
index 70fa78d..8330e6f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -8715,21 +8715,12 @@ static int cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
                       struct task_struct *tsk, bool threadgroup)
 {
-        int retval = cpu_cgroup_can_attach_task(cgrp, tsk);
-        if (retval)
-                return retval;
-        if (threadgroup) {
-                struct task_struct *c;
-                rcu_read_lock();
-                list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
-                        retval = cpu_cgroup_can_attach_task(cgrp, c);
-                        if (retval) {
-                                rcu_read_unlock();
-                                return retval;
-                        }
-                }
-                rcu_read_unlock();
-        }
+        int ret = cgroup_can_attach_per_thread(cgrp, tsk,
+                                               cpu_cgroup_can_attach_task,
+                                               threadgroup);
+        if (ret)
+                return ret;
+
         return 0;
 }
 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/