From: Vincent Guittot A new fair task is detached and attached from/to task_group with: cgroup_post_fork() ss->fork(child) := cpu_cgroup_fork() sched_move_task() task_move_group_fair() Which is wrong, because at this point in fork() the task isn't fully initialized and it cannot 'move' to another group, because its not attached to any group as yet. In fact, cpu_cgroup_fork needs a small part of sched_move_task so we can just call this small part directly instead sched_move_task. And the task doesn't really migrate because it is not yet attached so we need the sequence: do_fork() sched_fork() __set_task_cpu() cgroup_post_fork() set_task_rq() # set task group and runqueue wake_up_new_task() select_task_rq() can select a new cpu __set_task_cpu post_init_entity_util_avg attach_task_cfs_rq() activate_task enqueue_task This patch makes that happen. Maybe-Signed-off-by: Vincent Guittot Signed-off-by: Peter Zijlstra (Intel) --- kernel/sched/core.c | 67 ++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 47 insertions(+), 20 deletions(-) --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7743,27 +7743,17 @@ void sched_offline_group(struct task_gro spin_unlock_irqrestore(&task_group_lock, flags); } -/* change task's runqueue when it moves between groups. - * The caller of this function should have put the task in its new group - * by now. This function just updates tsk->se.cfs_rq and tsk->se.parent to - * reflect its new group. +/* + * Set task's runqueue and group. + * + * In case of a move between group, we update src and dst group thanks to + * sched_class->task_move_group. Otherwise, we just need to set runqueue and + * group pointers. The task will be attached to the runqueue during its wake + * up. */ -void sched_move_task(struct task_struct *tsk) +static void sched_set_group(struct task_struct *tsk, bool move) { struct task_group *tg; - int queued, running; - struct rq_flags rf; - struct rq *rq; - - rq = task_rq_lock(tsk, &rf); - - running = task_current(rq, tsk); - queued = task_on_rq_queued(tsk); - - if (queued) - dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE); - if (unlikely(running)) - put_prev_task(rq, tsk); /* * All callers are synchronized by task_rq_lock(); we do not use RCU @@ -7776,11 +7766,37 @@ void sched_move_task(struct task_struct tsk->sched_task_group = tg; #ifdef CONFIG_FAIR_GROUP_SCHED - if (tsk->sched_class->task_move_group) + if (move && tsk->sched_class->task_move_group) tsk->sched_class->task_move_group(tsk); else #endif set_task_rq(tsk, task_cpu(tsk)); +} + +/* + * Change task's runqueue when it moves between groups. + * + * The caller of this function should have put the task in its new group by + * now. This function just updates tsk->se.cfs_rq and tsk->se.parent to reflect + * its new group. + */ +void sched_move_task(struct task_struct *tsk) +{ + int queued, running; + struct rq_flags rf; + struct rq *rq; + + rq = task_rq_lock(tsk, &rf); + + running = task_current(rq, tsk); + queued = task_on_rq_queued(tsk); + + if (queued) + dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE); + if (unlikely(running)) + put_prev_task(rq, tsk); + + sched_set_group(tsk, true); if (unlikely(running)) tsk->sched_class->set_curr_task(rq); @@ -8208,9 +8224,20 @@ static void cpu_cgroup_css_free(struct c sched_free_group(tg); } +/* + * This is called before wake_up_new_task(), therefore we really only + * have to set its group bits, all the other stuff does not apply. + */ static void cpu_cgroup_fork(struct task_struct *task) { - sched_move_task(task); + struct rq_flags rf; + struct rq *rq; + + rq = task_rq_lock(task, &rf); + + sched_set_group(task, false); + + task_rq_unlock(rq, task, &rf); } static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)