[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAKfTPtAuh2=V5PXFc0jL7m1FphUAM-O0P=3wya7NYXLF0JVtcw@mail.gmail.com>
Date: Fri, 17 Jun 2016 15:58:37 +0200
From: Vincent Guittot <vincent.guittot@...aro.org>
To: Peter Zijlstra <peterz@...radead.org>
Cc: Yuyang Du <yuyang.du@...el.com>, Ingo Molnar <mingo@...nel.org>,
linux-kernel <linux-kernel@...r.kernel.org>,
Mike Galbraith <umgwanakikbuti@...il.com>,
Benjamin Segall <bsegall@...gle.com>,
Paul Turner <pjt@...gle.com>,
Morten Rasmussen <morten.rasmussen@....com>,
Dietmar Eggemann <dietmar.eggemann@....com>,
Matt Fleming <matt@...eblueprint.co.uk>
Subject: Re: [PATCH 3/4] sched,cgroup: Fix cpu_cgroup_fork()
On 17 June 2016 at 14:01, Peter Zijlstra <peterz@...radead.org> wrote:
> From: Vincent Guittot <vincent.guittot@...aro.org>
>
> A new fair task is detached and attached from/to task_group with:
>
> cgroup_post_fork()
> ss->fork(child) := cpu_cgroup_fork()
> sched_move_task()
> task_move_group_fair()
>
> Which is wrong, because at this point in fork() the task isn't fully
> initialized and it cannot 'move' to another group, because it's not
> attached to any group as yet.
>
> In fact, cpu_cgroup_fork needs a small part of sched_move_task so we
> can just call this small part directly instead of sched_move_task. And
> the task doesn't really migrate because it is not yet attached so we
> need the sequence:
>
> do_fork()
> sched_fork()
> __set_task_cpu()
>
> cgroup_post_fork()
> set_task_rq() # set task group and runqueue
>
> wake_up_new_task()
> select_task_rq() can select a new cpu
> __set_task_cpu
> post_init_entity_util_avg
> attach_task_cfs_rq()
> activate_task
> enqueue_task
>
> This patch makes that happen.
>
With this patch and patch 1, the fork sequence looks correct in my tests.
> Maybe-Signed-off-by: Vincent Guittot <vincent.guittot@...aro.org>
You can remove the Maybe if you want
> Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
> ---
> kernel/sched/core.c | 67 ++++++++++++++++++++++++++++++++++++----------------
> 1 file changed, 47 insertions(+), 20 deletions(-)
>
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -7743,27 +7743,17 @@ void sched_offline_group(struct task_gro
> spin_unlock_irqrestore(&task_group_lock, flags);
> }
>
> -/* change task's runqueue when it moves between groups.
> - * The caller of this function should have put the task in its new group
> - * by now. This function just updates tsk->se.cfs_rq and tsk->se.parent to
> - * reflect its new group.
> +/*
> + * Set task's runqueue and group.
> + *
> + * In case of a move between groups, we update src and dst group thanks to
> + * sched_class->task_move_group. Otherwise, we just need to set runqueue and
> + * group pointers. The task will be attached to the runqueue during its wake
> + * up.
> */
> -void sched_move_task(struct task_struct *tsk)
> +static void sched_set_group(struct task_struct *tsk, bool move)
> {
> struct task_group *tg;
> - int queued, running;
> - struct rq_flags rf;
> - struct rq *rq;
> -
> - rq = task_rq_lock(tsk, &rf);
> -
> - running = task_current(rq, tsk);
> - queued = task_on_rq_queued(tsk);
> -
> - if (queued)
> - dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE);
> - if (unlikely(running))
> - put_prev_task(rq, tsk);
>
> /*
> * All callers are synchronized by task_rq_lock(); we do not use RCU
> @@ -7776,11 +7766,37 @@ void sched_move_task(struct task_struct
> tsk->sched_task_group = tg;
>
> #ifdef CONFIG_FAIR_GROUP_SCHED
> - if (tsk->sched_class->task_move_group)
> + if (move && tsk->sched_class->task_move_group)
> tsk->sched_class->task_move_group(tsk);
> else
> #endif
> set_task_rq(tsk, task_cpu(tsk));
> +}
> +
> +/*
> + * Change task's runqueue when it moves between groups.
> + *
> + * The caller of this function should have put the task in its new group by
> + * now. This function just updates tsk->se.cfs_rq and tsk->se.parent to reflect
> + * its new group.
> + */
> +void sched_move_task(struct task_struct *tsk)
> +{
> + int queued, running;
> + struct rq_flags rf;
> + struct rq *rq;
> +
> + rq = task_rq_lock(tsk, &rf);
> +
> + running = task_current(rq, tsk);
> + queued = task_on_rq_queued(tsk);
> +
> + if (queued)
> + dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE);
> + if (unlikely(running))
> + put_prev_task(rq, tsk);
> +
> + sched_set_group(tsk, true);
>
> if (unlikely(running))
> tsk->sched_class->set_curr_task(rq);
> @@ -8208,9 +8224,20 @@ static void cpu_cgroup_css_free(struct c
> sched_free_group(tg);
> }
>
> +/*
> + * This is called before wake_up_new_task(), therefore we really only
> + * have to set its group bits, all the other stuff does not apply.
> + */
> static void cpu_cgroup_fork(struct task_struct *task)
> {
> - sched_move_task(task);
> + struct rq_flags rf;
> + struct rq *rq;
> +
> + rq = task_rq_lock(task, &rf);
> +
> + sched_set_group(task, false);
> +
> + task_rq_unlock(rq, task, &rf);
> }
>
> static int cpu_cgroup_can_attach(struct cgroup_taskset *tset)
>
>
Powered by blists - more mailing lists