Message-ID: <CAKfTPtD3rLQH=cZSUYnaavMUJFUj6wOiE74uWYrPvknHUfFXwQ@mail.gmail.com>
Date: Wed, 1 Jun 2016 17:26:41 +0200
From: Vincent Guittot <vincent.guittot@...aro.org>
To: Peter Zijlstra <peterz@...radead.org>
Cc: Ingo Molnar <mingo@...nel.org>,
linux-kernel <linux-kernel@...r.kernel.org>,
Paul Turner <pjt@...gle.com>, Yuyang Du <yuyang.du@...el.com>,
Dietmar Eggemann <dietmar.eggemann@....com>,
Benjamin Segall <bsegall@...gle.com>,
Morten Rasmussen <Morten.Rasmussen@....com>
Subject: Re: [RFC PATCH v2] sched: reflect sched_entity movement into
task_group's utilization
On 1 June 2016 at 15:36, Peter Zijlstra <peterz@...radead.org> wrote:
> On Tue, May 24, 2016 at 10:57:32AM +0200, Vincent Guittot wrote:
>
>> +/*
>> + * Save how much utilization has just been added/removed on cfs rq so we can
>> + * propagate it across the whole tg tree
>> + */
>> +static void set_tg_cfs_rq_util(struct cfs_rq *cfs_rq, int delta)
>> +{
>> + if (cfs_rq->tg == &root_task_group)
>> + return;
>> +
>> + cfs_rq->diff_util_avg += delta;
>> +}
>
> function name doesn't really reflect its purpose..
OK, I will change it.
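Something like this, perhaps (the new name is only a suggestion,
untested):

	/*
	 * Record how much utilization has just been added to/removed
	 * from the cfs_rq, so that the pending delta can later be
	 * propagated to the parent cfs_rq across the whole tg tree.
	 */
	static void cfs_rq_add_util_diff(struct cfs_rq *cfs_rq, int delta)
	{
		if (cfs_rq->tg == &root_task_group)
			return;

		cfs_rq->diff_util_avg += delta;
	}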
>
>> +
>> +/* Take into account the change of the utilization of a child task group */
>
> This comment could be so much better... :-)
Yes, I'm going to add more details about what is done.
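For example, something like this (wording still to be refined):

	/*
	 * update_tg_cfs_util() applies the pending utilization delta
	 * (diff_util_avg) of a group cfs_rq to the sched_entity that
	 * represents it, then adds the resulting change of the se's
	 * util_avg to the parent cfs_rq and saves it there as a new
	 * pending delta, so the change propagates level by level up
	 * to the root cfs_rq.
	 */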
>
>> +static void update_tg_cfs_util(struct sched_entity *se, int blocked)
>> +{
>> + int delta;
>> + struct cfs_rq *cfs_rq;
>> + long update_util_avg;
>> + long last_update_time;
>> + long old_util_avg;
>> +
>> + /*
>> + * update_blocked_averages() will call this function for the root
>> + * cfs_rq, whose se is NULL. In this case just return.
>> + */
>> + if (!se)
>> + return;
>> +
>> + if (entity_is_task(se))
>> + return;
>> +
>> + /* Get the cfs rq owned by this sched_entity */
>> + cfs_rq = group_cfs_rq(se);
>> +
>> + update_util_avg = cfs_rq->diff_util_avg;
>> +
>> + if (!update_util_avg)
>> + return;
>> +
>> + /* Clear pending changes */
>> + cfs_rq->diff_util_avg = 0;
>> +
>> + /* Add changes in sched_entity utilization */
>> + old_util_avg = se->avg.util_avg;
>> + se->avg.util_avg = max_t(long, se->avg.util_avg + update_util_avg, 0);
>> + se->avg.util_sum = se->avg.util_avg * LOAD_AVG_MAX;
>> +
>> + /* Get parent cfs_rq */
>> + cfs_rq = cfs_rq_of(se);
>> +
>> + if (blocked) {
>> + /*
>> + * blocked utilization has to be synchronized with its parent
>> + * cfs_rq's timestamp
>> + */
>> + last_update_time = cfs_rq_last_update_time(cfs_rq);
>> +
>> + __update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)),
>> + &se->avg,
>> + se->on_rq * scale_load_down(se->load.weight),
>> + cfs_rq->curr == se, NULL);
>> + }
>> +
>> + delta = se->avg.util_avg - old_util_avg;
>> +
>> + cfs_rq->avg.util_avg = max_t(long, cfs_rq->avg.util_avg + delta, 0);
>> + cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * LOAD_AVG_MAX;
>> +
>> + set_tg_cfs_rq_util(cfs_rq, delta);
>> +}
>
> So if I read this right it computes the utilization delta for group se's
> and propagates it into the corresponding parent group cfs_rq ?
yes
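To take an example: if a task with a util_avg of 100 is detached from
the cfs_rq of a nested group A/B, the detach removes 100 from B's
cfs_rq and saves -100 in B's diff_util_avg. The next call of
update_tg_cfs_util() for B's se applies the -100 to that se, and the
resulting delta is then removed from A's cfs_rq and saved in A's
diff_util_avg, and so on until the change reaches the root cfs_rq. At
each level, util_sum is resynced as util_avg * LOAD_AVG_MAX.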
>
>> @@ -6276,6 +6368,8 @@ static void update_blocked_averages(int cpu)
>>
>> if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq, true))
>> update_tg_load_avg(cfs_rq, 0);
>> + /* Propagate pending util changes to the parent */
>> + update_tg_cfs_util(cfs_rq->tg->se[cpu], true);
>
> And this is why you need the strict bottom-up thing fixed..
yes, the child cfs_rq's pending diff_util_avg has to be applied before
its parent cfs_rq is updated, so the list must be walked strictly
bottom-up
>
>> @@ -8370,6 +8464,20 @@ static void detach_task_cfs_rq(struct task_struct *p)
>>
>> /* Catch up with the cfs_rq and remove our load when we leave */
>> detach_entity_load_avg(cfs_rq, se);
>> +
>> + /*
>> + * Propagate the detach across the tg tree to make it visible to the
>> + * root
>> + */
>> + for_each_sched_entity(se) {
>> + cfs_rq = cfs_rq_of(se);
>> +
>> + if (cfs_rq_throttled(cfs_rq))
>> + break;
>> +
>> + update_load_avg(se, 1);
>> + update_tg_cfs_util(se, false);
>> + }
>> }
>>
>> static void attach_task_cfs_rq(struct task_struct *p)
>> @@ -8399,8 +8507,21 @@ static void switched_from_fair(struct rq *rq, struct task_struct *p)
>>
>> static void switched_to_fair(struct rq *rq, struct task_struct *p)
>> {
>> + struct sched_entity *se = &p->se;
>> + struct cfs_rq *cfs_rq;
>> +
>> attach_task_cfs_rq(p);
>>
>> + for_each_sched_entity(se) {
>> + cfs_rq = cfs_rq_of(se);
>> +
>> + if (cfs_rq_throttled(cfs_rq))
>> + break;
>> +
>> + update_load_avg(se, 1);
>> + update_tg_cfs_util(se, false);
>> + }
>> +
>> if (task_on_rq_queued(p)) {
>> /*
>> * We were most likely switched from sched_rt, so
>
> So I would expect this to be in attach_task_cfs_rq() to mirror the
> detach_task_cfs_rq().
Yes. My goal was to rely on the enqueue to propagate the move during a
task_move_group, but it's better to place it in attach_task_cfs_rq so
we have the same sequence for attaching and detaching.
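i.e. something like the below in attach_task_cfs_rq() (untested sketch
that mirrors the detach side):

	static void attach_task_cfs_rq(struct task_struct *p)
	{
		struct sched_entity *se = &p->se;
		struct cfs_rq *cfs_rq = cfs_rq_of(se);

		...

		attach_entity_load_avg(cfs_rq, se);

		/*
		 * Propagate the attach across the tg tree to make it
		 * visible to the root
		 */
		for_each_sched_entity(se) {
			cfs_rq = cfs_rq_of(se);

			if (cfs_rq_throttled(cfs_rq))
				break;

			update_load_avg(se, 1);
			update_tg_cfs_util(se, false);
		}
	}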
>
> Also, this change is somewhat independent but required for the 'flat'
> util measure, right?
I don't know if it's needed for the flat util measure, as the main
interest of the flat util measure is to not go through the tg tree.