[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <da42f1d7-adfe-485e-987e-3e8dae78b4c2@gmail.com>
Date: Wed, 15 Nov 2023 23:36:06 +0800
From: Yiwei Lin <s921975628@...il.com>
To: Abel Wu <wuyun.abel@...edance.com>
Cc: Barry Song <21cnbao@...il.com>,
Benjamin Segall <bsegall@...gle.com>,
Chen Yu <yu.c.chen@...el.com>,
Daniel Jordan <daniel.m.jordan@...cle.com>,
"Gautham R . Shenoy" <gautham.shenoy@....com>,
Joel Fernandes <joel@...lfernandes.org>,
K Prateek Nayak <kprateek.nayak@....com>,
Mike Galbraith <efault@....de>,
Qais Yousef <qyousef@...alina.io>,
Tim Chen <tim.c.chen@...ux.intel.com>,
Yicong Yang <yangyicong@...wei.com>,
Youssef Esmat <youssefesmat@...omium.org>,
linux-kernel@...r.kernel.org,
Peter Zijlstra <peterz@...radead.org>,
Ingo Molnar <mingo@...nel.org>,
Vincent Guittot <vincent.guittot@...aro.org>,
Dietmar Eggemann <dietmar.eggemann@....com>,
Valentin Schneider <valentin.schneider@....com>
Subject: Re: [PATCH 1/4] sched/eevdf: Fix vruntime adjustment on reweight
On 11/7/23 17:05, Abel Wu wrote:
> vruntime of the (on_rq && !0-lag) entity needs to be adjusted when
> it gets re-weighted, and the calculations can be simplified based
> on the fact that re-weight won't change the w-average of all the
> entities. Please check the proofs in comments.
>
> But adjusting vruntime can also cause position change in RB-tree
> hence require re-queue to fix up which might be costly. This might
> be avoided by deferring adjustment to the time the entity actually
> leaves tree (dequeue/pick), but that will negatively affect task
> selection and probably not good enough either.
>
> Fixes: 147f3efaa241 ("sched/fair: Implement an EEVDF-like scheduling policy")
> Signed-off-by: Abel Wu <wuyun.abel@...edance.com>
> ---
> kernel/sched/fair.c | 151 +++++++++++++++++++++++++++++++++++++-------
> 1 file changed, 128 insertions(+), 23 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 8767988242ee..b00d09a9b601 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -3666,41 +3666,140 @@ static inline void
> dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { }
> #endif
>
> +static void reweight_eevdf(struct cfs_rq *cfs_rq, struct sched_entity *se,
> + unsigned long weight)
> +{
> + unsigned long old_weight = se->load.weight;
> + u64 avruntime = avg_vruntime(cfs_rq);
> + s64 vlag, vslice;
> +
> + /*
> + * VRUNTIME
> + * ========
> + *
> + * COROLLARY #1: The virtual runtime of the entity needs to be
> + * adjusted if re-weight at !0-lag point.
> + *
> + * Proof: For contradiction assume this is not true, so we can
> + * re-weight without changing vruntime at !0-lag point.
> + *
> + * Weight VRuntime Avg-VRuntime
> + * before w v V
> + * after w' v' V'
> + *
> + * Since lag needs to be preserved through re-weight:
> + *
> + * lag = (V - v)*w = (V'- v')*w', where v = v'
> + * ==> V' = (V - v)*w/w' + v (1)
> + *
> + * Let W be the total weight of the entities before reweight,
> + * since V' is the new weighted average of entities:
> + *
> + * V' = (WV + w'v - wv) / (W + w' - w) (2)
> + *
> + * by using (1) & (2) we obtain:
> + *
> + * (WV + w'v - wv) / (W + w' - w) = (V - v)*w/w' + v
> + * ==> (WV-Wv+Wv+w'v-wv)/(W+w'-w) = (V - v)*w/w' + v
> + * ==> (WV - Wv)/(W + w' - w) + v = (V - v)*w/w' + v
> + * ==> (V - v)*W/(W + w' - w) = (V - v)*w/w' (3)
> + *
> + * Since we are doing at !0-lag point which means V != v, we
> + * can simplify (3):
> + *
> + * ==> W / (W + w' - w) = w / w'
> + * ==> Ww' = Ww + ww' - ww
> + * ==> W * (w' - w) = w * (w' - w)
> + * ==> W = w (re-weight indicates w' != w)
> + *
> + * So the cfs_rq contains only one entity, hence vruntime of
> + * the entity @v should always equal to the cfs_rq's weighted
> + * average vruntime @V, which means we will always re-weight
> + * at 0-lag point, thus breach assumption. Proof completed.
> + *
> + *
> + * COROLLARY #2: Re-weight does NOT affect weighted average
> + * vruntime of all the entities.
> + *
> + * Proof: According to corollary #1, Eq. (1) should be:
> + *
> + * (V - v)*w = (V' - v')*w'
> + * ==> v' = V' - (V - v)*w/w' (4)
> + *
> + * According to the weighted average formula, we have:
> + *
> + * V' = (WV - wv + w'v') / (W - w + w')
> + * = (WV - wv + w'(V' - (V - v)w/w')) / (W - w + w')
> + * = (WV - wv + w'V' - Vw + wv) / (W - w + w')
> + * = (WV + w'V' - Vw) / (W - w + w')
> + *
> + * ==> V'*(W - w + w') = WV + w'V' - Vw
> + * ==> V' * (W - w) = (W - w) * V (5)
> + *
> + * If the entity is the only one in the cfs_rq, then reweight
> + * always occurs at 0-lag point, so V won't change. Or else
> + * there are other entities, hence W != w, then Eq. (5) turns
> + * into V' = V. So V won't change in either case, proof done.
> + *
> + *
> + * So according to corollary #1 & #2, the effect of re-weight
> + * on vruntime should be:
> + *
> + * v' = V' - (V - v) * w / w' (4)
> + * = V - (V - v) * w / w'
> + * = V - vl * w / w'
> + * = V - vl'
> + */
> + if (avruntime != se->vruntime) {
> + vlag = (s64)(avruntime - se->vruntime);
> + vlag = div_s64(vlag * old_weight, weight);
> + se->vruntime = avruntime - vlag;
> + }
> +
> + /*
> + * DEADLINE
> + * ========
> + *
> + * When the weight changes, the virtual time slope changes and
> + * we should adjust the relative virtual deadline accordingly.
> + *
> + * d' = v' + (d - v)*w/w'
> + * = V' - (V - v)*w/w' + (d - v)*w/w'
> + * = V - (V - v)*w/w' + (d - v)*w/w'
> + * = V + (d - V)*w/w'
> + */
> + vslice = (s64)(se->deadline - avruntime);
> + vslice = div_s64(vslice * old_weight, weight);
> + se->deadline = avruntime + vslice;
> +}
> +
> static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
> unsigned long weight)
> {
> - unsigned long old_weight = se->load.weight;
> + bool curr = cfs_rq->curr == se;
>
> if (se->on_rq) {
> /* commit outstanding execution time */
> - if (cfs_rq->curr == se)
> + if (curr)
> update_curr(cfs_rq);
> else
> - avg_vruntime_sub(cfs_rq, se);
> + __dequeue_entity(cfs_rq, se);
> update_load_sub(&cfs_rq->load, se->load.weight);
> }
> dequeue_load_avg(cfs_rq, se);
>
> - update_load_set(&se->load, weight);
> -
> if (!se->on_rq) {
> /*
> * Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i),
> * we need to scale se->vlag when w_i changes.
> */
> - se->vlag = div_s64(se->vlag * old_weight, weight);
> + se->vlag = div_s64(se->vlag * se->load.weight, weight);
> } else {
> - s64 deadline = se->deadline - se->vruntime;
> - /*
> - * When the weight changes, the virtual time slope changes and
> - * we should adjust the relative virtual deadline accordingly.
> - */
> - deadline = div_s64(deadline * old_weight, weight);
> - se->deadline = se->vruntime + deadline;
> - if (se != cfs_rq->curr)
> - min_deadline_cb_propagate(&se->run_node, NULL);
> + reweight_eevdf(cfs_rq, se, weight);
> }
>
> + update_load_set(&se->load, weight);
> +
> #ifdef CONFIG_SMP
> do {
> u32 divider = get_pelt_divider(&se->avg);
> @@ -3712,8 +3811,17 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
> enqueue_load_avg(cfs_rq, se);
> if (se->on_rq) {
> update_load_add(&cfs_rq->load, se->load.weight);
> - if (cfs_rq->curr != se)
> - avg_vruntime_add(cfs_rq, se);
> + if (!curr) {
> + /*
> + * The entity's vruntime has been adjusted, so let's check
> + * whether the rq-wide min_vruntime needs updated too. Since
> + * the calculations above require stable min_vruntime rather
> + * than up-to-date one, we do the update at the end of the
> + * reweight process.
> + */
> + __enqueue_entity(cfs_rq, se);
> + update_min_vruntime(cfs_rq);
> + }
> }
> }
Sorry if this is a stupid question... It looks like
reweight_entity() may change the weight of the cfs_rq->curr entity,
but we never call update_min_vruntime() when reweighting it. Is
there any reason we can skip update_min_vruntime() in this case?
>
> @@ -3857,14 +3965,11 @@ static void update_cfs_group(struct sched_entity *se)
>
> #ifndef CONFIG_SMP
> shares = READ_ONCE(gcfs_rq->tg->shares);
> -
> - if (likely(se->load.weight == shares))
> - return;
> #else
> - shares = calc_group_shares(gcfs_rq);
> + shares = calc_group_shares(gcfs_rq);
> #endif
> -
> - reweight_entity(cfs_rq_of(se), se, shares);
> + if (unlikely(se->load.weight != shares))
> + reweight_entity(cfs_rq_of(se), se, shares);
> }
>
> #else /* CONFIG_FAIR_GROUP_SCHED */
Thanks,
Yiwei Lin
Powered by blists - more mailing lists