Message-ID: <CAB8ipk_xXY7Dia32NzCpva_Bi1L7ijTHjZHA_riCUwj7e4PtpA@mail.gmail.com>
Date: Thu, 6 Mar 2025 20:03:34 +0800
From: Xuewen Yan <xuewen.yan94@...il.com>
To: Hongyan Xia <hongyan.xia2@....com>
Cc: Ingo Molnar <mingo@...hat.com>, Peter Zijlstra <peterz@...radead.org>,
Juri Lelli <juri.lelli@...hat.com>, Vincent Guittot <vincent.guittot@...aro.org>,
Dietmar Eggemann <dietmar.eggemann@....com>, Steven Rostedt <rostedt@...dmis.org>,
Ben Segall <bsegall@...gle.com>, Mel Gorman <mgorman@...e.de>,
Valentin Schneider <vschneid@...hat.com>, Tejun Heo <tj@...nel.org>, David Vernet <void@...ifault.com>,
Andrea Righi <arighi@...dia.com>, Changwoo Min <changwoo@...lia.com>, linux-kernel@...r.kernel.org,
Xuewen Yan <xuewen.yan@...soc.com>
Subject: Re: [PATCH] sched/uclamp: Let each sched_class handle uclamp
Hi Hongyan,
On Thu, Feb 27, 2025 at 9:55 PM Hongyan Xia <hongyan.xia2@....com> wrote:
>
> While the delayed dequeue issues were being resolved, uclamp fell out of
> sync with cpufreq, especially in enqueue_task().
>
> For example, when a task with uclamp_min goes through enqueue_task() and
> updates cpufreq, its uclamp_min is not even considered in that cpufreq
> update. Only after the enqueue is the uclamp_min added to the rq
> buckets, and cpufreq only picks it up at the next update. This is very
> different from the old behavior, where a uclamp value took effect
> immediately at enqueue. Worse, sub-classes like fair.c issue cpufreq
> updates on utilization changes, so if utilization does not change for a
> while, the new uclamp value is delayed even further.
>
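
To restate the ordering problem in code, here is an abridged sketch of the
current enqueue_task() path (matching the core.c hunk below, not the exact
source):

    void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
    {
            ...
            /* the class enqueue may already issue a cpufreq update */
            p->sched_class->enqueue_task(rq, p, flags);

            /*
             * Only here are p's clamps added to the rq buckets, so the
             * cpufreq update issued above did not see the new uclamp_min.
             */
            uclamp_rq_inc(rq, p);
            ...
    }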
> So, let each sched_class handle uclamp itself, in case delayed dequeue
> needs further tweaks or similar changes come along in the future, and
> make sure uclamp is picked up immediately on enqueue. In fair.c, we
> reuse the guard logic already used for util_est.
>
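
Concretely, the fair.c enqueue path then becomes roughly the following
(abridged from the fair.c hunk below); as I read it, the guard skips a
delayed task that is only being migrated or save/restored, so the
inc/dec pair stays balanced:

    static void
    enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
    {
            ...
            /* same guard as util_est_enqueue() */
            if (!(p->se.sched_delayed &&
                  (task_on_rq_migrating(p) || (flags & ENQUEUE_RESTORE)))) {
                    uclamp_rq_inc(rq, p);
                    util_est_enqueue(&rq->cfs, p);
            }
            ...
    }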
> Signed-off-by: Hongyan Xia <hongyan.xia2@....com>
> ---
> kernel/sched/core.c | 28 ++--------------------------
> kernel/sched/ext.c | 8 ++++----
> kernel/sched/fair.c | 12 ++++++------
> kernel/sched/rt.c | 8 ++++----
> kernel/sched/sched.h | 9 +++++----
> 5 files changed, 21 insertions(+), 44 deletions(-)
>
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index b00f884701a6..2d51608a4c46 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -1745,7 +1745,7 @@ static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p,
> }
> }
>
> -static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
> +void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
> {
> enum uclamp_id clamp_id;
>
> @@ -1758,12 +1758,6 @@ static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
> if (!static_branch_unlikely(&sched_uclamp_used))
> return;
>
> - if (unlikely(!p->sched_class->uclamp_enabled))
> - return;
> -
> - if (p->se.sched_delayed)
> - return;
> -
> for_each_clamp_id(clamp_id)
> uclamp_rq_inc_id(rq, p, clamp_id);
>
> @@ -1772,7 +1766,7 @@ static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
> rq->uclamp_flags &= ~UCLAMP_FLAG_IDLE;
> }
>
> -static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p)
> +void uclamp_rq_dec(struct rq *rq, struct task_struct *p)
> {
> enum uclamp_id clamp_id;
>
> @@ -1785,12 +1779,6 @@ static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p)
> if (!static_branch_unlikely(&sched_uclamp_used))
> return;
>
> - if (unlikely(!p->sched_class->uclamp_enabled))
> - return;
> -
> - if (p->se.sched_delayed)
> - return;
> -
> for_each_clamp_id(clamp_id)
> uclamp_rq_dec_id(rq, p, clamp_id);
> }
> @@ -2029,8 +2017,6 @@ static void __init init_uclamp(void)
> }
>
> #else /* !CONFIG_UCLAMP_TASK */
> -static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) { }
> -static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) { }
> static inline void uclamp_fork(struct task_struct *p) { }
> static inline void uclamp_post_fork(struct task_struct *p) { }
> static inline void init_uclamp(void) { }
> @@ -2066,11 +2052,6 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
> update_rq_clock(rq);
>
> p->sched_class->enqueue_task(rq, p, flags);
> - /*
> - * Must be after ->enqueue_task() because ENQUEUE_DELAYED can clear
> - * ->sched_delayed.
> - */
> - uclamp_rq_inc(rq, p);
>
> psi_enqueue(p, flags);
>
> @@ -2097,11 +2078,6 @@ inline bool dequeue_task(struct rq *rq, struct task_struct *p, int flags)
>
> psi_dequeue(p, flags);
>
> - /*
> - * Must be before ->dequeue_task() because ->dequeue_task() can 'fail'
> - * and mark the task ->sched_delayed.
> - */
> - uclamp_rq_dec(rq, p);
> return p->sched_class->dequeue_task(rq, p, flags);
> }
>
> diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
> index 8857c0709bdd..4521c27f9ab8 100644
> --- a/kernel/sched/ext.c
> +++ b/kernel/sched/ext.c
> @@ -2094,6 +2094,8 @@ static void enqueue_task_scx(struct rq *rq, struct task_struct *p, int enq_flags
> {
> int sticky_cpu = p->scx.sticky_cpu;
>
> + uclamp_rq_inc(rq, p);
> +
> if (enq_flags & ENQUEUE_WAKEUP)
> rq->scx.flags |= SCX_RQ_IN_WAKEUP;
>
> @@ -2181,6 +2183,8 @@ static void ops_dequeue(struct task_struct *p, u64 deq_flags)
>
> static bool dequeue_task_scx(struct rq *rq, struct task_struct *p, int deq_flags)
> {
> + uclamp_rq_dec(rq, p);
> +
> if (!(p->scx.flags & SCX_TASK_QUEUED)) {
> WARN_ON_ONCE(task_runnable(p));
> return true;
> @@ -4456,10 +4460,6 @@ DEFINE_SCHED_CLASS(ext) = {
> .prio_changed = prio_changed_scx,
>
> .update_curr = update_curr_scx,
> -
> -#ifdef CONFIG_UCLAMP_TASK
> - .uclamp_enabled = 1,
> -#endif
> };
>
> static void init_dsq(struct scx_dispatch_q *dsq, u64 dsq_id)
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 857808da23d8..7e5a653811ad 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -6941,8 +6941,10 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> * Let's add the task's estimated utilization to the cfs_rq's
> * estimated utilization, before we update schedutil.
> */
> - if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & ENQUEUE_RESTORE))))
> + if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & ENQUEUE_RESTORE)))) {
> + uclamp_rq_inc(rq, p);
> util_est_enqueue(&rq->cfs, p);
> + }
>
> if (flags & ENQUEUE_DELAYED) {
> requeue_delayed_entity(se);
> @@ -7183,8 +7185,10 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
> */
> static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> {
> - if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & DEQUEUE_SAVE))))
> + if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & DEQUEUE_SAVE)))) {
> + uclamp_rq_dec(rq, p);
> util_est_dequeue(&rq->cfs, p);
> + }
>
> util_est_update(&rq->cfs, p, flags & DEQUEUE_SLEEP);
> if (dequeue_entities(rq, &p->se, flags) < 0)
> @@ -13660,10 +13664,6 @@ DEFINE_SCHED_CLASS(fair) = {
> #ifdef CONFIG_SCHED_CORE
> .task_is_throttled = task_is_throttled_fair,
> #endif
> -
> -#ifdef CONFIG_UCLAMP_TASK
> - .uclamp_enabled = 1,
> -#endif
> };
>
> #ifdef CONFIG_SCHED_DEBUG
> diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
> index 4b8e33c615b1..7c0642ea85f2 100644
> --- a/kernel/sched/rt.c
> +++ b/kernel/sched/rt.c
> @@ -1471,6 +1471,8 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
> {
> struct sched_rt_entity *rt_se = &p->rt;
>
> + uclamp_rq_inc(rq, p);
> +
> if (flags & ENQUEUE_WAKEUP)
> rt_se->timeout = 0;
>
> @@ -1487,6 +1489,8 @@ static bool dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
> {
> struct sched_rt_entity *rt_se = &p->rt;
>
> + uclamp_rq_dec(rq, p);
> +
> update_curr_rt(rq);
> dequeue_rt_entity(rt_se, flags);
>
> @@ -2649,10 +2653,6 @@ DEFINE_SCHED_CLASS(rt) = {
> #ifdef CONFIG_SCHED_CORE
> .task_is_throttled = task_is_throttled_rt,
> #endif
> -
> -#ifdef CONFIG_UCLAMP_TASK
> - .uclamp_enabled = 1,
> -#endif
> };
>
> #ifdef CONFIG_RT_GROUP_SCHED
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index ab16d3d0e51c..990d87e8d8ed 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -2410,10 +2410,6 @@ extern s64 update_curr_common(struct rq *rq);
>
> struct sched_class {
>
> -#ifdef CONFIG_UCLAMP_TASK
> - int uclamp_enabled;
> -#endif
Why delete the uclamp_enabled flag?
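
For reference, this is the check being removed from the core helpers
(abridged from the uclamp_rq_inc() hunk above):

    static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
    {
            ...
            /* classes that did not set uclamp_enabled were skipped here */
            if (unlikely(!p->sched_class->uclamp_enabled))
                    return;
            ...
    }

My understanding is that with each class now calling uclamp_rq_inc()/dec()
directly, the flag becomes redundant -- is that the only reason, or is
there something more to it?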
> -
> void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
> bool (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
> void (*yield_task) (struct rq *rq);
> @@ -3393,6 +3389,8 @@ static inline bool update_other_load_avgs(struct rq *rq) { return false; }
> #ifdef CONFIG_UCLAMP_TASK
>
> unsigned long uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
> +void uclamp_rq_inc(struct rq *rq, struct task_struct *p);
> +void uclamp_rq_dec(struct rq *rq, struct task_struct *p);
>
> static inline unsigned long uclamp_rq_get(struct rq *rq,
> enum uclamp_id clamp_id)
> @@ -3470,6 +3468,9 @@ uclamp_se_set(struct uclamp_se *uc_se, unsigned int value, bool user_defined)
>
> #else /* !CONFIG_UCLAMP_TASK: */
>
> +static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) { };
> +static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) { };
> +
> static inline unsigned long
> uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id)
> {
> --
> 2.34.1
>
>