Message-ID: <34521ae6-5dba-4861-a971-8f7c8a37bb04@arm.com>
Date: Thu, 6 Mar 2025 12:24:18 +0000
From: Hongyan Xia <hongyan.xia2@....com>
To: Xuewen Yan <xuewen.yan94@...il.com>
Cc: Ingo Molnar <mingo@...hat.com>, Peter Zijlstra <peterz@...radead.org>,
Juri Lelli <juri.lelli@...hat.com>,
Vincent Guittot <vincent.guittot@...aro.org>,
Dietmar Eggemann <dietmar.eggemann@....com>,
Steven Rostedt <rostedt@...dmis.org>, Ben Segall <bsegall@...gle.com>,
Mel Gorman <mgorman@...e.de>, Valentin Schneider <vschneid@...hat.com>,
Tejun Heo <tj@...nel.org>, David Vernet <void@...ifault.com>,
Andrea Righi <arighi@...dia.com>, Changwoo Min <changwoo@...lia.com>,
linux-kernel@...r.kernel.org, Xuewen Yan <xuewen.yan@...soc.com>
Subject: Re: [PATCH] sched/uclamp: Let each sched_class handle uclamp
On 06/03/2025 12:03, Xuewen Yan wrote:
> Hi Hongyan,
>
> On Thu, Feb 27, 2025 at 9:55 PM Hongyan Xia <hongyan.xia2@....com> wrote:
>>
>> While delayed dequeue issues were being resolved, uclamp fell out of
>> sync with cpufreq, especially in enqueue_task().
>>
>> For example, when a task with uclamp_min goes through enqueue_task()
>> and updates cpufreq, its uclamp_min is not even considered in that
>> cpufreq update. Only after enqueue is the uclamp_min added to the rq
>> buckets, and cpufreq picks it up at the next update. This is very
>> different from the old behavior, where a uclamp value took effect
>> immediately at enqueue. Worse, classes like fair.c issue cpufreq
>> updates on utilization changes, so if utilization does not change for
>> a while, the new uclamp value is delayed even further.
>>
>> So, let each sched_class handle uclamp in its own code, in case
>> delayed dequeue needs further tweaks or similar changes come along in
>> the future, and make sure uclamp is picked up immediately on enqueue.
>> In fair.c, we re-use the guard logic for util_est.
>>
>> Signed-off-by: Hongyan Xia <hongyan.xia2@....com>
>> ---
>> kernel/sched/core.c | 28 ++--------------------------
>> kernel/sched/ext.c | 8 ++++----
>> kernel/sched/fair.c | 12 ++++++------
>> kernel/sched/rt.c | 8 ++++----
>> kernel/sched/sched.h | 9 +++++----
>> 5 files changed, 21 insertions(+), 44 deletions(-)
>>
>> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
>> index b00f884701a6..2d51608a4c46 100644
>> --- a/kernel/sched/core.c
>> +++ b/kernel/sched/core.c
>> @@ -1745,7 +1745,7 @@ static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p,
>> }
>> }
>>
>> -static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
>> +void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
>> {
>> enum uclamp_id clamp_id;
>>
>> @@ -1758,12 +1758,6 @@ static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
>> if (!static_branch_unlikely(&sched_uclamp_used))
>> return;
>>
>> - if (unlikely(!p->sched_class->uclamp_enabled))
>> - return;
>> -
>> - if (p->se.sched_delayed)
>> - return;
>> -
>> for_each_clamp_id(clamp_id)
>> uclamp_rq_inc_id(rq, p, clamp_id);
>>
>> @@ -1772,7 +1766,7 @@ static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
>> rq->uclamp_flags &= ~UCLAMP_FLAG_IDLE;
>> }
>>
>> -static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p)
>> +void uclamp_rq_dec(struct rq *rq, struct task_struct *p)
>> {
>> enum uclamp_id clamp_id;
>>
>> @@ -1785,12 +1779,6 @@ static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p)
>> if (!static_branch_unlikely(&sched_uclamp_used))
>> return;
>>
>> - if (unlikely(!p->sched_class->uclamp_enabled))
>> - return;
>> -
>> - if (p->se.sched_delayed)
>> - return;
>> -
>> for_each_clamp_id(clamp_id)
>> uclamp_rq_dec_id(rq, p, clamp_id);
>> }
>> @@ -2029,8 +2017,6 @@ static void __init init_uclamp(void)
>> }
>>
>> #else /* !CONFIG_UCLAMP_TASK */
>> -static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) { }
>> -static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) { }
>> static inline void uclamp_fork(struct task_struct *p) { }
>> static inline void uclamp_post_fork(struct task_struct *p) { }
>> static inline void init_uclamp(void) { }
>> @@ -2066,11 +2052,6 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
>> update_rq_clock(rq);
>>
>> p->sched_class->enqueue_task(rq, p, flags);
>> - /*
>> - * Must be after ->enqueue_task() because ENQUEUE_DELAYED can clear
>> - * ->sched_delayed.
>> - */
>> - uclamp_rq_inc(rq, p);
>>
>> psi_enqueue(p, flags);
>>
>> @@ -2097,11 +2078,6 @@ inline bool dequeue_task(struct rq *rq, struct task_struct *p, int flags)
>>
>> psi_dequeue(p, flags);
>>
>> - /*
>> - * Must be before ->dequeue_task() because ->dequeue_task() can 'fail'
>> - * and mark the task ->sched_delayed.
>> - */
>> - uclamp_rq_dec(rq, p);
>> return p->sched_class->dequeue_task(rq, p, flags);
>> }
>>
>> diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
>> index 8857c0709bdd..4521c27f9ab8 100644
>> --- a/kernel/sched/ext.c
>> +++ b/kernel/sched/ext.c
>> @@ -2094,6 +2094,8 @@ static void enqueue_task_scx(struct rq *rq, struct task_struct *p, int enq_flags
>> {
>> int sticky_cpu = p->scx.sticky_cpu;
>>
>> + uclamp_rq_inc(rq, p);
>> +
>> if (enq_flags & ENQUEUE_WAKEUP)
>> rq->scx.flags |= SCX_RQ_IN_WAKEUP;
>>
>> @@ -2181,6 +2183,8 @@ static void ops_dequeue(struct task_struct *p, u64 deq_flags)
>>
>> static bool dequeue_task_scx(struct rq *rq, struct task_struct *p, int deq_flags)
>> {
>> + uclamp_rq_dec(rq, p);
>> +
>> if (!(p->scx.flags & SCX_TASK_QUEUED)) {
>> WARN_ON_ONCE(task_runnable(p));
>> return true;
>> @@ -4456,10 +4460,6 @@ DEFINE_SCHED_CLASS(ext) = {
>> .prio_changed = prio_changed_scx,
>>
>> .update_curr = update_curr_scx,
>> -
>> -#ifdef CONFIG_UCLAMP_TASK
>> - .uclamp_enabled = 1,
>> -#endif
>> };
>>
>> static void init_dsq(struct scx_dispatch_q *dsq, u64 dsq_id)
>> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
>> index 857808da23d8..7e5a653811ad 100644
>> --- a/kernel/sched/fair.c
>> +++ b/kernel/sched/fair.c
>> @@ -6941,8 +6941,10 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
>> * Let's add the task's estimated utilization to the cfs_rq's
>> * estimated utilization, before we update schedutil.
>> */
>> - if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & ENQUEUE_RESTORE))))
>> + if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & ENQUEUE_RESTORE)))) {
>> + uclamp_rq_inc(rq, p);
>> util_est_enqueue(&rq->cfs, p);
>> + }
>>
>> if (flags & ENQUEUE_DELAYED) {
>> requeue_delayed_entity(se);
>> @@ -7183,8 +7185,10 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
>> */
>> static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
>> {
>> - if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & DEQUEUE_SAVE))))
>> + if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & DEQUEUE_SAVE)))) {
>> + uclamp_rq_dec(rq, p);
>> util_est_dequeue(&rq->cfs, p);
>> + }
>>
>> util_est_update(&rq->cfs, p, flags & DEQUEUE_SLEEP);
>> if (dequeue_entities(rq, &p->se, flags) < 0)
>> @@ -13660,10 +13664,6 @@ DEFINE_SCHED_CLASS(fair) = {
>> #ifdef CONFIG_SCHED_CORE
>> .task_is_throttled = task_is_throttled_fair,
>> #endif
>> -
>> -#ifdef CONFIG_UCLAMP_TASK
>> - .uclamp_enabled = 1,
>> -#endif
>> };
>>
>> #ifdef CONFIG_SCHED_DEBUG
>> diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
>> index 4b8e33c615b1..7c0642ea85f2 100644
>> --- a/kernel/sched/rt.c
>> +++ b/kernel/sched/rt.c
>> @@ -1471,6 +1471,8 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
>> {
>> struct sched_rt_entity *rt_se = &p->rt;
>>
>> + uclamp_rq_inc(rq, p);
>> +
>> if (flags & ENQUEUE_WAKEUP)
>> rt_se->timeout = 0;
>>
>> @@ -1487,6 +1489,8 @@ static bool dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
>> {
>> struct sched_rt_entity *rt_se = &p->rt;
>>
>> + uclamp_rq_dec(rq, p);
>> +
>> update_curr_rt(rq);
>> dequeue_rt_entity(rt_se, flags);
>>
>> @@ -2649,10 +2653,6 @@ DEFINE_SCHED_CLASS(rt) = {
>> #ifdef CONFIG_SCHED_CORE
>> .task_is_throttled = task_is_throttled_rt,
>> #endif
>> -
>> -#ifdef CONFIG_UCLAMP_TASK
>> - .uclamp_enabled = 1,
>> -#endif
>> };
>>
>> #ifdef CONFIG_RT_GROUP_SCHED
>> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
>> index ab16d3d0e51c..990d87e8d8ed 100644
>> --- a/kernel/sched/sched.h
>> +++ b/kernel/sched/sched.h
>> @@ -2410,10 +2410,6 @@ extern s64 update_curr_common(struct rq *rq);
>>
>> struct sched_class {
>>
>> -#ifdef CONFIG_UCLAMP_TASK
>> - int uclamp_enabled;
>> -#endif
>
> Why delete the uclamp_enabled?
>
After moving the uclamp enqueue/dequeue calls into each sched_class,
core.c no longer checks sched_class->uclamp_enabled, so the field has no
remaining users.
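To make it concrete, here is a rough sketch (illustrative only, not the
actual kernel code) of why no reader of the flag is left: a class now
opts in to uclamp simply by calling the helpers from its own paths.

/* Illustrative sketch: the call itself is the opt-in. */
static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
{
	uclamp_rq_inc(rq, p);	/* rt opts in by calling the helper */
	/* ... usual rt enqueue work ... */
}

A class that never set .uclamp_enabled (deadline, for example) simply
never calls the helpers, so neither core.c nor anything else needs a
flag to test.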
>> -
>> void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
>> bool (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
>> void (*yield_task) (struct rq *rq);
>> @@ -3393,6 +3389,8 @@ static inline bool update_other_load_avgs(struct rq *rq) { return false; }
>> #ifdef CONFIG_UCLAMP_TASK
>>
>> unsigned long uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
>> +void uclamp_rq_inc(struct rq *rq, struct task_struct *p);
>> +void uclamp_rq_dec(struct rq *rq, struct task_struct *p);
>>
>> static inline unsigned long uclamp_rq_get(struct rq *rq,
>> enum uclamp_id clamp_id)
>> @@ -3470,6 +3468,9 @@ uclamp_se_set(struct uclamp_se *uc_se, unsigned int value, bool user_defined)
>>
>> #else /* !CONFIG_UCLAMP_TASK: */
>>
>> +static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) { };
>> +static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) { };
>> +
>> static inline unsigned long
>> uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id)
>> {
>> --
>> 2.34.1
>>
>>
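Stepping back, the net effect on ordering can be summarized with a rough
sketch (illustrative pseudo-code, not the exact call chains):

/*
 * Before the patch:
 *
 *   enqueue_task()
 *     p->sched_class->enqueue_task()  // may trigger a cpufreq update,
 *                                     // but p's uclamp is not yet in
 *                                     // the rq buckets
 *     uclamp_rq_inc(rq, p)            // too late for that update
 *
 * After the patch:
 *
 *   enqueue_task()
 *     p->sched_class->enqueue_task()
 *       uclamp_rq_inc(rq, p)          // done inside the class, before
 *                                     // any cpufreq update it issues
 *       ... class-specific enqueue work ...
 */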