[<prev] [next>] [day] [month] [year] [list]
Message-ID: <1e4d39d0-8905-4ede-9c9b-c771b0b6016b@redhat.com>
Date: Sun, 29 Jun 2025 19:08:17 -0400
From: Waiman Long <llong@...hat.com>
To: Waiman Long <llong@...hat.com>, Juri Lelli <juri.lelli@...hat.com>,
Ingo Molnar <mingo@...hat.com>, Peter Zijlstra <peterz@...radead.org>,
Vincent Guittot <vincent.guittot@...aro.org>,
Dietmar Eggemann <dietmar.eggemann@....com>,
Steven Rostedt <rostedt@...dmis.org>, Ben Segall <bsegall@...gle.com>,
Mel Gorman <mgorman@...e.de>, Valentin Schneider <vschneid@...hat.com>
Cc: linux-kernel@...r.kernel.org,
Marcel Ziswiler <marcel.ziswiler@...ethink.co.uk>,
Luca Abeni <luca.abeni@...tannapisa.it>
Subject: Re: [PATCH 1/5] sched/deadline: Initialize dl_servers after SMP
Resend again.
On 6/29/25 6:48 PM, Waiman Long wrote:
> On 6/27/25 7:51 AM, Juri Lelli wrote:
>> dl-servers are currently initialized too early at boot, when CPUs are not
>> fully up (only the boot CPU is). This results in miscalculation of
>> per-runqueue DEADLINE variables like extra_bw (which needs a stable CPU
>> count).
>>
>> Move initialization of dl-servers later on after SMP has been
>> initialized and CPUs are all online, so that CPU count is stable and
>> DEADLINE variables can be computed correctly.
>>
>> Fixes: d741f297bceaf ("sched/fair: Fair server interface")
>> Reported-by: Marcel Ziswiler <marcel.ziswiler@...ethink.co.uk>
>> Signed-off-by: Juri Lelli <juri.lelli@...hat.com>
>> ---
>> kernel/sched/core.c | 2 ++
>> kernel/sched/deadline.c | 50 ++++++++++++++++++++++++++---------------
>> kernel/sched/sched.h | 1 +
>> 3 files changed, 35 insertions(+), 18 deletions(-)
>>
>> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
>> index 2f8caa9db78d5..89b3ed637465b 100644
>> --- a/kernel/sched/core.c
>> +++ b/kernel/sched/core.c
>> @@ -8371,6 +8371,8 @@ void __init sched_init_smp(void)
>> init_sched_rt_class();
>> init_sched_dl_class();
>>
>> + sched_init_dl_servers();
>> +
>> sched_smp_initialized = true;
>> }
>>
>> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
>> index 0f30697ad7956..c1f223f372968 100644
>> --- a/kernel/sched/deadline.c
>> +++ b/kernel/sched/deadline.c
>> @@ -761,6 +761,8 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
>> struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
>> struct rq *rq = rq_of_dl_rq(dl_rq);
>>
>> + update_rq_clock(rq);
>> +
>> WARN_ON(is_dl_boosted(dl_se));
>> WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline));
>>
>> @@ -1580,23 +1582,7 @@ void dl_server_start(struct sched_dl_entity *dl_se)
>> {
>> struct rq *rq = dl_se->rq;
>>
>> - /*
>> - * XXX: the apply do not work fine at the init phase for the
>> - * fair server because things are not yet set. We need to improve
>> - * this before getting generic.
>> - */
>> - if (!dl_server(dl_se)) {
>> - u64 runtime = 50 * NSEC_PER_MSEC;
>> - u64 period = 1000 * NSEC_PER_MSEC;
>> -
>> - dl_server_apply_params(dl_se, runtime, period, 1);
>> -
>> - dl_se->dl_server = 1;
>> - dl_se->dl_defer = 1;
>> - setup_new_dl_entity(dl_se);
>> - }
>> -
>> - if (!dl_se->dl_runtime)
>> + if (!dl_server(dl_se))
>> return;
>>
>> dl_se->dl_server_active = 1;
>> @@ -1607,7 +1593,7 @@ void dl_server_start(struct sched_dl_entity *dl_se)
>>
>> void dl_server_stop(struct sched_dl_entity *dl_se)
>> {
>> - if (!dl_se->dl_runtime)
>> + if (!dl_server(dl_se) || !dl_server_active(dl_se))
>> return;
>>
>> dequeue_dl_entity(dl_se, DEQUEUE_SLEEP);
>> @@ -1626,6 +1612,32 @@ void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
>> dl_se->server_pick_task = pick_task;
>> }
>>
>> +void sched_init_dl_servers(void)
>> +{
>> + int cpu;
>> + struct rq *rq;
>> + struct sched_dl_entity *dl_se;
>> +
>> + for_each_online_cpu(cpu) {
>> + u64 runtime = 50 * NSEC_PER_MSEC;
>> + u64 period = 1000 * NSEC_PER_MSEC;
>> +
>> + rq = cpu_rq(cpu);
>> +
>> + guard(rq_lock_irq)(rq);
>> +
>> + dl_se = &rq->fair_server;
>> +
>> + WARN_ON(dl_server(dl_se));
>> +
>> + dl_server_apply_params(dl_se, runtime, period, 1);
>> +
>> + dl_se->dl_server = 1;
>> + dl_se->dl_defer = 1;
>> + setup_new_dl_entity(dl_se);
>> + }
>> +}
>> +
>> void __dl_server_attach_root(struct sched_dl_entity *dl_se, struct rq *rq)
>> {
>> u64 new_bw = dl_se->dl_bw;
>> @@ -1652,6 +1664,8 @@ int dl_server_apply_params(struct sched_dl_entity *dl_se, u64 runtime, u64 perio
>> int retval = 0;
>> int cpus;
>>
>> + guard(rcu)();
>> +
>
> Your patch doesn't explain why an RCU guard is needed here.
> sched_init_dl_servers() is the changed caller, but it is called with
> rq_lock_irq held, which should imply an RCU read-side critical section
> since IRQs are disabled.
>
> Cheers, Longman
>
Powered by blists - more mailing lists