linux-kernel - Re: [PATCH 1/5] sched/deadline: Initialize dl

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [day] [month] [year] [list]
Message-ID: <1e4d39d0-8905-4ede-9c9b-c771b0b6016b@redhat.com>
Date: Sun, 29 Jun 2025 19:08:17 -0400
From: Waiman Long <llong@...hat.com>
To: Waiman Long <llong@...hat.com>, Juri Lelli <juri.lelli@...hat.com>,
 Ingo Molnar <mingo@...hat.com>, Peter Zijlstra <peterz@...radead.org>,
 Vincent Guittot <vincent.guittot@...aro.org>,
 Dietmar Eggemann <dietmar.eggemann@....com>,
 Steven Rostedt <rostedt@...dmis.org>, Ben Segall <bsegall@...gle.com>,
 Mel Gorman <mgorman@...e.de>, Valentin Schneider <vschneid@...hat.com>
Cc: linux-kernel@...r.kernel.org,
 Marcel Ziswiler <marcel.ziswiler@...ethink.co.uk>,
 Luca Abeni <luca.abeni@...tannapisa.it>
Subject: Re: [PATCH 1/5] sched/deadline: Initialize dl_servers after SMP

Resend again.

On 6/29/25 6:48 PM, Waiman Long wrote:
> On 6/27/25 7:51 AM, Juri Lelli wrote:
>> dl-servers are currently initialized too early at boot, when CPUs are not
>> fully up (only the boot CPU is). This results in miscalculation of
>> per-runqueue DEADLINE variables like extra_bw (which needs a stable CPU
>> count).
>>
>> Move initialization of dl-servers later on after SMP has been
>> initialized and CPUs are all online, so that CPU count is stable and
>> DEADLINE variables can be computed correctly.
>>
>> Fixes: d741f297bceaf ("sched/fair: Fair server interface")
>> Reported-by: Marcel Ziswiler <marcel.ziswiler@...ethink.co.uk>
>> Signed-off-by: Juri Lelli <juri.lelli@...hat.com>
>> ---
>>   kernel/sched/core.c     |  2 ++
>>   kernel/sched/deadline.c | 50 ++++++++++++++++++++++++++---------------
>>   kernel/sched/sched.h    |  1 +
>>   3 files changed, 35 insertions(+), 18 deletions(-)
>>
>> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
>> index 2f8caa9db78d5..89b3ed637465b 100644
>> --- a/kernel/sched/core.c
>> +++ b/kernel/sched/core.c
>> @@ -8371,6 +8371,8 @@ void __init sched_init_smp(void)
>>   	init_sched_rt_class();
>>   	init_sched_dl_class();
>>   
>> +	sched_init_dl_servers();
>> +
>>   	sched_smp_initialized = true;
>>   }
>>   
>> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
>> index 0f30697ad7956..c1f223f372968 100644
>> --- a/kernel/sched/deadline.c
>> +++ b/kernel/sched/deadline.c
>> @@ -761,6 +761,8 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
>>   	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
>>   	struct rq *rq = rq_of_dl_rq(dl_rq);
>>   
>> +	update_rq_clock(rq);
>> +
>>   	WARN_ON(is_dl_boosted(dl_se));
>>   	WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline));
>>   
>> @@ -1580,23 +1582,7 @@ void dl_server_start(struct sched_dl_entity *dl_se)
>>   {
>>   	struct rq *rq = dl_se->rq;
>>   
>> -	/*
>> -	 * XXX: the apply do not work fine at the init phase for the
>> -	 * fair server because things are not yet set. We need to improve
>> -	 * this before getting generic.
>> -	 */
>> -	if (!dl_server(dl_se)) {
>> -		u64 runtime =  50 * NSEC_PER_MSEC;
>> -		u64 period = 1000 * NSEC_PER_MSEC;
>> -
>> -		dl_server_apply_params(dl_se, runtime, period, 1);
>> -
>> -		dl_se->dl_server = 1;
>> -		dl_se->dl_defer = 1;
>> -		setup_new_dl_entity(dl_se);
>> -	}
>> -
>> -	if (!dl_se->dl_runtime)
>> +	if (!dl_server(dl_se))
>>   		return;
>>   
>>   	dl_se->dl_server_active = 1;
>> @@ -1607,7 +1593,7 @@ void dl_server_start(struct sched_dl_entity *dl_se)
>>   
>>   void dl_server_stop(struct sched_dl_entity *dl_se)
>>   {
>> -	if (!dl_se->dl_runtime)
>> +	if (!dl_server(dl_se) || !dl_server_active(dl_se))
>>   		return;
>>   
>>   	dequeue_dl_entity(dl_se, DEQUEUE_SLEEP);
>> @@ -1626,6 +1612,32 @@ void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
>>   	dl_se->server_pick_task = pick_task;
>>   }
>>   
>> +void sched_init_dl_servers(void)
>> +{
>> +	int cpu;
>> +	struct rq *rq;
>> +	struct sched_dl_entity *dl_se;
>> +
>> +	for_each_online_cpu(cpu) {
>> +		u64 runtime =  50 * NSEC_PER_MSEC;
>> +		u64 period = 1000 * NSEC_PER_MSEC;
>> +
>> +		rq = cpu_rq(cpu);
>> +
>> +		guard(rq_lock_irq)(rq);
>> +
>> +		dl_se = &rq->fair_server;
>> +
>> +		WARN_ON(dl_server(dl_se));
>> +
>> +		dl_server_apply_params(dl_se, runtime, period, 1);
>> +
>> +		dl_se->dl_server = 1;
>> +		dl_se->dl_defer = 1;
>> +		setup_new_dl_entity(dl_se);
>> +	}
>> +}
>> +
>>   void __dl_server_attach_root(struct sched_dl_entity *dl_se, struct rq *rq)
>>   {
>>   	u64 new_bw = dl_se->dl_bw;
>> @@ -1652,6 +1664,8 @@ int dl_server_apply_params(struct sched_dl_entity *dl_se, u64 runtime, u64 perio
>>   	int retval = 0;
>>   	int cpus;
>>   
>> +	guard(rcu)();
>> +
>
> Your patch doesn't explain why an RCU guard is needed here.
> sched_init_dl_servers() is the changed caller, but it is called with
> rq_lock_irq held, which should imply an RCU read-side critical section
> since IRQs are disabled.
>
> Cheers, Longman
>