lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <aYHJuvVaa8xNbuHR@gpd4>
Date: Tue, 3 Feb 2026 11:11:06 +0100
From: Andrea Righi <arighi@...dia.com>
To: Peter Zijlstra <peterz@...radead.org>
Cc: Ingo Molnar <mingo@...hat.com>, Juri Lelli <juri.lelli@...hat.com>,
	Vincent Guittot <vincent.guittot@...aro.org>,
	Dietmar Eggemann <dietmar.eggemann@....com>,
	Steven Rostedt <rostedt@...dmis.org>,
	Ben Segall <bsegall@...gle.com>, Mel Gorman <mgorman@...e.de>,
	Valentin Schneider <vschneid@...hat.com>, Tejun Heo <tj@...nel.org>,
	Joel Fernandes <joelagnelf@...dia.com>,
	David Vernet <void@...ifault.com>,
	Changwoo Min <changwoo@...lia.com>,
	Daniel Hodges <hodgesd@...a.com>,
	Christian Loehle <christian.loehle@....com>,
	Emil Tsalapatis <emil@...alapatis.com>, sched-ext@...ts.linux.dev,
	linux-kernel@...r.kernel.org
Subject: Re: [PATCH 3/7] sched/debug: Stop and start server based on if it
 was active

Hi Peter,

On Mon, Feb 02, 2026 at 10:17:23PM +0100, Peter Zijlstra wrote:
> On Mon, Feb 02, 2026 at 10:13:26PM +0100, Peter Zijlstra wrote:
> > On Mon, Jan 26, 2026 at 10:59:01AM +0100, Andrea Righi wrote:
> > > From: Joel Fernandes <joelagnelf@...dia.com>
> > > 
> > > Currently the DL server interface for applying parameters checks
> > > CFS-internals to identify if the server is active. This is error-prone
> > > and makes it difficult when adding new servers in the future.
> > > 
> > > Fix it, by using dl_server_active() which is also used by the DL server
> > > code to determine if the DL server was started.
> > > 
> > > Tested-by: Christian Loehle <christian.loehle@....com>
> > > Acked-by: Tejun Heo <tj@...nel.org>
> > > Reviewed-by: Juri Lelli <juri.lelli@...hat.com>
> > > Reviewed-by: Andrea Righi <arighi@...dia.com>
> > > Signed-off-by: Joel Fernandes <joelagnelf@...dia.com>
> > > ---
> > >  kernel/sched/debug.c | 11 ++++++++---
> > >  1 file changed, 8 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
> > > index 93f009e1076d8..dd793f8f3858a 100644
> > > --- a/kernel/sched/debug.c
> > > +++ b/kernel/sched/debug.c
> > > @@ -354,6 +354,8 @@ static ssize_t sched_fair_server_write(struct file *filp, const char __user *ubu
> > >  		return err;
> > >  
> > >  	scoped_guard (rq_lock_irqsave, rq) {
> > > +		bool is_active;
> > > +
> > >  		runtime  = rq->fair_server.dl_runtime;
> > >  		period = rq->fair_server.dl_period;
> > >  
> > > @@ -376,8 +378,11 @@ static ssize_t sched_fair_server_write(struct file *filp, const char __user *ubu
> > >  			return  -EINVAL;
> > >  		}
> > >  
> > > -		update_rq_clock(rq);
> > > -		dl_server_stop(&rq->fair_server);
> > > +		is_active = dl_server_active(&rq->fair_server);
> > > +		if (is_active) {
> > > +			update_rq_clock(rq);
> > > +			dl_server_stop(&rq->fair_server);
> > > +		}
> > >  
> > >  		retval = dl_server_apply_params(&rq->fair_server, runtime, period, 0);
> > >  
> > > @@ -385,7 +390,7 @@ static ssize_t sched_fair_server_write(struct file *filp, const char __user *ubu
> > >  			printk_deferred("Fair server disabled in CPU %d, system may crash due to starvation.\n",
> > >  					cpu_of(rq));
> > >  
> > > -		if (rq->cfs.h_nr_queued)
> > > +		if (is_active && runtime)
> > >  			dl_server_start(&rq->fair_server);
> > >  
> > >  		if (retval < 0)
> > 
> > Suppose runtime was 0, and gets incremented while there are already
> > tasks enqueued, then the above isn't going to DTRT.
> 
> Something like so perhaps?
> 
> ---
> diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
> index 59e650f9d436..884bdf7a292f 100644
> --- a/kernel/sched/debug.c
> +++ b/kernel/sched/debug.c
> @@ -340,7 +340,7 @@ static ssize_t sched_server_write_common(struct file *filp, const char __user *u
>  	long cpu = (long) ((struct seq_file *) filp->private_data)->private;
>  	struct rq *rq = cpu_rq(cpu);
>  	struct sched_dl_entity *dl_se = (struct sched_dl_entity *)server;
> -	u64 runtime, period;
> +	u64 old_runtime, runtime, period;
>  	int retval = 0;
>  	size_t err;
>  	u64 value;
> @@ -352,7 +352,7 @@ static ssize_t sched_server_write_common(struct file *filp, const char __user *u
>  	scoped_guard (rq_lock_irqsave, rq) {
>  		bool is_active;
>  
> -		runtime = dl_se->dl_runtime;
> +		old_runtime = runtime = dl_se->dl_runtime;
>  		period = dl_se->dl_period;
>  
>  		switch (param) {
> @@ -382,17 +382,20 @@ static ssize_t sched_server_write_common(struct file *filp, const char __user *u
>  
>  		retval = dl_server_apply_params(dl_se, runtime, period, 0);
>  
> -		if (!runtime)
> -			printk_deferred("%s server disabled in CPU %d, system may crash due to starvation.\n",
> -					server == &rq->fair_server ? "Fair" : "Ext", cpu_of(rq));
> -
> -		if (is_active && runtime)
> +		if (runtime)
>  			dl_server_start(dl_se);
>  
>  		if (retval < 0)
>  			return retval;
>  	}
>  
> +	if (!!old_runtime ^ !!runtime) {
> +		pr_info("%s server %sabled in CPU %d, system may crash due to starvation.\n",
> +			server == &rq->fair_server ? "Fair" : "Ext",
> +			runtime ? "en" : "dis",
> +			cpu_of(rq));
> +	}
> +
>  	*ppos += cnt;
>  	return cnt;
>  }

I slightly changed your patch (see below): I added a missing
update_rq_clock(rq) before starting the DL server and updated the pr_info
message, as mentioned in my previous email.

I ran some tests, and with this change the DL server starts correctly when
runtime is changed from 0 to a value > 0, so this fixes the issue.

Would you prefer that I send an updated patch set, or should we apply this
fix on top?

Thanks,
-Andrea

---
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 2e9896668c6fd..dbd5e67a16c67 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -346,7 +346,7 @@ static ssize_t sched_server_write_common(struct file *filp, const char __user *u
 	long cpu = (long) ((struct seq_file *) filp->private_data)->private;
 	struct rq *rq = cpu_rq(cpu);
 	struct sched_dl_entity *dl_se = (struct sched_dl_entity *)server;
-	u64 runtime, period;
+	u64 old_runtime, runtime, period;
 	int retval = 0;
 	size_t err;
 	u64 value;
@@ -358,7 +358,7 @@ static ssize_t sched_server_write_common(struct file *filp, const char __user *u
 	scoped_guard (rq_lock_irqsave, rq) {
 		bool is_active;
 
-		runtime = dl_se->dl_runtime;
+		old_runtime = runtime = dl_se->dl_runtime;
 		period = dl_se->dl_period;
 
 		switch (param) {
@@ -388,17 +388,23 @@ static ssize_t sched_server_write_common(struct file *filp, const char __user *u
 
 		retval = dl_server_apply_params(dl_se, runtime, period, 0);
 
-		if (!runtime)
-			printk_deferred("%s server disabled in CPU %d, system may crash due to starvation.\n",
-					server == &rq->fair_server ? "Fair" : "Ext", cpu_of(rq));
-
-		if (is_active && runtime)
+		if (runtime) {
+			update_rq_clock(rq);
 			dl_server_start(dl_se);
+		}
 
 		if (retval < 0)
 			return retval;
 	}
 
+	if (!!old_runtime ^ !!runtime) {
+		pr_info("%s server %sabled in CPU %d%s\n",
+			server == &rq->fair_server ? "Fair" : "Ext",
+			runtime ? "en" : "dis",
+			cpu_of(rq),
+			runtime ? "" : ", system may crash due to starvation");
+	}
+
 	*ppos += cnt;
 	return cnt;
 }
-- 
2.52.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ