lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <aHpYvwRizPkUe8Iv@jlelli-thinkpadt14gen4.remote.csb>
Date: Fri, 18 Jul 2025 16:22:55 +0200
From: Juri Lelli <juri.lelli@...hat.com>
To: Yuri Andriaccio <yurand2000@...il.com>
Cc: Ingo Molnar <mingo@...hat.com>, Peter Zijlstra <peterz@...radead.org>,
	Vincent Guittot <vincent.guittot@...aro.org>,
	Dietmar Eggemann <dietmar.eggemann@....com>,
	Steven Rostedt <rostedt@...dmis.org>,
	Ben Segall <bsegall@...gle.com>, Mel Gorman <mgorman@...e.de>,
	Valentin Schneider <vschneid@...hat.com>,
	linux-kernel@...r.kernel.org,
	Luca Abeni <luca.abeni@...tannapisa.it>,
	Yuri Andriaccio <yuri.andriaccio@...tannapisa.it>
Subject: Re: [BUG] Bw accounting warning on fair-servers' parameters change

Hi,

Thanks for reporting.

On 18/07/25 13:38, Yuri Andriaccio wrote:
> Hi,
> 
> I've lately been working on fair-servers and dl_servers for some patches and
> I've come across a bandwidth accounting warning on the latest tip/master (as of
> 2025-07-18, git sha ed0272f0675f). The warning is triggered by simply starting
> the machine, mounting debugfs and then just zeroing any fair-server's runtime.
> 
> 
> The warning:
> 
> WARNING: kernel/sched/deadline.c:266 at dl_rq_change_utilization+0x208/0x230
> static inline void __sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq) {
>     ...
> 	WARN_ON_ONCE(dl_rq->running_bw > dl_rq->this_bw);
> }
> 
> Steps to reproduce:
> 
> mount -t debugfs none /sys/kernel/debug
> echo 0 > /sys/kernel/debug/sched/fair_server/cpu0/runtime
> 
> 
> It does not happen at every machine boot, but happens on most. Could it possibly
> be related to some of the deadline timers?

I took a quick first look and currently suspect cccb45d7c4295
("sched/deadline: Less agressive dl_server handling") could be playing a
role in this as it delays actual server stop.

Could you please try to reproduce after reverting that commit?

Thanks,
Juri

---
 include/linux/sched.h   |  1 -
 kernel/sched/deadline.c | 25 +++----------------------
 kernel/sched/fair.c     |  9 +++++++++
 3 files changed, 12 insertions(+), 23 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5b4e1cd52e27a..e7ed7ae1871d7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -703,7 +703,6 @@ struct sched_dl_entity {
 	unsigned int			dl_defer	  : 1;
 	unsigned int			dl_defer_armed	  : 1;
 	unsigned int			dl_defer_running  : 1;
-	unsigned int			dl_server_idle    : 1;
 
 	/*
 	 * Bandwidth enforcement timer. Each -deadline task has its
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index e2d51f4306b31..1ad1924ee767a 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1152,8 +1152,6 @@ static void __push_dl_task(struct rq *rq, struct rq_flags *rf)
 /* a defer timer will not be reset if the runtime consumed was < dl_server_min_res */
 static const u64 dl_server_min_res = 1 * NSEC_PER_MSEC;
 
-static bool dl_server_stopped(struct sched_dl_entity *dl_se);
-
 static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_dl_entity *dl_se)
 {
 	struct rq *rq = rq_of_dl_se(dl_se);
@@ -1173,7 +1171,6 @@ static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_
 
 		if (!dl_se->server_has_tasks(dl_se)) {
 			replenish_dl_entity(dl_se);
-			dl_server_stopped(dl_se);
 			return HRTIMER_NORESTART;
 		}
 
@@ -1577,17 +1574,15 @@ void dl_server_update_idle_time(struct rq *rq, struct task_struct *p)
 void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec)
 {
 	/* 0 runtime = fair server disabled */
-	if (dl_se->dl_runtime) {
-		dl_se->dl_server_idle = 0;
+	if (dl_se->dl_runtime)
 		update_curr_dl_se(dl_se->rq, dl_se, delta_exec);
-	}
 }
 
 void dl_server_start(struct sched_dl_entity *dl_se)
 {
 	struct rq *rq = dl_se->rq;
 
-	if (!dl_server(dl_se) || dl_se->dl_server_active)
+	if (!dl_se->dl_runtime)
 		return;
 
 	dl_se->dl_server_active = 1;
@@ -1608,20 +1603,6 @@ void dl_server_stop(struct sched_dl_entity *dl_se)
 	dl_se->dl_server_active = 0;
 }
 
-static bool dl_server_stopped(struct sched_dl_entity *dl_se)
-{
-	if (!dl_se->dl_server_active)
-		return false;
-
-	if (dl_se->dl_server_idle) {
-		dl_server_stop(dl_se);
-		return true;
-	}
-
-	dl_se->dl_server_idle = 1;
-	return false;
-}
-
 void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
 		    dl_server_has_tasks_f has_tasks,
 		    dl_server_pick_f pick_task)
@@ -2388,7 +2369,7 @@ static struct task_struct *__pick_task_dl(struct rq *rq)
 	if (dl_server(dl_se)) {
 		p = dl_se->server_pick_task(dl_se);
 		if (!p) {
-			if (!dl_server_stopped(dl_se)) {
+			if (dl_server_active(dl_se)) {
 				dl_se->dl_yielded = 1;
 				update_curr_dl_se(rq, dl_se, 0);
 			}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b173a059315c2..b2b6f3f6a12db 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5802,6 +5802,7 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
 	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
 	struct sched_entity *se;
 	long queued_delta, runnable_delta, idle_delta, dequeue = 1;
+	long rq_h_nr_queued = rq->cfs.h_nr_queued;
 
 	raw_spin_lock(&cfs_b->lock);
 	/* This will start the period timer if necessary */
@@ -5885,6 +5886,10 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
 
 	/* At this point se is NULL and we are at root level*/
 	sub_nr_running(rq, queued_delta);
+
+	/* Stop the fair server if throttling resulted in no runnable tasks */
+	if (rq_h_nr_queued && !rq->cfs.h_nr_queued)
+		dl_server_stop(&rq->fair_server);
 done:
 	/*
 	 * Note: distribution will already see us throttled via the
@@ -6961,6 +6966,7 @@ static void set_next_buddy(struct sched_entity *se);
 static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
 {
 	bool was_sched_idle = sched_idle_rq(rq);
+	int rq_h_nr_queued = rq->cfs.h_nr_queued;
 	bool task_sleep = flags & DEQUEUE_SLEEP;
 	bool task_delayed = flags & DEQUEUE_DELAYED;
 	struct task_struct *p = NULL;
@@ -7044,6 +7050,9 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
 
 	sub_nr_running(rq, h_nr_queued);
 
+	if (rq_h_nr_queued && !rq->cfs.h_nr_queued)
+		dl_server_stop(&rq->fair_server);
+
 	/* balance early to pull high priority tasks */
 	if (unlikely(!was_sched_idle && sched_idle_rq(rq)))
 		rq->next_balance = jiffies;


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ