Message-ID: <aHpYvwRizPkUe8Iv@jlelli-thinkpadt14gen4.remote.csb>
Date: Fri, 18 Jul 2025 16:22:55 +0200
From: Juri Lelli <juri.lelli@...hat.com>
To: Yuri Andriaccio <yurand2000@...il.com>
Cc: Ingo Molnar <mingo@...hat.com>, Peter Zijlstra <peterz@...radead.org>,
Vincent Guittot <vincent.guittot@...aro.org>,
Dietmar Eggemann <dietmar.eggemann@....com>,
Steven Rostedt <rostedt@...dmis.org>,
Ben Segall <bsegall@...gle.com>, Mel Gorman <mgorman@...e.de>,
Valentin Schneider <vschneid@...hat.com>,
linux-kernel@...r.kernel.org,
Luca Abeni <luca.abeni@...tannapisa.it>,
Yuri Andriaccio <yuri.andriaccio@...tannapisa.it>
Subject: Re: [BUG] Bw accounting warning on fair-servers' parameters change
Hi,
Thanks for reporting.
On 18/07/25 13:38, Yuri Andriaccio wrote:
> Hi,
>
> I've lately been working on fair-servers and dl_servers for some patches, and
> I've come across a bandwidth accounting warning on the latest tip/master (as of
> 2025-07-18, git sha ed0272f0675f). The warning is triggered simply by booting
> the machine, mounting debugfs and then zeroing any fair-server's runtime.
>
>
> The warning:
>
> WARNING: kernel/sched/deadline.c:266 at dl_rq_change_utilization+0x208/0x230
> static inline void __sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq) {
> ...
> WARN_ON_ONCE(dl_rq->running_bw > dl_rq->this_bw);
> }
>
> Steps to reproduce:
>
> mount -t debugfs none /sys/kernel/debug
> echo 0 > /sys/kernel/debug/sched/fair_server/cpu0/runtime
>
>
> It does not happen on every boot, but it does on most. Could it possibly be
> related to some of the deadline timers?
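
For reference, the condition checked there is the dl_rq bandwidth invariant:
running_bw, the bandwidth of the entities still accounted as active, must
never exceed this_bw, the bandwidth of everything admitted on that runqueue.
Here is a toy model of just that accounting (not the kernel code; field and
helper names are only chosen to mirror it):

/* Toy model of the dl_rq bandwidth counters; not the kernel code. */
#include <assert.h>
#include <stdint.h>

struct dl_rq_model {
        uint64_t this_bw;       /* bandwidth of every admitted entity */
        uint64_t running_bw;    /* bandwidth of the still-active subset */
};

/* Mirrors the WARN_ON_ONCE() in __sub_rq_bw() quoted above. */
static void sub_rq_bw(struct dl_rq_model *dl_rq, uint64_t bw)
{
        dl_rq->this_bw -= bw;
        assert(dl_rq->running_bw <= dl_rq->this_bw);
}

int main(void)
{
        struct dl_rq_model dl_rq = { 0, 0 };
        uint64_t bw = 1024;             /* arbitrary units */

        dl_rq.this_bw += bw;            /* entity admitted */
        dl_rq.running_bw += bw;         /* entity becomes active */

        dl_rq.running_bw -= bw;         /* active share dropped first... */
        sub_rq_bw(&dl_rq, bw);          /* ...then the admitted share: OK */
        return 0;
}

As long as the active share is removed before (or together with) the admitted
share, the check cannot fire.
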
I took a quick first look, and I currently suspect cccb45d7c4295
("sched/deadline: Less agressive dl_server handling") could be playing a
role here, as it delays the actual server stop.
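
In toy-model terms, what I think can happen (this is only a sketch, the
actual call path still needs confirming) is that the delayed stop leaves the
server's share in running_bw while the runtime write already drops it from
this_bw, so the subtraction momentarily sees running_bw > this_bw. The
assert below trips on purpose to show that ordering:

#include <assert.h>
#include <stdint.h>

int main(void)
{
        /* Fair server admitted and still accounted as active. */
        uint64_t this_bw = 1024, running_bw = 1024;

        /*
         * echo 0 > .../fair_server/cpuN/runtime: the server's share
         * leaves this_bw first...
         */
        this_bw -= 1024;

        /*
         * ...while the deferred stop has not yet cleared running_bw, so
         * the equivalent of the WARN_ON_ONCE() condition is true here.
         */
        assert(!(running_bw > this_bw));        /* fails, like the WARN */

        running_bw -= 1024;                     /* the stop arrives too late */
        return 0;
}
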
Could you please try to repro after reverting that commit?
Thanks,
Juri
---
include/linux/sched.h | 1 -
kernel/sched/deadline.c | 25 +++----------------------
kernel/sched/fair.c | 9 +++++++++
3 files changed, 12 insertions(+), 23 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5b4e1cd52e27a..e7ed7ae1871d7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -703,7 +703,6 @@ struct sched_dl_entity {
unsigned int dl_defer : 1;
unsigned int dl_defer_armed : 1;
unsigned int dl_defer_running : 1;
- unsigned int dl_server_idle : 1;
/*
* Bandwidth enforcement timer. Each -deadline task has its
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index e2d51f4306b31..1ad1924ee767a 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1152,8 +1152,6 @@ static void __push_dl_task(struct rq *rq, struct rq_flags *rf)
/* a defer timer will not be reset if the runtime consumed was < dl_server_min_res */
static const u64 dl_server_min_res = 1 * NSEC_PER_MSEC;
-static bool dl_server_stopped(struct sched_dl_entity *dl_se);
-
static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_dl_entity *dl_se)
{
struct rq *rq = rq_of_dl_se(dl_se);
@@ -1173,7 +1171,6 @@ static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_
if (!dl_se->server_has_tasks(dl_se)) {
replenish_dl_entity(dl_se);
- dl_server_stopped(dl_se);
return HRTIMER_NORESTART;
}
@@ -1577,17 +1574,15 @@ void dl_server_update_idle_time(struct rq *rq, struct task_struct *p)
void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec)
{
/* 0 runtime = fair server disabled */
- if (dl_se->dl_runtime) {
- dl_se->dl_server_idle = 0;
+ if (dl_se->dl_runtime)
update_curr_dl_se(dl_se->rq, dl_se, delta_exec);
- }
}
void dl_server_start(struct sched_dl_entity *dl_se)
{
struct rq *rq = dl_se->rq;
- if (!dl_server(dl_se) || dl_se->dl_server_active)
+ if (!dl_se->dl_runtime)
return;
dl_se->dl_server_active = 1;
@@ -1608,20 +1603,6 @@ void dl_server_stop(struct sched_dl_entity *dl_se)
dl_se->dl_server_active = 0;
}
-static bool dl_server_stopped(struct sched_dl_entity *dl_se)
-{
- if (!dl_se->dl_server_active)
- return false;
-
- if (dl_se->dl_server_idle) {
- dl_server_stop(dl_se);
- return true;
- }
-
- dl_se->dl_server_idle = 1;
- return false;
-}
-
void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
dl_server_has_tasks_f has_tasks,
dl_server_pick_f pick_task)
@@ -2388,7 +2369,7 @@ static struct task_struct *__pick_task_dl(struct rq *rq)
if (dl_server(dl_se)) {
p = dl_se->server_pick_task(dl_se);
if (!p) {
- if (!dl_server_stopped(dl_se)) {
+ if (dl_server_active(dl_se)) {
dl_se->dl_yielded = 1;
update_curr_dl_se(rq, dl_se, 0);
}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b173a059315c2..b2b6f3f6a12db 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5802,6 +5802,7 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
struct sched_entity *se;
long queued_delta, runnable_delta, idle_delta, dequeue = 1;
+ long rq_h_nr_queued = rq->cfs.h_nr_queued;
raw_spin_lock(&cfs_b->lock);
/* This will start the period timer if necessary */
@@ -5885,6 +5886,10 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
/* At this point se is NULL and we are at root level*/
sub_nr_running(rq, queued_delta);
+
+ /* Stop the fair server if throttling resulted in no runnable tasks */
+ if (rq_h_nr_queued && !rq->cfs.h_nr_queued)
+ dl_server_stop(&rq->fair_server);
done:
/*
* Note: distribution will already see us throttled via the
@@ -6961,6 +6966,7 @@ static void set_next_buddy(struct sched_entity *se);
static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
{
bool was_sched_idle = sched_idle_rq(rq);
+ int rq_h_nr_queued = rq->cfs.h_nr_queued;
bool task_sleep = flags & DEQUEUE_SLEEP;
bool task_delayed = flags & DEQUEUE_DELAYED;
struct task_struct *p = NULL;
@@ -7044,6 +7050,9 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
sub_nr_running(rq, h_nr_queued);
+ if (rq_h_nr_queued && !rq->cfs.h_nr_queued)
+ dl_server_stop(&rq->fair_server);
+
/* balance early to pull high priority tasks */
if (unlikely(!was_sched_idle && sched_idle_rq(rq)))
rq->next_balance = jiffies;