[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251101000057.GA2184199@noisy.programming.kicks-ass.net>
Date: Sat, 1 Nov 2025 01:00:57 +0100
From: Peter Zijlstra <peterz@...radead.org>
To: Gabriele Monaco <gmonaco@...hat.com>
Cc: Juri Lelli <juri.lelli@...hat.com>, linux-kernel@...r.kernel.org,
Ingo Molnar <mingo@...hat.com>,
Clark Williams <williams@...hat.com>, arighi@...dia.com
Subject: Re: [RFC PATCH] sched/deadline: Avoid dl_server boosting with
expired deadline
On Fri, Oct 31, 2025 at 04:44:55PM +0100, Peter Zijlstra wrote:
> Anyway, back to noodling on how to make it stop on idle :-)
This seems to behave (at least, it did before the cleanup).
It has the fancy comment, ascii-art and everything. Hopefully we'll all
get less confused when looking at this in the future.
---
include/linux/sched.h | 15 +--
kernel/sched/deadline.c | 233 +++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 240 insertions(+), 8 deletions(-)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -685,20 +685,22 @@ struct sched_dl_entity {
*
* @dl_server tells if this is a server entity.
*
- * @dl_defer tells if this is a deferred or regular server. For
- * now only defer server exists.
- *
- * @dl_defer_armed tells if the deferrable server is waiting
- * for the replenishment timer to activate it.
- *
* @dl_server_active tells if the dlserver is active(started).
* dlserver is started on first cfs enqueue on an idle runqueue
* and is stopped when a dequeue results in 0 cfs tasks on the
* runqueue. In other words, dlserver is active only when cpu's
* runqueue has atleast one cfs task.
*
+ * @dl_defer tells if this is a deferred or regular server. For
+ * now only defer server exists.
+ *
+ * @dl_defer_armed tells if the deferrable server is waiting
+ * for the replenishment timer to activate it.
+ *
* @dl_defer_running tells if the deferrable server is actually
* running, skipping the defer phase.
+ *
+ * @dl_defer_idle tracks idle state
*/
unsigned int dl_throttled : 1;
unsigned int dl_yielded : 1;
@@ -709,6 +711,7 @@ struct sched_dl_entity {
unsigned int dl_defer : 1;
unsigned int dl_defer_armed : 1;
unsigned int dl_defer_running : 1;
+ unsigned int dl_defer_idle : 1;
/*
* Bandwidth enforcement timer. Each -deadline task has its
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1173,6 +1173,11 @@ static enum hrtimer_restart dl_server_ti
*/
rq->curr->sched_class->update_curr(rq);
+ if (dl_se->dl_defer_idle) {
+ dl_server_stop(dl_se);
+ return HRTIMER_NORESTART;
+ }
+
if (dl_se->dl_defer_armed) {
/*
* First check if the server could consume runtime in background.
@@ -1420,10 +1425,11 @@ s64 dl_scaled_delta_exec(struct rq *rq,
}
static inline void
-update_stats_dequeue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se,
- int flags);
+update_stats_dequeue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se, int flags);
+
static void update_curr_dl_se(struct rq *rq, struct sched_dl_entity *dl_se, s64 delta_exec)
{
+ bool idle = rq->curr == rq->idle;
s64 scaled_delta_exec;
if (unlikely(delta_exec <= 0)) {
@@ -1444,6 +1450,9 @@ static void update_curr_dl_se(struct rq
dl_se->runtime -= scaled_delta_exec;
+ if (dl_se->dl_defer_idle && !idle)
+ dl_se->dl_defer_idle = 0;
+
/*
* The fair server can consume its runtime while throttled (not queued/
* running as regular CFS).
@@ -1454,6 +1463,29 @@ static void update_curr_dl_se(struct rq
*/
if (dl_se->dl_defer && dl_se->dl_throttled && dl_runtime_exceeded(dl_se)) {
/*
+ * Non-servers would never get time accounted while throttled.
+ */
+ WARN_ON_ONCE(!dl_server(dl_se));
+
+ /*
+ * While the server is marked idle, do not push out the
+ * activation further, instead wait for the period timer
+ * to lapse and stop the server.
+ */
+ if (dl_se->dl_defer_idle && idle) {
+ /*
+ * The timer is at the zero-laxity point, this means
+ * dl_server_stop() / dl_server_start() can happen
+ * while now < deadline. This means update_dl_entity()
+ * will not replenish. Additionally start_dl_timer()
+ * will be set for 'deadline - runtime'. Negative
+ * runtime will not do.
+ */
+ dl_se->runtime = 0;
+ return;
+ }
+
+ /*
* If the server was previously activated - the starving condition
* took place, it this point it went away because the fair scheduler
* was able to get runtime in background. So return to the initial
@@ -1465,6 +1497,9 @@ static void update_curr_dl_se(struct rq
replenish_dl_new_period(dl_se, dl_se->rq);
+ if (idle)
+ dl_se->dl_defer_idle = 1;
+
/*
* Not being able to start the timer seems problematic. If it could not
* be started for whatever reason, we need to "unthrottle" the DL server
@@ -1560,6 +1595,199 @@ void dl_server_update(struct sched_dl_en
update_curr_dl_se(dl_se->rq, dl_se, delta_exec);
}
+/*
+ * dl_server && dl_defer:
+ *
+ * 6
+ * +--------------------+
+ * v |
+ * +-------------+ 4 +-----------+ 5 +------------------+
+ * +-> | A:init | <--- | D:running | -----> | E:replenish-wait |
+ * | +-------------+ +-----------+ +------------------+
+ * | | | 1 ^ ^ |
+ * | | 1 +----------+ | 3 |
+ * | v | |
+ * | +--------------------------------+ 2 |
+ * | | | ----+ |
+ * | 8 | B:zero_laxity-wait | | |
+ * | | | <---+ |
+ * | +--------------------------------+ |
+ * | | ^ ^ 2 |
+ * | | 7 | 2 +--------------------+
+ * | v |
+ * | +-------------+ |
+ * +-- | C:idle-wait | -+
+ * +-------------+
+ * ^ 7 |
+ * +---------+
+ *
+ *
+ * [A] - init
+ * dl_server_active = 0
+ * dl_throttled = 0
+ * dl_defer_armed = 0
+ * dl_defer_running = 0/1
+ * dl_defer_idle = 0
+ *
+ * [B] - zero_laxity-wait
+ * dl_server_active = 1
+ * dl_throttled = 1
+ * dl_defer_armed = 1
+ * dl_defer_running = 0
+ * dl_defer_idle = 0
+ *
+ * [C] - idle-wait
+ * dl_server_active = 1
+ * dl_throttled = 1
+ * dl_defer_armed = 1
+ * dl_defer_running = 0
+ * dl_defer_idle = 1
+ *
+ * [D] - running
+ * dl_server_active = 1
+ * dl_throttled = 0
+ * dl_defer_armed = 0
+ * dl_defer_running = 1
+ * dl_defer_idle = 0
+ *
+ * [E] - replenish-wait
+ * dl_server_active = 1
+ * dl_throttled = 1
+ * dl_defer_armed = 0
+ * dl_defer_running = 1
+ * dl_defer_idle = 0
+ *
+ *
+ * [1] A->B, A->D
+ * dl_server_start()
+ * dl_server_active = 1;
+ * enqueue_dl_entity()
+ * update_dl_entity(WAKEUP)
+ * if (!dl_defer_running)
+ * dl_defer_armed = 1;
+ * dl_throttled = 1;
+ * if (dl_throttled && start_dl_timer())
+ * return;
+ * // start server into waiting for zero-laxity
+ * __enqueue_dl_entity();
+ * // start running
+ *
+ * // deplete server runtime from client-class
+ * [2] B->B, C->B, E->B
+ * dl_server_update()
+ * update_curr_dl_se()
+ * if (dl_defer_idle)
+ * dl_defer_idle = 0;
+ * if (dl_defer && dl_throttled && dl_runtime_exceeded())
+ * dl_defer_running = 0;
+ * hrtimer_try_to_cancel(); // stop timer
+ * replenish_dl_new_period()
+ * // fwd period
+ * dl_throttled = 1;
+ * dl_defer_armed = 1;
+ * start_dl_timer(); // restart timer
+ * // back into waiting for zero-laxity
+ *
+ * // timer actually fires means we have runtime
+ * [3] B->D
+ * dl_server_timer()
+ * if (dl_defer_armed)
+ * dl_defer_running = 1;
+ * enqueue_dl_entity(REPLENISH)
+ * replenish_dl_entity()
+ * // fwd period
+ * if (dl_throttled)
+ * dl_throttled = 0;
+ * if (dl_defer_armed)
+ * dl_defer_armed = 0;
+ * __enqueue_dl_entity();
+ * // goto [4]
+ *
+ * // schedule server
+ * [4] D->A
+ * pick_task_dl()
+ * p = server_pick_task();
+ * if (!p)
+ * dl_server_stop()
+ * dequeue_dl_entity();
+ * hrtimer_try_to_cancel();
+ * dl_defer_armed = 0;
+ * dl_throttled = 0;
+ * dl_server_active = 0;
+ * // goto [1]
+ * return p;
+ *
+ * // server running
+ * [5] D->E
+ * update_curr_dl_se()
+ * if (dl_runtime_exceeded())
+ * dl_throttled = 1;
+ * dequeue_dl_entity();
+ * start_dl_timer();
+ * // replenish-timer
+ *
+ * // server exchausted
+ * [6] E->D
+ * dl_server_timer()
+ * enqueue_dl_entity(REPLENISH)
+ * replenish_dl_entity()
+ * fwd-period
+ * if (dl_throttled)
+ * dl_throttled = 0;
+ * __enqueue_dl_entity();
+ * // goto [4]
+ *
+ * // deplete server runtime from idle
+ * [7] B->C, C->C
+ * dl_server_update_idle()
+ * update_curr_dl_se()
+ * if (dl_defer && dl_throttled && dl_runtime_exceeded())
+ * if (dl_defer_idle)
+ * return;
+ * dl_defer_running = 0;
+ * hrtimer_try_to_cancel();
+ * replenish_dl_new_period()
+ * // fwd period
+ * dl_throttled = 1;
+ * dl_defer_armed = 1;
+ * dl_defer_idle = 1;
+ * start_dl_timer(); // restart timer
+ *
+ * // stop idle server
+ * [8] C->A
+ * dl_server_timer()
+ * if (dl_defer_idle)
+ * dl_server_stop();
+ *
+ *
+ * digraph dl_server {
+ * "A:init" -> "B:zero_laxity-wait" [label="1:dl_server_start"]
+ * "A:init" -> "D:running" [label="1:dl_server_start"]
+ * "B:zero_laxity-wait" -> "B:zero_laxity-wait" [label="2:dl_server_update"]
+ * "B:zero_laxity-wait" -> "C:idle-wait" [label="7:dl_server_update_idle"]
+ * "B:zero_laxity-wait" -> "D:running" [label="3:dl_server_timer"]
+ * "C:idle-wait" -> "C:idle-wait" [label="7:dl_server_update_idle"]
+ * "C:idle-wait" -> "B:zero_laxity-wait" [label="2:dl_server_update"]
+ * "C:idle-wait" -> "A:init" [label="8:dl_server_timer"]
+ * "D:running" -> "A:init" [label="4:pick_task_dl"]
+ * "D:running" -> "E:replenish-wait" [label="5:update_curr_dl_se"]
+ * "E:replenish-wait" -> "B:zero_laxity-wait" [label="2:dl_server_update"]
+ * "E:replenish-wait" -> "D:running" [label="6:dl_server_timer"]
+ * }
+ *
+ *
+ * Notes:
+ *
+ * - When there are fair tasks running the most likely loop is [2]->[2].
+ * the dl_server never actually runs, the timer never fires.
+ *
+ * - When there is actual fair starvation; the timer fires and starts the
+ * dl_server. This will then throttle and replenish like a normal DL
+ * task. Notably it will not 'defer' again.
+ *
+ * - When idle it will push the activation forward once, and then wait
+ * for the timer to hit or a non-idle update to restart things.
+ */
void dl_server_start(struct sched_dl_entity *dl_se)
{
struct rq *rq = dl_se->rq;
@@ -1590,6 +1818,7 @@ void dl_server_stop(struct sched_dl_enti
hrtimer_try_to_cancel(&dl_se->dl_timer);
dl_se->dl_defer_armed = 0;
dl_se->dl_throttled = 0;
+ dl_se->dl_defer_idle = 0;
dl_se->dl_server_active = 0;
}
Powered by blists - more mailing lists