Message-Id: <20250520104110.3673059-4-ziqianlu@bytedance.com>
Date: Tue, 20 May 2025 18:41:06 +0800
From: Aaron Lu <ziqianlu@...edance.com>
To: Valentin Schneider <vschneid@...hat.com>,
Ben Segall <bsegall@...gle.com>,
K Prateek Nayak <kprateek.nayak@....com>,
Peter Zijlstra <peterz@...radead.org>,
Josh Don <joshdon@...gle.com>,
Ingo Molnar <mingo@...hat.com>,
Vincent Guittot <vincent.guittot@...aro.org>,
Xi Wang <xii@...gle.com>
Cc: linux-kernel@...r.kernel.org,
Juri Lelli <juri.lelli@...hat.com>,
Dietmar Eggemann <dietmar.eggemann@....com>,
Steven Rostedt <rostedt@...dmis.org>,
Mel Gorman <mgorman@...e.de>,
Chengming Zhou <chengming.zhou@...ux.dev>,
Chuyi Zhou <zhouchuyi@...edance.com>,
Jan Kiszka <jan.kiszka@...mens.com>,
Florian Bezdeka <florian.bezdeka@...mens.com>
Subject: [PATCH 3/7] sched/fair: prepare unthrottle path for task based throttle
From: Valentin Schneider <vschneid@...hat.com>
During unthrottle, enqueue back throttled tasks so that they can
resume execution.

This patch only sets up the enqueue mechanism; it does not take effect
yet because no tasks in a throttled hierarchy are placed on the limbo
list at this point in the series.
Signed-off-by: Valentin Schneider <vschneid@...hat.com>
Signed-off-by: Aaron Lu <ziqianlu@...edance.com>
---
kernel/sched/fair.c | 55 ++++++++++++++++++++++++++++++++++-----------
1 file changed, 42 insertions(+), 13 deletions(-)
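
Not part of the patch: a rough userspace sketch of the limbo-list
bookkeeping the change relies on, for reviewers who want to see the
pattern in isolation. Names mirror the patch (throttled_limbo_list,
throttle_node, task_is_throttled()); the mini list helpers, the
throttle_task() stub and the printf "enqueue" stand in for the kernel's
<linux/list.h> and the real scheduler paths, so treat it as an
illustration only.

/*
 * Sketch: a task's throttle_node stays self-pointing while the task is
 * not throttled, so "on a list" doubles as the throttled predicate, and
 * unthrottle simply drains the per-cfs_rq limbo list.
 */
#include <stdio.h>
#include <stddef.h>

struct list_head { struct list_head *prev, *next; };

static void INIT_LIST_HEAD(struct list_head *h) { h->prev = h->next = h; }
static int  list_empty(const struct list_head *h) { return h->next == h; }

static void list_add_tail(struct list_head *new, struct list_head *head)
{
	new->prev = head->prev;
	new->next = head;
	head->prev->next = new;
	head->prev = new;
}

static void list_del_init(struct list_head *entry)
{
	entry->prev->next = entry->next;
	entry->next->prev = entry->prev;
	INIT_LIST_HEAD(entry);	/* self-pointing again => "not throttled" */
}

struct task_struct {
	const char *comm;
	struct list_head throttle_node;	/* links into the cfs_rq limbo list */
};

struct cfs_rq {
	struct list_head throttled_limbo_list;
};

/* Mirrors the helper added by the patch. */
static int task_is_throttled(struct task_struct *p)
{
	return !list_empty(&p->throttle_node);
}

/* Stand-in for what the throttle path (a later patch) would do. */
static void throttle_task(struct cfs_rq *cfs_rq, struct task_struct *p)
{
	list_add_tail(&p->throttle_node, &cfs_rq->throttled_limbo_list);
}

/* Mirrors the drain loop in tg_unthrottle_up(): del_init + enqueue. */
static void unthrottle_cfs_rq_tasks(struct cfs_rq *cfs_rq)
{
	while (!list_empty(&cfs_rq->throttled_limbo_list)) {
		struct list_head *node = cfs_rq->throttled_limbo_list.next;
		struct task_struct *p = (struct task_struct *)
			((char *)node - offsetof(struct task_struct, throttle_node));

		list_del_init(&p->throttle_node);
		printf("enqueue %s (ENQUEUE_WAKEUP)\n", p->comm);
	}
}

int main(void)
{
	struct cfs_rq cfs_rq;
	struct task_struct p = { .comm = "demo" };

	INIT_LIST_HEAD(&cfs_rq.throttled_limbo_list);
	INIT_LIST_HEAD(&p.throttle_node);	/* as in init_cfs_throttle_work() */

	printf("throttled? %d\n", task_is_throttled(&p));	/* 0 */
	throttle_task(&cfs_rq, &p);
	printf("throttled? %d\n", task_is_throttled(&p));	/* 1 */
	unthrottle_cfs_rq_tasks(&cfs_rq);
	printf("throttled? %d\n", task_is_throttled(&p));	/* 0 */
	return 0;
}

The reason the drain loop uses list_del_init() rather than list_del()
is that it leaves throttle_node self-pointing, which is what lets
task_is_throttled() be a plain list_empty() check.
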
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e87ceb0a2d37f..74bc320cbc238 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5825,6 +5825,11 @@ static inline int throttled_lb_pair(struct task_group *tg,
throttled_hierarchy(dest_cfs_rq);
}
+static inline bool task_is_throttled(struct task_struct *p)
+{
+ return !list_empty(&p->throttle_node);
+}
+
static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags);
static void throttle_cfs_rq_work(struct callback_head *work)
{
@@ -5876,32 +5881,41 @@ void init_cfs_throttle_work(struct task_struct *p)
INIT_LIST_HEAD(&p->throttle_node);
}
+static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags);
static int tg_unthrottle_up(struct task_group *tg, void *data)
{
struct rq *rq = data;
struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
+ struct task_struct *p, *tmp;
cfs_rq->throttle_count--;
- if (!cfs_rq->throttle_count) {
- cfs_rq->throttled_clock_pelt_time += rq_clock_pelt(rq) -
- cfs_rq->throttled_clock_pelt;
+ if (cfs_rq->throttle_count)
+ return 0;
- /* Add cfs_rq with load or one or more already running entities to the list */
- if (!cfs_rq_is_decayed(cfs_rq))
- list_add_leaf_cfs_rq(cfs_rq);
+ cfs_rq->throttled_clock_pelt_time += rq_clock_pelt(rq) -
+ cfs_rq->throttled_clock_pelt;
- if (cfs_rq->throttled_clock_self) {
- u64 delta = rq_clock(rq) - cfs_rq->throttled_clock_self;
+ if (cfs_rq->throttled_clock_self) {
+ u64 delta = rq_clock(rq) - cfs_rq->throttled_clock_self;
- cfs_rq->throttled_clock_self = 0;
+ cfs_rq->throttled_clock_self = 0;
- if (WARN_ON_ONCE((s64)delta < 0))
- delta = 0;
+ if (WARN_ON_ONCE((s64)delta < 0))
+ delta = 0;
- cfs_rq->throttled_clock_self_time += delta;
- }
+ cfs_rq->throttled_clock_self_time += delta;
+ }
+
+ /* Re-enqueue the tasks that have been throttled at this level. */
+ list_for_each_entry_safe(p, tmp, &cfs_rq->throttled_limbo_list, throttle_node) {
+ list_del_init(&p->throttle_node);
+ enqueue_task_fair(rq_of(cfs_rq), p, ENQUEUE_WAKEUP);
}
+ /* Add cfs_rq with load or one or more already running entities to the list */
+ if (!cfs_rq_is_decayed(cfs_rq))
+ list_add_leaf_cfs_rq(cfs_rq);
+
return 0;
}
@@ -6059,6 +6073,19 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
long queued_delta, runnable_delta, idle_delta;
long rq_h_nr_queued = rq->cfs.h_nr_queued;
+ /*
+ * It's possible we are called with !runtime_remaining, e.g. the user
+ * changed the quota setting (see tg_set_cfs_bandwidth()), or an async
+ * unthrottle left us with a positive runtime_remaining that other
+ * still-running entities consumed before we got here.
+ *
+ * Either way, we can't unthrottle this cfs_rq without runtime remaining,
+ * because any enqueue in tg_unthrottle_up() would immediately trigger a
+ * throttle, which is not supposed to happen on the unthrottle path.
+ */
+ if (cfs_rq->runtime_enabled && cfs_rq->runtime_remaining <= 0)
+ return;
+
se = cfs_rq->tg->se[cpu_of(rq)];
cfs_rq->throttled = 0;
@@ -6806,6 +6833,7 @@ static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {}
static inline void sync_throttle(struct task_group *tg, int cpu) {}
static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
static void task_throttle_setup_work(struct task_struct *p) {}
+static bool task_is_throttled(struct task_struct *p) { return false; }
static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
{
@@ -7014,6 +7042,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
util_est_enqueue(&rq->cfs, p);
if (flags & ENQUEUE_DELAYED) {
+ WARN_ON_ONCE(task_is_throttled(p));
requeue_delayed_entity(se);
return;
}
--
2.39.5