Message-Id: <20250520104110.3673059-4-ziqianlu@bytedance.com>
Date: Tue, 20 May 2025 18:41:06 +0800
From: Aaron Lu <ziqianlu@...edance.com>
To: Valentin Schneider <vschneid@...hat.com>,
Ben Segall <bsegall@...gle.com>,
K Prateek Nayak <kprateek.nayak@....com>,
Peter Zijlstra <peterz@...radead.org>,
Josh Don <joshdon@...gle.com>,
Ingo Molnar <mingo@...hat.com>,
Vincent Guittot <vincent.guittot@...aro.org>,
Xi Wang <xii@...gle.com>
Cc: linux-kernel@...r.kernel.org,
Juri Lelli <juri.lelli@...hat.com>,
Dietmar Eggemann <dietmar.eggemann@....com>,
Steven Rostedt <rostedt@...dmis.org>,
Mel Gorman <mgorman@...e.de>,
Chengming Zhou <chengming.zhou@...ux.dev>,
Chuyi Zhou <zhouchuyi@...edance.com>,
Jan Kiszka <jan.kiszka@...mens.com>,
Florian Bezdeka <florian.bezdeka@...mens.com>
Subject: [PATCH 3/7] sched/fair: prepare unthrottle path for task based throttle
From: Valentin Schneider <vschneid@...hat.com>
During unthrottle, enqueue back throttled tasks so that they can
resume execution.

This patch only sets up the enqueue mechanism; it does not take effect
yet because no tasks in a throttled hierarchy are placed on the limbo
list at this point in the series.
Signed-off-by: Valentin Schneider <vschneid@...hat.com>
Signed-off-by: Aaron Lu <ziqianlu@...edance.com>
---
kernel/sched/fair.c | 55 ++++++++++++++++++++++++++++++++++-----------
1 file changed, 42 insertions(+), 13 deletions(-)
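
Not part of the patch: a rough userspace sketch of the limbo-list
bookkeeping the change relies on, for reviewers who want to see the
pattern in isolation. Names mirror the patch (throttled_limbo_list,
throttle_node, task_is_throttled()); the mini list helpers, the
throttle_task() stub and the printf "enqueue" stand in for the kernel's
<linux/list.h> and the real scheduler paths, so treat it as an
illustration only.

/*
 * Sketch: a task's throttle_node stays self-pointing while the task is
 * not throttled, so "on a list" doubles as the throttled predicate, and
 * unthrottle simply drains the per-cfs_rq limbo list.
 */
#include <stdio.h>
#include <stddef.h>

struct list_head { struct list_head *prev, *next; };

static void INIT_LIST_HEAD(struct list_head *h) { h->prev = h->next = h; }
static int  list_empty(const struct list_head *h) { return h->next == h; }

static void list_add_tail(struct list_head *new, struct list_head *head)
{
	new->prev = head->prev;
	new->next = head;
	head->prev->next = new;
	head->prev = new;
}

static void list_del_init(struct list_head *entry)
{
	entry->prev->next = entry->next;
	entry->next->prev = entry->prev;
	INIT_LIST_HEAD(entry);	/* self-pointing again => "not throttled" */
}

struct task_struct {
	const char *comm;
	struct list_head throttle_node;	/* links into the cfs_rq limbo list */
};

struct cfs_rq {
	struct list_head throttled_limbo_list;
};

/* Mirrors the helper added by the patch. */
static int task_is_throttled(struct task_struct *p)
{
	return !list_empty(&p->throttle_node);
}

/* Stand-in for what the throttle path (a later patch) would do. */
static void throttle_task(struct cfs_rq *cfs_rq, struct task_struct *p)
{
	list_add_tail(&p->throttle_node, &cfs_rq->throttled_limbo_list);
}

/* Mirrors the drain loop in tg_unthrottle_up(): del_init + enqueue. */
static void unthrottle_cfs_rq_tasks(struct cfs_rq *cfs_rq)
{
	while (!list_empty(&cfs_rq->throttled_limbo_list)) {
		struct list_head *node = cfs_rq->throttled_limbo_list.next;
		struct task_struct *p = (struct task_struct *)
			((char *)node - offsetof(struct task_struct, throttle_node));

		list_del_init(&p->throttle_node);
		printf("enqueue %s (ENQUEUE_WAKEUP)\n", p->comm);
	}
}

int main(void)
{
	struct cfs_rq cfs_rq;
	struct task_struct p = { .comm = "demo" };

	INIT_LIST_HEAD(&cfs_rq.throttled_limbo_list);
	INIT_LIST_HEAD(&p.throttle_node);	/* as in init_cfs_throttle_work() */

	printf("throttled? %d\n", task_is_throttled(&p));	/* 0 */
	throttle_task(&cfs_rq, &p);
	printf("throttled? %d\n", task_is_throttled(&p));	/* 1 */
	unthrottle_cfs_rq_tasks(&cfs_rq);
	printf("throttled? %d\n", task_is_throttled(&p));	/* 0 */
	return 0;
}

The reason the drain loop uses list_del_init() rather than list_del()
is that it leaves throttle_node self-pointing, which is what lets
task_is_throttled() be a plain list_empty() check.
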
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e87ceb0a2d37f..74bc320cbc238 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5825,6 +5825,11 @@ static inline int throttled_lb_pair(struct task_group *tg,
throttled_hierarchy(dest_cfs_rq);
}
+static inline bool task_is_throttled(struct task_struct *p)
+{
+ return !list_empty(&p->throttle_node);
+}
+
static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags);
static void throttle_cfs_rq_work(struct callback_head *work)
{
@@ -5876,32 +5881,41 @@ void init_cfs_throttle_work(struct task_struct *p)
INIT_LIST_HEAD(&p->throttle_node);
}
+static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags);
static int tg_unthrottle_up(struct task_group *tg, void *data)
{
struct rq *rq = data;
struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
+ struct task_struct *p, *tmp;
cfs_rq->throttle_count--;
- if (!cfs_rq->throttle_count) {
- cfs_rq->throttled_clock_pelt_time += rq_clock_pelt(rq) -
- cfs_rq->throttled_clock_pelt;
+ if (cfs_rq->throttle_count)
+ return 0;
- /* Add cfs_rq with load or one or more already running entities to the list */
- if (!cfs_rq_is_decayed(cfs_rq))
- list_add_leaf_cfs_rq(cfs_rq);
+ cfs_rq->throttled_clock_pelt_time += rq_clock_pelt(rq) -
+ cfs_rq->throttled_clock_pelt;
- if (cfs_rq->throttled_clock_self) {
- u64 delta = rq_clock(rq) - cfs_rq->throttled_clock_self;
+ if (cfs_rq->throttled_clock_self) {
+ u64 delta = rq_clock(rq) - cfs_rq->throttled_clock_self;
- cfs_rq->throttled_clock_self = 0;
+ cfs_rq->throttled_clock_self = 0;
- if (WARN_ON_ONCE((s64)delta < 0))
- delta = 0;
+ if (WARN_ON_ONCE((s64)delta < 0))
+ delta = 0;
- cfs_rq->throttled_clock_self_time += delta;
- }
+ cfs_rq->throttled_clock_self_time += delta;
+ }
+
+ /* Re-enqueue the tasks that have been throttled at this level. */
+ list_for_each_entry_safe(p, tmp, &cfs_rq->throttled_limbo_list, throttle_node) {
+ list_del_init(&p->throttle_node);
+ enqueue_task_fair(rq_of(cfs_rq), p, ENQUEUE_WAKEUP);
}
+ /* Add cfs_rq with load or one or more already running entities to the list */
+ if (!cfs_rq_is_decayed(cfs_rq))
+ list_add_leaf_cfs_rq(cfs_rq);
+
return 0;
}
@@ -6059,6 +6073,19 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
long queued_delta, runnable_delta, idle_delta;
long rq_h_nr_queued = rq->cfs.h_nr_queued;
+ /*
+ * It's possible we are called with !runtime_remaining, e.g. the user
+ * changed the quota setting (see tg_set_cfs_bandwidth()), or an async
+ * unthrottle left us with a positive runtime_remaining that other
+ * still-running entities consumed before we got here.
+ *
+ * Either way, we can't unthrottle this cfs_rq without runtime remaining,
+ * because any enqueue in tg_unthrottle_up() would immediately trigger a
+ * throttle, which is not supposed to happen on the unthrottle path.
+ */
+ if (cfs_rq->runtime_enabled && cfs_rq->runtime_remaining <= 0)
+ return;
+
se = cfs_rq->tg->se[cpu_of(rq)];
cfs_rq->throttled = 0;
@@ -6806,6 +6833,7 @@ static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {}
static inline void sync_throttle(struct task_group *tg, int cpu) {}
static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
static void task_throttle_setup_work(struct task_struct *p) {}
+static bool task_is_throttled(struct task_struct *p) { return false; }
static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
{
@@ -7014,6 +7042,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
util_est_enqueue(&rq->cfs, p);
if (flags & ENQUEUE_DELAYED) {
+ WARN_ON_ONCE(task_is_throttled(p));
requeue_delayed_entity(se);
return;
}
--
2.39.5