lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20240405110010.522077707@infradead.org>
Date: Fri, 05 Apr 2024 12:28:01 +0200
From: Peter Zijlstra <peterz@...radead.org>
To: mingo@...hat.com,
 peterz@...radead.org,
 juri.lelli@...hat.com,
 vincent.guittot@...aro.org,
 dietmar.eggemann@....com,
 rostedt@...dmis.org,
 bsegall@...gle.com,
 mgorman@...e.de,
 bristot@...hat.com,
 vschneid@...hat.com,
 linux-kernel@...r.kernel.org
Cc: kprateek.nayak@....com,
 wuyun.abel@...edance.com,
 tglx@...utronix.de,
 efault@....de
Subject: [RFC][PATCH 07/10] sched/fair: Re-organize dequeue_task_fair()

Working towards delaying dequeue, notably also inside the hierachy,
rework dequeue_task_fair() such that it can 'resume' an interrupted
hierarchy walk.

Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
---
 kernel/sched/fair.c |   82 ++++++++++++++++++++++++++++++++++------------------
 1 file changed, 55 insertions(+), 27 deletions(-)

--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6824,33 +6824,45 @@ enqueue_task_fair(struct rq *rq, struct
 static void set_next_buddy(struct sched_entity *se);
 
 /*
- * The dequeue_task method is called before nr_running is
- * decreased. We remove the task from the rbtree and
- * update the fair scheduling stats:
+ * Basically dequeue_task_fair(), except it can deal with dequeue_entity()
+ * failing half-way through and resume the dequeue later.
+ *
+ * Returns:
+ * -1 - dequeue delayed
+ *  0 - dequeue throttled
+ *  1 - dequeue complete
  */
-static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
+static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
 {
-	struct cfs_rq *cfs_rq;
-	struct sched_entity *se = &p->se;
-	int task_sleep = flags & DEQUEUE_SLEEP;
-	int idle_h_nr_running = task_has_idle_policy(p);
 	bool was_sched_idle = sched_idle_rq(rq);
+	bool task_sleep = flags & DEQUEUE_SLEEP;
+	struct task_struct *p = NULL;
+	struct cfs_rq *cfs_rq;
+	int idle_h_nr_running;
 
-	util_est_dequeue(&rq->cfs, p);
+	if (entity_is_task(se)) {
+		p = task_of(se);
+		idle_h_nr_running = task_has_idle_policy(p);
+	} else {
+		idle_h_nr_running = cfs_rq_is_idle(group_cfs_rq(se));
+	}
 
 	for_each_sched_entity(se) {
 		cfs_rq = cfs_rq_of(se);
 		dequeue_entity(cfs_rq, se, flags);
 
-		cfs_rq->h_nr_running--;
-		cfs_rq->idle_h_nr_running -= idle_h_nr_running;
+		/* h_nr_running is the hierachical count of tasks */
+		if (p) {
+			cfs_rq->h_nr_running--;
+			cfs_rq->idle_h_nr_running -= idle_h_nr_running;
 
-		if (cfs_rq_is_idle(cfs_rq))
-			idle_h_nr_running = 1;
+			if (cfs_rq_is_idle(cfs_rq))
+				idle_h_nr_running = 1;
+		}
 
 		/* end evaluation on encountering a throttled cfs_rq */
 		if (cfs_rq_throttled(cfs_rq))
-			goto dequeue_throttle;
+			return 0;
 
 		/* Don't dequeue parent if it has other entities besides us */
 		if (cfs_rq->load.weight) {
@@ -6870,33 +6882,49 @@ static bool dequeue_task_fair(struct rq
 	for_each_sched_entity(se) {
 		cfs_rq = cfs_rq_of(se);
 
+		// XXX avoid these load updates for delayed dequeues ?
 		update_load_avg(cfs_rq, se, UPDATE_TG);
 		se_update_runnable(se);
 		update_cfs_group(se);
 
-		cfs_rq->h_nr_running--;
-		cfs_rq->idle_h_nr_running -= idle_h_nr_running;
+		if (p) {
+			cfs_rq->h_nr_running--;
+			cfs_rq->idle_h_nr_running -= idle_h_nr_running;
 
-		if (cfs_rq_is_idle(cfs_rq))
-			idle_h_nr_running = 1;
+			if (cfs_rq_is_idle(cfs_rq))
+				idle_h_nr_running = 1;
+		}
 
 		/* end evaluation on encountering a throttled cfs_rq */
 		if (cfs_rq_throttled(cfs_rq))
-			goto dequeue_throttle;
+			return 0;
+	}
+
+	if (p) {
+		sub_nr_running(rq, 1);
 
+		/* balance early to pull high priority tasks */
+		if (unlikely(!was_sched_idle && sched_idle_rq(rq)))
+			rq->next_balance = jiffies;
 	}
 
-	/* At this point se is NULL and we are at root level*/
-	sub_nr_running(rq, 1);
+	return 1;
+}
 
-	/* balance early to pull high priority tasks */
-	if (unlikely(!was_sched_idle && sched_idle_rq(rq)))
-		rq->next_balance = jiffies;
+/*
+ * The dequeue_task method is called before nr_running is
+ * decreased. We remove the task from the rbtree and
+ * update the fair scheduling stats:
+ */
+static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
+{
+	util_est_dequeue(&rq->cfs, p);
 
-dequeue_throttle:
-	util_est_update(&rq->cfs, p, task_sleep);
-	hrtick_update(rq);
+	if (dequeue_entities(rq, &p->se, flags) < 0)
+		return false;
 
+	util_est_update(&rq->cfs, p, flags & DEQUEUE_SLEEP);
+	hrtick_update(rq);
 	return true;
 }
 



Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ