Message-ID: <20250920005931.2753828-33-tj@kernel.org>
Date: Fri, 19 Sep 2025 14:58:55 -1000
From: Tejun Heo <tj@...nel.org>
To: void@...ifault.com,
	arighi@...dia.com,
	multics69@...il.com
Cc: linux-kernel@...r.kernel.org,
	sched-ext@...ts.linux.dev,
	memxor@...il.com,
	bpf@...r.kernel.org,
	Tejun Heo <tj@...nel.org>
Subject: [PATCH 32/46] sched_ext: Make bypass mode sub-sched aware

Bypass mode is used to simplify the enable and disable paths and to
guarantee forward progress when something goes wrong. While bypass is in
effect, all tasks skip BPF scheduling and fall back to simple in-kernel
FIFO scheduling. This global behavior could be carried over unchanged to
sub-scheds, but that would allow any single sub-sched instance to disrupt
the whole system.

Make bypass mode hierarchical instead. An scx_sched bypasses if it or any
of its ancestors is in bypass mode.
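
As a purely illustrative aside, the rule can be modeled in userspace with
toy types (toy_sched, toy_bypass and friends are made-up names for this
sketch, not kernel APIs, and the sched tree is flattened to a single-child
chain): each node's bypass_depth accumulates its own and every ancestor's
bypass requests, so whether a node bypasses remains a local depth test,
mirroring what the patch below does with scx_for_each_descendant_pre().

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_sched {
	const char *name;
	int bypass_depth;		/* >0 means this node bypasses */
	struct toy_sched *child;	/* single child keeps the demo small */
};

/* Apply a bypass on/off transition to @sch and all of its descendants. */
static void toy_bypass(struct toy_sched *sch, bool bypass)
{
	struct toy_sched *pos;

	for (pos = sch; pos; pos = pos->child) {
		if (bypass)
			pos->bypass_depth++;
		else
			pos->bypass_depth--;
		assert(pos->bypass_depth >= 0);
	}
}

static bool toy_bypassing(const struct toy_sched *sch)
{
	return sch->bypass_depth > 0;
}

int main(void)
{
	struct toy_sched sub = { .name = "sub" };
	struct toy_sched root = { .name = "root", .child = &sub };

	toy_bypass(&root, true);	/* root bypasses -> so does sub */
	toy_bypass(&sub, true);		/* sub bypasses on its own, too */
	toy_bypass(&root, false);	/* root's bypass is lifted ... */

	/* ... but sub keeps bypassing until its own request is dropped */
	printf("root: %d sub: %d\n", toy_bypassing(&root), toy_bypassing(&sub));
	toy_bypass(&sub, false);
	return 0;
}

Because bypass requests are counted rather than flagged, a sub-sched that
entered bypass on its own stays bypassed even after an ancestor's bypass
is lifted, and nested on/off pairs from different levels compose safely.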

Signed-off-by: Tejun Heo <tj@...nel.org>
---
 kernel/sched/ext.c | 38 +++++++++++++++++++++++++++++++-------
 1 file changed, 31 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 5f22a79e19ec..44f9cc7f0915 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -3949,6 +3949,7 @@ static void scx_bypass(struct scx_sched *sch, bool bypass)
 {
 	static DEFINE_RAW_SPINLOCK(bypass_lock);
 	static unsigned long bypass_timestamp;
+	struct scx_sched *pos;
 	unsigned long flags;
 	int cpu;
 
@@ -3970,6 +3971,24 @@ static void scx_bypass(struct scx_sched *sch, bool bypass)
 			      ktime_get_ns() - bypass_timestamp);
 	}
 
+	/*
+	 * Bypass state is propagated to all descendants - an scx_sched bypasses
+	 * if it or any of its ancestors is in bypass mode.
+	 */
+	raw_spin_lock(&scx_sched_lock);
+	scx_for_each_descendant_pre(pos, sch) {
+		if (pos == sch)
+			continue;
+		if (bypass) {
+			pos->bypass_depth++;
+			WARN_ON_ONCE(pos->bypass_depth <= 0);
+		} else {
+			pos->bypass_depth--;
+			WARN_ON_ONCE(pos->bypass_depth < 0);
+		}
+	}
+	raw_spin_unlock(&scx_sched_lock);
+
 	if (!scx_parent(sch))
 		atomic_inc(&scx_breather_depth);
 
@@ -3984,18 +4003,20 @@ static void scx_bypass(struct scx_sched *sch, bool bypass)
 	 */
 	for_each_possible_cpu(cpu) {
 		struct rq *rq = cpu_rq(cpu);
-		struct scx_sched_pcpu *pcpu = per_cpu_ptr(sch->pcpu, cpu);
 		struct task_struct *p, *n;
 
 		raw_spin_rq_lock(rq);
 
-		if (bypass) {
-			WARN_ON_ONCE(pcpu->flags & SCX_SCHED_PCPU_BYPASSING);
-			pcpu->flags |= SCX_SCHED_PCPU_BYPASSING;
-		} else {
-			WARN_ON_ONCE(!(pcpu->flags & SCX_SCHED_PCPU_BYPASSING));
-			pcpu->flags &= ~SCX_SCHED_PCPU_BYPASSING;
+		raw_spin_lock(&scx_sched_lock);
+		scx_for_each_descendant_pre(pos, sch) {
+			struct scx_sched_pcpu *pcpu = per_cpu_ptr(pos->pcpu, cpu);
+
+			if (pos->bypass_depth)
+				pcpu->flags |= SCX_SCHED_PCPU_BYPASSING;
+			else
+				pcpu->flags &= ~SCX_SCHED_PCPU_BYPASSING;
 		}
+		raw_spin_unlock(&scx_sched_lock);
 
 		/*
 		 * We need to guarantee that no tasks are on the BPF scheduler
@@ -4018,6 +4039,9 @@ static void scx_bypass(struct scx_sched *sch, bool bypass)
 						 scx.runnable_node) {
 			struct sched_enq_and_set_ctx ctx;
 
+			if (!scx_is_descendant(scx_task_sched(p), sch))
+				continue;
+
 			/* cycling deq/enq is enough, see the function comment */
 			sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);
 			sched_enq_and_set_task(&ctx);
-- 
2.51.0

