Message-ID: <20260203030400.3313990-1-realwujing@gmail.com>
Date: Mon,  2 Feb 2026 22:03:46 -0500
From: Qiliang Yuan <realwujing@...il.com>
To: Ingo Molnar <mingo@...hat.com>,
	Peter Zijlstra <peterz@...radead.org>,
	Juri Lelli <juri.lelli@...hat.com>,
	Vincent Guittot <vincent.guittot@...aro.org>,
	Tejun Heo <tj@...nel.org>,
	Andrea Righi <arighi@...dia.com>,
	Emil Tsalapatis <emil@...alapatis.com>,
	Qiliang Yuan <realwujing@...il.com>,
	Ryan Newton <newton@...a.com>,
	David Dai <david.dai@...ux.dev>,
	zhidao su <suzhidao@...omi.com>,
	Jake Hillion <jake@...lion.co.uk>
Cc: Qiliang Yuan <yuanql9@...natelecom.cn>,
	David Vernet <void@...ifault.com>,
	Changwoo Min <changwoo@...lia.com>,
	Dietmar Eggemann <dietmar.eggemann@....com>,
	Steven Rostedt <rostedt@...dmis.org>,
	Ben Segall <bsegall@...gle.com>,
	Mel Gorman <mgorman@...e.de>,
	Valentin Schneider <vschneid@...hat.com>,
	Dan Schatzberg <schatzberg.dan@...il.com>,
	sched-ext@...ts.linux.dev,
	linux-kernel@...r.kernel.org
Subject: [PATCH] sched/ext: Add cpumask to skip unsuitable dispatch queues

Add a cpumask field to struct scx_dispatch_q to track the union of
allowed CPUs for all tasks in the queue. Use this mask to perform an
O(1) check in consume_dispatch_q() before scanning the queue.

When a CPU attempts to consume from a queue, it currently iterates
the queue, in the worst case visiting all N tasks, to find one that
can run on that CPU. If the queue contains only tasks pinned to other
CPUs (via sched_setaffinity() or cgroup cpusets), the O(N) scan finds
nothing.

With the cpumask, a consuming CPU that is not in the allowed set can
skip the entire queue with a single bit test, turning the "queue is
unsuitable" case from O(N) into O(1).

The mask is extended when tasks are enqueued and cleared once the
queue becomes empty, preventing bits left behind by transient pinned
tasks from saturating it permanently.

This benefits large systems with CPU-pinned workloads, where CPUs
frequently scan queues containing no eligible tasks.

Signed-off-by: Qiliang Yuan <yuanql9@...natelecom.cn>
Signed-off-by: Qiliang Yuan <realwujing@...il.com>
---
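The idea is easy to model outside the kernel. The toy program below is
an illustration only, using a plain uint64_t in place of struct cpumask
and made-up names (toy_dsq, toy_enqueue, ...); it is not kernel code
and not part of this patch:

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	struct toy_dsq {
		uint64_t cpus_allowed;	/* union of queued tasks' masks */
		unsigned int nr;	/* number of queued tasks */
	};

	static void toy_enqueue(struct toy_dsq *q, uint64_t task_mask)
	{
		q->cpus_allowed |= task_mask;	/* mirrors cpumask_or() */
		q->nr++;
	}

	static void toy_dequeue(struct toy_dsq *q)
	{
		if (q->nr && --q->nr == 0)
			q->cpus_allowed = 0;	/* mirrors clear-on-empty */
	}

	static bool toy_may_consume(const struct toy_dsq *q, int cpu)
	{
		/* single bit test, mirrors the cpumask_test_cpu() check */
		return q->cpus_allowed & (1ULL << cpu);
	}

	int main(void)
	{
		struct toy_dsq q = { 0 };

		toy_enqueue(&q, 1ULL << 0);	/* task pinned to CPU 0 */
		toy_enqueue(&q, 1ULL << 3);	/* task pinned to CPU 3 */
		printf("CPU 1 may consume: %d\n", toy_may_consume(&q, 1));
		printf("CPU 3 may consume: %d\n", toy_may_consume(&q, 3));
		toy_dequeue(&q);
		toy_dequeue(&q);
		printf("after drain, CPU 3: %d\n", toy_may_consume(&q, 3));
		return 0;
	}
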
 include/linux/sched/ext.h |  1 +
 kernel/sched/ext.c        | 23 +++++++++++++++++++++--
 2 files changed, 22 insertions(+), 2 deletions(-)
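
For context, the pathological workload is trivial to set up from
userspace under a sched_ext scheduler that uses shared dispatch
queues. The sketch below is illustrative only (the CPU number and
task count are arbitrary): it pins a batch of busy-looping tasks to
CPU 0, so on a large machine every other CPU's consume attempt scans
a queue holding nothing it can run.

	#define _GNU_SOURCE
	#include <sched.h>
	#include <unistd.h>

	int main(void)
	{
		cpu_set_t set;

		CPU_ZERO(&set);
		CPU_SET(0, &set);	/* pin everything to CPU 0 */

		for (int i = 0; i < 128; i++) {
			if (fork() == 0) {
				sched_setaffinity(0, sizeof(set), &set);
				for (;;)
					sched_yield();	/* stay runnable */
			}
		}
		pause();	/* keep the pinned children alive */
		return 0;
	}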

diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
index bcb962d5ee7d..f20e57cf53a3 100644
--- a/include/linux/sched/ext.h
+++ b/include/linux/sched/ext.h
@@ -79,6 +79,7 @@ struct scx_dispatch_q {
 	struct rhash_head	hash_node;
 	struct llist_node	free_node;
 	struct rcu_head		rcu;
+	struct cpumask		*cpus_allowed; /* union of all tasks' allowed cpus */
 };
 
 /* scx_entity.flags */
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index afe28c04d5aa..5a060c97cd64 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -1120,8 +1120,12 @@ static void dispatch_enqueue(struct scx_sched *sch, struct scx_dispatch_q *dsq,
 
-	if (is_local)
+	if (is_local) {
 		local_dsq_post_enq(dsq, p, enq_flags);
-	else
+	} else {
+		/* Update cpumask to track union of all tasks' allowed CPUs */
+		if (dsq->cpus_allowed)
+			cpumask_or(dsq->cpus_allowed, dsq->cpus_allowed, p->cpus_ptr);
 		raw_spin_unlock(&dsq->lock);
+	}
 }
 
 static void task_unlink_from_dsq(struct task_struct *p,
@@ -1138,6 +1142,10 @@ static void task_unlink_from_dsq(struct task_struct *p,
 	list_del_init(&p->scx.dsq_list.node);
 	dsq_mod_nr(dsq, -1);
 
+	/* Clear cpumask when queue becomes empty to prevent saturation */
+	if (dsq->nr == 0 && dsq->cpus_allowed)
+		cpumask_clear(dsq->cpus_allowed);
+
 	if (!(dsq->id & SCX_DSQ_FLAG_BUILTIN) && dsq->first_task == p) {
 		struct task_struct *first_task;
 
@@ -1897,6 +1905,14 @@ static bool consume_dispatch_q(struct scx_sched *sch, struct rq *rq,
 	if (list_empty(&dsq->list))
 		return false;
 
+	/*
+	 * O(1) fast path: if this CPU is not in the union of the queued
+	 * tasks' allowed CPUs, no queued task can run here, so the whole
+	 * scan can be skipped. Like list_empty() above, this is lockless.
+	 */
+	if (dsq->cpus_allowed && !cpumask_test_cpu(cpu_of(rq), dsq->cpus_allowed))
+		return false;
+
 	raw_spin_lock(&dsq->lock);
 
 	nldsq_for_each_task(p, dsq) {
@@ -3397,6 +3413,9 @@ static void init_dsq(struct scx_dispatch_q *dsq, u64 dsq_id)
 	raw_spin_lock_init(&dsq->lock);
 	INIT_LIST_HEAD(&dsq->list);
 	dsq->id = dsq_id;
+
+	/* Allocate the union mask; NULL just disables the O(1) fast path */
+	dsq->cpus_allowed = kzalloc(cpumask_size(), GFP_KERNEL);
 }
 
 static void free_dsq_irq_workfn(struct irq_work *irq_work)
-- 
2.51.0

