Message-ID: <228ebd9e6ed3437996dffe15735a9caa@honor.com>
Date: Tue, 19 Aug 2025 06:52:03 +0000
From: liuwenfang <liuwenfang@...or.com>
To: 'Tejun Heo' <tj@...nel.org>
CC: 'David Vernet' <void@...ifault.com>, 'Andrea Righi' <arighi@...dia.com>,
	'Changwoo Min' <changwoo@...lia.com>, 'Ingo Molnar' <mingo@...hat.com>,
	'Peter Zijlstra' <peterz@...radead.org>, 'Juri Lelli'
	<juri.lelli@...hat.com>, 'Vincent Guittot' <vincent.guittot@...aro.org>,
	'Dietmar Eggemann' <dietmar.eggemann@....com>, 'Steven Rostedt'
	<rostedt@...dmis.org>, 'Ben Segall' <bsegall@...gle.com>, 'Mel Gorman'
	<mgorman@...e.de>, 'Valentin Schneider' <vschneid@...hat.com>,
	"'linux-kernel@...r.kernel.org'" <linux-kernel@...r.kernel.org>
Subject: [PATCH v4 1/3] sched_ext: Fix pnt_seq calculation when picking the
 next task

Currently, rq->scx.pnt_seq is only incremented when the target CPU
switches from an SCX task to a non-SCX task, so the paired CPU in
scx_pair may not exit its busy-wait state when it should.

In scx_pair, rq->scx.pnt_seq is used to improve the exclusion
guarantees: the invoking CPU calls scx_bpf_kick_cpu() with
SCX_KICK_WAIT and enters a busy-wait state, which it should leave
once the target CPU has entered the rescheduling path and
incremented rq->scx.pnt_seq.

Therefore, move the pnt_seq update into put_prev_set_next_task() so
that it is incremented on every task switch on the target CPU,
allowing the invoking CPU to exit the busy-wait state properly.

Signed-off-by: Wenfang Liu <liuwenfang@...or.com>
---
 kernel/sched/ext.c   | 10 +---------
 kernel/sched/fair.c  |  2 +-
 kernel/sched/sched.h | 30 +++++++++++++++++++++++++++++-
 3 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index f5133249f..ba99739d7 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -3191,14 +3191,6 @@ static void switch_class(struct rq *rq, struct task_struct *next)
 {
 	const struct sched_class *next_class = next->sched_class;
 
-#ifdef CONFIG_SMP
-	/*
-	 * Pairs with the smp_load_acquire() issued by a CPU in
-	 * kick_cpus_irq_workfn() who is waiting for this CPU to perform a
-	 * resched.
-	 */
-	smp_store_release(&rq->scx.pnt_seq, rq->scx.pnt_seq + 1);
-#endif
 	if (!static_branch_unlikely(&scx_ops_cpu_preempt))
 		return;
 
@@ -5966,7 +5958,7 @@ static void kick_cpus_irq_workfn(struct irq_work *irq_work)
 		if (cpu != cpu_of(this_rq)) {
 			/*
 			 * Pairs with smp_store_release() issued by this CPU in
-			 * switch_class() on the resched path.
+			 * __put_prev_set_next_scx() on the resched path.
 			 *
 			 * We busy-wait here to guarantee that no other task can
 			 * be scheduled on our core before the target CPU has
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0fb9bf995..21214b3fa 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8885,7 +8885,7 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
 	if (prev->sched_class != &fair_sched_class)
 		goto simple;
 
-	__put_prev_set_next_dl_server(rq, prev, p);
+	__put_prev_set_next(rq, prev, p);
 
 	/*
 	 * Because of the set_next_buddy() in dequeue_task_fair() it is rather
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 47972f34e..435de61c4 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1738,12 +1738,32 @@ static inline void scx_rq_clock_invalidate(struct rq *rq)
 	WRITE_ONCE(rq->scx.flags, rq->scx.flags & ~SCX_RQ_CLK_VALID);
 }
 
+static inline void __put_prev_set_next_scx(struct rq *rq,
+					   struct task_struct *prev,
+					   struct task_struct *next)
+{
+	if (!scx_enabled())
+		return;
+
+#ifdef CONFIG_SMP
+	/*
+	 * Pairs with the smp_load_acquire() issued by a CPU in
+	 * kick_cpus_irq_workfn() who is waiting for this CPU to perform a
+	 * resched.
+	 */
+	smp_store_release(&rq->scx.pnt_seq, rq->scx.pnt_seq + 1);
+#endif
+}
+
 #else /* !CONFIG_SCHED_CLASS_EXT */
 #define scx_enabled()		false
 #define scx_switched_all()	false
 
 static inline void scx_rq_clock_update(struct rq *rq, u64 clock) {}
 static inline void scx_rq_clock_invalidate(struct rq *rq) {}
+static inline void __put_prev_set_next_scx(struct rq *rq,
+					   struct task_struct *prev,
+					   struct task_struct *next) {}
 #endif /* !CONFIG_SCHED_CLASS_EXT */
 
 /*
@@ -2457,13 +2477,21 @@ __put_prev_set_next_dl_server(struct rq *rq,
 	rq->dl_server = NULL;
 }
 
+static inline void __put_prev_set_next(struct rq *rq,
+				       struct task_struct *prev,
+				       struct task_struct *next)
+{
+	__put_prev_set_next_dl_server(rq, prev, next);
+	__put_prev_set_next_scx(rq, prev, next);
+}
+
 static inline void put_prev_set_next_task(struct rq *rq,
 					  struct task_struct *prev,
 					  struct task_struct *next)
 {
 	WARN_ON_ONCE(rq->curr != prev);
 
-	__put_prev_set_next_dl_server(rq, prev, next);
+	__put_prev_set_next(rq, prev, next);
 
 	if (next == prev)
 		return;
-- 
2.17.1
