Message-ID: <228ebd9e6ed3437996dffe15735a9caa@honor.com>
Date: Tue, 19 Aug 2025 06:52:03 +0000
From: liuwenfang <liuwenfang@...or.com>
To: 'Tejun Heo' <tj@...nel.org>
CC: 'David Vernet' <void@...ifault.com>, 'Andrea Righi' <arighi@...dia.com>,
'Changwoo Min' <changwoo@...lia.com>, 'Ingo Molnar' <mingo@...hat.com>,
'Peter Zijlstra' <peterz@...radead.org>, 'Juri Lelli'
<juri.lelli@...hat.com>, 'Vincent Guittot' <vincent.guittot@...aro.org>,
'Dietmar Eggemann' <dietmar.eggemann@....com>, 'Steven Rostedt'
<rostedt@...dmis.org>, 'Ben Segall' <bsegall@...gle.com>, 'Mel Gorman'
<mgorman@...e.de>, 'Valentin Schneider' <vschneid@...hat.com>,
"'linux-kernel@...r.kernel.org'" <linux-kernel@...r.kernel.org>
Subject: [PATCH v4 1/3] sched_ext: Fix pnt_seq calculation when picking the
next task

Currently, rq->scx.pnt_seq is only incremented when the target CPU
switches from an SCX task to a non-SCX task. So when the target CPU
switches from one SCX task to another, the paired CPU in scx_pair
never sees the counter change and fails to exit its busy-wait state.

In scx_pair, rq->scx.pnt_seq was introduced to strengthen the
exclusion guarantees. The invoking CPU calls scx_bpf_kick_cpu() with
SCX_KICK_WAIT and enters a busy-wait state; it should leave that
state once the target CPU has gone through the rescheduling path and
incremented rq->scx.pnt_seq.
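
As an illustration, here is a minimal userspace sketch of that
handshake, using C11 atomics in place of the kernel's
smp_store_release()/smp_load_acquire(). The thread structure and
names are hypothetical; this only models the pairing, not the kernel
code itself:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_ulong pnt_seq;	/* stands in for rq->scx.pnt_seq */

/* "Target CPU": bumps the counter on its pass through the resched path. */
static void *target_cpu(void *arg)
{
	(void)arg;
	/* Release-increment; pairs with the acquire-load in the waiter. */
	atomic_fetch_add_explicit(&pnt_seq, 1, memory_order_release);
	return NULL;
}

int main(void)
{
	pthread_t t;
	/* "Invoking CPU": sample the sequence before kicking the target. */
	unsigned long seq = atomic_load_explicit(&pnt_seq,
						 memory_order_relaxed);

	pthread_create(&t, NULL, target_cpu, NULL);	/* the "kick" */

	/* Busy-wait until the target has rescheduled at least once. */
	while (atomic_load_explicit(&pnt_seq, memory_order_acquire) == seq)
		;	/* cpu_relax() in the kernel */

	pthread_join(t, NULL);
	printf("target rescheduled; waiter may proceed\n");
	return 0;
}

(The kernel can get away with a plain read plus smp_store_release()
instead of an atomic RMW because only the owning CPU ever writes its
own pnt_seq.)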

Move the pnt_seq update to put_prev_set_next_task() so that it is
incremented on every task switch on the target CPU, allowing the
invoking CPU to exit the busy-wait state reliably.
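
Sketching the resulting call flow from the diff below (simplified):

	put_prev_set_next_task()		/* common path */
	  __put_prev_set_next()
	    __put_prev_set_next_dl_server()
	    __put_prev_set_next_scx()		/* bumps rq->scx.pnt_seq */

	pick_next_task_fair()			/* fair fast path */
	  __put_prev_set_next()			/* same helpers as above */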

Signed-off-by: Wenfang Liu <liuwenfang@...or.com>
---
kernel/sched/ext.c | 10 +---------
kernel/sched/fair.c | 2 +-
kernel/sched/sched.h | 30 +++++++++++++++++++++++++++++-
3 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index f5133249f..ba99739d7 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -3191,14 +3191,6 @@ static void switch_class(struct rq *rq, struct task_struct *next)
{
const struct sched_class *next_class = next->sched_class;

-#ifdef CONFIG_SMP
- /*
- * Pairs with the smp_load_acquire() issued by a CPU in
- * kick_cpus_irq_workfn() who is waiting for this CPU to perform a
- * resched.
- */
- smp_store_release(&rq->scx.pnt_seq, rq->scx.pnt_seq + 1);
-#endif

if (!static_branch_unlikely(&scx_ops_cpu_preempt))
return;
@@ -5966,7 +5958,7 @@ static void kick_cpus_irq_workfn(struct irq_work *irq_work)
if (cpu != cpu_of(this_rq)) {
/*
* Pairs with smp_store_release() issued by this CPU in
- * switch_class() on the resched path.
+ * __put_prev_set_next_scx() on the resched path.
*
* We busy-wait here to guarantee that no other task can
* be scheduled on our core before the target CPU has
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0fb9bf995..21214b3fa 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8885,7 +8885,7 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
if (prev->sched_class != &fair_sched_class)
goto simple;

- __put_prev_set_next_dl_server(rq, prev, p);
+ __put_prev_set_next(rq, prev, p);

/*
* Because of the set_next_buddy() in dequeue_task_fair() it is rather
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 47972f34e..435de61c4 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1738,12 +1738,32 @@ static inline void scx_rq_clock_invalidate(struct rq *rq)
WRITE_ONCE(rq->scx.flags, rq->scx.flags & ~SCX_RQ_CLK_VALID);
}

+static inline void __put_prev_set_next_scx(struct rq *rq,
+ struct task_struct *prev,
+ struct task_struct *next)
+{
+ if (!scx_enabled())
+ return;
+
+#ifdef CONFIG_SMP
+ /*
+ * Pairs with the smp_load_acquire() issued by a CPU in
+ * kick_cpus_irq_workfn() who is waiting for this CPU to perform a
+ * resched.
+ */
+ smp_store_release(&rq->scx.pnt_seq, rq->scx.pnt_seq + 1);
+#endif
+}
+
#else /* !CONFIG_SCHED_CLASS_EXT */

#define scx_enabled() false
#define scx_switched_all() false

static inline void scx_rq_clock_update(struct rq *rq, u64 clock) {}
static inline void scx_rq_clock_invalidate(struct rq *rq) {}
+static inline void __put_prev_set_next_scx(struct rq *rq,
+ struct task_struct *prev,
+ struct task_struct *next) {}

#endif /* !CONFIG_SCHED_CLASS_EXT */

/*
@@ -2457,13 +2477,21 @@ __put_prev_set_next_dl_server(struct rq *rq,
rq->dl_server = NULL;
}

+static inline void __put_prev_set_next(struct rq *rq,
+ struct task_struct *prev,
+ struct task_struct *next)
+{
+ __put_prev_set_next_dl_server(rq, prev, next);
+ __put_prev_set_next_scx(rq, prev, next);
+}
+
static inline void put_prev_set_next_task(struct rq *rq,
struct task_struct *prev,
struct task_struct *next)
{
WARN_ON_ONCE(rq->curr != prev);

- __put_prev_set_next_dl_server(rq, prev, next);
+ __put_prev_set_next(rq, prev, next);

if (next == prev)
return;
--
2.17.1