[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <8d64c74118c6440f81bcf5a4ac6b9f00@honor.com>
Date: Tue, 19 Aug 2025 06:55:38 +0000
From: liuwenfang <liuwenfang@...or.com>
To: 'Tejun Heo' <tj@...nel.org>
CC: 'David Vernet' <void@...ifault.com>, 'Andrea Righi' <arighi@...dia.com>,
'Changwoo Min' <changwoo@...lia.com>, 'Ingo Molnar' <mingo@...hat.com>,
'Peter Zijlstra' <peterz@...radead.org>, 'Juri Lelli'
<juri.lelli@...hat.com>, 'Vincent Guittot' <vincent.guittot@...aro.org>,
'Dietmar Eggemann' <dietmar.eggemann@....com>, 'Steven Rostedt'
<rostedt@...dmis.org>, 'Ben Segall' <bsegall@...gle.com>, 'Mel Gorman'
<mgorman@...e.de>, 'Valentin Schneider' <vschneid@...hat.com>,
"'linux-kernel@...r.kernel.org'" <linux-kernel@...r.kernel.org>
Subject: [PATCH v4 2/3] sched_ext: Fix cpu_released while RT task and SCX task
are scheduled concurrently
Suppose an RT task (RT1) is running on CPU0 and another RT task (RT2) is being
woken up on CPU1. RT1 goes to sleep and an SCX task (SCX1) is about to be
dispatched to CPU0, while RT2 is concurrently placed on CPU0:
CPU0(schedule) CPU1(try_to_wake_up)
set_current_state(TASK_INTERRUPTIBLE) try_to_wake_up # RT2
__schedule select_task_rq # CPU0 is selected
LOCK rq(0)->lock # lock CPU0 rq ttwu_queue
deactivate_task # RT1 LOCK rq(0)->lock # busy waiting
pick_next_task # no more RT tasks on rq |
prev_balance |
balance_scx |
balance_one |
rq->scx.cpu_released = false; |
consume_global_dsq |
consume_dispatch_q |
consume_remote_task |
UNLOCK rq(0)->lock V
LOCK rq(0)->lock # succ
deactivate_task # SCX1 ttwu_do_activate
LOCK rq(0)->lock # busy waiting activate_task # RT2 enqueued
| UNLOCK rq(0)->lock
V
LOCK rq(0)->lock # succ
activate_task # SCX1
pick_task # RT2 is picked
put_prev_set_next_task # prev is RT1, next is RT2, rq->scx.cpu_released = false;
UNLOCK rq(0)->lock
In the end, RT2 runs on CPU0 while rq->scx.cpu_released is still false, which
can lead the BPF scheduler to make incorrect decisions.
So, check the sched class of the next task in __put_prev_set_next_scx() and
call switch_class() when it is not an SCX task, so that rq->scx.cpu_released
is set correctly.
Signed-off-by: Wenfang Liu <liuwenfang@...or.com>
---
kernel/sched/ext.c | 8 ++------
kernel/sched/sched.h | 7 +++++++
2 files changed, 9 insertions(+), 6 deletions(-)
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index ba99739d7..98a05025b 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -3187,7 +3187,7 @@ preempt_reason_from_class(const struct sched_class *class)
return SCX_CPU_PREEMPT_UNKNOWN;
}
-static void switch_class(struct rq *rq, struct task_struct *next)
+void switch_class(struct rq *rq, struct task_struct *next)
{
const struct sched_class *next_class = next->sched_class;
@@ -3245,7 +3245,7 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p,
*/
if (p->scx.slice && !scx_rq_bypassing(rq)) {
dispatch_enqueue(&rq->scx.local_dsq, p, SCX_ENQ_HEAD);
- goto switch_class;
+ return;
}
/*
@@ -3261,10 +3261,6 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p,
do_enqueue_task(rq, p, 0, -1);
}
}
-
-switch_class:
- if (next && next->sched_class != &ext_sched_class)
- switch_class(rq, next);
}
static struct task_struct *first_local_task(struct rq *rq)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 435de61c4..e46becfed 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1738,6 +1738,8 @@ static inline void scx_rq_clock_invalidate(struct rq *rq)
WRITE_ONCE(rq->scx.flags, rq->scx.flags & ~SCX_RQ_CLK_VALID);
}
+extern void switch_class(struct rq *rq, struct task_struct *next);
+
static inline void __put_prev_set_next_scx(struct rq *rq,
struct task_struct *prev,
struct task_struct *next)
@@ -1753,6 +1755,11 @@ static inline void __put_prev_set_next_scx(struct rq *rq,
*/
smp_store_release(&rq->scx.pnt_seq, rq->scx.pnt_seq + 1);
#endif
+
+ if (next->sched_class == &ext_sched_class)
+ return;
+
+ switch_class(rq, next);
}
#else /* !CONFIG_SCHED_CLASS_EXT */
--
2.17.1
Powered by blists - more mailing lists