[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ZnSp5mVp3uhYganb@slm.duckdns.org>
Date: Thu, 20 Jun 2024 12:15:02 -1000
From: Tejun Heo <tj@...nel.org>
To: Linus Torvalds <torvalds@...ux-foundation.org>
Cc: Thomas Gleixner <tglx@...utronix.de>, mingo@...hat.com,
peterz@...radead.org, juri.lelli@...hat.com,
vincent.guittot@...aro.org, dietmar.eggemann@....com,
rostedt@...dmis.org, bsegall@...gle.com, mgorman@...e.de,
bristot@...hat.com, vschneid@...hat.com, ast@...nel.org,
daniel@...earbox.net, andrii@...nel.org, martin.lau@...nel.org,
joshdon@...gle.com, brho@...gle.com, pjt@...gle.com,
derkling@...gle.com, haoluo@...gle.com, dvernet@...a.com,
dschatzberg@...a.com, dskarlat@...cmu.edu, riel@...riel.com,
changwoo@...lia.com, himadrics@...ia.fr, memxor@...il.com,
andrea.righi@...onical.com, joel@...lfernandes.org,
linux-kernel@...r.kernel.org, bpf@...r.kernel.org,
kernel-team@...a.com
Subject: [PATCH sched_ext/for-6.11] sched, sched_ext: Replace
scx_next_task_picked() with sched_class->switch_class()
scx_next_task_picked() is used by sched_ext to notify the BPF scheduler when
a CPU is taken away by a task dispatched from a higher priority sched_class
so that the BPF scheduler can, e.g., punt the task that was running on the
CPU, or the tasks that were waiting for it, to other CPUs.
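Replace the sched_ext specific hook scx_next_task_picked() with a new
sched_class operation switch_class(), which is invoked on the previous
task's sched_class when the next task is picked from a different class.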
The changes are straightforward and the code looks better afterwards.
However, when !CONFIG_SCHED_CLASS_EXT, this just ends up adding an unused
hook which is unlikely to be useful to other sched_classes. We could guard
the op with #ifdef CONFIG_SCHED_CLASS_EXT, but then I'm not sure the code
necessarily looks better afterwards.
Please let me know the preference. If adding #ifdef's is preferable, that's
okay too.
Signed-off-by: Tejun Heo <tj@...nel.org>
Suggested-by: Linus Torvalds <torvalds@...ux-foundation.org>
Cc: Ingo Molnar <mingo@...hat.com>
Cc: Peter Zijlstra <peterz@...radead.org>
Cc: Thomas Gleixner <tglx@...utronix.de>
---
kernel/sched/core.c | 5 ++++-
kernel/sched/ext.c | 20 ++++++++++----------
kernel/sched/ext.h | 4 ----
kernel/sched/sched.h | 2 ++
4 files changed, 16 insertions(+), 15 deletions(-)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5907,7 +5907,10 @@ restart:
for_each_active_class(class) {
p = class->pick_next_task(rq);
if (p) {
- scx_next_task_picked(rq, p, class);
+ const struct sched_class *prev_class = prev->sched_class;
+
+ if (class != prev_class && prev_class->switch_class)
+ prev_class->switch_class(rq, p);
return p;
}
}
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -2749,10 +2749,9 @@ preempt_reason_from_class(const struct s
return SCX_CPU_PREEMPT_UNKNOWN;
}
-void scx_next_task_picked(struct rq *rq, struct task_struct *p,
- const struct sched_class *active)
+static void switch_class_scx(struct rq *rq, struct task_struct *next)
{
- lockdep_assert_rq_held(rq);
+ const struct sched_class *next_class = next->sched_class;
if (!scx_enabled())
return;
@@ -2769,12 +2768,11 @@ void scx_next_task_picked(struct rq *rq,
/*
* The callback is conceptually meant to convey that the CPU is no
- * longer under the control of SCX. Therefore, don't invoke the
- * callback if the CPU is is staying on SCX, or going idle (in which
- * case the SCX scheduler has actively decided not to schedule any
- * tasks on the CPU).
+ * longer under the control of SCX. Therefore, don't invoke the callback
+ * if the next class is below SCX (in which case the BPF scheduler has
+ * actively decided not to schedule any tasks on the CPU).
*/
- if (likely(active >= &ext_sched_class))
+ if (sched_class_above(&ext_sched_class, next_class))
return;
/*
@@ -2789,8 +2787,8 @@ void scx_next_task_picked(struct rq *rq,
if (!rq->scx.cpu_released) {
if (SCX_HAS_OP(cpu_release)) {
struct scx_cpu_release_args args = {
- .reason = preempt_reason_from_class(active),
- .task = p,
+ .reason = preempt_reason_from_class(next_class),
+ .task = next,
};
SCX_CALL_OP(SCX_KF_CPU_RELEASE,
@@ -3496,6 +3494,8 @@ DEFINE_SCHED_CLASS(ext) = {
.put_prev_task = put_prev_task_scx,
.set_next_task = set_next_task_scx,
+ .switch_class = switch_class_scx,
+
#ifdef CONFIG_SMP
.balance = balance_scx,
.select_task_rq = select_task_rq_scx,
--- a/kernel/sched/ext.h
+++ b/kernel/sched/ext.h
@@ -33,8 +33,6 @@ static inline bool task_on_scx(const str
return scx_enabled() && p->sched_class == &ext_sched_class;
}
-void scx_next_task_picked(struct rq *rq, struct task_struct *p,
- const struct sched_class *active);
void scx_tick(struct rq *rq);
void init_scx_entity(struct sched_ext_entity *scx);
void scx_pre_fork(struct task_struct *p);
@@ -82,8 +80,6 @@ bool scx_prio_less(const struct task_str
#define scx_enabled() false
#define scx_switched_all() false
-static inline void scx_next_task_picked(struct rq *rq, struct task_struct *p,
- const struct sched_class *active) {}
static inline void scx_tick(struct rq *rq) {}
static inline void scx_pre_fork(struct task_struct *p) {}
static inline int scx_fork(struct task_struct *p) { return 0; }
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2355,6 +2355,8 @@ struct sched_class {
void (*put_prev_task)(struct rq *rq, struct task_struct *p);
void (*set_next_task)(struct rq *rq, struct task_struct *p, bool first);
+ void (*switch_class)(struct rq *rq, struct task_struct *next);
+
#ifdef CONFIG_SMP
int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
int (*select_task_rq)(struct task_struct *p, int task_cpu, int flags);
Powered by blists - more mailing lists