Message-Id: <20241030152142.488737132@infradead.org>
Date: Wed, 30 Oct 2024 16:12:57 +0100
From: Peter Zijlstra <peterz@...radead.org>
To: mingo@...nel.org
Cc: peterz@...radead.org,
juri.lelli@...hat.com,
vincent.guittot@...aro.org,
dietmar.eggemann@....com,
rostedt@...dmis.org,
bsegall@...gle.com,
mgorman@...e.de,
vschneid@...hat.com,
tj@...nel.org,
void@...ifault.com,
linux-kernel@...r.kernel.org
Subject: [RFC][PATCH 2/6] sched: Employ sched_change guards
As proposed a long while ago -- and half done by scx -- wrap the
scheduler's 'change' pattern in a guard helper.
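
To make the shape of the change concrete, the open-coded pattern being
replaced looks roughly like this at each call site (simplified sketch;
details such as the exact flags and clock handling vary per site):

	queued  = task_on_rq_queued(p);
	running = task_current_donor(rq, p);
	if (queued)
		dequeue_task(rq, p, queue_flags);
	if (running)
		put_prev_task(rq, p);

	/* ... change p's scheduling properties ... */

	if (queued)
		enqueue_task(rq, p, queue_flags);
	if (running)
		set_next_task(rq, p);

With the guard the same site becomes:

	scoped_guard (sched_change, p, queue_flags) {
		/* ... change p's scheduling properties ... */
		/* extra enqueue-side flags can be added via scope.flags */
	}

where sched_change_begin() performs the dequeue/put_prev half,
sched_change_end() the enqueue/set_next half, and scope.queued /
scope.running expose the saved state inside the block.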
Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
---
include/linux/cleanup.h | 5 +
kernel/sched/core.c | 158 ++++++++++++++++++------------------------------
kernel/sched/ext.c | 33 +++-------
kernel/sched/sched.h | 21 +++---
kernel/sched/syscalls.c | 65 ++++++-------------
5 files changed, 112 insertions(+), 170 deletions(-)
--- a/include/linux/cleanup.h
+++ b/include/linux/cleanup.h
@@ -297,6 +297,11 @@ static inline class_##_name##_t class_##
#define __DEFINE_CLASS_IS_CONDITIONAL(_name, _is_cond) \
static __maybe_unused const bool class_##_name##_is_conditional = _is_cond
+#define DEFINE_CLASS_IS_UNCONDITIONAL(_name) \
+ __DEFINE_CLASS_IS_CONDITIONAL(_name, false); \
+ static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \
+ { return (void *)1; }
+
#define DEFINE_GUARD(_name, _type, _lock, _unlock) \
__DEFINE_CLASS_IS_CONDITIONAL(_name, false); \
DEFINE_CLASS(_name, _type, if (_T) { _unlock; }, ({ _lock; _T; }), _type _T); \
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7099,7 +7099,7 @@ void rt_mutex_post_schedule(void)
*/
void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
{
- int prio, oldprio, queued, running, queue_flag =
+ int prio, oldprio, queue_flag =
DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
const struct sched_class *prev_class, *next_class;
struct rq_flags rf;
@@ -7164,52 +7164,42 @@ void rt_mutex_setprio(struct task_struct
if (prev_class != next_class && p->se.sched_delayed)
dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
- queued = task_on_rq_queued(p);
- running = task_current_donor(rq, p);
- if (queued)
- dequeue_task(rq, p, queue_flag);
- if (running)
- put_prev_task(rq, p);
-
- /*
- * Boosting condition are:
- * 1. -rt task is running and holds mutex A
- * --> -dl task blocks on mutex A
- *
- * 2. -dl task is running and holds mutex A
- * --> -dl task blocks on mutex A and could preempt the
- * running task
- */
- if (dl_prio(prio)) {
- if (!dl_prio(p->normal_prio) ||
- (pi_task && dl_prio(pi_task->prio) &&
- dl_entity_preempt(&pi_task->dl, &p->dl))) {
- p->dl.pi_se = pi_task->dl.pi_se;
- queue_flag |= ENQUEUE_REPLENISH;
+ scoped_guard (sched_change, p, queue_flag) {
+ /*
+ * Boosting condition are:
+ * 1. -rt task is running and holds mutex A
+ * --> -dl task blocks on mutex A
+ *
+ * 2. -dl task is running and holds mutex A
+ * --> -dl task blocks on mutex A and could preempt the
+ * running task
+ */
+ if (dl_prio(prio)) {
+ if (!dl_prio(p->normal_prio) ||
+ (pi_task && dl_prio(pi_task->prio) &&
+ dl_entity_preempt(&pi_task->dl, &p->dl))) {
+ p->dl.pi_se = pi_task->dl.pi_se;
+ scope.flags |= ENQUEUE_REPLENISH;
+ } else {
+ p->dl.pi_se = &p->dl;
+ }
+ } else if (rt_prio(prio)) {
+ if (dl_prio(oldprio))
+ p->dl.pi_se = &p->dl;
+ if (oldprio < prio)
+ scope.flags |= ENQUEUE_HEAD;
} else {
- p->dl.pi_se = &p->dl;
+ if (dl_prio(oldprio))
+ p->dl.pi_se = &p->dl;
+ if (rt_prio(oldprio))
+ p->rt.timeout = 0;
}
- } else if (rt_prio(prio)) {
- if (dl_prio(oldprio))
- p->dl.pi_se = &p->dl;
- if (oldprio < prio)
- queue_flag |= ENQUEUE_HEAD;
- } else {
- if (dl_prio(oldprio))
- p->dl.pi_se = &p->dl;
- if (rt_prio(oldprio))
- p->rt.timeout = 0;
- }
-
- p->sched_class = next_class;
- p->prio = prio;
- check_class_changing(rq, p, prev_class);
+ p->sched_class = next_class;
+ p->prio = prio;
- if (queued)
- enqueue_task(rq, p, queue_flag);
- if (running)
- set_next_task(rq, p);
+ check_class_changing(rq, p, prev_class);
+ }
check_class_changed(rq, p, prev_class, oldprio);
out_unlock:
@@ -7819,26 +7809,9 @@ int migrate_task_to(struct task_struct *
*/
void sched_setnuma(struct task_struct *p, int nid)
{
- bool queued, running;
- struct rq_flags rf;
- struct rq *rq;
-
- rq = task_rq_lock(p, &rf);
- queued = task_on_rq_queued(p);
- running = task_current_donor(rq, p);
-
- if (queued)
- dequeue_task(rq, p, DEQUEUE_SAVE);
- if (running)
- put_prev_task(rq, p);
-
- p->numa_preferred_nid = nid;
-
- if (queued)
- enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
- if (running)
- set_next_task(rq, p);
- task_rq_unlock(rq, p, &rf);
+ guard(task_rq_lock)(p);
+ scoped_guard (sched_change, p, DEQUEUE_SAVE)
+ p->numa_preferred_nid = nid;
}
#endif /* CONFIG_NUMA_BALANCING */
@@ -8957,9 +8930,10 @@ static void sched_change_group(struct ta
*/
void sched_move_task(struct task_struct *tsk)
{
- int queued, running, queue_flags =
+ unsigned int queue_flags =
DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
struct task_group *group;
+ bool resched = false;
struct rq *rq;
CLASS(task_rq_lock, rq_guard)(tsk);
@@ -8975,21 +8949,14 @@ void sched_move_task(struct task_struct
update_rq_clock(rq);
- running = task_current_donor(rq, tsk);
- queued = task_on_rq_queued(tsk);
+ scoped_guard (sched_change, tsk, queue_flags) {
+ sched_change_group(tsk, group);
+ scx_move_task(tsk);
+ if (scope.running)
+ resched = true;
+ }
- if (queued)
- dequeue_task(rq, tsk, queue_flags);
- if (running)
- put_prev_task(rq, tsk);
-
- sched_change_group(tsk, group);
- scx_move_task(tsk);
-
- if (queued)
- enqueue_task(rq, tsk, queue_flags);
- if (running) {
- set_next_task(rq, tsk);
+ if (resched) {
/*
* After changing group, the running task may have joined a
* throttled one but it's still the running task. Trigger a
@@ -10580,37 +10547,34 @@ void sched_mm_cid_fork(struct task_struc
}
#endif
-#ifdef CONFIG_SCHED_CLASS_EXT
-void sched_deq_and_put_task(struct task_struct *p, int queue_flags,
- struct sched_enq_and_set_ctx *ctx)
+struct sched_change_ctx sched_change_begin(struct task_struct *p, unsigned int flags)
{
struct rq *rq = task_rq(p);
-
- lockdep_assert_rq_held(rq);
-
- *ctx = (struct sched_enq_and_set_ctx){
+ struct sched_change_ctx ctx = {
.p = p,
- .queue_flags = queue_flags,
+ .flags = flags,
.queued = task_on_rq_queued(p),
.running = task_current(rq, p),
};
- update_rq_clock(rq);
- if (ctx->queued)
- dequeue_task(rq, p, queue_flags | DEQUEUE_NOCLOCK);
- if (ctx->running)
+ lockdep_assert_rq_held(rq);
+
+ if (ctx.queued)
+ dequeue_task(rq, p, flags);
+ if (ctx.running)
put_prev_task(rq, p);
+
+ return ctx;
}
-void sched_enq_and_set_task(struct sched_enq_and_set_ctx *ctx)
+void sched_change_end(struct sched_change_ctx ctx)
{
- struct rq *rq = task_rq(ctx->p);
+ struct rq *rq = task_rq(ctx.p);
lockdep_assert_rq_held(rq);
- if (ctx->queued)
- enqueue_task(rq, ctx->p, ctx->queue_flags | ENQUEUE_NOCLOCK);
- if (ctx->running)
- set_next_task(rq, ctx->p);
+ if (ctx.queued)
+ enqueue_task(rq, ctx.p, ctx.flags | ENQUEUE_NOCLOCK);
+ if (ctx.running)
+ set_next_task(rq, ctx.p);
}
-#endif /* CONFIG_SCHED_CLASS_EXT */
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -4355,11 +4355,10 @@ static void scx_ops_bypass(bool bypass)
*/
list_for_each_entry_safe_reverse(p, n, &rq->scx.runnable_list,
scx.runnable_node) {
- struct sched_enq_and_set_ctx ctx;
-
/* cycling deq/enq is enough, see the function comment */
- sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);
- sched_enq_and_set_task(&ctx);
+ scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_MOVE) {
+ /* nothing */ ;
+ }
}
rq_unlock_irqrestore(rq, &rf);
@@ -4491,17 +4490,14 @@ static void scx_ops_disable_workfn(struc
const struct sched_class *old_class = p->sched_class;
const struct sched_class *new_class =
__setscheduler_class(p->policy, p->prio);
- struct sched_enq_and_set_ctx ctx;
if (old_class != new_class && p->se.sched_delayed)
dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
- sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);
-
- p->sched_class = new_class;
- check_class_changing(task_rq(p), p, old_class);
-
- sched_enq_and_set_task(&ctx);
+ scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_MOVE) {
+ p->sched_class = new_class;
+ check_class_changing(task_rq(p), p, old_class);
+ }
check_class_changed(task_rq(p), p, old_class, p->prio);
scx_ops_exit_task(p);
@@ -5206,18 +5202,15 @@ static int scx_ops_enable(struct sched_e
const struct sched_class *old_class = p->sched_class;
const struct sched_class *new_class =
__setscheduler_class(p->policy, p->prio);
- struct sched_enq_and_set_ctx ctx;
if (old_class != new_class && p->se.sched_delayed)
dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
- sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);
-
- p->scx.slice = SCX_SLICE_DFL;
- p->sched_class = new_class;
- check_class_changing(task_rq(p), p, old_class);
-
- sched_enq_and_set_task(&ctx);
+ scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_MOVE) {
+ p->scx.slice = SCX_SLICE_DFL;
+ p->sched_class = new_class;
+ check_class_changing(task_rq(p), p, old_class);
+ }
check_class_changed(task_rq(p), p, old_class, p->prio);
}
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3921,23 +3921,22 @@ static inline void balance_callbacks(str
#endif
-#ifdef CONFIG_SCHED_CLASS_EXT
-/*
- * Used by SCX in the enable/disable paths to move tasks between sched_classes
- * and establish invariants.
- */
-struct sched_enq_and_set_ctx {
+struct sched_change_ctx {
struct task_struct *p;
- int queue_flags;
+ unsigned int flags;
bool queued;
bool running;
};
-void sched_deq_and_put_task(struct task_struct *p, int queue_flags,
- struct sched_enq_and_set_ctx *ctx);
-void sched_enq_and_set_task(struct sched_enq_and_set_ctx *ctx);
+struct sched_change_ctx sched_change_begin(struct task_struct *p, unsigned int flags);
+void sched_change_end(struct sched_change_ctx ctx);
-#endif /* CONFIG_SCHED_CLASS_EXT */
+DEFINE_CLASS(sched_change, struct sched_change_ctx,
+ sched_change_end(_T),
+ sched_change_begin(p, flags),
+ struct task_struct *p, unsigned int flags)
+
+DEFINE_CLASS_IS_UNCONDITIONAL(sched_change)
#include "ext.h"
--- a/kernel/sched/syscalls.c
+++ b/kernel/sched/syscalls.c
@@ -64,7 +64,6 @@ static int effective_prio(struct task_st
void set_user_nice(struct task_struct *p, long nice)
{
- bool queued, running;
struct rq *rq;
int old_prio;
@@ -90,22 +89,12 @@ void set_user_nice(struct task_struct *p
return;
}
- queued = task_on_rq_queued(p);
- running = task_current_donor(rq, p);
- if (queued)
- dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK);
- if (running)
- put_prev_task(rq, p);
-
- p->static_prio = NICE_TO_PRIO(nice);
- set_load_weight(p, true);
- old_prio = p->prio;
- p->prio = effective_prio(p);
-
- if (queued)
- enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
- if (running)
- set_next_task(rq, p);
+ scoped_guard (sched_change, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK) {
+ p->static_prio = NICE_TO_PRIO(nice);
+ set_load_weight(p, true);
+ old_prio = p->prio;
+ p->prio = effective_prio(p);
+ }
/*
* If the task increased its priority or is running and
@@ -528,7 +517,7 @@ int __sched_setscheduler(struct task_str
bool user, bool pi)
{
int oldpolicy = -1, policy = attr->sched_policy;
- int retval, oldprio, newprio, queued, running;
+ int retval, oldprio, newprio;
const struct sched_class *prev_class, *next_class;
struct balance_callback *head;
struct rq_flags rf;
@@ -712,33 +701,25 @@ int __sched_setscheduler(struct task_str
if (prev_class != next_class && p->se.sched_delayed)
dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
- queued = task_on_rq_queued(p);
- running = task_current_donor(rq, p);
- if (queued)
- dequeue_task(rq, p, queue_flags);
- if (running)
- put_prev_task(rq, p);
-
- if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) {
- __setscheduler_params(p, attr);
- p->sched_class = next_class;
- p->prio = newprio;
- }
- __setscheduler_uclamp(p, attr);
- check_class_changing(rq, p, prev_class);
+ scoped_guard (sched_change, p, queue_flags) {
- if (queued) {
- /*
- * We enqueue to tail when the priority of a task is
- * increased (user space view).
- */
- if (oldprio < p->prio)
- queue_flags |= ENQUEUE_HEAD;
+ if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) {
+ __setscheduler_params(p, attr);
+ p->sched_class = next_class;
+ p->prio = newprio;
+ }
+ __setscheduler_uclamp(p, attr);
+ check_class_changing(rq, p, prev_class);
- enqueue_task(rq, p, queue_flags);
+ if (scope.queued) {
+ /*
+ * We enqueue to tail when the priority of a task is
+ * increased (user space view).
+ */
+ if (oldprio < p->prio)
+ scope.flags |= ENQUEUE_HEAD;
+ }
}
- if (running)
- set_next_task(rq, p);
check_class_changed(rq, p, prev_class, oldprio);