[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20240727105029.631948434@infradead.org>
Date: Sat, 27 Jul 2024 12:27:44 +0200
From: Peter Zijlstra <peterz@...radead.org>
To: mingo@...hat.com,
peterz@...radead.org,
juri.lelli@...hat.com,
vincent.guittot@...aro.org,
dietmar.eggemann@....com,
rostedt@...dmis.org,
bsegall@...gle.com,
mgorman@...e.de,
vschneid@...hat.com,
linux-kernel@...r.kernel.org
Cc: kprateek.nayak@....com,
wuyun.abel@...edance.com,
youssefesmat@...omium.org,
tglx@...utronix.de,
efault@....de
Subject: [PATCH 12/24] sched/fair: Prepare exit/cleanup paths for delayed_dequeue
When dequeue_task() is delayed it becomes possible to exit a task (or
cgroup) that is still enqueued. Ensure things are dequeued before
freeing.
NOTE: switched_from_fair() causes spurious wakeups due to clearing
sched_delayed after enqueueing a task in another class that should've
been dequeued. This *should* be harmless.
Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
---
kernel/sched/fair.c | 61 ++++++++++++++++++++++++++++++++++++++++------------
1 file changed, 48 insertions(+), 13 deletions(-)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8318,7 +8318,20 @@ static void migrate_task_rq_fair(struct
static void task_dead_fair(struct task_struct *p)
{
- remove_entity_load_avg(&p->se);
+ struct sched_entity *se = &p->se;
+
+ if (se->sched_delayed) {
+ struct rq_flags rf;
+ struct rq *rq;
+
+ rq = task_rq_lock(p, &rf);
+ update_rq_clock(rq);
+ if (se->sched_delayed)
+ dequeue_entities(rq, se, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
+ task_rq_unlock(rq, p, &rf);
+ }
+
+ remove_entity_load_avg(se);
}
/*
@@ -12817,10 +12830,26 @@ static void attach_task_cfs_rq(struct ta
static void switched_from_fair(struct rq *rq, struct task_struct *p)
{
detach_task_cfs_rq(p);
+ /*
+ * Since this is called after changing class, this isn't quite right.
+ * Specifically, this causes the task to get queued in the target class
+ * and experience a 'spurious' wakeup.
+ *
+ * However, since 'spurious' wakeups are harmless, this shouldn't be a
+ * problem.
+ */
+ p->se.sched_delayed = 0;
+ /*
+ * While here, also clear the vlag, it makes little sense to carry that
+ * over the excursion into the new class.
+ */
+ p->se.vlag = 0;
}
static void switched_to_fair(struct rq *rq, struct task_struct *p)
{
+ SCHED_WARN_ON(p->se.sched_delayed);
+
attach_task_cfs_rq(p);
set_task_max_allowed_capacity(p);
@@ -12971,28 +13000,33 @@ void online_fair_sched_group(struct task
void unregister_fair_sched_group(struct task_group *tg)
{
- unsigned long flags;
- struct rq *rq;
int cpu;
destroy_cfs_bandwidth(tg_cfs_bandwidth(tg));
for_each_possible_cpu(cpu) {
- if (tg->se[cpu])
- remove_entity_load_avg(tg->se[cpu]);
+ struct cfs_rq *cfs_rq = tg->cfs_rq[cpu];
+ struct sched_entity *se = tg->se[cpu];
+ struct rq *rq = cpu_rq(cpu);
+
+ if (se) {
+ if (se->sched_delayed) {
+ guard(rq_lock_irqsave)(rq);
+ if (se->sched_delayed)
+ dequeue_entities(rq, se, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
+ list_del_leaf_cfs_rq(cfs_rq);
+ }
+ remove_entity_load_avg(se);
+ }
/*
* Only empty task groups can be destroyed; so we can speculatively
* check on_list without danger of it being re-added.
*/
- if (!tg->cfs_rq[cpu]->on_list)
- continue;
-
- rq = cpu_rq(cpu);
-
- raw_spin_rq_lock_irqsave(rq, flags);
- list_del_leaf_cfs_rq(tg->cfs_rq[cpu]);
- raw_spin_rq_unlock_irqrestore(rq, flags);
+ if (cfs_rq->on_list) {
+ guard(rq_lock_irqsave)(rq);
+ list_del_leaf_cfs_rq(cfs_rq);
+ }
}
}
Powered by blists - more mailing lists