[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220628092259.330171-1-nsaenzju@redhat.com>
Date: Tue, 28 Jun 2022 11:22:59 +0200
From: Nicolas Saenz Julienne <nsaenzju@...hat.com>
To: linux-kernel@...r.kernel.org, fweisbec@...il.com
Cc: bristot@...hat.com, vschneid@...hat.com, cmetcalf@...hip.com,
mgorman@...e.de, bsegall@...gle.com, rostedt@...dmis.org,
dietmar.eggemann@....com, vincent.guittot@...aro.org,
juri.lelli@...hat.com, peterz@...radead.org, mingo@...hat.com,
mtosatti@...hat.com, Nicolas Saenz Julienne <nsaenzju@...hat.com>
Subject: [PATCH] nohz/full, sched/rt: Fix missed tick-reenabling bug in dequeue_task_rt
dequeue_task_rt() only decrements 'rt_rq->rt_nr_running' after having
called sched_update_tick_dependency() preventing it from re-enabling the
tick on systems that no longer have pending SCHED_RT tasks but have
multiple runnable SCHED_OTHER tasks:
dequeue_task_rt()
dequeue_rt_entity()
dequeue_rt_stack()
dequeue_top_rt_rq()
sub_nr_running() // decrements rq->nr_running
sched_update_tick_dependency()
sched_can_stop_tick() // checks rq->rt.rt_nr_running,
...
__dequeue_rt_entity()
dec_rt_tasks() // decrements rq->rt.rt_nr_running
...
Every other scheduler class performs the operation in the opposite
order, and sched_update_tick_dependency() expects the values to be
updated as such. So avoid the misbehaviour by inverting the order in
which the above operations are performed in the RT scheduler.
Fixes: 76d92ac305f2 ("sched: Migrate sched to use new tick dependency mask model")
Signed-off-by: Nicolas Saenz Julienne <nsaenzju@...hat.com>
---
kernel/sched/rt.c | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 8c9ed96648409..55f39c8f42032 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -480,7 +480,7 @@ static inline void rt_queue_push_tasks(struct rq *rq)
#endif /* CONFIG_SMP */
static void enqueue_top_rt_rq(struct rt_rq *rt_rq);
-static void dequeue_top_rt_rq(struct rt_rq *rt_rq);
+static void dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count);
static inline int on_rt_rq(struct sched_rt_entity *rt_se)
{
@@ -601,7 +601,7 @@ static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
rt_se = rt_rq->tg->rt_se[cpu];
if (!rt_se) {
- dequeue_top_rt_rq(rt_rq);
+ dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
cpufreq_update_util(rq_of_rt_rq(rt_rq), 0);
}
@@ -687,7 +687,7 @@ static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
{
- dequeue_top_rt_rq(rt_rq);
+ dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running);
}
static inline int rt_rq_throttled(struct rt_rq *rt_rq)
@@ -1089,7 +1089,7 @@ static void update_curr_rt(struct rq *rq)
}
static void
-dequeue_top_rt_rq(struct rt_rq *rt_rq)
+dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count)
{
struct rq *rq = rq_of_rt_rq(rt_rq);
@@ -1100,7 +1100,7 @@ dequeue_top_rt_rq(struct rt_rq *rt_rq)
BUG_ON(!rq->nr_running);
- sub_nr_running(rq, rt_rq->rt_nr_running);
+ sub_nr_running(rq, count);
rt_rq->rt_queued = 0;
}
@@ -1486,18 +1486,21 @@ static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flag
static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
{
struct sched_rt_entity *back = NULL;
+ unsigned int rt_nr_running;
for_each_sched_rt_entity(rt_se) {
rt_se->back = back;
back = rt_se;
}
- dequeue_top_rt_rq(rt_rq_of_se(back));
+ rt_nr_running = rt_rq_of_se(back)->rt_nr_running;
for (rt_se = back; rt_se; rt_se = rt_se->back) {
if (on_rt_rq(rt_se))
__dequeue_rt_entity(rt_se, flags);
}
+
+ dequeue_top_rt_rq(rt_rq_of_se(back), rt_nr_running);
}
static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
--
2.36.1
Powered by blists - more mailing lists