[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20231107215742.363031-43-ankur.a.arora@oracle.com>
Date: Tue, 7 Nov 2023 13:57:28 -0800
From: Ankur Arora <ankur.a.arora@...cle.com>
To: linux-kernel@...r.kernel.org
Cc: tglx@...utronix.de, peterz@...radead.org,
torvalds@...ux-foundation.org, paulmck@...nel.org,
linux-mm@...ck.org, x86@...nel.org, akpm@...ux-foundation.org,
luto@...nel.org, bp@...en8.de, dave.hansen@...ux.intel.com,
hpa@...or.com, mingo@...hat.com, juri.lelli@...hat.com,
vincent.guittot@...aro.org, willy@...radead.org, mgorman@...e.de,
jon.grimm@....com, bharata@....com, raghavendra.kt@....com,
boris.ostrovsky@...cle.com, konrad.wilk@...cle.com,
jgross@...e.com, andrew.cooper3@...rix.com, mingo@...nel.org,
bristot@...nel.org, mathieu.desnoyers@...icios.com,
geert@...ux-m68k.org, glaubitz@...sik.fu-berlin.de,
anton.ivanov@...bridgegreys.com, mattst88@...il.com,
krypton@...ich-teichert.org, rostedt@...dmis.org,
David.Laight@...LAB.COM, richard@....at, mjguzik@...il.com,
Ankur Arora <ankur.a.arora@...cle.com>
Subject: [RFC PATCH 42/86] sched: force preemption on tick expiration
The kernel can have long-running tasks which don't pass through
preemption points for prolonged periods, and so never act on the
scheduler's polite reschedule request, TIF_NEED_RESCHED_LAZY.
Force a reschedule at the next tick by upgrading to TIF_NEED_RESCHED,
which gets folded into the preempt_count and causes a reschedule at
the next safe preemption point.
TODO: handle the deadline scheduler as well.
Originally-by: Thomas Gleixner <tglx@...utronix.de>
Signed-off-by: Ankur Arora <ankur.a.arora@...cle.com>
---
kernel/sched/fair.c | 32 +++++++++++++++++++++++---------
kernel/sched/rt.c | 7 ++++++-
kernel/sched/sched.h | 1 +
3 files changed, 30 insertions(+), 10 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4d86c618ffa2..fe7e5e9b2207 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1016,8 +1016,11 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se);
* XXX: strictly: vd_i += N*r_i/w_i such that: vd_i > ve_i
* this is probably good enough.
*/
-static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
+static void update_deadline(struct cfs_rq *cfs_rq,
+ struct sched_entity *se, bool tick)
{
+ struct rq *rq = rq_of(cfs_rq);
+
if ((s64)(se->vruntime - se->deadline) < 0)
return;
@@ -1033,13 +1036,19 @@ static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
*/
se->deadline = se->vruntime + calc_delta_fair(se->slice, se);
+ if (cfs_rq->nr_running < 2)
+ return;
+
/*
- * The task has consumed its request, reschedule.
+ * The task has consumed its request, reschedule; eagerly
+ * if it ignored our last lazy reschedule.
*/
- if (cfs_rq->nr_running > 1) {
- resched_curr(rq_of(cfs_rq));
- clear_buddies(cfs_rq, se);
- }
+ if (tick && test_tsk_thread_flag(rq->curr, TIF_NEED_RESCHED_LAZY))
+ __resched_curr(rq, RESCHED_eager);
+ else
+ resched_curr(rq);
+
+ clear_buddies(cfs_rq, se);
}
#include "pelt.h"
@@ -1147,7 +1156,7 @@ static void update_tg_load_avg(struct cfs_rq *cfs_rq)
/*
* Update the current task's runtime statistics.
*/
-static void update_curr(struct cfs_rq *cfs_rq)
+static void __update_curr(struct cfs_rq *cfs_rq, bool tick)
{
struct sched_entity *curr = cfs_rq->curr;
u64 now = rq_clock_task(rq_of(cfs_rq));
@@ -1174,7 +1183,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
schedstat_add(cfs_rq->exec_clock, delta_exec);
curr->vruntime += calc_delta_fair(delta_exec, curr);
- update_deadline(cfs_rq, curr);
+ update_deadline(cfs_rq, curr, tick);
update_min_vruntime(cfs_rq);
if (entity_is_task(curr)) {
@@ -1188,6 +1197,11 @@ static void update_curr(struct cfs_rq *cfs_rq)
account_cfs_rq_runtime(cfs_rq, delta_exec);
}
+static void update_curr(struct cfs_rq *cfs_rq)
+{
+ __update_curr(cfs_rq, false);
+}
+
static void update_curr_fair(struct rq *rq)
{
update_curr(cfs_rq_of(&rq->curr->se));
@@ -5309,7 +5323,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
/*
* Update run-time statistics of the 'current'.
*/
- update_curr(cfs_rq);
+ __update_curr(cfs_rq, true);
/*
* Ensure that runnable average is periodically updated.
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index a79ce6746dd0..5fdb93f1b87e 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -2664,7 +2664,12 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
for_each_sched_rt_entity(rt_se) {
if (rt_se->run_list.prev != rt_se->run_list.next) {
requeue_task_rt(rq, p, 0);
- resched_curr(rq);
+
+ if (test_tsk_thread_flag(rq->curr, TIF_NEED_RESCHED_LAZY))
+ __resched_curr(rq, RESCHED_eager);
+ else
+ resched_curr(rq);
+
return;
}
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 9e1329a4e890..e29a8897f573 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2434,6 +2434,7 @@ extern void init_sched_fair_class(void);
extern void reweight_task(struct task_struct *p, int prio);
+extern void __resched_curr(struct rq *rq, resched_t rs);
extern void resched_curr(struct rq *rq);
extern void resched_cpu(int cpu);
--
2.31.1
Powered by blists - more mailing lists