[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20240213055554.1802415-6-ankur.a.arora@oracle.com>
Date: Mon, 12 Feb 2024 21:55:29 -0800
From: Ankur Arora <ankur.a.arora@...cle.com>
To: linux-kernel@...r.kernel.org
Cc: tglx@...utronix.de, peterz@...radead.org, torvalds@...ux-foundation.org,
paulmck@...nel.org, akpm@...ux-foundation.org, luto@...nel.org,
bp@...en8.de, dave.hansen@...ux.intel.com, hpa@...or.com,
mingo@...hat.com, juri.lelli@...hat.com, vincent.guittot@...aro.org,
willy@...radead.org, mgorman@...e.de, jpoimboe@...nel.org,
mark.rutland@....com, jgross@...e.com, andrew.cooper3@...rix.com,
bristot@...nel.org, mathieu.desnoyers@...icios.com,
geert@...ux-m68k.org, glaubitz@...sik.fu-berlin.de,
anton.ivanov@...bridgegreys.com, mattst88@...il.com,
krypton@...ich-teichert.org, rostedt@...dmis.org,
David.Laight@...LAB.COM, richard@....at, mjguzik@...il.com,
jon.grimm@....com, bharata@....com, raghavendra.kt@....com,
boris.ostrovsky@...cle.com, konrad.wilk@...cle.com,
Ankur Arora <ankur.a.arora@...cle.com>
Subject: [PATCH 05/30] sched: *_tsk_need_resched() now takes resched_t as param
*_tsk_need_resched() now test for the immediacy of the need-resched.
These interfaces are primarily used in the scheduler and RCU. Outside
of the scheduler, set_tsk_need_resched() is typically used to force
a context switch by setting need-resched and folding it in. Update
those calls with set_tsk_need_resched(..., NR_now).
For scheduler usage, preserve the current semantics by using
set_tsk_need_resched(..., NR_now), test_tsk_need_resched(..., NR_now).
Note that clear_tsk_need_resched() is only used from __schedule()
to do any clearing needing before switching context. Now it clears
all the need-resched flags.
Cc: Peter Ziljstra <peterz@...radead.org>
Cc: Ingo Molnar <mingo@...hat.com>
Cc: Juri Lelli <juri.lelli@...hat.com>
Cc: Vincent Guittot <vincent.guittot@...aro.org>
Cc: Paul E. McKenney <paulmck@...nel.org>
Originally-by: Thomas Gleixner <tglx@...utronix.de>
Link: https://lore.kernel.org/lkml/87jzshhexi.ffs@tglx/
Signed-off-by: Ankur Arora <ankur.a.arora@...cle.com>
---
arch/s390/mm/pfault.c | 2 +-
include/linux/sched.h | 30 ++++++++++++++++++++++++------
kernel/rcu/tiny.c | 2 +-
kernel/rcu/tree.c | 4 ++--
kernel/rcu/tree_exp.h | 4 ++--
kernel/rcu/tree_plugin.h | 4 ++--
kernel/rcu/tree_stall.h | 2 +-
kernel/sched/core.c | 9 +++++----
kernel/sched/deadline.c | 4 ++--
kernel/sched/fair.c | 2 +-
kernel/sched/idle.c | 2 +-
kernel/sched/rt.c | 4 ++--
12 files changed, 44 insertions(+), 25 deletions(-)
diff --git a/arch/s390/mm/pfault.c b/arch/s390/mm/pfault.c
index 1aac13bb8f53..4e075059d2e7 100644
--- a/arch/s390/mm/pfault.c
+++ b/arch/s390/mm/pfault.c
@@ -198,7 +198,7 @@ static void pfault_interrupt(struct ext_code ext_code,
* return to userspace schedule() to block.
*/
__set_current_state(TASK_UNINTERRUPTIBLE);
- set_tsk_need_resched(tsk);
+ set_tsk_need_resched(tsk, NR_now);
set_preempt_need_resched();
}
}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e790860d89c3..d226c2920cff 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1948,19 +1948,36 @@ static inline bool test_tsk_thread_flag(struct task_struct *tsk, int flag)
return test_ti_thread_flag(task_thread_info(tsk), flag);
}
-static inline void set_tsk_need_resched(struct task_struct *tsk)
+/*
+ * With !CONFIG_PREEMPT_AUTO, tif_resched(NR_lazy) reduces to
+ * tif_resched(NR_now). Add a check in the helpers below to ensure
+ * we don't touch the tif_reshed(NR_now) bit unnecessarily.
+ */
+static inline void set_tsk_need_resched(struct task_struct *tsk, resched_t rs)
{
- set_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
+ if (IS_ENABLED(CONFIG_PREEMPT_AUTO) || rs == NR_now)
+ set_tsk_thread_flag(tsk, tif_resched(rs));
+ else
+ /*
+ * NR_lazy is only touched under CONFIG_PREEMPT_AUTO.
+ */
+ BUG();
}
static inline void clear_tsk_need_resched(struct task_struct *tsk)
{
- clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
+ clear_tsk_thread_flag(tsk, tif_resched(NR_now));
+
+ if (IS_ENABLED(CONFIG_PREEMPT_AUTO))
+ clear_tsk_thread_flag(tsk, tif_resched(NR_lazy));
}
-static inline bool test_tsk_need_resched(struct task_struct *tsk)
+static inline bool test_tsk_need_resched(struct task_struct *tsk, resched_t rs)
{
- return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
+ if (IS_ENABLED(CONFIG_PREEMPT_AUTO) || rs == NR_now)
+ return unlikely(test_tsk_thread_flag(tsk, tif_resched(rs)));
+ else
+ return false;
}
/*
@@ -2104,7 +2121,8 @@ static __always_inline bool need_resched(void)
static __always_inline bool need_resched_lazy(void)
{
- return unlikely(tif_need_resched(NR_lazy));
+ return IS_ENABLED(CONFIG_PREEMPT_AUTO) &&
+ unlikely(tif_need_resched(NR_lazy));
}
/*
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index fec804b79080..a085d337434e 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -73,7 +73,7 @@ void rcu_sched_clock_irq(int user)
if (user) {
rcu_qs();
} else if (rcu_ctrlblk.donetail != rcu_ctrlblk.curtail) {
- set_tsk_need_resched(current);
+ set_tsk_need_resched(current, NR_now);
set_preempt_need_resched();
}
}
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 1ae851777806..d6ac2b703a6d 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2250,7 +2250,7 @@ void rcu_sched_clock_irq(int user)
if (smp_load_acquire(this_cpu_ptr(&rcu_data.rcu_urgent_qs))) {
/* Idle and userspace execution already are quiescent states. */
if (!rcu_is_cpu_rrupt_from_idle() && !user) {
- set_tsk_need_resched(current);
+ set_tsk_need_resched(current, NR_now);
set_preempt_need_resched();
}
__this_cpu_write(rcu_data.rcu_urgent_qs, false);
@@ -2409,7 +2409,7 @@ static __latent_entropy void rcu_core(void)
if (IS_ENABLED(CONFIG_PREEMPT_COUNT) && (!(preempt_count() & PREEMPT_MASK))) {
rcu_preempt_deferred_qs(current);
} else if (rcu_preempt_need_deferred_qs(current)) {
- set_tsk_need_resched(current);
+ set_tsk_need_resched(current, NR_now);
set_preempt_need_resched();
}
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 6d7cea5d591f..34d0bbd93343 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -759,7 +759,7 @@ static void rcu_exp_handler(void *unused)
rcu_report_exp_rdp(rdp);
} else {
WRITE_ONCE(rdp->cpu_no_qs.b.exp, true);
- set_tsk_need_resched(t);
+ set_tsk_need_resched(t, NR_now);
set_preempt_need_resched();
}
return;
@@ -860,7 +860,7 @@ static void rcu_exp_need_qs(void)
__this_cpu_write(rcu_data.cpu_no_qs.b.exp, true);
/* Store .exp before .rcu_urgent_qs. */
smp_store_release(this_cpu_ptr(&rcu_data.rcu_urgent_qs), true);
- set_tsk_need_resched(current);
+ set_tsk_need_resched(current, NR_now);
set_preempt_need_resched();
}
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 41021080ad25..26c79246873a 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -658,7 +658,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
// Also if no expediting and no possible deboosting,
// slow is OK. Plus nohz_full CPUs eventually get
// tick enabled.
- set_tsk_need_resched(current);
+ set_tsk_need_resched(current, NR_now);
set_preempt_need_resched();
if (IS_ENABLED(CONFIG_IRQ_WORK) && irqs_were_disabled &&
expboost && !rdp->defer_qs_iw_pending && cpu_online(rdp->cpu)) {
@@ -725,7 +725,7 @@ static void rcu_flavor_sched_clock_irq(int user)
(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK))) {
/* No QS, force context switch if deferred. */
if (rcu_preempt_need_deferred_qs(t)) {
- set_tsk_need_resched(t);
+ set_tsk_need_resched(t, NR_now);
set_preempt_need_resched();
}
} else if (rcu_preempt_need_deferred_qs(t)) {
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 5d666428546b..9d4aa4fde5ae 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -712,7 +712,7 @@ static void print_cpu_stall(unsigned long gps)
* progress and it could be we're stuck in kernel space without context
* switches for an entirely unreasonable amount of time.
*/
- set_tsk_need_resched(current);
+ set_tsk_need_resched(current, NR_now);
set_preempt_need_resched();
}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9116bcc90346..41c3bd49a700 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -933,7 +933,7 @@ static bool set_nr_if_polling(struct task_struct *p)
#else
static inline bool set_nr_and_not_polling(struct task_struct *p)
{
- set_tsk_need_resched(p);
+ set_tsk_need_resched(p, NR_now);
return true;
}
@@ -1045,13 +1045,13 @@ void resched_curr(struct rq *rq)
lockdep_assert_rq_held(rq);
- if (test_tsk_need_resched(curr))
+ if (test_tsk_need_resched(curr, NR_now))
return;
cpu = cpu_of(rq);
if (cpu == smp_processor_id()) {
- set_tsk_need_resched(curr);
+ set_tsk_need_resched(curr, NR_now);
set_preempt_need_resched();
return;
}
@@ -2247,7 +2247,8 @@ void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags)
* A queue event has occurred, and we're going to schedule. In
* this case, we can save a useless back to back clock update.
*/
- if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr))
+ if (task_on_rq_queued(rq->curr) &&
+ test_tsk_need_resched(rq->curr, NR_now))
rq_clock_skip_update(rq);
}
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index a04a436af8cc..b4e68cfc3c3a 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2035,7 +2035,7 @@ static void wakeup_preempt_dl(struct rq *rq, struct task_struct *p,
* let us try to decide what's the best thing to do...
*/
if ((p->dl.deadline == rq->curr->dl.deadline) &&
- !test_tsk_need_resched(rq->curr))
+ !test_tsk_need_resched(rq->curr, NR_now))
check_preempt_equal_dl(rq, p);
#endif /* CONFIG_SMP */
}
@@ -2564,7 +2564,7 @@ static void pull_dl_task(struct rq *this_rq)
static void task_woken_dl(struct rq *rq, struct task_struct *p)
{
if (!task_on_cpu(rq, p) &&
- !test_tsk_need_resched(rq->curr) &&
+ !test_tsk_need_resched(rq->curr, NR_now) &&
p->nr_cpus_allowed > 1 &&
dl_task(rq->curr) &&
(rq->curr->nr_cpus_allowed < 2 ||
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 533547e3c90a..ae9b237fa32b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8298,7 +8298,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
* prevents us from potentially nominating it as a false LAST_BUDDY
* below.
*/
- if (test_tsk_need_resched(curr))
+ if (test_tsk_need_resched(curr, NR_now))
return;
/* Idle tasks are by definition preempted by non-idle tasks. */
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index be53d164e267..cd25f71f43a7 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -359,7 +359,7 @@ static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer)
struct idle_timer *it = container_of(timer, struct idle_timer, timer);
WRITE_ONCE(it->done, 1);
- set_tsk_need_resched(current);
+ set_tsk_need_resched(current, NR_now);
return HRTIMER_NORESTART;
}
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 3261b067b67e..c57cc8427a57 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1680,7 +1680,7 @@ static void wakeup_preempt_rt(struct rq *rq, struct task_struct *p, int flags)
* to move current somewhere else, making room for our non-migratable
* task.
*/
- if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr))
+ if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr, NR_now))
check_preempt_equal_prio(rq, p);
#endif
}
@@ -2415,7 +2415,7 @@ static void pull_rt_task(struct rq *this_rq)
static void task_woken_rt(struct rq *rq, struct task_struct *p)
{
bool need_to_push = !task_on_cpu(rq, p) &&
- !test_tsk_need_resched(rq->curr) &&
+ !test_tsk_need_resched(rq->curr, NR_now) &&
p->nr_cpus_allowed > 1 &&
(dl_task(rq->curr) || rt_task(rq->curr)) &&
(rq->curr->nr_cpus_allowed < 2 ||
--
2.31.1
Powered by blists - more mailing lists