Message-Id: <20240213055554.1802415-6-ankur.a.arora@oracle.com>
Date: Mon, 12 Feb 2024 21:55:29 -0800
From: Ankur Arora <ankur.a.arora@...cle.com>
To: linux-kernel@...r.kernel.org
Cc: tglx@...utronix.de, peterz@...radead.org, torvalds@...ux-foundation.org,
        paulmck@...nel.org, akpm@...ux-foundation.org, luto@...nel.org,
        bp@...en8.de, dave.hansen@...ux.intel.com, hpa@...or.com,
        mingo@...hat.com, juri.lelli@...hat.com, vincent.guittot@...aro.org,
        willy@...radead.org, mgorman@...e.de, jpoimboe@...nel.org,
        mark.rutland@....com, jgross@...e.com, andrew.cooper3@...rix.com,
        bristot@...nel.org, mathieu.desnoyers@...icios.com,
        geert@...ux-m68k.org, glaubitz@...sik.fu-berlin.de,
        anton.ivanov@...bridgegreys.com, mattst88@...il.com,
        krypton@...ich-teichert.org, rostedt@...dmis.org,
        David.Laight@...LAB.COM, richard@....at, mjguzik@...il.com,
        jon.grimm@....com, bharata@....com, raghavendra.kt@....com,
        boris.ostrovsky@...cle.com, konrad.wilk@...cle.com,
        Ankur Arora <ankur.a.arora@...cle.com>
Subject: [PATCH 05/30] sched: *_tsk_need_resched() now takes resched_t as param

The *_tsk_need_resched() helpers now take a resched_t parameter
specifying the immediacy of the need-resched.

These interfaces are primarily used in the scheduler and in RCU. Outside
of the scheduler, set_tsk_need_resched() is typically used to force
a context switch by setting need-resched and folding it into the
preempt count. Update those callers to use set_tsk_need_resched(..., NR_now).

For scheduler usage, preserve the current semantics by using
set_tsk_need_resched(..., NR_now) and test_tsk_need_resched(..., NR_now).

Note that clear_tsk_need_resched() is only used from __schedule()
to do any clearing needed before switching context. It now clears
all of the need-resched flags.
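
As an illustrative sketch, the "force a context switch" sequence used
by the non-scheduler callers updated below (e.g. RCU, s390 pfault) now
reads:

	/* Mark an immediate need-resched and fold it into preempt_count. */
	set_tsk_need_resched(current, NR_now);
	set_preempt_need_resched();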

Cc: Peter Zijlstra <peterz@...radead.org>
Cc: Ingo Molnar <mingo@...hat.com>
Cc: Juri Lelli <juri.lelli@...hat.com>
Cc: Vincent Guittot <vincent.guittot@...aro.org>
Cc: Paul E. McKenney <paulmck@...nel.org>
Originally-by: Thomas Gleixner <tglx@...utronix.de>
Link: https://lore.kernel.org/lkml/87jzshhexi.ffs@tglx/
Signed-off-by: Ankur Arora <ankur.a.arora@...cle.com>
---
 arch/s390/mm/pfault.c    |  2 +-
 include/linux/sched.h    | 30 ++++++++++++++++++++++++------
 kernel/rcu/tiny.c        |  2 +-
 kernel/rcu/tree.c        |  4 ++--
 kernel/rcu/tree_exp.h    |  4 ++--
 kernel/rcu/tree_plugin.h |  4 ++--
 kernel/rcu/tree_stall.h  |  2 +-
 kernel/sched/core.c      |  9 +++++----
 kernel/sched/deadline.c  |  4 ++--
 kernel/sched/fair.c      |  2 +-
 kernel/sched/idle.c      |  2 +-
 kernel/sched/rt.c        |  4 ++--
 12 files changed, 44 insertions(+), 25 deletions(-)

diff --git a/arch/s390/mm/pfault.c b/arch/s390/mm/pfault.c
index 1aac13bb8f53..4e075059d2e7 100644
--- a/arch/s390/mm/pfault.c
+++ b/arch/s390/mm/pfault.c
@@ -198,7 +198,7 @@ static void pfault_interrupt(struct ext_code ext_code,
 			 * return to userspace schedule() to block.
 			 */
 			__set_current_state(TASK_UNINTERRUPTIBLE);
-			set_tsk_need_resched(tsk);
+			set_tsk_need_resched(tsk, NR_now);
 			set_preempt_need_resched();
 		}
 	}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e790860d89c3..d226c2920cff 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1948,19 +1948,36 @@ static inline bool test_tsk_thread_flag(struct task_struct *tsk, int flag)
 	return test_ti_thread_flag(task_thread_info(tsk), flag);
 }
 
-static inline void set_tsk_need_resched(struct task_struct *tsk)
+/*
+ * With !CONFIG_PREEMPT_AUTO, tif_resched(NR_lazy) reduces to
+ * tif_resched(NR_now). Add a check in the helpers below to ensure
+ * we don't touch the tif_resched(NR_now) bit unnecessarily.
+ */
+static inline void set_tsk_need_resched(struct task_struct *tsk, resched_t rs)
 {
-	set_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
+	if (IS_ENABLED(CONFIG_PREEMPT_AUTO) || rs == NR_now)
+		set_tsk_thread_flag(tsk, tif_resched(rs));
+	else
+		/*
+		 * NR_lazy is only touched under CONFIG_PREEMPT_AUTO.
+		 */
+		BUG();
 }
 
 static inline void clear_tsk_need_resched(struct task_struct *tsk)
 {
-	clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
+	clear_tsk_thread_flag(tsk, tif_resched(NR_now));
+
+	if (IS_ENABLED(CONFIG_PREEMPT_AUTO))
+		clear_tsk_thread_flag(tsk, tif_resched(NR_lazy));
 }
 
-static inline bool test_tsk_need_resched(struct task_struct *tsk)
+static inline bool test_tsk_need_resched(struct task_struct *tsk, resched_t rs)
 {
-	return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
+	if (IS_ENABLED(CONFIG_PREEMPT_AUTO) || rs == NR_now)
+		return unlikely(test_tsk_thread_flag(tsk, tif_resched(rs)));
+	else
+		return false;
 }
 
 /*
@@ -2104,7 +2121,8 @@ static __always_inline bool need_resched(void)
 
 static __always_inline bool need_resched_lazy(void)
 {
-	return unlikely(tif_need_resched(NR_lazy));
+	return IS_ENABLED(CONFIG_PREEMPT_AUTO) &&
+		unlikely(tif_need_resched(NR_lazy));
 }
 
 /*
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index fec804b79080..a085d337434e 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -73,7 +73,7 @@ void rcu_sched_clock_irq(int user)
 	if (user) {
 		rcu_qs();
 	} else if (rcu_ctrlblk.donetail != rcu_ctrlblk.curtail) {
-		set_tsk_need_resched(current);
+		set_tsk_need_resched(current, NR_now);
 		set_preempt_need_resched();
 	}
 }
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 1ae851777806..d6ac2b703a6d 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2250,7 +2250,7 @@ void rcu_sched_clock_irq(int user)
 	if (smp_load_acquire(this_cpu_ptr(&rcu_data.rcu_urgent_qs))) {
 		/* Idle and userspace execution already are quiescent states. */
 		if (!rcu_is_cpu_rrupt_from_idle() && !user) {
-			set_tsk_need_resched(current);
+			set_tsk_need_resched(current, NR_now);
 			set_preempt_need_resched();
 		}
 		__this_cpu_write(rcu_data.rcu_urgent_qs, false);
@@ -2409,7 +2409,7 @@ static __latent_entropy void rcu_core(void)
 	if (IS_ENABLED(CONFIG_PREEMPT_COUNT) && (!(preempt_count() & PREEMPT_MASK))) {
 		rcu_preempt_deferred_qs(current);
 	} else if (rcu_preempt_need_deferred_qs(current)) {
-		set_tsk_need_resched(current);
+		set_tsk_need_resched(current, NR_now);
 		set_preempt_need_resched();
 	}
 
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 6d7cea5d591f..34d0bbd93343 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -759,7 +759,7 @@ static void rcu_exp_handler(void *unused)
 			rcu_report_exp_rdp(rdp);
 		} else {
 			WRITE_ONCE(rdp->cpu_no_qs.b.exp, true);
-			set_tsk_need_resched(t);
+			set_tsk_need_resched(t, NR_now);
 			set_preempt_need_resched();
 		}
 		return;
@@ -860,7 +860,7 @@ static void rcu_exp_need_qs(void)
 	__this_cpu_write(rcu_data.cpu_no_qs.b.exp, true);
 	/* Store .exp before .rcu_urgent_qs. */
 	smp_store_release(this_cpu_ptr(&rcu_data.rcu_urgent_qs), true);
-	set_tsk_need_resched(current);
+	set_tsk_need_resched(current, NR_now);
 	set_preempt_need_resched();
 }
 
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 41021080ad25..26c79246873a 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -658,7 +658,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
 			// Also if no expediting and no possible deboosting,
 			// slow is OK.  Plus nohz_full CPUs eventually get
 			// tick enabled.
-			set_tsk_need_resched(current);
+			set_tsk_need_resched(current, NR_now);
 			set_preempt_need_resched();
 			if (IS_ENABLED(CONFIG_IRQ_WORK) && irqs_were_disabled &&
 			    expboost && !rdp->defer_qs_iw_pending && cpu_online(rdp->cpu)) {
@@ -725,7 +725,7 @@ static void rcu_flavor_sched_clock_irq(int user)
 	    (preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK))) {
 		/* No QS, force context switch if deferred. */
 		if (rcu_preempt_need_deferred_qs(t)) {
-			set_tsk_need_resched(t);
+			set_tsk_need_resched(t, NR_now);
 			set_preempt_need_resched();
 		}
 	} else if (rcu_preempt_need_deferred_qs(t)) {
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 5d666428546b..9d4aa4fde5ae 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -712,7 +712,7 @@ static void print_cpu_stall(unsigned long gps)
 	 * progress and it could be we're stuck in kernel space without context
 	 * switches for an entirely unreasonable amount of time.
 	 */
-	set_tsk_need_resched(current);
+	set_tsk_need_resched(current, NR_now);
 	set_preempt_need_resched();
 }
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9116bcc90346..41c3bd49a700 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -933,7 +933,7 @@ static bool set_nr_if_polling(struct task_struct *p)
 #else
 static inline bool set_nr_and_not_polling(struct task_struct *p)
 {
-	set_tsk_need_resched(p);
+	set_tsk_need_resched(p, NR_now);
 	return true;
 }
 
@@ -1045,13 +1045,13 @@ void resched_curr(struct rq *rq)
 
 	lockdep_assert_rq_held(rq);
 
-	if (test_tsk_need_resched(curr))
+	if (test_tsk_need_resched(curr, NR_now))
 		return;
 
 	cpu = cpu_of(rq);
 
 	if (cpu == smp_processor_id()) {
-		set_tsk_need_resched(curr);
+		set_tsk_need_resched(curr, NR_now);
 		set_preempt_need_resched();
 		return;
 	}
@@ -2247,7 +2247,8 @@ void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags)
 	 * A queue event has occurred, and we're going to schedule.  In
 	 * this case, we can save a useless back to back clock update.
 	 */
-	if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr))
+	if (task_on_rq_queued(rq->curr) &&
+	    test_tsk_need_resched(rq->curr, NR_now))
 		rq_clock_skip_update(rq);
 }
 
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index a04a436af8cc..b4e68cfc3c3a 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2035,7 +2035,7 @@ static void wakeup_preempt_dl(struct rq *rq, struct task_struct *p,
 	 * let us try to decide what's the best thing to do...
 	 */
 	if ((p->dl.deadline == rq->curr->dl.deadline) &&
-	    !test_tsk_need_resched(rq->curr))
+	    !test_tsk_need_resched(rq->curr, NR_now))
 		check_preempt_equal_dl(rq, p);
 #endif /* CONFIG_SMP */
 }
@@ -2564,7 +2564,7 @@ static void pull_dl_task(struct rq *this_rq)
 static void task_woken_dl(struct rq *rq, struct task_struct *p)
 {
 	if (!task_on_cpu(rq, p) &&
-	    !test_tsk_need_resched(rq->curr) &&
+	    !test_tsk_need_resched(rq->curr, NR_now) &&
 	    p->nr_cpus_allowed > 1 &&
 	    dl_task(rq->curr) &&
 	    (rq->curr->nr_cpus_allowed < 2 ||
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 533547e3c90a..ae9b237fa32b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8298,7 +8298,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
 	 * prevents us from potentially nominating it as a false LAST_BUDDY
 	 * below.
 	 */
-	if (test_tsk_need_resched(curr))
+	if (test_tsk_need_resched(curr, NR_now))
 		return;
 
 	/* Idle tasks are by definition preempted by non-idle tasks. */
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index be53d164e267..cd25f71f43a7 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -359,7 +359,7 @@ static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer)
 	struct idle_timer *it = container_of(timer, struct idle_timer, timer);
 
 	WRITE_ONCE(it->done, 1);
-	set_tsk_need_resched(current);
+	set_tsk_need_resched(current, NR_now);
 
 	return HRTIMER_NORESTART;
 }
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 3261b067b67e..c57cc8427a57 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1680,7 +1680,7 @@ static void wakeup_preempt_rt(struct rq *rq, struct task_struct *p, int flags)
 	 * to move current somewhere else, making room for our non-migratable
 	 * task.
 	 */
-	if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr))
+	if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr, NR_now))
 		check_preempt_equal_prio(rq, p);
 #endif
 }
@@ -2415,7 +2415,7 @@ static void pull_rt_task(struct rq *this_rq)
 static void task_woken_rt(struct rq *rq, struct task_struct *p)
 {
 	bool need_to_push = !task_on_cpu(rq, p) &&
-			    !test_tsk_need_resched(rq->curr) &&
+			    !test_tsk_need_resched(rq->curr, NR_now) &&
 			    p->nr_cpus_allowed > 1 &&
 			    (dl_task(rq->curr) || rt_task(rq->curr)) &&
 			    (rq->curr->nr_cpus_allowed < 2 ||
-- 
2.31.1

