lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 5 Jan 2021 10:55:03 +0100
From:   Peter Zijlstra <peterz@...radead.org>
To:     Frederic Weisbecker <frederic@...nel.org>
Cc:     LKML <linux-kernel@...r.kernel.org>,
        "Rafael J . Wysocki" <rafael.j.wysocki@...el.com>,
        Ingo Molnar <mingo@...nel.org>,
        Fabio Estevam <festevam@...il.com>, stable@...r.kernel.org,
        Thomas Gleixner <tglx@...utronix.de>,
        "Paul E . McKenney" <paulmck@...nel.org>,
        Len Brown <lenb@...nel.org>,
        Pengutronix Kernel Team <kernel@...gutronix.de>,
        NXP Linux Team <linux-imx@....com>,
        Daniel Lezcano <daniel.lezcano@...aro.org>,
        Shawn Guo <shawnguo@...nel.org>,
        Sascha Hauer <s.hauer@...gutronix.de>
Subject: Re: [PATCH 1/4] sched/idle: Fix missing need_resched() check after
 rcu_idle_enter()

On Mon, Jan 04, 2021 at 04:20:55PM +0100, Frederic Weisbecker wrote:
> Entering RCU idle mode may cause a deferred wake up of an RCU NOCB_GP
> kthread (rcuog) to be serviced.
> 
> Usually a wake up happening while running the idle task is spotted in
> one of the need_resched() checks carefully placed within the idle loop
> that can break to the scheduler.

Urgh, this is horrific and fragile :/ You having had to audit and fix a
number of rcu_idle_enter() callers should've made you realize that
making rcu_idle_enter() return something would've been saner.

Also, I might hope that when RCU does do that wakeup, it will not have
put RCU in idle mode? So it is a natural 'fail' state for
rcu_idle_enter(), *sigh* it continues to put RCU to sleep, so that needs
fixing too.

I'm thinking that rcu_user_enter() will have the exact same problem? Did
you audit that?

Something like the below, combined with a fixup for all callers (which
the compiler will help us find thanks to __must_check).

---

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index de0826411311..612f66c16078 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -95,10 +95,10 @@ static inline void rcu_sysrq_end(void) { }
 #endif /* #else #ifdef CONFIG_RCU_STALL_COMMON */
 
 #ifdef CONFIG_NO_HZ_FULL
-void rcu_user_enter(void);
+bool __must_check rcu_user_enter(void);
 void rcu_user_exit(void);
 #else
-static inline void rcu_user_enter(void) { }
+static inline bool __must_check rcu_user_enter(void) { return true; }
 static inline void rcu_user_exit(void) { }
 #endif /* CONFIG_NO_HZ_FULL */
 
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index df578b73960f..9ba0c5d9e99e 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -43,7 +43,7 @@ bool rcu_gp_might_be_stalled(void);
 unsigned long get_state_synchronize_rcu(void);
 void cond_synchronize_rcu(unsigned long oldstate);
 
-void rcu_idle_enter(void);
+bool __must_check rcu_idle_enter(void);
 void rcu_idle_exit(void);
 void rcu_irq_enter(void);
 void rcu_irq_exit(void);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 40e5e3dd253e..13e19e5db0b8 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -625,7 +625,7 @@ EXPORT_SYMBOL_GPL(rcutorture_get_gp_data);
  * the possibility of usermode upcalls having messed up our count
  * of interrupt nesting level during the prior busy period.
  */
-static noinstr void rcu_eqs_enter(bool user)
+static noinstr bool rcu_eqs_enter(bool user)
 {
 	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
 
@@ -636,7 +636,7 @@ static noinstr void rcu_eqs_enter(bool user)
 	if (rdp->dynticks_nesting != 1) {
 		// RCU will still be watching, so just do accounting and leave.
 		rdp->dynticks_nesting--;
-		return;
+		return true;
 	}
 
 	lockdep_assert_irqs_disabled();
@@ -644,7 +644,14 @@ static noinstr void rcu_eqs_enter(bool user)
 	trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, atomic_read(&rdp->dynticks));
 	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
 	rdp = this_cpu_ptr(&rcu_data);
-	do_nocb_deferred_wakeup(rdp);
+	if (do_nocb_deferred_wakeup(rdp)) {
+		/*
+		 * We did the wakeup, don't enter EQS, we'll need to abort idle
+		 * and schedule.
+		 */
+		return false;
+	}
+
 	rcu_prepare_for_idle();
 	rcu_preempt_deferred_qs(current);
 
@@ -657,6 +664,8 @@ static noinstr void rcu_eqs_enter(bool user)
 	rcu_dynticks_eqs_enter();
 	// ... but is no longer watching here.
 	rcu_dynticks_task_enter();
+
+	return true;
 }
 
 /**
@@ -670,10 +679,10 @@ static noinstr void rcu_eqs_enter(bool user)
  * If you add or remove a call to rcu_idle_enter(), be sure to test with
  * CONFIG_RCU_EQS_DEBUG=y.
  */
-void rcu_idle_enter(void)
+bool rcu_idle_enter(void)
 {
 	lockdep_assert_irqs_disabled();
-	rcu_eqs_enter(false);
+	return rcu_eqs_enter(false);
 }
 EXPORT_SYMBOL_GPL(rcu_idle_enter);
 
@@ -689,10 +698,10 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter);
  * If you add or remove a call to rcu_user_enter(), be sure to test with
  * CONFIG_RCU_EQS_DEBUG=y.
  */
-noinstr void rcu_user_enter(void)
+noinstr bool rcu_user_enter(void)
 {
 	lockdep_assert_irqs_disabled();
-	rcu_eqs_enter(true);
+	return rcu_eqs_enter(true);
 }
 #endif /* CONFIG_NO_HZ_FULL */
 
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 7708ed161f4a..9226f4021a36 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -433,7 +433,7 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
 static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty,
 				 unsigned long flags);
 static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp);
-static void do_nocb_deferred_wakeup(struct rcu_data *rdp);
+static bool do_nocb_deferred_wakeup(struct rcu_data *rdp);
 static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
 static void rcu_spawn_cpu_nocb_kthread(int cpu);
 static void __init rcu_spawn_nocb_kthreads(void);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 7e291ce0a1d6..8ca41b3fe4f9 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1631,7 +1631,7 @@ bool rcu_is_nocb_cpu(int cpu)
  * Kick the GP kthread for this NOCB group.  Caller holds ->nocb_lock
  * and this function releases it.
  */
-static void wake_nocb_gp(struct rcu_data *rdp, bool force,
+static bool wake_nocb_gp(struct rcu_data *rdp, bool force,
 			   unsigned long flags)
 	__releases(rdp->nocb_lock)
 {
@@ -1654,8 +1654,11 @@ static void wake_nocb_gp(struct rcu_data *rdp, bool force,
 		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DoWake"));
 	}
 	raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
-	if (needwake)
+	if (needwake) {
 		wake_up_process(rdp_gp->nocb_gp_kthread);
+		return true;
+	}
+	return false;
 }
 
 /*
@@ -2155,17 +2158,19 @@ static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
 static void do_nocb_deferred_wakeup_common(struct rcu_data *rdp)
 {
 	unsigned long flags;
+	bool ret;
 	int ndw;
 
 	rcu_nocb_lock_irqsave(rdp, flags);
 	if (!rcu_nocb_need_deferred_wakeup(rdp)) {
 		rcu_nocb_unlock_irqrestore(rdp, flags);
-		return;
+		return false;
 	}
 	ndw = READ_ONCE(rdp->nocb_defer_wakeup);
 	WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
-	wake_nocb_gp(rdp, ndw == RCU_NOCB_WAKE_FORCE, flags);
+	ret = wake_nocb_gp(rdp, ndw == RCU_NOCB_WAKE_FORCE, flags);
 	trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DeferredWake"));
+	return ret;
 }
 
 /* Do a deferred wakeup of rcu_nocb_kthread() from a timer handler. */
@@ -2181,10 +2186,12 @@ static void do_nocb_deferred_wakeup_timer(struct timer_list *t)
  * This means we do an inexact common-case check.  Note that if
  * we miss, ->nocb_timer will eventually clean things up.
  */
-static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
+static bool do_nocb_deferred_wakeup(struct rcu_data *rdp)
 {
 	if (rcu_nocb_need_deferred_wakeup(rdp))
-		do_nocb_deferred_wakeup_common(rdp);
+		return do_nocb_deferred_wakeup_common(rdp);
+
+	return false;
 }
 
 void __init rcu_init_nohz(void)
@@ -2518,8 +2525,9 @@ static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
 	return false;
 }
 
-static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
+static bool do_nocb_deferred_wakeup(struct rcu_data *rdp)
 {
+	return false
 }
 
 static void rcu_spawn_cpu_nocb_kthread(int cpu)

Powered by blists - more mailing lists