Message-ID: <20210105095503.GF3040@hirez.programming.kicks-ass.net>
Date: Tue, 5 Jan 2021 10:55:03 +0100
From: Peter Zijlstra <peterz@...radead.org>
To: Frederic Weisbecker <frederic@...nel.org>
Cc: LKML <linux-kernel@...r.kernel.org>,
"Rafael J . Wysocki" <rafael.j.wysocki@...el.com>,
Ingo Molnar <mingo@...nel.org>,
Fabio Estevam <festevam@...il.com>, stable@...r.kernel.org,
Thomas Gleixner <tglx@...utronix.de>,
"Paul E . McKenney" <paulmck@...nel.org>,
Len Brown <lenb@...nel.org>,
Pengutronix Kernel Team <kernel@...gutronix.de>,
NXP Linux Team <linux-imx@....com>,
Daniel Lezcano <daniel.lezcano@...aro.org>,
Shawn Guo <shawnguo@...nel.org>,
Sascha Hauer <s.hauer@...gutronix.de>
Subject: Re: [PATCH 1/4] sched/idle: Fix missing need_resched() check after
rcu_idle_enter()

On Mon, Jan 04, 2021 at 04:20:55PM +0100, Frederic Weisbecker wrote:
> Entering RCU idle mode may cause a deferred wake up of an RCU NOCB_GP
> kthread (rcuog) to be serviced.
>
> Usually a wake up happening while running the idle task is spotted in
> one of the need_resched() checks carefully placed within the idle loop
> that can break to the scheduler.

Urgh, this is horrific and fragile :/ You having had to audit and fix a
number of rcu_idle_enter() callers should've made you realize that
making rcu_idle_enter() return something would've been saner.
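
That is, AFAICT the thing being relied upon is roughly this shape
(simplified sketch from memory, not the actual kernel/sched/idle.c /
default_idle_call() code):

	while (!need_resched()) {	/* last need_resched() check */
		local_irq_disable();
		rcu_idle_enter();	/* may do the deferred rcuog wakeup,
					 * which sets TIF_NEED_RESCHED ... */
		arch_cpu_idle();	/* ... but we go to sleep anyway */
		rcu_idle_exit();
	}
	schedule_idle();

A wakeup raised from inside rcu_idle_enter() itself lands after that
last need_resched() check, so nothing notices it before the CPU goes
to sleep.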

Also, I would hope that when RCU does do that wakeup, it does not then
go and put RCU in idle mode anyway; that would be a natural 'fail'
state for rcu_idle_enter(). But no, *sigh*, it continues to put RCU to
sleep after the wakeup, so that needs fixing too.

I'm thinking that rcu_user_enter() will have the exact same problem? Did
you audit that?

Something like the below should do, combined with a fixup for all
callers (which the compiler will help us find thanks to __must_check).
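
For the callers I'm thinking of something along these lines (sketch
only, using the default_idle_call() style path as an example; the
cpuidle side will want the same treatment):

	if (!rcu_idle_enter()) {
		/*
		 * The deferred NOCB wakeup just set need_resched();
		 * don't go to sleep, let the idle loop break out and
		 * schedule instead.
		 */
		return;
	}
	arch_cpu_idle();
	rcu_idle_exit();
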
---
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index de0826411311..612f66c16078 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -95,10 +95,10 @@ static inline void rcu_sysrq_end(void) { }
#endif /* #else #ifdef CONFIG_RCU_STALL_COMMON */
#ifdef CONFIG_NO_HZ_FULL
-void rcu_user_enter(void);
+bool __must_check rcu_user_enter(void);
void rcu_user_exit(void);
#else
-static inline void rcu_user_enter(void) { }
+static inline bool __must_check rcu_user_enter(void) { return true; }
static inline void rcu_user_exit(void) { }
#endif /* CONFIG_NO_HZ_FULL */
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index df578b73960f..9ba0c5d9e99e 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -43,7 +43,7 @@ bool rcu_gp_might_be_stalled(void);
unsigned long get_state_synchronize_rcu(void);
void cond_synchronize_rcu(unsigned long oldstate);
-void rcu_idle_enter(void);
+bool __must_check rcu_idle_enter(void);
void rcu_idle_exit(void);
void rcu_irq_enter(void);
void rcu_irq_exit(void);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 40e5e3dd253e..13e19e5db0b8 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -625,7 +625,7 @@ EXPORT_SYMBOL_GPL(rcutorture_get_gp_data);
* the possibility of usermode upcalls having messed up our count
* of interrupt nesting level during the prior busy period.
*/
-static noinstr void rcu_eqs_enter(bool user)
+static noinstr bool rcu_eqs_enter(bool user)
{
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
@@ -636,7 +636,7 @@ static noinstr void rcu_eqs_enter(bool user)
if (rdp->dynticks_nesting != 1) {
// RCU will still be watching, so just do accounting and leave.
rdp->dynticks_nesting--;
- return;
+ return true;
}
lockdep_assert_irqs_disabled();
@@ -644,7 +644,14 @@ static noinstr void rcu_eqs_enter(bool user)
trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, atomic_read(&rdp->dynticks));
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
rdp = this_cpu_ptr(&rcu_data);
- do_nocb_deferred_wakeup(rdp);
+ if (do_nocb_deferred_wakeup(rdp)) {
+ /*
+ * We did the wakeup, don't enter EQS, we'll need to abort idle
+ * and schedule.
+ */
+ return false;
+ }
+
rcu_prepare_for_idle();
rcu_preempt_deferred_qs(current);
@@ -657,6 +664,8 @@ static noinstr void rcu_eqs_enter(bool user)
rcu_dynticks_eqs_enter();
// ... but is no longer watching here.
rcu_dynticks_task_enter();
+
+ return true;
}
/**
@@ -670,10 +679,10 @@ static noinstr void rcu_eqs_enter(bool user)
* If you add or remove a call to rcu_idle_enter(), be sure to test with
* CONFIG_RCU_EQS_DEBUG=y.
*/
-void rcu_idle_enter(void)
+bool rcu_idle_enter(void)
{
lockdep_assert_irqs_disabled();
- rcu_eqs_enter(false);
+ return rcu_eqs_enter(false);
}
EXPORT_SYMBOL_GPL(rcu_idle_enter);
@@ -689,10 +698,10 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter);
* If you add or remove a call to rcu_user_enter(), be sure to test with
* CONFIG_RCU_EQS_DEBUG=y.
*/
-noinstr void rcu_user_enter(void)
+noinstr bool rcu_user_enter(void)
{
lockdep_assert_irqs_disabled();
- rcu_eqs_enter(true);
+ return rcu_eqs_enter(true);
}
#endif /* CONFIG_NO_HZ_FULL */
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 7708ed161f4a..9226f4021a36 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -433,7 +433,7 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp,
static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty,
unsigned long flags);
static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp);
-static void do_nocb_deferred_wakeup(struct rcu_data *rdp);
+static bool do_nocb_deferred_wakeup(struct rcu_data *rdp);
static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
static void rcu_spawn_cpu_nocb_kthread(int cpu);
static void __init rcu_spawn_nocb_kthreads(void);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 7e291ce0a1d6..8ca41b3fe4f9 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1631,7 +1631,7 @@ bool rcu_is_nocb_cpu(int cpu)
* Kick the GP kthread for this NOCB group. Caller holds ->nocb_lock
* and this function releases it.
*/
-static void wake_nocb_gp(struct rcu_data *rdp, bool force,
+static bool wake_nocb_gp(struct rcu_data *rdp, bool force,
unsigned long flags)
__releases(rdp->nocb_lock)
{
@@ -1654,8 +1654,11 @@ static void wake_nocb_gp(struct rcu_data *rdp, bool force,
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DoWake"));
}
raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags);
- if (needwake)
+ if (needwake) {
wake_up_process(rdp_gp->nocb_gp_kthread);
+ return true;
+ }
+ return false;
}
/*
@@ -2155,17 +2158,19 @@ static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
-static void do_nocb_deferred_wakeup_common(struct rcu_data *rdp)
+static bool do_nocb_deferred_wakeup_common(struct rcu_data *rdp)
{
unsigned long flags;
+ bool ret;
int ndw;
rcu_nocb_lock_irqsave(rdp, flags);
if (!rcu_nocb_need_deferred_wakeup(rdp)) {
rcu_nocb_unlock_irqrestore(rdp, flags);
- return;
+ return false;
}
ndw = READ_ONCE(rdp->nocb_defer_wakeup);
WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
- wake_nocb_gp(rdp, ndw == RCU_NOCB_WAKE_FORCE, flags);
+ ret = wake_nocb_gp(rdp, ndw == RCU_NOCB_WAKE_FORCE, flags);
trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DeferredWake"));
+ return ret;
}
/* Do a deferred wakeup of rcu_nocb_kthread() from a timer handler. */
@@ -2181,10 +2186,12 @@ static void do_nocb_deferred_wakeup_timer(struct timer_list *t)
* This means we do an inexact common-case check. Note that if
* we miss, ->nocb_timer will eventually clean things up.
*/
-static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
+static bool do_nocb_deferred_wakeup(struct rcu_data *rdp)
{
if (rcu_nocb_need_deferred_wakeup(rdp))
- do_nocb_deferred_wakeup_common(rdp);
+ return do_nocb_deferred_wakeup_common(rdp);
+
+ return false;
}
void __init rcu_init_nohz(void)
@@ -2518,8 +2525,9 @@ static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
return false;
}
-static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
+static bool do_nocb_deferred_wakeup(struct rcu_data *rdp)
{
+	return false;
}
static void rcu_spawn_cpu_nocb_kthread(int cpu)
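
(For completeness: __must_check is warn_unused_result, so any call site
we forget to convert, e.g. a bare:

	rcu_idle_enter();

will trip -Wunused-result at build time; that's how the compiler finds
them for us.)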