[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230112195540.130014793@infradead.org>
Date: Thu, 12 Jan 2023 20:43:27 +0100
From: Peter Zijlstra <peterz@...radead.org>
To: peterz@...radead.org
Cc: richard.henderson@...aro.org, ink@...assic.park.msu.ru,
mattst88@...il.com, vgupta@...nel.org, linux@...linux.org.uk,
nsekhar@...com, brgl@...ev.pl, ulli.kroll@...glemail.com,
linus.walleij@...aro.org, shawnguo@...nel.org,
Sascha Hauer <s.hauer@...gutronix.de>, kernel@...gutronix.de,
festevam@...il.com, linux-imx@....com, tony@...mide.com,
khilman@...nel.org, krzysztof.kozlowski@...aro.org,
alim.akhtar@...sung.com, catalin.marinas@....com, will@...nel.org,
guoren@...nel.org, bcain@...cinc.com, chenhuacai@...nel.org,
kernel@...0n.name, geert@...ux-m68k.org, sammy@...my.net,
monstr@...str.eu, tsbogend@...ha.franken.de, dinguyen@...nel.org,
jonas@...thpole.se, stefan.kristiansson@...nalahti.fi,
shorne@...il.com, James.Bottomley@...senPartnership.com,
deller@....de, mpe@...erman.id.au, npiggin@...il.com,
christophe.leroy@...roup.eu, paul.walmsley@...ive.com,
palmer@...belt.com, aou@...s.berkeley.edu, hca@...ux.ibm.com,
gor@...ux.ibm.com, agordeev@...ux.ibm.com,
borntraeger@...ux.ibm.com, svens@...ux.ibm.com,
ysato@...rs.sourceforge.jp, dalias@...c.org, davem@...emloft.net,
richard@....at, anton.ivanov@...bridgegreys.com,
johannes@...solutions.net, tglx@...utronix.de, mingo@...hat.com,
bp@...en8.de, dave.hansen@...ux.intel.com, x86@...nel.org,
hpa@...or.com, acme@...nel.org, mark.rutland@....com,
alexander.shishkin@...ux.intel.com, jolsa@...nel.org,
namhyung@...nel.org, jgross@...e.com, srivatsa@...il.mit.edu,
amakhalov@...are.com, pv-drivers@...are.com,
boris.ostrovsky@...cle.com, chris@...kel.net, jcmvbkbc@...il.com,
rafael@...nel.org, lenb@...nel.org, pavel@....cz,
gregkh@...uxfoundation.org, mturquette@...libre.com,
sboyd@...nel.org, daniel.lezcano@...aro.org, lpieralisi@...nel.org,
sudeep.holla@....com, agross@...nel.org, andersson@...nel.org,
konrad.dybcio@...aro.org, anup@...infault.org,
thierry.reding@...il.com, jonathanh@...dia.com,
jacob.jun.pan@...ux.intel.com, atishp@...shpatra.org,
Arnd Bergmann <arnd@...db.de>, yury.norov@...il.com,
andriy.shevchenko@...ux.intel.com, linux@...musvillemoes.dk,
dennis@...nel.org, tj@...nel.org, cl@...ux.com,
rostedt@...dmis.org, mhiramat@...nel.org, frederic@...nel.org,
paulmck@...nel.org, pmladek@...e.com, senozhatsky@...omium.org,
john.ogness@...utronix.de, juri.lelli@...hat.com,
vincent.guittot@...aro.org, dietmar.eggemann@....com,
bsegall@...gle.com, mgorman@...e.de, bristot@...hat.com,
vschneid@...hat.com, ryabinin.a.a@...il.com, glider@...gle.com,
andreyknvl@...il.com, dvyukov@...gle.com,
vincenzo.frascino@....com,
Andrew Morton <akpm@...ux-foundation.org>, jpoimboe@...nel.org,
linux-alpha@...r.kernel.org, linux-kernel@...r.kernel.org,
linux-snps-arc@...ts.infradead.org, linux-omap@...r.kernel.org,
linux-samsung-soc@...r.kernel.org, linux-csky@...r.kernel.org,
linux-hexagon@...r.kernel.org, linux-ia64@...r.kernel.org,
loongarch@...ts.linux.dev, linux-m68k@...ts.linux-m68k.org,
linux-mips@...r.kernel.org, openrisc@...ts.librecores.org,
linux-parisc@...r.kernel.org, linuxppc-dev@...ts.ozlabs.org,
linux-riscv@...ts.infradead.org, linux-s390@...r.kernel.org,
linux-sh@...r.kernel.org, sparclinux@...r.kernel.org,
linux-um@...ts.infradead.org, linux-perf-users@...r.kernel.org,
virtualization@...ts.linux-foundation.org,
linux-xtensa@...ux-xtensa.org, linux-acpi@...r.kernel.org,
linux-pm@...r.kernel.org, linux-clk@...r.kernel.org,
linux-arm-msm@...r.kernel.org, linux-tegra@...r.kernel.org,
linux-arch@...r.kernel.org, linux-mm@...ck.org,
linux-trace-kernel@...r.kernel.org, kasan-dev@...glegroups.com,
"Rafael J. Wysocki" <rafael.j.wysocki@...el.com>,
Ulf Hansson <ulf.hansson@...aro.org>
Subject: [PATCH v3 13/51] cpuidle: Fix ct_idle_*() usage
The whole disable-RCU, enable-IRQS dance is very intricate since
changing IRQ state is traced, which depends on RCU.
Add two helpers for the cpuidle case that mirror the entry code.
Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@...el.com>
Acked-by: Frederic Weisbecker <frederic@...nel.org>
Tested-by: Tony Lindgren <tony@...mide.com>
Tested-by: Ulf Hansson <ulf.hansson@...aro.org>
---
arch/arm/mach-imx/cpuidle-imx6q.c | 4 +--
arch/arm/mach-imx/cpuidle-imx6sx.c | 4 +--
arch/arm/mach-omap2/cpuidle34xx.c | 4 +--
arch/arm/mach-omap2/cpuidle44xx.c | 8 +++---
drivers/acpi/processor_idle.c | 8 ++++--
drivers/cpuidle/cpuidle-big_little.c | 4 +--
drivers/cpuidle/cpuidle-mvebu-v7.c | 4 +--
drivers/cpuidle/cpuidle-psci.c | 4 +--
drivers/cpuidle/cpuidle-riscv-sbi.c | 4 +--
drivers/cpuidle/cpuidle-tegra.c | 8 +++---
drivers/cpuidle/cpuidle.c | 11 ++++----
include/linux/clockchips.h | 4 +--
include/linux/cpuidle.h | 34 ++++++++++++++++++++++++--
kernel/sched/idle.c | 45 ++++++++++-------------------------
kernel/time/tick-broadcast.c | 6 +++-
15 files changed, 86 insertions(+), 66 deletions(-)
--- a/arch/arm/mach-imx/cpuidle-imx6q.c
+++ b/arch/arm/mach-imx/cpuidle-imx6q.c
@@ -25,9 +25,9 @@ static int imx6q_enter_wait(struct cpuid
imx6_set_lpm(WAIT_UNCLOCKED);
raw_spin_unlock(&cpuidle_lock);
- ct_idle_enter();
+ ct_cpuidle_enter();
cpu_do_idle();
- ct_idle_exit();
+ ct_cpuidle_exit();
raw_spin_lock(&cpuidle_lock);
if (num_idle_cpus-- == num_online_cpus())
--- a/arch/arm/mach-imx/cpuidle-imx6sx.c
+++ b/arch/arm/mach-imx/cpuidle-imx6sx.c
@@ -47,9 +47,9 @@ static int imx6sx_enter_wait(struct cpui
cpu_pm_enter();
cpu_cluster_pm_enter();
- ct_idle_enter();
+ ct_cpuidle_enter();
cpu_suspend(0, imx6sx_idle_finish);
- ct_idle_exit();
+ ct_cpuidle_exit();
cpu_cluster_pm_exit();
cpu_pm_exit();
--- a/arch/arm/mach-omap2/cpuidle34xx.c
+++ b/arch/arm/mach-omap2/cpuidle34xx.c
@@ -133,9 +133,9 @@ static int omap3_enter_idle(struct cpuid
}
/* Execute ARM wfi */
- ct_idle_enter();
+ ct_cpuidle_enter();
omap_sram_idle();
- ct_idle_exit();
+ ct_cpuidle_exit();
/*
* Call idle CPU PM enter notifier chain to restore
--- a/arch/arm/mach-omap2/cpuidle44xx.c
+++ b/arch/arm/mach-omap2/cpuidle44xx.c
@@ -105,9 +105,9 @@ static int omap_enter_idle_smp(struct cp
}
raw_spin_unlock_irqrestore(&mpu_lock, flag);
- ct_idle_enter();
+ ct_cpuidle_enter();
omap4_enter_lowpower(dev->cpu, cx->cpu_state);
- ct_idle_exit();
+ ct_cpuidle_exit();
raw_spin_lock_irqsave(&mpu_lock, flag);
if (cx->mpu_state_vote == num_online_cpus())
@@ -186,10 +186,10 @@ static int omap_enter_idle_coupled(struc
}
}
- ct_idle_enter();
+ ct_cpuidle_enter();
omap4_enter_lowpower(dev->cpu, cx->cpu_state);
cpu_done[dev->cpu] = true;
- ct_idle_exit();
+ ct_cpuidle_exit();
/* Wakeup CPU1 only if it is not offlined */
if (dev->cpu == 0 && cpumask_test_cpu(1, cpu_online_mask)) {
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -642,6 +642,8 @@ static int __cpuidle acpi_idle_enter_bm(
*/
bool dis_bm = pr->flags.bm_control;
+ instrumentation_begin();
+
/* If we can skip BM, demote to a safe state. */
if (!cx->bm_sts_skip && acpi_idle_bm_check()) {
dis_bm = false;
@@ -663,11 +665,11 @@ static int __cpuidle acpi_idle_enter_bm(
raw_spin_unlock(&c3_lock);
}
- ct_idle_enter();
+ ct_cpuidle_enter();
acpi_idle_do_entry(cx);
- ct_idle_exit();
+ ct_cpuidle_exit();
/* Re-enable bus master arbitration */
if (dis_bm) {
@@ -677,6 +679,8 @@ static int __cpuidle acpi_idle_enter_bm(
raw_spin_unlock(&c3_lock);
}
+ instrumentation_end();
+
return index;
}
--- a/drivers/cpuidle/cpuidle-big_little.c
+++ b/drivers/cpuidle/cpuidle-big_little.c
@@ -126,13 +126,13 @@ static int bl_enter_powerdown(struct cpu
struct cpuidle_driver *drv, int idx)
{
cpu_pm_enter();
- ct_idle_enter();
+ ct_cpuidle_enter();
cpu_suspend(0, bl_powerdown_finisher);
/* signals the MCPM core that CPU is out of low power state */
mcpm_cpu_powered_up();
- ct_idle_exit();
+ ct_cpuidle_exit();
cpu_pm_exit();
--- a/drivers/cpuidle/cpuidle-mvebu-v7.c
+++ b/drivers/cpuidle/cpuidle-mvebu-v7.c
@@ -36,9 +36,9 @@ static int mvebu_v7_enter_idle(struct cp
if (drv->states[index].flags & MVEBU_V7_FLAG_DEEP_IDLE)
deepidle = true;
- ct_idle_enter();
+ ct_cpuidle_enter();
ret = mvebu_v7_cpu_suspend(deepidle);
- ct_idle_exit();
+ ct_cpuidle_exit();
cpu_pm_exit();
--- a/drivers/cpuidle/cpuidle-psci.c
+++ b/drivers/cpuidle/cpuidle-psci.c
@@ -74,7 +74,7 @@ static int __psci_enter_domain_idle_stat
else
pm_runtime_put_sync_suspend(pd_dev);
- ct_idle_enter();
+ ct_cpuidle_enter();
state = psci_get_domain_state();
if (!state)
@@ -82,7 +82,7 @@ static int __psci_enter_domain_idle_stat
ret = psci_cpu_suspend_enter(state) ? -1 : idx;
- ct_idle_exit();
+ ct_cpuidle_exit();
if (s2idle)
dev_pm_genpd_resume(pd_dev);
--- a/drivers/cpuidle/cpuidle-riscv-sbi.c
+++ b/drivers/cpuidle/cpuidle-riscv-sbi.c
@@ -126,7 +126,7 @@ static int __sbi_enter_domain_idle_state
else
pm_runtime_put_sync_suspend(pd_dev);
- ct_idle_enter();
+ ct_cpuidle_enter();
if (sbi_is_domain_state_available())
state = sbi_get_domain_state();
@@ -135,7 +135,7 @@ static int __sbi_enter_domain_idle_state
ret = sbi_suspend(state) ? -1 : idx;
- ct_idle_exit();
+ ct_cpuidle_exit();
if (s2idle)
dev_pm_genpd_resume(pd_dev);
--- a/drivers/cpuidle/cpuidle-tegra.c
+++ b/drivers/cpuidle/cpuidle-tegra.c
@@ -183,7 +183,7 @@ static int tegra_cpuidle_state_enter(str
tegra_pm_set_cpu_in_lp2();
cpu_pm_enter();
- ct_idle_enter();
+ ct_cpuidle_enter();
switch (index) {
case TEGRA_C7:
@@ -199,7 +199,7 @@ static int tegra_cpuidle_state_enter(str
break;
}
- ct_idle_exit();
+ ct_cpuidle_exit();
cpu_pm_exit();
tegra_pm_clear_cpu_in_lp2();
@@ -240,10 +240,10 @@ static int tegra_cpuidle_enter(struct cp
if (index == TEGRA_C1) {
if (do_rcu)
- ct_idle_enter();
+ ct_cpuidle_enter();
ret = arm_cpuidle_simple_enter(dev, drv, index);
if (do_rcu)
- ct_idle_exit();
+ ct_cpuidle_exit();
} else
ret = tegra_cpuidle_state_enter(dev, index, cpu);
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -14,6 +14,7 @@
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
+#include <linux/sched/idle.h>
#include <linux/notifier.h>
#include <linux/pm_qos.h>
#include <linux/cpu.h>
@@ -152,12 +153,12 @@ static void enter_s2idle_proper(struct c
*/
stop_critical_timings();
if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
- ct_idle_enter();
+ ct_cpuidle_enter();
target_state->enter_s2idle(dev, drv, index);
if (WARN_ON_ONCE(!irqs_disabled()))
- local_irq_disable();
+ raw_local_irq_disable();
if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
- ct_idle_exit();
+ ct_cpuidle_exit();
tick_unfreeze();
start_critical_timings();
@@ -235,14 +236,14 @@ int cpuidle_enter_state(struct cpuidle_d
stop_critical_timings();
if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
- ct_idle_enter();
+ ct_cpuidle_enter();
entered_state = target_state->enter(dev, drv, index);
if (WARN_ONCE(!irqs_disabled(), "%ps leaked IRQ state", target_state->enter))
raw_local_irq_disable();
if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
- ct_idle_exit();
+ ct_cpuidle_exit();
start_critical_timings();
sched_clock_idle_wakeup_event();
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -211,7 +211,7 @@ extern int tick_receive_broadcast(void);
extern void tick_setup_hrtimer_broadcast(void);
extern int tick_check_broadcast_expired(void);
# else
-static inline int tick_check_broadcast_expired(void) { return 0; }
+static __always_inline int tick_check_broadcast_expired(void) { return 0; }
static inline void tick_setup_hrtimer_broadcast(void) { }
# endif
@@ -219,7 +219,7 @@ static inline void tick_setup_hrtimer_br
static inline void clockevents_suspend(void) { }
static inline void clockevents_resume(void) { }
-static inline int tick_check_broadcast_expired(void) { return 0; }
+static __always_inline int tick_check_broadcast_expired(void) { return 0; }
static inline void tick_setup_hrtimer_broadcast(void) { }
#endif /* !CONFIG_GENERIC_CLOCKEVENTS */
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -14,6 +14,7 @@
#include <linux/percpu.h>
#include <linux/list.h>
#include <linux/hrtimer.h>
+#include <linux/context_tracking.h>
#define CPUIDLE_STATE_MAX 10
#define CPUIDLE_NAME_LEN 16
@@ -115,6 +116,35 @@ struct cpuidle_device {
DECLARE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
DECLARE_PER_CPU(struct cpuidle_device, cpuidle_dev);
+static __always_inline void ct_cpuidle_enter(void)
+{
+ lockdep_assert_irqs_disabled();
+ /*
+ * Idle is allowed to (temporary) enable IRQs. It
+ * will return with IRQs disabled.
+ *
+ * Trace IRQs enable here, then switch off RCU, and have
+ * arch_cpu_idle() use raw_local_irq_enable(). Note that
+ * ct_idle_enter() relies on lockdep IRQ state, so switch that
+ * last -- this is very similar to the entry code.
+ */
+ trace_hardirqs_on_prepare();
+ lockdep_hardirqs_on_prepare();
+ instrumentation_end();
+ ct_idle_enter();
+ lockdep_hardirqs_on(_RET_IP_);
+}
+
+static __always_inline void ct_cpuidle_exit(void)
+{
+ /*
+ * Carefully undo the above.
+ */
+ lockdep_hardirqs_off(_RET_IP_);
+ ct_idle_exit();
+ instrumentation_begin();
+}
+
/****************************
* CPUIDLE DRIVER INTERFACE *
****************************/
@@ -289,9 +319,9 @@ extern s64 cpuidle_governor_latency_req(
if (!is_retention) \
__ret = cpu_pm_enter(); \
if (!__ret) { \
- ct_idle_enter(); \
+ ct_cpuidle_enter(); \
__ret = low_level_idle_enter(state); \
- ct_idle_exit(); \
+ ct_cpuidle_exit(); \
if (!is_retention) \
cpu_pm_exit(); \
} \
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -51,18 +51,22 @@ __setup("hlt", cpu_idle_nopoll_setup);
static noinline int __cpuidle cpu_idle_poll(void)
{
+ instrumentation_begin();
trace_cpu_idle(0, smp_processor_id());
stop_critical_timings();
- ct_idle_enter();
- local_irq_enable();
+ ct_cpuidle_enter();
+ raw_local_irq_enable();
while (!tif_need_resched() &&
(cpu_idle_force_poll || tick_check_broadcast_expired()))
cpu_relax();
+ raw_local_irq_disable();
- ct_idle_exit();
+ ct_cpuidle_exit();
start_critical_timings();
trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
+ local_irq_enable();
+ instrumentation_end();
return 1;
}
@@ -85,44 +89,21 @@ void __weak arch_cpu_idle(void)
*/
void __cpuidle default_idle_call(void)
{
- if (current_clr_polling_and_test()) {
- local_irq_enable();
- } else {
-
+ instrumentation_begin();
+ if (!current_clr_polling_and_test()) {
trace_cpu_idle(1, smp_processor_id());
stop_critical_timings();
- /*
- * arch_cpu_idle() is supposed to enable IRQs, however
- * we can't do that because of RCU and tracing.
- *
- * Trace IRQs enable here, then switch off RCU, and have
- * arch_cpu_idle() use raw_local_irq_enable(). Note that
- * ct_idle_enter() relies on lockdep IRQ state, so switch that
- * last -- this is very similar to the entry code.
- */
- trace_hardirqs_on_prepare();
- lockdep_hardirqs_on_prepare();
- ct_idle_enter();
- lockdep_hardirqs_on(_THIS_IP_);
-
+ ct_cpuidle_enter();
arch_cpu_idle();
-
- /*
- * OK, so IRQs are enabled here, but RCU needs them disabled to
- * turn itself back on.. funny thing is that disabling IRQs
- * will cause tracing, which needs RCU. Jump through hoops to
- * make it 'work'.
- */
raw_local_irq_disable();
- lockdep_hardirqs_off(_THIS_IP_);
- ct_idle_exit();
- lockdep_hardirqs_on(_THIS_IP_);
- raw_local_irq_enable();
+ ct_cpuidle_exit();
start_critical_timings();
trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
}
+ local_irq_enable();
+ instrumentation_end();
}
static int call_cpuidle_s2idle(struct cpuidle_driver *drv,
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -622,9 +622,13 @@ struct cpumask *tick_get_broadcast_onesh
* to avoid a deep idle transition as we are about to get the
* broadcast IPI right away.
*/
-int tick_check_broadcast_expired(void)
+noinstr int tick_check_broadcast_expired(void)
{
+#ifdef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H
+ return arch_test_bit(smp_processor_id(), cpumask_bits(tick_broadcast_force_mask));
+#else
return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
+#endif
}
/*
Powered by blists - more mailing lists