[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <f7bd2b3b999051bb3ef4be34526a9262008285f5.camel@infradead.org>
Date: Thu, 26 Sep 2024 16:17:37 +0100
From: David Woodhouse <dwmw2@...radead.org>
To: Thomas Gleixner <tglx@...utronix.de>, Peter Zijlstra
<peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>, Will Deacon
<will@...nel.org>, Waiman Long <longman@...hat.com>, Boqun Feng
<boqun.feng@...il.com>, Kent Overstreet <kent.overstreet@...ux.dev>, Arnd
Bergmann <arnd@...db.de>, Sebastian Andrzej Siewior
<bigeasy@...utronix.de>, David Woodhouse <dwmw@...zon.co.uk>,
linux-kernel@...r.kernel.org
Subject: [PATCH v2] lockdep: add lockdep_cleanup_dead_cpu()
From: David Woodhouse <dwmw@...zon.co.uk>
Add a function to check that an offline CPU has left the tracing
infrastructure in a sane state.
Commit 9bb69ba4c177 ("ACPI: processor_idle: use raw_safe_halt() in
acpi_idle_play_dead()") fixed an issue where the acpi_idle_play_dead()
function called safe_halt() instead of raw_safe_halt(), which had the
side-effect of setting the hardirqs_enabled flag for the offline CPU.
On x86 this triggered warnings from lockdep_assert_irqs_disabled() when
the CPU was brought back online again later. These warnings were too
early for the exception to be handled correctly.
So lockdep turned a perfectly harmless bug into a system crash with a
triple-fault.
Add lockdep_cleanup_dead_cpu() to check for this kind of failure mode,
print the events leading up to it, and correct it so that the CPU can
come online again correctly. Re-introducing the original bug now merely
results in this warning instead:
[ 61.556652] smpboot: CPU 1 is now offline
[ 61.556769] CPU 1 left hardirqs enabled!
[ 61.556915] irq event stamp: 128149
[ 61.556965] hardirqs last enabled at (128149): [<ffffffff81720a36>] acpi_idle_play_dead+0x46/0x70
[ 61.557055] hardirqs last disabled at (128148): [<ffffffff81124d50>] do_idle+0x90/0xe0
[ 61.557117] softirqs last enabled at (128078): [<ffffffff81cec74c>] __do_softirq+0x31c/0x423
[ 61.557199] softirqs last disabled at (128065): [<ffffffff810baae1>] __irq_exit_rcu+0x91/0x100
Signed-off-by: David Woodhouse <dwmw@...zon.co.uk>
Reviewed-by: Thomas Gleixner <tglx@...utronix.de>
---
v2: Update commit message to reflect the fact that the original bug
fix is now merged as commit 9bb69ba4c177.
include/linux/irqflags.h | 6 ++++++
kernel/cpu.c | 1 +
kernel/locking/lockdep.c | 24 ++++++++++++++++++++++++
3 files changed, 31 insertions(+)
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 3f003d5fde53..57b074e0cfbb 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -18,6 +18,8 @@
#include <asm/irqflags.h>
#include <asm/percpu.h>
+struct task_struct;
+
/* Currently lockdep_softirqs_on/off is used only by lockdep */
#ifdef CONFIG_PROVE_LOCKING
extern void lockdep_softirqs_on(unsigned long ip);
@@ -25,12 +27,16 @@
extern void lockdep_hardirqs_on_prepare(void);
extern void lockdep_hardirqs_on(unsigned long ip);
extern void lockdep_hardirqs_off(unsigned long ip);
+ extern void lockdep_cleanup_dead_cpu(unsigned int cpu,
+ struct task_struct *idle);
#else
static inline void lockdep_softirqs_on(unsigned long ip) { }
static inline void lockdep_softirqs_off(unsigned long ip) { }
static inline void lockdep_hardirqs_on_prepare(void) { }
static inline void lockdep_hardirqs_on(unsigned long ip) { }
static inline void lockdep_hardirqs_off(unsigned long ip) { }
+ static inline void lockdep_cleanup_dead_cpu(unsigned int cpu,
+ struct task_struct *idle) {}
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
diff --git a/kernel/cpu.c b/kernel/cpu.c
index d293d52a3e00..c4aaf73dec9e 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1338,6 +1338,7 @@ static int takedown_cpu(unsigned int cpu)
cpuhp_bp_sync_dead(cpu);
+ lockdep_cleanup_dead_cpu(cpu, idle_thread_get(cpu));
tick_cleanup_dead_cpu(cpu);
/*
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 7963deac33c3..42b07c3b8862 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -4583,6 +4583,30 @@ void lockdep_softirqs_off(unsigned long ip)
debug_atomic_inc(redundant_softirqs_off);
}
+/**
+ * lockdep_cleanup_dead_cpu - Ensure CPU lockdep state is cleanly stopped
+ *
+ * @cpu: index of offlined CPU
+ * @idle: task pointer for offlined CPU's idle thread
+ *
+ * Invoked after the CPU is dead. Ensures that the tracing infrastructure
+ * is left in a suitable state for the CPU to be subsequently brought
+ * online again.
+ */
+void lockdep_cleanup_dead_cpu(unsigned int cpu, struct task_struct *idle)
+{
+ if (unlikely(!debug_locks))
+ return;
+
+ if (unlikely(per_cpu(hardirqs_enabled, cpu))) {
+ pr_warn("CPU %u left hardirqs enabled!", cpu);
+ if (idle)
+ print_irqtrace_events(idle);
+ /* Clean it up for when the CPU comes online again. */
+ per_cpu(hardirqs_enabled, cpu) = 0;
+ }
+}
+
static int
mark_usage(struct task_struct *curr, struct held_lock *hlock, int check)
{
--
2.44.0
Download attachment "smime.p7s" of type "application/pkcs7-signature" (5965 bytes)
Powered by blists - more mailing lists