>From 5bba47bcb62b52a0a1eb78fbea4ac25a5a2852bf Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 8 Oct 2015 10:51:42 +0200 Subject: [PATCH] panic: release stale console lock to always get the logbuf printed out In some cases we may end up killing the CPU holding the console lock while still having valuable data in logbuf. E.g. Vitaly is observing the following: - A crash is happening on one CPU and console_unlock() is being called on some other. - console_unlock() tries to print out the buffer before releasing the lock and on a slow console it takes time. - in the meantime the crashing CPU does lots of printk()-s with valuable data (which go to the logbuf) and sends IPIs to all other CPUs. - console_unlock() finishes printing the previous chunk and enables interrupts before trying to print out the rest; the CPU catches the IPI and never releases the console lock. This is not the only possible case: in VT/fb subsystems we have many other console_lock()/console_unlock() users. Non-masked interrupts (or receiving NMI in case of extreme slowness) will have the same result. Getting the whole console buffer printed out on crash should be the top priority. Based on the original patch by Vitaly Kuznetsov. Reported-by: Vitaly Kuznetsov Signed-off-by: Jan Kara --- include/linux/console.h | 4 ++-- kernel/panic.c | 8 ++++++++ kernel/printk/printk.c | 5 ++++- kernel/stop_machine.c | 2 +- 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/include/linux/console.h b/include/linux/console.h index 96da462cdfeb..f40084802f3f 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -151,13 +151,13 @@ extern void console_unlock(void); extern void console_conditional_schedule(void); extern void console_unblank(void); #ifdef CONFIG_SMP -extern void printk_log_buf_drain(void); +extern void printk_log_buf_drain(bool panic); #else /* * In non-SMP kernels there won't be much to drain so save some code for tiny * kernels. 
*/ -static inline void printk_log_buf_drain(void) +static inline void printk_log_buf_drain(bool panic) { } #endif diff --git a/kernel/panic.c b/kernel/panic.c index 04e91ff7560b..d07ed830a9fb 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -23,6 +23,7 @@ #include #include #include +#include #define PANIC_TIMER_STEP 100 #define PANIC_BLINK_SPD 18 @@ -147,6 +148,13 @@ void panic(const char *fmt, ...) bust_spinlocks(0); + /* + * We may have ended up stopping the CPU doing printing (in + * smp_send_stop()) while still having some valuable data in the + * console buffer. Flush it out. + */ + printk_log_buf_drain(true); + if (!panic_blink) panic_blink = no_blink; diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 8e125e98f523..62be2890e6a0 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2401,11 +2401,14 @@ struct tty_driver *console_device(int *index) * console. Note that as soon as this function returns, new messages may be * added to the printk buffer by other CPUs. */ -void printk_log_buf_drain(void) +void printk_log_buf_drain(bool panic) { bool retry; unsigned long flags; + if (panic) + zap_locks(); + while (1) { raw_spin_lock_irqsave(&logbuf_lock, flags); retry = console_seq != log_next_seq; diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index e9496b4a3825..50a03735893e 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -550,7 +550,7 @@ static int __stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cp * finish thus triggering NMI watchdog, RCU lockups etc. Wait for the * printing here to avoid these. */ - printk_log_buf_drain(); + printk_log_buf_drain(false); /* Set the initial state and stop all online cpus. */ set_state(&msdata, MULTI_STOP_PREPARE); -- 2.1.4