lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20170329092511.3958-3-sergey.senozhatsky@gmail.com>
Date:   Wed, 29 Mar 2017 18:25:05 +0900
From:   Sergey Senozhatsky <sergey.senozhatsky@...il.com>
To:     Steven Rostedt <rostedt@...dmis.org>,
        Petr Mladek <pmladek@...e.com>
Cc:     Jan Kara <jack@...e.cz>, Andrew Morton <akpm@...ux-foundation.org>,
        Linus Torvalds <torvalds@...ux-foundation.org>,
        Peter Zijlstra <peterz@...radead.org>,
        "Rafael J . Wysocki" <rjw@...ysocki.net>,
        Eric Biederman <ebiederm@...ssion.com>,
        Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
        Jiri Slaby <jslaby@...e.com>, Pavel Machek <pavel@....cz>,
        Len Brown <len.brown@...el.com>, linux-kernel@...r.kernel.org,
        Sergey Senozhatsky <sergey.senozhatsky.work@...il.com>,
        Sergey Senozhatsky <sergey.senozhatsky@...il.com>
Subject: [RFC][PATCHv2 2/8] printk: introduce printing kernel thread

printk() is quite complex internally and, basically, it does two
slightly independent things:
 a) adds a new message to a kernel log buffer (log_store())
 b) prints kernel log messages to serial consoles (console_unlock())

while (a) is guaranteed to be executed by printk(), (b) is not, for a
variety of reasons, and, unlike log_store(), it comes at a price:

 1) console_unlock() attempts to flush all pending kernel log messages
to the console. Thus, it can loop indefinitely.

 2) while console_unlock() is executed on one particular CPU, printing
pending kernel log messages, other CPUs can simultaneously append new
messages to the kernel log buffer.

 3) the time it takes console_unlock() to print kernel messages also
depends on the speed of the console -- which may not be fast at all.

 4) console_unlock() is executed in the same context as printk(), so
it may be non-preemptible/atomic, which makes 1)-3) dangerous.

As a result, nobody knows how long a printk() call will take, so
it's not really safe to call printk() in a number of situations,
including atomic context, RCU critical sections, interrupt context,
and more.

This patch introduces a dedicated printing kernel thread - printk_kthread.
The main purpose of this kthread is to offload printing to a non-atomic
and always scheduleable context, which eliminates 4) and makes 1)-3) less
critical. printk() now just appends log messages to the kernel log buffer
and wake_up()s printk_kthread instead of locking console_sem and calling
into potentially unsafe console_unlock().

This, however, is not always safe on its own. For example, we can't call
into the scheduler from panic(), because this may cause deadlock. That's
why we introduce a concept of printk_emergency() mode, when printk()
switches back to the old behaviour (console_unlock() from vprintk_emit())
in those cases. In general, this switch happens automatically once a EMERG
log level message appears in the log buffer. Another cases when wake_up()
might not work as expected are suspend, hibernate, etc. For those situations
we provide two new functions:
 -- printk_emergency_begin()
    Disables printk offloading. All printk() calls will attempt
    to lock the console_sem and, if successful, flush kernel log
    messages.

 -- printk_emergency_end()
    Enables printk offloading.

Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@...il.com>
Signed-off-by: Jan Kara <jack@...e.cz>
---
 include/linux/console.h |   3 ++
 kernel/printk/printk.c  | 107 ++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 102 insertions(+), 8 deletions(-)

diff --git a/include/linux/console.h b/include/linux/console.h
index 5949d1855589..f1a86944072e 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -187,6 +187,9 @@ extern bool console_suspend_enabled;
 extern void suspend_console(void);
 extern void resume_console(void);
 
+extern void printk_emergency_begin(void);
+extern void printk_emergency_end(void);
+
 int mda_console_init(void);
 void prom_con_init(void);
 
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 2d07678e9ff9..ab6b3b2a68c6 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -48,6 +48,7 @@
 #include <linux/sched/clock.h>
 #include <linux/sched/debug.h>
 #include <linux/sched/task_stack.h>
+#include <linux/kthread.h>
 
 #include <linux/uaccess.h>
 #include <asm/sections.h>
@@ -402,7 +403,8 @@ DEFINE_RAW_SPINLOCK(logbuf_lock);
 	} while (0)
 
 /*
- * Delayed printk version, for scheduler-internal messages:
+ * Used both for deferred printk version (scheduler-internal messages)
+ * and printk_kthread control.
  */
 #define PRINTK_PENDING_WAKEUP	0x01
 #define PRINTK_PENDING_OUTPUT	0x02
@@ -445,6 +447,42 @@ static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
 static char *log_buf = __log_buf;
 static u32 log_buf_len = __LOG_BUF_LEN;
 
+static struct task_struct *printk_kthread __read_mostly;
+/*
+ * We can't call into the scheduler (wake_up() printk kthread) during
+ * suspend/kexec/etc. This temporarily switches printk to old behaviour.
+ */
+static atomic_t printk_emergency __read_mostly;
+/*
+ * Disable printk_kthread permanently. Unlike `oops_in_progress'
+ * it doesn't go back to 0.
+ */
+static bool printk_kthread_disabled __read_mostly;
+
+static inline bool printk_kthread_enabled(void)
+{
+	return !printk_kthread_disabled &&
+		printk_kthread && atomic_read(&printk_emergency) == 0;
+}
+
+/*
+ * This disables printing offloading and instead attempts
+ * to do the usual console_trylock()->console_unlock().
+ *
+ * Note, this does not stop the printk_kthread if it's already
+ * printing logbuf messages.
+ */
+void printk_emergency_begin(void)
+{
+	atomic_inc(&printk_emergency);
+}
+
+/* This re-enables printk_kthread offloading. */
+void printk_emergency_end(void)
+{
+	atomic_dec(&printk_emergency);
+}
+
 /* Return log buffer address */
 char *log_buf_addr_get(void)
 {
@@ -1765,17 +1803,40 @@ asmlinkage int vprintk_emit(int facility, int level,
 
 	printed_len += log_output(facility, level, lflags, dict, dictlen, text, text_len);
 
+	/*
+	 * Emergency level indicates that the system is unstable and, thus,
+	 * we better stop relying on wake_up(printk_kthread) and try to do
+	 * a direct printing.
+	 */
+	if (level == LOGLEVEL_EMERG)
+		printk_kthread_disabled = true;
+
+	set_bit(PRINTK_PENDING_OUTPUT, &printk_pending);
 	logbuf_unlock_irqrestore(flags);
 
 	/* If called from the scheduler, we can not call up(). */
 	if (!in_sched) {
 		/*
-		 * Try to acquire and then immediately release the console
-		 * semaphore.  The release will print out buffers and wake up
-		 * /dev/kmsg and syslog() users.
+		 * Under heavy printing load/slow serial console/etc
+		 * console_unlock() can stall CPUs, which can result in
+		 * soft/hard-lockups, lost interrupts, RCU stalls, etc.
+		 * Therefore we attempt to print the messages to console
+		 * from a dedicated printk_kthread, which always runs in
+		 * schedulable context.
 		 */
-		if (console_trylock())
-			console_unlock();
+		if (printk_kthread_enabled()) {
+			printk_safe_enter_irqsave(flags);
+			wake_up_process(printk_kthread);
+			printk_safe_exit_irqrestore(flags);
+		} else {
+			/*
+			 * Try to acquire and then immediately release the
+			 * console semaphore. The release will print out
+			 * buffers and wake up /dev/kmsg and syslog() users.
+			 */
+			if (console_trylock())
+				console_unlock();
+		}
 	}
 
 	return printed_len;
@@ -1882,6 +1943,9 @@ static size_t msg_print_text(const struct printk_log *msg,
 			     bool syslog, char *buf, size_t size) { return 0; }
 static bool suppress_message_printing(int level) { return false; }
 
+void printk_emergency_begin(void) {}
+void printk_emergency_end(void) {}
+
 #endif /* CONFIG_PRINTK */
 
 #ifdef CONFIG_EARLY_PRINTK
@@ -2164,6 +2228,13 @@ void console_unlock(void)
 	bool do_cond_resched, retry;
 
 	if (console_suspended) {
+		/*
+		 * Avoid an infinite loop in printk_kthread function
+		 * when console_unlock() cannot flush messages because
+		 * we suspended consoles. Someone else will print the
+		 * messages from resume_console().
+		 */
+		clear_bit(PRINTK_PENDING_OUTPUT, &printk_pending);
 		up_console_sem();
 		return;
 	}
@@ -2182,6 +2253,7 @@ void console_unlock(void)
 	console_may_schedule = 0;
 
 again:
+	clear_bit(PRINTK_PENDING_OUTPUT, &printk_pending);
 	/*
 	 * We released the console_sem lock, so we need to recheck if
 	 * cpu is online and (if not) is there at least one CON_ANYTIME
@@ -2664,8 +2736,11 @@ late_initcall(printk_late_init);
 #if defined CONFIG_PRINTK
 static void wake_up_klogd_work_func(struct irq_work *irq_work)
 {
-	if (test_and_clear_bit(PRINTK_PENDING_OUTPUT, &printk_pending)) {
-		/* If trylock fails, someone else is doing the printing */
+	if (test_bit(PRINTK_PENDING_OUTPUT, &printk_pending)) {
+		/*
+		 * If trylock fails, someone else is doing the printing.
+		 * PRINTK_PENDING_OUTPUT bit is cleared by console_unlock().
+		 */
 		if (console_trylock())
 			console_unlock();
 	}
@@ -2679,6 +2754,22 @@ static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = {
 	.flags = IRQ_WORK_LAZY,
 };
 
+static int printk_kthread_func(void *data)
+{
+	while (1) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (!test_bit(PRINTK_PENDING_OUTPUT, &printk_pending))
+			schedule();
+
+		__set_current_state(TASK_RUNNING);
+
+		console_lock();
+		console_unlock();
+	}
+
+	return 0;
+}
+
 void wake_up_klogd(void)
 {
 	preempt_disable();
-- 
2.12.2

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ