linux-kernel - Re: [RFC][PATCH v4 1/2] printk: Make printk() completely async

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20160315140738.GA773@swordfish>
Date:	Tue, 15 Mar 2016 23:07:38 +0900
From:	Sergey Senozhatsky <sergey.senozhatsky@...il.com>
To:	Jan Kara <jack@...e.cz>
Cc:	Sergey Senozhatsky <sergey.senozhatsky@...il.com>,
	Andrew Morton <akpm@...ux-foundation.org>,
	Jan Kara <jack@...e.com>, Petr Mladek <pmladek@...e.com>,
	Tejun Heo <tj@...nel.org>,
	Tetsuo Handa <penguin-kernel@...ove.SAKURA.ne.jp>,
	linux-kernel@...r.kernel.org,
	Byungchul Park <byungchul.park@....com>,
	Sergey Senozhatsky <sergey.senozhatsky.work@...il.com>
Subject: Re: [RFC][PATCH v4 1/2] printk: Make printk() completely async

On (03/15/16 11:03), Jan Kara wrote:
[..]
> > +	if (!sync_print) {
> > +		if (printk_thread && !in_panic) {
> > +			/*
> > +			 * This will wakeup the printing kthread and offload
> > +			 * printing to a schedulable context.
> > +			 */
> > +			__this_cpu_or(printk_pending,
> > +					PRINTK_PENDING_KTHREAD_OUTPUT);
> > +			irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
> > +		} else if (in_sched) {
> > +			/*
> > +			 * @in_sched messages may come too early, when we don't
> > +			 * yet have @printk_thread. We can't print deferred
> > +			 * messages directly, because this may deadlock, route
> > +			 * them via IRQ context.
> > +			 */
> > +			__this_cpu_or(printk_pending,
> > +					PRINTK_PENDING_IRQ_OUTPUT);
> > +			irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
> > +		} else {
> > +			sync_print = true;
> > +		}
> > +	}
> 
> I'm a bit undecided whether we want to go through irq work even for the
> common case of !in_sched messages or whether we want to directly call
> wake_up() in that case. Maybe I'd do it like:

thanks for review. I like the change that you have suggested. just
updated wake_up_klogd_work_func() check with '!printk_sync && printk_thread',
user can change `printk_sync' via sysfs.


what I wanted to do with this "irq indirection" is to move wake_up() out of
vprintk_emit(), but I didn't think good enough, I guess.

so, __wake_up() takes the spin_lock_irqsave(&q->lock, flags), which with
spin_debug enabled, can recursively call printk(). since we wake_up()
the printing thread after we release logbuf_lock and UINT_MAX logbuf_cpu,
we have no chance to survive this recursion.

/* theretical possibilities of having printk() in __wake_up_common() or down
 * the call chain are not real concerns. */

so I guess what I want to do now -- is to move part of this wake_up()
decision making under the logbuf_lock and logbuf_cpu area. so recursion
detection code will be able to catch this. and we also need to switch
back to sync printk once the recursion happened; just to shut up the
faulty wake_up() in case if it was the root cause.


something like this (*minimally tested so far*).

-- move wake_up() and friends under the logbuf section; so we can detect
   printk() recursion from wake_up()

-- in recursion_bug branch switch to sync printk. we don't know why did we
   recurse, may be because of wake_up()->spin_lock(). doing
   kthread_stop()->wake_up_process() can be unsafe, I guess, just set
   `printk_sync' to true.


Signed-off-by: Jan Kara <jack@...e.cz>
Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@...il.com>
---
 Documentation/kernel-parameters.txt |  10 ++
 kernel/printk/printk.c              | 202 +++++++++++++++++++++++++-----------
 2 files changed, 154 insertions(+), 58 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 1e58ae9..454999e 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -3114,6 +3114,16 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	printk.time=	Show timing data prefixed to each printk message line
 			Format: <bool>  (1/Y/y=enable, 0/N/n=disable)
 
+	printk.synchronous=
+			By default kernel messages are printed to console
+			asynchronously (except during early boot or when oops
+			is happening). That avoids kernel stalling behind slow
+			serial console and thus avoids softlockups, interrupt
+			timeouts, or userspace timing out during heavy printing.
+			However for debugging problems, printing messages to
+			console immediately may be desirable. This option
+			enables such behavior.
+
 	processor.max_cstate=	[HW,ACPI]
 			Limit processor to maximum C-state
 			max_cstate=9 overrides any DMI blacklist limit.
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index d5fd844..38baed1 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -46,6 +46,7 @@
 #include <linux/utsname.h>
 #include <linux/ctype.h>
 #include <linux/uio.h>
+#include <linux/kthread.h>
 
 #include <asm/uaccess.h>
 #include <asm-generic/sections.h>
@@ -284,6 +285,105 @@ static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
 static char *log_buf = __log_buf;
 static u32 log_buf_len = __LOG_BUF_LEN;
 
+/*
+ * When true, printing to console will happen synchronously unless someone else
+ * is already printing messages.
+ *
+ * The default value on UP systems is 'true'.
+ */
+static bool __read_mostly printk_sync = !IS_ENABLED(CONFIG_SMP);
+module_param_named(synchronous, printk_sync, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(synchronous, "make printing to console synchronous");
+
+/* Printing kthread for async vprintk_emit() */
+static struct task_struct *printk_thread;
+/* Wait for printing wakeups from async vprintk_emit() */
+static DECLARE_WAIT_QUEUE_HEAD(printing_wait);
+
+static int printing_func(void *data)
+{
+	while (1) {
+		DECLARE_WAITQUEUE(wait, current);
+		set_current_state(TASK_INTERRUPTIBLE);
+		add_wait_queue(&printing_wait, &wait);
+
+		schedule();
+		remove_wait_queue(&printing_wait, &wait);
+
+		console_lock();
+		console_unlock();
+	}
+
+	return 0;
+}
+
+static int __init init_printk_thread(void)
+{
+	if (printk_sync)
+		return 0;
+
+	printk_thread = kthread_run(printing_func, NULL, "printk");
+	BUG_ON(IS_ERR(printk_thread));
+	return 0;
+}
+late_initcall(init_printk_thread);
+
+/*
+ * Delayed printk version, for scheduler-internal messages:
+ */
+#define PRINTK_PENDING_WAKEUP	(1<<0)
+#define PRINTK_PENDING_OUTPUT	(1<<1)
+
+static DEFINE_PER_CPU(int, printk_pending);
+
+static void wake_up_klogd_work_func(struct irq_work *irq_work)
+{
+	int pending = __this_cpu_xchg(printk_pending, 0);
+
+	if (pending & PRINTK_PENDING_OUTPUT) {
+		if (!printk_sync && printk_thread) {
+			wake_up(&printing_wait);
+		} else {
+			/*
+			 * If trylock fails, someone else is doing
+			 * the printing
+			 */
+			if (console_trylock())
+				console_unlock();
+		}
+	}
+
+	if (pending & PRINTK_PENDING_WAKEUP)
+		wake_up_interruptible(&log_wait);
+}
+
+static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = {
+	.func = wake_up_klogd_work_func,
+	.flags = IRQ_WORK_LAZY,
+};
+
+void wake_up_klogd(void)
+{
+	preempt_disable();
+	if (waitqueue_active(&log_wait)) {
+		this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
+		irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
+	}
+	preempt_enable();
+}
+
+int printk_deferred(const char *fmt, ...)
+{
+	va_list args;
+	int r;
+
+	va_start(args, fmt);
+	r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, 0, fmt, args);
+	va_end(args);
+
+	return r;
+}
+
 /* Return log buffer address */
 char *log_buf_addr_get(void)
 {
@@ -1609,6 +1709,8 @@ asmlinkage int vprintk_emit(int facility, int level,
 			    const char *dict, size_t dictlen,
 			    const char *fmt, va_list args)
 {
+	/* cpu currently holding logbuf_lock in this function */
+	static unsigned int logbuf_cpu = UINT_MAX;
 	static bool recursion_bug;
 	static char textbuf[LOG_LINE_MAX];
 	char *text = textbuf;
@@ -1618,12 +1720,17 @@ asmlinkage int vprintk_emit(int facility, int level,
 	int this_cpu;
 	int printed_len = 0;
 	bool in_sched = false;
-	/* cpu currently holding logbuf_lock in this function */
-	static unsigned int logbuf_cpu = UINT_MAX;
+	bool in_panic = console_loglevel == CONSOLE_LOGLEVEL_MOTORMOUTH;
+	bool sync_print = printk_sync;
 
 	if (level == LOGLEVEL_SCHED) {
 		level = LOGLEVEL_DEFAULT;
+		/*
+		 * Deferred sched messages must not be printed
+		 * synchronously regardless the @printk_sync or @in_panic.
+		 */
 		in_sched = true;
+		sync_print = false;
 	}
 
 	boot_delay_msec(level);
@@ -1660,6 +1767,7 @@ asmlinkage int vprintk_emit(int facility, int level,
 		static const char recursion_msg[] =
 			"BUG: recent printk recursion!";
 
+		printk_sync = true;
 		recursion_bug = false;
 		/* emit KERN_CRIT message */
 		printed_len += log_store(0, 2, LOG_PREFIX|LOG_NEWLINE, 0,
@@ -1749,13 +1857,45 @@ asmlinkage int vprintk_emit(int facility, int level,
 						 dict, dictlen, text, text_len);
 	}
 
+	/*
+	 * By default we print message to console asynchronously so that kernel
+	 * doesn't get stalled due to slow serial console. That can lead to
+	 * softlockups, lost interrupts, or userspace timing out under heavy
+	 * printing load.
+	 *
+	 * However we resort to synchronous printing of messages during early
+	 * boot, when synchronous printing was explicitly requested by
+	 * kernel parameter, or when console_verbose() was called to print
+	 * everything during panic / oops.
+	 */
+	if (!sync_print) {
+		if (in_sched) {
+			/*
+			 * @in_sched messages may come too early, when we don't
+			 * yet have @printk_thread. We can't print deferred
+			 * messages directly, because this may deadlock, route
+			 * them via IRQ context.
+			 */
+			__this_cpu_or(printk_pending,
+					PRINTK_PENDING_OUTPUT);
+			irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
+		} else if (printk_thread && !in_panic) {
+			/*
+			 * This will wakeup the printing kthread and offload
+			 * printing to a schedulable context.
+			 */
+			wake_up(&printing_wait);
+		} else {
+			sync_print = true;
+		}
+	}
+
 	logbuf_cpu = UINT_MAX;
 	raw_spin_unlock(&logbuf_lock);
 	lockdep_on();
 	local_irq_restore(flags);
 
-	/* If called from the scheduler, we can not call up(). */
-	if (!in_sched) {
+	if (sync_print) {
 		lockdep_off();
 		/*
 		 * Try to acquire and then immediately release the console
@@ -2724,60 +2864,6 @@ late_initcall(printk_late_init);
 
 #if defined CONFIG_PRINTK
 /*
- * Delayed printk version, for scheduler-internal messages:
- */
-#define PRINTK_PENDING_WAKEUP	0x01
-#define PRINTK_PENDING_OUTPUT	0x02
-
-static DEFINE_PER_CPU(int, printk_pending);
-
-static void wake_up_klogd_work_func(struct irq_work *irq_work)
-{
-	int pending = __this_cpu_xchg(printk_pending, 0);
-
-	if (pending & PRINTK_PENDING_OUTPUT) {
-		/* If trylock fails, someone else is doing the printing */
-		if (console_trylock())
-			console_unlock();
-	}
-
-	if (pending & PRINTK_PENDING_WAKEUP)
-		wake_up_interruptible(&log_wait);
-}
-
-static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = {
-	.func = wake_up_klogd_work_func,
-	.flags = IRQ_WORK_LAZY,
-};
-
-void wake_up_klogd(void)
-{
-	preempt_disable();
-	if (waitqueue_active(&log_wait)) {
-		this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
-		irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
-	}
-	preempt_enable();
-}
-
-int printk_deferred(const char *fmt, ...)
-{
-	va_list args;
-	int r;
-
-	preempt_disable();
-	va_start(args, fmt);
-	r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, 0, fmt, args);
-	va_end(args);
-
-	__this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT);
-	irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
-	preempt_enable();
-
-	return r;
-}
-
-/*
  * printk rate limiting, lifted from the networking subsystem.
  *
  * This enforces a rate limit: not more than 10 kernel messages
-- 
2.8.0.rc0