lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1445835169-8203-2-git-send-email-jack@suse.com>
Date:	Mon, 26 Oct 2015 05:52:44 +0100
From:	Jan Kara <jack@...e.com>
To:	Andrew Morton <akpm@...ux-foundation.org>
Cc:	LKML <linux-kernel@...r.kernel.org>, pmladek@...e.com,
	KY Srinivasan <kys@...rosoft.com>, rostedt@...dmis.org,
	Jan Kara <jack@...e.cz>
Subject: [PATCH 1/7] printk: Hand over printing to console if printing too long

From: Jan Kara <jack@...e.cz>

Currently, console_unlock() prints messages from kernel printk buffer to
console while the buffer is non-empty. When serial console is attached,
printing is slow and thus other CPUs in the system have plenty of time
to append new messages to the buffer while one CPU is printing. Thus the
CPU can spend unbounded amount of time doing printing in console_unlock().
This is especially serious problem if the printk() calling
console_unlock() was called with interrupts disabled.

In practice users have observed a CPU can spend tens of seconds printing
in console_unlock() (usually during boot when hundreds of SCSI devices
are discovered) resulting in RCU stalls (CPU doing printing doesn't
reach quiescent state for a long time), softlockup reports (IPIs for the
printing CPU don't get served and thus other CPUs are spinning waiting
for the printing CPU to process IPIs), and eventually a machine death
(as messages from stalls and lockups append to printk buffer faster than
we are able to print). So these machines are unable to boot with serial
console attached. Also during artificial stress testing SATA disk
disappears from the system because its interrupts aren't served for too
long.

This patch implements a mechanism where after printing specified number
of characters (tunable as a kernel parameter printk.offload_chars), CPU
doing printing asks for help by waking up one of dedicated kthreads.  As
soon as the printing CPU notices kthread got scheduled and is spinning
on print_lock dedicated for that purpose, it drops console_sem,
print_lock, and exits console_unlock(). Kthread then takes over printing
instead. This way no CPU should spend printing too long even if there
is heavy printk traffic.

Signed-off-by: Jan Kara <jack@...e.cz>
---
 Documentation/kernel-parameters.txt |  15 ++++
 kernel/printk/printk.c              | 173 ++++++++++++++++++++++++++++++++----
 2 files changed, 171 insertions(+), 17 deletions(-)

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 22a4b687ea5b..df8adee975ba 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2958,6 +2958,21 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			Format: <bool>  (1/Y/y=enable, 0/N/n=disable)
 			default: disabled
 
+	printk.offload_chars=
+			Printing to console can be relatively slow especially
+			in case of serial console. When there is intensive
+			printing happening from several cpus (as is the case
+			during boot), a cpu can be spending significant time
+			(seconds or more) doing printing. To avoid softlockups,
+			lost interrupts, and similar problems other cpus
+			will take over printing after the currently printing
+			cpu has printed 'printk.offload_chars' characters.
+			Higher value means possibly longer interrupt and other
+			latencies but lower overhead of printing due to handing
+			over of printing.
+			Format: <number> (0 = disabled)
+			default: 1000
+
 	printk.time=	Show timing data prefixed to each printk message line
 			Format: <bool>  (1/Y/y=enable, 0/N/n=disable)
 
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 8f0324ef72ab..1b26263edfa7 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -46,6 +46,7 @@
 #include <linux/utsname.h>
 #include <linux/ctype.h>
 #include <linux/uio.h>
+#include <linux/kthread.h>
 
 #include <asm/uaccess.h>
 
@@ -78,6 +79,29 @@ static DEFINE_SEMAPHORE(console_sem);
 struct console *console_drivers;
 EXPORT_SYMBOL_GPL(console_drivers);
 
+/*
+ * This spinlock is taken when printing to console. It is used only so that
+ * we can spin on it when some other thread wants to take over printing to
+ * console.
+ */
+static DEFINE_SPINLOCK(print_lock);
+
+/*
+ * Number of printing threads spinning on print_lock. Can go away once
+ * spin_is_contended() is reliable.
+ */
+static atomic_t printing_tasks_spinning = ATOMIC_INIT(0);
+
+/*
+ * Number of kernel threads for offloading printing. We need at least two so
+ * that they can hand over printing from one to another one and thus switch
+ * CPUs.
+ */
+#define PRINTING_TASKS 2
+
+/* Wait queue printing kthreads sleep on when idle */
+static DECLARE_WAIT_QUEUE_HEAD(print_queue);
+
 #ifdef CONFIG_LOCKDEP
 static struct lockdep_map console_lock_dep_map = {
 	.name = "console_lock"
@@ -279,6 +303,18 @@ static u32 clear_idx;
 static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
 static char *log_buf = __log_buf;
 static u32 log_buf_len = __LOG_BUF_LEN;
+/*
+ * How many characters can we print in one call of printk before asking
+ * other cpus to continue printing. 0 means infinity. Tunable via
+ * printk.offload_chars kernel parameter. Our default 1000 means about
+ * 0.1s maximum latency due to printing.
+ */
+static unsigned int __read_mostly printk_offload_chars = 1000;
+
+module_param_named(offload_chars, printk_offload_chars, uint,
+		   S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(offload_chars, "offload printing to console to a different"
+	" cpu after this number of characters");
 
 /* Return log buffer address */
 char *log_buf_addr_get(void)
@@ -2208,15 +2244,41 @@ out:
 	raw_spin_unlock_irqrestore(&logbuf_lock, flags);
 }
 
+/*
+ * Returns true iff there is other cpu waiting to take over printing. This
+ * function also takes are of setting PRINTK_HANDOVER_B if we want to hand over
+ * printing to some other cpu.
+ */
+static bool cpu_stop_printing(int printed_chars)
+{
+	/* Oops? Print everything now to maximize chances user will see it */
+	if (oops_in_progress)
+		return false;
+	if (!printk_offload_chars || printed_chars < printk_offload_chars)
+		return false;
+	/*
+	 * Make sure we load fresh value of printing_tasks_spinning. Matches
+	 * the barrier in printing_task()
+	 */
+	smp_rmb();
+	if (atomic_read(&printing_tasks_spinning))
+		return true;
+	wake_up(&print_queue);
+
+	return false;
+}
+
 /**
  * console_unlock - unlock the console system
  *
  * Releases the console_lock which the caller holds on the console system
  * and the console driver list.
  *
- * While the console_lock was held, console output may have been buffered
- * by printk().  If this is the case, console_unlock(); emits
- * the output prior to releasing the lock.
+ * While the console_lock was held, console output may have been buffered by
+ * printk(). If this is the case, console_unlock() emits the output prior to
+ * releasing the lock. However we need not write all the data in the buffer if
+ * we would hog the CPU for too long. In such case we try to hand over printing
+ * to a different cpu.
  *
  * If there is output waiting, we wake /dev/kmsg and syslog() users.
  *
@@ -2230,6 +2292,8 @@ void console_unlock(void)
 	unsigned long flags;
 	bool wake_klogd = false;
 	bool retry;
+	bool hand_over = false;
+	int printed_chars = 0;
 
 	if (console_suspended) {
 		up_console_sem();
@@ -2241,13 +2305,20 @@ void console_unlock(void)
 	/* flush buffered message fragment immediately to console */
 	console_cont_flush(text, sizeof(text));
 again:
+	retry = false;
+	spin_lock_irqsave(&print_lock, flags);
 	for (;;) {
 		struct printk_log *msg;
 		size_t ext_len = 0;
 		size_t len;
 		int level;
 
-		raw_spin_lock_irqsave(&logbuf_lock, flags);
+		if (cpu_stop_printing(printed_chars)) {
+			hand_over = true;
+			break;
+		}
+
+		raw_spin_lock(&logbuf_lock);
 		if (seen_seq != log_next_seq) {
 			wake_klogd = true;
 			seen_seq = log_next_seq;
@@ -2265,8 +2336,10 @@ again:
 			len = 0;
 		}
 skip:
-		if (console_seq == log_next_seq)
+		if (console_seq == log_next_seq) {
+			raw_spin_unlock(&logbuf_lock);
 			break;
+		}
 
 		msg = log_from_idx(console_idx);
 		if (msg->flags & LOG_NOCONS) {
@@ -2306,28 +2379,38 @@ skip:
 		stop_critical_timings();	/* don't trace print latency */
 		call_console_drivers(level, ext_text, ext_len, text, len);
 		start_critical_timings();
-		local_irq_restore(flags);
+		printed_chars += len;
 	}
-	console_locked = 0;
 
 	/* Release the exclusive_console once it is used */
 	if (unlikely(exclusive_console))
 		exclusive_console = NULL;
 
-	raw_spin_unlock(&logbuf_lock);
-
+	console_locked = 0;
 	up_console_sem();
 
+	if (!hand_over) {
+		/*
+		 * Someone could have filled up the buffer again, so re-check
+		 * if there's something to flush. We perform the check under
+		 * print_lock to save one cli / sti pair in the fast path.
+		 */
+		raw_spin_lock(&logbuf_lock);
+		retry = console_seq != log_next_seq;
+		raw_spin_unlock(&logbuf_lock);
+	}
+
 	/*
-	 * Someone could have filled up the buffer again, so re-check if there's
-	 * something to flush. In case we cannot trylock the console_sem again,
-	 * there's a new owner and the console_unlock() from them will do the
-	 * flush, no worries.
-	 */
-	raw_spin_lock(&logbuf_lock);
-	retry = console_seq != log_next_seq;
-	raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+	 * Release print_lock after console_sem so that printing_task()
+	 * succeeds in getting console_sem (unless someone else takes it and
+	 * then he'll be responsible for printing).
+         */
+	spin_unlock_irqrestore(&print_lock, flags);
 
+	/*
+	 * In case we cannot trylock the console_sem again, there's a new owner
+	 * and the console_unlock() from them will do the flush, no worries.
+	 */
 	if (retry && console_trylock())
 		goto again;
 
@@ -2654,9 +2737,52 @@ int unregister_console(struct console *console)
 }
 EXPORT_SYMBOL(unregister_console);
 
+/* Kthread which takes over printing from a CPU which asks for help */
+static int printing_task(void *arg)
+{
+	unsigned long flags;
+	DEFINE_WAIT(wait);
+
+	while (1) {
+		prepare_to_wait_exclusive(&print_queue, &wait,
+					  TASK_INTERRUPTIBLE);
+		schedule();
+		finish_wait(&print_queue, &wait);
+		if (kthread_should_stop())
+			break;
+		/*
+		 * Disable preemption so that we do not get scheduled away from
+		 * the CPU after we get print_lock and before we are finished
+		 * with printing.
+		 */
+		preempt_disable();
+		atomic_inc(&printing_tasks_spinning);
+		/*
+		 * Store printing_tasks_spinning value before we spin. Matches
+		 * the barrier in cpu_stop_printing().
+		 */
+		smp_mb__after_atomic();
+		/*
+		 * Wait for currently printing thread to complete. We spin on
+		 * print_lock instead of waiting on console_sem since we don't
+		 * want to sleep once we got scheduled to make sure we take
+		 * over printing without depending on the scheduler.
+		 */
+		spin_lock_irqsave(&print_lock, flags);
+		atomic_dec(&printing_tasks_spinning);
+		spin_unlock_irqrestore(&print_lock, flags);
+		if (console_trylock())
+			console_unlock();
+		preempt_enable();
+	}
+	return 0;
+}
+
 static int __init printk_late_init(void)
 {
 	struct console *con;
+	int i;
+	struct task_struct *task;
 
 	for_each_console(con) {
 		if (!keep_bootcon && con->flags & CON_BOOT) {
@@ -2664,6 +2790,19 @@ static int __init printk_late_init(void)
 		}
 	}
 	hotcpu_notifier(console_cpu_notify, 0);
+
+	/* Does any handover of printing have any sence? */
+	if (num_possible_cpus() <= 1)
+		return 0;
+
+	for (i = 0; i < PRINTING_TASKS; i++) {
+		task = kthread_run(printing_task, NULL, "print/%d", i);
+		if (IS_ERR(task)) {
+			pr_err("printk: Cannot create printing thread: %ld\n",
+			       PTR_ERR(task));
+		}
+	}
+
 	return 0;
 }
 late_initcall(printk_late_init);
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ