lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20170815025625.1977-14-sergey.senozhatsky@gmail.com>
Date:   Tue, 15 Aug 2017 11:56:25 +0900
From:   Sergey Senozhatsky <sergey.senozhatsky@...il.com>
To:     Petr Mladek <pmladek@...e.com>,
        Steven Rostedt <rostedt@...dmis.org>
Cc:     Jan Kara <jack@...e.cz>, Andrew Morton <akpm@...ux-foundation.org>,
        Peter Zijlstra <peterz@...radead.org>,
        "Rafael J . Wysocki" <rjw@...ysocki.net>,
        Eric Biederman <ebiederm@...ssion.com>,
        Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
        Jiri Slaby <jslaby@...e.com>, Pavel Machek <pavel@....cz>,
        Andreas Mohr <andi@...as.de>,
        Tetsuo Handa <penguin-kernel@...ove.SAKURA.ne.jp>,
        linux-kernel@...r.kernel.org,
        Sergey Senozhatsky <sergey.senozhatsky@...il.com>,
        Sergey Senozhatsky <sergey.senozhatsky.work@...il.com>
Subject: [RFC][PATCHv5 13/13] printk: move offloading logic to per-cpu

We have a global offloading state and make the offloading
decision based on printing task pointer and timestamp. If
we keep seeing the same task performing printing for too
long (`atomic_print_limit') we request offloading. Similarly
when we see that printing is now performed by another task,
we reset the timestamp counter.

This, however, will not work in the following case:

===============================================================================

CPU0						CPU1
//taskA						//taskB
preempt_disable()				preempt_disable()

 printk()
  console_trylock()
  console_unlock()
   printing_task = taskA
  up()
						printk()
						 console_trylock()
						 console_unlock()
						  printing_task = taskB
						  ^^^ reset offloading control
						up()
 printk()
  console_trylock()
  console_unlock()
   printing_task = taskA
   ^^^ reset offloading control
  up()
						printk()
						 console_trylock()
						 console_unlock()
						  printing_task = taskB
						  ^^^ reset offloading control
						up()

/*
 * X seconds later
 */

 printk()
  console_trylock()
  console_unlock()
   printing_task = taskA
   ^^^ reset offloading control
  up()
						printk()
						 console_trylock()
						 console_unlock()
						  printing_task = taskB
						  ^^^ reset offloading control
						up()

 lockup!					lockup!

===============================================================================

So this printk ping-pong confuses our offloading control logic.
Move it to per-CPU area and have a separate offloading control
on every CPU.

Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@...il.com>
---
 kernel/printk/printk.c | 36 ++++++++++++++++++++----------------
 1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index ba82152ce5d9..f9799616e9fc 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -558,13 +558,14 @@ static inline void adj_atomic_print_limit(void)
 #endif
 }
 
-static inline unsigned long emergency_timeout(unsigned long ts)
+static inline unsigned long emergency_timeout(unsigned long now,
+						unsigned long ts)
 {
 #ifdef CONFIG_LOCKUP_DETECTOR
 	if (watchdog_thresh)
-		return ts + 2 * watchdog_thresh;
+		return time_after_eq(now, ts + 2 * watchdog_thresh);
 #endif
-	return ts + 10 * atomic_print_limit;
+	return time_after_eq(now, ts + 10 * atomic_print_limit);
 }
 
 /*
@@ -574,13 +575,13 @@ static inline unsigned long emergency_timeout(unsigned long ts)
  * amount of time a process can print from console_unlock().
  *
  * This function must be called from 'printk_safe' context under
- * console_sem lock.
+ * console_sem lock with preemption disabled.
  */
 static inline bool console_offload_printing(void)
 {
-	static struct task_struct *printing_task;
-	static unsigned long printing_start_ts;
-	static unsigned long saved_csw;
+	static DEFINE_PER_CPU(struct task_struct, *printing_task);
+	static DEFINE_PER_CPU(unsigned long, printing_start_ts);
+	static DEFINE_PER_CPU(unsigned long, saved_csw);
 	unsigned long now = local_clock() >> 30LL; /* seconds */
 
 	if (printk_kthread_should_stop())
@@ -600,16 +601,17 @@ static inline bool console_offload_printing(void)
 		goto offload;
 
 	/* A new task - reset the counters. */
-	if (printing_task != current) {
-		printing_start_ts = local_clock() >> 30LL;
-		saved_csw = current->nvcsw + current->nivcsw;
-		printing_task = current;
+	if (this_cpu_read(printing_task) != current) {
+		this_cpu_write(printing_start_ts, local_clock() >> 30LL);
+		this_cpu_write(saved_csw, current->nvcsw + current->nivcsw);
+		this_cpu_write(printing_task, current);
 		return false;
 	}
 
 	adj_atomic_print_limit();
 
-	if (!time_after_eq(now, printing_start_ts + atomic_print_limit))
+	if (!time_after_eq(now, this_cpu_read(printing_start_ts) +
+				atomic_print_limit))
 		return false;
 
 	if (current == printk_kthread) {
@@ -626,7 +628,7 @@ static inline bool console_offload_printing(void)
 		 * back to console_unlock(), it will have another full
 		 * `atomic_print_limit' time slice.
 		 */
-		printing_start_ts = local_clock() >> 30LL;
+		this_cpu_write(printing_start_ts, local_clock() >> 30LL);
 		return true;
 	}
 
@@ -634,10 +636,12 @@ static inline bool console_offload_printing(void)
 	 * A trivial emergency enforcement - give up on printk_kthread if
 	 * we can't wake it up.
 	 */
-	if (time_after_eq(now, emergency_timeout(printing_start_ts)) &&
-			saved_csw == (current->nvcsw + current->nivcsw)) {
+	if (this_cpu_read(saved_csw) == (current->nvcsw + current->nivcsw)
+		&& emergency_timeout(now, this_cpu_read(printing_start_ts))) {
+
 		printk_enforce_emergency = true;
-		pr_crit("Declaring printk emergency mode.\n");
+		pr_crit("CPU%d declared a printk emergency mode.\n",
+				smp_processor_id());
 		return true;
 	}
 
-- 
2.14.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ