linux-kernel - [RFC][PATCH 3/7] printk: introduce per-cpu alt

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20160927142237.5539-4-sergey.senozhatsky@gmail.com>
Date:   Tue, 27 Sep 2016 23:22:33 +0900
From:   Sergey Senozhatsky <sergey.senozhatsky@...il.com>
To:     Petr Mladek <pmladek@...e.com>, Jan Kara <jack@...e.cz>
Cc:     Andrew Morton <akpm@...ux-foundation.org>,
        Tejun Heo <tj@...nel.org>, Calvin Owens <calvinowens@...com>,
        linux-kernel@...r.kernel.org,
        Sergey Senozhatsky <sergey.senozhatsky@...il.com>,
        Sergey Senozhatsky <sergey.senozhatsky.work@...il.com>
Subject: [RFC][PATCH 3/7] printk: introduce per-cpu alt_print seq buffer

This patch extends the idea of NMI per-cpu buffers to regions
that may cause recursive printk() calls and possible deadlocks.
Namely, printk() can't handle printk calls from schedule code
or printk() calls from lock debugging code (spin_dump() for instance);
because those may be called with `sem->lock' already taken or any
of critical scheduler locks (p->pi_lock, etc.). An example of
a deadlock can be

 vprintk_emit()
  console_unlock()
   up()                        << raw_spin_lock_irqsave(&sem->lock, flags);
    wake_up_process()
     try_to_wake_up()
      ttwu_queue()
       ttwu_do_activate()
        ttwu_activate()
         activate_task()
          enqueue_task()
           enqueue_task_fair()
            cfs_rq_of()
             task_of()
              WARN_ON_ONCE(!entity_is_task(se))
               vprintk_emit()
                console_trylock()
                 down_trylock()
                  raw_spin_lock_irqsave(&sem->lock, flags)
                  ^^^^ deadlock

and some other cases.

Just like in NMI implementation, the solution uses a per-cpu
`printk_func' pointer to 'redirect' printk() calls to a 'safe'
callback, that store messages in a per-cpu buffer and flushes
them back to logbuf buffer later.

Usage example:

 printk()
  local_irq_save()
  alt_printk_enter()
  //
  //  any printk() call from here will endup in vprintk_alt(),
  //  that stores messages in a special per-CPU buffer.
  //
  alt_printk_exit()
  local_irq_restore()

The patch only adds a alt_printk support, we don't use it yet.

Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@...il.com>
---
 kernel/printk/alt_printk.c | 160 +++++++++++++++++++++++++++++++++++++++++----
 kernel/printk/internal.h   |  12 ++++
 2 files changed, 158 insertions(+), 14 deletions(-)

diff --git a/kernel/printk/alt_printk.c b/kernel/printk/alt_printk.c
index 7178661..8978390 100644
--- a/kernel/printk/alt_printk.c
+++ b/kernel/printk/alt_printk.c
@@ -1,5 +1,5 @@
 /*
- * alt_printk.c - Safe printk in NMI context
+ * alt_printk.c - Safe printk for printk-deadlock-prone contexts
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -53,24 +53,39 @@ struct alt_printk_seq_buf {
 static DEFINE_PER_CPU(struct alt_printk_seq_buf, nmi_print_seq);
 
 /*
- * Safe printk() for NMI context. It uses a per-CPU buffer to
- * store the message. NMIs are not nested, so there is always only
- * one writer running. But the buffer might get flushed from another
- * CPU, so we need to be careful.
+ * There can be two alt_printk contexts at most - a `normal' alt_printk
+ * and NMI alt_printk context. Normal alt_printk context is the one that
+ * direct caller of printk() setups (either a process or IRQ) and it can
+ * be preempted only by NMI (if the platform supports NMI). NMI context
+ * can preempt normal alt_printk context, but cannot be preempted on its
+ * own.
  */
-static int vprintk_nmi(const char *fmt, va_list args)
+#ifdef CONFIG_PRINTK_NMI
+#define MAX_ALT_PRINTK_CTX	2
+#else
+#define MAX_ALT_PRINTK_CTX	1
+#endif
+
+struct alt_printk_ctx {
+	atomic_t	idx;
+	unsigned int	entry_count;
+	printk_func_t	saved_printk_func[MAX_ALT_PRINTK_CTX];
+};
+
+static DEFINE_PER_CPU(struct alt_printk_seq_buf, alt_print_seq);
+static DEFINE_PER_CPU(struct alt_printk_ctx, alt_printk_ctx);
+
+static int alt_printk_log_store(struct alt_printk_seq_buf *s,
+		const char *fmt, va_list args)
 {
-	struct alt_printk_seq_buf *s = this_cpu_ptr(&nmi_print_seq);
-	int add = 0;
+	int add;
 	size_t len;
 
 again:
 	len = atomic_read(&s->len);
 
-	if (len >= sizeof(s->buffer)) {
-		atomic_inc(&nmi_message_lost);
+	if (len >= sizeof(s->buffer))
 		return 0;
-	}
 
 	/*
 	 * Make sure that all old data have been read before the buffer was
@@ -99,6 +114,96 @@ static int vprintk_nmi(const char *fmt, va_list args)
 	return add;
 }
 
+/*
+ * Lockless printk(), to avoid deadlocks should the printk() recurse
+ * into itself. It uses a per-CPU buffer to store the message, just like
+ * NMI.
+ */
+static int vprintk_alt(const char *fmt, va_list args)
+{
+	struct alt_printk_seq_buf *s = this_cpu_ptr(&alt_print_seq);
+
+	return alt_printk_log_store(s, fmt, args);
+}
+
+/*
+ * We must keep the track of `printk_func' because alt_printk
+ * context can be preempted by NMI alt_printk context.
+ *
+ * Consider the following example:
+ *
+ * vprintk_emit()
+ * 	alt_printk_enter()
+ * 		printk_func = vprintk_alt;
+ *
+ * -> NMI
+ *  	printk_nmi_enter()
+ *		printk_func = vprintk_nmi;
+ *	printk_nmi_exit()
+ *		printk_func = vprintk_default;
+ *		^^^^^^^^^^^
+ * <- NMI
+ *
+ * 	printk("foo") -> vprintk_default();
+ *
+ * Thus we must restore the orignal `printk_func' value, the one
+ * NMI saw at printk_nmi_enter() time.
+ */
+static void __lockless_printk_enter(printk_func_t new_func)
+{
+	struct alt_printk_ctx *ctx = this_cpu_ptr(&alt_printk_ctx);
+	int idx = atomic_inc_return(&ctx->idx) - 1;
+
+	ctx->saved_printk_func[idx] = this_cpu_read(printk_func);
+	this_cpu_write(printk_func, new_func);
+}
+
+static void __lockless_printk_exit(void)
+{
+	struct alt_printk_ctx *ctx = this_cpu_ptr(&alt_printk_ctx);
+	int idx = atomic_read(&ctx->idx) - 1;
+
+	this_cpu_write(printk_func, ctx->saved_printk_func[idx]);
+	atomic_dec(&ctx->idx);
+}
+
+/* Local IRQs must be disabled; can be preempted by NMI. */
+void alt_printk_enter(void)
+{
+	struct alt_printk_ctx *ctx = this_cpu_ptr(&alt_printk_ctx);
+
+	/*
+	 * We can't switch `printk_func' while the CPU is flushing its
+	 * alternative buffers. At the same time, we leave flushing
+	 * `unprotected', because we always use vprintk_default() there.
+	 *
+	 * ->entry_count can detect printk() recursion from flushing context:
+	 *  -- alt_printk_flush() sets ->entry_count to 1
+	 *  -- every vprintk_default() call from alt_printk_flush() increments
+	 *     ->entry_count to 2 when it enters the recursion un-safe region
+	 *     and decrements it back to 1 when it leaves that region
+	 *  -- thus, if printk() will recurs from recursion un-safe region, we
+	 *     will see ->entry_count > 2.
+	 */
+	ctx->entry_count++;
+	if (ctx->entry_count > 1)
+		return;
+
+	/* @TODO: do something sensible in case of printk() recursion */
+
+	__lockless_printk_enter(vprintk_alt);
+}
+
+/* Local IRQs must be disabled; can be preempted by NMI. */
+void alt_printk_exit(void)
+{
+	struct alt_printk_ctx *ctx = this_cpu_ptr(&alt_printk_ctx);
+
+	if (ctx->entry_count == 1)
+		__lockless_printk_exit();
+	ctx->entry_count--;
+}
+
 static void alt_printk_flush_line(const char *text, int len)
 {
 	/*
@@ -110,7 +215,6 @@ static void alt_printk_flush_line(const char *text, int len)
 		printk_deferred("%.*s", len, text);
 	else
 		printk("%.*s", len, text);
-
 }
 
 /*
@@ -135,6 +239,7 @@ static void __alt_printk_flush(struct irq_work *work)
 		__RAW_SPIN_LOCK_INITIALIZER(read_lock);
 	struct alt_printk_seq_buf *s = container_of(work,
 			struct alt_printk_seq_buf, work);
+	struct alt_printk_ctx *ctx = this_cpu_ptr(&alt_printk_ctx);
 	unsigned long flags;
 	size_t len, size;
 	int i, last_i;
@@ -147,6 +252,11 @@ static void __alt_printk_flush(struct irq_work *work)
 	 * a backtrace.
 	 */
 	raw_spin_lock_irqsave(&read_lock, flags);
+	/*
+	 * Forbid the alt_printk on this CPU, we want to flush messages to
+	 * logbuf, not to alt_printk buffer again.
+	 */
+	ctx->entry_count++;
 
 	i = 0;
 more:
@@ -195,6 +305,7 @@ static void __alt_printk_flush(struct irq_work *work)
 		goto more;
 
 out:
+	ctx->entry_count--;
 	raw_spin_unlock_irqrestore(&read_lock, flags);
 }
 
@@ -248,6 +359,9 @@ void __init alt_printk_init(void)
 		struct alt_printk_seq_buf *s = &per_cpu(nmi_print_seq, cpu);
 
 		init_irq_work(&s->work, __alt_printk_flush);
+
+		s = &per_cpu(alt_print_seq, cpu);
+		init_irq_work(&s->work, __alt_printk_flush);
 	}
 
 	/* Make sure that IRQ works are initialized before enabling. */
@@ -258,12 +372,30 @@ void __init alt_printk_init(void)
 	alt_printk_flush();
 }
 
+/*
+ * Safe printk() for NMI context. It uses a per-CPU buffer to
+ * store the message. NMIs are not nested, so there is always only
+ * one writer running. But the buffer might get flushed from another
+ * CPU, so we need to be careful.
+ */
+static int vprintk_nmi(const char *fmt, va_list args)
+{
+	struct alt_printk_seq_buf *s = this_cpu_ptr(&nmi_print_seq);
+	int add;
+
+	add = alt_printk_log_store(s, fmt, args);
+	if (!add)
+		atomic_inc(&nmi_message_lost);
+
+	return add;
+}
+
 void printk_nmi_enter(void)
 {
-	this_cpu_write(printk_func, vprintk_nmi);
+	__lockless_printk_enter(vprintk_nmi);
 }
 
 void printk_nmi_exit(void)
 {
-	this_cpu_write(printk_func, vprintk_default);
+	__lockless_printk_exit();
 }
diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
index 7fd2838..c5f970e 100644
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -20,6 +20,18 @@ typedef __printf(1, 0) int (*printk_func_t)(const char *fmt, va_list args);
 
 int __printf(1, 0) vprintk_default(const char *fmt, va_list args);
 
+#ifdef CONFIG_PRINTK
+
+void alt_printk_enter(void);
+void alt_printk_exit(void);
+
+#else
+
+void alt_printk_enter(void) { }
+void alt_printk_exit(void) { }
+
+#endif /* CONFIG_PRINTK */
+
 #ifdef CONFIG_PRINTK_NMI
 
 extern raw_spinlock_t logbuf_lock;
-- 
2.10.0.372.g6fe1b14