Message-ID: <20180828135647.6d516048@imladris.surriel.com>
Date:   Tue, 28 Aug 2018 13:56:47 -0400
From:   Rik van Riel <riel@...riel.com>
To:     Andy Lutomirski <luto@...nel.org>
Cc:     x86@...nel.org, Borislav Petkov <bp@...en8.de>,
        Jann Horn <jannh@...gle.com>,
        LKML <linux-kernel@...r.kernel.org>, stable@...r.kernel.org,
        Peter Zijlstra <peterz@...radead.org>,
        Nadav Amit <nadav.amit@...il.com>
Subject: [PATCH v2] x86/nmi: Fix some races in NMI uaccess

On Mon, 27 Aug 2018 16:04:16 -0700
Andy Lutomirski <luto@...nel.org> wrote:

> The 0day bot is still chewing on this, but I've tested it a bit locally
> and it seems to do the right thing.

Hi Andy,

The version of the patch below should fix the bug we talked about
in email yesterday.  It should automatically cover kernel threads
in lazy TLB mode, because for them current->mm will be NULL, while
cpu_tlbstate.loaded_mm should never be NULL.
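
In other words, for a CPU that takes an NMI while running a kernel
thread in lazy TLB mode, the check reduces to something like this
(sketch only, not part of the patch; lazy_tlb_nmi_uaccess_okay() is
a made-up name for illustration):

static inline bool lazy_tlb_nmi_uaccess_okay(void)
{
	/* Either &init_mm or the borrowed user mm -- never NULL. */
	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);

	/* A kernel thread has no mm of its own. */
	struct mm_struct *current_mm = current->mm;	/* == NULL */

	/* NULL never equals loaded_mm, so NMI uaccess is refused. */
	return loaded_mm == current_mm;			/* false */
}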

---8<---
From: Andy Lutomirski <luto@...nel.org>
Subject: x86/nmi: Fix some races in NMI uaccess

In NMI context, we might be in the middle of context switching or in
the middle of switch_mm_irqs_off().  In either case, CR3 might not
match current->mm, which could cause copy_from_user_nmi() and
friends to read the wrong memory.

Fix it by adding a new nmi_uaccess_okay() helper and checking it in
copy_from_user_nmi() and in __copy_from_user_nmi()'s callers.

Cc: stable@...r.kernel.org
Cc: Peter Zijlstra <peterz@...radead.org>
Cc: Nadav Amit <nadav.amit@...il.com>
Signed-off-by: Rik van Riel <riel@...riel.com>
Signed-off-by: Andy Lutomirski <luto@...nel.org>
---
 arch/x86/events/core.c          |  2 +-
 arch/x86/include/asm/tlbflush.h | 15 +++++++++++++++
 arch/x86/lib/usercopy.c         |  5 +++++
 arch/x86/mm/tlb.c               |  9 ++++++++-
 4 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 5f4829f10129..dfb2f7c0d019 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2465,7 +2465,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs
 
 	perf_callchain_store(entry, regs->ip);
 
-	if (!current->mm)
+	if (!nmi_uaccess_okay())
 		return;
 
 	if (perf_callchain_user32(regs, entry))
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 29c9da6c62fc..dafe649b18e1 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -246,6 +246,21 @@ struct tlb_state {
 };
 DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
 
+/*
+ * Blindly accessing user memory from NMI context can be dangerous
+ * if we're in the middle of switching the current user task or
+ * switching the loaded mm.  It can also be dangerous if we
+ * interrupted some kernel code that was temporarily using a
+ * different mm.
+ */
+static inline bool nmi_uaccess_okay(void)
+{
+	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+	struct mm_struct *current_mm = current->mm;
+
+	return (loaded_mm == current_mm);
+}
+
 /* Initialize cr4 shadow for this CPU. */
 static inline void cr4_init_shadow(void)
 {
diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c
index c8c6ad0d58b8..3f435d7fca5e 100644
--- a/arch/x86/lib/usercopy.c
+++ b/arch/x86/lib/usercopy.c
@@ -7,6 +7,8 @@
 #include <linux/uaccess.h>
 #include <linux/export.h>
 
+#include <asm/tlbflush.h>
+
 /*
  * We rely on the nested NMI work to allow atomic faults from the NMI path; the
  * nested NMI paths are careful to preserve CR2.
@@ -19,6 +21,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
 	if (__range_not_ok(from, n, TASK_SIZE))
 		return n;
 
+	if (!nmi_uaccess_okay())
+		return n;
+
 	/*
 	 * Even though this function is typically called from NMI/IRQ context
 	 * disable pagefaults so that its behaviour is consistent even when
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 9517d1b2a281..5b75e2fed2b6 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -305,6 +305,14 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 
 		choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
 
+		/*
+		 * Ensure cpu_tlbstate.loaded_mm differs from current->mm
+		 * until the context switch is complete, so NMI handlers
+		 * do not try to access userspace. See nmi_uaccess_okay().
+		 */
+		this_cpu_write(cpu_tlbstate.loaded_mm, next);
+		smp_wmb();
+
 		if (need_flush) {
 			this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
 			this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
@@ -335,7 +343,6 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		if (next != &init_mm)
 			this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
 
-		this_cpu_write(cpu_tlbstate.loaded_mm, next);
 		this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
 	}
 

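For any future NMI-path code that touches user memory directly, a
minimal sketch of how it would use the new helper, in the same spirit
as the perf_callchain_user() hunk above (sketch only, not part of the
patch; dump_user_long() is a made-up name):

/*
 * Sketch, not part of the patch: an NMI-path caller refuses to touch
 * user memory when CR3 may not match current->mm.
 */
static int dump_user_long(const unsigned long __user *uaddr,
			  unsigned long *val)
{
	unsigned long word;

	/* A context switch or switch_mm_irqs_off() may be in progress. */
	if (!nmi_uaccess_okay())
		return -EFAULT;

	/* copy_from_user_nmi() returns the number of bytes not copied. */
	if (copy_from_user_nmi(&word, uaddr, sizeof(word)))
		return -EFAULT;

	*val = word;
	return 0;
}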