linux-kernel - [PATCH v2 4/5] x86/fault: Decode page fault OOPSes better

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite: Windows password security audit tool. GUI, reports in PDF.

[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]

Message-Id: <11212acb25980cd1b3030875cd9502414fbb214d.1542841400.git.luto@kernel.org>
Date:   Wed, 21 Nov 2018 15:11:25 -0800
From:   Andy Lutomirski <luto@...nel.org>
To:     x86@...nel.org
Cc:     LKML <linux-kernel@...r.kernel.org>,
        Yu-cheng Yu <yu-cheng.yu@...el.com>,
        Dave Hansen <dave.hansen@...ux.intel.com>,
        Peter Zijlstra <peterz@...radead.org>,
        Borislav Petkov <bp@...en8.de>,
        Andy Lutomirski <luto@...nel.org>
Subject: [PATCH v2 4/5] x86/fault: Decode page fault OOPSes better

One of Linus' favorite hobbies seems to be looking at OOPSes and
decoding the error code in his head.  This is not one of my favorite
hobbies :)

Teach the page fault OOPS hander to decode the error code.  If it's
a !USER fault from user mode, print an explicit note to that effect
and print out the addresses of various tables that might cause such
an error.

With this patch applied, if I intentionally point the LDT at 0x0 and
run the x86 selftests, I get:

BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
HW error: normal kernel read fault
This was a system access from user code
IDT: 0xfffffe0000000000 (limit=0xfff) GDT: 0xfffffe0000001000 (limit=0x7f)
LDTR: 0x50 -- base=0x0 limit=0xfff7
TR: 0x40 -- base=0xfffffe0000003000 limit=0x206f
PGD 800000000456e067 P4D 800000000456e067 PUD 4623067 PMD 0
SMP PTI
CPU: 0 PID: 153 Comm: ldt_gdt_64 Not tainted 4.19.0+ #1317
Hardware name: ...
RIP: 0033:0x401454

Signed-off-by: Andy Lutomirski <luto@...nel.org>
---
 arch/x86/mm/fault.c | 84 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 84 insertions(+)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index ca38bd0472f2..f5efbdba2b6d 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -27,6 +27,7 @@
 #include <asm/vm86.h>			/* struct vm86			*/
 #include <asm/mmu_context.h>		/* vma_pkey()			*/
 #include <asm/efi.h>			/* efi_recover_from_page_fault()*/
+#include <asm/desc.h>			/* store_idt(), ...		*/
 
 #define CREATE_TRACE_POINTS
 #include <asm/trace/exceptions.h>
@@ -571,10 +572,53 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
 	return 0;
 }
 
+static void show_ldttss(const struct desc_ptr *gdt, const char *name, u16 index)
+{
+	u32 offset = (index >> 3) * sizeof(struct desc_struct);
+	unsigned long addr;
+	struct ldttss_desc desc;
+
+	if (index == 0) {
+		pr_alert("%s: NULL\n", name);
+		return;
+	}
+
+	if (offset + sizeof(struct ldttss_desc) >= gdt->size) {
+		pr_alert("%s: 0x%hx -- out of bounds\n", name, index);
+		return;
+	}
+
+	if (probe_kernel_read(&desc, (void *)(gdt->address + offset),
+			      sizeof(struct ldttss_desc))) {
+		pr_alert("%s: 0x%hx -- GDT entry is not readable\n",
+			 name, index);
+		return;
+	}
+
+	addr = desc.base0 | (desc.base1 << 16) | (desc.base2 << 24);
+#ifdef CONFIG_X86_64
+	addr |= ((u64)desc.base3 << 32);
+#endif
+	pr_alert("%s: 0x%hx -- base=0x%lx limit=0x%x\n",
+		 name, index, addr, (desc.limit0 | (desc.limit1 << 16)));
+}
+
+static void errstr(unsigned long ec, char *buf, unsigned long mask,
+		   const char *txt)
+{
+	if (ec & mask) {
+		if (buf[0])
+			strcat(buf, " ");
+		strcat(buf, txt);
+	}
+}
+
 static void
 show_fault_oops(struct pt_regs *regs, unsigned long error_code,
 		unsigned long address)
 {
+	char errtxt[64];
+
 	if (!oops_may_print())
 		return;
 
@@ -602,6 +646,46 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
 		 address < PAGE_SIZE ? "NULL pointer dereference" : "paging request",
 		 (void *)address);
 
+	errtxt[0] = 0;
+	errstr(error_code, errtxt, X86_PF_PROT, "PROT");
+	errstr(error_code, errtxt, X86_PF_WRITE, "WRITE");
+	errstr(error_code, errtxt, X86_PF_USER, "USER");
+	errstr(error_code, errtxt, X86_PF_RSVD, "RSVD");
+	errstr(error_code, errtxt, X86_PF_INSTR, "INSTR");
+	errstr(error_code, errtxt, X86_PF_PK, "PK");
+	pr_alert("HW error: %s\n", error_code ? errtxt :
+		 "normal kernel read fault");
+	if (!(error_code & X86_PF_USER) && user_mode(regs)) {
+		struct desc_ptr idt, gdt;
+		u16 ldtr, tr;
+
+		pr_alert("This was a system access from user code\n");
+
+		/*
+		 * This can happen for quite a few reasons.  The more obvious
+		 * ones are faults accessing the GDT, or LDT.  Perhaps
+		 * surprisingly, if the CPU tries to deliver a benign or
+		 * contributory exception from user code and gets a page fault
+		 * during delivery, the page fault can be delivered as though
+		 * it originated directly from user code.  This could happen
+		 * due to wrong permissions on the IDT, GDT, LDT, TSS, or
+		 * kernel or IST stack.
+		 */
+		store_idt(&idt);
+
+		/* Usable even on Xen PV -- it's just slow. */
+		native_store_gdt(&gdt);
+
+		pr_alert("IDT: 0x%lx (limit=0x%hx) GDT: 0x%lx (limit=0x%hx)\n",
+			 idt.address, idt.size, gdt.address, gdt.size);
+
+		store_ldt(ldtr);
+		show_ldttss(&gdt, "LDTR", ldtr);
+
+		store_tr(tr);
+		show_ldttss(&gdt, "TR", tr);
+	}
+
 	dump_pagetable(address);
 }
 
-- 
2.17.2