lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:   Thu, 20 Jun 2019 14:22:39 +0300
From:   "Kirill A. Shutemov" <kirill@...temov.name>
To:     Thomas Gleixner <tglx@...utronix.de>,
        Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
        "H. Peter Anvin" <hpa@...or.com>,
        Dave Hansen <dave.hansen@...ux.intel.com>,
        Andy Lutomirski <luto@...nel.org>,
        Peter Zijlstra <peterz@...radead.org>
Cc:     x86@...nel.org, linux-kernel@...r.kernel.org,
        "Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>,
        Kyle Pelton <kyle.d.pelton@...el.com>,
        Baoquan He <bhe@...hat.com>
Subject: [PATCH] x86/mm: Handle physical-virtual alignment mismatch in phys_p4d_init()

Kyle has reported that kernel crashes sometimes when it boots in
5-level paging mode with KASLR enabled:

[    0.000000] WARNING: CPU: 0 PID: 0 at arch/x86/mm/init_64.c:87 phys_p4d_init+0x1d4/0x1ea
[    0.000000] Modules linked in:
[    0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.2.0-rc5+ #27
[    0.000000] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.4
[    0.000000] RIP: 0010:phys_p4d_init+0x1d4/0x1ea
[    0.000000] Code: 20 be 43 94 ff 45 cc 4c 89 e6 e9 90 fe ff ff 48 89 d8 eb 1d 48 8b 4d d0 48 8b 7d c0 45 0f b6 3
[    0.000000] RSP: 0000:ffffffff94403c70 EFLAGS: 00000046 ORIG_RAX: 0000000000000000
[    0.000000] RAX: 0000000000000000 RBX: 0000000340000000 RCX: 0000000000000007
[    0.000000] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
[    0.000000] RBP: ffffffff94403cb8 R08: 0000000000000023 R09: ffffffff947fa680
[    0.000000] R10: 0000000000000008 R11: 0000000000000031 R12: 0000010000000000
[    0.000000] R13: 0000000340000000 R14: ffffffff94403d01 R15: ff484d01962014d0
[    0.000000] FS:  0000000000000000(0000) GS:ffffffff9468a000(0000) knlGS:0000000000000000
[    0.000000] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    0.000000] CR2: ff484d0196201000 CR3: 0000000315b3e000 CR4: 00000000000016b0
[    0.000000] Call Trace:
[    0.000000]  __kernel_physical_mapping_init+0x10a/0x35c
[    0.000000]  kernel_physical_mapping_init+0xe/0x10
[    0.000000]  init_memory_mapping+0x1aa/0x3b0
[    0.000000]  init_range_memory_mapping+0xc8/0x116
[    0.000000]  init_mem_mapping+0x225/0x2eb
[    0.000000]  setup_arch+0x6ff/0xcf5
[    0.000000]  start_kernel+0x64/0x53b
[    0.000000]  ? copy_bootdata+0x1f/0xce
[    0.000000]  x86_64_start_reservations+0x24/0x26
[    0.000000]  x86_64_start_kernel+0x8a/0x8d
[    0.000000]  secondary_startup_64+0xb6/0xc0
[    0.000000] random: get_random_bytes called from print_oops_end_marker+0x3f/0x60 with crng_init=0
...
[    0.000000] BUG: unable to handle page fault for address: ff484d019580eff8
[    0.000000] #PF: supervisor read access in kernel mode
[    0.000000] #PF: error_code(0x0000) - not-present page
[    0.000000] BAD
[    0.000000] Oops: 0000 [#1] SMP NOPTI
[    0.000000] CPU: 0 PID: 0 Comm: swapper Tainted: G        W         5.2.0-rc5+ #27
[    0.000000] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.4
[    0.000000] RIP: 0010:fill_pud+0x13/0x130
[    0.000000] Code: 48 83 c4 08 b8 f4 ff ff ff 5b 41 5c 41 5d 41 5e 41 5f 5d c3 31 c0 c3 e8 2b ee d8 00 55 48 89 4
[    0.000000] RSP: 0000:ffffffff94403dc0 EFLAGS: 00000006
[    0.000000] RAX: 00000000000001ff RBX: ffffffffff5fd000 RCX: 0000000000000030
[    0.000000] RDX: 00000000000001ff RSI: ffffffffff5fd000 RDI: ff484d019580eff8
[    0.000000] RBP: ffffffff94403de0 R08: 0000000000000078 R09: 0000000000000002
[    0.000000] R10: ffffffff94403cb0 R11: 000000000000005b R12: ff484d019580eff8
[    0.000000] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
[    0.000000] FS:  0000000000000000(0000) GS:ffffffff9468a000(0000) knlGS:0000000000000000
[    0.000000] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    0.000000] CR2: ff484d019580aa40 CR3: 000000031580a000 CR4: 00000000000016b0
[    0.000000] Call Trace:
[    0.000000]  set_pte_vaddr_p4d+0x2e/0x50
[    0.000000]  set_pte_vaddr+0x6f/0xb0
[    0.000000]  __native_set_fixmap+0x28/0x40
[    0.000000]  native_set_fixmap+0x39/0x70
[    0.000000]  register_lapic_address+0x49/0xb6
[    0.000000]  early_acpi_boot_init+0xa5/0xde
[    0.000000]  setup_arch+0x944/0xcf5
[    0.000000]  start_kernel+0x64/0x53b
[    0.000000]  ? copy_bootdata+0x1f/0xce
[    0.000000]  x86_64_start_reservations+0x24/0x26
[    0.000000]  x86_64_start_kernel+0x8a/0x8d
[    0.000000]  secondary_startup_64+0xb6/0xc0
[    0.000000] Modules linked in:
[    0.000000] CR2: ff484d019580eff8

Kyle bisected the issue to commmit b569c1843498 ("x86/mm/KASLR: Reduce
randomization granularity for 5-level paging to 1GB")

The commit relaxes KASLR alignment requirements and it can lead to
mismatch bentween 'i' and 'p4d_index(vaddr)' inside the loop in
phys_p4d_init(). The mismatch in its turn leads to clearing wrong p4d
entry and eventually to the oops.

The fix is to make phys_p4d_init() walk virtual address space, not
physical one.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@...ux.intel.com>
Reported-and-tested-by: Kyle Pelton <kyle.d.pelton@...el.com>
Fixes: b569c1843498 ("x86/mm/KASLR: Reduce randomization granularity for 5-level paging to 1GB")
Cc: Baoquan He <bhe@...hat.com>
---
 arch/x86/mm/init_64.c | 39 ++++++++++++++++-----------------------
 1 file changed, 16 insertions(+), 23 deletions(-)

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 693aaf28d5fe..4628ac9105a2 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -671,41 +671,34 @@ static unsigned long __meminit
 phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
 	      unsigned long page_size_mask, bool init)
 {
-	unsigned long paddr_next, paddr_last = paddr_end;
-	unsigned long vaddr = (unsigned long)__va(paddr);
-	int i = p4d_index(vaddr);
+	unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last;
+
+	paddr_last = paddr_end;
+	vaddr = (unsigned long)__va(paddr);
+	vaddr_end = (unsigned long)__va(paddr_end);
+	vaddr_start = vaddr;
 
 	if (!pgtable_l5_enabled())
 		return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end,
 				     page_size_mask, init);
 
-	for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) {
-		p4d_t *p4d;
+	for (; vaddr < vaddr_end; vaddr = vaddr_next) {
+		p4d_t *p4d = p4d_page + p4d_index(vaddr);
 		pud_t *pud;
 
-		vaddr = (unsigned long)__va(paddr);
-		p4d = p4d_page + p4d_index(vaddr);
-		paddr_next = (paddr & P4D_MASK) + P4D_SIZE;
+		vaddr_next = (vaddr & P4D_MASK) + P4D_SIZE;
 
-		if (paddr >= paddr_end) {
-			if (!after_bootmem &&
-			    !e820__mapped_any(paddr & P4D_MASK, paddr_next,
-					     E820_TYPE_RAM) &&
-			    !e820__mapped_any(paddr & P4D_MASK, paddr_next,
-					     E820_TYPE_RESERVED_KERN))
-				set_p4d_init(p4d, __p4d(0), init);
-			continue;
-		}
-
-		if (!p4d_none(*p4d)) {
-			pud = pud_offset(p4d, 0);
-			paddr_last = phys_pud_init(pud, paddr, paddr_end,
-						   page_size_mask, init);
+		if (p4d_val(*p4d)) {
+			pud = (pud_t *)p4d_page_vaddr(*p4d);
+			paddr_last = phys_pud_init(pud, __pa(vaddr),
+						   __pa(vaddr_end),
+						   page_size_mask,
+						   init);
 			continue;
 		}
 
 		pud = alloc_low_page();
-		paddr_last = phys_pud_init(pud, paddr, paddr_end,
+		paddr_last = phys_pud_init(pud, __pa(vaddr), __pa(vaddr_end),
 					   page_size_mask, init);
 
 		spin_lock(&init_mm.page_table_lock);
-- 
2.21.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ