lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <lsq.1587683028.710020660@decadent.org.uk>
Date:   Fri, 24 Apr 2020 00:04:47 +0100
From:   Ben Hutchings <ben@...adent.org.uk>
To:     linux-kernel@...r.kernel.org, stable@...r.kernel.org
CC:     akpm@...ux-foundation.org, Denis Kirjanov <kda@...ux-powerpc.org>,
        "Greg Kroah-Hartman" <gregkh@...uxfoundation.org>,
        "Steven Sistare" <steven.sistare@...cle.com>,
        "Pavel Tatashin" <pasha.tatashin@...cle.com>
Subject: [PATCH 3.16 060/245] x86/pti/efi: broken conversion from efi to
 kernel page table

3.16.83-rc1 review patch.  If anyone has any objections, please let me know.

------------------

From: Pavel Tatashin <pasha.tatashin@...cle.com>

In entry_64.S we have code like this:

    /* Unconditionally use kernel CR3 for do_nmi() */
    /* %rax is saved above, so OK to clobber here */
    ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
    /* If PCID enabled, NOFLUSH now and NOFLUSH on return */
    ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID
    pushq   %rax
    /* mask off "user" bit of pgd address and 12 PCID bits: */
    andq    $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
    movq    %rax, %cr3
2:

    /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
    call    do_nmi

With this instruction:
    andq    $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax

We unconditionally switch from whatever our CR3 was to kernel page table.
But, in arch/x86/platform/efi/efi_64.c We temporarily set a different page
table, that does not have the kernel page table with 0x1000 offset from it.

Look in efi_thunk() and efi_thunk_set_virtual_address_map().

So, while CR3 points to the other page table, we get an NMI interrupt,
and clear 0x1000 from CR3, resulting in a bogus CR3 if the 0x1000 bit was
set.

The efi page table comes from realmode/rm/trampoline_64.S:

arch/x86/realmode/rm/trampoline_64.S

141 .bss
142 .balign PAGE_SIZE
143 GLOBAL(trampoline_pgd) .space PAGE_SIZE

Notice: alignment is PAGE_SIZE, so after applying KAISER_SHADOW_PGD_OFFSET
which equal to PAGE_SIZE, we can get a different page table.

But, even if we fix alignment, here the trampoline binary is later copied
into dynamically allocated memory in reserve_real_mode(), so we need to
fix that place as well.

Fixes: f9a1666f97b3 ("KAISER: Kernel Address Isolation")
Signed-off-by: Pavel Tatashin <pasha.tatashin@...cle.com>
Reviewed-by: Steven Sistare <steven.sistare@...cle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@...uxfoundation.org>
[bwh: Adjust the Fixes field for 3.16]
Signed-off-by: Ben Hutchings <ben@...adent.org.uk>
---
 arch/x86/include/asm/kaiser.h        | 10 ++++++++++
 arch/x86/realmode/init.c             |  4 +++-
 arch/x86/realmode/rm/trampoline_64.S |  3 ++-
 3 files changed, 15 insertions(+), 2 deletions(-)

--- a/arch/x86/include/asm/kaiser.h
+++ b/arch/x86/include/asm/kaiser.h
@@ -19,6 +19,16 @@
 
 #define KAISER_SHADOW_PGD_OFFSET 0x1000
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+/*
+ *  A page table address must have this alignment to stay the same when
+ *  KAISER_SHADOW_PGD_OFFSET mask is applied
+ */
+#define KAISER_KERNEL_PGD_ALIGNMENT (KAISER_SHADOW_PGD_OFFSET << 1)
+#else
+#define KAISER_KERNEL_PGD_ALIGNMENT PAGE_SIZE
+#endif
+
 #ifdef __ASSEMBLY__
 #ifdef CONFIG_PAGE_TABLE_ISOLATION
 
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -4,6 +4,7 @@
 #include <asm/cacheflush.h>
 #include <asm/pgtable.h>
 #include <asm/realmode.h>
+#include <asm/kaiser.h>
 
 struct real_mode_header *real_mode_header;
 u32 *trampoline_cr4_features;
@@ -15,7 +16,8 @@ void __init reserve_real_mode(void)
 	size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob);
 
 	/* Has to be under 1M so we can execute real-mode AP code. */
-	mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
+	mem = memblock_find_in_range(0, 1 << 20, size,
+				     KAISER_KERNEL_PGD_ALIGNMENT);
 	if (!mem)
 		panic("Cannot allocate trampoline\n");
 
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -30,6 +30,7 @@
 #include <asm/msr.h>
 #include <asm/segment.h>
 #include <asm/processor-flags.h>
+#include <asm/kaiser.h>
 #include "realmode.h"
 
 	.text
@@ -139,7 +140,7 @@ tr_gdt:
 tr_gdt_end:
 
 	.bss
-	.balign	PAGE_SIZE
+	.balign	KAISER_KERNEL_PGD_ALIGNMENT
 GLOBAL(trampoline_pgd)		.space	PAGE_SIZE
 
 	.balign	8

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ