Message-Id: <6b7540535dccc043cee1a2d2e399e1eebf9eb332.1512195450.git.luto@kernel.org>
Date:   Fri,  1 Dec 2017 22:20:11 -0800
From:   Andy Lutomirski <luto@...nel.org>
To:     X86 ML <x86@...nel.org>
Cc:     Borislav Petkov <bpetkov@...e.de>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
        Brian Gerst <brgerst@...il.com>,
        Dave Hansen <dave.hansen@...el.com>,
        Linus Torvalds <torvalds@...ux-foundation.org>,
        Josh Poimboeuf <jpoimboe@...hat.com>,
        Andy Lutomirski <luto@...nel.org>
Subject: [PATCH 2/2] x86/kpti: Reference all cpu_entry_area pagetables in the usermode tables

We were manually configuring cpu_entry_area in the usermode tables.
This was error-prone and wasted memory.  (Not much memory, but
still.)  Instead, just reference the same pagetables.

This avoids needing to keep the KPTI code and the normal
cpu_entry_area code in sync, since the KPTI code no longer cares
what's in cpu_entry_area.
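
To illustrate the idea, here is a minimal user-space C sketch (not kernel
code; the table names and sizes are purely illustrative): instead of
re-creating each leaf mapping in the usermode tables, a single directory
entry is copied so both hierarchies reference the same lower-level table,
and later changes made through the kernel side become visible on the user
side with no extra bookkeeping.

/*
 * User-space model of sharing a page-table level.  cpu_entry_area_table
 * stands in for the lower-level page tables that map cpu_entry_area;
 * kernel_table and user_table stand in for the kernelmode and usermode
 * top-level tables.  All names here are made up for illustration.
 */
#include <stdio.h>

#define ENTRIES 4

static unsigned long cpu_entry_area_table[ENTRIES];
static unsigned long *kernel_table[ENTRIES];
static unsigned long *user_table[ENTRIES];

int main(void)
{
	/* The kernelmode table already references the shared table. */
	kernel_table[0] = cpu_entry_area_table;

	/*
	 * Old approach: re-create each leaf mapping in the user table.
	 * New approach: copy the one directory entry so both tables
	 * point at the same lower-level table.
	 */
	user_table[0] = kernel_table[0];

	/* A later change made through the kernelmode table... */
	kernel_table[0][2] = 0xdeadbeef;

	/* ...is visible through the usermode table automatically. */
	printf("user view: %#lx\n", user_table[0][2]);
	return 0;
}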

[This does *not* work on the current KPTI series.  It requires that
 all the kernelmode cpu_entry_area page tables are pre-allocated.  That
 happens in the series as I submitted it, but tglx changed it for
 reasons that I haven't figured out.]

Signed-off-by: Andy Lutomirski <luto@...nel.org>
---
 arch/x86/include/asm/fixmap.h | 14 +++++---
 arch/x86/include/asm/kpti.h   |  8 +++--
 arch/x86/kernel/cpu/common.c  |  3 --
 arch/x86/mm/kpti.c            | 82 ++++++++++++++++++++++++++-----------------
 4 files changed, 64 insertions(+), 43 deletions(-)

diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 839addd1eaec..a630cd2861f7 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -142,16 +142,20 @@ enum fixed_addresses {
 #ifdef CONFIG_PARAVIRT
 	FIX_PARAVIRT_BOOTMAP,
 #endif
-	FIX_TEXT_POKE1,	/* reserve 2 pages for text_poke() */
-	FIX_TEXT_POKE0, /* first page is last, because allocation is backward */
 #ifdef	CONFIG_X86_INTEL_MID
 	FIX_LNW_VRTC,
 #endif
-	/* Fixmap entries to remap the GDTs, one per processor. */
-	FIX_CPU_ENTRY_AREA_TOP,
+	FIX_TEXT_POKE1,	/* reserve 2 pages for text_poke() */
+	FIX_TEXT_POKE0, /* first page is last, because allocation is backward */
+
+	/*
+	 * Fixmap entries to remap the GDTs, one per processor.  Align
+	 * to a PMD boundary.
+	 */
+	FIX_CPU_ENTRY_AREA_TOP = round_up(FIX_TEXT_POKE0 + 1, PTRS_PER_PMD),
 	FIX_CPU_ENTRY_AREA_BOTTOM = FIX_CPU_ENTRY_AREA_TOP + (CPU_ENTRY_AREA_PAGES * NR_CPUS) - 1,
 
-	__end_of_permanent_fixed_addresses,
+	__end_of_permanent_fixed_addresses = round_up(FIX_CPU_ENTRY_AREA_BOTTOM + 1, PTRS_PER_PMD),
 
 	/*
 	 * 512 temporary boot-time mappings, used by early_ioremap(),
diff --git a/arch/x86/include/asm/kpti.h b/arch/x86/include/asm/kpti.h
index 0c10e86ae3f8..df52cec2a53b 100644
--- a/arch/x86/include/asm/kpti.h
+++ b/arch/x86/include/asm/kpti.h
@@ -1,5 +1,8 @@
 #ifndef _ASM_X86_KPTI_H
 #define _ASM_X86_KPTI_H
+
+#include <linux/init.h>
+
 /*
  * Copyright(c) 2017 Intel Corporation. All rights reserved.
  *
@@ -34,10 +37,9 @@ extern int kpti_add_mapping(unsigned long addr, unsigned long size,
 			      unsigned long flags);
 
 /**
- *  kpti_add_mapping_cpu_entry - map the cpu entry area
- *  @cpu: the CPU for which the entry area is being mapped
+ *  kpti_clone_cpu_entry_areas - clone cpu_entry_areas to the usermode tables
  */
-extern void kpti_add_mapping_cpu_entry(int cpu);
+extern void __init kpti_clone_cpu_entry_areas(void);
 
 /**
  *  kpti_remove_mapping - remove a kernel mapping from the userpage tables
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 00697119f983..3dc814519c92 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -606,9 +606,6 @@ void __init setup_cpu_entry_area(int cpu)
 				sizeof(struct debug_store) / PAGE_SIZE,
 				PAGE_KERNEL);
 #endif
-	/* CPU 0's mapping is done in kpti_init() */
-	if (cpu)
-		kpti_add_mapping_cpu_entry(cpu);
 }
 
 /* Load the original GDT from the per-cpu structure */
diff --git a/arch/x86/mm/kpti.c b/arch/x86/mm/kpti.c
index 52fd833845ba..cd81a7432f49 100644
--- a/arch/x86/mm/kpti.c
+++ b/arch/x86/mm/kpti.c
@@ -240,7 +240,7 @@ static pmd_t *kpti_shadow_pagetable_walk_pmd(unsigned long address,
  * Returns a pointer to a PTE on success, or NULL on failure.
  */
 static pte_t *kpti_shadow_pagetable_walk(unsigned long address,
-					   unsigned long flags)
+					 unsigned long flags)
 {
 	pmd_t *pmd = kpti_shadow_pagetable_walk_pmd(address, flags);
 	pte_t *pte;
@@ -401,28 +401,55 @@ static void __init kpti_init_all_pgds(void)
 	WARN_ON(__ret);							\
 } while (0)
 
-void kpti_add_mapping_cpu_entry(int cpu)
+void __init kpti_clone_cpu_entry_areas(void)
 {
-	kpti_add_user_map_early(get_cpu_gdt_ro(cpu), PAGE_SIZE,
-				__PAGE_KERNEL_RO);
-
-	kpti_add_user_map_early(&get_cpu_entry_area(cpu)->tss,
-				sizeof(get_cpu_entry_area(cpu)->tss),
-				__PAGE_KERNEL | _PAGE_GLOBAL);
-
-	/* entry stack */
-	kpti_add_user_map_early(&get_cpu_entry_area(cpu)->SYSENTER_stack_page,
-				sizeof(get_cpu_entry_area(cpu)->SYSENTER_stack_page),
-				__PAGE_KERNEL | _PAGE_GLOBAL);
-
-	/* Entry code, so needs to be EXEC */
-	kpti_add_user_map_early(&get_cpu_entry_area(cpu)->entry_trampoline,
-				sizeof(get_cpu_entry_area(cpu)->entry_trampoline),
-				__PAGE_KERNEL_RX | _PAGE_GLOBAL);
-
-	kpti_add_user_map_early(&get_cpu_entry_area(cpu)->exception_stacks,
-				sizeof(get_cpu_entry_area(cpu)->exception_stacks),
-				__PAGE_KERNEL | _PAGE_GLOBAL);
+	int cpu;
+	unsigned long last_pmd_addr = 0;
+
+	/* The top of the cpu_entry_area block is meant to be PMD-aligned. */
+	WARN_ON((unsigned long)(get_cpu_entry_area(NR_CPUS-1) + 1) & ~PMD_MASK);
+
+	/*
+	 * Iterate over possible CPUs, not addresses: NR_CPUS may be so much
+	 * larger than the actual number of possible CPUs that there are
+	 * unpopulated PMDs in the cpu_entry_area range.
+	 */
+	for_each_possible_cpu(cpu) {
+		pgd_t *pgd;
+		p4d_t *p4d;
+		pud_t *pud;
+		pmd_t *pmd, *target_pmd;
+		unsigned long addr =
+			(unsigned long)get_cpu_entry_area(cpu) & PMD_MASK;
+
+		if (addr == last_pmd_addr)
+			continue;
+		last_pmd_addr = addr;
+
+		pgd = pgd_offset_k(addr);
+		if (WARN_ON(pgd_none(*pgd)))
+			return;
+		p4d = p4d_offset(pgd, addr);
+		if (WARN_ON(p4d_none(*p4d)))
+			return;
+		pud = pud_offset(p4d, addr);
+		if (WARN_ON(pud_none(*pud)))
+			return;
+		pmd = pmd_offset(pud, addr);
+		if (WARN_ON(pmd_none(*pmd)))
+			return;
+
+		target_pmd = kpti_shadow_pagetable_walk_pmd(addr, 0);
+		if (WARN_ON(!target_pmd))
+			return;
+
+		/*
+		 * Copy the PMD.  That is, the kernelmode and usermode tables
+		 * will share all last-level page tables containing
+		 * cpu_entry_area mappings.
+		 */
+		*target_pmd = *pmd;
+	}
 }
 
 /*
@@ -459,16 +486,7 @@ void __init kpti_init(void)
 				  sizeof(gate_desc) * NR_VECTORS,
 				  __PAGE_KERNEL_RO | _PAGE_GLOBAL);
 
-	/*
-	 * We delay CPU 0's mappings because these structures are created
-	 * before the page allocator is up.  Deferring it until here lets
-	 * us use the plain page allocator unconditionally in the page
-	 * table code above.
-	 *
-	 * This is OK because kpti_init() is called long before we ever run
-	 * userspace and need the KERNEL_PAGE_TABLE_ISOLATION mappings.
-	 */
-	kpti_add_mapping_cpu_entry(0);
+	kpti_clone_cpu_entry_areas();
 }
 
 int kpti_add_mapping(unsigned long addr, unsigned long size,
-- 
2.13.6
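
A note on the fixmap arithmetic above: the fixmap.h hunk rounds the
cpu_entry_area fixmap indices up to multiples of PTRS_PER_PMD so that
(together with the alignment of the fixmap region itself) the
cpu_entry_area covers whole PMDs, which is what lets
kpti_clone_cpu_entry_areas() copy whole PMD entries; the "& ~PMD_MASK"
test in that function checks exactly that alignment.  The minimal
user-space sketch below assumes 4 KiB pages and PTRS_PER_PMD == 512
(one PMD covers 2 MiB); the fixmap index used is made up.

/*
 * Minimal sketch of the rounding and the alignment check, under the
 * assumptions stated above.  Not kernel code.
 */
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PTRS_PER_PMD	512UL
#define PMD_SIZE	(PTRS_PER_PMD * PAGE_SIZE)	/* 2 MiB */
#define PMD_MASK	(~(PMD_SIZE - 1))

/* Same result as the kernel's round_up() for a power-of-two second arg. */
#define round_up(x, a)	(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned long fix_text_poke0 = 1234;	/* made-up fixmap index */
	unsigned long top = round_up(fix_text_poke0 + 1, PTRS_PER_PMD);

	/* The rounded index is a multiple of PTRS_PER_PMD... */
	printf("rounded index: %lu (multiple of 512: %s)\n",
	       top, top % PTRS_PER_PMD == 0 ? "yes" : "no");

	/* ...and a PMD-aligned address passes the "& ~PMD_MASK" check, */
	unsigned long aligned = 16 * PMD_SIZE;
	/* while an address inside a PMD leaves a nonzero offset. */
	unsigned long unaligned = aligned + PAGE_SIZE;

	printf("aligned   & ~PMD_MASK = %#lx\n", aligned & ~PMD_MASK);
	printf("unaligned & ~PMD_MASK = %#lx\n", unaligned & ~PMD_MASK);
	return 0;
}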
