lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <66878233e09ff325b018e1bc5a521e9b20c58214.1512763129.git.luto@kernel.org>
Date:   Fri,  8 Dec 2017 11:59:09 -0800
From:   Andy Lutomirski <luto@...nel.org>
To:     x86@...nel.org
Cc:     linux-kernel@...r.kernel.org, Borislav Petkov <bp@...en8.de>,
        Brian Gerst <brgerst@...il.com>,
        David Laight <David.Laight@...lab.com>,
        Kees Cook <keescook@...omium.org>,
        Peter Zijlstra <peterz@...radead.org>,
        Andy Lutomirski <luto@...nel.org>
Subject: [RFC PTI 1/3] x86/pti: Vastly simplify pgd synchronization

Back when we would dynamically add mappings to the usermode tables,
we needed to preallocate all the high top-level entries in the
usermode tables.  We don't need this in recent versions of PTI, so
get rid of preallocation.

With preallocation gone, the comments in pti_set_user_pgd() make
even less sense.  Rearrange the function to make it entirely obvious
what it does and does not do.  FWIW, I haven't even tried to wrap my
head around the old logic, since it seemed to be somewhere between
incomprehensible and wrong.

I admit that a bit of the earlier complexity was based on my
suggestions.  Mea culpa.

Signed-off-by: Andy Lutomirski <luto@...nel.org>
---
 arch/x86/include/asm/pgtable_64.h | 74 +++++++++++++++------------------------
 arch/x86/mm/pti.c                 | 52 ++++++---------------------
 2 files changed, 39 insertions(+), 87 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index f5adf92091c6..be8d086de927 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -195,14 +195,6 @@ static inline bool pgdp_maps_userspace(void *__ptr)
 }
 
 /*
- * Does this PGD allow access from userspace?
- */
-static inline bool pgd_userspace_access(pgd_t pgd)
-{
-	return pgd.pgd & _PAGE_USER;
-}
-
-/*
  * Take a PGD location (pgdp) and a pgd value that needs to be set there.
  * Populates the user and returns the resulting PGD that must be set in
  * the kernel copy of the page tables.
@@ -213,50 +205,42 @@ static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
 	if (!static_cpu_has_bug(X86_BUG_CPU_SECURE_MODE_PTI))
 		return pgd;
 
-	if (pgd_userspace_access(pgd)) {
-		if (pgdp_maps_userspace(pgdp)) {
-			/*
-			 * The user page tables get the full PGD,
-			 * accessible from userspace:
-			 */
-			kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd;
-			/*
-			 * For the copy of the pgd that the kernel uses,
-			 * make it unusable to userspace.  This ensures on
-			 * in case that a return to userspace with the
-			 * kernel CR3 value, userspace will crash instead
-			 * of running.
-			 *
-			 * Note: NX might be not available or disabled.
-			 */
-			if (__supported_pte_mask & _PAGE_NX)
-				pgd.pgd |= _PAGE_NX;
-		}
-	} else if (pgd_userspace_access(*pgdp)) {
+	if (pgdp_maps_userspace(pgdp)) {
 		/*
-		 * We are clearing a _PAGE_USER PGD for which we presumably
-		 * populated the user PGD.  We must now clear the user PGD
-		 * entry.
+		 * The user page tables get the full PGD,
+		 * accessible from userspace:
 		 */
-		if (pgdp_maps_userspace(pgdp)) {
-			kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd;
-		} else {
-			/*
-			 * Attempted to clear a _PAGE_USER PGD which is in
-			 * the kernel portion of the address space.  PGDs
-			 * are pre-populated and we never clear them.
-			 */
-			WARN_ON_ONCE(1);
-		}
+		kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd;
+
+		/*
+		 * If this is normal user memory, make it NX in the kernel
+		 * pagetables so that, if we somehow screw up and return to
+		 * usermode with the kernel CR3 loaded, we'll get a page
+		 * fault instead of allowing user code to execute with
+		 * the wrong CR3.
+		 *
+		 * As exceptions, we don't set NX if:
+		 *  - this is EFI or similar, the kernel may execute from it
+		 *  - we don't have NX support
+		 *  - we're clearing the PGD (i.e. pgd.pgd == 0).
+		 */
+		if ((pgd.pgd & _PAGE_USER) && (__supported_pte_mask & _PAGE_NX))
+			pgd.pgd |= _PAGE_NX;
 	} else {
 		/*
-		 * _PAGE_USER was not set in either the PGD being set or
-		 * cleared.  All kernel PGDs should be pre-populated so
-		 * this should never happen after boot.
+		 * Changes to the high (kernel) portion of the kernelmode
+		 * page tables are not automatically propagated to the
+		 * usermode tables.
+		 *
+		 * Users should keep in mind that, unlike the kernelmode
+		 * tables, there is no vmalloc_fault equivalent for the
+		 * usermode tables.  Top-level entries added to init_mm's
+		 * usermode pgd after boot will not be automatically
+		 * propagated to other mms.
 		 */
-		WARN_ON_ONCE(system_state == SYSTEM_RUNNING);
 	}
 #endif
+
 	/* return the copy of the PGD we want the kernel to use: */
 	return pgd;
 }
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index bd5d042adb3c..b74f3f89f4d9 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -83,8 +83,16 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
 	}
 
 	if (pgd_none(*pgd)) {
-		WARN_ONCE(1, "All user pgds should have been populated\n");
-		return NULL;
+		unsigned long new_p4d_page = __get_free_page(gfp);
+		if (!new_p4d_page)
+			return NULL;
+
+		if (p4d_none(*p4d)) {
+			set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
+			new_p4d_page = 0;
+		}
+		if (new_p4d_page)
+			free_page(new_p4d_page);
 	}
 	BUILD_BUG_ON(pgd_large(*pgd) != 0);
 
@@ -193,45 +201,6 @@ static void __init pti_clone_entry_text(void)
 }
 
 /*
- * Ensure that the top level of the user page tables are entirely
- * populated.  This ensures that all processes that get forked have the
- * same entries.  This way, we do not have to ever go set up new entries in
- * older processes.
- *
- * Note: we never free these, so there are no updates to them after this.
- */
-static void __init pti_init_all_pgds(void)
-{
-	pgd_t *pgd;
-	int i;
-
-	pgd = kernel_to_user_pgdp(pgd_offset_k(0UL));
-	for (i = PTRS_PER_PGD / 2; i < PTRS_PER_PGD; i++) {
-		/*
-		 * Each PGD entry moves up PGDIR_SIZE bytes through the
-		 * address space, so get the first virtual address mapped
-		 * by PGD #i:
-		 */
-		unsigned long addr = i * PGDIR_SIZE;
-#if CONFIG_PGTABLE_LEVELS > 4
-		p4d_t *p4d = p4d_alloc_one(&init_mm, addr);
-		if (!p4d) {
-			WARN_ON(1);
-			break;
-		}
-		set_pgd(pgd + i, __pgd(_KERNPG_TABLE | __pa(p4d)));
-#else /* CONFIG_PGTABLE_LEVELS <= 4 */
-		pud_t *pud = pud_alloc_one(&init_mm, addr);
-		if (!pud) {
-			WARN_ON(1);
-			break;
-		}
-		set_pgd(pgd + i, __pgd(_KERNPG_TABLE | __pa(pud)));
-#endif /* CONFIG_PGTABLE_LEVELS */
-	}
-}
-
-/*
  * Initialize kernel page table isolation
  */
 void __init pti_init(void)
@@ -241,7 +210,6 @@ void __init pti_init(void)
 
 	pr_info("enabled\n");
 
-	pti_init_all_pgds();
 	pti_clone_user_shared();
 	pti_clone_entry_text();
 }
-- 
2.13.6

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ