lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230915105933.495735-11-matteorizzo@google.com>
Date:   Fri, 15 Sep 2023 10:59:29 +0000
From:   Matteo Rizzo <matteorizzo@...gle.com>
To:     cl@...ux.com, penberg@...nel.org, rientjes@...gle.com,
        iamjoonsoo.kim@....com, akpm@...ux-foundation.org, vbabka@...e.cz,
        roman.gushchin@...ux.dev, 42.hyeyoo@...il.com,
        keescook@...omium.org, linux-kernel@...r.kernel.org,
        linux-doc@...r.kernel.org, linux-mm@...ck.org,
        linux-hardening@...r.kernel.org, tglx@...utronix.de,
        mingo@...hat.com, bp@...en8.de, dave.hansen@...ux.intel.com,
        x86@...nel.org, hpa@...or.com, corbet@....net, luto@...nel.org,
        peterz@...radead.org
Cc:     jannh@...gle.com, matteorizzo@...gle.com, evn@...gle.com,
        poprdi@...gle.com, jordyzomer@...gle.com
Subject: [RFC PATCH 10/14] x86: Create virtual memory region for SLUB

From: Jann Horn <jannh@...gle.com>

SLAB_VIRTUAL reserves 512 GiB of virtual memory and uses them for both
struct slab and the actual slab memory. The pointers returned by
kmem_cache_alloc will point to this range of memory.

Signed-off-by: Jann Horn <jannh@...gle.com>
Co-developed-by: Matteo Rizzo <matteorizzo@...gle.com>
Signed-off-by: Matteo Rizzo <matteorizzo@...gle.com>
---
 Documentation/arch/x86/x86_64/mm.rst    |  4 ++--
 arch/x86/include/asm/pgtable_64_types.h | 16 ++++++++++++++++
 arch/x86/mm/init_64.c                   | 19 +++++++++++++++----
 arch/x86/mm/kaslr.c                     |  9 +++++++++
 arch/x86/mm/mm_internal.h               |  4 ++++
 mm/slub.c                               |  4 ++++
 security/Kconfig.hardening              |  2 ++
 7 files changed, 52 insertions(+), 6 deletions(-)

diff --git a/Documentation/arch/x86/x86_64/mm.rst b/Documentation/arch/x86/x86_64/mm.rst
index 35e5e18c83d0..121179537175 100644
--- a/Documentation/arch/x86/x86_64/mm.rst
+++ b/Documentation/arch/x86/x86_64/mm.rst
@@ -57,7 +57,7 @@ Complete virtual memory map with 4-level page tables
    fffffc0000000000 |   -4    TB | fffffdffffffffff |    2 TB | ... unused hole
                     |            |                  |         | vaddr_end for KASLR
    fffffe0000000000 |   -2    TB | fffffe7fffffffff |  0.5 TB | cpu_entry_area mapping
-   fffffe8000000000 |   -1.5  TB | fffffeffffffffff |  0.5 TB | ... unused hole
+   fffffe8000000000 |   -1.5  TB | fffffeffffffffff |  0.5 TB | SLUB virtual memory
    ffffff0000000000 |   -1    TB | ffffff7fffffffff |  0.5 TB | %esp fixup stacks
    ffffff8000000000 | -512    GB | ffffffeeffffffff |  444 GB | ... unused hole
    ffffffef00000000 |  -68    GB | fffffffeffffffff |   64 GB | EFI region mapping space
@@ -116,7 +116,7 @@ Complete virtual memory map with 5-level page tables
    fffffc0000000000 |   -4    TB | fffffdffffffffff |    2 TB | ... unused hole
                     |            |                  |         | vaddr_end for KASLR
    fffffe0000000000 |   -2    TB | fffffe7fffffffff |  0.5 TB | cpu_entry_area mapping
-   fffffe8000000000 |   -1.5  TB | fffffeffffffffff |  0.5 TB | ... unused hole
+   fffffe8000000000 |   -1.5  TB | fffffeffffffffff |  0.5 TB | SLUB virtual memory
    ffffff0000000000 |   -1    TB | ffffff7fffffffff |  0.5 TB | %esp fixup stacks
    ffffff8000000000 | -512    GB | ffffffeeffffffff |  444 GB | ... unused hole
    ffffffef00000000 |  -68    GB | fffffffeffffffff |   64 GB | EFI region mapping space
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 38b54b992f32..e1a91eb084c4 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -6,6 +6,7 @@
 
 #ifndef __ASSEMBLY__
 #include <linux/types.h>
+#include <linux/align.h>
 #include <asm/kaslr.h>
 
 /*
@@ -199,6 +200,21 @@ extern unsigned int ptrs_per_p4d;
 #define ESPFIX_PGD_ENTRY	_AC(-2, UL)
 #define ESPFIX_BASE_ADDR	(ESPFIX_PGD_ENTRY << P4D_SHIFT)
 
+#ifdef CONFIG_SLAB_VIRTUAL
+#define SLAB_PGD_ENTRY		_AC(-3, UL)
+#define SLAB_BASE_ADDR		(SLAB_PGD_ENTRY << P4D_SHIFT)
+#define SLAB_END_ADDR		(SLAB_BASE_ADDR + P4D_SIZE)
+
+/*
+ * We need to define this here because we need it to compute SLAB_META_SIZE
+ * and including slab.h causes a dependency cycle.
+ */
+#define STRUCT_SLAB_SIZE (32 * sizeof(void *))
+#define SLAB_VPAGES ((SLAB_END_ADDR - SLAB_BASE_ADDR) / PAGE_SIZE)
+#define SLAB_META_SIZE ALIGN(SLAB_VPAGES * STRUCT_SLAB_SIZE, PAGE_SIZE)
+#define SLAB_DATA_BASE_ADDR (SLAB_BASE_ADDR + SLAB_META_SIZE)
+#endif /* CONFIG_SLAB_VIRTUAL */
+
 #define CPU_ENTRY_AREA_PGD	_AC(-4, UL)
 #define CPU_ENTRY_AREA_BASE	(CPU_ENTRY_AREA_PGD << P4D_SHIFT)
 
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index a190aae8ceaf..d716ddfd9880 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1279,16 +1279,19 @@ static void __init register_page_bootmem_info(void)
 }
 
 /*
- * Pre-allocates page-table pages for the vmalloc area in the kernel page-table.
+ * Pre-allocates page-table pages for the vmalloc and SLUB areas in the kernel
+ * page-table.
  * Only the level which needs to be synchronized between all page-tables is
  * allocated because the synchronization can be expensive.
  */
-static void __init preallocate_vmalloc_pages(void)
+static void __init preallocate_top_level_entries_range(unsigned long start,
+						       unsigned long end)
 {
 	unsigned long addr;
 	const char *lvl;
 
-	for (addr = VMALLOC_START; addr <= VMEMORY_END; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
+
+	for (addr = start; addr <= end; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
 		pgd_t *pgd = pgd_offset_k(addr);
 		p4d_t *p4d;
 		pud_t *pud;
@@ -1328,6 +1331,14 @@ static void __init preallocate_vmalloc_pages(void)
 	panic("Failed to pre-allocate %s pages for vmalloc area\n", lvl);
 }
 
+static void __init preallocate_top_level_entries(void)
+{
+	preallocate_top_level_entries_range(VMALLOC_START, VMEMORY_END);
+#ifdef CONFIG_SLAB_VIRTUAL
+	preallocate_top_level_entries_range(SLAB_BASE_ADDR, SLAB_END_ADDR - 1);
+#endif
+}
+
 void __init mem_init(void)
 {
 	pci_iommu_alloc();
@@ -1351,7 +1362,7 @@ void __init mem_init(void)
 	if (get_gate_vma(&init_mm))
 		kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR, PAGE_SIZE, KCORE_USER);
 
-	preallocate_vmalloc_pages();
+	preallocate_top_level_entries();
 }
 
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 37db264866b6..7b297d372a8c 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -136,6 +136,15 @@ void __init kernel_randomize_memory(void)
 		vaddr = round_up(vaddr + 1, PUD_SIZE);
 		remain_entropy -= entropy;
 	}
+
+#ifdef CONFIG_SLAB_VIRTUAL
+	/*
+	 * slub_addr_base is initialized separately from the
+	 * kaslr_memory_regions because it comes after CPU_ENTRY_AREA_BASE.
+	 */
+	prandom_bytes_state(&rand_state, &rand, sizeof(rand));
+	slub_addr_base += (rand & ((1UL << 36) - PAGE_SIZE));
+#endif
 }
 
 void __meminit init_trampoline_kaslr(void)
diff --git a/arch/x86/mm/mm_internal.h b/arch/x86/mm/mm_internal.h
index 3f37b5c80bb3..fafb79b7e019 100644
--- a/arch/x86/mm/mm_internal.h
+++ b/arch/x86/mm/mm_internal.h
@@ -25,4 +25,8 @@ void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache);
 
 extern unsigned long tlb_single_page_flush_ceiling;
 
+#ifdef CONFIG_SLAB_VIRTUAL
+extern unsigned long slub_addr_base;
+#endif
+
 #endif	/* __X86_MM_INTERNAL_H */
diff --git a/mm/slub.c b/mm/slub.c
index 4f77e5d4fe6c..a731fdc79bff 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -166,6 +166,10 @@
  * 			the fast path and disables lockless freelists.
  */
 
+#ifdef CONFIG_SLAB_VIRTUAL
+unsigned long slub_addr_base = SLAB_DATA_BASE_ADDR;
+#endif /* CONFIG_SLAB_VIRTUAL */
+
 /*
  * We could simply use migrate_disable()/enable() but as long as it's a
  * function call even on !PREEMPT_RT, use inline preempt_disable() there.
diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening
index 9f4e6e38aa76..f4a0af424149 100644
--- a/security/Kconfig.hardening
+++ b/security/Kconfig.hardening
@@ -357,6 +357,8 @@ config GCC_PLUGIN_RANDSTRUCT
 
 config SLAB_VIRTUAL
 	bool "Allocate slab objects from virtual memory"
+	# For virtual memory region allocation
+	depends on X86_64
 	depends on SLUB && !SLUB_TINY
 	# If KFENCE support is desired, it could be implemented on top of our
 	# virtual memory allocation facilities
-- 
2.42.0.459.ge4e396fd5e-goog

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ