[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250520104138.2734372-11-ardb+git@google.com>
Date: Tue, 20 May 2025 12:41:41 +0200
From: Ard Biesheuvel <ardb+git@...gle.com>
To: linux-kernel@...r.kernel.org
Cc: x86@...nel.org, Ard Biesheuvel <ardb@...nel.org>, Ingo Molnar <mingo@...nel.org>,
Linus Torvalds <torvalds@...ux-foundation.org>, Brian Gerst <brgerst@...il.com>,
"Kirill A. Shutemov" <kirill@...temov.name>, Borislav Petkov <bp@...en8.de>
Subject: [PATCH v5 2/7] x86/mm: Use a single cache hot per-CPU variable to
record pgdir_shift
From: Ard Biesheuvel <ardb@...nel.org>
Shrink the global variable 'pgdir_shift' to a single byte, and move it
to the cache hot per-CPU variable section so that it can be accessed
virtually for free.
Set the variable from asm code before entering C so that C code is
guaranteed to always consistently observe the correct value.
For the decompressor, provide an alternative that tests the CR4.LA57
control bit directly - this is more costly in principle but this should
not matter for the early boot code, and the LA57 control bit is
guaranteed to be in sync with the number of paging levels in use.
Signed-off-by: Ard Biesheuvel <ardb@...nel.org>
---
arch/x86/boot/compressed/misc.h | 6 +++++-
arch/x86/boot/compressed/pgtable_64.c | 2 --
arch/x86/boot/startup/map_kernel.c | 1 -
arch/x86/include/asm/page_64_types.h | 2 +-
arch/x86/include/asm/pgtable_64_types.h | 13 +++++++++++--
arch/x86/kernel/head64.c | 2 --
arch/x86/kernel/head_64.S | 5 +++++
arch/x86/mm/pgtable.c | 4 ++++
8 files changed, 26 insertions(+), 9 deletions(-)
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index db1048621ea2..97b80d08f03c 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -28,6 +28,10 @@
#define __pa(x) ((unsigned long)(x))
#define __va(x) ((void *)((unsigned long)(x)))
+#ifdef CONFIG_X86_64
+#define pgdir_shift() (native_read_cr4() & X86_CR4_LA57 ? 48 : 39)
+#endif
+
#include <linux/linkage.h>
#include <linux/screen_info.h>
#include <linux/elf.h>
@@ -189,7 +193,7 @@ static inline int count_immovable_mem_regions(void) { return 0; }
#endif
/* ident_map_64.c */
-extern unsigned int __pgtable_l5_enabled, pgdir_shift, ptrs_per_p4d;
+extern unsigned int __pgtable_l5_enabled, ptrs_per_p4d;
extern void kernel_add_identity_map(unsigned long start, unsigned long end);
/* Used by PAGE_KERN* macros: */
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index bdd26050dff7..898a4e66e401 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -12,7 +12,6 @@
/* __pgtable_l5_enabled needs to be in .data to avoid being cleared along with .bss */
unsigned int __section(".data") __pgtable_l5_enabled;
-unsigned int __section(".data") pgdir_shift = 39;
unsigned int __section(".data") ptrs_per_p4d = 1;
/* Buffer to preserve trampoline memory */
@@ -123,7 +122,6 @@ asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable)
/* Initialize variables for 5-level paging */
__pgtable_l5_enabled = 1;
- pgdir_shift = 48;
ptrs_per_p4d = 512;
}
diff --git a/arch/x86/boot/startup/map_kernel.c b/arch/x86/boot/startup/map_kernel.c
index 332dbe6688c4..06306c5d016f 100644
--- a/arch/x86/boot/startup/map_kernel.c
+++ b/arch/x86/boot/startup/map_kernel.c
@@ -24,7 +24,6 @@ static inline bool check_la57_support(void)
return false;
__pgtable_l5_enabled = 1;
- pgdir_shift = 48;
ptrs_per_p4d = 512;
return true;
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 7400dab373fe..e1fccd9116c3 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -48,7 +48,7 @@
/* See Documentation/arch/x86/x86_64/mm.rst for a description of the memory map. */
#define __PHYSICAL_MASK_SHIFT 52
-#define __VIRTUAL_MASK_SHIFT (pgtable_l5_enabled() ? 56 : 47)
+#define __VIRTUAL_MASK_SHIFT (pgdir_shift() + 8)
#define TASK_SIZE_MAX task_size_max()
#define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE)
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 1481b234465a..3ee747f596e3 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -7,6 +7,7 @@
#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <asm/kaslr.h>
+#include <asm/percpu.h>
/*
* These are used to make use of C type-checking..
@@ -23,6 +24,15 @@ typedef struct { pmdval_t pmd; } pmd_t;
extern unsigned int __pgtable_l5_enabled;
+#ifndef pgdir_shift
+DECLARE_PER_CPU_CACHE_HOT(u8, __pgdir_shift);
+
+static __always_inline __attribute_const__ u8 pgdir_shift(void)
+{
+ return this_cpu_read_stable(__pgdir_shift);
+}
+#endif /* pgdir_shift */
+
#ifdef USE_EARLY_PGTABLE_L5
/*
* cpu_feature_enabled() is not available in early boot code.
@@ -36,7 +46,6 @@ static inline bool pgtable_l5_enabled(void)
#define pgtable_l5_enabled() cpu_feature_enabled(X86_FEATURE_LA57)
#endif /* USE_EARLY_PGTABLE_L5 */
-extern unsigned int pgdir_shift;
extern unsigned int ptrs_per_p4d;
#endif /* !__ASSEMBLER__ */
@@ -44,7 +53,7 @@ extern unsigned int ptrs_per_p4d;
/*
* PGDIR_SHIFT determines what a top-level page table entry can map
*/
-#define PGDIR_SHIFT pgdir_shift
+#define PGDIR_SHIFT pgdir_shift()
#define PTRS_PER_PGD 512
/*
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 533fcf5636fc..e2d9e709ec01 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -52,8 +52,6 @@ SYM_PIC_ALIAS(next_early_pgt);
pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
unsigned int __pgtable_l5_enabled __ro_after_init;
-unsigned int pgdir_shift __ro_after_init = 39;
-EXPORT_SYMBOL(pgdir_shift);
unsigned int ptrs_per_p4d __ro_after_init = 1;
EXPORT_SYMBOL(ptrs_per_p4d);
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 3e9b3a3bd039..d35b75a5f892 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -71,6 +71,11 @@ SYM_CODE_START_NOALIGN(startup_64)
xorl %edx, %edx
wrmsr
+ movq %cr4, %rax
+ btl $X86_CR4_LA57_BIT, %eax
+ jnc 0f
+ movb $48, PER_CPU_VAR(__pgdir_shift)
+0:
call startup_64_setup_gdt_idt
/* Now switch to __KERNEL_CS so IRET works reliably */
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 62777ba4de1a..a7eaeaa595f8 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -46,6 +46,10 @@ void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
}
#if CONFIG_PGTABLE_LEVELS > 4
+DEFINE_PER_CPU_CACHE_HOT(u8, __pgdir_shift) = 39;
+EXPORT_SYMBOL_GPL(__pgdir_shift);
+SYM_PIC_ALIAS(__pgdir_shift);
+
void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d)
{
paravirt_release_p4d(__pa(p4d) >> PAGE_SHIFT);
--
2.49.0.1101.gccaa498523-goog
Powered by blists - more mailing lists