Message-ID: <20250709131657.5660-4-harry.yoo@oracle.com>
Date: Wed,  9 Jul 2025 22:16:57 +0900
From: Harry Yoo <harry.yoo@...cle.com>
To: Thomas Gleixner <tglx@...utronix.de>, Ingo Molnar <mingo@...hat.com>,
        Borislav Petkov <bp@...en8.de>,
        Dave Hansen <dave.hansen@...ux.intel.com>,
        Andy Lutomirski <luto@...nel.org>,
        Peter Zijlstra <peterz@...radead.org>,
        Andrey Ryabinin <ryabinin.a.a@...il.com>,
        Arnd Bergmann <arnd@...db.de>,
        Andrew Morton <akpm@...ux-foundation.org>,
        Dennis Zhou <dennis@...nel.org>, Tejun Heo <tj@...nel.org>,
        Christoph Lameter <cl@...two.org>
Cc: "H . Peter Anvin" <hpa@...or.com>, Alexander Potapenko <glider@...gle.com>,
        Andrey Konovalov <andreyknvl@...il.com>,
        Dmitry Vyukov <dvyukov@...gle.com>,
        Vincenzo Frascino <vincenzo.frascino@....com>,
        Juergen Gross <jgross@...e.com>, Kevin Brodsky <kevin.brodsky@....com>,
        Muchun Song <muchun.song@...ux.dev>,
        Oscar Salvador <osalvador@...e.de>,
        Joao Martins <joao.m.martins@...cle.com>,
        Lorenzo Stoakes <lorenzo.stoakes@...cle.com>,
        Jane Chu <jane.chu@...cle.com>, Alistair Popple <apopple@...dia.com>,
        Mike Rapoport <rppt@...nel.org>,
        Gwan-gyeong Mun <gwan-gyeong.mun@...el.com>,
        "Aneesh Kumar K . V" <aneesh.kumar@...ux.ibm.com>, x86@...nel.org,
        linux-kernel@...r.kernel.org, linux-arch@...r.kernel.org,
        linux-mm@...ck.org, Harry Yoo <harry.yoo@...cle.com>,
        stable@...r.kernel.org
Subject: [RFC V1 PATCH mm-hotfixes 3/3] x86/mm: convert {pgd,p4d}_populate{,_init} to _kernel variant

Introduce {pgd,p4d}_populate_kernel_safe() and convert
{pgd,p4d}_populate{,_init}() to {pgd,p4d}_populate_kernel{,_init}().

With these conversions, callers no longer need to remember to
synchronize the top-level page tables separately.

Once all {pgd,p4d}_populate{,_init}() call sites are converted to
{pgd,p4d}_populate_kernel{,_init}(), sync_global_pgds() has no
callers left and is no longer needed, so remove it.
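
For illustration only (a sketch of the conversion this patch performs;
the plain pgd_populate_kernel() helper itself is introduced earlier in
this series), a typical call site changes from:

	pgd_populate(&init_mm, pgd, p4d);
	...
	/* callers had to remember this extra step */
	sync_global_pgds(vaddr_start, vaddr_end - 1);

to:

	/* top-level entries are synchronized by the helper itself */
	pgd_populate_kernel(vaddr, pgd, p4d);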

Cc: <stable@...r.kernel.org>
Suggested-by: Dave Hansen <dave.hansen@...ux.intel.com>
Signed-off-by: Harry Yoo <harry.yoo@...cle.com>
---
 arch/x86/include/asm/pgalloc.h |  19 +++++
 arch/x86/mm/init_64.c          | 129 ++++++---------------------------
 arch/x86/mm/kasan_init_64.c    |   8 +-
 3 files changed, 46 insertions(+), 110 deletions(-)

diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index d66f2db54b16..98439b9ca293 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -132,6 +132,15 @@ static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d, pud_t *pu
 	set_p4d_safe(p4d, __p4d(_PAGE_TABLE | __pa(pud)));
 }
 
+static inline void p4d_populate_kernel_safe(unsigned long addr,
+					    p4d_t *p4d, pud_t *pud)
+{
+	paravirt_alloc_pud(&init_mm, __pa(pud) >> PAGE_SHIFT);
+	set_p4d_safe(p4d, __p4d(_PAGE_TABLE | __pa(pud)));
+	if (!pgtable_l5_enabled())
+		arch_sync_kernel_pagetables(addr);
+}
+
 extern void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud);
 
 static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
@@ -167,6 +176,16 @@ static inline void pgd_populate_safe(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4
 	set_pgd_safe(pgd, __pgd(_PAGE_TABLE | __pa(p4d)));
 }
 
+static inline void pgd_populate_kernel_safe(unsigned long addr,
+				       pgd_t *pgd, p4d_t *p4d)
+{
+	if (!pgtable_l5_enabled())
+		return;
+	paravirt_alloc_p4d(&init_mm, __pa(p4d) >> PAGE_SHIFT);
+	set_pgd_safe(pgd, __pgd(_PAGE_TABLE | __pa(p4d)));
+	arch_sync_kernel_pagetables(addr);
+}
+
 extern void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d);
 
 static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index cbddbef434d5..00608ab36936 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -75,6 +75,19 @@ DEFINE_POPULATE(pgd_populate, pgd, p4d, init)
 DEFINE_POPULATE(pud_populate, pud, pmd, init)
 DEFINE_POPULATE(pmd_populate_kernel, pmd, pte, init)
 
+#define DEFINE_POPULATE_KERNEL(fname, type1, type2, init)		\
+static inline void fname##_init(unsigned long addr,		\
+		type1##_t *arg1, type2##_t *arg2, bool init)	\
+{								\
+	if (init)						\
+		fname##_safe(addr, arg1, arg2);			\
+	else							\
+		fname(addr, arg1, arg2);			\
+}
+
+DEFINE_POPULATE_KERNEL(pgd_populate_kernel, pgd, p4d, init)
+DEFINE_POPULATE_KERNEL(p4d_populate_kernel, p4d, pud, init)
+
 #define DEFINE_ENTRY(type1, type2, init)			\
 static inline void set_##type1##_init(type1##_t *arg1,		\
 			type2##_t arg2, bool init)		\
@@ -130,99 +143,6 @@ static int __init nonx32_setup(char *str)
 }
 __setup("noexec32=", nonx32_setup);
 
-static void sync_global_pgds_l5(unsigned long start, unsigned long end)
-{
-	unsigned long addr;
-
-	for (addr = start; addr <= end; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
-		const pgd_t *pgd_ref = pgd_offset_k(addr);
-		struct page *page;
-
-		/* Check for overflow */
-		if (addr < start)
-			break;
-
-		if (pgd_none(*pgd_ref))
-			continue;
-
-		spin_lock(&pgd_lock);
-		list_for_each_entry(page, &pgd_list, lru) {
-			pgd_t *pgd;
-			spinlock_t *pgt_lock;
-
-			pgd = (pgd_t *)page_address(page) + pgd_index(addr);
-			/* the pgt_lock only for Xen */
-			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
-			spin_lock(pgt_lock);
-
-			if (!pgd_none(*pgd_ref) && !pgd_none(*pgd))
-				BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
-
-			if (pgd_none(*pgd))
-				set_pgd(pgd, *pgd_ref);
-
-			spin_unlock(pgt_lock);
-		}
-		spin_unlock(&pgd_lock);
-	}
-}
-
-static void sync_global_pgds_l4(unsigned long start, unsigned long end)
-{
-	unsigned long addr;
-
-	for (addr = start; addr <= end; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
-		pgd_t *pgd_ref = pgd_offset_k(addr);
-		const p4d_t *p4d_ref;
-		struct page *page;
-
-		/*
-		 * With folded p4d, pgd_none() is always false, we need to
-		 * handle synchronization on p4d level.
-		 */
-		MAYBE_BUILD_BUG_ON(pgd_none(*pgd_ref));
-		p4d_ref = p4d_offset(pgd_ref, addr);
-
-		if (p4d_none(*p4d_ref))
-			continue;
-
-		spin_lock(&pgd_lock);
-		list_for_each_entry(page, &pgd_list, lru) {
-			pgd_t *pgd;
-			p4d_t *p4d;
-			spinlock_t *pgt_lock;
-
-			pgd = (pgd_t *)page_address(page) + pgd_index(addr);
-			p4d = p4d_offset(pgd, addr);
-			/* the pgt_lock only for Xen */
-			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
-			spin_lock(pgt_lock);
-
-			if (!p4d_none(*p4d_ref) && !p4d_none(*p4d))
-				BUG_ON(p4d_pgtable(*p4d)
-				       != p4d_pgtable(*p4d_ref));
-
-			if (p4d_none(*p4d))
-				set_p4d(p4d, *p4d_ref);
-
-			spin_unlock(pgt_lock);
-		}
-		spin_unlock(&pgd_lock);
-	}
-}
-
-/*
- * When memory was added make sure all the processes MM have
- * suitable PGD entries in the local PGD level page.
- */
-static void sync_global_pgds(unsigned long start, unsigned long end)
-{
-	if (pgtable_l5_enabled())
-		sync_global_pgds_l5(start, end);
-	else
-		sync_global_pgds_l4(start, end);
-}
-
 static void sync_kernel_pagetables_l4(unsigned long addr)
 {
 	pgd_t *pgd_ref = pgd_offset_k(addr);
@@ -295,6 +215,10 @@ static void sync_kernel_pagetables_l5(unsigned long addr)
 	spin_unlock(&pgd_lock);
 }
 
+/*
+ * When memory was added make sure all the processes MM have
+ * suitable PGD entries in the local PGD level page.
+ */
 void arch_sync_kernel_pagetables(unsigned long addr)
 {
 	if (pgtable_l5_enabled())
@@ -330,7 +254,7 @@ static p4d_t *fill_p4d(pgd_t *pgd, unsigned long vaddr)
 {
 	if (pgd_none(*pgd)) {
 		p4d_t *p4d = (p4d_t *)spp_getpage();
-		pgd_populate(&init_mm, pgd, p4d);
+		pgd_populate_kernel(vaddr, pgd, p4d);
 		if (p4d != p4d_offset(pgd, 0))
 			printk(KERN_ERR "PAGETABLE BUG #00! %p <-> %p\n",
 			       p4d, p4d_offset(pgd, 0));
@@ -342,7 +266,7 @@ static pud_t *fill_pud(p4d_t *p4d, unsigned long vaddr)
 {
 	if (p4d_none(*p4d)) {
 		pud_t *pud = (pud_t *)spp_getpage();
-		p4d_populate(&init_mm, p4d, pud);
+		p4d_populate_kernel(vaddr, p4d, pud);
 		if (pud != pud_offset(p4d, 0))
 			printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n",
 			       pud, pud_offset(p4d, 0));
@@ -795,7 +719,7 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
 					   page_size_mask, prot, init);
 
 		spin_lock(&init_mm.page_table_lock);
-		p4d_populate_init(&init_mm, p4d, pud, init);
+		p4d_populate_kernel_init(vaddr, p4d, pud, init);
 		spin_unlock(&init_mm.page_table_lock);
 	}
 
@@ -808,7 +732,6 @@ __kernel_physical_mapping_init(unsigned long paddr_start,
 			       unsigned long page_size_mask,
 			       pgprot_t prot, bool init)
 {
-	bool pgd_changed = false;
 	unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last;
 
 	paddr_last = paddr_end;
@@ -837,18 +760,14 @@ __kernel_physical_mapping_init(unsigned long paddr_start,
 
 		spin_lock(&init_mm.page_table_lock);
 		if (pgtable_l5_enabled())
-			pgd_populate_init(&init_mm, pgd, p4d, init);
+			pgd_populate_kernel_init(vaddr, pgd, p4d, init);
 		else
-			p4d_populate_init(&init_mm, p4d_offset(pgd, vaddr),
-					  (pud_t *) p4d, init);
+			p4d_populate_kernel_init(vaddr, p4d_offset(pgd, vaddr),
+						 (pud_t *) p4d, init);
 
 		spin_unlock(&init_mm.page_table_lock);
-		pgd_changed = true;
 	}
 
-	if (pgd_changed)
-		sync_global_pgds(vaddr_start, vaddr_end - 1);
-
 	return paddr_last;
 }
 
@@ -1642,8 +1561,6 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 		err = -ENOMEM;
 	} else
 		err = vmemmap_populate_basepages(start, end, node, NULL);
-	if (!err)
-		sync_global_pgds(start, end - 1);
 	return err;
 }
 
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 0539efd0d216..e825952d25b2 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -108,7 +108,7 @@ static void __init kasan_populate_p4d(p4d_t *p4d, unsigned long addr,
 	if (p4d_none(*p4d)) {
 		void *p = early_alloc(PAGE_SIZE, nid, true);
 
-		p4d_populate(&init_mm, p4d, p);
+		p4d_populate_kernel(addr, p4d, p);
 	}
 
 	pud = pud_offset(p4d, addr);
@@ -128,7 +128,7 @@ static void __init kasan_populate_pgd(pgd_t *pgd, unsigned long addr,
 
 	if (pgd_none(*pgd)) {
 		p = early_alloc(PAGE_SIZE, nid, true);
-		pgd_populate(&init_mm, pgd, p);
+		pgd_populate_kernel(addr, pgd, p);
 	}
 
 	p4d = p4d_offset(pgd, addr);
@@ -255,7 +255,7 @@ static void __init kasan_shallow_populate_p4ds(pgd_t *pgd,
 
 		if (p4d_none(*p4d)) {
 			p = early_alloc(PAGE_SIZE, NUMA_NO_NODE, true);
-			p4d_populate(&init_mm, p4d, p);
+			p4d_populate_kernel(addr, p4d, p);
 		}
 	} while (p4d++, addr = next, addr != end);
 }
@@ -273,7 +273,7 @@ static void __init kasan_shallow_populate_pgds(void *start, void *end)
 
 		if (pgd_none(*pgd)) {
 			p = early_alloc(PAGE_SIZE, NUMA_NO_NODE, true);
-			pgd_populate(&init_mm, pgd, p);
+			pgd_populate_kernel(addr, pgd, p);
 		}
 
 		/*
-- 
2.43.0

