Message-ID: <3e767aa4-c783-4857-b34e-fdf3f20bd94f@neon.tech>
Date: Fri, 13 Jun 2025 21:10:13 +0100
From: Em Sharnoff <sharnoff@...n.tech>
To: linux-kernel@...r.kernel.org, x86@...nel.org, linux-mm@...ck.org
Cc: Ingo Molnar <mingo@...nel.org>, "H. Peter Anvin" <hpa@...or.com>,
Dave Hansen <dave.hansen@...ux.intel.com>, Andy Lutomirski
<luto@...nel.org>, Peter Zijlstra <peterz@...radead.org>,
Thomas Gleixner <tglx@...utronix.de>, Borislav Petkov <bp@...en8.de>,
"Edgecombe, Rick P" <rick.p.edgecombe@...el.com>,
Oleg Vasilev <oleg@...n.tech>, Arthur Petukhovsky <arthur@...n.tech>,
Stefan Radig <stefan@...n.tech>, Misha Sakhnov <misha@...n.tech>
Subject: [PATCH v4 1/4] x86/mm: Update mapped addresses in
phys_{pmd,pud}_init()

Currently kernel_physical_mapping_init() and its helpers return the last
physical address mapped ('paddr_last'). This makes it harder to cleanly
handle allocation errors in those functions.

'paddr_last' is used to update 'pfn_mapped'/'max_pfn_mapped', so:

1. Introduce add_paddr_range_mapped() to do that update, translating from
   physical addresses to pfns.
2. Call add_paddr_range_mapped() in phys_pud_init() where 'paddr_last'
   would otherwise be updated due to 1GiB pages.
   - Note: this includes the places where we set 'paddr_last = paddr_next',
     as added in 20167d3421a0 ("x86-64: Fix accounting in
     kernel_physical_mapping_init()")

add_paddr_range_mapped() is probably too expensive to call for every page
that gets mapped, so instead phys_pte_init() continues to return
'paddr_last', and phys_pmd_init() calls add_paddr_range_mapped() only at
the end of its loop (which should mean it is called roughly once per 1GiB
mapped).
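
Purely as illustration of the bookkeeping described above, here is a
minimal userspace sketch of the paddr-to-pfn translation that
add_paddr_range_mapped() performs and of why overlapping calls are
harmless. The PAGE_SHIFT value, the addresses in main(), and the
max_pfn_mapped-only tracking are assumptions made for the sketch; the
real add_pfn_range_mapped() also records the range in the pfn_mapped[]
array:

  #include <stdio.h>

  #define PAGE_SHIFT 12	/* sketch assumes 4 KiB pages */

  static unsigned long max_pfn_mapped;	/* highest pfn recorded so far */

  /* Record [start_pfn, end_pfn) as mapped; overlapping calls are harmless. */
  static void add_pfn_range_mapped(unsigned long start_pfn, unsigned long end_pfn)
  {
  	if (end_pfn > max_pfn_mapped)
  		max_pfn_mapped = end_pfn;
  	/* the real kernel helper also merges the range into pfn_mapped[] */
  }

  /* The helper this patch adds: translate paddrs to pfns, then record them. */
  static void add_paddr_range_mapped(unsigned long start_paddr, unsigned long end_paddr)
  {
  	add_pfn_range_mapped(start_paddr >> PAGE_SHIFT, end_paddr >> PAGE_SHIFT);
  }

  int main(void)
  {
  	/* Overlapping ranges, e.g. from a repeated mapping pass, are fine. */
  	add_paddr_range_mapped(0x100000, 0x40000000);
  	add_paddr_range_mapped(0x200000, 0x40000000);
  	printf("max_pfn_mapped = %#lx\n", max_pfn_mapped);
  	return 0;
  }

That idempotence is what makes it safe for phys_pmd_init()/phys_pud_init()
to report ranges that partly repeat earlier ones.
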
Signed-off-by: Em Sharnoff <sharnoff@...n.tech>
---
Changelog:
- v4: Add this patch
---
 arch/x86/include/asm/pgtable.h |  3 +-
 arch/x86/mm/init.c             | 23 +++++----
 arch/x86/mm/init_32.c          |  6 ++-
 arch/x86/mm/init_64.c          | 88 +++++++++++++++++-----------------
 arch/x86/mm/mm_internal.h      | 13 +++--
 5 files changed, 69 insertions(+), 64 deletions(-)
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 7bd6bd6df4a1..138d55f48a4f 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1244,8 +1244,7 @@ extern int direct_gbpages;
void init_mem_mapping(void);
void early_alloc_pgt_buf(void);
void __init poking_init(void);
-unsigned long init_memory_mapping(unsigned long start,
- unsigned long end, pgprot_t prot);
+void init_memory_mapping(unsigned long start, unsigned long end, pgprot_t prot);
#ifdef CONFIG_X86_64
extern pgd_t trampoline_pgd_entry;
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index bfa444a7dbb0..1461873b44f1 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -529,16 +529,24 @@ bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn)
return false;
}
+/*
+ * Update max_pfn_mapped and pfn_mapped[] with the range of physical
+ * addresses mapped. The range may overlap with ranges from previous calls.
+ */
+void add_paddr_range_mapped(unsigned long start_paddr, unsigned long end_paddr)
+{
+ add_pfn_range_mapped(start_paddr >> PAGE_SHIFT, end_paddr >> PAGE_SHIFT);
+}
+
/*
* Setup the direct mapping of the physical memory at PAGE_OFFSET.
* This runs before bootmem is initialized and gets pages directly from
* the physical memory. To access them they are temporarily mapped.
*/
-unsigned long __ref init_memory_mapping(unsigned long start,
- unsigned long end, pgprot_t prot)
+void __ref init_memory_mapping(unsigned long start,
+ unsigned long end, pgprot_t prot)
{
struct map_range mr[NR_RANGE_MR];
- unsigned long ret = 0;
int nr_range, i;
pr_debug("init_memory_mapping: [mem %#010lx-%#010lx]\n",
@@ -548,13 +556,10 @@ unsigned long __ref init_memory_mapping(unsigned long start,
nr_range = split_mem_range(mr, 0, start, end);
for (i = 0; i < nr_range; i++)
- ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
- mr[i].page_size_mask,
- prot);
+ kernel_physical_mapping_init(mr[i].start, mr[i].end,
+ mr[i].page_size_mask, prot);
- add_pfn_range_mapped(start >> PAGE_SHIFT, ret >> PAGE_SHIFT);
-
- return ret >> PAGE_SHIFT;
+ return;
}
/*
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index ad662cc4605c..4427ac433041 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -245,7 +245,7 @@ static inline int is_x86_32_kernel_text(unsigned long addr)
* of max_low_pfn pages, by creating page tables starting from address
* PAGE_OFFSET:
*/
-unsigned long __init
+void __init
kernel_physical_mapping_init(unsigned long start,
unsigned long end,
unsigned long page_size_mask,
@@ -382,7 +382,9 @@ kernel_physical_mapping_init(unsigned long start,
mapping_iter = 2;
goto repeat;
}
- return last_map_addr;
+
+ add_paddr_range_mapped(start, last_map_addr);
+ return;
}
#ifdef CONFIG_HIGHMEM
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 7c4f6f591f2b..e729108bee30 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -502,13 +502,13 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
/*
* Create PMD level page table mapping for physical addresses. The virtual
* and physical address have to be aligned at this level.
- * It returns the last physical address mapped.
*/
-static unsigned long __meminit
+static void __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
unsigned long page_size_mask, pgprot_t prot, bool init)
{
unsigned long pages = 0, paddr_next;
+ unsigned long paddr_first = paddr;
unsigned long paddr_last = paddr_end;
int i = pmd_index(paddr);
@@ -579,21 +579,25 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
spin_unlock(&init_mm.page_table_lock);
}
update_page_count(PG_LEVEL_2M, pages);
- return paddr_last;
+ /*
+ * In case of recovery from previous state, add_paddr_range_mapped() may
+ * be called with an overlapping range from previous operations.
+ * It is idempotent, so this is ok.
+ */
+ add_paddr_range_mapped(paddr_first, paddr_last);
+ return;
}
/*
* Create PUD level page table mapping for physical addresses. The virtual
* and physical address do not have to be aligned at this level. KASLR can
* randomize virtual addresses up to this level.
- * It returns the last physical address mapped.
*/
-static unsigned long __meminit
+static void __meminit
phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
unsigned long page_size_mask, pgprot_t _prot, bool init)
{
unsigned long pages = 0, paddr_next;
- unsigned long paddr_last = paddr_end;
unsigned long vaddr = (unsigned long)__va(paddr);
int i = pud_index(vaddr);
@@ -619,10 +623,8 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
if (!pud_none(*pud)) {
if (!pud_leaf(*pud)) {
pmd = pmd_offset(pud, 0);
- paddr_last = phys_pmd_init(pmd, paddr,
- paddr_end,
- page_size_mask,
- prot, init);
+ phys_pmd_init(pmd, paddr, paddr_end,
+ page_size_mask, prot, init);
continue;
}
/*
@@ -640,7 +642,7 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
if (page_size_mask & (1 << PG_LEVEL_1G)) {
if (!after_bootmem)
pages++;
- paddr_last = paddr_next;
+ add_paddr_range_mapped(paddr, paddr_next);
continue;
}
prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud));
@@ -653,13 +655,13 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
pfn_pud(paddr >> PAGE_SHIFT, prot_sethuge(prot)),
init);
spin_unlock(&init_mm.page_table_lock);
- paddr_last = paddr_next;
+ add_paddr_range_mapped(paddr, paddr_next);
continue;
}
pmd = alloc_low_page();
- paddr_last = phys_pmd_init(pmd, paddr, paddr_end,
- page_size_mask, prot, init);
+ phys_pmd_init(pmd, paddr, paddr_end,
+ page_size_mask, prot, init);
spin_lock(&init_mm.page_table_lock);
pud_populate_init(&init_mm, pud, pmd, init);
@@ -668,22 +670,23 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
update_page_count(PG_LEVEL_1G, pages);
- return paddr_last;
+ return;
}
-static unsigned long __meminit
+static void __meminit
phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
unsigned long page_size_mask, pgprot_t prot, bool init)
{
- unsigned long vaddr, vaddr_end, vaddr_next, paddr_next, paddr_last;
+ unsigned long vaddr, vaddr_end, vaddr_next, paddr_next;
- paddr_last = paddr_end;
vaddr = (unsigned long)__va(paddr);
vaddr_end = (unsigned long)__va(paddr_end);
- if (!pgtable_l5_enabled())
- return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end,
- page_size_mask, prot, init);
+ if (!pgtable_l5_enabled()) {
+ phys_pud_init((pud_t *) p4d_page, paddr, paddr_end,
+ page_size_mask, prot, init);
+ return;
+ }
for (; vaddr < vaddr_end; vaddr = vaddr_next) {
p4d_t *p4d = p4d_page + p4d_index(vaddr);
@@ -705,33 +708,32 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
if (!p4d_none(*p4d)) {
pud = pud_offset(p4d, 0);
- paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end),
- page_size_mask, prot, init);
+ phys_pud_init(pud, paddr, __pa(vaddr_end),
+ page_size_mask, prot, init);
continue;
}
pud = alloc_low_page();
- paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end),
- page_size_mask, prot, init);
+ phys_pud_init(pud, paddr, __pa(vaddr_end),
+ page_size_mask, prot, init);
spin_lock(&init_mm.page_table_lock);
p4d_populate_init(&init_mm, p4d, pud, init);
spin_unlock(&init_mm.page_table_lock);
}
- return paddr_last;
+ return;
}
-static unsigned long __meminit
+static void __meminit
__kernel_physical_mapping_init(unsigned long paddr_start,
unsigned long paddr_end,
unsigned long page_size_mask,
pgprot_t prot, bool init)
{
bool pgd_changed = false;
- unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next, paddr_last;
+ unsigned long vaddr, vaddr_start, vaddr_end, vaddr_next;
- paddr_last = paddr_end;
vaddr = (unsigned long)__va(paddr_start);
vaddr_end = (unsigned long)__va(paddr_end);
vaddr_start = vaddr;
@@ -744,16 +746,14 @@ __kernel_physical_mapping_init(unsigned long paddr_start,
if (pgd_val(*pgd)) {
p4d = (p4d_t *)pgd_page_vaddr(*pgd);
- paddr_last = phys_p4d_init(p4d, __pa(vaddr),
- __pa(vaddr_end),
- page_size_mask,
- prot, init);
+ phys_p4d_init(p4d, __pa(vaddr), __pa(vaddr_end),
+ page_size_mask, prot, init);
continue;
}
p4d = alloc_low_page();
- paddr_last = phys_p4d_init(p4d, __pa(vaddr), __pa(vaddr_end),
- page_size_mask, prot, init);
+ phys_p4d_init(p4d, __pa(vaddr), __pa(vaddr_end),
+ page_size_mask, prot, init);
spin_lock(&init_mm.page_table_lock);
if (pgtable_l5_enabled())
@@ -769,7 +769,7 @@ __kernel_physical_mapping_init(unsigned long paddr_start,
if (pgd_changed)
sync_global_pgds(vaddr_start, vaddr_end - 1);
- return paddr_last;
+ return;
}
@@ -777,15 +777,15 @@ __kernel_physical_mapping_init(unsigned long paddr_start,
* Create page table mapping for the physical memory for specific physical
* addresses. Note that it can only be used to populate non-present entries.
* The virtual and physical addresses have to be aligned on PMD level
- * down. It returns the last physical address mapped.
+ * down.
*/
-unsigned long __meminit
+void __meminit
kernel_physical_mapping_init(unsigned long paddr_start,
unsigned long paddr_end,
unsigned long page_size_mask, pgprot_t prot)
{
- return __kernel_physical_mapping_init(paddr_start, paddr_end,
- page_size_mask, prot, true);
+ __kernel_physical_mapping_init(paddr_start, paddr_end,
+ page_size_mask, prot, true);
}
/*
@@ -794,14 +794,14 @@ kernel_physical_mapping_init(unsigned long paddr_start,
* when updating the mapping. The caller is responsible to flush the TLBs after
* the function returns.
*/
-unsigned long __meminit
+void __meminit
kernel_physical_mapping_change(unsigned long paddr_start,
unsigned long paddr_end,
unsigned long page_size_mask)
{
- return __kernel_physical_mapping_init(paddr_start, paddr_end,
- page_size_mask, PAGE_KERNEL,
- false);
+ __kernel_physical_mapping_init(paddr_start, paddr_end,
+ page_size_mask, PAGE_KERNEL,
+ false);
}
#ifndef CONFIG_NUMA
diff --git a/arch/x86/mm/mm_internal.h b/arch/x86/mm/mm_internal.h
index 3f37b5c80bb3..6fea5f7edd48 100644
--- a/arch/x86/mm/mm_internal.h
+++ b/arch/x86/mm/mm_internal.h
@@ -10,13 +10,12 @@ static inline void *alloc_low_page(void)
void early_ioremap_page_table_range_init(void);
-unsigned long kernel_physical_mapping_init(unsigned long start,
- unsigned long end,
- unsigned long page_size_mask,
- pgprot_t prot);
-unsigned long kernel_physical_mapping_change(unsigned long start,
- unsigned long end,
- unsigned long page_size_mask);
+void add_paddr_range_mapped(unsigned long start_paddr, unsigned long end_paddr);
+
+void kernel_physical_mapping_init(unsigned long start, unsigned long end,
+ unsigned long page_size_mask, pgprot_t prot);
+void kernel_physical_mapping_change(unsigned long start, unsigned long end,
+ unsigned long page_size_mask);
void zone_sizes_init(void);
extern int after_bootmem;
--
2.39.5