Message-ID: <a404d023-e0bb-4dc8-8952-accba299ab50@neon.tech>
Date: Tue, 10 Jun 2025 11:16:36 +0100
From: Em Sharnoff <sharnoff@...n.tech>
To: linux-kernel@...r.kernel.org, x86@...nel.org, linux-mm@...ck.org
Cc: Ingo Molnar <mingo@...nel.org>, "H. Peter Anvin" <hpa@...or.com>,
Dave Hansen <dave.hansen@...ux.intel.com>, Andy Lutomirski
<luto@...nel.org>, Peter Zijlstra <peterz@...radead.org>,
Thomas Gleixner <tglx@...utronix.de>, Borislav Petkov <bp@...en8.de>,
"Edgecombe, Rick P" <rick.p.edgecombe@...el.com>,
Oleg Vasilev <oleg@...n.tech>, Arthur Petukhovsky <arthur@...n.tech>,
Stefan Radig <stefan@...n.tech>, Misha Sakhnov <misha@...n.tech>
Subject: [PATCH v3 1/2] x86/mm: Handle alloc failure in phys_*_init()
During memory hotplug, allocation failures in phys_*_init() aren't
handled, which results in a NULL pointer dereference if they occur.
To handle that, change phys_pud_init() and similar functions to return
allocation errors via ERR_PTR() and check for that in arch_add_memory().
Signed-off-by: Em Sharnoff <sharnoff@...n.tech>
---
Changelog:
- v2: Switch from special-casing zero value to using ERR_PTR()
- v3: Fix -Wint-conversion errors
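
Not part of the patch: for anyone less familiar with the ERR_PTR() scheme
being reused here, below is a minimal, self-contained userspace sketch of
how a -ENOMEM error round-trips through the existing "last physical
address mapped" unsigned long return value. ERR_PTR/PTR_ERR/IS_ERR are
simplified stand-ins for the real helpers in include/linux/err.h, and
fake_phys_init() is a made-up placeholder for phys_pud_init() and friends.

#include <errno.h>
#include <stdio.h>

#define MAX_ERRNO	4095

/* Simplified stand-ins for the helpers in include/linux/err.h. */
static inline void *ERR_PTR(long error)
{
	return (void *)error;
}

static inline long PTR_ERR(const void *ptr)
{
	return (long)ptr;
}

static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

/* Made-up placeholder for phys_pud_init() and friends. */
static unsigned long fake_phys_init(int fail_alloc)
{
	if (fail_alloc)
		return (unsigned long)ERR_PTR(-ENOMEM);
	return 0x40000000UL;	/* pretend last physical address mapped */
}

int main(void)
{
	unsigned long ret = fake_phys_init(1);

	if (IS_ERR((void *)ret))
		printf("allocation failed: %ld\n", PTR_ERR((void *)ret));
	else
		printf("last mapped: %#lx\n", ret);
	return 0;
}

The encoding works because error values occupy only the top MAX_ERRNO
values of the unsigned long range, which can never collide with a valid
last-mapped physical address, so no extra out-parameter is needed.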
---
arch/x86/mm/init.c | 6 ++++-
arch/x86/mm/init_64.c | 54 +++++++++++++++++++++++++++++++++++++++----
2 files changed, 55 insertions(+), 5 deletions(-)
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index bfa444a7dbb0..a2665b6fe376 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -533,6 +533,7 @@ bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn)
* Setup the direct mapping of the physical memory at PAGE_OFFSET.
* This runs before bootmem is initialized and gets pages directly from
* the physical memory. To access them they are temporarily mapped.
+ * Allocation errors are returned with ERR_PTR.
*/
unsigned long __ref init_memory_mapping(unsigned long start,
unsigned long end, pgprot_t prot)
@@ -547,10 +548,13 @@ unsigned long __ref init_memory_mapping(unsigned long start,
memset(mr, 0, sizeof(mr));
nr_range = split_mem_range(mr, 0, start, end);
- for (i = 0; i < nr_range; i++)
+ for (i = 0; i < nr_range; i++) {
ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
mr[i].page_size_mask,
prot);
+ if (IS_ERR((void *)ret))
+ return ret;
+ }
add_pfn_range_mapped(start >> PAGE_SHIFT, ret >> PAGE_SHIFT);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 7c4f6f591f2b..712006afcd6c 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -502,7 +502,8 @@ phys_pte_init(pte_t *pte_page, unsigned long paddr, unsigned long paddr_end,
/*
* Create PMD level page table mapping for physical addresses. The virtual
* and physical address have to be aligned at this level.
- * It returns the last physical address mapped.
+ * It returns the last physical address mapped. Allocation errors are
+ * returned with ERR_PTR.
*/
static unsigned long __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
@@ -572,7 +573,14 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
}
pte = alloc_low_page();
+ if (!pte)
+ return (unsigned long)ERR_PTR(-ENOMEM);
paddr_last = phys_pte_init(pte, paddr, paddr_end, new_prot, init);
+ /*
+ * phys_{pmd,pud,p4d}_init return allocation errors via ERR_PTR.
+ * phys_pte_init makes no allocations, so it should not fail.
+ */
+ BUG_ON(IS_ERR((void *)paddr_last));
spin_lock(&init_mm.page_table_lock);
pmd_populate_kernel_init(&init_mm, pmd, pte, init);
@@ -586,7 +594,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end,
* Create PUD level page table mapping for physical addresses. The virtual
* and physical address do not have to be aligned at this level. KASLR can
* randomize virtual addresses up to this level.
- * It returns the last physical address mapped.
+ * It returns the last physical address mapped. Allocation errors are
+ * returned with ERR_PTR.
*/
static unsigned long __meminit
phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
@@ -623,6 +632,8 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
paddr_end,
page_size_mask,
prot, init);
+ if (IS_ERR((void *)paddr_last))
+ return paddr_last;
continue;
}
/*
@@ -658,12 +669,22 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end,
}
pmd = alloc_low_page();
+ if (!pmd)
+ return (unsigned long)ERR_PTR(-ENOMEM);
paddr_last = phys_pmd_init(pmd, paddr, paddr_end,
page_size_mask, prot, init);
+ /*
+ * We might have IS_ERR(paddr_last) if allocation failed, but we should
+ * still update pud before bailing, so that subsequent retries can pick
+ * up on progress (here and in phys_pmd_init) without leaking pmd.
+ */
spin_lock(&init_mm.page_table_lock);
pud_populate_init(&init_mm, pud, pmd, init);
spin_unlock(&init_mm.page_table_lock);
+
+ if (IS_ERR((void *)paddr_last))
+ return paddr_last;
}
update_page_count(PG_LEVEL_1G, pages);
@@ -707,16 +728,26 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
pud = pud_offset(p4d, 0);
paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end),
page_size_mask, prot, init);
+ if (IS_ERR((void *)paddr_last))
+ return paddr_last;
continue;
}
pud = alloc_low_page();
+ if (!pud)
+ return (unsigned long)ERR_PTR(-ENOMEM);
paddr_last = phys_pud_init(pud, paddr, __pa(vaddr_end),
page_size_mask, prot, init);
spin_lock(&init_mm.page_table_lock);
p4d_populate_init(&init_mm, p4d, pud, init);
spin_unlock(&init_mm.page_table_lock);
+
+ /*
+ * Bail only after updating p4d to keep progress from pud across retries.
+ */
+ if (IS_ERR((void *)paddr_last))
+ return paddr_last;
}
return paddr_last;
@@ -748,10 +779,14 @@ __kernel_physical_mapping_init(unsigned long paddr_start,
__pa(vaddr_end),
page_size_mask,
prot, init);
+ if (IS_ERR((void *)paddr_last))
+ return paddr_last;
continue;
}
p4d = alloc_low_page();
+ if (!p4d)
+ return (unsigned long)ERR_PTR(-ENOMEM);
paddr_last = phys_p4d_init(p4d, __pa(vaddr), __pa(vaddr_end),
page_size_mask, prot, init);
@@ -763,6 +798,13 @@ __kernel_physical_mapping_init(unsigned long paddr_start,
(pud_t *) p4d, init);
spin_unlock(&init_mm.page_table_lock);
+
+ /*
+ * Bail only after updating pgd/p4d to keep progress from p4d across retries.
+ */
+ if (IS_ERR((void *)paddr_last))
+ return paddr_last;
+
pgd_changed = true;
}
@@ -777,7 +819,8 @@ __kernel_physical_mapping_init(unsigned long paddr_start,
* Create page table mapping for the physical memory for specific physical
* addresses. Note that it can only be used to populate non-present entries.
* The virtual and physical addresses have to be aligned on PMD level
- * down. It returns the last physical address mapped.
+ * down. It returns the last physical address mapped. Allocation errors are
+ * returned with ERR_PTR.
*/
unsigned long __meminit
kernel_physical_mapping_init(unsigned long paddr_start,
@@ -980,8 +1023,11 @@ int arch_add_memory(int nid, u64 start, u64 size,
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
+ unsigned long ret = 0;
- init_memory_mapping(start, start + size, params->pgprot);
+ ret = init_memory_mapping(start, start + size, params->pgprot);
+ if (IS_ERR((void *)ret))
+ return (int)PTR_ERR((void *)ret);
return add_pages(nid, start_pfn, nr_pages, params);
}
--
2.39.5