Message-ID: <Z7QaPwI1eMPEwHii@MacBook-Air-5.local>
Date: Tue, 18 Feb 2025 14:27:27 +0900
From: "Harry Yoo (Oracle)" <42.hyeyoo@...il.com>
To: Gwan-gyeong Mun <gwan-gyeong.mun@...el.com>
Cc: linux-kernel@...r.kernel.org, osalvador@...e.de, byungchul@...com,
dave.hansen@...ux.intel.com, luto@...nel.org, peterz@...radead.org,
akpm@...ux-foundation.org, max.byungchul.park@...com,
max.byungchul.park@...il.com
Subject: Re: [PATCH 1/1] x86/vmemmap: Use direct-mapped VA instead of
vmemmap-based VA
On Mon, Feb 17, 2025 at 01:41:33PM +0200, Gwan-gyeong Mun wrote:
> When a process triggers the addition of struct pages to the vmemmap
> (e.g. via memory hot-plug), the page table entries for the newly added
> vmemmap-based virtual addresses are created in init_mm's page table
> first and synchronized to other page tables later.
>
> If the vmemmap-based virtual address is accessed through the process's
> page table before this sync, a page fault will occur.
>
> If the current top-level page table is not init_mm's when a
> vmemmap-based virtual address is accessed before this sync, translate
> it to the corresponding direct-mapped virtual address and use that
> instead.
>
> Fixes: faf1c0008a33 ("x86/vmemmap: optimize for consecutive sections in partial populated PMDs")
> Signed-off-by: Gwan-gyeong Mun <gwan-gyeong.mun@...el.com>
> Cc: Oscar Salvador <osalvador@...e.de>
> Cc: Hyeonggon Yoo <42.hyeyoo@...il.com>
I think this fix is reasonable without changing existing code too much.
Any thoughts from x86 folks?
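
For anyone following along, my reading of the intended flow (untested
sketch that just mirrors the hunks quoted below; "len" stands in for the
lengths used at the real call sites):

	/*
	 * vmemmap_va points into a just-populated vmemmap range. Until
	 * the new entry is synced into every page table, only init_mm
	 * is guaranteed to map it.
	 */
	pgd_t *pgd = __va(read_cr3_pa());	/* PGD the CPU is using now */
	void *kaddr = (void *)vmemmap_va;

	if (init_mm.pgd != pgd)
		/*
		 * Not running on init_mm's page table: resolve the
		 * physical address by walking init_mm's page table via
		 * slow_virt_to_phys() and access the page through the
		 * always-mapped direct map instead.
		 */
		kaddr = __va(slow_virt_to_phys(kaddr));

	memset(kaddr, PAGE_UNUSED, len);	/* no fault on either path */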
> Cc: Byungchul Park <byungchul@...com>
> Cc: Dave Hansen <dave.hansen@...ux.intel.com>
> Cc: Andy Lutomirski <luto@...nel.org>
> Cc: Peter Zijlstra <peterz@...radead.org>
> Cc: Andrew Morton <akpm@...ux-foundation.org>
Shouldn't we add Cc: <stable@...r.kernel.org>?
--
Harry
> ---
> arch/x86/mm/init_64.c | 17 ++++++++++++++---
> 1 file changed, 14 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
> index 01ea7c6df303..1cb26f692831 100644
> --- a/arch/x86/mm/init_64.c
> +++ b/arch/x86/mm/init_64.c
> @@ -844,6 +844,17 @@ void __init paging_init(void)
> */
> static unsigned long unused_pmd_start __meminitdata;
>
> +static void * __meminit vmemmap_va_to_kaddr(unsigned long vmemmap_va)
> +{
> + void *kaddr = (void *)vmemmap_va;
> + pgd_t *pgd = __va(read_cr3_pa());
> +
> + if (init_mm.pgd != pgd)
> + kaddr = __va(slow_virt_to_phys(kaddr));
> +
> + return kaddr;
> +}
> +
> static void __meminit vmemmap_flush_unused_pmd(void)
> {
> if (!unused_pmd_start)
> @@ -851,7 +862,7 @@ static void __meminit vmemmap_flush_unused_pmd(void)
> /*
> * Clears (unused_pmd_start, PMD_END]
> */
> - memset((void *)unused_pmd_start, PAGE_UNUSED,
> + memset(vmemmap_va_to_kaddr(unused_pmd_start), PAGE_UNUSED,
> ALIGN(unused_pmd_start, PMD_SIZE) - unused_pmd_start);
> unused_pmd_start = 0;
> }
> @@ -882,7 +893,7 @@ static void __meminit __vmemmap_use_sub_pmd(unsigned long start)
> * case the first memmap never gets initialized e.g., because the memory
> * block never gets onlined).
> */
> - memset((void *)start, 0, sizeof(struct page));
> + memset(vmemmap_va_to_kaddr(start), 0, sizeof(struct page));
> }
>
> static void __meminit vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
> @@ -924,7 +935,7 @@ static void __meminit vmemmap_use_new_sub_pmd(unsigned long start, unsigned long
> * Mark with PAGE_UNUSED the unused parts of the new memmap range
> */
> if (!IS_ALIGNED(start, PMD_SIZE))
> - memset((void *)page, PAGE_UNUSED, start - page);
> + memset(vmemmap_va_to_kaddr(page), PAGE_UNUSED, start - page);
>
> /*
> * We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of
> --
> 2.48.1
>