lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <aVfu5EZM1XyJjrYp@e129823.arm.com>
Date: Fri, 2 Jan 2026 16:14:28 +0000
From: Yeoreum Yun <yeoreum.yun@....com>
To: Ryan Roberts <ryan.roberts@....com>
Cc: catalin.marinas@....com, will@...nel.org, akpm@...ux-foundation.org,
	david@...nel.org, kevin.brodsky@....com, quic_zhenhuah@...cinc.com,
	dev.jain@....com, yang@...amperecomputing.com,
	chaitanyas.prakash@....com, bigeasy@...utronix.de,
	clrkwllms@...nel.org, rostedt@...dmis.org,
	lorenzo.stoakes@...cle.com, ardb@...nel.org, jackmanb@...gle.com,
	vbabka@...e.cz, mhocko@...e.com,
	linux-arm-kernel@...ts.infradead.org, linux-kernel@...r.kernel.org,
	linux-rt-devel@...ts.linux.dev
Subject: Re: [PATCH v4 2/3] arm64: mmu: avoid allocating pages while
 splitting the linear mapping

Hi Ryan,

> > linear_map_split_to_ptes() currently allocates page tables while
> > splitting the linear mapping into PTEs under stop_machine() using GFP_ATOMIC.
> >
> > This is fine for non-PREEMPT_RT configurations.
> > However, it becomes problematic on PREEMPT_RT, because
> > generic memory allocation/free APIs (e.g. pgtable_alloc(), __get_free_pages(), etc.)
> > cannot be called from a non-preemptible context, except for the _nolock() variants.
> > This is because generic memory allocation/free paths are sleepable,
> > as they rely on spin_lock(), which becomes sleepable on PREEMPT_RT.
> >
> > In other words, even calling pgtable_alloc() with GFP_ATOMIC is not permitted
> > in __linear_map_split_to_pte() when it is executed by the stopper thread,
> >  where preemption is disabled on PREEMPT_RT.
> >
> > To address this, the required number of page tables is first collected
> > and preallocated, and the preallocated page tables are then used
> > when splitting the linear mapping in __linear_map_split_to_pte().
> >
> > Fixes: 3df6979d222b ("arm64: mm: split linear mapping if BBML2 unsupported on secondary CPUs")
> > Signed-off-by: Yeoreum Yun <yeoreum.yun@....com>
>
> Looks good from my perspective.
>
> I have a couple more small comments below. With those addressed:
>
> Reviewed-by: Ryan Roberts <ryan.roberts@....com>
>
> > ---
> >  arch/arm64/mm/mmu.c | 204 +++++++++++++++++++++++++++++++++++---------
> >  1 file changed, 166 insertions(+), 38 deletions(-)
> >
> > diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> > index 4b4908ae189b..cc086e91a506 100644
> > --- a/arch/arm64/mm/mmu.c
> > +++ b/arch/arm64/mm/mmu.c
> > @@ -529,18 +529,14 @@ static void early_create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
> >  		panic("Failed to create page tables\n");
> >  }
> >
> > -static phys_addr_t __pgd_pgtable_alloc(struct mm_struct *mm, gfp_t gfp,
> > -				       enum pgtable_type pgtable_type)
> > -{
> > -	/* Page is zeroed by init_clear_pgtable() so don't duplicate effort. */
> > -	struct ptdesc *ptdesc = pagetable_alloc(gfp & ~__GFP_ZERO, 0);
> > -	phys_addr_t pa;
> > -
> > -	if (!ptdesc)
> > -		return INVALID_PHYS_ADDR;
> > -
> > -	pa = page_to_phys(ptdesc_page(ptdesc));
> > +static struct ptdesc **split_pgtables;
> > +static unsigned long split_pgtables_count;
> > +static unsigned long split_pgtables_idx;
>
> I think these could all be __initdata, if you make
> pgd_pgtable_get_preallocated() __init (see below) ?

I don't think so, since range_split_to_ptes() cannot be __init.
Marking pgd_pgtable_get_preallocated() as __init triggers the following
section mismatch warning at build time:

  WARNING: modpost: vmlinux: section mismatch in reference: range_split_to_ptes+0x3c (section: .text) -> pgd_pgtable_get_preallocated (section: .init.text)
>
> >
> > +static __always_inline void __pgd_pgtable_init(struct mm_struct *mm,
>
> Is there a reason for __always_inline? If not, I think it's preferable to just
> leave it static and let the compiler decide.

Okay. I'll remove __always_inline. Thanks.

>
> > +					       struct ptdesc *ptdesc,
> > +					       enum pgtable_type pgtable_type)
> > +{
> >  	switch (pgtable_type) {
> >  	case TABLE_PTE:
> >  		BUG_ON(!pagetable_pte_ctor(mm, ptdesc));
> > @@ -555,26 +551,49 @@ static phys_addr_t __pgd_pgtable_alloc(struct mm_struct *mm, gfp_t gfp,
> >  		pagetable_p4d_ctor(ptdesc);
> >  		break;
> >  	}
> > -
> > -	return pa;
> >  }
> >
> > -static phys_addr_t
> > -pgd_pgtable_alloc_init_mm_gfp(enum pgtable_type pgtable_type, gfp_t gfp)
> > +static phys_addr_t __pgd_pgtable_alloc(struct mm_struct *mm,
> > +				       enum pgtable_type pgtable_type)
> >  {
> > -	return __pgd_pgtable_alloc(&init_mm, gfp, pgtable_type);
> > +	/* Page is zeroed by init_clear_pgtable() so don't duplicate effort. */
> > +	struct ptdesc *ptdesc = pagetable_alloc(GFP_PGTABLE_KERNEL & ~__GFP_ZERO, 0);
> > +
> > +	if (!ptdesc)
> > +		return INVALID_PHYS_ADDR;
> > +
> > +	__pgd_pgtable_init(mm, ptdesc, pgtable_type);
> > +
> > +	return page_to_phys(ptdesc_page(ptdesc));
> >  }
> >
> > -static phys_addr_t __maybe_unused
> > +static phys_addr_t
> >  pgd_pgtable_alloc_init_mm(enum pgtable_type pgtable_type)
> >  {
> > -	return pgd_pgtable_alloc_init_mm_gfp(pgtable_type, GFP_PGTABLE_KERNEL);
> > +	return __pgd_pgtable_alloc(&init_mm, pgtable_type);
> >  }
> >
> >  static phys_addr_t
> >  pgd_pgtable_alloc_special_mm(enum pgtable_type pgtable_type)
> >  {
> > -	return  __pgd_pgtable_alloc(NULL, GFP_PGTABLE_KERNEL, pgtable_type);
> > +	return  __pgd_pgtable_alloc(NULL, pgtable_type);
> > +}
> > +
> > +static phys_addr_t
> > +pgd_pgtable_get_preallocated(enum pgtable_type pgtable_type)
>
> I think this could probably be __init?

See above.

>
> > +{
> > +	struct ptdesc *ptdesc;
> > +
> > +	if (WARN_ON(split_pgtables_idx >= split_pgtables_count))
> > +		return INVALID_PHYS_ADDR;
> > +
> > +	ptdesc = split_pgtables[split_pgtables_idx++];
> > +	if (!ptdesc)
> > +		return INVALID_PHYS_ADDR;
> > +
> > +	__pgd_pgtable_init(&init_mm, ptdesc, pgtable_type);
> > +
> > +	return page_to_phys(ptdesc_page(ptdesc));
> >  }
> >
> >  static void split_contpte(pte_t *ptep)
> > @@ -586,7 +605,9 @@ static void split_contpte(pte_t *ptep)
> >  		__set_pte(ptep, pte_mknoncont(__ptep_get(ptep)));
> >  }
> >
> > -static int split_pmd(pmd_t *pmdp, pmd_t pmd, gfp_t gfp, bool to_cont)
> > +static int split_pmd(pmd_t *pmdp, pmd_t pmd,
> > +		     pgtable_alloc_t pgtable_alloc,
> > +		     bool to_cont)
>
> nit: this will easily fit in 2 lines and still be within 80 chars:

Okay. I'll change it.

>
> static int split_pmd(pmd_t *pmdp, pmd_t pmd, pgtable_alloc_t pgtable_alloc,
> 		     bool to_cont)
>
> >  {
> >  	pmdval_t tableprot = PMD_TYPE_TABLE | PMD_TABLE_UXN | PMD_TABLE_AF;
> >  	unsigned long pfn = pmd_pfn(pmd);
> > @@ -595,7 +616,7 @@ static int split_pmd(pmd_t *pmdp, pmd_t pmd, gfp_t gfp, bool to_cont)
> >  	pte_t *ptep;
> >  	int i;
> >
> > -	pte_phys = pgd_pgtable_alloc_init_mm_gfp(TABLE_PTE, gfp);
> > +	pte_phys = pgtable_alloc(TABLE_PTE);
> >  	if (pte_phys == INVALID_PHYS_ADDR)
> >  		return -ENOMEM;
> >  	ptep = (pte_t *)phys_to_virt(pte_phys);
> > @@ -630,7 +651,9 @@ static void split_contpmd(pmd_t *pmdp)
> >  		set_pmd(pmdp, pmd_mknoncont(pmdp_get(pmdp)));
> >  }
> >
> > -static int split_pud(pud_t *pudp, pud_t pud, gfp_t gfp, bool to_cont)
> > +static int split_pud(pud_t *pudp, pud_t pud,
> > +		     pgtable_alloc_t pgtable_alloc,
> > +		     bool to_cont)
>
> nit: same comment.

Thanks for your review :D
BTW, apart from the __init-related comments, may I add your R-b tag
after fixing the others?

--
Sincerely,
Yeoreum Yun

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ