lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <da766ad7-143b-4c7c-b296-f18df8380643@os.amperecomputing.com>
Date: Tue, 5 Aug 2025 14:28:26 -0700
From: Yang Shi <yang@...amperecomputing.com>
To: Dev Jain <dev.jain@....com>, ryan.roberts@....com, will@...nel.org,
 catalin.marinas@....com, akpm@...ux-foundation.org, Miko.Lenczewski@....com,
 scott@...amperecomputing.com, cl@...two.org
Cc: linux-arm-kernel@...ts.infradead.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 3/4] arm64: mm: support large block mapping when
 rodata=full



On 7/29/25 5:34 AM, Dev Jain wrote:
>
> On 25/07/25 3:41 am, Yang Shi wrote:
>> [----- snip -----]
>>     #ifdef CONFIG_ARM64_PAN
>> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
>> index 3d5fb37424ab..f63b39613571 100644
>> --- a/arch/arm64/mm/mmu.c
>> +++ b/arch/arm64/mm/mmu.c
>> @@ -480,6 +480,8 @@ void create_kpti_ng_temp_pgd(pgd_t *pgdir, 
>> phys_addr_t phys, unsigned long virt,
>>                    int flags);
>>   #endif
>>   +#define INVALID_PHYS_ADDR    -1
>> +
>>   static phys_addr_t __pgd_pgtable_alloc(struct mm_struct *mm,
>>                          enum pgtable_type pgtable_type)
>>   {
>> @@ -487,7 +489,9 @@ static phys_addr_t __pgd_pgtable_alloc(struct 
>> mm_struct *mm,
>>       struct ptdesc *ptdesc = pagetable_alloc(GFP_PGTABLE_KERNEL & 
>> ~__GFP_ZERO, 0);
>>       phys_addr_t pa;
>>   -    BUG_ON(!ptdesc);
>> +    if (!ptdesc)
>> +        return INVALID_PHYS_ADDR;
>> +
>>       pa = page_to_phys(ptdesc_page(ptdesc));
>>         switch (pgtable_type) {
>> @@ -509,15 +513,29 @@ static phys_addr_t __pgd_pgtable_alloc(struct 
>> mm_struct *mm,
>>   }
>>     static phys_addr_t __maybe_unused
>> -pgd_pgtable_alloc_init_mm(enum pgtable_type pgtable_type)
>> +split_pgtable_alloc(enum pgtable_type pgtable_type)
>>   {
>>       return __pgd_pgtable_alloc(&init_mm, pgtable_type);
>>   }
>>   +static phys_addr_t __maybe_unused
>> +pgd_pgtable_alloc_init_mm(enum pgtable_type pgtable_type)
>> +{
>> +    phys_addr_t pa;
>> +
>> +    pa = __pgd_pgtable_alloc(&init_mm, pgtable_type);
>> +    BUG_ON(pa == INVALID_PHYS_ADDR);
>> +    return pa;
>> +}
>> +
>>   static phys_addr_t
>>   pgd_pgtable_alloc_special_mm(enum pgtable_type pgtable_type)
>>   {
>> -    return __pgd_pgtable_alloc(NULL, pgtable_type);
>> +    phys_addr_t pa;
>> +
>> +    pa = __pgd_pgtable_alloc(NULL, pgtable_type);
>> +    BUG_ON(pa == INVALID_PHYS_ADDR);
>> +    return pa;
>>   }
>>     /*
>> @@ -552,6 +570,254 @@ void __init create_pgd_mapping(struct mm_struct 
>> *mm, phys_addr_t phys,
>>                    pgd_pgtable_alloc_special_mm, flags);
>>   }
>>   +static DEFINE_MUTEX(pgtable_split_lock);
>
> Thanks for taking a separate lock.
>
>> +
>> +static int split_cont_pte(pmd_t *pmdp, unsigned long addr, unsigned 
>> long end)
>> +{
>> +    pte_t *ptep;
>> +    unsigned long next;
>> +    unsigned int nr;
>> +    unsigned long span;
>> +
>> +    ptep = pte_offset_kernel(pmdp, addr);
>> +
>> +    do {
>> +        pte_t *_ptep;
>> +
>> +        nr = 0;
>> +        next = pte_cont_addr_end(addr, end);
>> +        if (next < end)
>> +            nr = max(nr, ((end - next) / CONT_PTE_SIZE));
>> +        span = nr * CONT_PTE_SIZE;
>> +
>> +        _ptep = PTR_ALIGN_DOWN(ptep, sizeof(*ptep) * CONT_PTES);
>> +        ptep += pte_index(next) - pte_index(addr) + nr * CONT_PTES;
>> +
>> +        if (((addr | next) & ~CONT_PTE_MASK) == 0)
>> +            continue;
>> +
>> +        if (!pte_cont(__ptep_get(_ptep)))
>> +            continue;
>> +
>> +        for (int i = 0; i < CONT_PTES; i++, _ptep++)
>> +            __set_pte(_ptep, pte_mknoncont(__ptep_get(_ptep)));
>> +    } while (addr = next + span, addr != end);
>> +
>> +    return 0;
>> +}
>> +
>> +static int split_pmd(pmd_t *pmdp, unsigned long addr, unsigned long 
>> end)
>> +{
>> +    unsigned long next;
>> +    unsigned int nr;
>> +    unsigned long span;
>> +    int ret = 0;
>> +
>> +    do {
>> +        pmd_t pmd;
>> +
>> +        nr = 1;
>> +        next = pmd_addr_end(addr, end);
>> +        if (next < end)
>> +            nr = max(nr, ((end - next) / PMD_SIZE));
>> +        span = (nr - 1) * PMD_SIZE;
>> +
>> +        if (((addr | next) & ~PMD_MASK) == 0)
>> +            continue;
>> +
>> +        pmd = pmdp_get(pmdp);
>> +        if (pmd_leaf(pmd)) {
>> +            phys_addr_t pte_phys;
>> +            pte_t *ptep;
>> +            pmdval_t pmdval = PMD_TYPE_TABLE | PMD_TABLE_UXN |
>> +                      PMD_TABLE_AF;
>> +            unsigned long pfn = pmd_pfn(pmd);
>> +            pgprot_t prot = pmd_pgprot(pmd);
>> +
>> +            pte_phys = split_pgtable_alloc(TABLE_PTE);
>> +            if (pte_phys == INVALID_PHYS_ADDR)
>> +                return -ENOMEM;
>> +
>> +            if (pgprot_val(prot) & PMD_SECT_PXN)
>> +                pmdval |= PMD_TABLE_PXN;
>> +
>> +            prot = __pgprot((pgprot_val(prot) & ~PTE_TYPE_MASK) |
>> +                    PTE_TYPE_PAGE);
>> +            prot = __pgprot(pgprot_val(prot) | PTE_CONT);
>> +            ptep = (pte_t *)phys_to_virt(pte_phys);
>> +            for (int i = 0; i < PTRS_PER_PTE; i++, ptep++, pfn++)
>> +                __set_pte(ptep, pfn_pte(pfn, prot));
>> +
>> +            dsb(ishst);
>> +
>> +            __pmd_populate(pmdp, pte_phys, pmdval);
>> +        }
>> +
>> +        ret = split_cont_pte(pmdp, addr, next);
>> +        if (ret)
>> +            break;
>> +    } while (pmdp += nr, addr = next + span, addr != end);
>> +
>> +    return ret;
>> +}
>> +
>> +static int split_cont_pmd(pud_t *pudp, unsigned long addr, unsigned 
>> long end)
>> +{
>> +    pmd_t *pmdp;
>> +    unsigned long next;
>> +    unsigned int nr;
>> +    unsigned long span;
>> +    int ret = 0;
>> +
>> +    pmdp = pmd_offset(pudp, addr);
>> +
>> +    do {
>> +        pmd_t *_pmdp;
>> +
>> +        nr = 0;
>> +        next = pmd_cont_addr_end(addr, end);
>> +        if (next < end)
>> +            nr = max(nr, ((end - next) / CONT_PMD_SIZE));
>> +        span = nr * CONT_PMD_SIZE;
>> +
>> +        if (((addr | next) & ~CONT_PMD_MASK) == 0) {
>> +            pmdp += pmd_index(next) - pmd_index(addr) +
>> +                nr * CONT_PMDS;
>> +            continue;
>> +        }
>> +
>> +        _pmdp = PTR_ALIGN_DOWN(pmdp, sizeof(*pmdp) * CONT_PMDS);
>> +        if (!pmd_cont(pmdp_get(_pmdp)))
>> +            goto split;
>> +
>> +        for (int i = 0; i < CONT_PMDS; i++, _pmdp++)
>> +            set_pmd(_pmdp, pmd_mknoncont(pmdp_get(_pmdp)));
>> +
>> +split:
>> +        ret = split_pmd(pmdp, addr, next);
>> +        if (ret)
>> +            break;
>> +
>> +        pmdp += pmd_index(next) - pmd_index(addr) + nr * CONT_PMDS;
>> +    } while (addr = next + span, addr != end);
>> +
>> +    return ret;
>> +}
>> +
>> +static int split_pud(p4d_t *p4dp, unsigned long addr, unsigned long 
>> end)
>> +{
>> +    pud_t *pudp;
>> +    unsigned long next;
>> +    unsigned int nr;
>> +    unsigned long span;
>> +    int ret = 0;
>> +
>> +    pudp = pud_offset(p4dp, addr);
>> +
>> +    do {
>> +        pud_t pud;
>> +
>> +        nr = 1;
>> +        next = pud_addr_end(addr, end);
>> +        if (next < end)
>> +            nr = max(nr, ((end - next) / PUD_SIZE));
>> +        span = (nr - 1) * PUD_SIZE;
>> +
>> +        if (((addr | next) & ~PUD_MASK) == 0)
>> +            continue;
>> +
>> +        pud = pudp_get(pudp);
>> +        if (pud_leaf(pud)) {
>> +            phys_addr_t pmd_phys;
>> +            pmd_t *pmdp;
>> +            pudval_t pudval = PUD_TYPE_TABLE | PUD_TABLE_UXN |
>> +                      PUD_TABLE_AF;
>> +            unsigned long pfn = pud_pfn(pud);
>> +            pgprot_t prot = pud_pgprot(pud);
>> +            unsigned int step = PMD_SIZE >> PAGE_SHIFT;
>> +
>> +            pmd_phys = split_pgtable_alloc(TABLE_PMD);
>> +            if (pmd_phys == INVALID_PHYS_ADDR)
>> +                return -ENOMEM;
>> +
>> +            if (pgprot_val(prot) & PMD_SECT_PXN)
>> +                pudval |= PUD_TABLE_PXN;
>> +
>> +            prot = __pgprot((pgprot_val(prot) & ~PMD_TYPE_MASK) |
>> +                    PMD_TYPE_SECT);
>> +            prot = __pgprot(pgprot_val(prot) | PTE_CONT);
>> +            pmdp = (pmd_t *)phys_to_virt(pmd_phys);
>> +            for (int i = 0; i < PTRS_PER_PMD; i++, pmdp++) {
>> +                set_pmd(pmdp, pfn_pmd(pfn, prot));
>> +                pfn += step;
>> +            }
>> +
>> +            dsb(ishst);
>> +
>> +            __pud_populate(pudp, pmd_phys, pudval);
>> +        }
>> +
>> +        ret = split_cont_pmd(pudp, addr, next);
>> +        if (ret)
>> +            break;
>> +    } while (pudp += nr, addr = next + span, addr != end);
>> +
>> +    return ret;
>> +}
>> +
>> +static int split_p4d(pgd_t *pgdp, unsigned long addr, unsigned long 
>> end)
>> +{
>> +    p4d_t *p4dp;
>> +    unsigned long next;
>> +    int ret = 0;
>> +
>> +    p4dp = p4d_offset(pgdp, addr);
>> +
>> +    do {
>> +        next = p4d_addr_end(addr, end);
>> +
>> +        ret = split_pud(p4dp, addr, next);
>> +        if (ret)
>> +            break;
>> +    } while (p4dp++, addr = next, addr != end);
>> +
>> +    return ret;
>> +}
>> +
>> +static int split_pgd(pgd_t *pgdp, unsigned long addr, unsigned long 
>> end)
>> +{
>> +    unsigned long next;
>> +    int ret = 0;
>> +
>> +    do {
>> +        next = pgd_addr_end(addr, end);
>> +        ret = split_p4d(pgdp, addr, next);
>> +        if (ret)
>> +            break;
>> +    } while (pgdp++, addr = next, addr != end);
>> +
>> +    return ret;
>> +}
>> +
>> +int split_kernel_pgtable_mapping(unsigned long start, unsigned long 
>> end)
>> +{
>> +    int ret;
>> +
>> +    if (!system_supports_bbml2_noabort())
>> +        return 0;
>> +
>> +    if (start != PAGE_ALIGN(start) || end != PAGE_ALIGN(end))
>> +        return -EINVAL;
>> +
>> +    mutex_lock(&pgtable_split_lock);
>> +    arch_enter_lazy_mmu_mode();
>> +    ret = split_pgd(pgd_offset_k(start), start, end);
>> +    arch_leave_lazy_mmu_mode();
>> +    mutex_unlock(&pgtable_split_lock);
>> +
>> +    return ret;
>> +}
>> +
>>       /*
>
> --- snip ---
>
> I'm afraid I'll have to agree with Ryan :) Looking at the signature of 
> split_kernel_pgtable_mapping,
> one would expect that this function splits all block mappings in this 
> region. But that is just a
> nit; it does not seem right to me that we are iterating over the 
> entire space when we know *exactly* where
> we have to make the split, just to save on pgd/p4d/pud loads - the 
> effect of which is probably cancelled
> out by unnecessary iterations your approach takes to skip the 
> intermediate blocks.

The implementation is aimed at reusing the split code for repainting. We 
have to split all leaf mappings down to PTEs for repainting.

Now Ryan has suggested using the pgtable walk API for repainting, which 
makes the duplicate-code problem go away. We had some discussion in the other series.

>
> If we are concerned that most change_memory_common() operations are 
> for a single page, then
> we can do something like:
>
> unsigned long size = end - start;
> bool end_split, start_split = false;
>
> if (start not aligned to block mapping)
>     start_split = split(start);
>
> /*
>  * split the end only if the start wasn't split, or
>  * if it cannot be guaranteed that start and end lie
>  * on the same contig block
>  */
> if (!start_split || (size > CONT_PTE_SIZE))
>     end_split = split(end);

Thanks for the suggestion. It works for some cases, but I don't think it 
can work if the range crosses page tables, IIUC. For example, start is in 
one PMD, but end is in another PMD.

Regards,
Yang

>
>
>


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ