Message-ID: <db460533-74ea-4246-84c4-2922f6b64b5a@arm.com>
Date: Mon, 12 Jan 2026 11:52:26 +0000
From: Ryan Roberts <ryan.roberts@....com>
To: Linu Cherian <linu.cherian@....com>
Cc: Will Deacon <will@...nel.org>, Ard Biesheuvel <ardb@...nel.org>,
Catalin Marinas <catalin.marinas@....com>,
Mark Rutland <mark.rutland@....com>,
Linus Torvalds <torvalds@...ux-foundation.org>,
Oliver Upton <oliver.upton@...ux.dev>, Marc Zyngier <maz@...nel.org>,
Dev Jain <dev.jain@....com>, linux-arm-kernel@...ts.infradead.org,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH v1 11/13] arm64: mm: More flags for __flush_tlb_range()
On 06/01/2026 15:28, Linu Cherian wrote:
> Ryan,
>
> On Tue, Dec 16, 2025 at 02:45:56PM +0000, Ryan Roberts wrote:
>> Refactor function variants with "_nosync", "_local" and "_nonotify" into
>> a single __always_inline implementation that takes flags and rely on
>> constant folding to select the parts that are actually needed at any
>> given callsite, based on the provided flags.
>>
>> Flags all live in the tlbf_t (TLB flags) type; TLBF_NONE (0) continues
>> to provide the strongest semantics (i.e. evict from walk cache,
>> broadcast, synchronise and notify). Each flag reduces the strength in
>> some way; TLBF_NONOTIFY, TLBF_NOSYNC and TLBF_NOBROADCAST are added to
>> complement the existing TLBF_NOWALKCACHE.
>>
>> The result is a clearer, simpler, more powerful API.
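
(For illustration, a sketch of how these flags are meant to compose at a call
site; the variable names below are placeholders, but the combination mirrors
the batched-unmap user later in this patch:)

	/*
	 * Invalidate last-level entries only (skip the walk cache) and
	 * elide the trailing DSB so the caller can batch the sync itself.
	 */
	__flush_tlb_range(vma, start, end, PAGE_SIZE, 3,
			  TLBF_NOWALKCACHE | TLBF_NOSYNC);
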
>>
>> Signed-off-by: Ryan Roberts <ryan.roberts@....com>
>> ---
>> arch/arm64/include/asm/tlbflush.h | 101 ++++++++++++++++++------------
>> arch/arm64/mm/contpte.c | 9 ++-
>> 2 files changed, 68 insertions(+), 42 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
>> index 9a37a6a014dc..ee747e66bbef 100644
>> --- a/arch/arm64/include/asm/tlbflush.h
>> +++ b/arch/arm64/include/asm/tlbflush.h
>> @@ -107,6 +107,12 @@ static inline unsigned long get_trans_granule(void)
>>
>> typedef void (*tlbi_op)(u64 arg);
>>
>> +static __always_inline void vae1(u64 arg)
>> +{
>> + __tlbi(vae1, arg);
>> + __tlbi_user(vae1, arg);
>> +}
>> +
>> static __always_inline void vae1is(u64 arg)
>> {
>> __tlbi(vae1is, arg);
>> @@ -276,7 +282,10 @@ static inline void __tlbi_level(tlbi_op op, u64 addr, u32 level)
>> * no invalidation may take place. In the case where the level
>> * cannot be easily determined, the value TLBI_TTL_UNKNOWN will
>> * perform a non-hinted invalidation. flags may be TLBF_NONE (0) or
>> - * TLBF_NOWALKCACHE (elide eviction of walk cache entries).
>> + * any combination of TLBF_NOWALKCACHE (elide eviction of walk
>> + * cache entries), TLBF_NONOTIFY (don't call mmu notifiers),
>> + * TLBF_NOSYNC (don't issue trailing dsb) and TLBF_NOBROADCAST
>> + * (only perform the invalidation for the local cpu).
>> *
>> * local_flush_tlb_page(vma, addr)
>> * Local variant of flush_tlb_page(). Stale TLB entries may
>> @@ -286,12 +295,6 @@ static inline void __tlbi_level(tlbi_op op, u64 addr, u32 level)
>> * Same as local_flush_tlb_page() except MMU notifier will not be
>> * called.
>> *
>> - * local_flush_tlb_contpte(vma, addr)
>> - * Invalidate the virtual-address range
>> - * '[addr, addr+CONT_PTE_SIZE)' mapped with contpte on local CPU
>> - * for the user address space corresponding to 'vma->mm'. Stale
>> - * TLB entries may remain in remote CPUs.
>> - *
>> * Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented
>> * on top of these routines, since that is our interface to the mmu_gather
>> * API as used by munmap() and friends.
>> @@ -436,6 +439,12 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
>> * operations can only span an even number of pages. We save this for last to
>> * ensure 64KB start alignment is maintained for the LPA2 case.
>> */
>> +static __always_inline void rvae1(u64 arg)
>> +{
>> + __tlbi(rvae1, arg);
>> + __tlbi_user(rvae1, arg);
>> +}
>> +
>> static __always_inline void rvae1is(u64 arg)
>> {
>> __tlbi(rvae1is, arg);
>> @@ -531,16 +540,18 @@ static inline bool __flush_tlb_range_limit_excess(unsigned long pages,
>> typedef unsigned __bitwise tlbf_t;
>> #define TLBF_NONE ((__force tlbf_t)0)
>> #define TLBF_NOWALKCACHE ((__force tlbf_t)BIT(0))
>> +#define TLBF_NOSYNC ((__force tlbf_t)BIT(1))
>> +#define TLBF_NONOTIFY ((__force tlbf_t)BIT(2))
>> +#define TLBF_NOBROADCAST ((__force tlbf_t)BIT(3))
>>
>> -static inline void __flush_tlb_range_nosync(struct mm_struct *mm,
>> - unsigned long start, unsigned long end,
>> - unsigned long stride, int tlb_level,
>> - tlbf_t flags)
>> +static __always_inline void ___flush_tlb_range(struct vm_area_struct *vma,
>> + unsigned long start, unsigned long end,
>> + unsigned long stride, int tlb_level,
>> + tlbf_t flags)
>> {
>> + struct mm_struct *mm = vma->vm_mm;
>> unsigned long asid, pages;
>>
>> - start = round_down(start, stride);
>> - end = round_up(end, stride);
>> pages = (end - start) >> PAGE_SHIFT;
>>
>> if (__flush_tlb_range_limit_excess(pages, stride)) {
>> @@ -548,17 +559,41 @@ static inline void __flush_tlb_range_nosync(struct mm_struct *mm,
>> return;
>> }
>>
>> - dsb(ishst);
>> + if (!(flags & TLBF_NOBROADCAST))
>> + dsb(ishst);
>> + else
>> + dsb(nshst);
>> +
>> asid = ASID(mm);
>>
>> - if (flags & TLBF_NOWALKCACHE)
>> - __flush_s1_tlb_range_op(vale1is, start, pages, stride,
>> - asid, tlb_level);
>> - else
>> + switch (flags & (TLBF_NOWALKCACHE | TLBF_NOBROADCAST)) {
>> + case TLBF_NONE:
>> __flush_s1_tlb_range_op(vae1is, start, pages, stride,
>> - asid, tlb_level);
>> + asid, tlb_level);
>> + break;
>> + case TLBF_NOWALKCACHE:
>> + __flush_s1_tlb_range_op(vale1is, start, pages, stride,
>> + asid, tlb_level);
>> + break;
>> + case TLBF_NOBROADCAST:
>> + __flush_s1_tlb_range_op(vae1, start, pages, stride,
>> + asid, tlb_level);
>> + break;
>> + case TLBF_NOWALKCACHE | TLBF_NOBROADCAST:
>> + __flush_s1_tlb_range_op(vale1, start, pages, stride,
>> + asid, tlb_level);
>> + break;
>> + }
>>
>> - mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
>> + if (!(flags & TLBF_NONOTIFY))
>> + mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
>> +
>> + if (!(flags & TLBF_NOSYNC)) {
>> + if (!(flags & TLBF_NOBROADCAST))
>> + dsb(ish);
>> + else
>> + dsb(nsh);
>> + }
>> }
>>
>> static inline void __flush_tlb_range(struct vm_area_struct *vma,
>> @@ -566,24 +601,9 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
>> unsigned long stride, int tlb_level,
>> tlbf_t flags)
>> {
>> - __flush_tlb_range_nosync(vma->vm_mm, start, end, stride,
>> - tlb_level, flags);
>> - dsb(ish);
>> -}
>> -
>> -static inline void local_flush_tlb_contpte(struct vm_area_struct *vma,
>> - unsigned long addr)
>> -{
>> - unsigned long asid;
>> -
>> - addr = round_down(addr, CONT_PTE_SIZE);
>> -
>> - dsb(nshst);
>> - asid = ASID(vma->vm_mm);
>> - __flush_s1_tlb_range_op(vale1, addr, CONT_PTES, PAGE_SIZE, asid, 3);
>> - mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, addr,
>> - addr + CONT_PTE_SIZE);
>> - dsb(nsh);
>> + start = round_down(start, stride);
>> + end = round_up(end, stride);
>> + ___flush_tlb_range(vma, start, end, stride, tlb_level, flags);
>> }
>>
>> static inline void flush_tlb_range(struct vm_area_struct *vma,
>> @@ -636,7 +656,10 @@ static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr)
>> static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
>> struct mm_struct *mm, unsigned long start, unsigned long end)
>> {
>> - __flush_tlb_range_nosync(mm, start, end, PAGE_SIZE, 3, TLBF_NOWALKCACHE);
>> + struct vm_area_struct vma = { .vm_mm = mm, .vm_flags = 0 };
>> +
>> + __flush_tlb_range(&vma, start, end, PAGE_SIZE, 3,
>> + TLBF_NOWALKCACHE | TLBF_NOSYNC);
>> }
>>
>> static inline bool __pte_flags_need_flush(ptdesc_t oldval, ptdesc_t newval)
>> diff --git a/arch/arm64/mm/contpte.c b/arch/arm64/mm/contpte.c
>> index 1a12bb728ee1..ec17a0e70415 100644
>> --- a/arch/arm64/mm/contpte.c
>> +++ b/arch/arm64/mm/contpte.c
>> @@ -527,8 +527,8 @@ int contpte_ptep_clear_flush_young(struct vm_area_struct *vma,
>> * eliding the trailing DSB applies here.
>> */
>> addr = ALIGN_DOWN(addr, CONT_PTE_SIZE);
>> - __flush_tlb_range_nosync(vma->vm_mm, addr, addr + CONT_PTE_SIZE,
>> - PAGE_SIZE, 3, TLBF_NOWALKCACHE);
>> + __flush_tlb_range(vma, addr, addr + CONT_PTE_SIZE,
>> + PAGE_SIZE, 3, TLBF_NOWALKCACHE | TLBF_NOSYNC);
>> }
>>
>> return young;
>> @@ -623,7 +623,10 @@ int contpte_ptep_set_access_flags(struct vm_area_struct *vma,
>> __ptep_set_access_flags(vma, addr, ptep, entry, 0);
>>
>> if (dirty)
>> - local_flush_tlb_contpte(vma, start_addr);
>> + __flush_tlb_range(vma, start_addr,
>> + start_addr + CONT_PTE_SIZE,
>> + PAGE_SIZE, 3,
>> + TLBF_NOWALKCACHE | TLBF_NOBROADCAST);
>
>
>
> local_flush_tlb_contpte used round_down(addr, CONT_PTE_SIZE), but __flush_tlb_range uses
> round_down(start, stride), where the stride here is PAGE_SIZE, so doesn't it end up with a
> smaller alignment than required?
But start_addr (and therefore start_addr + CONT_PTE_SIZE) is already
CONT_PTE_SIZE-aligned, and CONT_PTE_SIZE is guaranteed to be bigger than
PAGE_SIZE, so the rounding won't change the values. The same region should be
invalidated (with the same stride) before and after this change, unless I'm
mistaken.
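
For example (illustrative numbers only, assuming 4K pages, so PAGE_SIZE is
0x1000 and CONT_PTE_SIZE is 0x10000):

	round_down(0x10000, 0x1000) == 0x10000	/* start_addr unchanged */
	round_up(0x20000, 0x1000) == 0x20000	/* start_addr + CONT_PTE_SIZE unchanged */
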
Thanks,
Ryan
>
> --
> Linu Cherian.
>
>