Message-ID: <db460533-74ea-4246-84c4-2922f6b64b5a@arm.com>
Date: Mon, 12 Jan 2026 11:52:26 +0000
From: Ryan Roberts <ryan.roberts@....com>
To: Linu Cherian <linu.cherian@....com>
Cc: Will Deacon <will@...nel.org>, Ard Biesheuvel <ardb@...nel.org>,
 Catalin Marinas <catalin.marinas@....com>,
 Mark Rutland <mark.rutland@....com>,
 Linus Torvalds <torvalds@...ux-foundation.org>,
 Oliver Upton <oliver.upton@...ux.dev>, Marc Zyngier <maz@...nel.org>,
 Dev Jain <dev.jain@....com>, linux-arm-kernel@...ts.infradead.org,
 linux-kernel@...r.kernel.org
Subject: Re: [PATCH v1 11/13] arm64: mm: More flags for __flush_tlb_range()

On 06/01/2026 15:28, Linu Cherian wrote:
> Ryan,
> 
> On Tue, Dec 16, 2025 at 02:45:56PM +0000, Ryan Roberts wrote:
>> Refactor function variants with "_nosync", "_local" and "_nonotify" into
>> a single __always_inline implementation that takes flags and relies on
>> constant folding to select the parts that are actually needed at any
>> given callsite, based on the provided flags.
>>
>> Flags all live in the tlbf_t (TLB flags) type; TLBF_NONE (0) continues
>> to provide the strongest semantics (i.e. evict from walk cache,
>> broadcast, synchronise and notify). Each flag reduces the strength in
>> some way; TLBF_NONOTIFY, TLBF_NOSYNC and TLBF_NOBROADCAST are added to
>> complement the existing TLBF_NOWALKCACHE.
>>
>> The result is a clearer, simpler, more powerful API.
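
Not part of the patch, but to illustrate the constant-folding point: with an
__always_inline helper and compile-time-constant flags, the compiler can drop
the branches a given callsite does not need. A minimal standalone sketch
(illustrative MYF_* names, not the kernel's tlbf_t API):

#include <stdio.h>

#define MYF_NOSYNC	(1u << 0)	/* illustrative flags, not the kernel's */
#define MYF_LOCAL	(1u << 1)

static inline __attribute__((always_inline)) void do_flush(unsigned int flags)
{
	if (flags & MYF_LOCAL)
		puts("local invalidate");
	else
		puts("broadcast invalidate");

	if (!(flags & MYF_NOSYNC))
		puts("trailing barrier");
}

int main(void)
{
	/* Each call keeps only the paths its constant flags select. */
	do_flush(0);
	do_flush(MYF_LOCAL | MYF_NOSYNC);
	return 0;
}
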
>>
>> Signed-off-by: Ryan Roberts <ryan.roberts@....com>
>> ---
>>  arch/arm64/include/asm/tlbflush.h | 101 ++++++++++++++++++------------
>>  arch/arm64/mm/contpte.c           |   9 ++-
>>  2 files changed, 68 insertions(+), 42 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
>> index 9a37a6a014dc..ee747e66bbef 100644
>> --- a/arch/arm64/include/asm/tlbflush.h
>> +++ b/arch/arm64/include/asm/tlbflush.h
>> @@ -107,6 +107,12 @@ static inline unsigned long get_trans_granule(void)
>>  
>>  typedef void (*tlbi_op)(u64 arg);
>>  
>> +static __always_inline void vae1(u64 arg)
>> +{
>> +	__tlbi(vae1, arg);
>> +	__tlbi_user(vae1, arg);
>> +}
>> +
>>  static __always_inline void vae1is(u64 arg)
>>  {
>>  	__tlbi(vae1is, arg);
>> @@ -276,7 +282,10 @@ static inline void __tlbi_level(tlbi_op op, u64 addr, u32 level)
>>   *		no invalidation may take place. In the case where the level
>>   *		cannot be easily determined, the value TLBI_TTL_UNKNOWN will
>>   *		perform a non-hinted invalidation. flags may be TLBF_NONE (0) or
>> - *		TLBF_NOWALKCACHE (elide eviction of walk cache entries).
>> + *		any combination of TLBF_NOWALKCACHE (elide eviction of walk
>> + *		cache entries), TLBF_NONOTIFY (don't call mmu notifiers),
>> + *		TLBF_NOSYNC (don't issue trailing dsb) and TLBF_NOBROADCAST
>> + *		(only perform the invalidation for the local cpu).
>>   *
>>   *	local_flush_tlb_page(vma, addr)
>>   *		Local variant of flush_tlb_page().  Stale TLB entries may
>> @@ -286,12 +295,6 @@ static inline void __tlbi_level(tlbi_op op, u64 addr, u32 level)
>>   *		Same as local_flush_tlb_page() except MMU notifier will not be
>>   *		called.
>>   *
>> - *	local_flush_tlb_contpte(vma, addr)
>> - *		Invalidate the virtual-address range
>> - *		'[addr, addr+CONT_PTE_SIZE)' mapped with contpte on local CPU
>> - *		for the user address space corresponding to 'vma->mm'.  Stale
>> - *		TLB entries may remain in remote CPUs.
>> - *
>>   *	Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented
>>   *	on top of these routines, since that is our interface to the mmu_gather
>>   *	API as used by munmap() and friends.
>> @@ -436,6 +439,12 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
>>   *    operations can only span an even number of pages. We save this for last to
>>   *    ensure 64KB start alignment is maintained for the LPA2 case.
>>   */
>> +static __always_inline void rvae1(u64 arg)
>> +{
>> +	__tlbi(rvae1, arg);
>> +	__tlbi_user(rvae1, arg);
>> +}
>> +
>>  static __always_inline void rvae1is(u64 arg)
>>  {
>>  	__tlbi(rvae1is, arg);
>> @@ -531,16 +540,18 @@ static inline bool __flush_tlb_range_limit_excess(unsigned long pages,
>>  typedef unsigned __bitwise tlbf_t;
>>  #define TLBF_NONE		((__force tlbf_t)0)
>>  #define TLBF_NOWALKCACHE	((__force tlbf_t)BIT(0))
>> +#define TLBF_NOSYNC		((__force tlbf_t)BIT(1))
>> +#define TLBF_NONOTIFY		((__force tlbf_t)BIT(2))
>> +#define TLBF_NOBROADCAST	((__force tlbf_t)BIT(3))
>>  
>> -static inline void __flush_tlb_range_nosync(struct mm_struct *mm,
>> -				     unsigned long start, unsigned long end,
>> -				     unsigned long stride, int tlb_level,
>> -				     tlbf_t flags)
>> +static __always_inline void ___flush_tlb_range(struct vm_area_struct *vma,
>> +					unsigned long start, unsigned long end,
>> +					unsigned long stride, int tlb_level,
>> +					tlbf_t flags)
>>  {
>> +	struct mm_struct *mm = vma->vm_mm;
>>  	unsigned long asid, pages;
>>  
>> -	start = round_down(start, stride);
>> -	end = round_up(end, stride);
>>  	pages = (end - start) >> PAGE_SHIFT;
>>  
>>  	if (__flush_tlb_range_limit_excess(pages, stride)) {
>> @@ -548,17 +559,41 @@ static inline void __flush_tlb_range_nosync(struct mm_struct *mm,
>>  		return;
>>  	}
>>  
>> -	dsb(ishst);
>> +	if (!(flags & TLBF_NOBROADCAST))
>> +		dsb(ishst);
>> +	else
>> +		dsb(nshst);
>> +
>>  	asid = ASID(mm);
>>  
>> -	if (flags & TLBF_NOWALKCACHE)
>> -		__flush_s1_tlb_range_op(vale1is, start, pages, stride,
>> -				     asid, tlb_level);
>> -	else
>> +	switch (flags & (TLBF_NOWALKCACHE | TLBF_NOBROADCAST)) {
>> +	case TLBF_NONE:
>>  		__flush_s1_tlb_range_op(vae1is, start, pages, stride,
>> -				     asid, tlb_level);
>> +					asid, tlb_level);
>> +		break;
>> +	case TLBF_NOWALKCACHE:
>> +		__flush_s1_tlb_range_op(vale1is, start, pages, stride,
>> +					asid, tlb_level);
>> +		break;
>> +	case TLBF_NOBROADCAST:
>> +		__flush_s1_tlb_range_op(vae1, start, pages, stride,
>> +					asid, tlb_level);
>> +		break;
>> +	case TLBF_NOWALKCACHE | TLBF_NOBROADCAST:
>> +		__flush_s1_tlb_range_op(vale1, start, pages, stride,
>> +					asid, tlb_level);
>> +		break;
>> +	}
>>  
>> -	mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
>> +	if (!(flags & TLBF_NONOTIFY))
>> +		mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
>> +
>> +	if (!(flags & TLBF_NOSYNC)) {
>> +		if (!(flags & TLBF_NOBROADCAST))
>> +			dsb(ish);
>> +		else
>> +			dsb(nsh);
>> +	}
>>  }
>>  
>>  static inline void __flush_tlb_range(struct vm_area_struct *vma,
>> @@ -566,24 +601,9 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
>>  				     unsigned long stride, int tlb_level,
>>  				     tlbf_t flags)
>>  {
>> -	__flush_tlb_range_nosync(vma->vm_mm, start, end, stride,
>> -				 tlb_level, flags);
>> -	dsb(ish);
>> -}
>> -
>> -static inline void local_flush_tlb_contpte(struct vm_area_struct *vma,
>> -					   unsigned long addr)
>> -{
>> -	unsigned long asid;
>> -
>> -	addr = round_down(addr, CONT_PTE_SIZE);
>> -
>> -	dsb(nshst);
>> -	asid = ASID(vma->vm_mm);
>> -	__flush_s1_tlb_range_op(vale1, addr, CONT_PTES, PAGE_SIZE, asid, 3);
>> -	mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, addr,
>> -						    addr + CONT_PTE_SIZE);
>> -	dsb(nsh);
>> +	start = round_down(start, stride);
>> +	end = round_up(end, stride);
>> +	___flush_tlb_range(vma, start, end, stride, tlb_level, flags);
>>  }
>>  
>>  static inline void flush_tlb_range(struct vm_area_struct *vma,
>> @@ -636,7 +656,10 @@ static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr)
>>  static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
>>  		struct mm_struct *mm, unsigned long start, unsigned long end)
>>  {
>> -	__flush_tlb_range_nosync(mm, start, end, PAGE_SIZE, 3, TLBF_NOWALKCACHE);
>> +	struct vm_area_struct vma = { .vm_mm = mm, .vm_flags = 0 };
>> +
>> +	__flush_tlb_range(&vma, start, end, PAGE_SIZE, 3,
>> +			  TLBF_NOWALKCACHE | TLBF_NOSYNC);
>>  }
>>  
>>  static inline bool __pte_flags_need_flush(ptdesc_t oldval, ptdesc_t newval)
>> diff --git a/arch/arm64/mm/contpte.c b/arch/arm64/mm/contpte.c
>> index 1a12bb728ee1..ec17a0e70415 100644
>> --- a/arch/arm64/mm/contpte.c
>> +++ b/arch/arm64/mm/contpte.c
>> @@ -527,8 +527,8 @@ int contpte_ptep_clear_flush_young(struct vm_area_struct *vma,
>>  		 * eliding the trailing DSB applies here.
>>  		 */
>>  		addr = ALIGN_DOWN(addr, CONT_PTE_SIZE);
>> -		__flush_tlb_range_nosync(vma->vm_mm, addr, addr + CONT_PTE_SIZE,
>> -					 PAGE_SIZE, 3, TLBF_NOWALKCACHE);
>> +		__flush_tlb_range(vma, addr, addr + CONT_PTE_SIZE,
>> +				  PAGE_SIZE, 3, TLBF_NOWALKCACHE | TLBF_NOSYNC);
>>  	}
>>  
>>  	return young;
>> @@ -623,7 +623,10 @@ int contpte_ptep_set_access_flags(struct vm_area_struct *vma,
>>  			__ptep_set_access_flags(vma, addr, ptep, entry, 0);
>>  
>>  		if (dirty)
>> -			local_flush_tlb_contpte(vma, start_addr);
>> +			__flush_tlb_range(vma, start_addr,
>> +					  start_addr + CONT_PTE_SIZE,
>> +					  PAGE_SIZE, 3,
>> +					  TLBF_NOWALKCACHE | TLBF_NOBROADCAST);
> 
> 
>  
> local_flush_tlb_contpte used round_down(addr, CONT_PTE_SIZE), but __flush_tlb_range now
> uses round_down(start, stride), where stride is PAGE_SIZE, so doesn't it end up with a
> smaller alignment than required?

But start_addr (and therefore start_addr + CONT_PTE_SIZE) is already
CONT_PTE_SIZE-aligned, and CONT_PTE_SIZE is guaranteed to be a multiple of
(and larger than) PAGE_SIZE, so the rounding won't change the values. The
same region should be invalidated (with the same stride) before and after
this change, unless I'm mistaken.
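
For what it's worth, a quick standalone sketch of that argument (illustrative
constants and local round_down/round_up definitions, not the kernel headers):

#include <assert.h>
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define CONT_PTE_SIZE	(16 * PAGE_SIZE)	/* e.g. 16 contiguous 4K PTEs */

#define round_down(x, y)	((x) & ~((y) - 1))
#define round_up(x, y)		round_down((x) + (y) - 1, (y))

int main(void)
{
	/* start_addr is CONT_PTE_SIZE-aligned, as in contpte_ptep_set_access_flags(). */
	unsigned long start = round_down(0x12345678UL, CONT_PTE_SIZE);
	unsigned long end = start + CONT_PTE_SIZE;

	/*
	 * The rounding __flush_tlb_range() now applies, with stride == PAGE_SIZE:
	 * since CONT_PTE_SIZE is a multiple of PAGE_SIZE, both values are unchanged.
	 */
	assert(round_down(start, PAGE_SIZE) == start);
	assert(round_up(end, PAGE_SIZE) == end);

	printf("range [%#lx, %#lx) unchanged by PAGE_SIZE rounding\n", start, end);
	return 0;
}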

Thanks,
Ryan


> 
> --
> Linu Cherian.
> 
> 

