Message-ID: <5c3428c6-25be-4a94-811a-6bb6718f6c58@arm.com>
Date: Sat, 28 Jun 2025 09:07:00 +0530
From: Dev Jain <dev.jain@....com>
To: David Hildenbrand <david@...hat.com>, linux-kernel@...r.kernel.org
Cc: linux-mm@...ck.org, Andrew Morton <akpm@...ux-foundation.org>,
 "Liam R. Howlett" <Liam.Howlett@...cle.com>,
 Lorenzo Stoakes <lorenzo.stoakes@...cle.com>,
 Vlastimil Babka <vbabka@...e.cz>, Jann Horn <jannh@...gle.com>,
 Mike Rapoport <rppt@...nel.org>, Suren Baghdasaryan <surenb@...gle.com>,
 Michal Hocko <mhocko@...e.com>, Zi Yan <ziy@...dia.com>,
 Matthew Brost <matthew.brost@...el.com>,
 Joshua Hahn <joshua.hahnjy@...il.com>, Rakie Kim <rakie.kim@...com>,
 Byungchul Park <byungchul@...com>, Gregory Price <gourry@...rry.net>,
 Ying Huang <ying.huang@...ux.alibaba.com>,
 Alistair Popple <apopple@...dia.com>, Pedro Falcato <pfalcato@...e.de>,
 Rik van Riel <riel@...riel.com>, Harry Yoo <harry.yoo@...cle.com>,
 Ryan Roberts <ryan.roberts@....com>
Subject: Re: [PATCH v1 1/4] mm: convert FPB_IGNORE_* into FPB_HONOR_*


On 27/06/25 5:25 pm, David Hildenbrand wrote:
> Honoring these PTE bits is the exception, so let's invert the meaning.
>
> With this change, most callers don't have to pass any flags.
>
> No functional change intended.

FWIW, I had proposed this kind of change to Ryan (CCed) earlier, and he
pointed out: "Doesn't that argument apply in reverse if you want
to ignore something new in future?

By default we are comparing all the bits in the pte when determining the batch.
The flags request to ignore certain bits. If we want to ignore extra bits in
future, we add new flags and the existing callers don't need to be updated.

With your approach, every time you want the ability to ignore extra bits, you
need to update all the callers, which is error prone."
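
To make the trade-off concrete, here is a stand-alone toy sketch (plain
userspace C, not kernel code; the normalize_*() helpers and the BIT_*,
IGNORE_* and HONOR_* names are made up purely to mirror the FPB_*
semantics, they are not the actual kernel identifiers) contrasting the
two conventions:

#include <stdio.h>

typedef unsigned int flags_t;

/* Bits a toy "pte" value might carry. */
#define BIT_DIRTY		(1u << 0)
#define BIT_SOFT_DIRTY		(1u << 1)

/*
 * Convention A ("ignore" flags): by default every bit is compared,
 * and a flag opts a caller in to ignoring one.
 */
#define IGNORE_DIRTY		(1u << 0)
#define IGNORE_SOFT_DIRTY	(1u << 1)

static unsigned int normalize_ignore(unsigned int pte, flags_t flags)
{
	if (flags & IGNORE_DIRTY)
		pte &= ~BIT_DIRTY;
	if (flags & IGNORE_SOFT_DIRTY)
		pte &= ~BIT_SOFT_DIRTY;
	return pte;
}

/*
 * Convention B ("honor" flags): by default the listed bits are ignored,
 * and a flag opts a caller in to comparing one.
 */
#define HONOR_DIRTY		(1u << 0)
#define HONOR_SOFT_DIRTY	(1u << 1)

static unsigned int normalize_honor(unsigned int pte, flags_t flags)
{
	if (!(flags & HONOR_DIRTY))
		pte &= ~BIT_DIRTY;
	if (!(flags & HONOR_SOFT_DIRTY))
		pte &= ~BIT_SOFT_DIRTY;
	return pte;
}

int main(void)
{
	unsigned int pte = BIT_DIRTY | BIT_SOFT_DIRTY;

	/* A caller that wants both known bits stripped: */
	printf("ignore-style: %#x\n",
	       normalize_ignore(pte, IGNORE_DIRTY | IGNORE_SOFT_DIRTY));
	printf("honor-style:  %#x\n", normalize_honor(pte, 0));

	/*
	 * If a third ignorable bit is added later:
	 *  - under convention A, a new IGNORE_* flag is added and callers
	 *    passing the old flags keep comparing the new bit, so they
	 *    need no changes;
	 *  - under convention B, the new bit is stripped whenever the new
	 *    HONOR_* flag is absent, so every caller that still needs the
	 *    bit compared has to be found and updated.
	 */
	return 0;
}

Both call styles are equivalent today (both prints show the same
normalized value), so the question is only which direction is safer to
extend; that is the maintenance cost Ryan was describing.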

>
> Signed-off-by: David Hildenbrand <david@...hat.com>
> ---
>   mm/internal.h  | 16 ++++++++--------
>   mm/madvise.c   |  3 +--
>   mm/memory.c    | 11 +++++------
>   mm/mempolicy.c |  4 +---
>   mm/mlock.c     |  3 +--
>   mm/mremap.c    |  3 +--
>   mm/rmap.c      |  3 +--
>   7 files changed, 18 insertions(+), 25 deletions(-)
>
> diff --git a/mm/internal.h b/mm/internal.h
> index e84217e27778d..9690c75063881 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -202,17 +202,17 @@ static inline void vma_close(struct vm_area_struct *vma)
>   /* Flags for folio_pte_batch(). */
>   typedef int __bitwise fpb_t;
>   
> -/* Compare PTEs after pte_mkclean(), ignoring the dirty bit. */
> -#define FPB_IGNORE_DIRTY		((__force fpb_t)BIT(0))
> +/* Compare PTEs honoring the dirty bit. */
> +#define FPB_HONOR_DIRTY		((__force fpb_t)BIT(0))
>   
> -/* Compare PTEs after pte_clear_soft_dirty(), ignoring the soft-dirty bit. */
> -#define FPB_IGNORE_SOFT_DIRTY		((__force fpb_t)BIT(1))
> +/* Compare PTEs honoring the soft-dirty bit. */
> +#define FPB_HONOR_SOFT_DIRTY		((__force fpb_t)BIT(1))
>   
>   static inline pte_t __pte_batch_clear_ignored(pte_t pte, fpb_t flags)
>   {
> -	if (flags & FPB_IGNORE_DIRTY)
> +	if (!(flags & FPB_HONOR_DIRTY))
>   		pte = pte_mkclean(pte);
> -	if (likely(flags & FPB_IGNORE_SOFT_DIRTY))
> +	if (likely(!(flags & FPB_HONOR_SOFT_DIRTY)))
>   		pte = pte_clear_soft_dirty(pte);
>   	return pte_wrprotect(pte_mkold(pte));
>   }
> @@ -236,8 +236,8 @@ static inline pte_t __pte_batch_clear_ignored(pte_t pte, fpb_t flags)
>    * pages of the same large folio.
>    *
>    * All PTEs inside a PTE batch have the same PTE bits set, excluding the PFN,
> - * the accessed bit, writable bit, dirty bit (with FPB_IGNORE_DIRTY) and
> - * soft-dirty bit (with FPB_IGNORE_SOFT_DIRTY).
> + * the accessed bit, writable bit, dirty bit (unless FPB_HONOR_DIRTY is set) and
> + * soft-dirty bit (unless FPB_HONOR_SOFT_DIRTY is set).
>    *
>    * start_ptep must map any page of the folio. max_nr must be at least one and
>    * must be limited by the caller so scanning cannot exceed a single page table.
> diff --git a/mm/madvise.c b/mm/madvise.c
> index e61e32b2cd91f..661bb743d2216 100644
> --- a/mm/madvise.c
> +++ b/mm/madvise.c
> @@ -347,10 +347,9 @@ static inline int madvise_folio_pte_batch(unsigned long addr, unsigned long end,
>   					  pte_t pte, bool *any_young,
>   					  bool *any_dirty)
>   {
> -	const fpb_t fpb_flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
>   	int max_nr = (end - addr) / PAGE_SIZE;
>   
> -	return folio_pte_batch(folio, addr, ptep, pte, max_nr, fpb_flags, NULL,
> +	return folio_pte_batch(folio, addr, ptep, pte, max_nr, 0, NULL,
>   			       any_young, any_dirty);
>   }
>   
> diff --git a/mm/memory.c b/mm/memory.c
> index 0f9b32a20e5b7..ab2d6c1425691 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -990,10 +990,10 @@ copy_present_ptes(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma
>   	 * by keeping the batching logic separate.
>   	 */
>   	if (unlikely(!*prealloc && folio_test_large(folio) && max_nr != 1)) {
> -		if (src_vma->vm_flags & VM_SHARED)
> -			flags |= FPB_IGNORE_DIRTY;
> -		if (!vma_soft_dirty_enabled(src_vma))
> -			flags |= FPB_IGNORE_SOFT_DIRTY;
> +		if (!(src_vma->vm_flags & VM_SHARED))
> +			flags |= FPB_HONOR_DIRTY;
> +		if (vma_soft_dirty_enabled(src_vma))
> +			flags |= FPB_HONOR_SOFT_DIRTY;
>   
>   		nr = folio_pte_batch(folio, addr, src_pte, pte, max_nr, flags,
>   				     &any_writable, NULL, NULL);
> @@ -1535,7 +1535,6 @@ static inline int zap_present_ptes(struct mmu_gather *tlb,
>   		struct zap_details *details, int *rss, bool *force_flush,
>   		bool *force_break, bool *any_skipped)
>   {
> -	const fpb_t fpb_flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
>   	struct mm_struct *mm = tlb->mm;
>   	struct folio *folio;
>   	struct page *page;
> @@ -1565,7 +1564,7 @@ static inline int zap_present_ptes(struct mmu_gather *tlb,
>   	 * by keeping the batching logic separate.
>   	 */
>   	if (unlikely(folio_test_large(folio) && max_nr != 1)) {
> -		nr = folio_pte_batch(folio, addr, pte, ptent, max_nr, fpb_flags,
> +		nr = folio_pte_batch(folio, addr, pte, ptent, max_nr, 0,
>   				     NULL, NULL, NULL);
>   
>   		zap_present_folio_ptes(tlb, vma, folio, page, pte, ptent, nr,
> diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> index 1ff7b2174eb77..2a25eedc3b1c0 100644
> --- a/mm/mempolicy.c
> +++ b/mm/mempolicy.c
> @@ -675,7 +675,6 @@ static void queue_folios_pmd(pmd_t *pmd, struct mm_walk *walk)
>   static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr,
>   			unsigned long end, struct mm_walk *walk)
>   {
> -	const fpb_t fpb_flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
>   	struct vm_area_struct *vma = walk->vma;
>   	struct folio *folio;
>   	struct queue_pages *qp = walk->private;
> @@ -713,8 +712,7 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr,
>   			continue;
>   		if (folio_test_large(folio) && max_nr != 1)
>   			nr = folio_pte_batch(folio, addr, pte, ptent,
> -					     max_nr, fpb_flags,
> -					     NULL, NULL, NULL);
> +					     max_nr, 0, NULL, NULL, NULL);
>   		/*
>   		 * vm_normal_folio() filters out zero pages, but there might
>   		 * still be reserved folios to skip, perhaps in a VDSO.
> diff --git a/mm/mlock.c b/mm/mlock.c
> index 3cb72b579ffd3..2238cdc5eb1c1 100644
> --- a/mm/mlock.c
> +++ b/mm/mlock.c
> @@ -307,14 +307,13 @@ void munlock_folio(struct folio *folio)
>   static inline unsigned int folio_mlock_step(struct folio *folio,
>   		pte_t *pte, unsigned long addr, unsigned long end)
>   {
> -	const fpb_t fpb_flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
>   	unsigned int count = (end - addr) >> PAGE_SHIFT;
>   	pte_t ptent = ptep_get(pte);
>   
>   	if (!folio_test_large(folio))
>   		return 1;
>   
> -	return folio_pte_batch(folio, addr, pte, ptent, count, fpb_flags, NULL,
> +	return folio_pte_batch(folio, addr, pte, ptent, count, 0, NULL,
>   			       NULL, NULL);
>   }
>   
> diff --git a/mm/mremap.c b/mm/mremap.c
> index 36585041c760d..d4d3ffc931502 100644
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -173,7 +173,6 @@ static pte_t move_soft_dirty_pte(pte_t pte)
>   static int mremap_folio_pte_batch(struct vm_area_struct *vma, unsigned long addr,
>   		pte_t *ptep, pte_t pte, int max_nr)
>   {
> -	const fpb_t flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
>   	struct folio *folio;
>   
>   	if (max_nr == 1)
> @@ -183,7 +182,7 @@ static int mremap_folio_pte_batch(struct vm_area_struct *vma, unsigned long addr
>   	if (!folio || !folio_test_large(folio))
>   		return 1;
>   
> -	return folio_pte_batch(folio, addr, ptep, pte, max_nr, flags, NULL,
> +	return folio_pte_batch(folio, addr, ptep, pte, max_nr, 0, NULL,
>   			       NULL, NULL);
>   }
>   
> diff --git a/mm/rmap.c b/mm/rmap.c
> index 3b74bb19c11dd..a29d7d29c7283 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -1849,7 +1849,6 @@ void folio_remove_rmap_pud(struct folio *folio, struct page *page,
>   static inline bool can_batch_unmap_folio_ptes(unsigned long addr,
>   			struct folio *folio, pte_t *ptep)
>   {
> -	const fpb_t fpb_flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY;
>   	int max_nr = folio_nr_pages(folio);
>   	pte_t pte = ptep_get(ptep);
>   
> @@ -1860,7 +1859,7 @@ static inline bool can_batch_unmap_folio_ptes(unsigned long addr,
>   	if (pte_pfn(pte) != folio_pfn(folio))
>   		return false;
>   
> -	return folio_pte_batch(folio, addr, ptep, pte, max_nr, fpb_flags, NULL,
> +	return folio_pte_batch(folio, addr, ptep, pte, max_nr, 0, NULL,
>   			       NULL, NULL) == max_nr;
>   }
>   
