Message-ID: <f95f7dc3201d62cdbe56ea397201974f5a586bb4.1761288179.git.lorenzo.stoakes@oracle.com>
Date: Fri, 24 Oct 2025 08:41:18 +0100
From: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
To: Andrew Morton <akpm@...ux-foundation.org>
Cc: Christian Borntraeger <borntraeger@...ux.ibm.com>,
Janosch Frank <frankja@...ux.ibm.com>,
Claudio Imbrenda <imbrenda@...ux.ibm.com>,
David Hildenbrand <david@...hat.com>,
Alexander Gordeev <agordeev@...ux.ibm.com>,
Gerald Schaefer <gerald.schaefer@...ux.ibm.com>,
Heiko Carstens <hca@...ux.ibm.com>, Vasily Gorbik <gor@...ux.ibm.com>,
Sven Schnelle <svens@...ux.ibm.com>, Zi Yan <ziy@...dia.com>,
Baolin Wang <baolin.wang@...ux.alibaba.com>,
"Liam R . Howlett" <Liam.Howlett@...cle.com>,
Nico Pache <npache@...hat.com>, Ryan Roberts <ryan.roberts@....com>,
Dev Jain <dev.jain@....com>, Barry Song <baohua@...nel.org>,
Lance Yang <lance.yang@...ux.dev>,
Kemeng Shi <shikemeng@...weicloud.com>,
Kairui Song <kasong@...cent.com>, Nhat Pham <nphamcs@...il.com>,
Baoquan He <bhe@...hat.com>, Chris Li <chrisl@...nel.org>,
Peter Xu <peterx@...hat.com>, Matthew Wilcox <willy@...radead.org>,
Jason Gunthorpe <jgg@...pe.ca>, Leon Romanovsky <leon@...nel.org>,
Muchun Song <muchun.song@...ux.dev>,
Oscar Salvador <osalvador@...e.de>, Vlastimil Babka <vbabka@...e.cz>,
Mike Rapoport <rppt@...nel.org>,
Suren Baghdasaryan <surenb@...gle.com>, Michal Hocko <mhocko@...e.com>,
Jann Horn <jannh@...gle.com>, Matthew Brost <matthew.brost@...el.com>,
Joshua Hahn <joshua.hahnjy@...il.com>, Rakie Kim <rakie.kim@...com>,
Byungchul Park <byungchul@...com>, Gregory Price <gourry@...rry.net>,
Ying Huang <ying.huang@...ux.alibaba.com>,
Alistair Popple <apopple@...dia.com>, Pedro Falcato <pfalcato@...e.de>,
Pasha Tatashin <pasha.tatashin@...een.com>,
Rik van Riel <riel@...riel.com>, Harry Yoo <harry.yoo@...cle.com>,
kvm@...r.kernel.org, linux-s390@...r.kernel.org,
linux-kernel@...r.kernel.org, linux-fsdevel@...r.kernel.org,
linux-mm@...ck.org
Subject: [RFC PATCH 02/12] mm: avoid unnecessary uses of is_swap_pte()
There's an established convention in the kernel that a PTE is treated as
containing a swap entry (or one of the unfortunately named non-swap swap
entries) if it is neither empty (i.e. pte_none() evaluates true) nor
present (i.e. pte_present() evaluates true).
However, there is some inconsistency in how this is applied, as we also
have the is_swap_pte() helper which explicitly performs this check:
/* check whether a pte points to a swap entry */
static inline int is_swap_pte(pte_t pte)
{
	return !pte_none(pte) && !pte_present(pte);
}
Since is_swap_pte() is a predicate, it would seem logical that it must
first be checked in order to establish that a PTE entry can correctly be
manipulated as a swap/non-swap entry.
Instead, however, we far more often rely on the established convention of
checking pte_none() / pte_present() before operating on entries as if they
were swap/non-swap entries.
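For illustration only (a sketch, not code taken from this patch), the
convention amounts to a three-way check along these lines:

	pte_t ptent = ptep_get(pte);

	if (pte_none(ptent)) {
		/* Empty entry - nothing mapped here. */
	} else if (pte_present(ptent)) {
		/* Present entry - operate on the mapped page. */
	} else {
		/* Neither none nor present - a swap/non-swap entry. */
		swp_entry_t entry = pte_to_swp_entry(ptent);

		/* ... handle swap, migration, device private, markers ... */
	}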
This patch works towards correcting this inconsistency by removing all uses
of is_swap_pte() where we already perform pte_none()/pte_present() checks
anyway, or where it is otherwise clearly logical to do so.
We also take advantage of the fact that pte_swp_uffd_wp() is only set on
swap entries.
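As an illustrative sketch (mirroring the page_table_check.c hunk below),
this means a non-present PTE can be tested for the uffd-wp bit directly,
with no is_swap_pte() guard required first:

	if (pte_present(pte)) {
		/* ... present handling ... */
	} else if (pte_swp_uffd_wp(pte)) {
		/* Only swap entries can have this bit set. */
		swp_entry_t entry = pte_to_swp_entry(pte);

		/* ... operate on the swap entry ... */
	}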
Additionally, update comments that reference is_swap_pte() and
non_swap_entry().
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
---
fs/proc/task_mmu.c | 49 ++++++++++++++++++++++++-----------
include/linux/userfaultfd_k.h | 9 ++++---
mm/hugetlb.c | 6 ++---
mm/internal.h | 6 ++---
mm/khugepaged.c | 29 +++++++++++----------
mm/migrate.c | 2 +-
mm/mprotect.c | 43 ++++++++++++++----------------
mm/mremap.c | 7 +++--
mm/page_table_check.c | 13 ++++++----
mm/page_vma_mapped.c | 27 ++++++++++---------
10 files changed, 106 insertions(+), 85 deletions(-)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index a7c8501266f4..5475acfa1a33 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1017,7 +1017,9 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
young = pte_young(ptent);
dirty = pte_dirty(ptent);
present = true;
- } else if (is_swap_pte(ptent)) {
+ } else if (pte_none(ptent)) {
+ smaps_pte_hole_lookup(addr, walk);
+ } else {
swp_entry_t swpent = pte_to_swp_entry(ptent);
if (!non_swap_entry(swpent)) {
@@ -1038,9 +1040,6 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
present = true;
page = pfn_swap_entry_to_page(swpent);
}
- } else {
- smaps_pte_hole_lookup(addr, walk);
- return;
}
if (!page)
@@ -1611,6 +1610,9 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
*/
pte_t ptent = ptep_get(pte);
+ if (pte_none(ptent))
+ return;
+
if (pte_present(ptent)) {
pte_t old_pte;
@@ -1620,7 +1622,7 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
ptent = pte_wrprotect(old_pte);
ptent = pte_clear_soft_dirty(ptent);
ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
- } else if (is_swap_pte(ptent)) {
+ } else {
ptent = pte_swp_clear_soft_dirty(ptent);
set_pte_at(vma->vm_mm, addr, pte, ptent);
}
@@ -1923,6 +1925,9 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
struct page *page = NULL;
struct folio *folio;
+ if (pte_none(pte))
+ goto out;
+
if (pte_present(pte)) {
if (pm->show_pfn)
frame = pte_pfn(pte);
@@ -1932,8 +1937,9 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
flags |= PM_SOFT_DIRTY;
if (pte_uffd_wp(pte))
flags |= PM_UFFD_WP;
- } else if (is_swap_pte(pte)) {
+ } else {
swp_entry_t entry;
+
if (pte_swp_soft_dirty(pte))
flags |= PM_SOFT_DIRTY;
if (pte_swp_uffd_wp(pte))
@@ -1941,6 +1947,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
entry = pte_to_swp_entry(pte);
if (pm->show_pfn) {
pgoff_t offset;
+
/*
* For PFN swap offsets, keeping the offset field
* to be PFN only to be compatible with old smaps.
@@ -1969,6 +1976,8 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
__folio_page_mapped_exclusively(folio, page))
flags |= PM_MMAP_EXCLUSIVE;
}
+
+out:
if (vma->vm_flags & VM_SOFTDIRTY)
flags |= PM_SOFT_DIRTY;
@@ -2310,12 +2319,16 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p,
struct vm_area_struct *vma,
unsigned long addr, pte_t pte)
{
- unsigned long categories = 0;
+ unsigned long categories;
+
+ if (pte_none(pte))
+ return 0;
if (pte_present(pte)) {
struct page *page;
- categories |= PAGE_IS_PRESENT;
+ categories = PAGE_IS_PRESENT;
+
if (!pte_uffd_wp(pte))
categories |= PAGE_IS_WRITTEN;
@@ -2329,10 +2342,11 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p,
categories |= PAGE_IS_PFNZERO;
if (pte_soft_dirty(pte))
categories |= PAGE_IS_SOFT_DIRTY;
- } else if (is_swap_pte(pte)) {
+ } else {
swp_entry_t swp;
- categories |= PAGE_IS_SWAPPED;
+ categories = PAGE_IS_SWAPPED;
+
if (!pte_swp_uffd_wp_any(pte))
categories |= PAGE_IS_WRITTEN;
@@ -2360,12 +2374,12 @@ static void make_uffd_wp_pte(struct vm_area_struct *vma,
old_pte = ptep_modify_prot_start(vma, addr, pte);
ptent = pte_mkuffd_wp(old_pte);
ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
- } else if (is_swap_pte(ptent)) {
- ptent = pte_swp_mkuffd_wp(ptent);
- set_pte_at(vma->vm_mm, addr, pte, ptent);
- } else {
+ } else if (pte_none(ptent)) {
set_pte_at(vma->vm_mm, addr, pte,
make_pte_marker(PTE_MARKER_UFFD_WP));
+ } else {
+ ptent = pte_swp_mkuffd_wp(ptent);
+ set_pte_at(vma->vm_mm, addr, pte, ptent);
}
}
@@ -2434,6 +2448,9 @@ static unsigned long pagemap_hugetlb_category(pte_t pte)
{
unsigned long categories = PAGE_IS_HUGE;
+ if (pte_none(pte))
+ return categories;
+
/*
* According to pagemap_hugetlb_range(), file-backed HugeTLB
* page cannot be swapped. So PAGE_IS_FILE is not checked for
@@ -2441,6 +2458,7 @@ static unsigned long pagemap_hugetlb_category(pte_t pte)
*/
if (pte_present(pte)) {
categories |= PAGE_IS_PRESENT;
+
if (!huge_pte_uffd_wp(pte))
categories |= PAGE_IS_WRITTEN;
if (!PageAnon(pte_page(pte)))
@@ -2449,8 +2467,9 @@ static unsigned long pagemap_hugetlb_category(pte_t pte)
categories |= PAGE_IS_PFNZERO;
if (pte_soft_dirty(pte))
categories |= PAGE_IS_SOFT_DIRTY;
- } else if (is_swap_pte(pte)) {
+ } else {
categories |= PAGE_IS_SWAPPED;
+
if (!pte_swp_uffd_wp_any(pte))
categories |= PAGE_IS_WRITTEN;
if (pte_swp_soft_dirty(pte))
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 4c65adff2e7a..a362e1619b95 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -462,13 +462,14 @@ static inline bool pte_marker_uffd_wp(pte_t pte)
static inline bool pte_swp_uffd_wp_any(pte_t pte)
{
#ifdef CONFIG_PTE_MARKER_UFFD_WP
- if (!is_swap_pte(pte))
- return false;
+ swp_entry_t entry;
+ if (pte_present(pte))
+ return false;
if (pte_swp_uffd_wp(pte))
return true;
-
- if (pte_marker_uffd_wp(pte))
+ entry = pte_to_swp_entry(pte);
+ if (pte_marker_entry_uffd_wp(entry))
return true;
#endif
return false;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 86e672fcb305..4510029761ed 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5798,13 +5798,13 @@ static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr,
pte = huge_ptep_get_and_clear(mm, old_addr, src_pte, sz);
- if (need_clear_uffd_wp && pte_marker_uffd_wp(pte))
+ if (need_clear_uffd_wp && pte_marker_uffd_wp(pte)) {
huge_pte_clear(mm, new_addr, dst_pte, sz);
- else {
+ } else {
if (need_clear_uffd_wp) {
if (pte_present(pte))
pte = huge_pte_clear_uffd_wp(pte);
- else if (is_swap_pte(pte))
+ else
pte = pte_swp_clear_uffd_wp(pte);
}
set_huge_pte_at(mm, new_addr, dst_pte, pte, sz);
diff --git a/mm/internal.h b/mm/internal.h
index cbd3d897b16c..b855a4412878 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -325,8 +325,7 @@ unsigned int folio_pte_batch(struct folio *folio, pte_t *ptep, pte_t pte,
/**
* pte_move_swp_offset - Move the swap entry offset field of a swap pte
* forward or backward by delta
- * @pte: The initial pte state; is_swap_pte(pte) must be true and
- * non_swap_entry() must be false.
+ * @pte: The initial pte state; must be a swap entry
* @delta: The direction and the offset we are moving; forward if delta
* is positive; backward if delta is negative
*
@@ -352,8 +351,7 @@ static inline pte_t pte_move_swp_offset(pte_t pte, long delta)
/**
* pte_next_swp_offset - Increment the swap entry offset field of a swap pte.
- * @pte: The initial pte state; is_swap_pte(pte) must be true and
- * non_swap_entry() must be false.
+ * @pte: The initial pte state; must be a swap entry.
*
* Increments the swap offset, while maintaining all other fields, including
* swap type, and any swp pte bits. The resulting pte is returned.
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 5b7276bc14b1..2079f270196f 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1019,7 +1019,8 @@ static int __collapse_huge_page_swapin(struct mm_struct *mm,
}
vmf.orig_pte = ptep_get_lockless(pte);
- if (!is_swap_pte(vmf.orig_pte))
+ if (pte_none(vmf.orig_pte) ||
+ pte_present(vmf.orig_pte))
continue;
vmf.pte = pte;
@@ -1276,7 +1277,19 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm,
for (addr = start_addr, _pte = pte; _pte < pte + HPAGE_PMD_NR;
_pte++, addr += PAGE_SIZE) {
pte_t pteval = ptep_get(_pte);
- if (is_swap_pte(pteval)) {
+ if (pte_none_or_zero(pteval)) {
+ ++none_or_zero;
+ if (!userfaultfd_armed(vma) &&
+ (!cc->is_khugepaged ||
+ none_or_zero <= khugepaged_max_ptes_none)) {
+ continue;
+ } else {
+ result = SCAN_EXCEED_NONE_PTE;
+ count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
+ goto out_unmap;
+ }
+ }
+ if (!pte_present(pteval)) {
++unmapped;
if (!cc->is_khugepaged ||
unmapped <= khugepaged_max_ptes_swap) {
@@ -1296,18 +1309,6 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm,
goto out_unmap;
}
}
- if (pte_none_or_zero(pteval)) {
- ++none_or_zero;
- if (!userfaultfd_armed(vma) &&
- (!cc->is_khugepaged ||
- none_or_zero <= khugepaged_max_ptes_none)) {
- continue;
- } else {
- result = SCAN_EXCEED_NONE_PTE;
- count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
- goto out_unmap;
- }
- }
if (pte_uffd_wp(pteval)) {
/*
* Don't collapse the page if any of the small
diff --git a/mm/migrate.c b/mm/migrate.c
index 4324fc01bfce..69d8b4a9db25 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -492,7 +492,7 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
pte = ptep_get(ptep);
pte_unmap(ptep);
- if (!is_swap_pte(pte))
+ if (pte_none(pte) || pte_present(pte))
goto out;
entry = pte_to_swp_entry(pte);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index c090bc063a31..e25ac9835cc2 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -345,7 +345,26 @@ static long change_pte_range(struct mmu_gather *tlb,
prot_commit_flush_ptes(vma, addr, pte, oldpte, ptent,
nr_ptes, /* idx = */ 0, /* set_write = */ false, tlb);
pages += nr_ptes;
- } else if (is_swap_pte(oldpte)) {
+ } else if (pte_none(oldpte)) {
+ /*
+ * Nobody plays with any none ptes besides
+ * userfaultfd when applying the protections.
+ */
+ if (likely(!uffd_wp))
+ continue;
+
+ if (userfaultfd_wp_use_markers(vma)) {
+ /*
+ * For file-backed mem, we need to be able to
+ * wr-protect a none pte, because even if the
+ * pte is none, the page/swap cache could
+ * exist. Doing that by install a marker.
+ */
+ set_pte_at(vma->vm_mm, addr, pte,
+ make_pte_marker(PTE_MARKER_UFFD_WP));
+ pages++;
+ }
+ } else {
swp_entry_t entry = pte_to_swp_entry(oldpte);
pte_t newpte;
@@ -406,28 +425,6 @@ static long change_pte_range(struct mmu_gather *tlb,
set_pte_at(vma->vm_mm, addr, pte, newpte);
pages++;
}
- } else {
- /* It must be an none page, or what else?.. */
- WARN_ON_ONCE(!pte_none(oldpte));
-
- /*
- * Nobody plays with any none ptes besides
- * userfaultfd when applying the protections.
- */
- if (likely(!uffd_wp))
- continue;
-
- if (userfaultfd_wp_use_markers(vma)) {
- /*
- * For file-backed mem, we need to be able to
- * wr-protect a none pte, because even if the
- * pte is none, the page/swap cache could
- * exist. Doing that by install a marker.
- */
- set_pte_at(vma->vm_mm, addr, pte,
- make_pte_marker(PTE_MARKER_UFFD_WP));
- pages++;
- }
}
} while (pte += nr_ptes, addr += nr_ptes * PAGE_SIZE, addr != end);
arch_leave_lazy_mmu_mode();
diff --git a/mm/mremap.c b/mm/mremap.c
index bd7314898ec5..f01c74add990 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -158,6 +158,9 @@ static void drop_rmap_locks(struct vm_area_struct *vma)
static pte_t move_soft_dirty_pte(pte_t pte)
{
+ if (pte_none(pte))
+ return pte;
+
/*
* Set soft dirty bit so we can notice
* in userspace the ptes were moved.
@@ -165,7 +168,7 @@ static pte_t move_soft_dirty_pte(pte_t pte)
#ifdef CONFIG_MEM_SOFT_DIRTY
if (pte_present(pte))
pte = pte_mksoft_dirty(pte);
- else if (is_swap_pte(pte))
+ else
pte = pte_swp_mksoft_dirty(pte);
#endif
return pte;
@@ -294,7 +297,7 @@ static int move_ptes(struct pagetable_move_control *pmc,
if (need_clear_uffd_wp) {
if (pte_present(pte))
pte = pte_clear_uffd_wp(pte);
- else if (is_swap_pte(pte))
+ else
pte = pte_swp_clear_uffd_wp(pte);
}
set_ptes(mm, new_addr, new_ptep, pte, nr_ptes);
diff --git a/mm/page_table_check.c b/mm/page_table_check.c
index 4eeca782b888..43f75d2f7c36 100644
--- a/mm/page_table_check.c
+++ b/mm/page_table_check.c
@@ -185,12 +185,15 @@ static inline bool swap_cached_writable(swp_entry_t entry)
is_writable_migration_entry(entry);
}
-static inline void page_table_check_pte_flags(pte_t pte)
+static void page_table_check_pte_flags(pte_t pte)
{
- if (pte_present(pte) && pte_uffd_wp(pte))
- WARN_ON_ONCE(pte_write(pte));
- else if (is_swap_pte(pte) && pte_swp_uffd_wp(pte))
- WARN_ON_ONCE(swap_cached_writable(pte_to_swp_entry(pte)));
+ if (pte_present(pte)) {
+ WARN_ON_ONCE(pte_uffd_wp(pte) && pte_write(pte));
+ } else if (pte_swp_uffd_wp(pte)) {
+ const swp_entry_t entry = pte_to_swp_entry(pte);
+
+ WARN_ON_ONCE(swap_cached_writable(entry));
+ }
}
void __page_table_check_ptes_set(struct mm_struct *mm, pte_t *ptep, pte_t pte,
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index 75a8fbb788b7..2e5ac6572630 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -16,6 +16,7 @@ static inline bool not_found(struct page_vma_mapped_walk *pvmw)
static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
spinlock_t **ptlp)
{
+ bool is_migration;
pte_t ptent;
if (pvmw->flags & PVMW_SYNC) {
@@ -26,6 +27,7 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
return !!pvmw->pte;
}
+ is_migration = pvmw->flags & PVMW_MIGRATION;
again:
/*
* It is important to return the ptl corresponding to pte,
@@ -41,11 +43,14 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
ptent = ptep_get(pvmw->pte);
- if (pvmw->flags & PVMW_MIGRATION) {
- if (!is_swap_pte(ptent))
+ if (pte_none(ptent)) {
+ return false;
+ } else if (pte_present(ptent)) {
+ if (is_migration)
return false;
- } else if (is_swap_pte(ptent)) {
+ } else if (!is_migration) {
swp_entry_t entry;
+
/*
* Handle un-addressable ZONE_DEVICE memory.
*
@@ -66,8 +71,6 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
if (!is_device_private_entry(entry) &&
!is_device_exclusive_entry(entry))
return false;
- } else if (!pte_present(ptent)) {
- return false;
}
spin_lock(*ptlp);
if (unlikely(!pmd_same(*pmdvalp, pmdp_get_lockless(pvmw->pmd)))) {
@@ -107,27 +110,23 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw, unsigned long pte_nr)
pte_t ptent = ptep_get(pvmw->pte);
if (pvmw->flags & PVMW_MIGRATION) {
- swp_entry_t entry = pte_to_swp_entry_or_zero(ptent);
+ const swp_entry_t entry = pte_to_swp_entry_or_zero(ptent);
if (!is_migration_entry(entry))
return false;
pfn = swp_offset_pfn(entry);
- } else if (is_swap_pte(ptent)) {
- swp_entry_t entry;
+ } else if (pte_present(ptent)) {
+ pfn = pte_pfn(ptent);
+ } else {
+ const swp_entry_t entry = pte_to_swp_entry(ptent);
/* Handle un-addressable ZONE_DEVICE memory */
- entry = pte_to_swp_entry(ptent);
if (!is_device_private_entry(entry) &&
!is_device_exclusive_entry(entry))
return false;
pfn = swp_offset_pfn(entry);
- } else {
- if (!pte_present(ptent))
- return false;
-
- pfn = pte_pfn(ptent);
}
if ((pfn + pte_nr - 1) < pvmw->pfn)
--
2.51.0