Message-ID: <49cf11ce2b91bb4648fc41c78fc4babf73879ee4.1761288179.git.lorenzo.stoakes@oracle.com>
Date: Fri, 24 Oct 2025 08:41:23 +0100
From: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
To: Andrew Morton <akpm@...ux-foundation.org>
Cc: Christian Borntraeger <borntraeger@...ux.ibm.com>,
Janosch Frank <frankja@...ux.ibm.com>,
Claudio Imbrenda <imbrenda@...ux.ibm.com>,
David Hildenbrand <david@...hat.com>,
Alexander Gordeev <agordeev@...ux.ibm.com>,
Gerald Schaefer <gerald.schaefer@...ux.ibm.com>,
Heiko Carstens <hca@...ux.ibm.com>, Vasily Gorbik <gor@...ux.ibm.com>,
Sven Schnelle <svens@...ux.ibm.com>, Zi Yan <ziy@...dia.com>,
Baolin Wang <baolin.wang@...ux.alibaba.com>,
"Liam R . Howlett" <Liam.Howlett@...cle.com>,
Nico Pache <npache@...hat.com>, Ryan Roberts <ryan.roberts@....com>,
Dev Jain <dev.jain@....com>, Barry Song <baohua@...nel.org>,
Lance Yang <lance.yang@...ux.dev>,
Kemeng Shi <shikemeng@...weicloud.com>,
Kairui Song <kasong@...cent.com>, Nhat Pham <nphamcs@...il.com>,
Baoquan He <bhe@...hat.com>, Chris Li <chrisl@...nel.org>,
Peter Xu <peterx@...hat.com>, Matthew Wilcox <willy@...radead.org>,
Jason Gunthorpe <jgg@...pe.ca>, Leon Romanovsky <leon@...nel.org>,
Muchun Song <muchun.song@...ux.dev>,
Oscar Salvador <osalvador@...e.de>, Vlastimil Babka <vbabka@...e.cz>,
Mike Rapoport <rppt@...nel.org>,
Suren Baghdasaryan <surenb@...gle.com>, Michal Hocko <mhocko@...e.com>,
Jann Horn <jannh@...gle.com>, Matthew Brost <matthew.brost@...el.com>,
Joshua Hahn <joshua.hahnjy@...il.com>, Rakie Kim <rakie.kim@...com>,
Byungchul Park <byungchul@...com>, Gregory Price <gourry@...rry.net>,
Ying Huang <ying.huang@...ux.alibaba.com>,
Alistair Popple <apopple@...dia.com>, Pedro Falcato <pfalcato@...e.de>,
Pasha Tatashin <pasha.tatashin@...een.com>,
Rik van Riel <riel@...riel.com>, Harry Yoo <harry.yoo@...cle.com>,
kvm@...r.kernel.org, linux-s390@...r.kernel.org,
linux-kernel@...r.kernel.org, linux-fsdevel@...r.kernel.org,
linux-mm@...ck.org
Subject: [RFC PATCH 07/12] mm: introduce is_huge_pmd() and use where appropriate
The 'swap' PMD case is confusing as only non-present entries (i.e. non-swap
'swap' entries) are valid at PMD level - currently migration entries and
device private entries.
We repeatedly perform checks of the form is_swap_pmd() || pmd_trans_huge()
which is itself confusing - it implies that swap entries at PMD level exist
and are different from huge entries.
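For illustration only (not part of the change itself), the two halves of
the old check cover, roughly:

	/* Present and marked huge, i.e. an actual huge mapping: */
	pmd_trans_huge(pmd)
	/* Non-present, non-none - a migration or device private entry: */
	is_swap_pmd(pmd)

So the combined condition simply means "this PMD is huge in some form",
which is what the new helper name expresses directly.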
Address this confusion by introducing is_huge_pmd(), which checks for either
case. Sadly due to header dependency issues (huge_mm.h is included very
early on in headers and cannot really rely on much else) we cannot assert
here that is_pmd_non_present_folio_entry() is true for the input PMD if
non-present.
However, since these are the only valid, handled cases, the function still
achieves what it intends to do.
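Purely as a sketch of intent (the helper added by this patch deliberately
omits it), the assertion would look something like:

	static inline bool is_huge_pmd(pmd_t pmd)
	{
		if (pmd_present(pmd))
			return pmd_trans_huge(pmd);
		if (pmd_none(pmd))
			return false;
		/* Hypothetical - not expressible in huge_mm.h today: */
		VM_WARN_ON_ONCE(!is_pmd_non_present_folio_entry(pmd));
		return true;
	}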
We then replace all instances of is_swap_pmd() || pmd_trans_huge() with
is_huge_pmd() invocations and adjust logic accordingly to accommodate
this.
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
---
include/linux/huge_mm.h | 40 ++++++++++++++++++++++++++++++++++++----
mm/huge_memory.c | 3 ++-
mm/memory.c | 4 ++--
mm/mprotect.c | 2 +-
mm/mremap.c | 2 +-
5 files changed, 42 insertions(+), 9 deletions(-)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 7698b3542c4f..892cb825dfc7 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -416,10 +416,37 @@ void reparent_deferred_split_queue(struct mem_cgroup *memcg);
void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long address, bool freeze);
+/**
+ * is_huge_pmd() - Is this PMD either a huge PMD entry or a huge non-present
+ * entry?
+ * @pmd: The PMD to check.
+ *
+ * A huge PMD entry is a non-empty entry which is present and marked huge or a
+ * huge non-present entry. This check can be performed without the appropriate locks
+ * held, in which case the condition should be rechecked after they are
+ * acquired.
+ *
+ * Returns: true if this PMD is huge, false otherwise.
+ */
+static inline bool is_huge_pmd(pmd_t pmd)
+{
+ if (pmd_present(pmd)) {
+ return pmd_trans_huge(pmd);
+ } else if (!pmd_none(pmd)) {
+ /*
+ * Non-present PMDs must be valid huge non-present entries. We
+ * cannot assert that here due to header dependency issues.
+ */
+ return true;
+ }
+
+ return false;
+}
+
#define split_huge_pmd(__vma, __pmd, __address) \
do { \
pmd_t *____pmd = (__pmd); \
- if (is_swap_pmd(*____pmd) || pmd_trans_huge(*____pmd)) \
+ if (is_huge_pmd(*____pmd)) \
__split_huge_pmd(__vma, __pmd, __address, \
false); \
} while (0)
@@ -466,10 +493,10 @@ static inline int is_swap_pmd(pmd_t pmd)
static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
struct vm_area_struct *vma)
{
- if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd))
+ if (is_huge_pmd(*pmd))
return __pmd_trans_huge_lock(pmd, vma);
- else
- return NULL;
+
+ return NULL;
}
static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
struct vm_area_struct *vma)
@@ -734,6 +761,11 @@ static inline struct folio *get_persistent_huge_zero_folio(void)
{
return NULL;
}
+
+static inline bool is_huge_pmd(pmd_t pmd)
+{
+ return false;
+}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline int split_folio_to_list_to_order(struct folio *folio,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index a59718f85ec3..e0fe1c3e01c9 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2799,8 +2799,9 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm
spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma)
{
spinlock_t *ptl;
+
ptl = pmd_lock(vma->vm_mm, pmd);
- if (likely(is_swap_pmd(*pmd) || pmd_trans_huge(*pmd)))
+ if (likely(is_huge_pmd(*pmd)))
return ptl;
spin_unlock(ptl);
return NULL;
diff --git a/mm/memory.c b/mm/memory.c
index 83828548ef5f..cc163060933f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1373,7 +1373,7 @@ copy_pmd_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
src_pmd = pmd_offset(src_pud, addr);
do {
next = pmd_addr_end(addr, end);
- if (is_swap_pmd(*src_pmd) || pmd_trans_huge(*src_pmd)) {
+ if (is_huge_pmd(*src_pmd)) {
int err;
VM_BUG_ON_VMA(next-addr != HPAGE_PMD_SIZE, src_vma);
@@ -1921,7 +1921,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
pmd = pmd_offset(pud, addr);
do {
next = pmd_addr_end(addr, end);
- if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd)) {
+ if (is_huge_pmd(*pmd)) {
if (next - addr != HPAGE_PMD_SIZE)
__split_huge_pmd(vma, pmd, addr, false);
else if (zap_huge_pmd(tlb, vma, pmd, addr)) {
diff --git a/mm/mprotect.c b/mm/mprotect.c
index e25ac9835cc2..c1d640758b60 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -522,7 +522,7 @@ static inline long change_pmd_range(struct mmu_gather *tlb,
goto next;
_pmd = pmdp_get_lockless(pmd);
- if (is_swap_pmd(_pmd) || pmd_trans_huge(_pmd)) {
+ if (is_huge_pmd(_pmd)) {
if ((next - addr != HPAGE_PMD_SIZE) ||
pgtable_split_needed(vma, cp_flags)) {
__split_huge_pmd(vma, pmd, addr, false);
diff --git a/mm/mremap.c b/mm/mremap.c
index f01c74add990..070ba5a85824 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -850,7 +850,7 @@ unsigned long move_page_tables(struct pagetable_move_control *pmc)
if (!new_pmd)
break;
again:
- if (is_swap_pmd(*old_pmd) || pmd_trans_huge(*old_pmd)) {
+ if (is_huge_pmd(*old_pmd)) {
if (extent == HPAGE_PMD_SIZE &&
move_pgt_entry(pmc, HPAGE_PMD, old_pmd, new_pmd))
continue;
--
2.51.0