Message-ID: <20251002073255.14867-3-lance.yang@linux.dev>
Date: Thu, 2 Oct 2025 15:32:55 +0800
From: Lance Yang <lance.yang@...ux.dev>
To: akpm@...ux-foundation.org,
david@...hat.com,
lorenzo.stoakes@...cle.com
Cc: ziy@...dia.com,
baolin.wang@...ux.alibaba.com,
Liam.Howlett@...cle.com,
npache@...hat.com,
ryan.roberts@....com,
dev.jain@....com,
baohua@...nel.org,
ioworker0@...il.com,
richard.weiyang@...il.com,
linux-kernel@...r.kernel.org,
linux-mm@...ck.org,
Lance Yang <lance.yang@...ux.dev>
Subject: [PATCH mm-new 2/2] mm/khugepaged: merge PTE scanning logic into a new helper
As David suggested, the PTE scanning logic in hpage_collapse_scan_pmd()
and __collapse_huge_page_isolate() is largely duplicated.

Clean this up by moving the common PTE checking logic into a new shared
helper, thp_collapse_check_pte().
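To illustrate, both call sites collapse to the same loop shape (a
simplified sketch based on the diff below; locking and the subsequent
per-page checks are omitted, and the scan_swap_pte flag and unmapped
pointer differ per caller):

	for (_pte = pte; _pte < pte + HPAGE_PMD_NR;
	     _pte++, addr += PAGE_SIZE) {
		pte_t pteval = ptep_get(_pte);
		int res = thp_collapse_check_pte(pteval, vma, cc,
				scan_swap_pte, &none_or_zero,
				unmapped_ptr, &result);

		if (res == PTE_CHECK_CONTINUE)
			continue;	/* skippable none/zero/swap PTE */
		if (res == PTE_CHECK_FAIL)
			goto out;	/* 'result' holds the SCAN_* code */
		/* PTE_CHECK_SUCCEED: fall through to per-page checks */
	}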
Suggested-by: David Hildenbrand <david@...hat.com>
Signed-off-by: Lance Yang <lance.yang@...ux.dev>
---
mm/khugepaged.c | 167 ++++++++++++++++++++++++++++++------------------
1 file changed, 104 insertions(+), 63 deletions(-)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 808523f92c7b..2a897cfb1d03 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -61,6 +61,12 @@ enum scan_result {
SCAN_PAGE_FILLED,
};
+enum pte_check_result {
+ PTE_CHECK_SUCCEED,
+ PTE_CHECK_CONTINUE,
+ PTE_CHECK_FAIL,
+};
+
#define CREATE_TRACE_POINTS
#include <trace/events/huge_memory.h>
@@ -533,6 +539,87 @@ static void release_pte_pages(pte_t *pte, pte_t *_pte,
}
}
+/**
+ * thp_collapse_check_pte - Check if a PTE is suitable for THP collapse
+ * @pte: PTE to check
+ * @vma: VMA the PTE belongs to
+ * @cc: Collapse control settings
+ * @scan_swap_pte: Allow scanning of swap PTEs if true
+ * @none_or_zero: Counter for none/zero PTEs (must be non-NULL)
+ * @unmapped: Counter for swap PTEs (must be non-NULL if scan_swap_pte
+ * is true)
+ * @scan_result: Used to return the failure reason (SCAN_*) on a
+ * PTE_CHECK_FAIL return. Must be non-NULL
+ *
+ * Return:
+ * PTE_CHECK_SUCCEED - Valid PTE, proceed with collapse
+ * PTE_CHECK_CONTINUE - Skip this PTE (none/zero, or swap within limits)
+ * and continue scanning
+ * PTE_CHECK_FAIL - Abort the collapse scan
+ */
+static inline int thp_collapse_check_pte(pte_t pte, struct vm_area_struct *vma,
+ struct collapse_control *cc, bool scan_swap_pte,
+ int *none_or_zero, int *unmapped, int *scan_result)
+{
+ VM_BUG_ON(!none_or_zero || !scan_result);
+ VM_BUG_ON(scan_swap_pte && !unmapped);
+
+ if (pte_none(pte) || is_zero_pfn(pte_pfn(pte))) {
+ (*none_or_zero)++;
+ if (!userfaultfd_armed(vma) &&
+ (!cc->is_khugepaged ||
+ *none_or_zero <= khugepaged_max_ptes_none)) {
+ return PTE_CHECK_CONTINUE;
+ } else {
+ *scan_result = SCAN_EXCEED_NONE_PTE;
+ count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
+ return PTE_CHECK_FAIL;
+ }
+ } else if (!pte_present(pte)) {
+ if (!scan_swap_pte) {
+ *scan_result = SCAN_PTE_NON_PRESENT;
+ return PTE_CHECK_FAIL;
+ }
+
+ if (non_swap_entry(pte_to_swp_entry(pte))) {
+ *scan_result = SCAN_PTE_NON_PRESENT;
+ return PTE_CHECK_FAIL;
+ }
+
+ (*unmapped)++;
+ if (!cc->is_khugepaged ||
+ *unmapped <= khugepaged_max_ptes_swap) {
+ /*
+ * Always be strict with uffd-wp
+ * enabled swap entries. Please see
+ * comment below for pte_uffd_wp().
+ */
+ if (pte_swp_uffd_wp(pte)) {
+ *scan_result = SCAN_PTE_UFFD_WP;
+ return PTE_CHECK_FAIL;
+ }
+ return PTE_CHECK_CONTINUE;
+ } else {
+ *scan_result = SCAN_EXCEED_SWAP_PTE;
+ count_vm_event(THP_SCAN_EXCEED_SWAP_PTE);
+ return PTE_CHECK_FAIL;
+ }
+ } else if (pte_uffd_wp(pte)) {
+ /*
+ * Don't collapse the page if any of the small
+ * PTEs are armed with uffd write protection.
+ * Here we can also mark the new huge pmd as
+ * write protected if any of the small ones is
+ * marked but that could bring unknown
+ * userfault messages that fall outside of
+ * the registered range. So, just be simple.
+ */
+ *scan_result = SCAN_PTE_UFFD_WP;
+ return PTE_CHECK_FAIL;
+ }
+
+ return PTE_CHECK_SUCCEED;
+}
+
static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
unsigned long start_addr,
pte_t *pte,
@@ -544,28 +631,20 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
unsigned long addr = start_addr;
pte_t *_pte;
int none_or_zero = 0, shared = 0, result = SCAN_FAIL, referenced = 0;
+ int pte_check_res;
for (_pte = pte; _pte < pte + HPAGE_PMD_NR;
_pte++, addr += PAGE_SIZE) {
pte_t pteval = ptep_get(_pte);
- if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
- ++none_or_zero;
- if (!userfaultfd_armed(vma) &&
- (!cc->is_khugepaged ||
- none_or_zero <= khugepaged_max_ptes_none)) {
- continue;
- } else {
- result = SCAN_EXCEED_NONE_PTE;
- count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
- goto out;
- }
- } else if (!pte_present(pteval)) {
- result = SCAN_PTE_NON_PRESENT;
- goto out;
- } else if (pte_uffd_wp(pteval)) {
- result = SCAN_PTE_UFFD_WP;
+ pte_check_res = thp_collapse_check_pte(
+ pteval, vma, cc, false, /* scan_swap_pte = false */
+ &none_or_zero, NULL, &result);
+
+ if (pte_check_res == PTE_CHECK_CONTINUE)
+ continue;
+ else if (pte_check_res == PTE_CHECK_FAIL)
goto out;
- }
+
page = vm_normal_page(vma, addr, pteval);
if (unlikely(!page) || unlikely(is_zone_device_page(page))) {
result = SCAN_PAGE_NULL;
@@ -1260,6 +1339,7 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm,
unsigned long addr;
spinlock_t *ptl;
int node = NUMA_NO_NODE, unmapped = 0;
+ int pte_check_res;
VM_BUG_ON(start_addr & ~HPAGE_PMD_MASK);
@@ -1278,54 +1358,15 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm,
for (addr = start_addr, _pte = pte; _pte < pte + HPAGE_PMD_NR;
_pte++, addr += PAGE_SIZE) {
pte_t pteval = ptep_get(_pte);
- if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
- ++none_or_zero;
- if (!userfaultfd_armed(vma) &&
- (!cc->is_khugepaged ||
- none_or_zero <= khugepaged_max_ptes_none)) {
- continue;
- } else {
- result = SCAN_EXCEED_NONE_PTE;
- count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
- goto out_unmap;
- }
- } else if (!pte_present(pteval)) {
- if (non_swap_entry(pte_to_swp_entry(pteval))) {
- result = SCAN_PTE_NON_PRESENT;
- goto out_unmap;
- }
- ++unmapped;
- if (!cc->is_khugepaged ||
- unmapped <= khugepaged_max_ptes_swap) {
- /*
- * Always be strict with uffd-wp
- * enabled swap entries. Please see
- * comment below for pte_uffd_wp().
- */
- if (pte_swp_uffd_wp(pteval)) {
- result = SCAN_PTE_UFFD_WP;
- goto out_unmap;
- }
- continue;
- } else {
- result = SCAN_EXCEED_SWAP_PTE;
- count_vm_event(THP_SCAN_EXCEED_SWAP_PTE);
- goto out_unmap;
- }
- } else if (pte_uffd_wp(pteval)) {
- /*
- * Don't collapse the page if any of the small
- * PTEs are armed with uffd write protection.
- * Here we can also mark the new huge pmd as
- * write protected if any of the small ones is
- * marked but that could bring unknown
- * userfault messages that falls outside of
- * the registered range. So, just be simple.
- */
- result = SCAN_PTE_UFFD_WP;
+ pte_check_res = thp_collapse_check_pte(
+ pteval, vma, cc, true, /* scan_swap_pte = true */
+ &none_or_zero, &unmapped, &result);
+
+ if (pte_check_res == PTE_CHECK_CONTINUE)
+ continue;
+ else if (pte_check_res == PTE_CHECK_FAIL)
goto out_unmap;
- }
page = vm_normal_page(vma, addr, pteval);
if (unlikely(!page) || unlikely(is_zone_device_page(page))) {
--
2.49.0