Message-ID: <20251014151126.87589-1-pedrodemargomes@gmail.com>
Date: Tue, 14 Oct 2025 12:11:26 -0300
From: Pedro Demarchi Gomes <pedrodemargomes@...il.com>
To: Andrew Morton <akpm@...ux-foundation.org>,
David Hildenbrand <david@...hat.com>
Cc: Xu Xin <xu.xin16@....com.cn>,
Chengming Zhou <chengming.zhou@...ux.dev>,
linux-mm@...ck.org,
linux-kernel@...r.kernel.org,
Pedro Demarchi Gomes <pedrodemargomes@...il.com>
Subject: [PATCH v2] ksm: use range-walk function to jump over holes in scan_get_next_rmap_item

Currently, scan_get_next_rmap_item() walks every page address in a VMA
to locate mergeable pages. This becomes highly inefficient when scanning
large virtual memory areas that contain mostly unmapped regions.

This patch replaces the per-address lookup with a range walk using
walk_page_range(). The range walker allows KSM to skip over entire
unmapped holes in a VMA, avoiding unnecessary lookups.
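
For reference, the core pattern looks roughly like the sketch below
(the sketch_* names are illustrative only; the actual callbacks added
by this patch are in the diff that follows). Since the page-table
walker never invokes ->pmd_entry for table levels that are not
populated, unmapped holes are skipped without any per-address lookups:

#include <linux/mm.h>
#include <linux/pagewalk.h>

/* Illustrative sketch only, not the code added by this patch. */
static int sketch_pmd_entry(pmd_t *pmd, unsigned long addr,
			    unsigned long end, struct mm_walk *walk)
{
	/* Inspect the PTEs backing [addr, end) here. */
	return 0;	/* a non-zero return stops the walk */
}

static const struct mm_walk_ops sketch_ops = {
	.pmd_entry = sketch_pmd_entry,
	.walk_lock = PGWALK_RDLOCK,	/* walk under mmap_read_lock() */
};

/* Caller must hold mmap_read_lock(mm). */
static int sketch_scan(struct mm_struct *mm, unsigned long start,
		       unsigned long end)
{
	return walk_page_range(mm, start, end, &sketch_ops, NULL);
}

The pmd_entry callback added below follows this shape, but also
records the first mergeable anon page (and its folio) it finds so the
caller can hand it to get_next_rmap_item().
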
This problem was previously discussed in [1].

Changes since v1 [2]:
- Use pmd_entry() to walk the page range
- Call cond_resched() inside pmd_entry()
- Return both the page and its folio from the range walk

[1] https://lore.kernel.org/linux-mm/423de7a3-1c62-4e72-8e79-19a6413e420c@redhat.com/
[2] https://lore.kernel.org/linux-mm/20251014055828.124522-1-pedrodemargomes@gmail.com/

Signed-off-by: Pedro Demarchi Gomes <pedrodemargomes@...il.com>
---
mm/ksm.c | 144 ++++++++++++++++++++++++++++++++++++-------------------
1 file changed, 94 insertions(+), 50 deletions(-)

diff --git a/mm/ksm.c b/mm/ksm.c
index 3aed0478fdce..adb0267a1b7d 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -2455,14 +2455,82 @@ static bool should_skip_rmap_item(struct folio *folio,
return true;
}
+struct ksm_walk_private {
+ struct page *page;
+ struct folio *folio;
+ struct vm_area_struct *vma;
+};
+
+static int ksm_walk_test(unsigned long addr, unsigned long next, struct mm_walk *walk)
+{
+ struct vm_area_struct *vma = walk->vma;
+
+ if (!vma->anon_vma || !(vma->vm_flags & VM_MERGEABLE)) {
+ ksm_scan.address = vma->vm_end;
+ return 1;
+ }
+ return 0;
+}
+
+static int ksm_pmd_entry(pmd_t *pmd, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
+{
+ struct mm_struct *mm = walk->mm;
+ struct vm_area_struct *vma = walk->vma;
+ struct ksm_walk_private *private = walk->private;
+ struct folio *folio;
+ pte_t *start_pte, *pte, ptent;
+ spinlock_t *ptl;
+ int ret = 0;
+
+ start_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+ if (!start_pte) {
+ ksm_scan.address = end;
+ return 0;
+ }
+
+ for (; addr < end; pte++, addr += PAGE_SIZE) {
+ ptent = ptep_get(pte);
+ struct page *page = vm_normal_page(vma, addr, ptent);
+ ksm_scan.address = addr;
+
+ if (ksm_test_exit(mm)) {
+ ret = 1;
+ break;
+ }
+
+ if (!page)
+ continue;
+
+ folio = page_folio(page);
+ if (folio_is_zone_device(folio) || !folio_test_anon(folio))
+ continue;
+
+ ret = 1;
+ folio_get(folio);
+ private->page = page;
+ private->folio = folio;
+ private->vma = vma;
+ break;
+ }
+ pte_unmap_unlock(start_pte, ptl);
+
+ cond_resched();
+ return ret;
+}
+
+static const struct mm_walk_ops walk_ops = {
+ .pmd_entry = ksm_pmd_entry,
+ .test_walk = ksm_walk_test,
+ .walk_lock = PGWALK_RDLOCK,
+};
+
static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page)
{
struct mm_struct *mm;
struct ksm_mm_slot *mm_slot;
struct mm_slot *slot;
- struct vm_area_struct *vma;
struct ksm_rmap_item *rmap_item;
- struct vma_iterator vmi;
int nid;
if (list_empty(&ksm_mm_head.slot.mm_node))
@@ -2527,64 +2595,40 @@ static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page)
slot = &mm_slot->slot;
mm = slot->mm;
- vma_iter_init(&vmi, mm, ksm_scan.address);
mmap_read_lock(mm);
if (ksm_test_exit(mm))
goto no_vmas;
- for_each_vma(vmi, vma) {
- if (!(vma->vm_flags & VM_MERGEABLE))
- continue;
- if (ksm_scan.address < vma->vm_start)
- ksm_scan.address = vma->vm_start;
- if (!vma->anon_vma)
- ksm_scan.address = vma->vm_end;
-
- while (ksm_scan.address < vma->vm_end) {
- struct page *tmp_page = NULL;
- struct folio_walk fw;
- struct folio *folio;
+get_page:
+ struct ksm_walk_private walk_private = {
+ .page = NULL,
+ .folio = NULL,
+ .vma = NULL
+ };
- if (ksm_test_exit(mm))
- break;
+ walk_page_range(mm, ksm_scan.address, -1, &walk_ops, &walk_private);
+ if (walk_private.page) {
+ flush_anon_page(walk_private.vma, walk_private.page, ksm_scan.address);
+ flush_dcache_page(walk_private.page);
+ rmap_item = get_next_rmap_item(mm_slot,
+ ksm_scan.rmap_list, ksm_scan.address);
+ if (rmap_item) {
+ ksm_scan.rmap_list =
+ &rmap_item->rmap_list;
- folio = folio_walk_start(&fw, vma, ksm_scan.address, 0);
- if (folio) {
- if (!folio_is_zone_device(folio) &&
- folio_test_anon(folio)) {
- folio_get(folio);
- tmp_page = fw.page;
- }
- folio_walk_end(&fw, vma);
+ ksm_scan.address += PAGE_SIZE;
+ if (should_skip_rmap_item(walk_private.folio, rmap_item)) {
+ folio_put(walk_private.folio);
+ goto get_page;
}
- if (tmp_page) {
- flush_anon_page(vma, tmp_page, ksm_scan.address);
- flush_dcache_page(tmp_page);
- rmap_item = get_next_rmap_item(mm_slot,
- ksm_scan.rmap_list, ksm_scan.address);
- if (rmap_item) {
- ksm_scan.rmap_list =
- &rmap_item->rmap_list;
-
- if (should_skip_rmap_item(folio, rmap_item)) {
- folio_put(folio);
- goto next_page;
- }
-
- ksm_scan.address += PAGE_SIZE;
- *page = tmp_page;
- } else {
- folio_put(folio);
- }
- mmap_read_unlock(mm);
- return rmap_item;
- }
-next_page:
- ksm_scan.address += PAGE_SIZE;
- cond_resched();
+ *page = walk_private.page;
+ } else {
+ folio_put(walk_private.folio);
}
+ mmap_read_unlock(mm);
+ return rmap_item;
}
if (ksm_test_exit(mm)) {
--
2.43.0