Message-ID: <85604399-40a8-4d13-809d-e5492e74a988@redhat.com>
Date: Wed, 29 Oct 2025 15:45:14 +0100
From: David Hildenbrand <david@...hat.com>
To: Pedro Demarchi Gomes <pedrodemargomes@...il.com>,
Andrew Morton <akpm@...ux-foundation.org>
Cc: Xu Xin <xu.xin16@....com.cn>, Chengming Zhou <chengming.zhou@...ux.dev>,
linux-mm@...ck.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 2/3] ksm: perform a range-walk in break_ksm
On 28.10.25 14:19, Pedro Demarchi Gomes wrote:
> Make break_ksm() receive an address range and change
> break_ksm_pmd_entry() to perform a range-walk and return the address of
> the first ksm page found.
>
> This change allows break_ksm() to skip unmapped regions instead of
> iterating every page address. When unmerging large sparse VMAs, this
> significantly reduces runtime, as confirmed by benchmark test (see
> cover letter).
Instead of referencing the cover letter, directly include the data here.
>
> Suggested-by: David Hildenbrand <david@...hat.com>
> Signed-off-by: Pedro Demarchi Gomes <pedrodemargomes@...il.com>
> ---
> mm/ksm.c | 88 +++++++++++++++++++++++++++++++-------------------------
> 1 file changed, 49 insertions(+), 39 deletions(-)
>
> diff --git a/mm/ksm.c b/mm/ksm.c
> index 2a9a7fd4c777..1d1ef0554c7c 100644
> --- a/mm/ksm.c
> +++ b/mm/ksm.c
> @@ -607,34 +607,54 @@ static inline bool ksm_test_exit(struct mm_struct *mm)
> return atomic_read(&mm->mm_users) == 0;
> }
>
> -static int break_ksm_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long next,
> +struct break_ksm_arg {
> + unsigned long addr;
> +};
You can avoid that struct by simply passing a pointer to addr.
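I.e., in break_ksm() the struct then goes away and you can hand &addr
to the walk directly (untested, just to illustrate the idea):

	ksm_page = walk_page_range_vma(vma, addr, end, ops, &addr);
	if (ksm_page <= 0)
		return ksm_page;
	ret = handle_mm_fault(vma, addr,
			      FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
			      NULL);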
> +
> +static int break_ksm_pmd_entry(pmd_t *pmdp, unsigned long addr, unsigned long end,
> struct mm_walk *walk)
> {
> - struct page *page = NULL;
> + struct page *page;
> spinlock_t *ptl;
> - pte_t *pte;
> - pte_t ptent;
> - int ret;
> + pte_t *start_ptep = NULL, *ptep, pte;
Is there a need to initialize start_ptep?
> + int ret = 0;
> + struct mm_struct *mm = walk->mm;
> + struct break_ksm_arg *private = (struct break_ksm_arg *) walk->private;
Prefer reverse xmas tree:
struct break_ksm_arg *private = (struct break_ksm_arg *) walk->private;
struct mm_struct *mm = walk->mm;
...
With the break_ksm_arg simplification you'd have:
unsigned long *found_addr = (unsigned long *)walk->private;
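Taken together (dropping the struct, dropping the NULL init, and moving
"pte" into the loop as suggested further below), the declarations could
look something like this untested sketch:

	unsigned long *found_addr = (unsigned long *)walk->private;
	struct mm_struct *mm = walk->mm;
	pte_t *start_ptep, *ptep;
	spinlock_t *ptl;
	int ret = 0;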
>
> - pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
> - if (!pte)
> + if (ksm_test_exit(walk->mm))
> return 0;
> - ptent = ptep_get(pte);
> - if (pte_present(ptent)) {
> - page = vm_normal_page(walk->vma, addr, ptent);
> - } else if (!pte_none(ptent)) {
> - swp_entry_t entry = pte_to_swp_entry(ptent);
>
> - /*
> - * As KSM pages remain KSM pages until freed, no need to wait
> - * here for migration to end.
> - */
> - if (is_migration_entry(entry))
> - page = pfn_swap_entry_to_page(entry);
> + if (signal_pending(current))
> + return -ERESTARTSYS;
I assume that's not a problem for the other callsites, which didn't
have this check before.
> +
> + start_ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
> + if (!start_ptep)
> + return 0;
> +
> + for (ptep = start_ptep; addr < end; ptep++, addr += PAGE_SIZE) {
Best to declare pte and folio (see last mail) here in the loop:
pte_t pte = ptep_get(ptep);
struct folio *folio = NULL;
> + pte = ptep_get(ptep);
> + page = NULL;
> + if (pte_present(pte)) {
> + page = vm_normal_page(walk->vma, addr, pte);
> + } else if (!pte_none(pte)) {
> + swp_entry_t entry = pte_to_swp_entry(pte);
> +
> + /*
> + * As KSM pages remain KSM pages until freed, no need to wait
> + * here for migration to end.
> + */
> + if (is_migration_entry(entry))
> + page = pfn_swap_entry_to_page(entry);
> + }
> + /* return 1 if the page is an normal ksm page or KSM-placed zero page */
> + ret = (page && folio_test_ksm(page_folio(page))) || is_ksm_zero_pte(pte);
> + if (ret) {
> + private->addr = addr;
> + goto out_unlock;
> + }
I suggest you call "ret" "found" instead.
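Putting the folio declarations and the "found" rename together, the
loop could then look roughly like this (untested sketch; assuming
vm_normal_folio() / pfn_swap_entry_folio() for the folio conversion
mentioned in my last mail):

	int found = 0;

	for (ptep = start_ptep; addr < end; ptep++, addr += PAGE_SIZE) {
		pte_t pte = ptep_get(ptep);
		struct folio *folio = NULL;

		if (pte_present(pte)) {
			folio = vm_normal_folio(walk->vma, addr, pte);
		} else if (!pte_none(pte)) {
			swp_entry_t entry = pte_to_swp_entry(pte);

			/*
			 * As KSM pages remain KSM pages until freed, no need
			 * to wait here for migration to end.
			 */
			if (is_migration_entry(entry))
				folio = pfn_swap_entry_folio(entry);
		}
		/* Did we find a KSM folio or a KSM-placed zero page? */
		found = (folio && folio_test_ksm(folio)) || is_ksm_zero_pte(pte);
		if (found) {
			*found_addr = addr;
			break;
		}
	}
	pte_unmap_unlock(start_ptep, ptl);
	return found;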
> }
> - /* return 1 if the page is an normal ksm page or KSM-placed zero page */
> - ret = (page && folio_test_ksm(page_folio(page))) || is_ksm_zero_pte(ptent);
> - pte_unmap_unlock(pte, ptl);
> +out_unlock:
> + pte_unmap_unlock(ptep, ptl);
> return ret;
> }
>
> @@ -661,9 +681,11 @@ static const struct mm_walk_ops break_ksm_lock_vma_ops = {
> * of the process that owns 'vma'. We also do not want to enforce
> * protection keys here anyway.
> */
> -static int break_ksm(struct vm_area_struct *vma, unsigned long addr, bool lock_vma)
> +static int break_ksm(struct vm_area_struct *vma, unsigned long addr,
> + unsigned long end, bool lock_vma)
> {
> vm_fault_t ret = 0;
> + struct break_ksm_arg break_ksm_arg;
> const struct mm_walk_ops *ops = lock_vma ?
> &break_ksm_lock_vma_ops : &break_ksm_ops;
>
> @@ -671,11 +693,10 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr, bool lock_v
> int ksm_page;
>
> cond_resched();
> - ksm_page = walk_page_range_vma(vma, addr, addr + 1, ops, NULL);
> - if (WARN_ON_ONCE(ksm_page < 0))
> + ksm_page = walk_page_range_vma(vma, addr, end, ops, &break_ksm_arg);
> + if (ksm_page <= 0)
> return ksm_page;
> - if (!ksm_page)
> - return 0;
> + addr = break_ksm_arg.addr;
> ret = handle_mm_fault(vma, addr,
> FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
> NULL);
> @@ -761,7 +782,7 @@ static void break_cow(struct ksm_rmap_item *rmap_item)
> mmap_read_lock(mm);
> vma = find_mergeable_vma(mm, addr);
> if (vma)
> - break_ksm(vma, addr, false);
> + break_ksm(vma, addr, addr + 1, false);
Better to use addr + PAGE_SIZE
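I.e.:

	break_ksm(vma, addr, addr + PAGE_SIZE, false);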
--
Cheers
David / dhildenb