[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250211111326.14295-9-dev.jain@arm.com>
Date: Tue, 11 Feb 2025 16:43:17 +0530
From: Dev Jain <dev.jain@....com>
To: akpm@...ux-foundation.org,
david@...hat.com,
willy@...radead.org,
kirill.shutemov@...ux.intel.com
Cc: npache@...hat.com,
ryan.roberts@....com,
anshuman.khandual@....com,
catalin.marinas@....com,
cl@...two.org,
vbabka@...e.cz,
mhocko@...e.com,
apopple@...dia.com,
dave.hansen@...ux.intel.com,
will@...nel.org,
baohua@...nel.org,
jack@...e.cz,
srivatsa@...il.mit.edu,
haowenchao22@...il.com,
hughd@...gle.com,
aneesh.kumar@...nel.org,
yang@...amperecomputing.com,
peterx@...hat.com,
ioworker0@...il.com,
wangkefeng.wang@...wei.com,
ziy@...dia.com,
jglisse@...gle.com,
surenb@...gle.com,
vishal.moola@...il.com,
zokeefe@...gle.com,
zhengqi.arch@...edance.com,
jhubbard@...dia.com,
21cnbao@...il.com,
linux-mm@...ck.org,
linux-kernel@...r.kernel.org,
Dev Jain <dev.jain@....com>
Subject: [PATCH v2 08/17] khugepaged: Introduce vma_collapse_anon_folio()
Similar to PMD collapse, take the write locks to stop pagetable walking.
Copy page contents, clear the PTEs, remove folio pins, and (try to) unmap the
old folios. Set the PTEs to the new folio using the set_ptes() API.
Signed-off-by: Dev Jain <dev.jain@....com>
---
mm/khugepaged.c | 92 +++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 92 insertions(+)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index fbfd8a78ef51..a674014b6563 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1217,6 +1217,96 @@ static int vma_collapse_anon_folio_pmd(struct mm_struct *mm, unsigned long addre
return result;
}
+/*
+ * Similar to the PMD case except we have to batch set the PTEs.
+ *
+ * Collapse the (PAGE_SIZE << order) aligned range at @address into @folio.
+ * Pagetable walkers are stopped for the duration of the collapse by
+ * clearing the covering PMD entry; the pagetable is reinstalled (now
+ * pointing at @folio via the batched PTEs) before returning.
+ *
+ * Returns a SCAN_* status code; SCAN_SUCCEED on success.
+ */
+static int vma_collapse_anon_folio(struct mm_struct *mm, unsigned long address,
+		struct vm_area_struct *vma, struct collapse_control *cc, pmd_t *pmd,
+		struct folio *folio, int order)
+{
+	LIST_HEAD(compound_pagelist);
+	spinlock_t *pmd_ptl, *pte_ptl;
+	int result = SCAN_FAIL;
+	struct mmu_notifier_range range;
+	pmd_t _pmd;
+	pte_t *pte;
+	pte_t entry;
+	int nr_pages = folio_nr_pages(folio);
+	unsigned long haddress = address & HPAGE_PMD_MASK;
+
+	VM_BUG_ON(address & ((PAGE_SIZE << order) - 1));
+
+	/* The VMA may have changed while the mmap lock was dropped */
+	result = hugepage_vma_revalidate(mm, address, true, &vma, order, cc);
+	if (result != SCAN_SUCCEED)
+		goto out;
+	result = check_pmd_still_valid(mm, address, pmd);
+	if (result != SCAN_SUCCEED)
+		goto out;
+
+	vma_start_write(vma);
+	anon_vma_lock_write(vma->anon_vma);
+
+	/*
+	 * The whole PMD entry is cleared below, so notify over the full
+	 * PMD range even though only (PAGE_SIZE << order) bytes collapse.
+	 */
+	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, haddress,
+				haddress + HPAGE_PMD_SIZE);
+	mmu_notifier_invalidate_range_start(&range);
+
+	pmd_ptl = pmd_lock(mm, pmd);
+	/* Stop all pagetable walks through this PMD until it is repopulated */
+	_pmd = pmdp_collapse_flush(vma, haddress, pmd);
+	spin_unlock(pmd_ptl);
+	mmu_notifier_invalidate_range_end(&range);
+	tlb_remove_table_sync_one();
+
+	pte = pte_offset_map_lock(mm, &_pmd, address, &pte_ptl);
+	if (pte) {
+		result = __collapse_huge_page_isolate(vma, address, pte, cc,
+						      &compound_pagelist, order);
+		spin_unlock(pte_ptl);
+	} else {
+		result = SCAN_PMD_NULL;
+	}
+
+	if (unlikely(result != SCAN_SUCCEED)) {
+		if (pte)
+			pte_unmap(pte);
+		/* Isolation failed: reinstall the old pagetable and bail out */
+		spin_lock(pmd_ptl);
+		BUG_ON(!pmd_none(*pmd));
+		pmd_populate(mm, pmd, pmd_pgtable(_pmd));
+		spin_unlock(pmd_ptl);
+		anon_vma_unlock_write(vma->anon_vma);
+		goto out;
+	}
+
+	/* All pages are isolated and locked, so rmap walks cannot run */
+	anon_vma_unlock_write(vma->anon_vma);
+
+	/*
+	 * Pass the saved PMD value (_pmd), not *pmd: the latter was cleared
+	 * by pmdp_collapse_flush() above, and the copy-failure path needs
+	 * the original value to reinstall the old pagetable.
+	 */
+	result = __collapse_huge_page_copy(pte, folio, pmd, _pmd,
+					   vma, address, pte_ptl,
+					   &compound_pagelist, order);
+	if (unlikely(result != SCAN_SUCCEED)) {
+		pte_unmap(pte);
+		goto out;
+	}
+
+	__folio_mark_uptodate(folio);
+	entry = mk_pte(&folio->page, vma->vm_page_prot);
+	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+
+	spin_lock(pte_ptl);
+	folio_ref_add(folio, nr_pages - 1);
+	folio_add_new_anon_rmap(folio, vma, address, RMAP_EXCLUSIVE);
+	folio_add_lru_vma(folio, vma);
+	set_ptes(mm, address, pte, entry, nr_pages);
+	spin_unlock(pte_ptl);
+	/* Only unmap after the last use of pte (set_ptes() above) */
+	pte_unmap(pte);
+	spin_lock(pmd_ptl);
+
+	/* See pmd_install() */
+	smp_wmb();
+	BUG_ON(!pmd_none(*pmd));
+	pmd_populate(mm, pmd, pmd_pgtable(_pmd));
+	update_mmu_cache_pmd(vma, haddress, pmd);
+	spin_unlock(pmd_ptl);
+
+	result = SCAN_SUCCEED;
+out:
+	return result;
+}
+
static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
int referenced, int unmapped, int order,
struct collapse_control *cc)
@@ -1276,6 +1366,8 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
if (order == HPAGE_PMD_ORDER)
result = vma_collapse_anon_folio_pmd(mm, address, vma, cc, pmd, folio);
+ else
+ result = vma_collapse_anon_folio(mm, address, vma, cc, pmd, folio, order);
mmap_write_unlock(mm);
--
2.30.2
Powered by blists - more mailing lists