Message-Id: <20250211111326.14295-9-dev.jain@arm.com>
Date: Tue, 11 Feb 2025 16:43:17 +0530
From: Dev Jain <dev.jain@....com>
To: akpm@...ux-foundation.org,
	david@...hat.com,
	willy@...radead.org,
	kirill.shutemov@...ux.intel.com
Cc: npache@...hat.com,
	ryan.roberts@....com,
	anshuman.khandual@....com,
	catalin.marinas@....com,
	cl@...two.org,
	vbabka@...e.cz,
	mhocko@...e.com,
	apopple@...dia.com,
	dave.hansen@...ux.intel.com,
	will@...nel.org,
	baohua@...nel.org,
	jack@...e.cz,
	srivatsa@...il.mit.edu,
	haowenchao22@...il.com,
	hughd@...gle.com,
	aneesh.kumar@...nel.org,
	yang@...amperecomputing.com,
	peterx@...hat.com,
	ioworker0@...il.com,
	wangkefeng.wang@...wei.com,
	ziy@...dia.com,
	jglisse@...gle.com,
	surenb@...gle.com,
	vishal.moola@...il.com,
	zokeefe@...gle.com,
	zhengqi.arch@...edance.com,
	jhubbard@...dia.com,
	21cnbao@...il.com,
	linux-mm@...ck.org,
	linux-kernel@...r.kernel.org,
	Dev Jain <dev.jain@....com>
Subject: [PATCH v2 08/17] khugepaged: Introduce vma_collapse_anon_folio()

Similar to the PMD collapse case, take the write locks to stop pagetable
walking, copy the page contents, clear the PTEs, drop the folio pins, and
(try to) unmap the old folios. Then point the PTEs at the new folio in a
single batch using the set_ptes() API, as sketched below.

Signed-off-by: Dev Jain <dev.jain@....com>
---
 mm/khugepaged.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 97 insertions(+)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index fbfd8a78ef51..a674014b6563 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1217,6 +1217,101 @@ static int vma_collapse_anon_folio_pmd(struct mm_struct *mm, unsigned long addre
 	return result;
 }
 
+/* Similar to the PMD case, except that the PTEs must be batch-set with set_ptes() */
+static int vma_collapse_anon_folio(struct mm_struct *mm, unsigned long address,
+		struct vm_area_struct *vma, struct collapse_control *cc, pmd_t *pmd,
+		struct folio *folio, int order)
+{
+	LIST_HEAD(compound_pagelist);
+	spinlock_t *pmd_ptl, *pte_ptl;
+	int result = SCAN_FAIL;
+	struct mmu_notifier_range range;
+	pmd_t _pmd;
+	pte_t *pte;
+	pte_t entry;
+	int nr_pages = folio_nr_pages(folio);
+	unsigned long haddress = address & HPAGE_PMD_MASK;
+
+	VM_BUG_ON(address & ((PAGE_SIZE << order) - 1));
+
+	result = hugepage_vma_revalidate(mm, address, true, &vma, order, cc);
+	if (result != SCAN_SUCCEED)
+		goto out;
+	result = check_pmd_still_valid(mm, address, pmd);
+	if (result != SCAN_SUCCEED)
+		goto out;
+
+	vma_start_write(vma);
+	anon_vma_lock_write(vma->anon_vma);
+
+	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, haddress,
+				haddress + HPAGE_PMD_SIZE);
+	mmu_notifier_invalidate_range_start(&range);
+
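+	/* Clear and flush the PMD so no page-table walker can reach the old PTEs */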
+	pmd_ptl = pmd_lock(mm, pmd);
+	_pmd = pmdp_collapse_flush(vma, haddress, pmd);
+	spin_unlock(pmd_ptl);
+	mmu_notifier_invalidate_range_end(&range);
+	tlb_remove_table_sync_one();
+
+	pte = pte_offset_map_lock(mm, &_pmd, address, &pte_ptl);
+	if (pte) {
+		result = __collapse_huge_page_isolate(vma, address, pte, cc,
+						      &compound_pagelist, order);
+		spin_unlock(pte_ptl);
+	} else {
+		result = SCAN_PMD_NULL;
+	}
+
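+	/* Isolation failed: reinstall the original PTE table and bail out */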
+	if (unlikely(result != SCAN_SUCCEED)) {
+		if (pte)
+			pte_unmap(pte);
+		spin_lock(pmd_ptl);
+		BUG_ON(!pmd_none(*pmd));
+		pmd_populate(mm, pmd, pmd_pgtable(_pmd));
+		spin_unlock(pmd_ptl);
+		anon_vma_unlock_write(vma->anon_vma);
+		goto out;
+	}
+
+	anon_vma_unlock_write(vma->anon_vma);
+
+	result = __collapse_huge_page_copy(pte, folio, pmd, _pmd,
+					   vma, address, pte_ptl,
+					   &compound_pagelist, order);
+	if (unlikely(result != SCAN_SUCCEED)) {
+		pte_unmap(pte);
+		goto out;
+	}
+
+	/* Success: publish the new folio and batch-install its PTEs */
+	__folio_mark_uptodate(folio);
+	entry = mk_pte(&folio->page, vma->vm_page_prot);
+	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+
+	spin_lock(pte_ptl);
+	folio_ref_add(folio, nr_pages - 1);
+	folio_add_new_anon_rmap(folio, vma, address, RMAP_EXCLUSIVE);
+	folio_add_lru_vma(folio, vma);
+	set_ptes(mm, address, pte, entry, nr_pages);
+	spin_unlock(pte_ptl);
+	pte_unmap(pte);
+	spin_lock(pmd_ptl);
+
+	/* See pmd_install(): PTE setup must be visible before pmd_populate() */
+	smp_wmb();
+	BUG_ON(!pmd_none(*pmd));
+	pmd_populate(mm, pmd, pmd_pgtable(_pmd));
+	update_mmu_cache_pmd(vma, haddress, pmd);
+	spin_unlock(pmd_ptl);
+
+	result = SCAN_SUCCEED;
+out:
+	return result;
+}
+
 static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
 			      int referenced, int unmapped, int order,
 			      struct collapse_control *cc)
@@ -1276,6 +1366,8 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
 
 	if (order == HPAGE_PMD_ORDER)
 		result = vma_collapse_anon_folio_pmd(mm, address, vma, cc, pmd, folio);
+	else
+		result = vma_collapse_anon_folio(mm, address, vma, cc, pmd, folio, order);
 
 	mmap_write_unlock(mm);
 
-- 
2.30.2

