lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230218002819.1486479-20-jthoughton@google.com>
Date:   Sat, 18 Feb 2023 00:27:52 +0000
From:   James Houghton <jthoughton@...gle.com>
To:     Mike Kravetz <mike.kravetz@...cle.com>,
        Muchun Song <songmuchun@...edance.com>,
        Peter Xu <peterx@...hat.com>,
        Andrew Morton <akpm@...ux-foundation.org>
Cc:     David Hildenbrand <david@...hat.com>,
        David Rientjes <rientjes@...gle.com>,
        Axel Rasmussen <axelrasmussen@...gle.com>,
        Mina Almasry <almasrymina@...gle.com>,
        "Zach O'Keefe" <zokeefe@...gle.com>,
        Manish Mishra <manish.mishra@...anix.com>,
        Naoya Horiguchi <naoya.horiguchi@....com>,
        "Dr . David Alan Gilbert" <dgilbert@...hat.com>,
        "Matthew Wilcox (Oracle)" <willy@...radead.org>,
        Vlastimil Babka <vbabka@...e.cz>,
        Baolin Wang <baolin.wang@...ux.alibaba.com>,
        Miaohe Lin <linmiaohe@...wei.com>,
        Yang Shi <shy828301@...il.com>,
        Frank van der Linden <fvdl@...gle.com>,
        Jiaqi Yan <jiaqiyan@...gle.com>, linux-mm@...ck.org,
        linux-kernel@...r.kernel.org,
        James Houghton <jthoughton@...gle.com>
Subject: [PATCH v2 19/46] hugetlb: add HGM support to hugetlb_change_protection

The main change here is to do a high-granularity walk and to pull the
shift from the walk (not from the hstate).

Signed-off-by: James Houghton <jthoughton@...gle.com>

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ecf1a28dbaaa..7321c6602d6f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -6900,15 +6900,15 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long start = address;
-	pte_t *ptep;
 	pte_t pte;
 	struct hstate *h = hstate_vma(vma);
-	long pages = 0, psize = huge_page_size(h);
+	long base_pages = 0, psize = huge_page_size(h);
 	bool shared_pmd = false;
 	struct mmu_notifier_range range;
 	unsigned long last_addr_mask;
 	bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
 	bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
+	struct hugetlb_pte hpte;
 
 	/*
 	 * In the case of shared PMDs, the area to flush could be beyond
@@ -6926,39 +6926,43 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
 	hugetlb_vma_lock_write(vma);
 	i_mmap_lock_write(vma->vm_file->f_mapping);
 	last_addr_mask = hugetlb_mask_last_page(h);
-	for (; address < end; address += psize) {
+	while (address < end) {
 		spinlock_t *ptl;
-		ptep = hugetlb_walk(vma, address, psize);
-		if (!ptep) {
+		if (hugetlb_full_walk(&hpte, vma, address)) {
 			if (!uffd_wp) {
-				address |= last_addr_mask;
+				address = (address | last_addr_mask) + psize;
 				continue;
 			}
 			/*
 			 * Userfaultfd wr-protect requires pgtable
 			 * pre-allocations to install pte markers.
+			 *
+			 * Use hugetlb_full_walk_alloc to allocate
+			 * the hstate-level PTE.
 			 */
-			ptep = huge_pte_alloc(mm, vma, address, psize);
-			if (!ptep) {
-				pages = -ENOMEM;
+			if (hugetlb_full_walk_alloc(&hpte, vma,
+						    address, psize)) {
+				base_pages = -ENOMEM;
 				break;
 			}
 		}
-		ptl = huge_pte_lock(h, mm, ptep);
-		if (huge_pmd_unshare(mm, vma, address, ptep)) {
+
+		ptl = hugetlb_pte_lock(&hpte);
+		if (hugetlb_pte_size(&hpte) == psize &&
+		    huge_pmd_unshare(mm, vma, address, hpte.ptep)) {
 			/*
 			 * When uffd-wp is enabled on the vma, unshare
 			 * shouldn't happen at all.  Warn about it if it
 			 * happened due to some reason.
 			 */
 			WARN_ON_ONCE(uffd_wp || uffd_wp_resolve);
-			pages++;
+			base_pages += psize / PAGE_SIZE;
 			spin_unlock(ptl);
 			shared_pmd = true;
-			address |= last_addr_mask;
+			address = (address | last_addr_mask) + psize;
 			continue;
 		}
-		pte = huge_ptep_get(ptep);
+		pte = huge_ptep_get(hpte.ptep);
 		if (unlikely(is_hugetlb_entry_hwpoisoned(pte))) {
 			/* Nothing to do. */
 		} else if (unlikely(is_hugetlb_entry_migration(pte))) {
@@ -6974,7 +6978,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
 					entry = make_readable_migration_entry(
 								swp_offset(entry));
 				newpte = swp_entry_to_pte(entry);
-				pages++;
+				base_pages += hugetlb_pte_size(&hpte) / PAGE_SIZE;
 			}
 
 			if (uffd_wp)
@@ -6982,34 +6986,49 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
 			else if (uffd_wp_resolve)
 				newpte = pte_swp_clear_uffd_wp(newpte);
 			if (!pte_same(pte, newpte))
-				set_huge_pte_at(mm, address, ptep, newpte);
+				set_huge_pte_at(mm, address, hpte.ptep, newpte);
 		} else if (unlikely(is_pte_marker(pte))) {
 			/* No other markers apply for now. */
 			WARN_ON_ONCE(!pte_marker_uffd_wp(pte));
 			if (uffd_wp_resolve)
 				/* Safe to modify directly (non-present->none). */
-				huge_pte_clear(mm, address, ptep, psize);
+				huge_pte_clear(mm, address, hpte.ptep,
+						hugetlb_pte_size(&hpte));
 		} else if (!huge_pte_none(pte)) {
 			pte_t old_pte;
-			unsigned int shift = huge_page_shift(hstate_vma(vma));
+			unsigned int shift = hpte.shift;
+
+			if (unlikely(!hugetlb_pte_present_leaf(&hpte, pte))) {
+				/*
+				 * Someone split the PTE from under us, so retry
+				 * the walk.
+				 */
+				spin_unlock(ptl);
+				continue;
+			}
 
-			old_pte = huge_ptep_modify_prot_start(vma, address, ptep);
+			old_pte = huge_ptep_modify_prot_start(
+					vma, address, hpte.ptep);
 			pte = huge_pte_modify(old_pte, newprot);
-			pte = arch_make_huge_pte(pte, shift, vma->vm_flags);
+			pte = arch_make_huge_pte(
+					pte, shift, vma->vm_flags);
 			if (uffd_wp)
 				pte = huge_pte_mkuffd_wp(pte);
 			else if (uffd_wp_resolve)
 				pte = huge_pte_clear_uffd_wp(pte);
-			huge_ptep_modify_prot_commit(vma, address, ptep, old_pte, pte);
-			pages++;
+			huge_ptep_modify_prot_commit(
+					vma, address, hpte.ptep,
+					old_pte, pte);
+			base_pages += hugetlb_pte_size(&hpte) / PAGE_SIZE;
 		} else {
 			/* None pte */
 			if (unlikely(uffd_wp))
 				/* Safe to modify directly (none->non-present). */
-				set_huge_pte_at(mm, address, ptep,
+				set_huge_pte_at(mm, address, hpte.ptep,
 						make_pte_marker(PTE_MARKER_UFFD_WP));
 		}
 		spin_unlock(ptl);
+		address += hugetlb_pte_size(&hpte);
 	}
 	/*
 	 * Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare
@@ -7032,7 +7051,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
 	hugetlb_vma_unlock_write(vma);
 	mmu_notifier_invalidate_range_end(&range);
 
-	return pages > 0 ? (pages << h->order) : pages;
+	return base_pages;
 }
 
 /* Return true if reservation was successful, false otherwise.  */
-- 
2.39.2.637.g21b0678d19-goog

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ