Message-ID: <20230428004139.2899856-3-jiaqiyan@google.com>
Date:   Fri, 28 Apr 2023 00:41:34 +0000
From:   Jiaqi Yan <jiaqiyan@...gle.com>
To:     mike.kravetz@...cle.com, peterx@...hat.com, naoya.horiguchi@....com
Cc:     songmuchun@...edance.com, duenwen@...gle.com,
        axelrasmussen@...gle.com, jthoughton@...gle.com,
        rientjes@...gle.com, linmiaohe@...wei.com, shy828301@...il.com,
        baolin.wang@...ux.alibaba.com, wangkefeng.wang@...wei.com,
        akpm@...ux-foundation.org, linux-mm@...ck.org,
        linux-kernel@...r.kernel.org, Jiaqi Yan <jiaqiyan@...gle.com>
Subject: [RFC PATCH v1 2/7] hugetlb: create PTE level mapping when possible

In memory_failure handling, for each VMA that the HWPOISON HugeTLB
page is mapped into, enable HGM if the VMA is eligible, then split the
P*D-mapped hugepage into smaller PTEs. try_to_unmap still unmaps the
entire hugetlb page, but now one PTE at a time, at levels smaller than
the original P*D. For example, if a hugepage was originally mapped at
PUD size, it is split into PMDs and PTEs, and all of these PMDs and
PTEs are unmapped. The next commit will unmap only the raw HWPOISON
PTE.

For a VMA that is not HGM eligible, or for which enabling HGM or
splitting the hugepage mapping fails, the hugepage remains mapped by
its original P*D and is unmapped at that P*D.
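
For illustration, here is a minimal sketch of how the split step slots
into hwpoison_user_mappings() (it mirrors the memory-failure.c hunk
below and is not itself part of the patch):

	mapping = hugetlb_page_mapping_lock_write(hpage);
	if (mapping) {
		/* Split eligible P*D mappings down to PAGE_SIZE PTEs. */
		try_to_split_huge_mapping(folio, p);
		/* Unmap every entry the hugepage is still mapped with. */
		try_to_unmap(folio, ttu | TTU_RMAP_LOCKED);
		i_mmap_unlock_write(mapping);
	}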

Signed-off-by: Jiaqi Yan <jiaqiyan@...gle.com>
---
 include/linux/hugetlb.h |  5 +++
 mm/hugetlb.c            | 27 ++++++++++++++++
 mm/memory-failure.c     | 68 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 100 insertions(+)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index d44bf6a794e5..03074b23c396 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -1266,6 +1266,7 @@ int hugetlb_alloc_largest_pte(struct hugetlb_pte *hpte, struct mm_struct *mm,
 			      unsigned long end);
 int hugetlb_collapse(struct mm_struct *mm, unsigned long start,
 		     unsigned long end);
+int hugetlb_enable_hgm_vma(struct vm_area_struct *vma);
 int hugetlb_split_to_shift(struct mm_struct *mm, struct vm_area_struct *vma,
 			   struct hugetlb_pte *hpte, unsigned long addr,
 			   unsigned int desired_shift);
@@ -1295,6 +1296,10 @@ int hugetlb_collapse(struct mm_struct *mm, unsigned long start,
 {
 	return -EINVAL;
 }
+int hugetlb_enable_hgm_vma(struct vm_area_struct *vma)
+{
+	return -EINVAL;
+}
 int hugetlb_split_to_shift(struct mm_struct *mm, struct vm_area_struct *vma,
 			   const struct hugetlb_pte *hpte, unsigned long addr,
 			   unsigned int desired_shift)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index d3f3f1c2d293..1419176b7e51 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -8203,6 +8203,33 @@ int hugetlb_collapse(struct mm_struct *mm, unsigned long start,
 	return ret;
 }
 
+int hugetlb_enable_hgm_vma(struct vm_area_struct *vma)
+{
+	if (hugetlb_hgm_enabled(vma))
+		return 0;
+
+	if (!is_vm_hugetlb_page(vma)) {
+		pr_warn("VMA=[%#lx, %#lx) is not HugeTLB\n",
+			vma->vm_start, vma->vm_end);
+		return -EINVAL;
+	}
+
+	if (!hugetlb_hgm_eligible(vma)) {
+		pr_warn("VMA=[%#lx, %#lx) is not HGM eligible\n",
+			vma->vm_start, vma->vm_end);
+		return -EINVAL;
+	}
+
+	hugetlb_unshare_all_pmds(vma);
+
+	/*
+	 * TODO: add the ability to tell whether HGM was enabled by the
+	 * kernel (for HWPOISON unmapping) or by userspace (via MADV_SPLIT).
+	 */
+	vm_flags_set(vma, VM_HUGETLB_HGM);
+	return 0;
+}
+
 /*
  * Find the optimal HugeTLB PTE shift that @desired_addr could be mapped at.
  */
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 0b37cbc6e8ae..eb5579b6787e 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1479,6 +1479,73 @@ static int get_hwpoison_page(struct page *p, unsigned long flags)
 	return ret;
 }
 
+#ifdef CONFIG_HUGETLB_HIGH_GRANULARITY_MAPPING
+/*
+ * For each HGM-eligible VMA that the poisoned page is mapped into, create a
+ * new HGM mapping for hugepage @folio and make sure @poisoned_page is mapped
+ * by a PAGE_SIZE-level PTE. The caller (hwpoison_user_mappings) must ensure:
+ * 1. folio's address space (mapping) is locked in write mode.
+ * 2. folio is locked.
+ */
+static void try_to_split_huge_mapping(struct folio *folio,
+				      struct page *poisoned_page)
+{
+	struct address_space *mapping = folio_mapping(folio);
+	pgoff_t pgoff_start;
+	pgoff_t pgoff_end;
+	struct vm_area_struct *vma;
+	unsigned long poisoned_addr;
+	unsigned long head_addr;
+	struct hugetlb_pte hpte;
+
+	if (WARN_ON(!mapping))
+		return;
+
+	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+
+	pgoff_start = folio_pgoff(folio);
+	pgoff_end = pgoff_start + folio_nr_pages(folio) - 1;
+
+	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff_start, pgoff_end) {
+		/* Enable HGM on HGM-eligible VMAs. */
+		if (!hugetlb_hgm_eligible(vma))
+			continue;
+
+		i_mmap_assert_locked(vma->vm_file->f_mapping);
+		if (hugetlb_enable_hgm_vma(vma)) {
+			pr_err("Failed to enable HGM on eligible VMA=[%#lx, %#lx)\n",
+				vma->vm_start, vma->vm_end);
+			continue;
+		}
+
+		poisoned_addr = vma_address(poisoned_page, vma);
+		head_addr = vma_address(folio_page(folio, 0), vma);
+		/*
+		 * Get the hugetlb_pte of the P*D-mapped hugepage first,
+		 * then split that entry down to PMD and PTE entries.
+		 *
+		 * Both getting the original huge PTE and splitting it
+		 * require the write lock on vma->vm_file->f_mapping, which
+		 * the caller (e.g. hwpoison_user_mappings) has already acquired.
+		 */
+		if (hugetlb_full_walk(&hpte, vma, head_addr))
+			continue;
+
+		if (hugetlb_split_to_shift(vma->vm_mm, vma, &hpte,
+					   poisoned_addr, PAGE_SHIFT)) {
+			pr_err("Failed to split huge mapping: pfn=%#lx, vaddr=%#lx in VMA=[%#lx, %#lx)\n",
+				page_to_pfn(poisoned_page), poisoned_addr,
+				vma->vm_start, vma->vm_end);
+		}
+	}
+}
+#else
+static void try_to_split_huge_mapping(struct folio *folio,
+				      struct page *poisoned_page)
+{
+}
+#endif /* CONFIG_HUGETLB_HIGH_GRANULARITY_MAPPING */
+
 /*
  * Do all that is necessary to remove user space mappings. Unmap
  * the pages and send SIGBUS to the processes if the data was dirty.
@@ -1555,6 +1622,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
 		 */
 		mapping = hugetlb_page_mapping_lock_write(hpage);
 		if (mapping) {
+			try_to_split_huge_mapping(folio, p);
 			try_to_unmap(folio, ttu|TTU_RMAP_LOCKED);
 			i_mmap_unlock_write(mapping);
 		} else
-- 
2.40.1.495.gc816e09b53d-goog
