lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220624173656.2033256-14-jthoughton@google.com>
Date:   Fri, 24 Jun 2022 17:36:43 +0000
From:   James Houghton <jthoughton@...gle.com>
To:     Mike Kravetz <mike.kravetz@...cle.com>,
        Muchun Song <songmuchun@...edance.com>,
        Peter Xu <peterx@...hat.com>
Cc:     David Hildenbrand <david@...hat.com>,
        David Rientjes <rientjes@...gle.com>,
        Axel Rasmussen <axelrasmussen@...gle.com>,
        Mina Almasry <almasrymina@...gle.com>,
        Jue Wang <juew@...gle.com>,
        Manish Mishra <manish.mishra@...anix.com>,
        "Dr . David Alan Gilbert" <dgilbert@...hat.com>,
        linux-mm@...ck.org, linux-kernel@...r.kernel.org,
        James Houghton <jthoughton@...gle.com>
Subject: [RFC PATCH 13/26] hugetlb: add huge_pte_alloc_high_granularity

This function is to be used to do a HugeTLB page table walk where we may
need to split a leaf-level huge PTE into a new page table level.

Consider the case where we want to install a 4K mapping inside an empty 1G page:
1. We walk to the PUD and notice that it is pte_none.
2. We split the PUD by calling `hugetlb_split_to_shift`, creating a
   standard PUD that points to PMDs that are all pte_none.
3. We continue the PT walk to find the PMD. We split it just like we
   split the PUD.
4. We find the PTE and give it back to the caller.

To avoid concurrent splitting operations on the same page table entry,
we require that the mapping rwsem is held for writing while splitting or
collapsing and for reading when doing a high-granularity PT walk.

Signed-off-by: James Houghton <jthoughton@...gle.com>
---
 include/linux/hugetlb.h | 23 ++++++++++++++
 mm/hugetlb.c            | 67 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 90 insertions(+)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 605aa19d8572..321f5745d87f 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -1176,14 +1176,37 @@ static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 }
 #endif	/* CONFIG_HUGETLB_PAGE */
 
+enum split_mode {	/* which huge PTEs a high-granularity walk may split */
+	HUGETLB_SPLIT_NEVER   = 0,	/* never split */
+	HUGETLB_SPLIT_NONE    = 1 << 0,	/* split empty (none) PTEs */
+	HUGETLB_SPLIT_PRESENT = 1 << 1,	/* split present leaf PTEs */
+	HUGETLB_SPLIT_ALWAYS  = HUGETLB_SPLIT_NONE | HUGETLB_SPLIT_PRESENT, /* both */
+};
 #ifdef CONFIG_HUGETLB_HIGH_GRANULARITY_MAPPING
 /* If HugeTLB high-granularity mappings are enabled for this VMA. */
 bool hugetlb_hgm_enabled(struct vm_area_struct *vma);
+int huge_pte_alloc_high_granularity(struct hugetlb_pte *hpte,
+				    struct mm_struct *mm,
+				    struct vm_area_struct *vma,
+				    unsigned long addr,
+				    unsigned int desired_shift, /* log2(size) */
+				    enum split_mode mode,
+				    bool write_locked);
 #else
 static inline bool hugetlb_hgm_enabled(struct vm_area_struct *vma)
 {
 	return false;
 }
+static inline int huge_pte_alloc_high_granularity(struct hugetlb_pte *hpte,
+					   struct mm_struct *mm,
+					   struct vm_area_struct *vma,
+					   unsigned long addr,
+					   unsigned int desired_shift,
+					   enum split_mode mode,
+					   bool write_locked)
+{
+	return -EINVAL;	/* high-granularity mapping is not compiled in */
+}
 #endif
 
 static inline spinlock_t *huge_pte_lock(struct hstate *h,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index eaffe7b4f67c..6e0c5fbfe32c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -7166,6 +7166,73 @@ static int hugetlb_split_to_shift(struct mm_struct *mm, struct vm_area_struct *v
 	tlb_finish_mmu(&tlb);
 	return ret;
 }
+
+/*
+ * Similar to huge_pte_alloc except that this can be used to create or walk
+ * high-granularity mappings. It will automatically split existing HugeTLB PTEs
+ * if required by @mode. The resulting HugeTLB PTE will be returned in @hpte.
+ * Returns 0 on success, else the error from the walk or the split. The mapping
+ * lock must be held, for writing iff @write_locked; if only read-held it may
+ * be dropped and retaken for writing to split, and is read-held on return.
+ *
+ * There are four options for @mode:
+ *  - HUGETLB_SPLIT_NEVER   - Never split.
+ *  - HUGETLB_SPLIT_NONE    - Split empty PTEs.
+ *  - HUGETLB_SPLIT_PRESENT - Split present PTEs.
+ *  - HUGETLB_SPLIT_ALWAYS  - Split both empty and present PTEs.
+ */
+int huge_pte_alloc_high_granularity(struct hugetlb_pte *hpte,
+				    struct mm_struct *mm,
+				    struct vm_area_struct *vma,
+				    unsigned long addr,
+				    unsigned int desired_shift,
+				    enum split_mode mode,
+				    bool write_locked)
+{
+	struct address_space *mapping = vma->vm_file->f_mapping;
+	bool has_write_lock = write_locked;
+	unsigned long desired_sz = 1UL << desired_shift;
+	int ret;
+
+	BUG_ON(!hpte);
+
+	if (has_write_lock)
+		i_mmap_assert_write_locked(mapping);
+	else
+		i_mmap_assert_locked(mapping);
+
+retry:
+	hugetlb_pte_init(hpte);
+	ret = hugetlb_walk_to(mm, hpte, addr, desired_sz,
+			      !(mode & HUGETLB_SPLIT_NONE));
+	if (ret || hugetlb_pte_size(hpte) == desired_sz)
+		goto out;
+
+	if (((mode & HUGETLB_SPLIT_NONE) && hugetlb_pte_none(hpte)) ||
+	    ((mode & HUGETLB_SPLIT_PRESENT) &&
+	     hugetlb_pte_present_leaf(hpte))) {
+		if (!has_write_lock) {
+			i_mmap_unlock_read(mapping);
+			i_mmap_lock_write(mapping);
+			has_write_lock = true;
+			goto retry;
+		}
+		ret = hugetlb_split_to_shift(mm, vma, hpte, addr,
+					     desired_shift);
+	}
+
+out:
+	if (has_write_lock && !write_locked) {
+		/* Downgrade; re-walk only if there is no error to report. */
+		i_mmap_unlock_write(mapping);
+		i_mmap_lock_read(mapping);
+		has_write_lock = false;
+		if (!ret)
+			goto retry;
+	}
+
+	return ret;
+}
 #endif /* CONFIG_HUGETLB_HIGH_GRANULARITY_MAPPING */
 
 /*
-- 
2.37.0.rc0.161.g10f37bed90-goog

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ