lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Fri, 20 Nov 2020 14:43:09 +0800
From:   Muchun Song <songmuchun@...edance.com>
To:     corbet@....net, mike.kravetz@...cle.com, tglx@...utronix.de,
        mingo@...hat.com, bp@...en8.de, x86@...nel.org, hpa@...or.com,
        dave.hansen@...ux.intel.com, luto@...nel.org, peterz@...radead.org,
        viro@...iv.linux.org.uk, akpm@...ux-foundation.org,
        paulmck@...nel.org, mchehab+huawei@...nel.org,
        pawan.kumar.gupta@...ux.intel.com, rdunlap@...radead.org,
        oneukum@...e.com, anshuman.khandual@....com, jroedel@...e.de,
        almasrymina@...gle.com, rientjes@...gle.com, willy@...radead.org,
        osalvador@...e.de, mhocko@...e.com, song.bao.hua@...ilicon.com
Cc:     duanxiongchun@...edance.com, linux-doc@...r.kernel.org,
        linux-kernel@...r.kernel.org, linux-mm@...ck.org,
        linux-fsdevel@...r.kernel.org,
        Muchun Song <songmuchun@...edance.com>
Subject: [PATCH v5 05/21] mm/hugetlb: Introduce pgtable allocation/freeing helpers

On x86_64, vmemmap is always PMD-mapped if the machine has hugepage
support and if we have 2MB contiguous pages and PMD alignment. If we
want to free the unused vmemmap pages, we have to split the huge PMD
first. So we should pre-allocate page tables so that the PMD mapping
can be split into PTE mappings.

Signed-off-by: Muchun Song <songmuchun@...edance.com>
Suggested-by: Oscar Salvador <osalvador@...e.de>
Acked-by: Mike Kravetz <mike.kravetz@...cle.com>
---
 mm/hugetlb_vmemmap.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 mm/hugetlb_vmemmap.h | 11 ++++++++
 2 files changed, 87 insertions(+)

diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index 1afe245395e5..ec70980000d8 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -99,6 +99,8 @@
  */
 #define pr_fmt(fmt)	"HugeTLB Vmemmap: " fmt
 
+#include <linux/list.h>
+#include <asm/pgalloc.h>
 #include "hugetlb_vmemmap.h"
 
 /*
@@ -111,6 +113,80 @@
  */
 #define RESERVE_VMEMMAP_NR		2U
 
+#ifndef VMEMMAP_HPAGE_SHIFT
+#define VMEMMAP_HPAGE_SHIFT		HPAGE_SHIFT	/* default; an earlier definition takes precedence */
+#endif
+#define VMEMMAP_HPAGE_ORDER		(VMEMMAP_HPAGE_SHIFT - PAGE_SHIFT)	/* log2 of VMEMMAP_HPAGE_NR */
+#define VMEMMAP_HPAGE_NR		(1 << VMEMMAP_HPAGE_ORDER)	/* == VMEMMAP_HPAGE_SIZE >> PAGE_SHIFT */
+#define VMEMMAP_HPAGE_SIZE		((1UL) << VMEMMAP_HPAGE_SHIFT)	/* unsigned long, 2^VMEMMAP_HPAGE_SHIFT */
+#define VMEMMAP_HPAGE_MASK		(~(VMEMMAP_HPAGE_SIZE - 1))	/* AND with it to clear the low VMEMMAP_HPAGE_SHIFT bits */
+
+static inline unsigned int free_vmemmap_pages_per_hpage(struct hstate *h)
+{
+	return h->nr_free_vmemmap_pages;	/* accessor for the hstate's nr_free_vmemmap_pages field */
+}
+
+static inline unsigned int vmemmap_pages_per_hpage(struct hstate *h)
+{
+	return RESERVE_VMEMMAP_NR + free_vmemmap_pages_per_hpage(h);	/* RESERVE_VMEMMAP_NR on top of the per-hstate free count */
+}
+
+static inline unsigned long vmemmap_pages_size_per_hpage(struct hstate *h)
+{
+	return (unsigned long)vmemmap_pages_per_hpage(h) << PAGE_SHIFT;	/* page count widened to unsigned long, then shifted by PAGE_SHIFT */
+}
+
+static inline unsigned int pgtable_pages_to_prealloc_per_hpage(struct hstate *h)
+{
+	unsigned long size;
+
+	/* No vmemmap pages to free means no page tables to pre-allocate. */
+	if (free_vmemmap_pages_per_hpage(h) == 0)
+		return 0;
+
+	/* Count the VMEMMAP_HPAGE_SIZE units covering the size, rounded up. */
+	size = vmemmap_pages_size_per_hpage(h);
+
+	return ALIGN(size, VMEMMAP_HPAGE_SIZE) >> VMEMMAP_HPAGE_SHIFT;
+}
+
+void vmemmap_pgtable_free(struct page *page)
+{
+	struct page *cur, *next;	/* walk page->lru; next survives unlinking cur */
+
+	list_for_each_entry_safe(cur, next, &page->lru, lru) {
+		list_del(&cur->lru);
+		pte_free_kernel(&init_mm, page_to_virt(cur));	/* hand the PTE page back */
+	}
+}
+
+int vmemmap_pgtable_prealloc(struct hstate *h, struct page *page)
+{
+	unsigned int i, nr = pgtable_pages_to_prealloc_per_hpage(h);
+
+	/*
+	 * The huge page's lru list temporarily holds the preallocated
+	 * pages. Those pages are consumed and the list is emptied before
+	 * the huge page is put into use; prep_new_huge_page() then
+	 * reinitializes the list.
+	 */
+	INIT_LIST_HEAD(&page->lru);
+
+	for (i = 0; i < nr; i++) {
+		pte_t *pte = pte_alloc_one_kernel(&init_mm);
+
+		if (!pte) {
+			/* Release whatever was allocated so far. */
+			vmemmap_pgtable_free(page);
+			return -ENOMEM;
+		}
+
+		list_add(&virt_to_page(pte)->lru, &page->lru);
+	}
+
+	return 0;
+}
+
 void __init hugetlb_vmemmap_init(struct hstate *h)
 {
 	unsigned int order = huge_page_order(h);
diff --git a/mm/hugetlb_vmemmap.h b/mm/hugetlb_vmemmap.h
index 40c0c7dfb60d..9eca6879c0a4 100644
--- a/mm/hugetlb_vmemmap.h
+++ b/mm/hugetlb_vmemmap.h
@@ -12,9 +12,20 @@
 
 #ifdef CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
 void __init hugetlb_vmemmap_init(struct hstate *h);
+int vmemmap_pgtable_prealloc(struct hstate *h, struct page *page);
+void vmemmap_pgtable_free(struct page *page);
 #else
 static inline void hugetlb_vmemmap_init(struct hstate *h)
 {
 }
+
+static inline int vmemmap_pgtable_prealloc(struct hstate *h, struct page *page)
+{
+	return 0;	/* stub when CONFIG_HUGETLB_PAGE_FREE_VMEMMAP is not defined: always 0 */
+}
+
+static inline void vmemmap_pgtable_free(struct page *page)
+{
+} /* no-op stub used when CONFIG_HUGETLB_PAGE_FREE_VMEMMAP is not defined */
 #endif /* CONFIG_HUGETLB_PAGE_FREE_VMEMMAP */
 #endif /* _LINUX_HUGETLB_VMEMMAP_H */
-- 
2.11.0

Powered by blists - more mailing lists