[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230105101844.1893104-32-jthoughton@google.com>
Date: Thu, 5 Jan 2023 10:18:29 +0000
From: James Houghton <jthoughton@...gle.com>
To: Mike Kravetz <mike.kravetz@...cle.com>,
Muchun Song <songmuchun@...edance.com>,
Peter Xu <peterx@...hat.com>
Cc: David Hildenbrand <david@...hat.com>,
David Rientjes <rientjes@...gle.com>,
Axel Rasmussen <axelrasmussen@...gle.com>,
Mina Almasry <almasrymina@...gle.com>,
"Zach O'Keefe" <zokeefe@...gle.com>,
Manish Mishra <manish.mishra@...anix.com>,
Naoya Horiguchi <naoya.horiguchi@....com>,
"Dr . David Alan Gilbert" <dgilbert@...hat.com>,
"Matthew Wilcox (Oracle)" <willy@...radead.org>,
Vlastimil Babka <vbabka@...e.cz>,
Baolin Wang <baolin.wang@...ux.alibaba.com>,
Miaohe Lin <linmiaohe@...wei.com>,
Yang Shi <shy828301@...il.com>,
Andrew Morton <akpm@...ux-foundation.org>, linux-mm@...ck.org,
linux-kernel@...r.kernel.org,
James Houghton <jthoughton@...gle.com>
Subject: [PATCH 31/46] hugetlb: sort hstates in hugetlb_init_hstates
When using HugeTLB high-granularity mapping, we need to go through the
supported hugepage sizes in decreasing order so that we pick the largest
size that works. Consider the case where we're faulting in a 1G hugepage
for the first time: we want hugetlb_fault/hugetlb_no_page to map it with
a PUD. By going through the sizes in decreasing order, we will find that
PUD_SIZE works before finding out that PMD_SIZE or PAGE_SIZE work too.
This commit also changes bootmem hugepages from storing hstate pointers
directly to storing the hstate sizes. The hstate pointers used for
boot-time-allocated hugepages become invalid after we sort the hstates.
`gather_bootmem_prealloc`, called after the hstates have been sorted,
now converts the size to the correct hstate.
Signed-off-by: James Houghton <jthoughton@...gle.com>
---
include/linux/hugetlb.h | 2 +-
mm/hugetlb.c | 49 ++++++++++++++++++++++++++++++++---------
2 files changed, 40 insertions(+), 11 deletions(-)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index daf993fdbc38..8a664a9dd0a8 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -789,7 +789,7 @@ struct hstate {
struct huge_bootmem_page {
struct list_head list;
- struct hstate *hstate;
+ unsigned long hstate_sz;
};
int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 2fb95ecafc63..1e9e149587b3 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -34,6 +34,7 @@
#include <linux/nospec.h>
#include <linux/delayacct.h>
#include <linux/memory.h>
+#include <linux/sort.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
@@ -49,6 +50,10 @@
int hugetlb_max_hstate __read_mostly;
unsigned int default_hstate_idx;
+/*
+ * After hugetlb_init_hstates is called, hstates will be sorted from largest
+ * to smallest.
+ */
struct hstate hstates[HUGE_MAX_HSTATE];
#ifdef CONFIG_CMA
@@ -3347,7 +3352,7 @@ int __alloc_bootmem_huge_page(struct hstate *h, int nid)
/* Put them into a private list first because mem_map is not up yet */
INIT_LIST_HEAD(&m->list);
list_add(&m->list, &huge_boot_pages);
- m->hstate = h;
+ m->hstate_sz = huge_page_size(h);
return 1;
}
@@ -3362,7 +3367,7 @@ static void __init gather_bootmem_prealloc(void)
list_for_each_entry(m, &huge_boot_pages, list) {
struct page *page = virt_to_page(m);
struct folio *folio = page_folio(page);
- struct hstate *h = m->hstate;
+ struct hstate *h = size_to_hstate(m->hstate_sz);
VM_BUG_ON(!hstate_is_gigantic(h));
WARN_ON(folio_ref_count(folio) != 1);
@@ -3478,9 +3483,38 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
kfree(node_alloc_noretry);
}
+/*
+ * Comparison callback for sort(): orders two struct hstate elements by
+ * huge page size, largest first. Returns -1 when @a is the larger size,
+ * 1 when @b is the larger size, and 0 when both sizes are equal.
+ */
+static int compare_hstates_decreasing(const void *a, const void *b)
+{
+	const struct hstate *lhs = a;
+	const struct hstate *rhs = b;
+	unsigned long lhs_sz = huge_page_size(lhs);
+	unsigned long rhs_sz = huge_page_size(rhs);
+
+	if (lhs_sz == rhs_sz)
+		return 0;
+
+	/* Bigger pages sort towards the front of the array. */
+	return lhs_sz > rhs_sz ? -1 : 1;
+}
+
+/*
+ * Sort hstates[] in place from the largest huge page size to the smallest,
+ * then recompute default_hstate_idx so that it still refers to the hstate
+ * with the default size.
+ *
+ * Marked __init because the only caller in this patch,
+ * hugetlb_init_hstates(), is itself __init.
+ */
+static void __init sort_hstates(void)
+{
+	/* Capture the default size before sorting so it can be found again. */
+	unsigned long default_hstate_sz = huge_page_size(&default_hstate);
+
+	/* Sort from largest to smallest. */
+	sort(hstates, hugetlb_max_hstate, sizeof(*hstates),
+	     compare_hstates_decreasing, NULL);
+
+	/*
+	 * We may have changed the location of the default hstate, so we need to
+	 * update it.
+	 */
+	default_hstate_idx = hstate_index(size_to_hstate(default_hstate_sz));
+}
+
static void __init hugetlb_init_hstates(void)
{
- struct hstate *h, *h2;
+ struct hstate *h;
+
+ sort_hstates();
for_each_hstate(h) {
/* oversize hugepages were init'ed in early boot */
@@ -3499,13 +3533,8 @@ static void __init hugetlb_init_hstates(void)
continue;
if (hugetlb_cma_size && h->order <= HUGETLB_PAGE_ORDER)
continue;
- for_each_hstate(h2) {
- if (h2 == h)
- continue;
- if (h2->order < h->order &&
- h2->order > h->demote_order)
- h->demote_order = h2->order;
- }
+		/*
+		 * hstates[] is sorted from largest to smallest, so the next
+		 * smaller size (the demotion target) is the following entry,
+		 * h + 1. The smallest hstate has no successor and keeps its
+		 * existing demote_order.
+		 */
+		if (h + 1 < &hstates[hugetlb_max_hstate])
+			h->demote_order = huge_page_order(h + 1);
}
}
--
2.39.0.314.g84b9a713c41-goog
Powered by blists - more mailing lists