Message-ID: <20240811212129.3074314-2-yuzhao@google.com>
Date: Sun, 11 Aug 2024 15:21:27 -0600
From: Yu Zhao <yuzhao@...gle.com>
To: Andrew Morton <akpm@...ux-foundation.org>, Muchun Song <muchun.song@...ux.dev>
Cc: "Matthew Wilcox (Oracle)" <willy@...radead.org>, Zi Yan <ziy@...dia.com>, linux-mm@...ck.org,
linux-kernel@...r.kernel.org, Yu Zhao <yuzhao@...gle.com>
Subject: [PATCH mm-unstable v1 1/3] mm/contig_alloc: support __GFP_COMP

Support __GFP_COMP in alloc_contig_range(). When the flag is set, upon
success the function returns a large folio prepared by
prep_new_page(), rather than a range of order-0 pages prepared by
split_free_pages() (which is renamed from split_map_pages()).
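
For illustration, a hypothetical caller of the new mode (not part of
this patch; 'start' and 'order' below are assumed, and the range must
be naturally aligned with a power-of-2 size) might look like:

	/* Hypothetical caller, for illustration only. */
	int ret = alloc_contig_range(start, start + (1UL << order),
				     MIGRATE_MOVABLE,
				     GFP_KERNEL | __GFP_COMP);

	if (!ret) {
		/* one large folio instead of 1 << order order-0 pages */
		struct folio *folio = pfn_folio(start);

		/* folio_order(folio) == order, even above MAX_PAGE_ORDER */
	}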

alloc_contig_range() can return folios larger than MAX_PAGE_ORDER,
e.g., gigantic hugeTLB folios. As a result, on the free path,
free_one_page() needs to handle this case by calling
split_large_buddy(), and free_contig_range() needs to handle large
folios by calling folio_put().
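
The matching free side, under the same assumptions as the sketch
above, reduces to a single folio_put(); nr_pages must equal
folio_nr_pages() of the large folio:

	/* Hypothetical, pairs with the allocation sketch above. */
	free_contig_range(start, 1UL << order);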
Signed-off-by: Yu Zhao <yuzhao@...gle.com>
---
mm/compaction.c | 48 +++------------------
mm/internal.h | 9 ++++
mm/page_alloc.c | 111 ++++++++++++++++++++++++++++++++++--------------
3 files changed, 94 insertions(+), 74 deletions(-)
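
Note below the cut (an illustration, not part of the patch itself):
with __GFP_COMP, a range that cannot form a single folio now fails
instead of being handed out as loose pages, e.g.:

	/*
	 * Hypothetical: 3 pageblocks is not a power of 2, so even if
	 * migration and isolation succeed, the __GFP_COMP path
	 * returns -EINVAL and fires the new WARN.
	 */
	ret = alloc_contig_range(pfn, pfn + 3 * pageblock_nr_pages,
				 MIGRATE_MOVABLE,
				 GFP_KERNEL | __GFP_COMP);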
diff --git a/mm/compaction.c b/mm/compaction.c
index eb95e9b435d0..1ebfef98e1d0 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -79,40 +79,6 @@ static inline bool is_via_compact_memory(int order) { return false; }
#define COMPACTION_HPAGE_ORDER (PMD_SHIFT - PAGE_SHIFT)
#endif
-static struct page *mark_allocated_noprof(struct page *page, unsigned int order, gfp_t gfp_flags)
-{
- post_alloc_hook(page, order, __GFP_MOVABLE);
- return page;
-}
-#define mark_allocated(...) alloc_hooks(mark_allocated_noprof(__VA_ARGS__))
-
-static void split_map_pages(struct list_head *freepages)
-{
- unsigned int i, order;
- struct page *page, *next;
- LIST_HEAD(tmp_list);
-
- for (order = 0; order < NR_PAGE_ORDERS; order++) {
- list_for_each_entry_safe(page, next, &freepages[order], lru) {
- unsigned int nr_pages;
-
- list_del(&page->lru);
-
- nr_pages = 1 << order;
-
- mark_allocated(page, order, __GFP_MOVABLE);
- if (order)
- split_page(page, order);
-
- for (i = 0; i < nr_pages; i++) {
- list_add(&page->lru, &tmp_list);
- page++;
- }
- }
- list_splice_init(&tmp_list, &freepages[0]);
- }
-}
-
static unsigned long release_free_list(struct list_head *freepages)
{
int order;
@@ -742,11 +708,11 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
*
* Non-free pages, invalid PFNs, or zone boundaries within the
* [start_pfn, end_pfn) range are considered errors, cause function to
- * undo its actions and return zero.
+ * undo its actions and return zero. cc->freepages[] are empty.
*
* Otherwise, function returns one-past-the-last PFN of isolated page
* (which may be greater then end_pfn if end fell in a middle of
- * a free page).
+ * a free page). cc->freepages[] contains the isolated free pages.
*/
unsigned long
isolate_freepages_range(struct compact_control *cc,
@@ -754,10 +720,9 @@ isolate_freepages_range(struct compact_control *cc,
{
unsigned long isolated, pfn, block_start_pfn, block_end_pfn;
int order;
- struct list_head tmp_freepages[NR_PAGE_ORDERS];
for (order = 0; order < NR_PAGE_ORDERS; order++)
- INIT_LIST_HEAD(&tmp_freepages[order]);
+ INIT_LIST_HEAD(&cc->freepages[order]);
pfn = start_pfn;
block_start_pfn = pageblock_start_pfn(pfn);
@@ -788,7 +753,7 @@ isolate_freepages_range(struct compact_control *cc,
break;
isolated = isolate_freepages_block(cc, &isolate_start_pfn,
- block_end_pfn, tmp_freepages, 0, true);
+ block_end_pfn, cc->freepages, 0, true);
/*
* In strict mode, isolate_freepages_block() returns 0 if
@@ -807,13 +772,10 @@ isolate_freepages_range(struct compact_control *cc,
if (pfn < end_pfn) {
/* Loop terminated early, cleanup. */
- release_free_list(tmp_freepages);
+ release_free_list(cc->freepages);
return 0;
}
- /* __isolate_free_page() does not map the pages */
- split_map_pages(tmp_freepages);
-
/* We don't use freelists for anything. */
return pfn;
}
diff --git a/mm/internal.h b/mm/internal.h
index acda347620c6..03e795ce755f 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -679,6 +679,15 @@ extern void prep_compound_page(struct page *page, unsigned int order);
extern void post_alloc_hook(struct page *page, unsigned int order,
gfp_t gfp_flags);
+
+static inline struct page *post_alloc_hook_noprof(struct page *page, unsigned int order,
+ gfp_t gfp_flags)
+{
+ post_alloc_hook(page, order, __GFP_MOVABLE);
+ return page;
+}
+#define mark_allocated(...) alloc_hooks(post_alloc_hook_noprof(__VA_ARGS__))
+
extern bool free_pages_prepare(struct page *page, unsigned int order);
extern int user_min_free_kbytes;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 84a7154fde93..6c801404a108 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1196,16 +1196,36 @@ static void free_pcppages_bulk(struct zone *zone, int count,
spin_unlock_irqrestore(&zone->lock, flags);
}
+/* Split a multi-block free page into its individual pageblocks */
+static void split_large_buddy(struct zone *zone, struct page *page,
+ unsigned long pfn, int order, fpi_t fpi)
+{
+ unsigned long end = pfn + (1 << order);
+
+ VM_WARN_ON_ONCE(!IS_ALIGNED(pfn, 1 << order));
+ /* Caller removed page from freelist, buddy info cleared! */
+ VM_WARN_ON_ONCE(PageBuddy(page));
+
+ if (order > pageblock_order)
+ order = pageblock_order;
+
+ while (pfn != end) {
+ int mt = get_pfnblock_migratetype(page, pfn);
+
+ __free_one_page(page, pfn, zone, order, mt, fpi);
+ pfn += 1 << order;
+ page = pfn_to_page(pfn);
+ }
+}
+
static void free_one_page(struct zone *zone, struct page *page,
unsigned long pfn, unsigned int order,
fpi_t fpi_flags)
{
unsigned long flags;
- int migratetype;
spin_lock_irqsave(&zone->lock, flags);
- migratetype = get_pfnblock_migratetype(page, pfn);
- __free_one_page(page, pfn, zone, order, migratetype, fpi_flags);
+ split_large_buddy(zone, page, pfn, order, fpi_flags);
spin_unlock_irqrestore(&zone->lock, flags);
}
@@ -1697,27 +1717,6 @@ static unsigned long find_large_buddy(unsigned long start_pfn)
return start_pfn;
}
-/* Split a multi-block free page into its individual pageblocks */
-static void split_large_buddy(struct zone *zone, struct page *page,
- unsigned long pfn, int order)
-{
- unsigned long end_pfn = pfn + (1 << order);
-
- VM_WARN_ON_ONCE(order <= pageblock_order);
- VM_WARN_ON_ONCE(pfn & (pageblock_nr_pages - 1));
-
- /* Caller removed page from freelist, buddy info cleared! */
- VM_WARN_ON_ONCE(PageBuddy(page));
-
- while (pfn != end_pfn) {
- int mt = get_pfnblock_migratetype(page, pfn);
-
- __free_one_page(page, pfn, zone, pageblock_order, mt, FPI_NONE);
- pfn += pageblock_nr_pages;
- page = pfn_to_page(pfn);
- }
-}
-
/**
* move_freepages_block_isolate - move free pages in block for page isolation
* @zone: the zone
@@ -1758,7 +1757,7 @@ bool move_freepages_block_isolate(struct zone *zone, struct page *page,
del_page_from_free_list(buddy, zone, order,
get_pfnblock_migratetype(buddy, pfn));
set_pageblock_migratetype(page, migratetype);
- split_large_buddy(zone, buddy, pfn, order);
+ split_large_buddy(zone, buddy, pfn, order, FPI_NONE);
return true;
}
@@ -1769,7 +1768,7 @@ bool move_freepages_block_isolate(struct zone *zone, struct page *page,
del_page_from_free_list(page, zone, order,
get_pfnblock_migratetype(page, pfn));
set_pageblock_migratetype(page, migratetype);
- split_large_buddy(zone, page, pfn, order);
+ split_large_buddy(zone, page, pfn, order, FPI_NONE);
return true;
}
move:
@@ -6482,6 +6481,31 @@ int __alloc_contig_migrate_range(struct compact_control *cc,
return (ret < 0) ? ret : 0;
}
+static void split_free_pages(struct list_head *list)
+{
+ int order;
+
+ for (order = 0; order < NR_PAGE_ORDERS; order++) {
+ struct page *page, *next;
+ int nr_pages = 1 << order;
+
+ list_for_each_entry_safe(page, next, &list[order], lru) {
+ int i;
+
+ mark_allocated(page, order, __GFP_MOVABLE);
+ if (!order)
+ continue;
+
+ split_page(page, order);
+
+ /* add all subpages to the order-0 head, in sequence */
+ list_del(&page->lru);
+ for (i = 0; i < nr_pages; i++)
+ list_add_tail(&page[i].lru, &list[0]);
+ }
+ }
+}
+
/**
* alloc_contig_range() -- tries to allocate given range of pages
* @start: start PFN to allocate
@@ -6594,12 +6618,25 @@ int alloc_contig_range_noprof(unsigned long start, unsigned long end,
goto done;
}
- /* Free head and tail (if any) */
- if (start != outer_start)
- free_contig_range(outer_start, start - outer_start);
- if (end != outer_end)
- free_contig_range(end, outer_end - end);
+ if (!(gfp_mask & __GFP_COMP)) {
+ split_free_pages(cc.freepages);
+ /* Free head and tail (if any) */
+ if (start != outer_start)
+ free_contig_range(outer_start, start - outer_start);
+ if (end != outer_end)
+ free_contig_range(end, outer_end - end);
+ } else if (start == outer_start && end == outer_end && is_power_of_2(end - start)) {
+ struct page *head = pfn_to_page(start);
+ int order = ilog2(end - start);
+
+ check_new_pages(head, order);
+ prep_new_page(head, order, gfp_mask, 0);
+ } else {
+ ret = -EINVAL;
+ WARN(true, "PFN range: requested [%lu, %lu), allocated [%lu, %lu)\n",
+ start, end, outer_start, outer_end);
+ }
done:
undo_isolate_page_range(start, end, migratetype);
return ret;
@@ -6708,6 +6745,18 @@ struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
void free_contig_range(unsigned long pfn, unsigned long nr_pages)
{
unsigned long count = 0;
+ struct folio *folio = pfn_folio(pfn);
+
+ if (folio_test_large(folio)) {
+ int expected = folio_nr_pages(folio);
+
+ if (nr_pages == expected)
+ folio_put(folio);
+ else
+ WARN(true, "PFN %lu: nr_pages %lu != expected %d\n",
+ pfn, nr_pages, expected);
+ return;
+ }
for (; nr_pages--; pfn++) {
struct page *page = pfn_to_page(pfn);
--
2.46.0.76.ge559c4bf1a-goog