From 1734bb24a38052f13e3f2ddb26b82aa043638c95 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Mon, 25 Sep 2023 16:55:18 -0400 Subject: [PATCH v2 2/2] mm: enable move_freepages() to properly move part of free pages. alloc_contig_range() uses set_migrateype_isolate(), which eventually calls move_freepages(), to isolate free pages. But move_freepages() was not able to move free pages partially covered by the specified range, leaving a race window open[1]. Fix it by teaching move_freepages() to split a free page when only part of it is going to be moved. In addition, when a >pageblock_order free page is moved, only its first pageblock migratetype is changed. It can cause warnings later. Fix it by set all pageblocks in a free page to the same migratetype after move. split_free_page() is changed to be used in move_freepages() and isolate_single_pageblock(). A common code to find the start pfn of a free page is refactored in get_freepage_start_pfn(). [1] https://lore.kernel.org/linux-mm/20230920160400.GC124289@cmpxchg.org/ Signed-off-by: Zi Yan --- mm/page_alloc.c | 94 +++++++++++++++++++++++++++++++++++++-------- mm/page_isolation.c | 38 +++++------------- 2 files changed, 88 insertions(+), 44 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 928bb595d7cc..a86025f5e80a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -865,15 +865,15 @@ int split_free_page(struct page *free_page, struct zone *zone = page_zone(free_page); unsigned long free_page_pfn = page_to_pfn(free_page); unsigned long pfn; - unsigned long flags; int free_page_order; int mt; int ret = 0; - if (split_pfn_offset == 0) - return ret; + /* zone lock should be held when this function is called */ + lockdep_assert_held(&zone->lock); - spin_lock_irqsave(&zone->lock, flags); + if (split_pfn_offset == 0 || split_pfn_offset >= (1 << order)) + return ret; if (!PageBuddy(free_page) || buddy_order(free_page) != order) { ret = -ENOENT; @@ -899,7 +899,6 @@ int split_free_page(struct page *free_page, split_pfn_offset = (1UL << order) - (pfn - free_page_pfn); } out: - spin_unlock_irqrestore(&zone->lock, flags); return ret; } /* @@ -1588,6 +1587,25 @@ static inline struct page *__rmqueue_cma_fallback(struct zone *zone, unsigned int order) { return NULL; } #endif +/* + * Get first pfn of the free page, where pfn is in. If this free page does + * not exist, return the given pfn. + */ +static unsigned long get_freepage_start_pfn(unsigned long pfn) +{ + int order = 0; + unsigned long start_pfn = pfn; + + while (!PageBuddy(pfn_to_page(start_pfn))) { + if (++order > MAX_ORDER) { + start_pfn = pfn; + break; + } + start_pfn &= ~0UL << order; + } + return start_pfn; +} + /* * Move the free pages in a range to the freelist tail of the requested type. * Note that start_page and end_pages are not aligned on a pageblock @@ -1597,9 +1615,29 @@ static int move_freepages(struct zone *zone, unsigned long start_pfn, unsigned long end_pfn, int old_mt, int new_mt) { struct page *page; - unsigned long pfn; + unsigned long pfn, pfn2; unsigned int order; int pages_moved = 0; + unsigned long mt_changed_pfn = start_pfn - pageblock_nr_pages; + unsigned long new_start_pfn = get_freepage_start_pfn(start_pfn); + + /* split at start_pfn if it is in the middle of a free page */ + if (new_start_pfn != start_pfn && PageBuddy(pfn_to_page(new_start_pfn))) { + struct page *new_page = pfn_to_page(new_start_pfn); + int new_page_order = buddy_order(new_page); + + if (new_start_pfn + (1 << new_page_order) > start_pfn) { + /* change migratetype so that split_free_page can work */ + set_pageblock_migratetype(pfn_to_page(start_pfn), new_mt); + split_free_page(new_page, buddy_order(new_page), + start_pfn - new_start_pfn); + + mt_changed_pfn = start_pfn; + /* move to next page */ + start_pfn = new_start_pfn + (1 << new_page_order); + } + } + for (pfn = start_pfn; pfn <= end_pfn;) { page = pfn_to_page(pfn); @@ -1614,10 +1652,43 @@ static int move_freepages(struct zone *zone, unsigned long start_pfn, order = buddy_order(page); move_to_free_list(page, zone, order, old_mt, new_mt); + /* + * set page migratetype 1) only after we move all free pages in + * one pageblock and 2) for all pageblocks within the page. + * + * for 1), since move_to_free_list() checks page migratetype with + * old_mt and changing one page migratetype affects all pages + * within the same pageblock, if we are moving more than + * one free pages in the same pageblock, setting migratetype + * right after first move_to_free_list() triggers the warning + * in the following move_to_free_list(). + * + * for 2), when a free page order is greater than pageblock_order, + * all pageblocks within the free page need to be changed after + * move_to_free_list(). + */ + if (pfn + (1 << order) > pageblock_end_pfn(pfn)) { + for (pfn2 = pfn; + pfn2 < min_t(unsigned long, + pfn + (1 << order), + end_pfn + 1); + pfn2 += pageblock_nr_pages) { + set_pageblock_migratetype(pfn_to_page(pfn2), + new_mt); + mt_changed_pfn = pfn2; + } + /* split the free page if it goes beyond the specified range */ + if (pfn + (1 << order) > (end_pfn + 1)) + split_free_page(page, order, end_pfn + 1 - pfn); + } pfn += 1 << order; pages_moved += 1 << order; } - set_pageblock_migratetype(pfn_to_page(start_pfn), new_mt); + /* set migratetype for the remaining pageblocks */ + for (pfn2 = mt_changed_pfn + pageblock_nr_pages; + pfn2 <= end_pfn; + pfn2 += pageblock_nr_pages) + set_pageblock_migratetype(pfn_to_page(pfn2), new_mt); return pages_moved; } @@ -6213,14 +6284,7 @@ int alloc_contig_range(unsigned long start, unsigned long end, */ order = 0; - outer_start = start; - while (!PageBuddy(pfn_to_page(outer_start))) { - if (++order > MAX_ORDER) { - outer_start = start; - break; - } - outer_start &= ~0UL << order; - } + outer_start = get_freepage_start_pfn(start); if (outer_start != start) { order = buddy_order(pfn_to_page(outer_start)); diff --git a/mm/page_isolation.c b/mm/page_isolation.c index 5f8c658c0853..e053386f5e3a 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -380,11 +380,8 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, if (PageBuddy(page)) { int order = buddy_order(page); - if (pfn + (1UL << order) > boundary_pfn) { - /* free page changed before split, check it again */ - if (split_free_page(page, order, boundary_pfn - pfn)) - continue; - } + VM_WARN_ONCE(pfn + (1UL << order) > boundary_pfn, + "a free page sits across isolation boundary"); pfn += 1UL << order; continue; @@ -408,8 +405,6 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, * can be migrated. Otherwise, fail the isolation. */ if (PageHuge(page) || PageLRU(page) || __PageMovable(page)) { - int order; - unsigned long outer_pfn; int page_mt = get_pageblock_migratetype(page); bool isolate_page = !is_migrate_isolate_page(page); struct compact_control cc = { @@ -427,9 +422,11 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, /* * XXX: mark the page as MIGRATE_ISOLATE so that * no one else can grab the freed page after migration. - * Ideally, the page should be freed as two separate - * pages to be added into separate migratetype free - * lists. + * The page should be freed into separate migratetype + * free lists, unless the free page order is greater + * than pageblock order. It is not the case now, + * since gigantic hugetlb is freed as order-0 + * pages and LRU pages do not cross pageblocks. */ if (isolate_page) { ret = set_migratetype_isolate(page, page_mt, @@ -451,25 +448,8 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, if (ret) goto failed; - /* - * reset pfn to the head of the free page, so - * that the free page handling code above can split - * the free page to the right migratetype list. - * - * head_pfn is not used here as a hugetlb page order - * can be bigger than MAX_ORDER, but after it is - * freed, the free page order is not. Use pfn within - * the range to find the head of the free page. - */ - order = 0; - outer_pfn = pfn; - while (!PageBuddy(pfn_to_page(outer_pfn))) { - /* stop if we cannot find the free page */ - if (++order > MAX_ORDER) - goto failed; - outer_pfn &= ~0UL << order; - } - pfn = outer_pfn; + + pfn = head_pfn + nr_pages; continue; } else #endif -- 2.40.1