From 75a4d327efd94230f3b9aab29ef6ec0badd488a6 Mon Sep 17 00:00:00 2001
From: Zi Yan
Date: Mon, 25 Sep 2023 16:55:18 -0400
Subject: [PATCH 3/3] mm: enable move_freepages() to properly move part of
 free pages.

alloc_contig_range() uses set_migratetype_isolate(), which eventually
calls move_freepages(), to isolate free pages. But move_freepages() was
not able to move free pages partially covered by the specified range,
leaving a race window open[1]. Fix it by teaching move_freepages() to
split a free page when only part of it is going to be moved.

In addition, when a >pageblock_order free page is moved, only its first
pageblock's migratetype is changed, which can cause warnings later. Fix
it by setting all pageblocks in a free page to the same migratetype
after the move.

split_free_page() is changed so that it can be used by both
move_freepages() and isolate_single_pageblock(). The common code that
finds the start pfn of a free page is refactored into
get_freepage_start_pfn().

[1] https://lore.kernel.org/linux-mm/20230920160400.GC124289@cmpxchg.org/

Signed-off-by: Zi Yan
---
 mm/page_alloc.c     | 75 ++++++++++++++++++++++++++++++++++++---
 mm/page_isolation.c | 17 +++++++---
 2 files changed, 73 insertions(+), 19 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7c41cb5d8a36..3fd5ab40b55c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -866,15 +866,15 @@ int split_free_page(struct page *free_page,
 	struct zone *zone = page_zone(free_page);
 	unsigned long free_page_pfn = page_to_pfn(free_page);
 	unsigned long pfn;
-	unsigned long flags;
 	int free_page_order;
 	int mt;
 	int ret = 0;
 
-	if (split_pfn_offset == 0)
-		return ret;
+	/* zone lock should be held when this function is called */
+	lockdep_assert_held(&zone->lock);
 
-	spin_lock_irqsave(&zone->lock, flags);
+	if (split_pfn_offset == 0 || split_pfn_offset >= (1 << order))
+		return ret;
 
 	if (!PageBuddy(free_page) || buddy_order(free_page) != order) {
 		ret = -ENOENT;
@@ -900,7 +900,6 @@ int split_free_page(struct page *free_page,
 		split_pfn_offset = (1UL << order) - (pfn - free_page_pfn);
 	}
 out:
-	spin_unlock_irqrestore(&zone->lock, flags);
 	return ret;
 }
 /*
@@ -1589,6 +1588,25 @@ static inline struct page *__rmqueue_cma_fallback(struct zone *zone,
 					unsigned int order) { return NULL; }
 #endif
 
+/*
+ * Get the first pfn of the free page that the given pfn falls in. If no
+ * such free page exists, return the given pfn.
+ */
+static unsigned long get_freepage_start_pfn(unsigned long pfn)
+{
+	int order = 0;
+	unsigned long start_pfn = pfn;
+
+	while (!PageBuddy(pfn_to_page(start_pfn))) {
+		if (++order > MAX_ORDER) {
+			start_pfn = pfn;
+			break;
+		}
+		start_pfn &= ~0UL << order;
+	}
+	return start_pfn;
+}
+
 /*
  * Move the free pages in a range to the freelist tail of the requested type.
 * Note that start_page and end_pages are not aligned on a pageblock
@@ -1598,9 +1616,29 @@ static int move_freepages(struct zone *zone, unsigned long start_pfn,
 			  unsigned long end_pfn, int old_mt, int new_mt)
 {
 	struct page *page;
-	unsigned long pfn;
+	unsigned long pfn, pfn2;
 	unsigned int order;
 	int pages_moved = 0;
+	unsigned long mt_change_pfn = start_pfn;
+	unsigned long new_start_pfn = get_freepage_start_pfn(start_pfn);
+
+	/* split at start_pfn if it is in the middle of a free page */
+	if (new_start_pfn != start_pfn && PageBuddy(pfn_to_page(new_start_pfn))) {
+		struct page *new_page = pfn_to_page(new_start_pfn);
+		int new_page_order = buddy_order(new_page);
+
+		if (new_start_pfn + (1 << new_page_order) > start_pfn) {
+			/* change migratetype so that split_free_page can work */
+			set_pageblock_migratetype(pfn_to_page(start_pfn), new_mt);
+			split_free_page(new_page, buddy_order(new_page),
+					start_pfn - new_start_pfn);
+
+			mt_change_pfn = start_pfn;
+			/* move to next page */
+			start_pfn = new_start_pfn + (1 << new_page_order);
+		}
+	}
 
 	for (pfn = start_pfn; pfn <= end_pfn;) {
 		page = pfn_to_page(pfn);
@@ -1615,10 +1653,24 @@ static int move_freepages(struct zone *zone, unsigned long start_pfn,
 
 		order = buddy_order(page);
 		move_to_free_list(page, zone, order, old_mt, new_mt);
+		/*
+		 * Set the migratetype for all pageblocks within the page,
+		 * and only after all free pages in a pageblock are moved.
+		 */
+		if (pfn + (1 << order) >= pageblock_end_pfn(pfn)) {
+			for (pfn2 = pfn; pfn2 < pfn + (1 << order);
+			     pfn2 += pageblock_nr_pages) {
+				set_pageblock_migratetype(pfn_to_page(pfn2),
+							  new_mt);
+				mt_change_pfn = pfn2;
+			}
+		}
 		pfn += 1 << order;
 		pages_moved += 1 << order;
 	}
 
-	set_pageblock_migratetype(pfn_to_page(start_pfn), new_mt);
+	/* set migratetype for the remaining pageblocks */
+	for (pfn2 = mt_change_pfn; pfn2 <= end_pfn; pfn2 += pageblock_nr_pages)
+		set_pageblock_migratetype(pfn_to_page(pfn2), new_mt);
 
 	return pages_moved;
 }
@@ -6214,14 +6266,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 	 */
 	order = 0;
-	outer_start = start;
-	while (!PageBuddy(pfn_to_page(outer_start))) {
-		if (++order > MAX_ORDER) {
-			outer_start = start;
-			break;
-		}
-		outer_start &= ~0UL << order;
-	}
+	outer_start = get_freepage_start_pfn(start);
 
 	if (outer_start != start) {
 		order = buddy_order(pfn_to_page(outer_start));
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index ee7818ff4e12..b5f90ae03190 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -380,8 +380,15 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
 			int order = buddy_order(page);
 
 			if (pfn + (1UL << order) > boundary_pfn) {
+				int res;
+				unsigned long flags;
+
+				spin_lock_irqsave(&zone->lock, flags);
+				res = split_free_page(page, order, boundary_pfn - pfn);
+				spin_unlock_irqrestore(&zone->lock, flags);
+
 				/* free page changed before split, check it again */
-				if (split_free_page(page, order, boundary_pfn - pfn))
+				if (res)
 					continue;
 			}
 
@@ -426,9 +433,11 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
 			/*
 			 * XXX: mark the page as MIGRATE_ISOLATE so that
 			 * no one else can grab the freed page after migration.
-			 * Ideally, the page should be freed as two separate
-			 * pages to be added into separate migratetype free
-			 * lists.
+			 * The page should be freed into separate migratetype
+			 * free lists, unless the free page order is greater
+			 * than pageblock order. That is not an issue now,
+			 * since gigantic hugetlb pages are freed as order-0
+			 * pages and LRU pages do not cross pageblocks.
 			 */
 			if (isolate_page) {
 				ret = set_migratetype_isolate(page, page_mt,
-- 
2.40.1
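
For readers who want to check the masking arithmetic, here is a minimal
standalone userspace sketch of the walk get_freepage_start_pfn() performs.
PageBuddy() and buddy_order() are kernel internals, so a hypothetical
is_buddy_start() predicate stands in for them, and MAX_ORDER is assumed to
be 10; only the pfn arithmetic mirrors the patch.

#include <stdio.h>
#include <stdbool.h>

#define MAX_ORDER 10

/* Hypothetical stand-in for PageBuddy(pfn_to_page(pfn)): pretend a
 * single order-9 free page starts at pfn 512. */
static bool is_buddy_start(unsigned long pfn)
{
	return pfn == 512;
}

/* Same masking walk as get_freepage_start_pfn() in the patch. */
static unsigned long freepage_start_pfn(unsigned long pfn)
{
	int order = 0;
	unsigned long start_pfn = pfn;

	while (!is_buddy_start(start_pfn)) {
		if (++order > MAX_ORDER) {
			start_pfn = pfn;	/* no enclosing free page */
			break;
		}
		/* clear one more low bit per step: a buddy page of a
		 * given order is always aligned to that order */
		start_pfn &= ~0UL << order;
	}
	return start_pfn;
}

int main(void)
{
	/* pfn 700 lies inside the order-9 free page starting at 512 */
	printf("free page around pfn 700 starts at pfn %lu\n",
	       freepage_start_pfn(700));
	return 0;
}

This prints 512: each iteration widens the alignment until the candidate
lands on a buddy-page head, exactly the loop alloc_contig_range() used to
open-code before this refactor.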
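
The split decision move_freepages() now makes at start_pfn reduces to a
straddle test plus an offset computation. A minimal sketch with hardcoded
example values standing in for the buddy bookkeeping (the pfns and the
order below are illustrative, not taken from the patch):

#include <stdio.h>

int main(void)
{
	unsigned long start_pfn = 700;		/* range to move begins here */
	unsigned long new_start_pfn = 512;	/* start of enclosing free page */
	int order = 9;				/* its buddy order */

	/* same straddle test the patch applies before splitting */
	if (new_start_pfn != start_pfn &&
	    new_start_pfn + (1UL << order) > start_pfn) {
		unsigned long split_pfn_offset = start_pfn - new_start_pfn;

		/* the real code calls split_free_page(new_page, order,
		 * split_pfn_offset) here, then resumes scanning past the
		 * original free page */
		printf("split at offset %lu, resume scan at pfn %lu\n",
		       split_pfn_offset, new_start_pfn + (1UL << order));
	}
	return 0;
}

Here the free page [512, 1024) straddles start_pfn 700, so it is split at
offset 188 and the scan resumes at pfn 1024; pages below start_pfn stay on
their old freelist, closing the race window described above.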
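
The per-pageblock migratetype fixup can likewise be checked in isolation.
A sketch assuming order-9 pageblocks (pageblock_nr_pages == 512, the
common x86_64 default with 4KB pages); the pfn and order values are again
illustrative:

#include <stdio.h>

#define PAGEBLOCK_NR_PAGES 512UL	/* order-9 pageblocks assumed */

int main(void)
{
	unsigned long pfn = 1024;	/* start of a moved order-10 free page */
	int order = 10;
	unsigned long pfn2;

	/* mirror of the fixup loop: every pageblock the free page spans
	 * gets the new migratetype, not just the first one */
	for (pfn2 = pfn; pfn2 < pfn + (1UL << order);
	     pfn2 += PAGEBLOCK_NR_PAGES)
		printf("set_pageblock_migratetype(pfn %lu, new_mt)\n", pfn2);
	return 0;
}

An order-10 free page covers two order-9 pageblocks (here 1024 and 1536),
which is why setting only the first pageblock's migratetype, as the old
code did, could trip warnings later.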