linux-kernel - [PATCH v2 7/8] mm/isolation: fix freepage counting bug on start/undo_isolat_page

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1407309517-3270-11-git-send-email-iamjoonsoo.kim@lge.com>
Date:	Wed,  6 Aug 2014 16:18:36 +0900
From:	Joonsoo Kim <iamjoonsoo.kim@....com>
To:	Andrew Morton <akpm@...ux-foundation.org>
Cc:	"Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>,
	Rik van Riel <riel@...hat.com>, Mel Gorman <mgorman@...e.de>,
	Johannes Weiner <hannes@...xchg.org>,
	Minchan Kim <minchan@...nel.org>,
	Yasuaki Ishimatsu <isimatu.yasuaki@...fujitsu.com>,
	Zhang Yanfei <zhangyanfei@...fujitsu.com>,
	"Srivatsa S. Bhat" <srivatsa.bhat@...ux.vnet.ibm.com>,
	Tang Chen <tangchen@...fujitsu.com>,
	Naoya Horiguchi <n-horiguchi@...jp.nec.com>,
	Bartlomiej Zolnierkiewicz <b.zolnierkie@...sung.com>,
	Wen Congyang <wency@...fujitsu.com>,
	Marek Szyprowski <m.szyprowski@...sung.com>,
	Michal Nazarewicz <mina86@...a86.com>,
	Laura Abbott <lauraa@...eaurora.org>,
	Heesub Shin <heesub.shin@...sung.com>,
	"Aneesh Kumar K.V" <aneesh.kumar@...ux.vnet.ibm.com>,
	Ritesh Harjani <ritesh.list@...il.com>,
	t.stanislaws@...sung.com, Gioh Kim <gioh.kim@....com>,
	linux-mm@...ck.org, linux-kernel@...r.kernel.org,
	Joonsoo Kim <iamjoonsoo.kim@....com>
Subject: [PATCH v2 7/8] mm/isolation: fix freepage counting bug on start/undo_isolat_page_range()

Current isolation logic isolates each pageblock individually.
This causes freepage counting problem when page with pageblock order is
merged with other page on different buddy list. We can prevent it by
following solutions.

1. decrease MAX_ORDER to pageblock order
2. prevent merging buddy pages if they are on different buddy list

Solution 1. looks really easy, but, I'm not sure if there is a user
to allocate more than pageblock order.

Solution 2. seems not to get greeted, because it needs to inserts
hooks to the core part of allocator.

So, this is solution 3, that is, making start/undo_isolat_page_range()
bug free through handling whole range at one go. If given range is
aligned with MAX_ORDER properly, page isn't merged with other page on
different buddy list. So we can calm down freepage counting bug.
Unfortunately, this solution only works for MAX_ORDER aligned range
like as CMA and aligning range is caller's duty.

Although we can go with solution 1., this patch is still useful since
some synchronization call is reduced since we call them in batch.

Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@....com>
---
 mm/page_isolation.c |  105 ++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 84 insertions(+), 21 deletions(-)

diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index b91f9ec..063f1f9 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -222,30 +222,63 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
 			     unsigned migratetype, bool skip_hwpoisoned_pages)
 {
 	unsigned long pfn;
-	unsigned long undo_pfn;
-	struct page *page;
+	unsigned long flags = 0, nr_pages;
+	struct page *page = NULL;
+	struct zone *zone = NULL;
 
 	BUG_ON((start_pfn) & (pageblock_nr_pages - 1));
 	BUG_ON((end_pfn) & (pageblock_nr_pages - 1));
 
-	for (pfn = start_pfn;
-	     pfn < end_pfn;
-	     pfn += pageblock_nr_pages) {
+	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
 		page = __first_valid_page(pfn, pageblock_nr_pages);
-		if (page &&
-		    set_migratetype_isolate(page, skip_hwpoisoned_pages)) {
-			undo_pfn = pfn;
-			goto undo;
+		if (!page)
+			continue;
+
+		if (!zone) {
+			zone = page_zone(page);
+			spin_lock_irqsave(&zone->lock, flags);
+		}
+
+		if (set_migratetype_isolate_pre(page, skip_hwpoisoned_pages)) {
+			spin_unlock_irqrestore(&zone->lock, flags);
+			return -EBUSY;
 		}
 	}
-	return 0;
-undo:
-	for (pfn = start_pfn;
-	     pfn < undo_pfn;
-	     pfn += pageblock_nr_pages)
-		unset_migratetype_isolate(pfn_to_page(pfn), migratetype);
 
-	return -EBUSY;
+	if (!zone)
+		return 0;
+
+	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
+		page = __first_valid_page(pfn, pageblock_nr_pages);
+		if (!page)
+			continue;
+
+		set_pageblock_migratetype(page, MIGRATE_ISOLATE);
+	}
+	spin_unlock_irqrestore(&zone->lock, flags);
+
+	zone_pcp_disable(zone);
+	/*
+	 * After this point, freed page will see MIGRATE_ISOLATE as
+	 * their pageblock migratetype on all cpus. And pcp list has
+	 * no free page.
+	 */
+	on_each_cpu(drain_local_pages, NULL, 1);
+
+	spin_lock_irqsave(&zone->lock, flags);
+	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
+		page = __first_valid_page(pfn, pageblock_nr_pages);
+		if (!page)
+			continue;
+
+		nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE);
+		__mod_zone_freepage_state(zone, -nr_pages, migratetype);
+	}
+	spin_unlock_irqrestore(&zone->lock, flags);
+
+	zone_pcp_enable(zone);
+
+	return 0;
 }
 
 /*
@@ -256,18 +289,48 @@ int undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
 {
 	unsigned long pfn;
 	struct page *page;
+	struct zone *zone = NULL;
+	unsigned long flags, nr_pages;
+
 	BUG_ON((start_pfn) & (pageblock_nr_pages - 1));
 	BUG_ON((end_pfn) & (pageblock_nr_pages - 1));
-	for (pfn = start_pfn;
-	     pfn < end_pfn;
-	     pfn += pageblock_nr_pages) {
+
+	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
+		page = __first_valid_page(pfn, pageblock_nr_pages);
+		if (!page)
+			continue;
+
+		if (!zone) {
+			zone = page_zone(page);
+			spin_lock_irqsave(&zone->lock, flags);
+		}
+
+		if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
+			continue;
+
+		nr_pages = move_freepages_block(zone, page, migratetype);
+		__mod_zone_freepage_state(zone, nr_pages, migratetype);
+		set_pageblock_migratetype(page, migratetype);
+	}
+
+	if (!zone)
+		return 0;
+
+	spin_unlock_irqrestore(&zone->lock, flags);
+
+	/* Freed pages will see original migratetype after this point */
+	kick_all_cpus_sync();
+
+	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
 		page = __first_valid_page(pfn, pageblock_nr_pages);
-		if (!page || get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
+		if (!page)
 			continue;
-		unset_migratetype_isolate(page, migratetype);
+
+		unset_migratetype_isolate_post(page, migratetype);
 	}
 	return 0;
 }
+
 /*
  * Test all pages in the range is free(means isolated) or not.
  * all pages in [start_pfn...end_pfn) must be in the same zone.
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/