Message-id: <1321634598-16859-6-git-send-email-m.szyprowski@samsung.com>
Date:	Fri, 18 Nov 2011 17:43:12 +0100
From:	Marek Szyprowski <m.szyprowski@...sung.com>
To:	linux-kernel@...r.kernel.org, linux-arm-kernel@...ts.infradead.org,
	linux-media@...r.kernel.org, linux-mm@...ck.org,
	linaro-mm-sig@...ts.linaro.org
Cc:	Michal Nazarewicz <mina86@...a86.com>,
	Marek Szyprowski <m.szyprowski@...sung.com>,
	Kyungmin Park <kyungmin.park@...sung.com>,
	Russell King <linux@....linux.org.uk>,
	Andrew Morton <akpm@...ux-foundation.org>,
	KAMEZAWA Hiroyuki <kamezawa.hiroyu@...fujitsu.com>,
	Ankita Garg <ankita@...ibm.com>,
	Daniel Walker <dwalker@...eaurora.org>,
	Mel Gorman <mel@....ul.ie>, Arnd Bergmann <arnd@...db.de>,
	Jesse Barker <jesse.barker@...aro.org>,
	Jonathan Corbet <corbet@....net>,
	Shariq Hasnain <shariq.hasnain@...aro.org>,
	Chunsang Jeong <chunsang.jeong@...aro.org>,
	Dave Hansen <dave@...ux.vnet.ibm.com>
Subject: [PATCH 05/11] mm: page_alloc: introduce alloc_contig_range()

From: Michal Nazarewicz <mina86@...a86.com>

This commit adds the alloc_contig_range() function, which tries
to allocate a given range of pages.  It tries to migrate all
already allocated pages that fall within the range, thus freeing them.
Once all pages in the range are freed, they are removed from the
buddy system and are thus allocated for the caller to use.
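
A minimal usage sketch (hypothetical caller, not part of this patch; the
helper name and error handling are made up for illustration), assuming
the whole PFN range lies within a single zone:

    #include <linux/mm.h>
    #include <linux/page-isolation.h>

    static int example_grab_contig(unsigned long start_pfn, unsigned nr_pages)
    {
            int ret;

            /* [start_pfn, start_pfn + nr_pages) must be in a single zone. */
            ret = alloc_contig_range(start_pfn, start_pfn + nr_pages);
            if (ret)
                    return ret;     /* e.g. -EBUSY or -EINTR */

            /* ... use pfn_to_page(start_pfn) for the contiguous buffer ... */

            /* Return the pages to the page allocator when done. */
            free_contig_range(start_pfn, nr_pages);
            return 0;
    }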

__alloc_contig_migrate_range() borrows some code from KAMEZAWA
Hiroyuki's __alloc_contig_pages().

Signed-off-by: Michal Nazarewicz <mina86@...a86.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@...sung.com>
---
 include/linux/page-isolation.h |    3 +
 mm/page_alloc.c                |  223 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 226 insertions(+), 0 deletions(-)

diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index 051c1b1..d305080 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -33,5 +33,8 @@ test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn);
 extern int set_migratetype_isolate(struct page *page);
 extern void unset_migratetype_isolate(struct page *page);
 
+/* The functions below must be run on a range from a single zone. */
+int alloc_contig_range(unsigned long start, unsigned long end);
+void free_contig_range(unsigned long pfn, unsigned nr_pages);
 
 #endif
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 58d1a2e..b7aac26 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -57,6 +57,7 @@
 #include <linux/ftrace_event.h>
 #include <linux/memcontrol.h>
 #include <linux/prefetch.h>
+#include <linux/migrate.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -5687,6 +5688,228 @@ out:
 	spin_unlock_irqrestore(&zone->lock, flags);
 }
 
+static unsigned long pfn_align_to_maxpage_down(unsigned long pfn)
+{
+	return pfn & ~(MAX_ORDER_NR_PAGES - 1);
+}
+
+static unsigned long pfn_align_to_maxpage_up(unsigned long pfn)
+{
+	return ALIGN(pfn, MAX_ORDER_NR_PAGES);
+}
+
+static int __alloc_contig_migrate_range(unsigned long start, unsigned long end)
+{
+	/* This function is based on compact_zone() from compaction.c. */
+
+	unsigned long pfn = start;
+	int ret = -EBUSY;
+	unsigned tries = 0;
+
+	struct compact_control cc = {
+		.nr_migratepages = 0,
+		.nr_freepages = 0,
+		.order = -1,
+		.zone = page_zone(pfn_to_page(start)),
+		.sync = true,
+		.migrate_pfn = 0,
+	};
+	INIT_LIST_HEAD(&cc.freepages);
+	INIT_LIST_HEAD(&cc.migratepages);
+
+	/*
+	 * Compaction code uses two pointers, a "migrate_pfn" and
+	 * "free_pfn".  The first one goes from the beginning, and the
+	 * second one from the end of the zone.  Once they meet,
+	 * compaction stops.
+	 *
+	 * This is good behaviour if you want to migrate all pages to
+	 * the end of the zone but that's not what we want.  We want
+	 * to migrate pages from [start, end) *anywhere* inside the
+	 * zone.  To do that, we set "migrate_pfn" to the beginning of
+	 * the zone and never touch it again so compaction will finish
+	 * only when the "free_pfn" pointer reaches beginning of the
+	 * only when the "free_pfn" pointer reaches the beginning of the
+	 */
+
+	cc.migrate_pfn = cc.zone->zone_start_pfn;
+	cc.free_pfn = cc.migrate_pfn + cc.zone->spanned_pages;
+	cc.free_pfn &= ~(pageblock_nr_pages-1);
+
+	migrate_prep_local();
+
+	while (pfn < end || cc.nr_migratepages) {
+		/* Abort on signal */
+		if (fatal_signal_pending(current)) {
+			ret = -EINTR;
+			goto done;
+		}
+
+		/* Are there any free pages left to migrate to? */
+		if (cc.nr_freepages && cc.free_pfn <= cc.migrate_pfn) {
+			/* We've run out of pages to migrate to. */
+			ret = -EBUSY;
+			goto done;
+		}
+
+		/* Get some pages to migrate. */
+		if (list_empty(&cc.migratepages)) {
+			cc.nr_migratepages = 0;
+			pfn = isolate_migratepages_range(cc.zone, &cc,
+							 pfn, end);
+			if (!pfn) {
+				ret = -EINTR;
+				goto done;
+			}
+			tries = 0;
+		}
+
+		/* Try to migrate. */
+		ret = migrate_pages(&cc.migratepages, compaction_alloc,
+				    (unsigned long)&cc, false, cc.sync);
+
+		/* Migrated all of them? Great! */
+		if (list_empty(&cc.migratepages))
+			continue;
+
+		/* Try five times. */
+		if (++tries == 5) {
+			ret = ret < 0 ? ret : -EBUSY;
+			goto done;
+		}
+
+		/* Drain everything and reschedule before the next attempt. */
+		lru_add_drain_all();
+		drain_all_pages();
+		cond_resched();
+	}
+	ret = 0;
+
+done:
+	/* Make sure all pages are isolated. */
+	if (!ret) {
+		lru_add_drain_all();
+		drain_all_pages();
+		if (WARN_ON(test_pages_isolated(start, end)))
+			ret = -EBUSY;
+	}
+
+	/* Release pages */
+	putback_lru_pages(&cc.migratepages);
+	cc.nr_freepages -= release_freepages(&cc.freepages);
+	VM_BUG_ON(cc.nr_freepages != 0);
+
+	return ret;
+}
+
+/**
+ * alloc_contig_range() -- tries to allocate a given range of pages
+ * @start:	start PFN to allocate
+ * @end:	one-past-the-last PFN to allocate
+ *
+ * The PFN range does not have to be pageblock or MAX_ORDER_NR_PAGES
+ * aligned, however it is the caller's responsibility to guarantee
+ * that we are the only thread that changes the migrate type of the
+ * pageblocks the pages fall in.
+ *
+ * Returns zero on success or a negative error code.  On success, all
+ * pages whose PFN is in [start, end) are allocated for the caller and
+ * need to be freed with free_contig_range().
+ */
+int alloc_contig_range(unsigned long start, unsigned long end)
+{
+	unsigned long outer_start, outer_end;
+	int ret;
+
+	/*
+	 * What we do here is we mark all pageblocks in the range as
+	 * MIGRATE_ISOLATE.  Because of the way the page allocator works,
+	 * we align the range to MAX_ORDER pages so that the page allocator
+	 * won't try to merge buddies from different pageblocks and
+	 * change MIGRATE_ISOLATE to some other migration type.
+	 *
+	 * Once the pageblocks are marked as MIGRATE_ISOLATE, we
+	 * migrate the pages from the unaligned range (i.e. the pages
+	 * we are interested in).  This will put all the pages in the
+	 * range back to the page allocator as MIGRATE_ISOLATE.
+	 *
+	 * When this is done, we take the pages in the range from the page
+	 * allocator, removing them from the buddy system.  This way the
+	 * page allocator will never consider using them.
+	 *
+	 * This lets us mark the pageblocks back as
+	 * MIGRATE_CMA/MIGRATE_MOVABLE so that free pages in the
+	 * MAX_ORDER aligned range but not in the unaligned, original
+	 * range are put back to the page allocator so that the buddy
+	 * allocator can use them.
+	 */
+
+	ret = start_isolate_page_range(pfn_align_to_maxpage_down(start),
+				       pfn_align_to_maxpage_up(end));
+	if (ret)
+		goto done;
+
+	ret = __alloc_contig_migrate_range(start, end);
+	if (ret)
+		goto done;
+
+	/*
+	 * Pages from [start, end) are within MAX_ORDER_NR_PAGES
+	 * aligned blocks that are marked as MIGRATE_ISOLATE.  What's
+	 * more, all pages in [start, end) are free in page allocator.
+	 * What we are going to do is allocate all pages from
+	 * [start, end) (that is, remove them from the page allocator).
+	 *
+	 * The only problem is that pages at the beginning and at the
+	 * end of the interesting range may not be aligned with pages that
+	 * the page allocator holds, i.e. they can be part of higher order
+	 * pages.  Because of this, we reserve the bigger range and
+	 * once this is done free the pages we are not interested in.
+	 */
+
+	ret = 0;
+	while (!PageBuddy(pfn_to_page(start & (~0UL << ret))))
+		if (WARN_ON(++ret >= MAX_ORDER)) {
+			ret = -EINVAL;
+			goto done;
+		}
+
+	outer_start = start & (~0UL << ret);
+	outer_end = isolate_freepages_range(page_zone(pfn_to_page(outer_start)),
+					    outer_start, end, NULL);
+	if (!outer_end) {
+		ret = -EBUSY;
+		goto done;
+	}
+	outer_end += outer_start;
+
+	/* Free head and tail (if any) */
+	if (start != outer_start)
+		free_contig_range(outer_start, start - outer_start);
+	if (end != outer_end)
+		free_contig_range(end, outer_end - end);
+
+	ret = 0;
+done:
+	undo_isolate_page_range(pfn_align_to_maxpage_down(start),
+				pfn_align_to_maxpage_up(end));
+	return ret;
+}
+
+void free_contig_range(unsigned long pfn, unsigned nr_pages)
+{
+	struct page *page = pfn_to_page(pfn);
+
+	while (nr_pages--) {
+		__free_page(page);
+		++pfn;
+		if (likely(zone_pfn_same_memmap(pfn - 1, pfn)))
+			++page;
+		else
+			page = pfn_to_page(pfn);
+	}
+}
+
 #ifdef CONFIG_MEMORY_HOTREMOVE
 /*
  * All pages in the range must be isolated before calling this.
-- 
1.7.1.569.g6f426

