Message-Id: <200707232316.23058.nigel@nigel.suspend2.net>
Date:	Mon, 23 Jul 2007 23:16:22 +1000
From:	Nigel Cunningham <nigel@...el.suspend2.net>
To:	LKML <linux-kernel@...r.kernel.org>
Subject: [RFC] [PATCH 1/5] Dynamically allocated pageflags - core code.

This patch adds the core support for dynamically allocated pageflags.
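
For anyone unfamiliar with the interface, here is a minimal, uncompiled
sketch of the intended usage (pageset1_map is a hypothetical name,
following the wrapper-macro comment in dyn_pageflags.h):

#include <linux/dyn_pageflags.h>

DECLARE_DYN_PAGEFLAGS(pageset1_map);

static void dyn_pageflags_example(struct page *page)
{
	unsigned long pfn;

	/* Non-sparse: bitmap pages are allocated up front. */
	if (allocate_dyn_pageflags(&pageset1_map, 0))
		return;

	set_dynpageflag(&pageset1_map, page);
	if (test_dynpageflag(&pageset1_map, page))
		clear_dynpageflag(&pageset1_map, page);

	/* Visit each pfn whose bit is set. */
	BITMAP_FOR_EACH_SET(&pageset1_map, pfn)
		printk(KERN_DEBUG "pfn %lu is set\n", pfn);

	free_dyn_pageflags(&pageset1_map);
}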

Signed-off-by: Nigel Cunningham <nigel@...el.suspend2.net>

 include/linux/dyn_pageflags.h |   65 +++
 init/main.c                   |    3 
 mm/Makefile                   |    2 
 mm/dyn_pageflags.c            |  774 ++++++++++++++++++++++++++++++++++++++++++
 mm/memory_hotplug.c           |    2 
 5 files changed, 845 insertions(+), 1 deletion(-)
diff -ruNp 840-dyn-pageflags.patch-old/include/linux/dyn_pageflags.h 840-dyn-pageflags.patch-new/include/linux/dyn_pageflags.h
--- 840-dyn-pageflags.patch-old/include/linux/dyn_pageflags.h	1970-01-01 10:00:00.000000000 +1000
+++ 840-dyn-pageflags.patch-new/include/linux/dyn_pageflags.h	2007-07-23 19:48:40.000000000 +1000
@@ -0,0 +1,65 @@
+/*
+ * include/linux/dyn_pageflags.h
+ *
+ * Copyright (C) 2004-2007 Nigel Cunningham <nigel@...pend2.net>
+ *
+ * This file is released under the GPLv2.
+ *
+ * It implements support for dynamically allocated bitmaps that are
+ * used for temporary or infrequently used pageflags, in lieu of
+ * bits in the struct page flags entry.
+ */
+
+#ifndef DYN_PAGEFLAGS_H
+#define DYN_PAGEFLAGS_H
+
+#include <linux/mm.h>
+
+struct dyn_pageflags {
+	unsigned long ****bitmap; /* [node][zone_id][page_num][ul] */
+	int sparse, initialised;
+	struct list_head list;
+	spinlock_t struct_lock;
+};
+
+#define DYN_PAGEFLAGS_INIT(name) { \
+	.list = LIST_HEAD_INIT(name.list), \
+	.struct_lock = __SPIN_LOCK_UNLOCKED(name.struct_lock) \
+}
+
+#define DECLARE_DYN_PAGEFLAGS(name) \
+	struct dyn_pageflags name = DYN_PAGEFLAGS_INIT(name);
+
+#define BITMAP_FOR_EACH_SET(bitmap, counter) \
+	for (counter = get_next_bit_on(bitmap, max_pfn + 1); \
+		counter <= max_pfn; \
+		counter = get_next_bit_on(bitmap, counter))
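+
+/*
+ * Note: passing max_pfn + 1 as the cursor starts a new scan (see
+ * get_next_bit_on in mm/dyn_pageflags.c).
+ */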
+
+extern void clear_dyn_pageflags(struct dyn_pageflags *pagemap);
+extern int allocate_dyn_pageflags(struct dyn_pageflags *pagemap, int sparse);
+extern void free_dyn_pageflags(struct dyn_pageflags *pagemap);
+extern unsigned long get_next_bit_on(struct dyn_pageflags *bitmap,
+		unsigned long counter);
+
+extern int test_dynpageflag(struct dyn_pageflags *bitmap, struct page *page);
+/*
+ * In sparse bitmaps, setting a flag can fail (we may be unable to
+ * allocate the page that stores the bit). If that happens, we will
+ * BUG(). If you don't want this behaviour, don't allocate sparse
+ * pageflags.
+ */
+extern void set_dynpageflag(struct dyn_pageflags *bitmap, struct page *page);
+extern void clear_dynpageflag(struct dyn_pageflags *bitmap, struct page *page);
+extern void dump_pagemap(struct dyn_pageflags *pagemap);
+
+/* 
+ * With the above macros defined, you can do...
+ * #define PagePageset1(page) (test_dynpageflag(&pageset1_map, page))
+ * #define SetPagePageset1(page) (set_dynpageflag(&pageset1_map, page))
+ * #define ClearPagePageset1(page) (clear_dynpageflag(&pageset1_map, page))
+ */
+
+extern void __init dyn_pageflags_init(void);
+extern void __init dyn_pageflags_use_kzalloc(void);
+
+#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+extern void dyn_pageflags_hotplug(struct zone *zone);
+#endif
+#endif
diff -ruNp 840-dyn-pageflags.patch-old/init/main.c 840-dyn-pageflags.patch-new/init/main.c
--- 840-dyn-pageflags.patch-old/init/main.c	2007-07-23 22:11:27.000000000 +1000
+++ 840-dyn-pageflags.patch-new/init/main.c	2007-07-23 19:48:40.000000000 +1000
@@ -55,6 +55,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/device.h>
 #include <linux/kthread.h>
+#include <linux/dyn_pageflags.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -585,6 +586,7 @@ asmlinkage void __init start_kernel(void
 	softirq_init();
 	timekeeping_init();
 	time_init();
+	dyn_pageflags_init();
 	profile_init();
 	if (!irqs_disabled())
 		printk("start_kernel(): bug: interrupts were enabled early\n");
@@ -621,6 +623,7 @@ asmlinkage void __init start_kernel(void
 	cpuset_init_early();
 	mem_init();
 	kmem_cache_init();
+	dyn_pageflags_use_kzalloc();
 	setup_per_cpu_pageset();
 	numa_policy_init();
 	if (late_time_init)
diff -ruNp 840-dyn-pageflags.patch-old/mm/Makefile 840-dyn-pageflags.patch-new/mm/Makefile
--- 840-dyn-pageflags.patch-old/mm/Makefile	2007-07-11 22:19:42.000000000 +1000
+++ 840-dyn-pageflags.patch-new/mm/Makefile	2007-07-23 19:48:40.000000000 +1000
@@ -11,7 +11,7 @@ obj-y			:= bootmem.o filemap.o mempool.o
 			   page_alloc.o page-writeback.o pdflush.o \
 			   readahead.o swap.o truncate.o vmscan.o \
 			   prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
-			   $(mmu-y)
+			   dyn_pageflags.o $(mmu-y)
 
 ifeq ($(CONFIG_MMU)$(CONFIG_BLOCK),yy)
 obj-y			+= bounce.o
diff -ruNp 840-dyn-pageflags.patch-old/mm/dyn_pageflags.c 840-dyn-pageflags.patch-new/mm/dyn_pageflags.c
--- 840-dyn-pageflags.patch-old/mm/dyn_pageflags.c	1970-01-01 10:00:00.000000000 +1000
+++ 840-dyn-pageflags.patch-new/mm/dyn_pageflags.c	2007-07-23 22:11:30.000000000 +1000
@@ -0,0 +1,774 @@
+/*
+ * mm/dyn_pageflags.c
+ *
+ * Copyright (C) 2004-2007 Nigel Cunningham <nigel@...pend2.net>
+ * 
+ * This file is released under the GPLv2.
+ *
+ * Routines for dynamically allocating and releasing bitmaps
+ * used as pseudo-pageflags.
+ *
+ * We use bitmaps built from order-zero allocations, linked
+ * together by kzalloc'd arrays of pointers, forming a
+ * structure that is indexed like:
+ *
+ * 	pageflags->bitmap[node][zone_id][page_num][ul]
+ *
+ * All of this is transparent to the caller, who just uses
+ * the allocate & free routines to create/destroy bitmaps,
+ * and get/set/clear to operate on individual flags.
+ *
+ * Bitmaps can be sparse, with the individual bitmap pages only
+ * being allocated when a bit within them is first set.
+ *
+ * Memory hotplugging support is work in progress. A zone's
+ * start_pfn may change. If it does, we need to reallocate
+ * the zone's bitmap, adding extra pages at the front to
+ * cover the new pfns. For simplicity, we don't shift the
+ * contents of existing pages around. The lock is only used
+ * to avoid reentrancy when resizing zones. The replacement
+ * of old data with new is done atomically. If we try to test
+ * a bit in the new area before the update is completed, we
+ * know it's zero.
+ *
+ * TuxOnIce knows the structure of these pageflags, so that
+ * it can serialise them in the image header. TODO: Make
+ * that support more generic so that TuxOnIce doesn't need
+ * to know how dyn_pageflags are stored.
+ */
+
+/* Avoid warnings in include/linux/mm.h */
+struct page;
+struct dyn_pageflags;
+int test_dynpageflag(struct dyn_pageflags *bitmap, struct page *page);
+
+#include <linux/bootmem.h>
+#include <linux/dyn_pageflags.h>
+
+static LIST_HEAD(flags_list);
+static DEFINE_SPINLOCK(flags_list_lock);
+
+static void *(*dyn_allocator)(unsigned long size, unsigned long flags);
+
+static int dyn_pageflags_debug;
+
+#define PR_DEBUG(a, b...) \
+	do { if (dyn_pageflags_debug) printk(a, ##b); } while (0)
+#define DUMP_DEBUG(bitmap) \
+	do { if (dyn_pageflags_debug) dump_pagemap(bitmap); } while (0)
+
+#if BITS_PER_LONG == 32
+#define UL_SHIFT 5
+#elif BITS_PER_LONG == 64
+#define UL_SHIFT 6
+#else
+#error Bits per long not 32 or 64?
+#endif
+
+#define BIT_NUM_MASK ((sizeof(unsigned long) << 3) - 1)
+#define PAGE_NUM_MASK (~((1UL << (PAGE_SHIFT + 3)) - 1))
+#define UL_NUM_MASK (~(BIT_NUM_MASK | PAGE_NUM_MASK))
+
+/*
+ * PAGENUMBER gives the index of the page within the zone.
+ * PAGEINDEX gives the index of the unsigned long within that page.
+ * PAGEBIT gives the index of the bit within the unsigned long.
+ */
+#define PAGENUMBER(zone_offset) ((int) (zone_offset >> (PAGE_SHIFT + 3)))
+#define PAGEINDEX(zone_offset) ((int) ((zone_offset & UL_NUM_MASK) >> UL_SHIFT))
+#define PAGEBIT(zone_offset) ((int) (zone_offset & BIT_NUM_MASK))
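+
+/*
+ * Worked example (assuming PAGE_SHIFT == 12 and 64-bit longs, so each
+ * bitmap page holds 32768 bits): zone_offset 100000 gives PAGENUMBER 3,
+ * PAGEINDEX 26 and PAGEBIT 32, since 3 * 32768 + 26 * 64 + 32 == 100000.
+ */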
+
+#define PAGE_UL_PTR(bitmap, node, zone_num, zone_pfn) \
+       ((bitmap[node][zone_num][PAGENUMBER(zone_pfn)])+PAGEINDEX(zone_pfn))
+
+#define pages_for_zone(zone) \
+	(DIV_ROUND_UP((zone)->spanned_pages, (PAGE_SIZE << 3)))
+
+#define pages_for_span(span) \
+	(DIV_ROUND_UP(span, PAGE_SIZE << 3))
+
+/* __maybe_unused for testing functions below */
+#define GET_BIT_AND_UL(pageflags, page) \
+	struct zone *zone = page_zone(page); \
+	unsigned long pfn = page_to_pfn(page); \
+	unsigned long zone_pfn = pfn - zone->zone_start_pfn; \
+	int node = page_to_nid(page); \
+	int zone_num = zone_idx(zone); \
+	int pagenum = PAGENUMBER(zone_pfn) + 2; \
+	int page_offset = PAGEINDEX(zone_pfn); \
+	unsigned long **zone_array = ((pageflags)->bitmap && \
+		(pageflags)->bitmap[node] && \
+		(pageflags)->bitmap[node][zone_num]) ? \
+			(pageflags)->bitmap[node][zone_num] : NULL; \
+	unsigned long __maybe_unused *ul = (zone_array && \
+		(unsigned long) zone_array[0] <= pfn && \
+		(unsigned long) zone_array[1] >= (pagenum-2) && \
+		zone_array[pagenum]) ? zone_array[pagenum] + page_offset : \
+		  NULL; \
+	int bit __maybe_unused = PAGEBIT(zone_pfn);
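+
+/*
+ * Note: pagenum above is offset by two because zone_array[0] stores
+ * the zone's start pfn and zone_array[1] the number of bitmap pages
+ * (see resize_zone_bitmap and dump_pagemap).
+ */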
+
+#define for_each_online_pgdat_zone(pgdat, zone_nr) \
+	for_each_online_pgdat(pgdat) \
+		for (zone_nr = 0; zone_nr < MAX_NR_ZONES; zone_nr++)
+
+/**
+ * dump_pagemap: Display the contents of a bitmap for debugging purposes.
+ *
+ * @pagemap: The array to be dumped.
+ */
+void dump_pagemap(struct dyn_pageflags *pagemap)
+{
+	int i = 0;
+	struct pglist_data *pgdat;
+	unsigned long ****bitmap = pagemap->bitmap;
+
+	printk(" --- Dump bitmap %p ---\n", pagemap);
+
+	printk("%p: Sparse flag = %d\n", &pagemap->sparse, pagemap->sparse);
+	printk("%p: Bitmap      = %p\n", &pagemap->bitmap, bitmap);
+
+	if (!bitmap)
+		goto out;
+
+	for_each_online_pgdat(pgdat) {
+		int node_id = pgdat->node_id, zone_nr;
+		printk("%p: Node %d => %p\n", &bitmap[node_id], node_id,
+				bitmap[node_id]);
+		if (!bitmap[node_id])
+			continue;
+		for (zone_nr = 0; zone_nr < MAX_NR_ZONES; zone_nr++) {
+			printk("%p:   Zone %d => %p%s\n",
+					&bitmap[node_id][zone_nr], zone_nr,
+					bitmap[node_id][zone_nr],
+					bitmap[node_id][zone_nr] ? "" :
+						" (empty)");
+			if (!bitmap[node_id][zone_nr])
+				continue;
+
+			printk("%p:     Zone start pfn  = %p\n",
+					&bitmap[node_id][zone_nr][0],
+					bitmap[node_id][zone_nr][0]);
+			printk("%p:     Number of pages = %p\n",
+					&bitmap[node_id][zone_nr][1],
+					bitmap[node_id][zone_nr][1]);
+			for (i = 2; i < (unsigned long) bitmap[node_id][zone_nr][1] + 2; i++)
+				printk("%p:     Page %2d         = %p\n",
+					&bitmap[node_id][zone_nr][i],
+					i - 2,
+					bitmap[node_id][zone_nr][i]);
+		}
+	}
+out:
+	printk(" --- Dump of bitmap %p finishes\n", pagemap);
+}
+
+/**
+ * clear_dyn_pageflags: Zero all pageflags in a bitmap.
+ *
+ * @pagemap: The array to be cleared.
+ *
+ * Clear an array used to store dynamically allocated pageflags.
+ */
+void clear_dyn_pageflags(struct dyn_pageflags *pagemap)
+{
+	int i = 0, zone_idx;
+	struct pglist_data *pgdat;
+	unsigned long ****bitmap = pagemap->bitmap;
+
+	for_each_online_pgdat_zone(pgdat, zone_idx) {
+		int node_id = pgdat->node_id;
+		struct zone *zone = &pgdat->node_zones[zone_idx];
+
+		if (!populated_zone(zone) ||
+		   (!bitmap[node_id] || !bitmap[node_id][zone_idx]))
+			continue;
+
+		for (i = 2; i < pages_for_zone(zone) + 2; i++)
+			if (bitmap[node_id][zone_idx][i])
+				memset((bitmap[node_id][zone_idx][i]), 0,
+						PAGE_SIZE);
+	}
+}
+
+/**
+ * Allocators.
+ *
+ * During boot time, we want to use alloc_bootmem_low. Afterwards, we want
+ * kzalloc. These routines let us do that without causing compile time warnings
+ * about mismatched sections, as would happen if we did a simple
+ * boot ? alloc_bootmem_low() : kzalloc() below.
+ */
+
+/**
+ * boot_time_allocator: Allocator used while booting.
+ *
+ * @size: Number of bytes wanted.
+ * @flags: Allocation flags (ignored here).
+ */
+static __init void *boot_time_allocator(unsigned long size, unsigned long flags)
+{
+	return alloc_bootmem_low(size);
+}
+
+/**
+ * normal_allocator: Allocator used post-boot.
+ *
+ * @size: Number of bytes wanted.
+ * @flags: Allocation flags.
+ *
+ * Allocate memory for our page flags.
+ */
+static void *normal_allocator(unsigned long size, unsigned long flags)
+{
+	if (size == PAGE_SIZE)
+		return (void *) get_zeroed_page(flags);
+	else
+		return kzalloc(size, flags);
+}
+
+/**
+ * dyn_pageflags_init: Do the earliest initialisation.
+ *
+ * Very early in the boot process, set our allocator to the boot-time
+ * one (alloc_bootmem_low).
+ */
+void __init dyn_pageflags_init(void)
+{
+	dyn_allocator = boot_time_allocator;
+}
+
+/**
+ * dyn_pageflags_use_kzalloc: Reset the allocator for normal use.
+ *
+ * Reset the allocator to our normal, post boot function.
+ */
+void __init dyn_pageflags_use_kzalloc(void)
+{
+	dyn_allocator = normal_allocator;
+}
+
+/**
+ * try_alloc_dyn_pageflag_part: Try to allocate a pointer array.
+ *
+ * @nr_ptrs: Number of pointers wanted.
+ * @ptr: Where to store the address of the allocated array.
+ *
+ * Try to allocate a contiguous array of pointers.
+ */
+static int try_alloc_dyn_pageflag_part(int nr_ptrs, void **ptr)
+{
+	*ptr = (*dyn_allocator)(sizeof(void *) * nr_ptrs, GFP_ATOMIC);
+
+	if (*ptr)
+		return 0;
+
+	printk(KERN_ERR "Unable to allocate memory for dynamic pageflags.\n");
+	return -ENOMEM;
+}
+
+static int populate_bitmap_page(struct dyn_pageflags *pageflags,
+			unsigned long **page_ptr)
+{
+	void *address;
+	unsigned long flags = 0;
+
+	if (pageflags)
+		spin_lock_irqsave(&pageflags->struct_lock, flags);
+
+	/*
+	 * The page may have been allocated while we waited.
+	 */
+	if (*page_ptr)
+		goto out;
+
+	address = (*dyn_allocator)(PAGE_SIZE, GFP_ATOMIC);
+
+	if (!address) {
+		PR_DEBUG("Error. Unable to allocate memory for "
+			"dynamic pageflags page.\n");
+		if (pageflags)
+			spin_unlock_irqrestore(&pageflags->struct_lock, flags);
+		return -ENOMEM;
+	}
+
+	*page_ptr = address;
+out:
+	if (pageflags)
+		spin_unlock_irqrestore(&pageflags->struct_lock, flags);
+	return 0;
+}
+
+/**
+ * resize_zone_bitmap: Resize the array of pages for a bitmap.
+ *
+ * Shrink or extend a list of pages for a zone in a bitmap, preserving
+ * existing data.
+ */
+static int resize_zone_bitmap(struct dyn_pageflags *pagemap, struct zone *zone,
+		unsigned long old_pages, unsigned long new_pages,
+		unsigned long copy_offset)
+{
+	unsigned long **new_ptr = NULL, ****bitmap = pagemap->bitmap;
+	int node_id = zone_to_nid(zone), zone_idx = zone_idx(zone),
+	    to_copy = min(old_pages, new_pages);
+	unsigned long **old_ptr = bitmap[node_id][zone_idx], i;
+
+	if (new_pages) {
+		if (try_alloc_dyn_pageflag_part(new_pages + 2,
+					(void **) &new_ptr))
+			return -ENOMEM;
+
+		if (old_pages)
+			memcpy(new_ptr + 2 + copy_offset, old_ptr + 2,
+					sizeof(unsigned long) * to_copy);
+
+		new_ptr[0] = (void *) zone->zone_start_pfn;
+		new_ptr[1] = (void *) new_pages;
+	}
+
+	/* Free/alloc bitmap pages. */
+	if (old_pages > new_pages) {
+		for (i = new_pages + 2; i < old_pages + 2; i++)
+			if (old_ptr[i])
+				free_page((unsigned long) old_ptr[i]);
+	} else if (!pagemap->sparse) {
+		for (i = old_pages + 2; i < new_pages + 2; i++)
+			if (populate_bitmap_page(NULL,
+					(unsigned long **) &new_ptr[i]))
+				break;
+	}
+
+	bitmap[node_id][zone_idx] = new_ptr;
+
+	if (old_ptr)
+		kfree(old_ptr);
+
+	return 0;
+}
+
+/**
+ * check_dyn_pageflag_zone: Resize a zone's section of a dyn_pageflag array.
+ *
+ * @pagemap: The array to be worked on.
+ * @zone: The zone to bring in sync with reality.
+ * @force_free_all: Whether to free all of the zone's bitmap pages.
+ *
+ * Check that the pagemap has correct allocations for the zone. This
+ * can be invoked when allocating a new bitmap, or for hot[un]plug,
+ * and so must deal with any disparities between
+ * zone_start_pfn/spanned_pages and what we have allocated. In
+ * addition, we must deal with the possibility of zone_start_pfn
+ * having changed.
+ */
+int check_dyn_pageflag_zone(struct dyn_pageflags *pagemap, struct zone *zone,
+		int force_free_all)
+{
+	int node_id = zone_to_nid(zone), zone_idx = zone_idx(zone);
+	unsigned long copy_offset = 0, old_pages, new_pages;
+	unsigned long **old_ptr = pagemap->bitmap[node_id][zone_idx];
+
+	old_pages = old_ptr ? (unsigned long) old_ptr[1] : 0;
+	new_pages = force_free_all ? 0 : pages_for_span(zone->spanned_pages);
+
+	if (old_pages == new_pages &&
+	    (!old_pages || (unsigned long) old_ptr[0] == zone->zone_start_pfn))
+		return 0;
+
+	if (old_pages &&
+	    (unsigned long) old_ptr[0] != zone->zone_start_pfn)
+		copy_offset = pages_for_span((unsigned long) old_ptr[0] -
+							zone->zone_start_pfn);
+
+	/* New/expanded zone? */
+	return resize_zone_bitmap(pagemap, zone, old_pages, new_pages,
+			copy_offset);
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+/**
+ * dyn_pageflags_hotplug: Add pages to bitmaps for hotplugged memory.
+ *
+ * Seek to expand bitmaps for hotplugged memory. Failures are ignored:
+ * since we handle sparse bitmaps anyway, missing pages will be
+ * populated as needed.
+ */
+void dyn_pageflags_hotplug(struct zone *zone)
+{
+	struct dyn_pageflags *this;
+
+	list_for_each_entry(this, &flags_list, list)
+		check_dyn_pageflag_zone(this, zone, 0);
+}
+#endif
+
+/**
+ * free_dyn_pageflags: Free an array of dynamically allocated pageflags.
+ *
+ * @pagemap: The array to be freed.
+ *
+ * Free a dynamically allocated pageflags bitmap.
+ */
+void free_dyn_pageflags(struct dyn_pageflags *pagemap)
+{
+	int zone_idx;
+	struct pglist_data *pgdat;
+	unsigned long flags;
+
+	DUMP_DEBUG(pagemap);
+
+	if (!pagemap->bitmap)
+		return;
+	
+	for_each_online_pgdat_zone(pgdat, zone_idx)
+		check_dyn_pageflag_zone(pagemap,
+				&pgdat->node_zones[zone_idx], 1);
+
+	for_each_online_pgdat(pgdat) {
+		int i = pgdat->node_id;
+
+		if (pagemap->bitmap[i])
+			kfree((pagemap->bitmap)[i]);
+	}
+
+	kfree(pagemap->bitmap);
+	pagemap->bitmap = NULL;
+
+	pagemap->initialised = 0;
+
+	if (!pagemap->sparse) {
+		spin_lock_irqsave(&flags_list_lock, flags);
+		list_del_init(&pagemap->list);
+		spin_unlock_irqrestore(&flags_list_lock, flags);
+		pagemap->sparse = 1;
+	}
+}
+
+/**
+ * allocate_dyn_pageflags: Allocate a bitmap.
+ *
+ * @pagemap: The bitmap we want to allocate.
+ * @sparse: Whether to make the array sparse.
+ *
+ * Prepare the array we'll use. If sparse, we don't allocate the
+ * actual bitmap pages until they're needed. If not sparse, we add the
+ * bitmap to a list so that, if we're supporting memory hotplugging,
+ * we can allocate new pages on hotplug events.
+ *
+ * This routine may be called directly, or indirectly when the first bit
+ * needs to be set on a previously unused bitmap.
+ */
+int allocate_dyn_pageflags(struct dyn_pageflags *pagemap, int sparse)
+{
+	int zone_idx, result = 0;
+	struct zone *zone;
+	struct pglist_data *pgdat;
+	unsigned long flags;
+
+	if (!sparse && (pagemap->sparse || !pagemap->initialised)) {
+		spin_lock_irqsave(&flags_list_lock, flags);
+		list_add(&pagemap->list, &flags_list);
+		spin_unlock_irqrestore(&flags_list_lock, flags);
+	}
+
+	spin_lock_irqsave(&pagemap->struct_lock, flags);
+
+	pagemap->initialised = 1;
+	pagemap->sparse = sparse;
+
+	if (!pagemap->bitmap && try_alloc_dyn_pageflag_part((1 << NODES_WIDTH),
+				(void **) &pagemap->bitmap)) {
+		result = -ENOMEM;
+		goto out;
+	}
+
+	for_each_online_pgdat(pgdat) {
+		int node_id = pgdat->node_id;
+
+		if (!pagemap->bitmap[node_id] &&
+		    try_alloc_dyn_pageflag_part(MAX_NR_ZONES,
+			(void **) &(pagemap->bitmap)[node_id])) {
+				result = -ENOMEM;
+				goto out;
+		}
+
+		for (zone_idx = 0; zone_idx < MAX_NR_ZONES; zone_idx++) {
+			zone = &pgdat->node_zones[zone_idx];
+
+			if (populated_zone(zone))
+				check_dyn_pageflag_zone(pagemap, zone, 0);
+		}
+	}
+
+out:
+	spin_unlock_irqrestore(&pagemap->struct_lock, flags);
+	return result;
+}
+
+/**
+ * test_dynpageflag: Test a page in a bitmap.
+ *
+ * @bitmap: The bitmap we're checking.
+ * @page: The page for which we want to test the matching bit.
+ *
+ * Test whether the bit is on in the array. If the relevant page
+ * hasn't been allocated (sparse bitmap), the result is zero.
+ */
+int test_dynpageflag(struct dyn_pageflags *bitmap, struct page *page)
+{
+	GET_BIT_AND_UL(bitmap, page);
+	return ul ? test_bit(bit, ul) : 0;
+}
+
+/**
+ * set_dynpageflag: Set a bit in a bitmap.
+ *
+ * @pageflags: The bitmap we're operating on.
+ * @page: The page for which we want to set the matching bit.
+ *
+ * Set the associated bit in the array. If the array is sparse, we
+ * seek to allocate the missing page.
+ */
+void set_dynpageflag(struct dyn_pageflags *pageflags, struct page *page)
+{
+	GET_BIT_AND_UL(pageflags, page);
+
+	if (!ul) {	/* Sparse, hotplugged or unprepared */
+		/* Allocate / fill gaps in high levels */
+		if (allocate_dyn_pageflags(pageflags, 1) ||
+		    populate_bitmap_page(pageflags, (unsigned long **)
+				&pageflags->bitmap[node][zone_num][pagenum])) {
+			printk(KERN_EMERG "Failed to allocate storage in a "
+					"sparse bitmap.\n");
+			dump_pagemap(pageflags);
+			BUG();
+		}
+		set_dynpageflag(pageflags, page);
+	} else
+		set_bit(bit, ul);
+}
+
+/**
+ * clear_dynpageflag: Clear a bit in a bitmap.
+ *
+ * @bitmap: The bitmap we're operating on.
+ * @page: The page for which we want to clear the matching bit.
+ *
+ * Clear the associated bit in the array. It is not an error to be asked
+ * to clear a bit on a page we haven't allocated.
+ */
+void clear_dynpageflag(struct dyn_pageflags *bitmap, struct page *page)
+{
+	GET_BIT_AND_UL(bitmap, page);
+	if (ul)
+		clear_bit(bit, ul);
+}
+
+/**
+ * get_next_bit_on: Get the next bit in a bitmap.
+ *
+ * @pageflags: The bitmap we're searching.
+ * @counter: The previous pfn, or max_pfn + 1 to start a new scan.
+ *
+ * Find the next set bit in the bitmap after @counter and return its
+ * pfn. If there are no more bits set, return max_pfn + 1.
+ */
+unsigned long get_next_bit_on(struct dyn_pageflags *pageflags,
+		unsigned long counter)
+{
+	struct page *page;
+	struct zone *zone;
+	unsigned long *ul = NULL;
+	unsigned long zone_offset;
+	int pagebit, zone_num, first = (counter == (max_pfn + 1)), node;
+
+	if (first)
+		counter = first_online_pgdat()->node_zones->zone_start_pfn;
+
+	page = pfn_to_page(counter);
+	zone = page_zone(page);
+	node = zone->zone_pgdat->node_id;
+	zone_num = zone_idx(zone);
+	zone_offset = counter - zone->zone_start_pfn;
+
+	if (first)
+		goto test;
+
+	do {
+		zone_offset++;
+
+		if (zone_offset >= zone->spanned_pages) {
+			do {
+				zone = next_zone(zone);
+				if (!zone)
+					return max_pfn + 1;
+			} while (!zone->spanned_pages);
+
+			zone_num = zone_idx(zone);
+			node = zone->zone_pgdat->node_id;
+			zone_offset = 0;
+		}
+test:
+		pagebit = PAGEBIT(zone_offset);
+
+		if (!pagebit || !ul) {
+			ul = pageflags->bitmap[node][zone_num]
+					[PAGENUMBER(zone_offset) + 2];
+			if (ul)
+				ul += PAGEINDEX(zone_offset);
+			else {
+				PR_DEBUG("Unallocated page. Skipping from zone"
+					" offset %lu to the start of the next "
+					"one.\n", zone_offset);
+				zone_offset = roundup(zone_offset + 1,
+						PAGE_SIZE << 3) - 1;
+				PR_DEBUG("New zone offset is %lu.\n", zone_offset);
+				continue;
+			}
+		}
+
+		if (!ul || !(*ul & ~((1UL << pagebit) - 1))) {
+			zone_offset += BITS_PER_LONG - pagebit - 1;
+			continue;
+		}
+
+	} while (!ul || !test_bit(pagebit, ul));
+
+	return zone->zone_start_pfn + zone_offset;
+}
+
+#ifdef SELF_TEST
+#include <linux/jiffies.h>
+
+static int __init dyn_pageflags_test(void)
+{
+	struct dyn_pageflags test_map;
+	struct page *test_page1 = pfn_to_page(1);
+	unsigned long pfn = 0, start, end;
+	int i, iterations;
+
+	memset(&test_map, 0, sizeof(test_map));
+
+	printk("Dynpageflags testing...\n");
+
+	printk("Set page 1...");
+	set_dynpageflag(&test_map, test_page1);
+	if (test_dynpageflag(&test_map, test_page1))
+		printk("Ok.\n");
+	else
+		printk("FAILED.\n");
+
+	printk("Test memory hotplugging #1 ...");
+	{
+		unsigned long orig_size;
+		GET_BIT_AND_UL(&test_map, test_page1);
+		orig_size = (unsigned long) test_map.bitmap[node][zone_num][1];
+		/*
+		 * Use the code triggered when zone_start_pfn lowers,
+		 * checking that our bit is then set in the third page.
+		 */
+		resize_zone_bitmap(&test_map, zone, orig_size, orig_size + 2, 2);
+		DUMP_DEBUG(&test_map);
+		if ((unsigned long) test_map.bitmap[node][zone_num][pagenum + 2] &&
+		    (unsigned long) test_map.bitmap[node][zone_num][pagenum + 2][0] == 2UL)
+			printk("Ok.\n");
+		else
+			printk("FAILED.\n");
+	}
+
+	printk("Test memory hotplugging #2 ...");
+	{
+		/*
+		 * Test expanding bitmap length.
+		 */
+		unsigned long orig_size;
+		GET_BIT_AND_UL(&test_map, test_page1);
+		orig_size = (unsigned long) test_map.bitmap[node][zone_num][1];
+		resize_zone_bitmap(&test_map, zone, orig_size, orig_size + 2, 0);
+		DUMP_DEBUG(&test_map);
+		pagenum += 2; /* Offset for first test */
+		if (test_map.bitmap[node][zone_num][pagenum] &&
+		    test_map.bitmap[node][zone_num][pagenum][0] == 2UL &&
+		    (unsigned long) test_map.bitmap[node][zone_num][1] ==
+					    	orig_size + 2)
+			printk("Ok.\n");
+		else
+			printk("FAILED ([%d][%d][%d]: %p && %lu == 2UL  && %p == %lu).\n",
+					node, zone_num, pagenum,
+					test_map.bitmap[node][zone_num][pagenum],
+					test_map.bitmap[node][zone_num][pagenum] ?
+					test_map.bitmap[node][zone_num][pagenum][0] : 0,
+					test_map.bitmap[node][zone_num][1],
+					orig_size + 2);
+	}
+
+	free_dyn_pageflags(&test_map);
+
+	allocate_dyn_pageflags(&test_map, 0);
+
+	start = jiffies;
+
+	iterations = 25000000 / max_pfn;
+
+	for (i = 0; i < iterations; i++) {
+		for (pfn = 0; pfn < max_pfn; pfn++)
+			set_dynpageflag(&test_map, pfn_to_page(pfn));
+		for (pfn = 0; pfn < max_pfn; pfn++)
+			clear_dynpageflag(&test_map, pfn_to_page(pfn));
+	}
+
+	end = jiffies;
+
+	free_dyn_pageflags(&test_map);
+
+	printk("Dyn: %d iterations of setting & clearing all %lu flags took %lu jiffies.\n",
+			iterations, max_pfn, end - start);
+
+	start = jiffies;
+
+	for (i = 0; i < iterations; i++) {
+		for (pfn = 0; pfn < max_pfn; pfn++)
+			set_bit(7, &(pfn_to_page(pfn))->flags);
+		for (pfn = 0; pfn < max_pfn; pfn++)
+			clear_bit(7, &(pfn_to_page(pfn))->flags);
+	}
+
+	end = jiffies;
+
+	printk("Real flags: %d iterations of setting & clearing all %lu flags took %lu jiffies.\n",
+			iterations, max_pfn, end - start);
+
+	iterations = 25000000;
+
+	start = jiffies;
+
+	for (i = 0; i < iterations; i++) {
+		set_dynpageflag(&test_map, pfn_to_page(1));
+		clear_dynpageflag(&test_map, pfn_to_page(1));
+	}
+
+	end = jiffies;
+
+	printk("Dyn: %d iterations of setting & clearing one flag took %lu jiffies.\n",
+			iterations, end - start);
+
+	start = jiffies;
+
+	for (i = 0; i < iterations; i++) {
+		set_bit(7, &(pfn_to_page(1))->flags);
+		clear_bit(7, &(pfn_to_page(1))->flags);
+	}
+
+	end = jiffies;
+
+	printk("Real pageflag: %d iterations of setting & clearing one flag took %lu jiffies.\n",
+			iterations, end - start);
+	return 0;
+}
+
+late_initcall(dyn_pageflags_test);
+#endif
+
+static int __init dyn_pageflags_debug_setup(char *str)
+{
+	printk("Dynamic pageflags debugging enabled.\n");
+	dyn_pageflags_debug = 1;
+	return 1;
+}
+
+__setup("dyn_pageflags_debug", dyn_pageflags_debug_setup);
diff -ruNp 840-dyn-pageflags.patch-old/mm/memory_hotplug.c 840-dyn-pageflags.patch-new/mm/memory_hotplug.c
--- 840-dyn-pageflags.patch-old/mm/memory_hotplug.c	2007-07-11 22:19:42.000000000 +1000
+++ 840-dyn-pageflags.patch-new/mm/memory_hotplug.c	2007-07-23 19:48:40.000000000 +1000
@@ -74,6 +74,8 @@ static int __add_zone(struct zone *zone,
 	}
 	memmap_init_zone(nr_pages, nid, zone_type,
 			 phys_start_pfn, MEMMAP_HOTPLUG);
+
+	dyn_pageflags_hotplug(zone);
 	return 0;
 }
 