[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <tip-0e9f93c1c04c8ab10cc564df54a7ad0f83c67796@git.kernel.org>
Date: Thu, 7 Apr 2011 02:07:18 GMT
From: tip-bot for Tejun Heo <tj@...nel.org>
To: linux-tip-commits@...r.kernel.org
Cc: linux-kernel@...r.kernel.org, hpa@...or.com, mingo@...hat.com,
yinghai@...nel.org, tj@...nel.org, tglx@...utronix.de,
rientjes@...gle.com
Subject: [tip:x86/numa] x86-32, numa: Move lowmem address space reservation to init_alloc_remap()
Commit-ID: 0e9f93c1c04c8ab10cc564df54a7ad0f83c67796
Gitweb: http://git.kernel.org/tip/0e9f93c1c04c8ab10cc564df54a7ad0f83c67796
Author: Tejun Heo <tj@...nel.org>
AuthorDate: Tue, 5 Apr 2011 00:23:55 +0200
Committer: H. Peter Anvin <hpa@...or.com>
CommitDate: Wed, 6 Apr 2011 17:57:27 -0700
x86-32, numa: Move lowmem address space reservation to init_alloc_remap()
Remap alloc init is done in the following stages.
1. init_alloc_remap() calculates how much memory is necessary for each
node and reserves node local memory.
2. initmem_init() collects how much each node needs and reserves a
single contiguous lowmem area which can contain all.
3. init_remap_allocator() initializes allocator parameters from the
determined lowmem address and per-node offsets.
4. Actual remap happens.
There is no reason for the lowmem remap area to be reserved as a
single contiguous area at one go. They don't interact with each other
and the memblock allocator will put them side-by-side anyway.
This patch breaks up the single lowmem address reservation and put
per-node lowmem address reservation into init_alloc_remap() and
initializes allocator parameters directly in the function as all the
addresses are determined there. This merges steps 2 and 3 into 1.
While at it, remove now largely irrelevant comments in
init_alloc_remap().
This change causes the following behavior changes.
* Remap lowmem areas are allocated in smaller per-node chunks.
* Remap lowmem area reservation failure fail future remap allocations
instead of panicking.
* Remap allocator initialization is less verbose.
Signed-off-by: Tejun Heo <tj@...nel.org>
Link: http://lkml.kernel.org/r/1301955840-7246-10-git-send-email-tj@kernel.org
Acked-by: Yinghai Lu <yinghai@...nel.org>
Cc: David Rientjes <rientjes@...gle.com>
Signed-off-by: H. Peter Anvin <hpa@...or.com>
---
arch/x86/mm/numa_32.c | 82 +++++++++++++++----------------------------------
1 files changed, 25 insertions(+), 57 deletions(-)
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index c127543..12bb34c 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -108,9 +108,6 @@ static unsigned long node_remap_size[MAX_NUMNODES];
static void *node_remap_start_vaddr[MAX_NUMNODES];
void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
-static unsigned long kva_start_pfn;
-static unsigned long kva_pages;
-
int __cpuinit numa_cpu_node(int cpu)
{
return apic->x86_32_numa_cpu_node(cpu);
@@ -266,7 +263,8 @@ void resume_map_numa_kva(pgd_t *pgd_base)
static __init unsigned long init_alloc_remap(int nid, unsigned long offset)
{
unsigned long size;
- u64 node_pa;
+ u64 node_pa, remap_pa;
+ void *remap_va;
/*
* The acpi/srat node info can show hot-add memroy zones where
@@ -287,6 +285,7 @@ static __init unsigned long init_alloc_remap(int nid, unsigned long offset)
size += ALIGN(sizeof(pg_data_t), PAGE_SIZE);
size = ALIGN(size, LARGE_PAGE_BYTES);
+ /* allocate node memory and the lowmem remap area */
node_pa = memblock_find_in_range(node_start_pfn[nid] << PAGE_SHIFT,
(u64)node_end_pfn[nid] << PAGE_SHIFT,
size, LARGE_PAGE_BYTES);
@@ -295,45 +294,35 @@ static __init unsigned long init_alloc_remap(int nid, unsigned long offset)
size, nid);
return 0;
}
+ memblock_x86_reserve_range(node_pa, node_pa + size, "KVA RAM");
+
+ remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT,
+ max_low_pfn << PAGE_SHIFT,
+ size, LARGE_PAGE_BYTES);
+ if (remap_pa == MEMBLOCK_ERROR) {
+ pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n",
+ size, nid);
+ memblock_x86_free_range(node_pa, node_pa + size);
+ return 0;
+ }
+ memblock_x86_reserve_range(remap_pa, remap_pa + size, "KVA PG");
+ remap_va = phys_to_virt(remap_pa);
+ /* initialize remap allocator parameters */
+ node_remap_start_pfn[nid] = node_pa >> PAGE_SHIFT;
node_remap_size[nid] = size >> PAGE_SHIFT;
node_remap_offset[nid] = offset;
- printk(KERN_DEBUG "Reserving %ld pages of KVA for lmem_map of node %d at %llx\n",
- size >> PAGE_SHIFT, nid, node_pa >> PAGE_SHIFT);
- /*
- * prevent kva address below max_low_pfn want it on system
- * with less memory later.
- * layout will be: KVA address , KVA RAM
- *
- * we are supposed to only record the one less then
- * max_low_pfn but we could have some hole in high memory,
- * and it will only check page_is_ram(pfn) &&
- * !page_is_reserved_early(pfn) to decide to use it as free.
- * So memblock_x86_reserve_range here, hope we don't run out
- * of that array
- */
- memblock_x86_reserve_range(node_pa, node_pa + size, "KVA RAM");
+ node_remap_start_vaddr[nid] = remap_va;
+ node_remap_end_vaddr[nid] = remap_va + size;
+ node_remap_alloc_vaddr[nid] = remap_va + ALIGN(sizeof(pg_data_t), PAGE_SIZE);
- node_remap_start_pfn[nid] = node_pa >> PAGE_SHIFT;
+ printk(KERN_DEBUG "remap_alloc: node %d [%08llx-%08llx) -> [%p-%p)\n",
+ nid, node_pa, node_pa + size, remap_va, remap_va + size);
return size >> PAGE_SHIFT;
}
-static void init_remap_allocator(int nid)
-{
- node_remap_start_vaddr[nid] = pfn_to_kaddr(
- kva_start_pfn + node_remap_offset[nid]);
- node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] +
- (node_remap_size[nid] * PAGE_SIZE);
- node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] +
- ALIGN(sizeof(pg_data_t), PAGE_SIZE);
-
- printk(KERN_DEBUG "node %d will remap to vaddr %08lx - %08lx\n", nid,
- (ulong) node_remap_start_vaddr[nid],
- (ulong) node_remap_end_vaddr[nid]);
-}
-
void __init initmem_init(void)
{
unsigned long reserve_pages = 0;
@@ -352,25 +341,7 @@ void __init initmem_init(void)
for_each_online_node(nid)
reserve_pages += init_alloc_remap(nid, reserve_pages);
- kva_pages = roundup(reserve_pages, PTRS_PER_PTE);
- printk(KERN_INFO "Reserving total of %lx pages for numa KVA remap\n",
- reserve_pages);
-
- kva_start_pfn = memblock_find_in_range(min_low_pfn << PAGE_SHIFT,
- max_low_pfn << PAGE_SHIFT,
- kva_pages << PAGE_SHIFT,
- PTRS_PER_PTE << PAGE_SHIFT) >> PAGE_SHIFT;
- if (kva_start_pfn == MEMBLOCK_ERROR)
- panic("Can not get kva space\n");
-
- printk(KERN_INFO "kva_start_pfn ~ %lx max_low_pfn ~ %lx\n",
- kva_start_pfn, max_low_pfn);
- printk(KERN_INFO "max_pfn = %lx\n", max_pfn);
-
- /* avoid clash with initrd */
- memblock_x86_reserve_range(kva_start_pfn<<PAGE_SHIFT,
- (kva_start_pfn + kva_pages)<<PAGE_SHIFT,
- "KVA PG");
+
#ifdef CONFIG_HIGHMEM
highstart_pfn = highend_pfn = max_pfn;
if (max_pfn > max_low_pfn)
@@ -390,11 +361,8 @@ void __init initmem_init(void)
printk(KERN_DEBUG "Low memory ends at vaddr %08lx\n",
(ulong) pfn_to_kaddr(max_low_pfn));
- for_each_online_node(nid) {
- init_remap_allocator(nid);
-
+ for_each_online_node(nid)
allocate_pgdat(nid);
- }
remap_numa_kva();
printk(KERN_DEBUG "High memory starts at vaddr %08lx\n",
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists