linux-kernel - [PATCH 17/33] x86_64: Separate normal memory map initialization from the hotplug case

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <11544302381069-git-send-email-ebiederm@xmission.com>
Date:	Tue,  1 Aug 2006 05:03:32 -0600
From:	"Eric W. Biederman" <ebiederm@...ssion.com>
To:	<fastboot@...l.org>
Cc:	<linux-kernel@...r.kernel.org>, Horms <horms@...ge.net.au>,
	Jan Kratochvil <lace@...kratochvil.net>,
	"H. Peter Anvin" <hpa@...or.com>,
	Magnus Damm <magnus.damm@...il.com>,
	Vivek Goyal <vgoyal@...ibm.com>, Linda Wang <lwang@...hat.com>,
	"Eric W. Biederman" <ebiederm@...ssion.com>
Subject: [PATCH 17/33] x86_64: Separate normal memory map initialization from the hotplug case

Currently the initialization of the two memory maps is combined into one
set of functions, with if(after_bootmem) tests scattered all over
to handle the semantic differences.  Just trying to think about
what is supposed to happen when and why makes my head hurt.

In one case we initialize a page but in another we don't because
it has been zeroed by the allocator.

In one case we have to map and unmap pages and in another we
don't because we have a mapping of the pages already.

In one case we care if a page table is partially initialized
and in the other we don't.

It is ugly to reason through and makes maintenance difficult,
because the rules are different in the two cases.  So I have
separated these code paths so they can evolve separately.  I
think code duplication is the lesser of two evils here.

Signed-off-by: Eric W. Biederman <ebiederm@...ssion.com>
---
 arch/x86_64/mm/init.c |  147 +++++++++++++++++++++++++++++++++----------------
 1 files changed, 98 insertions(+), 49 deletions(-)

diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index d14fb2d..0522c1c 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -179,19 +179,13 @@ static  struct temp_map { 
 	{}
 }; 
 
-static __meminit void *alloc_low_page(int *index, unsigned long *phys)
+static __init void *alloc_low_page(int *index, unsigned long *phys)
 { 
 	struct temp_map *ti;
 	int i; 
 	unsigned long pfn = table_end++, paddr; 
 	void *adr;
 
-	if (after_bootmem) {
-		adr = (void *)get_zeroed_page(GFP_ATOMIC);
-		*phys = __pa(adr);
-		return adr;
-	}
-
 	if (pfn >= end_pfn) 
 		panic("alloc_low_page: ran out of memory"); 
 	for (i = 0; temp_mappings[i].allocated; i++) {
@@ -210,13 +204,10 @@ static __meminit void *alloc_low_page(in
 	return adr; 
 } 
 
-static __meminit void unmap_low_page(int i)
+static __init void unmap_low_page(int i)
 { 
 	struct temp_map *ti;
 
-	if (after_bootmem)
-		return;
-
 	ti = &temp_mappings[i];
 	set_pmd(ti->pmd, __pmd(0));
 	ti->allocated = 0; 
@@ -249,7 +240,7 @@ __init void early_iounmap(void *addr, un
 	__flush_tlb();
 }
 
-static void __meminit
+static void __init
 phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
 {
 	int i;
@@ -258,9 +249,8 @@ phys_pmd_init(pmd_t *pmd, unsigned long 
 		unsigned long entry;
 
 		if (address >= end) {
-			if (!after_bootmem)
-				for (; i < PTRS_PER_PMD; i++, pmd++)
-					set_pmd(pmd, __pmd(0));
+			for (; i < PTRS_PER_PMD; i++, pmd++)
+				set_pmd(pmd, __pmd(0));
 			break;
 		}
 		entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address;
@@ -269,30 +259,12 @@ phys_pmd_init(pmd_t *pmd, unsigned long 
 	}
 }
 
-static void __meminit
-phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
-{
-	pmd_t *pmd = pmd_offset(pud, (unsigned long)__va(address));
-
-	if (pmd_none(*pmd)) {
-		spin_lock(&init_mm.page_table_lock);
-		phys_pmd_init(pmd, address, end);
-		spin_unlock(&init_mm.page_table_lock);
-		__flush_tlb_all();
-	}
-}
-
-static void __meminit phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
+static void __init phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
 { 
 	long i = pud_index(address);
 
 	pud = pud + i;
 
-	if (after_bootmem && pud_val(*pud)) {
-		phys_pmd_update(pud, address, end);
-		return;
-	}
-
 	for (; i < PTRS_PER_PUD; pud++, i++) {
 		int map; 
 		unsigned long paddr, pmd_phys;
@@ -302,16 +274,14 @@ static void __meminit phys_pud_init(pud_
 		if (paddr >= end)
 			break;
 
-		if (!after_bootmem && !e820_any_mapped(paddr, paddr+PUD_SIZE, 0)) {
+		if (!e820_any_mapped(paddr, paddr+PUD_SIZE, 0)) {
 			set_pud(pud, __pud(0)); 
 			continue;
 		} 
 
 		pmd = alloc_low_page(&map, &pmd_phys);
-		spin_lock(&init_mm.page_table_lock);
 		set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
 		phys_pmd_init(pmd, paddr, end);
-		spin_unlock(&init_mm.page_table_lock);
 		unmap_low_page(map);
 	}
 	__flush_tlb();
@@ -345,7 +315,7 @@ static void __init find_early_table_spac
 /* Setup the direct mapping of the physical memory at PAGE_OFFSET.
    This runs before bootmem is initialized and gets pages directly from the 
    physical memory. To access them they are temporarily mapped. */
-void __meminit init_memory_mapping(unsigned long start, unsigned long end)
+void __init init_memory_mapping(unsigned long start, unsigned long end)
 { 
 	unsigned long next; 
 
@@ -357,8 +327,7 @@ void __meminit init_memory_mapping(unsig
 	 * mapped.  Unfortunately this is done currently before the nodes are 
 	 * discovered.
 	 */
-	if (!after_bootmem)
-		find_early_table_space(end);
+	find_early_table_space(end);
 
 	start = (unsigned long)__va(start);
 	end = (unsigned long)__va(end);
@@ -369,22 +338,17 @@ void __meminit init_memory_mapping(unsig
 		pgd_t *pgd = pgd_offset_k(start);
 		pud_t *pud;
 
-		if (after_bootmem)
-			pud = pud_offset(pgd, start & PGDIR_MASK);
-		else
-			pud = alloc_low_page(&map, &pud_phys);
+		pud = alloc_low_page(&map, &pud_phys);
 
 		next = start + PGDIR_SIZE;
 		if (next > end) 
 			next = end; 
 		phys_pud_init(pud, __pa(start), __pa(next));
-		if (!after_bootmem)
-			set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
+		set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
 		unmap_low_page(map);   
 	} 
 
-	if (!after_bootmem)
-		asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features));
+	asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features));
 	__flush_tlb_all();
 }
 
@@ -529,6 +493,91 @@ int memory_add_physaddr_to_nid(u64 start
 }
 #endif
 
+static void
+late_phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
+{
+	int i;
+
+	for (i = 0; i < PTRS_PER_PMD; pmd++, i++, address += PMD_SIZE) {
+		unsigned long entry;
+
+		if (address >= end)
+			break;
+		entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address;
+		entry &= __supported_pte_mask;
+		set_pmd(pmd, __pmd(entry));
+	}
+}
+
+static void
+late_phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
+{
+	pmd_t *pmd = pmd_offset(pud, (unsigned long)__va(address));
+
+	if (pmd_none(*pmd)) {
+		spin_lock(&init_mm.page_table_lock);
+		late_phys_pmd_init(pmd, address, end);
+		spin_unlock(&init_mm.page_table_lock);
+		__flush_tlb_all();
+	}
+}
+
+/* Post-bootmem: fill pud entries for physical [address, end). */
+static void late_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
+{
+	long i = pud_index(address);
+
+	pud = pud + i;
+	/* Entry already set: just fill its pmd range if that is still empty. */
+	if (pud_val(*pud)) {
+		late_phys_pmd_update(pud, address, end);
+		return;
+	}
+
+	for (; i < PTRS_PER_PUD; pud++, i++) {
+		unsigned long paddr, pmd_phys;
+		pmd_t *pmd;
+
+		paddr = (address & PGDIR_MASK) + i*PUD_SIZE;
+		if (paddr >= end)
+			break;
+		/* Page arrives zeroed, so pmd slots past 'end' need no clearing. */
+		pmd = (pmd_t *)get_zeroed_page(GFP_ATOMIC);
+		pmd_phys = __pa(pmd);
+		spin_lock(&init_mm.page_table_lock);
+		set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
+		late_phys_pmd_init(pmd, paddr, end);
+		spin_unlock(&init_mm.page_table_lock);
+	}
+}
+
+/* Setup the direct mapping of the physical memory at PAGE_OFFSET.
+ * This runs after bootmem is initialized and gets pages normally.
+ */
+static void late_init_memory_mapping(unsigned long start, unsigned long end)
+{
+	unsigned long next;
+
+	Dprintk("add_memory_mapping\n");
+
+	start = (unsigned long)__va(start);
+	end = (unsigned long)__va(end);
+
+	for (; start < end; start = next) {
+		pgd_t *pgd = pgd_offset_k(start);
+		pud_t *pud;
+
+		/* No allocation here: reach the pud page through the pgd entry. */
+		pud = pud_offset(pgd, start & PGDIR_MASK);
+
+		next = start + PGDIR_SIZE;
+		if (next > end)
+			next = end;
+		late_phys_pud_init(pud, __pa(start), __pa(next));
+	}
+	__flush_tlb_all();
+}
+
 /*
  * Memory is added always to NORMAL zone. This means you will never get
  * additional DMA/DMA32 memory.
@@ -545,7 +594,7 @@ int arch_add_memory(int nid, u64 start, 
 	if (ret)
 		goto error;
 
-	init_memory_mapping(start, (start + size -1));
+	late_init_memory_mapping(start, (start + size -1));
 
 	return ret;
 error:
-- 
1.4.2.rc2.g5209e

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/