Date:	Tue, 06 Jul 2010 16:09:18 +0800
From:	Haicheng Li <haicheng.li@...ux.intel.com>
To:	"H. Peter Anvin" <hpa@...or.com>, Ingo Molnar <mingo@...hat.com>,
	Thomas Gleixner <tglx@...utronix.de>
CC:	"ak@...ux.intel.com" <ak@...ux.intel.com>,
	Wu Fengguang <fengguang.wu@...el.com>,
	"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Subject: [BUGFIX][PATCH 1/2] x86, mem-hotplug: split x86_64 vmalloc_sync_all() into separate functions

Ingo & Peter,

These two patches are critical fixes for memory hotplug on servers with large hot-pluggable memory regions.

The bug: when a hot-added memory region is large enough that the direct mapping needs
a new PGD entry, the PGDs of the other processes do not get updated with it. On x86_64
only the top-level PGD page is per-process (the lower levels of the kernel mapping are
shared), so a CPU running on a stale PGD oopses as soon as it touches the unmapped
area, e.g. when onlining CPUs on the newly added node.
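
To make the intended usage concrete, the caller side (which patch 2/2 wires up)
would look roughly like the sketch below. hotadd_sync_new_pgds() is just an
illustrative name invented for this mail, not code from either patch:

/*
 * Rough sketch only: the real call site in patch 2/2 sits inside the
 * mapping-init code, not in a wrapper like this.
 */
static void __meminit hotadd_sync_new_pgds(u64 start, u64 end)
{
	/* Extend the kernel direct mapping over the hot-added range. */
	init_memory_mapping(start, end);

	/*
	 * A new PGD entry may have been populated for this range. Other
	 * processes' PGDs do not have it yet, so propagate it before any
	 * CPU touches the new area through a stale PGD.
	 */
	sync_global_pgds((unsigned long)__va(start),
			 (unsigned long)__va(end));
}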

Please apply if possible. Thanks!

---
From 04d9fc860db40f15ad629f8b341ff84b83a00a8d Mon Sep 17 00:00:00 2001
From: Haicheng Li <haicheng.li@...ux.intel.com>
Date: Wed, 19 May 2010 17:42:14 +0800
Subject: [PATCH] x86, mem-hotplug: split x86_64 vmalloc_sync_all() into separate functions

No behavior change here.

Move the PGD-syncing loop out of vmalloc_sync_all() into a new function,
sync_global_pgds(), so that memory hotplug can reuse it.

Signed-off-by: Haicheng Li <haicheng.li@...ux.intel.com>
Reviewed-by: Wu Fengguang <fengguang.wu@...el.com>
CC: Andi Kleen <ak@...ux.intel.com>
---
 arch/x86/include/asm/pgtable_64.h |    2 ++
 arch/x86/mm/fault.c               |   24 +-----------------------
 arch/x86/mm/init_64.c             |   30 ++++++++++++++++++++++++++++++
 3 files changed, 33 insertions(+), 23 deletions(-)

diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 181be52..317026d 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -102,6 +102,8 @@ static inline void native_pgd_clear(pgd_t *pgd)
 	native_set_pgd(pgd, native_make_pgd(0));
 }

+extern void sync_global_pgds(unsigned long start, unsigned long end);
+
 /*
  * Conversion functions: convert a page and protection to a page entry,
  * and a page entry and page directory to the page they refer to.
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index f627779..7ae0897 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -326,29 +326,7 @@ out:

 void vmalloc_sync_all(void)
 {
-	unsigned long address;
-
-	for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END;
-	     address += PGDIR_SIZE) {
-
-		const pgd_t *pgd_ref = pgd_offset_k(address);
-		unsigned long flags;
-		struct page *page;
-
-		if (pgd_none(*pgd_ref))
-			continue;
-
-		spin_lock_irqsave(&pgd_lock, flags);
-		list_for_each_entry(page, &pgd_list, lru) {
-			pgd_t *pgd;
-			pgd = (pgd_t *)page_address(page) + pgd_index(address);
-			if (pgd_none(*pgd))
-				set_pgd(pgd, *pgd_ref);
-			else
-				BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
-		}
-		spin_unlock_irqrestore(&pgd_lock, flags);
-	}
+	sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END);
 }

 /*
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index ee41bba..b0c3df0 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -98,6 +98,36 @@ static int __init nonx32_setup(char *str)
 __setup("noexec32=", nonx32_setup);

 /*
+ * When memory is added or removed, make sure all process MMs have
+ * matching entries in their own top-level PGD page.
+ */
+void sync_global_pgds(unsigned long start, unsigned long end)
+{
+	unsigned long address;
+
+	for (address = start; address <= end; address += PGDIR_SIZE) {
+		const pgd_t *pgd_ref = pgd_offset_k(address);
+		unsigned long flags;
+		struct page *page;
+
+		if (pgd_none(*pgd_ref))
+			continue;
+
+		spin_lock_irqsave(&pgd_lock, flags);
+		list_for_each_entry(page, &pgd_list, lru) {
+			pgd_t *pgd;
+			pgd = (pgd_t *)page_address(page) + pgd_index(address);
+			if (pgd_none(*pgd))
+				set_pgd(pgd, *pgd_ref);
+			else
+				BUG_ON(pgd_page_vaddr(*pgd)
+					 != pgd_page_vaddr(*pgd_ref));
+		}
+		spin_unlock_irqrestore(&pgd_lock, flags);
+	}
+}
+
+/*
  * NOTE: This function is marked __ref because it calls __init function
  * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
  */
-- 
1.5.6.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
