lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:	Tue, 07 Aug 2012 15:29:27 +0800
From:	"zhenzhong.duan" <zhenzhong.duan@...cle.com>
To:	tglx@...utronix.de, mingo@...hat.com, hpa@...or.com, x86@...nel.org
CC:	linux-kernel@...r.kernel.org,
	Konrad Rzeszutek Wilk <konrad.wilk@...cle.com>,
	Feng Jin <joe.jin@...cle.com>
Subject: [PATCH] Parallelize mtrr init between cpus

Current code serialize mtrr init with set_atomicity_lock.
Mtrr init is quite slow when we bootup on a hvm with large mem, vcpus
and pci passthroughed devices(eg. 24 vcpus + 90G mem).
It took about ~30 mins to bootup, after patch, it took ~2 min.

Signed-off-by: Zhenzhong Duan <zhenzhong.duan@...cle.com>
---
 arch/x86/kernel/cpu/mtrr/generic.c |   57 +++++++++++++++++-------------------
 1 files changed, 27 insertions(+), 30 deletions(-)

diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index e9fe907..a1468b7 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -335,8 +335,9 @@ print_fixed(unsigned base, unsigned step, const mtrr_type *types)
 	}
 }
 
-static void prepare_set(void);
-static void post_set(void);
+static void prepare_set(unsigned long *cr4_p, u32 *deftype_lo_p,
+			u32 *deftype_hi_p);
+static void post_set(unsigned long cr4, u32 deftype_lo, u32 deftype_hi);
 
 static void __init print_mtrr_state(void)
 {
@@ -385,7 +386,8 @@ static void __init print_mtrr_state(void)
 void __init get_mtrr_state(void)
 {
 	struct mtrr_var_range *vrs;
-	unsigned long flags;
+	unsigned long flags, cr4;
+	u32 deftype_lo, deftype_hi;
 	unsigned lo, dummy;
 	unsigned int i;
 
@@ -420,11 +422,11 @@ void __init get_mtrr_state(void)
 
 	/* PAT setup for BP. We need to go through sync steps here */
 	local_irq_save(flags);
-	prepare_set();
+	prepare_set(&cr4, &deftype_lo, &deftype_hi);
 
 	pat_init();
 
-	post_set();
+	post_set(cr4, deftype_lo, deftype_hi);
 	local_irq_restore(flags);
 }
 
@@ -610,15 +612,13 @@ static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr)
 	return changed;
 }
 
-static u32 deftype_lo, deftype_hi;
-
 /**
  * set_mtrr_state - Set the MTRR state for this CPU.
  *
  * NOTE: The CPU must already be in a safe state for MTRR changes.
  * RETURNS: 0 if no changes made, else a mask indicating what was changed.
  */
-static unsigned long set_mtrr_state(void)
+static unsigned long set_mtrr_state(u32 *deftype_lo_p, u32 *deftype_hi_p)
 {
 	unsigned long change_mask = 0;
 	unsigned int i;
@@ -635,10 +635,10 @@ static unsigned long set_mtrr_state(void)
 	 * Set_mtrr_restore restores the old value of MTRRdefType,
 	 * so to set it we fiddle with the saved value:
 	 */
-	if ((deftype_lo & 0xff) != mtrr_state.def_type
-	    || ((deftype_lo & 0xc00) >> 10) != mtrr_state.enabled) {
+	if ((*deftype_lo_p & 0xff) != mtrr_state.def_type
+	    || ((*deftype_lo_p & 0xc00) >> 10) != mtrr_state.enabled) {
 
-		deftype_lo = (deftype_lo & ~0xcff) | mtrr_state.def_type |
+		*deftype_lo_p = (*deftype_lo_p & ~0xcff) | mtrr_state.def_type |
 			     (mtrr_state.enabled << 10);
 		change_mask |= MTRR_CHANGE_MASK_DEFTYPE;
 	}
@@ -647,9 +647,6 @@ static unsigned long set_mtrr_state(void)
 }
 
 
-static unsigned long cr4;
-static DEFINE_RAW_SPINLOCK(set_atomicity_lock);
-
 /*
  * Since we are disabling the cache don't allow any interrupts,
  * they would run extremely slow and would only increase the pain.
@@ -657,7 +654,8 @@ static DEFINE_RAW_SPINLOCK(set_atomicity_lock);
  * The caller must ensure that local interrupts are disabled and
  * are reenabled after post_set() has been called.
  */
-static void prepare_set(void) __acquires(set_atomicity_lock)
+static void prepare_set(unsigned long *cr4_p, u32 *deftype_lo_p,
+			u32 *deftype_hi_p)
 {
 	unsigned long cr0;
 
@@ -668,8 +666,6 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
 	 * changes to the way the kernel boots
 	 */
 
-	raw_spin_lock(&set_atomicity_lock);
-
 	/* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
 	cr0 = read_cr0() | X86_CR0_CD;
 	write_cr0(cr0);
@@ -677,22 +673,22 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
 
 	/* Save value of CR4 and clear Page Global Enable (bit 7) */
 	if (cpu_has_pge) {
-		cr4 = read_cr4();
-		write_cr4(cr4 & ~X86_CR4_PGE);
+		*cr4_p = read_cr4();
+		write_cr4(*cr4_p & ~X86_CR4_PGE);
 	}
 
 	/* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
 	__flush_tlb();
 
 	/* Save MTRR state */
-	rdmsr(MSR_MTRRdefType, deftype_lo, deftype_hi);
+	rdmsr(MSR_MTRRdefType, *deftype_lo_p, *deftype_hi_p);
 
 	/* Disable MTRRs, and set the default type to uncached */
-	mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi);
+	mtrr_wrmsr(MSR_MTRRdefType, *deftype_lo_p & ~0xcff, *deftype_hi_p);
 	wbinvd();
 }
 
-static void post_set(void) __releases(set_atomicity_lock)
+static void post_set(unsigned long cr4, u32 deftype_lo, u32 deftype_hi)
 {
 	/* Flush TLBs (no need to flush caches - they are disabled) */
 	__flush_tlb();
@@ -706,24 +702,24 @@ static void post_set(void) __releases(set_atomicity_lock)
 	/* Restore value of CR4 */
 	if (cpu_has_pge)
 		write_cr4(cr4);
-	raw_spin_unlock(&set_atomicity_lock);
 }
 
 static void generic_set_all(void)
 {
 	unsigned long mask, count;
-	unsigned long flags;
+	unsigned long flags, cr4;
+	u32 deftype_lo, deftype_hi;
 
 	local_irq_save(flags);
-	prepare_set();
+	prepare_set(&cr4, &deftype_lo, &deftype_hi);
 
 	/* Actually set the state */
-	mask = set_mtrr_state();
+	mask = set_mtrr_state(&deftype_lo, &deftype_hi);
 
 	/* also set PAT */
 	pat_init();
 
-	post_set();
+	post_set(cr4, deftype_lo, deftype_hi);
 	local_irq_restore(flags);
 
 	/* Use the atomic bitops to update the global mask */
@@ -748,13 +744,14 @@ static void generic_set_all(void)
 static void generic_set_mtrr(unsigned int reg, unsigned long base,
 			     unsigned long size, mtrr_type type)
 {
-	unsigned long flags;
+	unsigned long flags, cr4;
+	u32 deftype_lo, deftype_hi;
 	struct mtrr_var_range *vr;
 
 	vr = &mtrr_state.var_ranges[reg];
 
 	local_irq_save(flags);
-	prepare_set();
+	prepare_set(&cr4, &deftype_lo, &deftype_hi);
 
 	if (size == 0) {
 		/*
@@ -773,7 +770,7 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
 		mtrr_wrmsr(MTRRphysMask_MSR(reg), vr->mask_lo, vr->mask_hi);
 	}
 
-	post_set();
+	post_set(cr4, deftype_lo, deftype_hi);
 	local_irq_restore(flags);
 }
 
-- 
1.7.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ