lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Mon,  4 Jun 2012 11:17:52 -0700
From:	"Fenghua Yu" <fenghua.yu@...el.com>
To:	"Ingo Molnar" <mingo@...e.hu>,
	"Thomas Gleixner" <tglx@...utronix.de>,
	"H Peter Anvin" <hpa@...or.com>,
	"Suresh B Siddha" <suresh.b.siddha@...el.com>,
	"Tony Luck" <tony.luck@...el.com>,
	"Asit K Mallick" <asit.k.mallick@...el.com>,
	"Arjan Dan De Ven" <arjan@...ux.intel.com>
Cc:	"linux-kernel" <linux-kernel@...r.kernel.org>,
	"x86" <x86@...nel.org>, "linux-pm" <linux-pm@...r.kernel.org>,
	"Fenghua Yu" <fenghua.yu@...el.com>
Subject: [PATCH 3/6] x86/smpboot.c: Wake up offline CPU via mwait or nmi

From: Fenghua Yu <fenghua.yu@...el.com>

wakeup_secondary_cpu_via_soft() is defined to wake up offline CPU via mwait if
the CPU is in mwait or via nmi if the CPU is in hlt.

A CPU boots up by INIT, INIT, STARTUP sequence when it boots up for the first
time during boot time or hot plug.

Signed-off-by: Fenghua Yu <fenghua.yu@...el.com>
---
 arch/x86/include/asm/apic.h |    5 +-
 arch/x86/kernel/smpboot.c   |  187 ++++++++++++++++++++++++++++++++++++------
 2 files changed, 164 insertions(+), 28 deletions(-)

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index eaff479..cad00b1 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -425,7 +425,10 @@ extern struct apic *__apicdrivers[], *__apicdrivers_end[];
 #ifdef CONFIG_SMP
 extern atomic_t init_deasserted;
 extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip);
-#endif
+extern int wakeup_secondary_cpu_via_soft(int apicid, unsigned long start_eip);
+#else /* CONFIG_SMP */
+#define wakeup_secondary_cpu_via_soft	NULL
+#endif /* CONFIG_SMP */
 
 #ifdef CONFIG_X86_LOCAL_APIC
 
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index fd019d7..109df30 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -472,13 +472,8 @@ void __inquire_remote_apic(int apicid)
 	}
 }
 
-/*
- * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
- * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
- * won't ... remember to clear down the APIC, etc later.
- */
-int __cpuinit
-wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
+static int __cpuinit
+_wakeup_secondary_cpu_via_nmi(int apicid, int dest_mode)
 {
 	unsigned long send_status, accept_status = 0;
 	int maxlvt;
@@ -486,7 +481,7 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
 	/* Target chip */
 	/* Boot on the stack */
 	/* Kick the second */
-	apic_icr_write(APIC_DM_NMI | apic->dest_logical, logical_apicid);
+	apic_icr_write(APIC_DM_NMI | dest_mode, apicid);
 
 	pr_debug("Waiting for send to finish...\n");
 	send_status = safe_apic_wait_icr_idle();
@@ -511,6 +506,47 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
 	return (send_status | accept_status);
 }
 
+/*
+ * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
+ * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
+ * won't ... remember to clear down the APIC, etc later.
+ */
+int __cpuinit
+wakeup_secondary_cpu_via_nmi_phys(int phys_apicid, unsigned long start_eip)
+{
+	return _wakeup_secondary_cpu_via_nmi(phys_apicid, APIC_DEST_PHYSICAL);
+}
+
+int __cpuinit
+wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
+{
+	return _wakeup_secondary_cpu_via_nmi(logical_apicid, APIC_DEST_LOGICAL);
+}
+
+DEFINE_PER_CPU(int, cpu_dead) = { 0 };
+#define CPU_DEAD_TRIGGER	1
+#define CPU_DEAD_MWAIT		2
+#define CPU_DEAD_HLT		4
+
+static int wakeup_secondary_cpu_via_mwait(int cpu)
+{
+	per_cpu(cpu_dead, cpu) |= CPU_DEAD_TRIGGER;
+	return 0;
+}
+
+static int wakeup_cpu_nmi(unsigned int cmd, struct pt_regs *regs)
+{
+	int cpu = smp_processor_id();
+	int *cpu_dead_ptr;
+
+	cpu_dead_ptr = &per_cpu(cpu_dead, cpu);
+	if (!cpu_online(cpu) && (*cpu_dead_ptr & CPU_DEAD_HLT) &&
+	    (*cpu_dead_ptr & CPU_DEAD_TRIGGER))
+		return NMI_HANDLED;
+
+	return NMI_DONE;
+}
+
 static int __cpuinit
 wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
 {
@@ -626,6 +662,52 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
 	return (send_status | accept_status);
 }
 
+/*
+ * Kick a cpu.
+ *
+ * If the CPU is in mwait, wake it up by mwait method. Otherwise, if the CPU is
+ * in halt, wake it up by NMI. If none of above exists, wake it up by INIT boot
+ * APIC message.
+ *
+ * When the CPU first time boots up, i.e. cpu_dead is 0, it's waken up by INIT
+ * boot APIC message.
+ *
+ * At this point, the CPU should be in a fixed dead state. So we don't consider
+ * racy condition here.
+ */
+int __cpuinit
+wakeup_secondary_cpu_via_soft(int apicid, unsigned long start_eip)
+{
+	int cpu;
+	int boot_error = 0;
+	/* start_ip had better be page-aligned! */
+	unsigned long start_ip = real_mode_header->trampoline_start;
+
+	for (cpu = 0; cpu < nr_cpu_ids; cpu++)
+		if (apicid == apic->cpu_present_to_apicid(cpu))
+			break;
+
+	if (cpu >= nr_cpu_ids)
+		return -EINVAL;
+
+	if (per_cpu(cpu_dead, cpu) & CPU_DEAD_MWAIT) {
+		boot_error = wakeup_secondary_cpu_via_mwait(cpu);
+	} else if (per_cpu(cpu_dead, cpu) & CPU_DEAD_HLT) {
+		int *cpu_dead_ptr;
+
+		cpu_dead_ptr = &per_cpu(cpu_dead, cpu);
+		*cpu_dead_ptr |= CPU_DEAD_TRIGGER;
+
+		boot_error = wakeup_secondary_cpu_via_nmi_phys(apicid,
+								start_ip);
+		if (boot_error)
+			*cpu_dead_ptr &= ~CPU_DEAD_TRIGGER;
+	} else
+		boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip);
+
+	return boot_error;
+}
+
 /* reduce the number of lines printed when booting a large cpu count system */
 static void __cpuinit announce_cpu(int cpu, int apicid)
 {
@@ -778,6 +860,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 		 */
 		smpboot_restore_warm_reset_vector();
 	}
+
 	return boot_error;
 }
 
@@ -977,6 +1060,20 @@ static void __init smp_cpu_index_default(void)
 	}
 }
 
+static bool mwait_supported(void)
+{
+	struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info);
+
+	if (!(this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c)))
+		return false;
+	if (!this_cpu_has(X86_FEATURE_CLFLSH))
+		return false;
+	if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF)
+		return false;
+
+	return true;
+}
+
 /*
  * Prepare for SMP bootup.  The MP table or ACPI has been read
  * earlier.  Just do some sanity checking here and enable APIC mode.
@@ -1051,6 +1148,11 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 		uv_system_init();
 
 	set_mtrr_aps_delayed_init();
+
+#ifdef CONFIG_HOTPLUG_CPU
+	if (!mwait_supported())
+		register_nmi_handler(NMI_LOCAL, wakeup_cpu_nmi, 0, "wake_cpu");
+#endif
 out:
 	preempt_enable();
 }
@@ -1111,6 +1213,12 @@ static int __init _setup_possible_cpus(char *str)
 }
 early_param("possible_cpus", _setup_possible_cpus);
 
+static int __init setup_wakeup_cpu_via_init(char *str)
+{
+	apic->wakeup_secondary_cpu = NULL;
+	return 0;
+}
+__setup("wakeup_cpu_via_init", setup_wakeup_cpu_via_init);
 
 /*
  * cpu_possible_mask should be static, it cannot change as cpu's
@@ -1286,6 +1394,28 @@ void play_dead_common(void)
 	local_irq_disable();
 }
 
+static bool wakeup_cpu(int *trigger)
+{
+	unsigned int timeout;
+
+	/*
+	 * Wait up to 1 seconds to check if CPU wakeup trigger is set in
+	 * cpu_dead by either memory write or NMI.
+	 * If there is no CPU wakeup trigger, go back to sleep.
+	 */
+	for (timeout = 0; timeout < 1000000; timeout++) {
+		/*
+		 * Check if CPU0 wakeup NMI is issued and handled.
+		 */
+		if (*trigger & CPU_DEAD_TRIGGER)
+			return true;
+
+		udelay(1);
+	}
+
+	return false;
+}
+
 /*
  * We need to flush the caches before going to sleep, lest we have
  * dirty data in our caches when we come back up.
@@ -1296,14 +1426,9 @@ static inline void mwait_play_dead(void)
 	unsigned int highest_cstate = 0;
 	unsigned int highest_subcstate = 0;
 	int i;
-	void *mwait_ptr;
-	struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info);
+	int *cpu_dead_ptr;
 
-	if (!(this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c)))
-		return;
-	if (!this_cpu_has(X86_FEATURE_CLFLSH))
-		return;
-	if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF)
+	if (!mwait_supported())
 		return;
 
 	eax = CPUID_MWAIT_LEAF;
@@ -1328,16 +1453,10 @@ static inline void mwait_play_dead(void)
 			(highest_subcstate - 1);
 	}
 
-	/*
-	 * This should be a memory location in a cache line which is
-	 * unlikely to be touched by other processors.  The actual
-	 * content is immaterial as it is not actually modified in any way.
-	 */
-	mwait_ptr = &current_thread_info()->flags;
-
-	wbinvd();
-
+	cpu_dead_ptr = &per_cpu(cpu_dead, smp_processor_id());
+	*cpu_dead_ptr = CPU_DEAD_MWAIT;
 	while (1) {
+		*cpu_dead_ptr &= ~CPU_DEAD_TRIGGER;
 		/*
 		 * The CLFLUSH is a workaround for erratum AAI65 for
 		 * the Xeon 7400 series.  It's not clear it is actually
@@ -1345,20 +1464,34 @@ static inline void mwait_play_dead(void)
 		 * The WBINVD is insufficient due to the spurious-wakeup
 		 * case where we return around the loop.
 		 */
-		clflush(mwait_ptr);
-		__monitor(mwait_ptr, 0, 0);
+		wbinvd();
+		clflush(cpu_dead_ptr);
+		__monitor(cpu_dead_ptr, 0, 0);
 		mb();
-		__mwait(eax, 0);
+		if ((*cpu_dead_ptr & CPU_DEAD_TRIGGER) == 0)
+			__mwait(eax, 0);
+
+		/* Waken up by another CPU. */
+		if (wakeup_cpu(cpu_dead_ptr))
+			start_cpu();
 	}
 }
 
 static inline void hlt_play_dead(void)
 {
+	int *cpu_dead_ptr;
+
 	if (__this_cpu_read(cpu_info.x86) >= 4)
 		wbinvd();
 
+	cpu_dead_ptr = &per_cpu(cpu_dead, smp_processor_id());
+	*cpu_dead_ptr = CPU_DEAD_HLT;
 	while (1) {
+		*cpu_dead_ptr &= ~CPU_DEAD_TRIGGER;
 		native_halt();
+		/* If NMI wants to wake up me, I'll start. */
+		if (wakeup_cpu(cpu_dead_ptr))
+			start_cpu();
 	}
 }
 
-- 
1.6.0.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ