[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230512205255.928917242@linutronix.de>
Date: Fri, 12 May 2023 23:07:11 +0200 (CEST)
From: Thomas Gleixner <tglx@...utronix.de>
To: LKML <linux-kernel@...r.kernel.org>
Cc: x86@...nel.org, David Woodhouse <dwmw2@...radead.org>,
Andrew Cooper <andrew.cooper3@...rix.com>,
Brian Gerst <brgerst@...il.com>,
Arjan van de Veen <arjan@...ux.intel.com>,
Paolo Bonzini <pbonzini@...hat.com>,
Paul McKenney <paulmck@...nel.org>,
Tom Lendacky <thomas.lendacky@....com>,
Sean Christopherson <seanjc@...gle.com>,
Oleksandr Natalenko <oleksandr@...alenko.name>,
Paul Menzel <pmenzel@...gen.mpg.de>,
"Guilherme G. Piccoli" <gpiccoli@...lia.com>,
Piotr Gorski <lucjan.lucjanov@...il.com>,
Usama Arif <usama.arif@...edance.com>,
Juergen Gross <jgross@...e.com>,
Boris Ostrovsky <boris.ostrovsky@...cle.com>,
xen-devel@...ts.xenproject.org,
Russell King <linux@...linux.org.uk>,
Arnd Bergmann <arnd@...db.de>,
linux-arm-kernel@...ts.infradead.org,
Catalin Marinas <catalin.marinas@....com>,
Will Deacon <will@...nel.org>, Guo Ren <guoren@...nel.org>,
linux-csky@...r.kernel.org,
Thomas Bogendoerfer <tsbogend@...ha.franken.de>,
linux-mips@...r.kernel.org,
"James E.J. Bottomley" <James.Bottomley@...senPartnership.com>,
Helge Deller <deller@....de>, linux-parisc@...r.kernel.org,
Paul Walmsley <paul.walmsley@...ive.com>,
Palmer Dabbelt <palmer@...belt.com>,
linux-riscv@...ts.infradead.org,
Mark Rutland <mark.rutland@....com>,
Sabin Rapan <sabrapan@...zon.com>,
"Michael Kelley (LINUX)" <mikelley@...rosoft.com>,
Ross Philipson <ross.philipson@...cle.com>,
David Woodhouse <dwmw@...zon.co.uk>
Subject: [patch V4 09/37] x86/smpboot: Split up native_cpu_up() into separate
phases and document them
From: David Woodhouse <dwmw@...zon.co.uk>
There are four logical parts to what native_cpu_up() does on the BSP (or
on the controlling CPU for a later hotplug):
1) Wake the AP by sending the INIT/SIPI/SIPI sequence.
2) Wait for the AP to make it as far as wait_for_master_cpu() which
sets that CPU's bit in cpu_initialized_mask, then sets the bit in
cpu_callout_mask to let the AP proceed through cpu_init().
3) Wait for the AP to finish cpu_init() and get as far as the
smp_callin() call, which sets that CPU's bit in cpu_callin_mask.
4) Perform the TSC synchronization and wait for the AP to actually
mark itself online in cpu_online_mask.
In preparation to allow these phases to operate in parallel on multiple
APs, split them out into separate functions and document the interactions
a little more clearly in both the BP and AP code paths.
No functional change intended.
Signed-off-by: David Woodhouse <dwmw@...zon.co.uk>
Signed-off-by: Thomas Gleixner <tglx@...utronix.de>
Tested-by: Michael Kelley <mikelley@...rosoft.com>
---
arch/x86/kernel/smpboot.c | 184 +++++++++++++++++++++++++++++-----------------
1 file changed, 119 insertions(+), 65 deletions(-)
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -193,6 +193,10 @@ static void smp_callin(void)
wmb();
+ /*
+ * This runs the AP through all the cpuhp states to its target
+ * state CPUHP_ONLINE.
+ */
notify_cpu_starting(cpuid);
/*
@@ -233,12 +237,28 @@ static void notrace start_secondary(void
load_cr3(swapper_pg_dir);
__flush_tlb_all();
#endif
+ /*
+ * Sync point with wait_cpu_initialized(). Before proceeding through
+ * cpu_init(), the AP will call wait_for_master_cpu() which sets its
+ * own bit in cpu_initialized_mask and then waits for the BSP to set
+ * its bit in cpu_callout_mask to release it.
+ */
cpu_init_secondary();
rcu_cpu_starting(raw_smp_processor_id());
x86_cpuinit.early_percpu_clock_init();
+
+ /*
+ * Sync point with wait_cpu_callin(). The AP doesn't wait here
+ * but just sets the bit to let the controlling CPU (BSP) know that
+ * it's got this far.
+ */
smp_callin();
- /* Check TSC synchronization with the control CPU: */
+ /*
+ * Check TSC synchronization with the control CPU, which will do
+ * its part of this from wait_cpu_online(), making it an implicit
+ * synchronization point.
+ */
check_tsc_sync_target();
/*
@@ -257,6 +277,7 @@ static void notrace start_secondary(void
* half valid vector space.
*/
lock_vector_lock();
+ /* Sync point with do_wait_cpu_online() */
set_cpu_online(smp_processor_id(), true);
lapic_online();
unlock_vector_lock();
@@ -979,17 +1000,13 @@ int common_cpu_up(unsigned int cpu, stru
/*
* NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
* (ie clustered apic addressing mode), this is a LOGICAL apic ID.
- * Returns zero if CPU booted OK, else error code from
+ * Returns zero if startup was successfully sent, else error code from
* ->wakeup_secondary_cpu.
*/
static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
{
- /* start_ip had better be page-aligned! */
unsigned long start_ip = real_mode_header->trampoline_start;
- unsigned long boot_error = 0;
- unsigned long timeout;
-
#ifdef CONFIG_X86_64
/* If 64-bit wakeup method exists, use the 64-bit mode trampoline IP */
if (apic->wakeup_secondary_cpu_64)
@@ -1046,60 +1063,89 @@ static int do_boot_cpu(int apicid, int c
* - Use an INIT boot APIC message
*/
if (apic->wakeup_secondary_cpu_64)
- boot_error = apic->wakeup_secondary_cpu_64(apicid, start_ip);
+ return apic->wakeup_secondary_cpu_64(apicid, start_ip);
else if (apic->wakeup_secondary_cpu)
- boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
- else
- boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip);
+ return apic->wakeup_secondary_cpu(apicid, start_ip);
- if (!boot_error) {
- /*
- * Wait 10s total for first sign of life from AP
- */
- boot_error = -1;
- timeout = jiffies + 10*HZ;
- while (time_before(jiffies, timeout)) {
- if (cpumask_test_cpu(cpu, cpu_initialized_mask)) {
- /*
- * Tell AP to proceed with initialization
- */
- cpumask_set_cpu(cpu, cpu_callout_mask);
- boot_error = 0;
- break;
- }
- schedule();
- }
- }
+ return wakeup_secondary_cpu_via_init(apicid, start_ip);
+}
- if (!boot_error) {
- /*
- * Wait till AP completes initial initialization
- */
- while (!cpumask_test_cpu(cpu, cpu_callin_mask)) {
- /*
- * Allow other tasks to run while we wait for the
- * AP to come online. This also gives a chance
- * for the MTRR work(triggered by the AP coming online)
- * to be completed in the stop machine context.
- */
- schedule();
- }
- }
+static int wait_cpu_cpumask(unsigned int cpu, const struct cpumask *mask)
+{
+ unsigned long timeout;
- if (x86_platform.legacy.warm_reset) {
- /*
- * Cleanup possible dangling ends...
- */
- smpboot_restore_warm_reset_vector();
+ /*
+ * Wait up to 10s for the CPU to report in.
+ */
+ timeout = jiffies + 10*HZ;
+ while (time_before(jiffies, timeout)) {
+ if (cpumask_test_cpu(cpu, mask))
+ return 0;
+
+ schedule();
}
+ return -1;
+}
- return boot_error;
+/*
+ * Bringup step two: Wait for the target AP to reach cpu_init_secondary()
+ * and thus wait_for_master_cpu(), then set cpu_callout_mask to allow it
+ * to proceed. The AP will then proceed past setting its 'callin' bit
+ * and end up waiting in check_tsc_sync_target() until we reach
+ * do_wait_cpu_online() to tend to it.
+ */
+static int wait_cpu_initialized(unsigned int cpu)
+{
+ /*
+ * Wait for first sign of life from AP.
+ */
+ if (wait_cpu_cpumask(cpu, cpu_initialized_mask))
+ return -1;
+
+ cpumask_set_cpu(cpu, cpu_callout_mask);
+ return 0;
}
-int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
+/*
+ * Bringup step three: Wait for the target AP to reach smp_callin().
+ * The AP is not waiting for us here so we don't need to parallelise
+ * this step. Not entirely clear why we care about this, since we just
+ * proceed directly to TSC synchronization which is the next sync
+ * point with the AP anyway.
+ */
+static void wait_cpu_callin(unsigned int cpu)
+{
+ while (!cpumask_test_cpu(cpu, cpu_callin_mask))
+ schedule();
+}
+
+/*
+ * Bringup step four: Synchronize the TSC and wait for the target AP
+ * to reach set_cpu_online() in start_secondary().
+ */
+static void wait_cpu_online(unsigned int cpu)
{
- int apicid = apic->cpu_present_to_apicid(cpu);
unsigned long flags;
+
+ /*
+ * Check TSC synchronization with the AP (keep irqs disabled
+ * while doing so):
+ */
+ local_irq_save(flags);
+ check_tsc_sync_source(cpu);
+ local_irq_restore(flags);
+
+ /*
+ * Wait for the AP to mark itself online, so the core caller
+ * can drop sparse_irq_lock.
+ */
+ while (!cpu_online(cpu))
+ schedule();
+}
+
+static int native_kick_ap(unsigned int cpu, struct task_struct *tidle)
+{
+ int apicid = apic->cpu_present_to_apicid(cpu);
int err;
lockdep_assert_irqs_enabled();
@@ -1140,25 +1186,33 @@ int native_cpu_up(unsigned int cpu, stru
return err;
err = do_boot_cpu(apicid, cpu, tidle);
- if (err) {
+ if (err)
pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
- return err;
- }
- /*
- * Check TSC synchronization with the AP (keep irqs disabled
- * while doing so):
- */
- local_irq_save(flags);
- check_tsc_sync_source(cpu);
- local_irq_restore(flags);
+ return err;
+}
- while (!cpu_online(cpu)) {
- cpu_relax();
- touch_nmi_watchdog();
- }
+int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
+{
+ int ret;
- return 0;
+ ret = native_kick_ap(cpu, tidle);
+ if (ret)
+ goto out;
+
+ ret = wait_cpu_initialized(cpu);
+ if (ret)
+ goto out;
+
+ wait_cpu_callin(cpu);
+ wait_cpu_online(cpu);
+
+out:
+ /* Cleanup possible dangling ends... */
+ if (x86_platform.legacy.warm_reset)
+ smpboot_restore_warm_reset_vector();
+
+ return ret;
}
/**
Powered by blists - more mailing lists