lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230208100336.mdnrvpsssa2sjodu@yy-desk-7060>
Date:   Wed, 8 Feb 2023 18:03:36 +0800
From:   Yuan Yao <yuan.yao@...ux.intel.com>
To:     Usama Arif <usama.arif@...edance.com>
Cc:     dwmw2@...radead.org, tglx@...utronix.de, kim.phillips@....com,
        arjan@...ux.intel.com, mingo@...hat.com, bp@...en8.de,
        dave.hansen@...ux.intel.com, hpa@...or.com, x86@...nel.org,
        pbonzini@...hat.com, paulmck@...nel.org,
        linux-kernel@...r.kernel.org, kvm@...r.kernel.org,
        rcu@...r.kernel.org, mimoja@...oja.de, hewenliang4@...wei.com,
        thomas.lendacky@....com, seanjc@...gle.com, pmenzel@...gen.mpg.de,
        fam.zheng@...edance.com, punit.agrawal@...edance.com,
        simon.evans@...edance.com, liangma@...ngbit.com,
        David Woodhouse <dwmw@...zon.co.uk>
Subject: Re: [PATCH v7 5/9] x86/smpboot: Split up native_cpu_up into separate
 phases and document them

On Tue, Feb 07, 2023 at 11:04:32PM +0000, Usama Arif wrote:
> From: David Woodhouse <dwmw@...zon.co.uk>
>
> There are four logical parts to what native_cpu_up() does on the BSP (or
> on the controlling CPU for a later hotplug):
>
>  1) Wake the AP by sending the INIT/SIPI/SIPI sequence.
>
>  2) Wait for the AP to make it as far as wait_for_master_cpu() which
>     sets that CPU's bit in cpu_initialized_mask, then sets the bit in
>     cpu_callout_mask to let the AP proceed through cpu_init().
>
>  3) Waits for the AP to finish cpu_init() and get as far as the
>     smp_callin() call, which sets that CPU's bit in cpu_callin_mask.
>
>  4) Perform the TSC synchronization and wait for the AP to actually
>     mark itself online in cpu_online_mask.
>
> In preparation to allow these phases to operate in parallel on multiple
> APs, split them out into separate functions and document the interactions
> a little more clearly in both the BSP and AP code paths.
>
> No functional change intended.
>
> [Usama Arif: fixed rebase conflict]
> Signed-off-by: David Woodhouse <dwmw@...zon.co.uk>
> Signed-off-by: Usama Arif <usama.arif@...edance.com>
> ---
>  arch/x86/kernel/smpboot.c | 182 +++++++++++++++++++++++++++-----------
>  1 file changed, 128 insertions(+), 54 deletions(-)
>
> diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
> index 3a793772a2aa..eee717086ab7 100644
> --- a/arch/x86/kernel/smpboot.c
> +++ b/arch/x86/kernel/smpboot.c
> @@ -204,6 +204,10 @@ static void smp_callin(void)
>
>  	wmb();
>
> +	/*
> +	 * This runs the AP through all the cpuhp states to its target
> +	 * state (CPUHP_ONLINE in the case of serial bringup).
> +	 */
>  	notify_cpu_starting(cpuid);
>
>  	/*
> @@ -231,17 +235,33 @@ static void notrace start_secondary(void *unused)
>  	load_cr3(swapper_pg_dir);
>  	__flush_tlb_all();
>  #endif
> +	/*
> +	 * Sync point with do_wait_cpu_initialized(). On boot, all secondary
> +	 * CPUs reach this stage after receiving INIT/SIPI from do_cpu_up()
> +	 * in the x86/cpu:kick cpuhp stage. At the start of cpu_init() they
> +	 * will wait for do_wait_cpu_initialized() to set their bit in
> +	 * smp_callout_mask to release them.

The last sentence of the comment looks confused. The fact is:

For serial case, The BSP waits AP to set cpu_initialized_mask from
wait_for_master_cpu() after fired INIT/SIPI, then AP starts to wait
cpu_callout_mask set by BSP from do_boot_cpu().

Or the comments below "Bringup step two:..." which also looks clear
enough then above.

> +	 */
>  	cpu_init_secondary();
>  	rcu_cpu_starting(raw_smp_processor_id());
>  	x86_cpuinit.early_percpu_clock_init();
> +
> +	/*
> +	 * Sync point with do_wait_cpu_callin(). The AP doesn't wait here
> +	 * but just sets the bit to let the controlling CPU (BSP) know that
> +	 * it's got this far.
> +	 */
>  	smp_callin();
>
>  	enable_start_cpu0 = 0;
>
>  	/* otherwise gcc will move up smp_processor_id before the cpu_init */
>  	barrier();
> +
>  	/*
> -	 * Check TSC synchronization with the boot CPU:
> +	 * Check TSC synchronization with the boot CPU (or whichever CPU
> +	 * is controlling the bringup). It will do its part of this from
> +	 * do_wait_cpu_online(), making it an implicit sync point.
>  	 */
>  	check_tsc_sync_target();
>
> @@ -254,6 +274,7 @@ static void notrace start_secondary(void *unused)
>  	 * half valid vector space.
>  	 */
>  	lock_vector_lock();
> +	/* Sync point with do_wait_cpu_online() */
>  	set_cpu_online(smp_processor_id(), true);
>  	lapic_online();
>  	unlock_vector_lock();
> @@ -1083,7 +1104,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
>  	unsigned long start_ip = real_mode_header->trampoline_start;
>
>  	unsigned long boot_error = 0;
> -	unsigned long timeout;
>
>  #ifdef CONFIG_X86_64
>  	/* If 64-bit wakeup method exists, use the 64-bit mode trampoline IP */
> @@ -1144,55 +1164,94 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
>  		boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid,
>  						     cpu0_nmi_registered);
>
> -	if (!boot_error) {
> -		/*
> -		 * Wait 10s total for first sign of life from AP
> -		 */
> -		boot_error = -1;
> -		timeout = jiffies + 10*HZ;
> -		while (time_before(jiffies, timeout)) {
> -			if (cpumask_test_cpu(cpu, cpu_initialized_mask)) {
> -				/*
> -				 * Tell AP to proceed with initialization
> -				 */
> -				cpumask_set_cpu(cpu, cpu_callout_mask);
> -				boot_error = 0;
> -				break;
> -			}
> -			schedule();
> -		}
> -	}
> +	return boot_error;
> +}
>
> -	if (!boot_error) {
> -		/*
> -		 * Wait till AP completes initial initialization
> -		 */
> -		while (!cpumask_test_cpu(cpu, cpu_callin_mask)) {
> -			/*
> -			 * Allow other tasks to run while we wait for the
> -			 * AP to come online. This also gives a chance
> -			 * for the MTRR work(triggered by the AP coming online)
> -			 * to be completed in the stop machine context.
> -			 */
> -			schedule();
> -		}
> +static int do_wait_cpu_cpumask(unsigned int cpu, const struct cpumask *mask)
> +{
> +	unsigned long timeout;
> +
> +	/*
> +	 * Wait up to 10s for the CPU to report in.
> +	 */
> +	timeout = jiffies + 10*HZ;
> +	while (time_before(jiffies, timeout)) {
> +		if (cpumask_test_cpu(cpu, mask))
> +			return 0;
> +
> +		schedule();
>  	}
> +	return -1;
> +}
>
> -	if (x86_platform.legacy.warm_reset) {
> -		/*
> -		 * Cleanup possible dangling ends...
> -		 */
> -		smpboot_restore_warm_reset_vector();
> +/*
> + * Bringup step two: Wait for the target AP to reach cpu_init_secondary()
> + * and thus wait_for_master_cpu(), then set cpu_callout_mask to allow it
> + * to proceed.  The AP will then proceed past setting its 'callin' bit
> + * and end up waiting in check_tsc_sync_target() until we reach
> + * do_wait_cpu_online() to tend to it.
> + */
> +static int do_wait_cpu_initialized(unsigned int cpu)
> +{
> +	/*
> +	 * Wait for first sign of life from AP.
> +	 */
> +	if (do_wait_cpu_cpumask(cpu, cpu_initialized_mask))
> +		return -1;
> +
> +	cpumask_set_cpu(cpu, cpu_callout_mask);
> +	return 0;
> +}
> +
> +/*
> + * Bringup step three: Wait for the target AP to reach smp_callin().
> + * The AP is not waiting for us here so we don't need to parallelise
> + * this step. Not entirely clear why we care about this, since we just
> + * proceed directly to TSC synchronization which is the next sync
> + * point with the AP anyway.
> + */
> +static int do_wait_cpu_callin(unsigned int cpu)
> +{
> +	/*
> +	 * Wait till AP completes initial initialization.
> +	 */
> +	return do_wait_cpu_cpumask(cpu, cpu_callin_mask);
> +}
> +
> +/*
> + * Bringup step four: Synchronize the TSC and wait for the target AP
> + * to reach set_cpu_online() in start_secondary().
> + */
> +static int do_wait_cpu_online(unsigned int cpu)
> +{
> +	unsigned long flags;
> +
> +	/*
> +	 * Check TSC synchronization with the AP (keep irqs disabled
> +	 * while doing so):
> +	 */
> +	local_irq_save(flags);
> +	check_tsc_sync_source(cpu);
> +	local_irq_restore(flags);
> +
> +	/*
> +	 * Wait for the AP to mark itself online. Not entirely
> +	 * clear why we care, since the generic cpuhp code will
> +	 * wait for it to each CPUHP_AP_ONLINE_IDLE before going
> +	 * ahead with the rest of the bringup anyway.
> +	 */
> +	while (!cpu_online(cpu)) {
> +		cpu_relax();
> +		touch_nmi_watchdog();
>  	}
>
> -	return boot_error;
> +	return 0;
>  }
>
> -int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
> +static int do_cpu_up(unsigned int cpu, struct task_struct *tidle)
>  {
>  	int apicid = apic->cpu_present_to_apicid(cpu);
>  	int cpu0_nmi_registered = 0;
> -	unsigned long flags;
>  	int err, ret = 0;
>
>  	lockdep_assert_irqs_enabled();
> @@ -1239,19 +1298,6 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
>  		goto unreg_nmi;
>  	}
>
> -	/*
> -	 * Check TSC synchronization with the AP (keep irqs disabled
> -	 * while doing so):
> -	 */
> -	local_irq_save(flags);
> -	check_tsc_sync_source(cpu);
> -	local_irq_restore(flags);
> -
> -	while (!cpu_online(cpu)) {
> -		cpu_relax();
> -		touch_nmi_watchdog();
> -	}
> -
>  unreg_nmi:
>  	/*
>  	 * Clean up the nmi handler. Do this after the callin and callout sync
> @@ -1263,6 +1309,34 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
>  	return ret;
>  }
>
> +int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
> +{
> +	int ret;
> +
> +	ret = do_cpu_up(cpu, tidle);
> +	if (ret)
> +		return ret;
> +
> +	ret = do_wait_cpu_initialized(cpu);
> +	if (ret)
> +		return ret;
> +
> +	ret = do_wait_cpu_callin(cpu);
> +	if (ret)
> +		return ret;
> +
> +	ret = do_wait_cpu_online(cpu);
> +
> +	if (x86_platform.legacy.warm_reset) {
> +		/*
> +		 * Cleanup possible dangling ends...
> +		 */
> +		smpboot_restore_warm_reset_vector();
> +	}
> +
> +	return ret;
> +}
> +
>  /**
>   * arch_disable_smp_support() - disables SMP support for x86 at runtime
>   */
> --
> 2.25.1
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ