lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Sat, 6 Aug 2011 09:39:05 -0500
From:	Jack Steiner <steiner@....com>
To:	Yinghai Lu <yhlu.kernel@...il.com>
Cc:	Robin Holt <holt@....com>, Ingo Molnar <mingo@...e.hu>,
	tglx@...utronix.de, davej@...hat.com, yinghan@...gle.com,
	linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2] x86: Reduce clock calibration time during slave cpu
	startup

On Fri, Aug 05, 2011 at 05:21:39PM -0700, Yinghai Lu wrote:
> On Fri, Aug 5, 2011 at 6:16 AM, Jack Steiner <steiner@....com> wrote:
> > Aside from the bogus comment (I'll send a V3 with a fixed comment), does the patch
> > look ok.
> 
> Several months ago, Robin said that he will test updated version
> 
> [PATCH] x86: Make calibrate_delay run in parallel.
> 
> so any reason that you sgi guyes stop that path?

I can take another look at Robin's patch. However, I though the one
I posted was simpler & less likely to cause unexpected behavior.

I'll look in more detail on Monday.....



> 
> Please check attached patch for updated version to current tip.
> 
> Thanks
> 
> Yinghai Lu

> [PATCH -v4] x86: Make calibrate_delay run in parallel.
> 
> On a 4096 cpu machine, we noticed that 318 seconds were taken for bringing
> up the cpus.  By specifying lpj=<value>, we reduced that to 75 seconds.
> Andi Kleen suggested we rework the calibrate_delay calls to run in
> parallel.
> 
> -v2: from Yinghai
>      two path: one for initial boot cpus. and one for hotplug cpus
> 	initial path:
> 	  after all cpu boot up, enter idle, use smp_call_function_many
> 	  let every ap call __calibrate_delay.
>           We can not put that calibrate_delay after local_irq_enable
>           in start_secondary(), at that time that cpu could be involed
> 	  with perf_event with nmi_watchdog enabling. that will cause
> 	  strange calibrating result.
>         hotplug path:
> 	  need to add cpu notify block.
>      add __calibrate_delay instead of changing calibrate_delay all over.
>      use cpu_calibrated_delay_mask instead.
>      use print_lpj to make print line complete.
> 
> Signed-off-by: Robin Holt <holt@....com>
> To: Andi Kleen <andi@...stfloor.org>
> Cc: Thomas Gleixner <tglx@...utronix.de>
> Cc: Ingo Molnar <mingo@...hat.com>
> Signed-off-by: Yinghai Lu <yinghai@...nel.org>
> 
> ---
>  arch/x86/include/asm/cpumask.h |    1 
>  arch/x86/kernel/cpu/common.c   |    3 ++
>  arch/x86/kernel/smpboot.c      |   58 ++++++++++++++++++++++++++++++++++-------
>  include/linux/delay.h          |    1 
>  init/calibrate.c               |   54 +++++++++++++++++++-------------------
>  5 files changed, 81 insertions(+), 36 deletions(-)
> 
> 
> --
> Index: linux-2.6/arch/x86/include/asm/cpumask.h
> ===================================================================
> --- linux-2.6.orig/arch/x86/include/asm/cpumask.h
> +++ linux-2.6/arch/x86/include/asm/cpumask.h
> @@ -6,6 +6,7 @@
>  extern cpumask_var_t cpu_callin_mask;
>  extern cpumask_var_t cpu_callout_mask;
>  extern cpumask_var_t cpu_initialized_mask;
> +extern cpumask_var_t cpu_calibrated_delay_mask;
>  extern cpumask_var_t cpu_sibling_setup_mask;
>  
>  extern void setup_cpu_local_masks(void);
> Index: linux-2.6/arch/x86/kernel/cpu/common.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/cpu/common.c
> +++ linux-2.6/arch/x86/kernel/cpu/common.c
> @@ -45,6 +45,7 @@
>  cpumask_var_t cpu_initialized_mask;
>  cpumask_var_t cpu_callout_mask;
>  cpumask_var_t cpu_callin_mask;
> +cpumask_var_t cpu_calibrated_delay_mask;
>  
>  /* representing cpus for which sibling maps can be computed */
>  cpumask_var_t cpu_sibling_setup_mask;
> @@ -58,6 +59,8 @@ void __init setup_cpu_local_masks(void)
>  	alloc_bootmem_cpumask_var(&cpu_callin_mask);
>  	set_bootmem_name("cpu_callout_mask");
>  	alloc_bootmem_cpumask_var(&cpu_callout_mask);
> +	set_bootmem_name("cpu_calibrated_delay_mask");
> +	alloc_bootmem_cpumask_var(&cpu_calibrated_delay_mask);
>  	set_bootmem_name("cpu_sibling_setup_mask");
>  	alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
>  }
> Index: linux-2.6/arch/x86/kernel/smpboot.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/smpboot.c
> +++ linux-2.6/arch/x86/kernel/smpboot.c
> @@ -52,6 +52,7 @@
>  #include <linux/gfp.h>
>  
>  #include <asm/acpi.h>
> +#include <asm/cpumask.h>
>  #include <asm/desc.h>
>  #include <asm/nmi.h>
>  #include <asm/irq.h>
> @@ -210,15 +211,7 @@ static void __cpuinit smp_callin(void)
>  	 * Need to setup vector mappings before we enable interrupts.
>  	 */
>  	setup_vector_irq(smp_processor_id());
> -	/*
> -	 * Get our bogomips.
> -	 *
> -	 * Need to enable IRQs because it can take longer and then
> -	 * the NMI watchdog might kill us.
> -	 */
> -	local_irq_enable();
> -	calibrate_delay();
> -	local_irq_disable();
> +
>  	pr_debug("Stack at about %p\n", &cpuid);
>  
>  	/*
> @@ -1038,6 +1031,8 @@ void __init native_smp_prepare_cpus(unsi
>  	}
>  	set_cpu_sibling_map(0);
>  
> +	/* already called earlier for boot cpu */
> +	cpumask_set_cpu(0, cpu_calibrated_delay_mask);
>  
>  	if (smp_sanity_check(max_cpus) < 0) {
>  		printk(KERN_INFO "SMP disabled\n");
> @@ -1126,8 +1121,53 @@ void __init native_smp_prepare_boot_cpu(
>  	per_cpu(cpu_state, me) = CPU_ONLINE;
>  }
>  
> +static void __cpuinit calibrate_delay_fn(void *info)
> +{
> +	int cpu = smp_processor_id();
> +
> +	cpu_data(cpu).loops_per_jiffy = __calibrate_delay(cpu);
> +	cpumask_set_cpu(cpu, cpu_calibrated_delay_mask);
> +}
> +
> +#ifdef CONFIG_HOTPLUG_CPU
> +static int __cpuinit
> +cal_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
> +{
> +	int cpu = (unsigned long)hcpu;
> +
> +	switch (action) {
> +	case CPU_ONLINE:
> +	case CPU_ONLINE_FROZEN:
> +		smp_call_function_single(cpu, calibrate_delay_fn, NULL, 1);
> +		break;
> +	}
> +
> +	return NOTIFY_OK;
> +}
> +
> +static __cpuinitdata struct notifier_block __cpuinitdata cal_cpu_nfb = {
> +	.notifier_call = cal_cpu_callback
> +};
> +
> +static void __init register_cal_cpu_nfb(void)
> +{
> +	register_cpu_notifier(&cal_cpu_nfb);
> +}
> +#else
> +static void __init register_cal_cpu_nfb(void)
> +{
> +}
> +#endif
> +
>  void __init native_smp_cpus_done(unsigned int max_cpus)
>  {
> +	smp_call_function_many(cpu_online_mask, calibrate_delay_fn, NULL, 0);
> +	while (cpumask_weight(cpu_calibrated_delay_mask) != num_online_cpus()) {
> +		cpu_relax();
> +		touch_nmi_watchdog();
> +	}
> +	register_cal_cpu_nfb();
> +
>  	pr_debug("Boot done.\n");
>  
>  	impress_friends();
> Index: linux-2.6/include/linux/delay.h
> ===================================================================
> --- linux-2.6.orig/include/linux/delay.h
> +++ linux-2.6/include/linux/delay.h
> @@ -43,6 +43,7 @@ static inline void ndelay(unsigned long
>  
>  extern unsigned long lpj_fine;
>  void calibrate_delay(void);
> +unsigned long __calibrate_delay(int cpu);
>  void msleep(unsigned int msecs);
>  unsigned long msleep_interruptible(unsigned int msecs);
>  void usleep_range(unsigned long min, unsigned long max);
> Index: linux-2.6/init/calibrate.c
> ===================================================================
> --- linux-2.6.orig/init/calibrate.c
> +++ linux-2.6/init/calibrate.c
> @@ -31,7 +31,7 @@ __setup("lpj=", lpj_setup);
>  #define DELAY_CALIBRATION_TICKS			((HZ < 100) ? 1 : (HZ/100))
>  #define MAX_DIRECT_CALIBRATION_RETRIES		5
>  
> -static unsigned long __cpuinit calibrate_delay_direct(void)
> +static unsigned long __cpuinit calibrate_delay_direct(int cpu)
>  {
>  	unsigned long pre_start, start, post_start;
>  	unsigned long pre_end, end, post_end;
> @@ -134,15 +134,9 @@ static unsigned long __cpuinit calibrate
>  		good_timer_count = 0;
>  		if ((measured_times[max] - estimate) <
>  				(estimate - measured_times[min])) {
> -			printk(KERN_NOTICE "calibrate_delay_direct() dropping "
> -					"min bogoMips estimate %d = %lu\n",
> -				min, measured_times[min]);
>  			measured_times[min] = 0;
>  			min = max;
>  		} else {
> -			printk(KERN_NOTICE "calibrate_delay_direct() dropping "
> -					"max bogoMips estimate %d = %lu\n",
> -				max, measured_times[max]);
>  			measured_times[max] = 0;
>  			max = min;
>  		}
> @@ -160,9 +154,9 @@ static unsigned long __cpuinit calibrate
>  
>  	}
>  
> -	printk(KERN_NOTICE "calibrate_delay_direct() failed to get a good "
> +	printk(KERN_NOTICE "CPU%d: calibrate_delay_direct() failed to get a good "
>  	       "estimate for loops_per_jiffy.\nProbably due to long platform "
> -		"interrupts. Consider using \"lpj=\" boot option.\n");
> +		"interrupts. Consider using \"lpj=\" boot option.\n", cpu);
>  	return 0;
>  }
>  #else
> @@ -246,32 +240,38 @@ recalibrate:
>  
>  static DEFINE_PER_CPU(unsigned long, cpu_loops_per_jiffy) = { 0 };
>  
> -void __cpuinit calibrate_delay(void)
> +static void __cpuinit print_lpj(int cpu, char *str, unsigned long lpj)
> +{
> +       pr_info("CPU%d: Calibrating delay%s"
> +               "%lu.%02lu BogoMIPS (lpj=%lu)\n", cpu, str,
> +               lpj/(500000/HZ), (lpj/(5000/HZ)) % 100, lpj);
> +}
> +
> +unsigned long __cpuinit __calibrate_delay(int cpu)
>  {
>  	unsigned long lpj;
> -	int this_cpu = smp_processor_id();
>  
> -	if (per_cpu(cpu_loops_per_jiffy, this_cpu)) {
> -		lpj = per_cpu(cpu_loops_per_jiffy, this_cpu);
> -		pr_info("Calibrating delay loop (skipped) "
> -				"already calibrated this CPU");
> +	if (per_cpu(cpu_loops_per_jiffy, cpu)) {
> +		lpj = per_cpu(cpu_loops_per_jiffy, cpu);
> +		print_lpj(cpu, " (skipped) already calibrated this CPU ", lpj);
>  	} else if (preset_lpj) {
>  		lpj = preset_lpj;
> -		pr_info("Calibrating delay loop (skipped) preset value.. ");
> -	} else if ((this_cpu == 0) && lpj_fine) {
> +		print_lpj(cpu, " (skipped) preset value.. ", lpj);
> +	} else if ((cpu == 0) && lpj_fine) {
>  		lpj = lpj_fine;
> -		pr_info("Calibrating delay loop (skipped), "
> -			"value calculated using timer frequency.. ");
> -	} else if ((lpj = calibrate_delay_direct()) != 0) {
> -		pr_info("Calibrating delay using timer specific routine.. ");
> +		print_lpj(cpu, " loop (skipped), value calculated using timer frequency.. ", lpj);
> +	} else if ((lpj = calibrate_delay_direct(cpu)) != 0) {
> +		print_lpj(cpu, " using timer specific routine.. ", lpj);
>  	} else {
> -		pr_info("Calibrating delay loop... ");
>  		lpj = calibrate_delay_converge();
> +		print_lpj(cpu, " delay loop... ", lpj);
>  	}
> -	per_cpu(cpu_loops_per_jiffy, this_cpu) = lpj;
> -	pr_cont("%lu.%02lu BogoMIPS (lpj=%lu)\n",
> -			lpj/(500000/HZ),
> -			(lpj/(5000/HZ)) % 100, lpj);
> +	per_cpu(cpu_loops_per_jiffy, cpu) = lpj;
>  
> -	loops_per_jiffy = lpj;
> +	return lpj;
> +}
> +
> +void __cpuinit calibrate_delay(void)
> +{
> +	loops_per_jiffy = __calibrate_delay(smp_processor_id());
>  }

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ