lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Mon, 17 May 2010 10:14:45 +0800
From:	"Du, Alek" <alek.du@...el.com>
To:	Thomas Gleixner <tglx@...utronix.de>
CC:	Jacob Pan <jacob.jun.pan@...ux.intel.com>,
	"H. Peter Anvin" <hpa@...or.com>, Ingo Molnar <mingo@...e.hu>,
	Arjan van de Ven <arjan@...ux.intel.com>,
	"Tang, Feng" <feng.tang@...el.com>,
	LKML <linux-kernel@...r.kernel.org>,
	"Pan, Jacob jun" <jacob.jun.pan@...el.com>
Subject: Re: [PATCH 4/8] x86/mrst: change clock selection logic to support
 medfield

Hi tglx,

Please help to review this patch, it is against the latest patches Jacob sent out:
Basically the idea is to put bus_ratio and fsb in cpuinfo_x86 structure, and the
CPU early_init_intel function will fill the info.

>From 5ae648b2f18778df4eb3f1916a98971332482544 Mon Sep 17 00:00:00 2001
From: Jacob Pan <jacob.jun.pan@...ux.intel.com>
Date: Fri, 14 May 2010 13:45:46 -0700
Subject: [PATCH 1/2] x86/mrst: Auto detect freq for local timers

Some Intel CPUs can directly get fsb frequency and bus ratio from
various MSRs. This patch enables this feature and benefit Medfield
platform.

Signed-off-by: Alek Du <alek.du@...el.com>
Signed-off-by: Jacob Pan <jacob.jun.pan@...ux.intel.com>
---
 arch/x86/include/asm/processor.h |    3 +++
 arch/x86/kernel/cpu/intel.c      |   34 ++++++++++++++++++++++++++++++++++
 arch/x86/kernel/mrst.c           |   32 ++++++++++++++++++++++++--------
 3 files changed, 61 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 32428b4..f72107f 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -94,6 +94,9 @@ struct cpuinfo_x86 {
 	int			x86_cache_alignment;	/* In bytes */
 	int			x86_power;
 	unsigned long		loops_per_jiffy;
+	/* support TSC and LAPIC non-calibartion way */
+	__u32			bus_ratio;
+	__u32			fsb;	/* In khz */
 #ifdef CONFIG_SMP
 	/* cpus sharing the last level cache: */
 	cpumask_var_t		llc_shared_map;
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 85f69cd..f620abc 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -27,6 +27,38 @@
 #include <asm/apic.h>
 #endif
 
+/* MSR_FSB_FREQ in Khz */
+const u32 fsb[] = {266667, 133333, 200000, 166667, 333333, 100000, 400000, 0};
+/* detect Intel cpus that can do TSC/LAPIC non-calibration way */
+static void __cpuinit intel_tsc_fsb(struct cpuinfo_x86 *c)
+{
+	u32 lo, hi;
+
+	if (c->x86 != 6)
+		return;
+	if (c->x86_model != 0xf &&  /* core 2 duo */
+	    c->x86_model != 0x17 && /* core 2 extreme */
+	    c->x86_model != 0x1c && /* atom */
+	    c->x86_model != 0x26 && /* lincroft */
+	    c->x86_model != 0x27) /* penwell */
+		return;
+	rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
+	if (lo >> 31)
+		c->bus_ratio = (hi >> 8) & 0x1f;
+	else {
+		rdmsr(MSR_IA32_PLATFORM_ID, lo, hi);
+		c->bus_ratio = (lo >> 8) & 0x1f;
+	}
+	c->fsb = fsb[lo & 0x7];
+	if (c->x86_model == 0x27) { /* penwell special */
+		rdmsr(MSR_FSB_FREQ, lo, hi);
+		if ((lo & 0x7) == 0x7)
+			c->fsb = 83200;
+		else c->fsb = 99840;
+	}
+	printk(KERN_INFO "Detect CPU bus ratio %d, fsb %d khz\n", c->bus_ratio, c->fsb);
+}
+
 static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 {
 	/* Unmask CPUID levels if masked: */
@@ -46,6 +78,8 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 		(c->x86 == 0x6 && c->x86_model >= 0x0e))
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
 
+	intel_tsc_fsb(c);
+
 	/*
 	 * Atom erratum AAE44/AAF40/AAG38/AAH41:
 	 *
diff --git a/arch/x86/kernel/mrst.c b/arch/x86/kernel/mrst.c
index 9b62d32..c553d10 100644
--- a/arch/x86/kernel/mrst.c
+++ b/arch/x86/kernel/mrst.c
@@ -209,14 +209,31 @@ static unsigned long __init mrst_calibrate_tsc(void)
 {
 	unsigned long flags, fast_calibrate;
 
-	local_irq_save(flags);
-	fast_calibrate = apbt_quick_calibrate();
-	local_irq_restore(flags);
+	if (mrst_cpu_chip == MRST_CPU_CHIP_PENWELL) {
+		fast_calibrate = boot_cpu_data.bus_ratio * boot_cpu_data.fsb;
+		pr_debug("read penwell tsc %lu khz\n", fast_calibrate);
+		lapic_timer_frequency = boot_cpu_data.fsb * 1000 / HZ;
+		/* mark tsc clocksource as reliable */
+		set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE);
+	} else {
+		/**
+		 * TODO: calibrate lapic timer with apbt, if we use apbt only,
+		 * there is no need to calibrate lapic timer, since they are
+		 * not used.
+		 * if we use lapic timers and apbt, the default calibration
+		 * should work, since we have the global clockevent setup.
+		 * but it would be more efficient if we combine the lapic timer
+		 * with tsc calibration.
+		 */
+		local_irq_save(flags);
+		fast_calibrate = apbt_quick_calibrate();
+		local_irq_restore(flags);
+	}
 
-	if (fast_calibrate)
-		return fast_calibrate;
+	pr_info("tsc lapic calibration results %lu %d\n",
+			fast_calibrate, lapic_timer_frequency);
 
-	return 0;
+	return fast_calibrate;
 }
 
 void __init mrst_time_init(void)
@@ -271,8 +288,7 @@ static void __init mrst_setup_boot_clock(void)
 int mrst_identify_cpu(void)
 {
 	if (boot_cpu_data.x86 == 6 &&
-		boot_cpu_data.x86_model == 0x27 &&
-		boot_cpu_data.x86_mask == 1)
+		boot_cpu_data.x86_model == 0x27)
 		mrst_cpu_chip = MRST_CPU_CHIP_PENWELL;
 	else if (boot_cpu_data.x86 == 6 &&
 		boot_cpu_data.x86_model == 0x26)
-- 
1.7.0.4


On Tue, 11 May 2010 22:36:39 +0800
Thomas Gleixner <tglx@...utronix.de> wrote:

> On Fri, 7 May 2010, Jacob Pan wrote:
> 
> > From: Jacob Pan <jacob.jun.pan@...el.com>
> > 
> > Penwell has added always on lapic timers and tsc, we want to treat
> > it as a variant of moorestown so that one binary kernel can boot on both.
> > this patch added clock selction logic so that platform code can set up the
> > optimal configuration for both moorestown and medfield.
> > 
> > This patch will also mark Penwell TSC reliable, thus no need for
> > watchdog clocksource to monitor it.
> > 
> > Signed-off-by: Alek Du <alek.du@...el.com>
> > Signed-off-by: Jacob Pan <jacob.jun.pan@...el.com>
> > Signed-off-by: Jacob Pan <jacob.jun.pan@...ux.intel.com>
> > ---
> >  arch/x86/include/asm/mrst.h |   30 +++++++++++
> >  arch/x86/kernel/mrst.c      |  119 ++++++++++++++++++++++++++++++++++++++----
> >  2 files changed, 137 insertions(+), 12 deletions(-)
> > 
> > diff --git a/arch/x86/include/asm/mrst.h b/arch/x86/include/asm/mrst.h
> > index 451d30e..3054407 100644
> > --- a/arch/x86/include/asm/mrst.h
> > +++ b/arch/x86/include/asm/mrst.h
> > @@ -11,7 +11,37 @@
> >  #ifndef _ASM_X86_MRST_H
> >  #define _ASM_X86_MRST_H
> >  extern int pci_mrst_init(void);
> > +extern unsigned int calibration_result;
> 
> Yuck, why is this in a mrst specific header ?
> 
> > +
> > +#define MRST_TIMER_DEFAULT	0
> > +#define MRST_TIMER_APBT_ONLY	1
> > +#define MRST_TIMER_LAPIC_APBT	2
> 
> enum please, also 
> 
> > +/**
> > + * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock,
> > + * cmdline option x86_mrst_timer can be used to override the configuration
> > + * to prefer one or the other.
> > + * at runtime, there are basically three timer configurations:
> > + * 1. per cpu apbt clock only
> > + * 2. per cpu always-on lapic clocks only, this is Penwell/Medfield only
> > + * 3. per cpu lapic clock (C3STOP) and one apbt clock, with broadcast.
> > + *
> > + * by default (without cmdline option), platform code first detects cpu type
> > + * to see if we are on lincroft or penwell, then set up both lapic or apbt
> > + * clocks accordingly.
> > + * i.e. by default, medfield uses configuration #2, moorestown uses #1.
> > + * config #3 is supported but not recommended on medfield.
> > + *
> > + * rating and feature summary:
> > + * lapic (with C3STOP) --------- 100
> > + * apbt (always-on) ------------ 110
> 
>  apbt sucks performance wise, so why do you consider it a better
>  choice than lapic + broadcast ?
> 
> > + * lapic (always-on,ARAT) ------ 150
> > + */
> > +
> > +int mrst_timer_options __cpuinitdata;
> > +
> >  static u32 sfi_mtimer_usage[SFI_MTMR_MAX_NUM];
> >  static struct sfi_timer_table_entry sfi_mtimer_array[SFI_MTMR_MAX_NUM];
> > +static u32 mrst_cpu_chip;
> >  int sfi_mtimer_num;
> >  
> >  struct sfi_rtc_table_entry sfi_mrtc_array[SFI_MRTC_MAX];
> > @@ -167,15 +191,16 @@ int __init sfi_parse_mrtc(struct sfi_table_header *table)
> >  	return 0;
> >  }
> >  
> > -/*
> > - * the secondary clock in Moorestown can be APBT or LAPIC clock, default to
> > - * APBT but cmdline option can also override it.
> > - */
> >  static void __cpuinit mrst_setup_secondary_clock(void)
> >  {
> > -	/* restore default lapic clock if disabled by cmdline */
> > -	if (disable_apbt_percpu)
> > -		return setup_secondary_APIC_clock();
> > +	if ((mrst_timer_options == MRST_TIMER_APBT_ONLY))
> > +		return apbt_setup_secondary_clock();
> > +	if (cpu_has(&current_cpu_data, X86_FEATURE_ARAT)
> > +		||  (mrst_timer_options == MRST_TIMER_LAPIC_APBT)) {
> > +		pr_info("using lapic timers for secondary clock\n");
> > +		setup_secondary_APIC_clock();
> > +		return;
> 
>   The logic is confusing. 
> 
> > +	}
> >  	apbt_setup_secondary_clock();
> >  }
> >  
> > @@ -183,9 +208,45 @@ static unsigned long __init mrst_calibrate_tsc(void)
> >  {
> >  	unsigned long flags, fast_calibrate;
> >  
> > -	local_irq_save(flags);
> > -	fast_calibrate = apbt_quick_calibrate();
> > -	local_irq_restore(flags);
> > +	if (mrst_cpu_chip == MRST_CPU_CHIP_PENWELL) {
> > +		u32 lo, hi, ratio, fsb;
> > +
> > +		rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
> > +		pr_debug("IA32 perf status is 0x%x, 0x%0x\n", lo, hi);
> > +		ratio = (hi >> 8) & 0x1f;
> > +		pr_debug("ratio is %d\n", ratio);
> > +		if (!ratio) {
> > +			pr_err("read a zero ratio, should be incorrect!\n");
> > +			pr_err("force tsc ratio to 16 ...\n");
> > +			ratio = 16;
> > +		}
> 
>  This is not Penwell specific at all. The ratio can be read out on all
>  Core based CPUs either from MSR_PLATFORM_ID[12:8] or
>  MSR_PERF_STAT[44:40] depending on XE operation enabled
>  (MSR_PERF_STAT[31] == 1)
> 
>  This should be made general available and not burried into the mrst
>  code.
> 
> > +		rdmsr(MSR_FSB_FREQ, lo, hi);
> > +		if ((lo & 0x7) == 0x7)
> > +			fsb = PENWELL_FSB_FREQ_83SKU;
> > +		else
> > +			fsb = PENWELL_FSB_FREQ_100SKU;
> 
>  I guess the 111 is Penwell/MRST specific, right ?
> 
>  According to SDM we have anyway different results for the various CPU
>  families, but we should utilize this in a generic way and have the
>  translation tables for the various CPUs in one place.
> 
> > +		fast_calibrate = ratio * fsb;
> > +		pr_debug("read penwell tsc %lu khz\n", fast_calibrate);
> > +		calibration_result = fsb * 1000 / HZ;
> > +		/* mark tsc clocksource as reliable */
> > +		set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE);
> > +	} else {
> > +		/**
> > +		 * TODO: calibrate lapic timer with apbt, if we use apbt only,
> > +		 * there is no need to calibrate lapic timer, since they are
> > +		 * not used.
> > +		 * if we use lapic timers and apbt, the default calibration
> > +		 * should work, since we have the global clockevent setup.
> > +		 * but it would be more efficient if we combine the lapic timer
> > +		 * with tsc calibration.
> > +		 */
> > +		local_irq_save(flags);
> > +		fast_calibrate = apbt_quick_calibrate();
> > +		local_irq_restore(flags);
> > +	}
> > +
> > +	pr_info("tsc lapic calibration results %lu %d\n",
> > +			fast_calibrate, calibration_result);
> >  
> >  	if (fast_calibrate)
> >  		return fast_calibrate;
> > @@ -195,6 +256,11 @@ static unsigned long __init mrst_calibrate_tsc(void)
> >  
> >  void __init mrst_time_init(void)
> >  {
> > +	/* if cpu is penwell, lapic timer will be used by default */
> > +	if ((mrst_cpu_chip == MRST_CPU_CHIP_PENWELL) &&
> > +		(mrst_timer_options == MRST_TIMER_DEFAULT))
> > +		return;
> > +
> >  	sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr);
> >  	pre_init_apic_IRQ0();
> >  	apbt_time_init();
> > @@ -211,11 +277,38 @@ void __init mrst_rtc_init(void)
> >   */
> >  static void __init mrst_setup_boot_clock(void)
> >  {
> > -	pr_info("%s: per cpu apbt flag %d \n", __func__, disable_apbt_percpu);
> > -	if (disable_apbt_percpu)
> > +	if (mrst_timer_options == MRST_TIMER_APBT_ONLY)
> > +		return;
> > +	if ((mrst_timer_options == MRST_TIMER_LAPIC_APBT)
> > +		|| (mrst_cpu_chip == MRST_CPU_CHIP_PENWELL))
> >  		setup_boot_APIC_clock();
> >  };
> >  
> > +enum cpuid_regs {
> > +	CR_EAX = 0,
> > +	CR_ECX,
> > +	CR_EDX,
> > +	CR_EBX
> > +};
> > +
> > +int mrst_identify_cpu(void)
> > +{
> > +	u32 regs[4];
> > +
> > +	cpuid(1, &regs[CR_EAX], &regs[CR_EBX], &regs[CR_ECX], &regs[CR_EDX]);
> 
>   Yikes. From which Intel cookbook is this ?
> 
>   Aside of that we have that info in boot_cpu_info already, don't we ?
>   So there is neither a requirement for the extra cpuid call nor for
>   the extra mrst_cpu_chip id magic.
> 
> > +	if ((regs[CR_EAX] & PENWELL_FAMILY) == PENWELL_FAMILY)
> > +		mrst_cpu_chip = MRST_CPU_CHIP_PENWELL;
> > +	else
> > +		mrst_cpu_chip = MRST_CPU_CHIP_LINCROFT;
> 
> 
> > +	pr_debug("cpuid result %x\n", regs[CR_EAX]);
> > +	pr_info("Moorestown CPU %s identified\n",
> > +		(mrst_cpu_chip == MRST_CPU_CHIP_LINCROFT) ?
> > +		"Lincroft" : "Penwell");
> 
>   Are we going to add one of those for each new family ? This is
>   really redundant bloat with no value.
> 
> > +	return mrst_cpu_chip;
> > +}
> > +
> >  /*
> >   * Moorestown specific x86_init function overrides and early setup
> >   * calls.
> > @@ -237,4 +330,6 @@ void __init x86_mrst_early_setup(void)
> >  	x86_init.pci.fixup_irqs = x86_init_noop;
> >  
> >  	legacy_pic = &null_legacy_pic;
> > +
> > +	mrst_identify_cpu();
> >  }
> > -- 
> > 1.6.3.3
> > 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ