lists.openwall.net | lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC | |
Open Source and information security mailing list archives
| ||
|
Message-ID: <20071207135525.GA5588@elte.hu> Date: Fri, 7 Dec 2007 14:55:25 +0100 From: Ingo Molnar <mingo@...e.hu> To: Guillaume Chazarain <guichaz@...oo.fr> Cc: Thomas Gleixner <tglx@...utronix.de>, Stefano Brivio <stefano.brivio@...imi.it>, Robert Love <rml@...h9.net>, linux-kernel@...r.kernel.org, Dave Jones <davej@...hat.com>, "Rafael J. Wysocki" <rjw@...k.pl>, Michael Buesch <mb@...sch.de>, "Andrew Morton"@pimp.vs19.net Subject: [patch] x86: scale cyc_2_nsec according to CPU frequency * Guillaume Chazarain <guichaz@...oo.fr> wrote: > > > Hmrpf. sched_clock() is used for the time stamp of the printks. We > > > need to find some better solution other than killing off the tsc > > > access completely. > > > > Something like http://lkml.org/lkml/2007/3/16/291 that would need > > some refresh? > > And here is a refreshed one just for testing with 2.6-git. The 64 bit > part is a shamelessly untested copy/paste as I cannot test it. Guillaume, I've updated your patch with a handful of changes - see the result below. Firstly, we don't need the 'offset' anymore because cpu_clock() maintains offsets itself. This simplifies the math and speeds up the sched_clock() common case. Secondly, with PER_CPU variables we need to update them for all possible CPUs - otherwise they might end up with a zero scaling factor which is not good. (not all CPUs are cpufreq capable) Thirdly, we can be a bit smarter and faster by using the fact that local_irq_disable() is preempt-safe - so we can use per_cpu() instead of get_cpu_var(). Ingo -----------------> Subject: x86: scale cyc_2_nsec according to CPU frequency From: "Guillaume Chazarain" <guichaz@...oo.fr> scale the sched_clock() cyc_2_nsec scaling factor according to CPU frequency changes. [ mingo@...e.hu: simplified it and fixed it for SMP. 
] Signed-off-by: Ingo Molnar <mingo@...e.hu> Signed-off-by: Thomas Gleixner <tglx@...utronix.de> --- arch/x86/kernel/tsc_32.c | 41 +++++++++++++++++++++++++++----- arch/x86/kernel/tsc_64.c | 59 +++++++++++++++++++++++++++++++++++++++-------- include/asm-x86/timer.h | 23 ++++++++++++++---- 3 files changed, 102 insertions(+), 21 deletions(-) Index: linux-x86.q/arch/x86/kernel/tsc_32.c =================================================================== --- linux-x86.q.orig/arch/x86/kernel/tsc_32.c +++ linux-x86.q/arch/x86/kernel/tsc_32.c @@ -5,6 +5,7 @@ #include <linux/jiffies.h> #include <linux/init.h> #include <linux/dmi.h> +#include <linux/percpu.h> #include <asm/delay.h> #include <asm/tsc.h> @@ -78,15 +79,31 @@ EXPORT_SYMBOL_GPL(check_tsc_unstable); * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. * (mathieu.desnoyers@...ymtl.ca) * + * ns += offset to avoid sched_clock jumps with cpufreq + * * -johnstul@...ibm.com "math is hard, lets go shopping!" */ -unsigned long cyc2ns_scale __read_mostly; -#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ +DEFINE_PER_CPU(unsigned long, cyc2ns); -static inline void set_cyc2ns_scale(unsigned long cpu_khz) +static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) { - cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; + unsigned long flags, prev_scale, *scale; + unsigned long long tsc_now, ns_now; + + local_irq_save(flags); + scale = &per_cpu(cyc2ns, cpu); + + rdtscll(tsc_now); + ns_now = __cycles_2_ns(tsc_now); + + prev_scale = *scale; + if (cpu_khz) + *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; + + printk("CPU#%d: changed cyc2ns scale from %ld to %ld\n", + cpu, prev_scale, *scale); + local_irq_restore(flags); } /* @@ -239,7 +256,9 @@ time_cpufreq_notifier(struct notifier_bl ref_freq, freq->new); if (!(freq->flags & CPUFREQ_CONST_LOOPS)) { tsc_khz = cpu_khz; - set_cyc2ns_scale(cpu_khz); + preempt_disable(); + set_cyc2ns_scale(cpu_khz, smp_processor_id()); + preempt_enable(); /* * 
TSC based sched_clock turns * to junk w/ cpufreq @@ -367,6 +386,8 @@ static inline void check_geode_tsc_relia void __init tsc_init(void) { + int cpu; + if (!cpu_has_tsc || tsc_disable) goto out_no_tsc; @@ -380,7 +401,15 @@ void __init tsc_init(void) (unsigned long)cpu_khz / 1000, (unsigned long)cpu_khz % 1000); - set_cyc2ns_scale(cpu_khz); + /* + * Secondary CPUs do not run through tsc_init(), so set up + * all the scale factors for all CPUs, assuming the same + * speed as the bootup CPU. (cpufreq notifiers will fix this + * up if their speed diverges) + */ + for_each_possible_cpu(cpu) + set_cyc2ns_scale(cpu_khz, cpu); + use_tsc_delay(); /* Check and install the TSC clocksource */ Index: linux-x86.q/arch/x86/kernel/tsc_64.c =================================================================== --- linux-x86.q.orig/arch/x86/kernel/tsc_64.c +++ linux-x86.q/arch/x86/kernel/tsc_64.c @@ -10,6 +10,7 @@ #include <asm/hpet.h> #include <asm/timex.h> +#include <asm/timer.h> static int notsc __initdata = 0; @@ -18,16 +19,50 @@ EXPORT_SYMBOL(cpu_khz); unsigned int tsc_khz; EXPORT_SYMBOL(tsc_khz); -static unsigned int cyc2ns_scale __read_mostly; +/* Accelerators for sched_clock() + * convert from cycles(64bits) => nanoseconds (64bits) + * basic equation: + * ns = cycles / (freq / ns_per_sec) + * ns = cycles * (ns_per_sec / freq) + * ns = cycles * (10^9 / (cpu_khz * 10^3)) + * ns = cycles * (10^6 / cpu_khz) + * + * Then we use scaling math (suggested by george@...sta.com) to get: + * ns = cycles * (10^6 * SC / cpu_khz) / SC + * ns = cycles * cyc2ns_scale / SC + * + * And since SC is a constant power of two, we can convert the div + * into a shift. + * + * We can use khz divisor instead of mhz to keep a better precision, since + * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. + * (mathieu.desnoyers@...ymtl.ca) + * + * ns += offset to avoid sched_clock jumps with cpufreq + * + * -johnstul@...ibm.com "math is hard, lets go shopping!" 
+ */ +DEFINE_PER_CPU(unsigned long, cyc2ns); -static inline void set_cyc2ns_scale(unsigned long khz) +static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) { - cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / khz; -} + unsigned long flags, prev_scale, *scale; + unsigned long long tsc_now, ns_now; -static unsigned long long cycles_2_ns(unsigned long long cyc) -{ - return (cyc * cyc2ns_scale) >> NS_SCALE; + local_irq_save(flags); + scale = &per_cpu(cyc2ns, cpu); + + rdtscll(tsc_now); + ns_now = __cycles_2_ns(tsc_now); + + prev_scale = *scale; + if (cpu_khz) + *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; + + printk("CPU#%d: changed cyc2ns scale from %ld to %ld\n", + cpu, prev_scale, *scale); + + local_irq_restore(flags); } unsigned long long sched_clock(void) @@ -100,7 +135,9 @@ static int time_cpufreq_notifier(struct mark_tsc_unstable("cpufreq changes"); } - set_cyc2ns_scale(tsc_khz_ref); + preempt_disable(); + set_cyc2ns_scale(tsc_khz_ref, smp_processor_id()); + preempt_enable(); return 0; } @@ -151,7 +188,7 @@ static unsigned long __init tsc_read_ref void __init tsc_calibrate(void) { unsigned long flags, tsc1, tsc2, tr1, tr2, pm1, pm2, hpet1, hpet2; - int hpet = is_hpet_enabled(); + int hpet = is_hpet_enabled(), cpu; local_irq_save(flags); @@ -206,7 +243,9 @@ void __init tsc_calibrate(void) } tsc_khz = tsc2 / tsc1; - set_cyc2ns_scale(tsc_khz); + + for_each_possible_cpu(cpu) + set_cyc2ns_scale(tsc_khz, cpu); } /* Index: linux-x86.q/include/asm-x86/timer.h =================================================================== --- linux-x86.q.orig/include/asm-x86/timer.h +++ linux-x86.q/include/asm-x86/timer.h @@ -2,6 +2,7 @@ #define _ASMi386_TIMER_H #include <linux/init.h> #include <linux/pm.h> +#include <linux/percpu.h> #define TICK_SIZE (tick_nsec / 1000) @@ -16,7 +17,7 @@ extern int recalibrate_cpu_khz(void); #define calculate_cpu_khz() native_calculate_cpu_khz() #endif -/* Accellerators for sched_clock() +/* Accelerators for sched_clock() * convert 
from cycles(64bits) => nanoseconds (64bits) * basic equation: * ns = cycles / (freq / ns_per_sec) @@ -31,20 +32,32 @@ extern int recalibrate_cpu_khz(void); * And since SC is a constant power of two, we can convert the div * into a shift. * - * We can use khz divisor instead of mhz to keep a better percision, since + * We can use khz divisor instead of mhz to keep a better precision, since * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. * (mathieu.desnoyers@...ymtl.ca) * * -johnstul@...ibm.com "math is hard, lets go shopping!" */ -extern unsigned long cyc2ns_scale __read_mostly; + +DECLARE_PER_CPU(unsigned long, cyc2ns); #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ -static inline unsigned long long cycles_2_ns(unsigned long long cyc) +static inline unsigned long long __cycles_2_ns(unsigned long long cyc) { - return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; + return cyc * per_cpu(cyc2ns, smp_processor_id()) >> CYC2NS_SCALE_FACTOR; } +static inline unsigned long long cycles_2_ns(unsigned long long cyc) +{ + unsigned long long ns; + unsigned long flags; + + local_irq_save(flags); + ns = __cycles_2_ns(cyc); + local_irq_restore(flags); + + return ns; +} #endif -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@...r.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists