Message-ID: <20140722120534.GD6462@pd.tnic>
Date: Tue, 22 Jul 2014 14:05:34 +0200
From: Borislav Petkov <bp@...en8.de>
To: Peter Zijlstra <peterz@...radead.org>,
	Thomas Gleixner <tglx@...utronix.de>
Cc: x86-ml <x86@...nel.org>,
	lkml <linux-kernel@...r.kernel.org>,
	Steven Rostedt <rostedt@...dmis.org>
Subject: Re: [PATCH] x86, TSC: Add a software TSC offset

Ok, here's the preempt-version we were talking about. Please don't look
at the vdso hunk - I had to make it build. Will fix properly later once
we've established whether this actually makes sense at all first.

:-)

--
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index bb9b258d60e7..8c27e55372fb 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -244,6 +244,7 @@
 #define X86_BUG_11AP		X86_BUG(5) /* Bad local APIC aka 11AP */
 #define X86_BUG_FXSAVE_LEAK	X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
 #define X86_BUG_CLFLUSH_MONITOR	X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
+#define X86_BUG_TSC_OFFSET	X86_BUG(8) /* CPU has skewed but stable TSCs */
 
 #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
 
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 94605c0e9cee..904bc182a16b 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -4,11 +4,11 @@
 #ifndef _ASM_X86_TSC_H
 #define _ASM_X86_TSC_H
 
-#include <asm/processor.h>
-
 #define NS_SCALE	10	/* 2^10, carefully chosen */
 #define US_SCALE	32	/* 2^32, arbitralrily chosen */
 
+DECLARE_PER_CPU(long long, tsc_offset);
+
 /*
  * Standard way to access the cycle counter.
  */
@@ -27,7 +27,10 @@ static inline cycles_t get_cycles(void)
 	if (!cpu_has_tsc)
 		return 0;
 #endif
+	preempt_disable();
 	rdtscll(ret);
+	ret += this_cpu_read_8(tsc_offset);
+	preempt_enable();
 
 	return ret;
 }
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 26488487bc61..97293b66fa65 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -28,6 +28,11 @@ static atomic_t start_count;
 static atomic_t stop_count;
 
 /*
+ * TSC offset helper counters.
+ */
+static atomic_t set_offset_on_target, offset_done;
+
+/*
  * We use a raw spinlock in this exceptional case, because
  * we want to have the fastest, inlined, non-debug version
  * of a critical section, to be able to prove TSC time-warps:
@@ -36,7 +41,10 @@ static arch_spinlock_t sync_lock = __ARCH_SPIN_LOCK_UNLOCKED;
 
 static cycles_t last_tsc;
 static cycles_t max_warp;
-static int nr_warps;
+static int nr_warps, max_warp_cpu;
+
+DEFINE_PER_CPU(long long, tsc_offset) = { 0 };
+EXPORT_PER_CPU_SYMBOL_GPL(tsc_offset);
 
 /*
  * TSC-warp measurement loop running on both CPUs:
@@ -89,6 +97,10 @@ static void check_tsc_warp(unsigned int timeout)
 			arch_spin_lock(&sync_lock);
 			max_warp = max(max_warp, prev - now);
 			nr_warps++;
+
+			if (prev - now == max_warp)
+				max_warp_cpu = smp_processor_id();
+
 			arch_spin_unlock(&sync_lock);
 		}
 	}
@@ -116,6 +128,69 @@ static inline unsigned int loop_timeout(int cpu)
 	return (cpumask_weight(cpu_core_mask(cpu)) > 1) ? 2 : 20;
 }
 
+static inline bool cpu_should_save_offset(int cpu)
+{
+	bool ret = static_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
+		   static_cpu_has(X86_FEATURE_NONSTOP_TSC);
+
+	if (ret)
+		set_cpu_bug(&cpu_data(cpu), X86_BUG_TSC_OFFSET);
+
+	return ret;
+}
+
+/*
+ * We're saving a per-core TSC offset only on machines which have a
+ * stable and non-stop TSC but which, for some reason, start their TSCs
+ * on the different nodes at different points in time, thus causing a
+ * small constant diff between them.
+ *
+ * We do this during the TSC sync check which happens between a source
+ * and a target CPU. When we detect the diff, we hold the target CPU by
+ * _not_ incrementing stop_count. What we do instead is we send it into
+ * compute_tsc_offset() below and store the max_warp difference we have
+ * measured above in a per-cpu variable.
+ *
+ * We do pay attention to which CPU saw the max_warp by writing its
+ * number into max_warp_cpu so that we can compute whether the offset
+ * we're going to write into the target's TSC is positive or negative.
+ *
+ * It is positive when the target CPU's TSC has started later than the
+ * source CPU's TSC and thus has a smaller TSC value.
+ *
+ * It is negative when the target CPU's TSC has started earlier than the
+ * source CPU's TSC and thus has a higher TSC value.
+ *
+ * Once we've computed the offset, we let both CPUs do the usual
+ * TSC sync check again, taking the offset into account, see
+ * get_cycles_aux().
+ *
+ * Called on the target.
+ */
+static void compute_tsc_offset(int cpu)
+{
+	long long off;
+
+	/*
+	 * This CPU wrote last the max_warp above, means its TSC is smaller than
+	 * the source CPU which we're doing the sync check with.
+	 */
+	if (cpu == max_warp_cpu)
+		off = max_warp;
+	else
+		off = -max_warp;
+
+	per_cpu(tsc_offset, cpu) = off;
+	pr_info("CPU%d, saved offset: %lld\n", cpu, off);
+
+	nr_warps = 0;
+	max_warp = 0;
+	last_tsc = 0;
+
+	atomic_inc(&offset_done);
+	atomic_set(&set_offset_on_target, 0);
+}
+
 /*
  * Source CPU calls into this - it waits for the freshly booted
  * target CPU to arrive and then starts the measurement:
@@ -138,6 +213,7 @@ void check_tsc_sync_source(int cpu)
 		return;
 	}
 
+restart_src:
 	/*
 	 * Reset it - in case this is a second bootup:
 	 */
@@ -155,15 +231,27 @@ void check_tsc_sync_source(int cpu)
 
 	check_tsc_warp(loop_timeout(cpu));
 
+	/*
+	 * Wait for target to finish measurement:
+	 */
 	while (atomic_read(&stop_count) != cpus-1)
 		cpu_relax();
 
+	/* Analyze measurement */
 	if (nr_warps) {
-		pr_warning("TSC synchronization [CPU#%d -> CPU#%d]:\n",
-			smp_processor_id(), cpu);
-		pr_warning("Measured %Ld cycles TSC warp between CPUs, "
-			   "turning off TSC clock.\n", max_warp);
-		mark_tsc_unstable("check_tsc_sync_source failed");
+		if (cpu_should_save_offset(cpu) && !atomic_read(&offset_done)) {
+			pr_warn("TSCs of [CPU#%d -> CPU#%d] %lld cycles out of sync, saving offset.\n",
+				smp_processor_id(), cpu, max_warp);
+
+			atomic_set(&start_count, 0);
+			atomic_set(&set_offset_on_target, 1);
+
+			goto restart_src;
+		} else {
+			pr_warning("Measured %Ld(%d) cycles TSC warp between CPUs, "
+				   "turning off TSC clock.\n", max_warp, max_warp_cpu);
+			mark_tsc_unstable("check_tsc_sync_source failed");
+		}
 	} else {
 		pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n",
 			smp_processor_id(), cpu);
@@ -173,6 +261,7 @@ void check_tsc_sync_source(int cpu)
 	 * Reset it - just in case we boot another CPU later:
 	 */
 	atomic_set(&start_count, 0);
+	atomic_set(&offset_done, 0);
 	nr_warps = 0;
 	max_warp = 0;
 	last_tsc = 0;
@@ -188,11 +277,16 @@ void check_tsc_sync_source(int cpu)
  */
 void check_tsc_sync_target(void)
 {
+	int this_cpu = smp_processor_id();
 	int cpus = 2;
 
 	if (unsynchronized_tsc() || tsc_clocksource_reliable)
 		return;
 
+restart_tgt:
+	if (atomic_read(&set_offset_on_target))
+		compute_tsc_offset(this_cpu);
+
 	/*
 	 * Register this CPU's participation and wait for the
 	 * source CPU to start the measurement:
@@ -201,7 +295,7 @@ void check_tsc_sync_target(void)
 	while (atomic_read(&start_count) != cpus)
 		cpu_relax();
 
-	check_tsc_warp(loop_timeout(smp_processor_id()));
+	check_tsc_warp(loop_timeout(this_cpu));
 
 	/*
 	 * Ok, we are done:
@@ -211,6 +305,9 @@ void check_tsc_sync_target(void)
 	/*
 	 * Wait for the source CPU to print stuff:
 	 */
-	while (atomic_read(&stop_count) != cpus)
+	while (atomic_read(&stop_count) != cpus) {
+		if (atomic_read(&set_offset_on_target))
+			goto restart_tgt;
 		cpu_relax();
+	}
 }
diff --git a/arch/x86/vdso/vdso32/vclock_gettime.c b/arch/x86/vdso/vdso32/vclock_gettime.c
index 175cc72c0f68..d5cba62bbf46 100644
--- a/arch/x86/vdso/vdso32/vclock_gettime.c
+++ b/arch/x86/vdso/vdso32/vclock_gettime.c
@@ -25,6 +25,9 @@
 
 #define BUILD_VDSO32_64
 
+#undef this_cpu_read_8
+#define this_cpu_read_8(dummy)	(0)
+
 #endif
 
 #include "../vclock_gettime.c"

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--
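
For anyone trying to picture the sign convention in compute_tsc_offset()
above: the CPU that recorded max_warp is the one whose TSC started later
and therefore reads lower, so it gets the positive correction; adding the
per-CPU offset at read time, the way the patched get_cycles() does, then
brings the two counters back into agreement. Below is a minimal user-space
sketch of that idea, assuming two stable TSCs that merely started at
different times. read_tsc(), NR_CPUS_SIM and the fixed 1000-cycle skew are
made-up stand-ins for illustration, not kernel code.

/*
 * Minimal user-space sketch of the per-CPU software TSC offset idea.
 * Illustrative only: read_tsc(), NR_CPUS_SIM and the fixed skew are
 * invented here and are not part of the patch.
 */
#include <stdio.h>

#define NR_CPUS_SIM 2

static long long tsc_offset[NR_CPUS_SIM];	/* per-CPU correction */

/* Like the patched get_cycles(): raw counter value plus this CPU's offset. */
static long long read_tsc(int cpu, long long raw)
{
	return raw + tsc_offset[cpu];
}

int main(void)
{
	/* Pretend CPU1 (the target) started its TSC 1000 cycles after CPU0. */
	long long raw0 = 500000;	/* source CPU reading */
	long long raw1 = raw0 - 1000;	/* target CPU reading, same instant */

	/*
	 * The warp check sees the target's value behind the source's, so
	 * the target records max_warp and, being max_warp_cpu, stores the
	 * positive offset for itself.
	 */
	long long max_warp = raw0 - raw1;
	tsc_offset[1] = max_warp;

	/* Both reads now report the same value. */
	printf("CPU0: %lld, CPU1: %lld\n", read_tsc(0, raw0), read_tsc(1, raw1));
	return 0;
}

The same reasoning gives the negative branch: if the source CPU is the one
that observed the warp, the target's TSC is ahead, so the offset stored on
the target must subtract the skew instead.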