MIPS get_cycles() only returns a 32-bit TSC (see timex.h). The assumption
there is that a reschedule is done every 8 seconds or so. Given that tracing
needs to detect delays longer than 8 seconds, we need a full 64-bit TSC,
which is provided by trace-clock-32-to-64.

I leave the "depends on !CPU_R4400_WORKAROUNDS" in Kconfig because the
solution proposed by Ralf to deal with the R4400 bug is racy, so let's just
not support this broken architecture. :(

Note for Peter Zijlstra: you should probably have a look at the
raw_spinlock_t lockdep_lock usage in lockdep.c. I suspect it may be used
with preemption enabled in graph_lock(). (I am not sure, but it's worth
double-checking.)

This patch uses the same cache-line bouncing algorithm used for x86. This is
a best-effort approach to support architectures lacking a synchronized TSC
without adding a lot of complexity too soon; it keeps room for improvement
in a second phase.

Signed-off-by: Mathieu Desnoyers
CC: Ralf Baechle
CC: Peter Zijlstra
---
 arch/mips/Kconfig                   |    3 
 arch/mips/include/asm/timex.h       |   17 +++
 arch/mips/include/asm/trace-clock.h |   68 ++++++++++++++
 arch/mips/kernel/Makefile           |    2 
 arch/mips/kernel/trace-clock.c      |  172 ++++++++++++++++++++++++++++++++++++
 5 files changed, 261 insertions(+), 1 deletion(-)
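For readers unfamiliar with trace-clock-32-to-64, here is a minimal sketch of
the general idea, not the actual implementation (all names below are made up
for illustration). A free-running 32-bit counter is extended to 64 bits in
software by detecting wrap-arounds, which works as long as the counter is
sampled at least once per wrap period -- at, say, a 500 MHz count rate that
is about 8.6 s, since 2^32 / 500000000 ~= 8.6, hence the 8-second figure
above:

/*
 * Sketch only: extend a 32-bit cycle counter to 64 bits by catching
 * wrap-arounds. A real implementation must synchronize concurrent
 * readers and updaters (omitted here for clarity).
 */
static u64 synthetic_msb;	/* accumulated wrap-arounds (high bits) */
static u32 last_lsb;		/* last sampled hardware counter value */

static u64 sketch_read_synthetic_tsc(void)
{
	u32 lsb = (u32)get_cycles();

	if (lsb < last_lsb)		/* counter wrapped since last read */
		synthetic_msb += 1ULL << 32;
	last_lsb = lsb;
	return synthetic_msb | lsb;
}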
Index: linux.trees.git/arch/mips/Kconfig
===================================================================
--- linux.trees.git.orig/arch/mips/Kconfig	2008-11-07 00:10:10.000000000 -0500
+++ linux.trees.git/arch/mips/Kconfig	2008-11-07 00:16:42.000000000 -0500
@@ -1614,6 +1614,9 @@ config CPU_R4400_WORKAROUNDS
 config HAVE_GET_CYCLES_32
 	def_bool y
 	depends on !CPU_R4400_WORKAROUNDS
+	select HAVE_TRACE_CLOCK
+	select HAVE_TRACE_CLOCK_32_TO_64
+	select HAVE_UNSYNCHRONIZED_TSC
 
 #
 # Use the generic interrupt handling code in kernel/irq/:
Index: linux.trees.git/arch/mips/include/asm/trace-clock.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux.trees.git/arch/mips/include/asm/trace-clock.h	2008-11-07 00:16:42.000000000 -0500
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2005,2008 Mathieu Desnoyers
+ *
+ * Trace clock MIPS definitions.
+ */
+
+#ifndef _ASM_MIPS_TRACE_CLOCK_H
+#define _ASM_MIPS_TRACE_CLOCK_H
+
+#include <linux/types.h>
+#include <linux/timex.h>
+
+#define TRACE_CLOCK_MIN_PROBE_DURATION 200
+
+extern u64 trace_clock_read_synthetic_tsc(void);
+
+/*
+ * MIPS get_cycles only returns a 32-bit TSC (see timex.h). The assumption
+ * there is that a reschedule is done every 8 seconds or so. Given that
+ * tracing needs to detect delays longer than 8 seconds, we need a full
+ * 64-bit TSC, which is provided by trace-clock-32-to-64.
+ */
+extern u64 trace_clock_async_tsc_read(void);
+
+static inline u32 trace_clock_read32(void)
+{
+	u32 cycles;
+
+	if (likely(tsc_is_sync()))
+		cycles = (u32)get_cycles(); /* only need the 32 LSB */
+	else
+		cycles = (u32)trace_clock_async_tsc_read();
+	return cycles;
+}
+
+static inline u64 trace_clock_read64(void)
+{
+	u64 cycles;
+
+	if (likely(tsc_is_sync()))
+		cycles = trace_clock_read_synthetic_tsc();
+	else
+		cycles = trace_clock_async_tsc_read();
+	return cycles;
+}
+
+static inline void trace_clock_add_timestamp(unsigned long ticks)
+{ }
+
+static inline unsigned int trace_clock_frequency(void)
+{
+	return mips_hpt_frequency;
+}
+
+static inline u32 trace_clock_freq_scale(void)
+{
+	return 1;
+}
+
+extern void get_trace_clock(void);
+extern void put_trace_clock(void);
+
+extern void get_synthetic_tsc(void);
+extern void put_synthetic_tsc(void);
+
+static inline void set_trace_clock_is_sync(int state)
+{
+}
+#endif /* _ASM_MIPS_TRACE_CLOCK_H */
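As an illustration of how the API above is meant to be used (hypothetical
caller, not part of this patch): a tracer takes a reference on the trace
clock once for the whole tracing session, reads timestamps from its probes,
and releases the reference when tracing stops:

/* Hypothetical tracer using the trace clock API (sketch only). */
static u64 session_start;
static u64 last_delta;

static void example_tracer_start(void)
{
	/* Starts the per-CPU resync machinery if the TSCs are not sync. */
	get_trace_clock();
	session_start = trace_clock_read64();
}

static void example_tracer_probe(void)
{
	/* Delta is in cycles; trace_clock_frequency() gives cycles/s. */
	last_delta = trace_clock_read64() - session_start;
}

static void example_tracer_stop(void)
{
	put_trace_clock();
}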
Index: linux.trees.git/arch/mips/kernel/trace-clock.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux.trees.git/arch/mips/kernel/trace-clock.c	2008-11-07 00:16:42.000000000 -0500
@@ -0,0 +1,172 @@
+/*
+ * arch/mips/kernel/trace-clock.c
+ *
+ * Trace clock for MIPS.
+ *
+ * Mathieu Desnoyers, October 2008
+ */
+
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/percpu.h>
+#include <linux/mutex.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+
+static u64 trace_clock_last_tsc;
+static DEFINE_PER_CPU(struct timer_list, update_timer);
+static DEFINE_MUTEX(async_tsc_mutex);
+static int async_tsc_refcount;	/* Number of readers */
+static int async_tsc_enabled;	/* Async TSC enabled on all online CPUs */
+
+/*
+ * Support for architectures with non-sync TSCs.
+ * When the local TSC is discovered to lag behind the highest TSC counter,
+ * we increment the TSC count by an amount that should be, ideally, lower
+ * than the execution time of this routine, in cycles: this is the
+ * granularity we look for, since we must be able to order the events.
+ */
+
+#if BITS_PER_LONG == 64
+notrace u64 trace_clock_async_tsc_read(void)
+{
+	u64 new_tsc, last_tsc;
+
+	WARN_ON(!async_tsc_refcount || !async_tsc_enabled);
+	new_tsc = trace_clock_read_synthetic_tsc();
+	do {
+		last_tsc = trace_clock_last_tsc;
+		if (new_tsc < last_tsc)
+			new_tsc = last_tsc + TRACE_CLOCK_MIN_PROBE_DURATION;
+		/*
+		 * If cmpxchg fails with a value higher than the new_tsc,
+		 * don't retry: the value has been incremented and the
+		 * events happened almost at the same time.
+		 * We must retry if cmpxchg fails with a lower value: it
+		 * means that we are the CPU with the highest frequency and
+		 * therefore MUST update the value.
+		 */
+	} while (cmpxchg64(&trace_clock_last_tsc, last_tsc, new_tsc) < new_tsc);
+	return new_tsc;
+}
+EXPORT_SYMBOL_GPL(trace_clock_async_tsc_read);
+#else
+/*
+ * Emulate an atomic 64-bit update with a spinlock.
+ * Note: preempt_disable or irq save must be explicit with raw_spinlock_t.
+ * Given we use a spinlock for this time base, we should never be called
+ * from NMI context.
+ */
+static raw_spinlock_t trace_clock_lock =
+	(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+
+static inline u64 trace_clock_cmpxchg64(u64 *ptr, u64 old, u64 new)
+{
+	u64 val;
+
+	val = *ptr;
+	if (likely(val == old))
+		*ptr = val = new;
+	return val;
+}
+
+notrace u64 trace_clock_async_tsc_read(void)
+{
+	u64 new_tsc, last_tsc;
+	unsigned long flags;
+
+	WARN_ON(!async_tsc_refcount || !async_tsc_enabled);
+	local_irq_save(flags);
+	__raw_spin_lock(&trace_clock_lock);
+	new_tsc = trace_clock_read_synthetic_tsc();
+	do {
+		last_tsc = trace_clock_last_tsc;
+		if (new_tsc < last_tsc)
+			new_tsc = last_tsc + TRACE_CLOCK_MIN_PROBE_DURATION;
+		/*
+		 * Same retry rules as the cmpxchg64-based version above.
+		 */
+	} while (trace_clock_cmpxchg64(&trace_clock_last_tsc, last_tsc,
+				       new_tsc) < new_tsc);
+	__raw_spin_unlock(&trace_clock_lock);
+	local_irq_restore(flags);
+	return new_tsc;
+}
+EXPORT_SYMBOL_GPL(trace_clock_async_tsc_read);
+#endif
+
+static void update_timer_ipi(void *info)
+{
+	(void)trace_clock_async_tsc_read();
+}
+
+/*
+ * update_timer_fct - Timer function to resync the clocks
+ * @data: unused
+ *
+ * Fires every jiffy.
+ */
+static void update_timer_fct(unsigned long data)
+{
+	(void)trace_clock_async_tsc_read();
+
+	per_cpu(update_timer, smp_processor_id()).expires = jiffies + 1;
+	add_timer_on(&per_cpu(update_timer, smp_processor_id()),
+		     smp_processor_id());
+}
+
+static void enable_trace_clock(int cpu)
+{
+	init_timer(&per_cpu(update_timer, cpu));
+	per_cpu(update_timer, cpu).function = update_timer_fct;
+	per_cpu(update_timer, cpu).expires = jiffies + 1;
+	smp_call_function_single(cpu, update_timer_ipi, NULL, 1);
+	add_timer_on(&per_cpu(update_timer, cpu), cpu);
+}
+
+static void disable_trace_clock(int cpu)
+{
+	del_timer_sync(&per_cpu(update_timer, cpu));
+}
+
+void get_trace_clock(void)
+{
+	int cpu;
+
+	mutex_lock(&async_tsc_mutex);
+	if (async_tsc_refcount++ || tsc_is_sync())
+		goto end;
+
+	async_tsc_enabled = 1;
+	for_each_online_cpu(cpu)
+		enable_trace_clock(cpu);
+end:
+	mutex_unlock(&async_tsc_mutex);
+	get_synthetic_tsc();
+}
+EXPORT_SYMBOL_GPL(get_trace_clock);
+
+void put_trace_clock(void)
+{
+	int cpu;
+
+	put_synthetic_tsc();
+	mutex_lock(&async_tsc_mutex);
+	WARN_ON(async_tsc_refcount <= 0);
+	if (async_tsc_refcount != 1 || !async_tsc_enabled)
+		goto end;
+
+	for_each_online_cpu(cpu)
+		disable_trace_clock(cpu);
+	async_tsc_enabled = 0;
+end:
+	async_tsc_refcount--;
+	mutex_unlock(&async_tsc_mutex);
+}
+EXPORT_SYMBOL_GPL(put_trace_clock);
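A quick sanity check on the per-jiffy resync above (figures assumed for
illustration, not from the patch): with HZ=100 the timer fires every 10 ms,
so at a 500 MHz count rate consecutive updates of trace_clock_last_tsc on a
given CPU are at most about 5 * 10^6 cycles apart. That is roughly three
orders of magnitude below the 2^32 (~4.3 * 10^9) cycle wrap period of the
32-bit counter, so the synthetic TSC always observes at most one wrap
between samples.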
Index: linux.trees.git/arch/mips/include/asm/timex.h
===================================================================
--- linux.trees.git.orig/arch/mips/include/asm/timex.h	2008-11-07 00:16:17.000000000 -0500
+++ linux.trees.git/arch/mips/include/asm/timex.h	2008-11-07 00:16:42.000000000 -0500
@@ -42,7 +42,7 @@
 
 typedef unsigned int cycles_t;
 
-#ifdef HAVE_GET_CYCLES_32
+#ifdef CONFIG_HAVE_GET_CYCLES_32
 static inline cycles_t get_cycles(void)
 {
 	return read_c0_count();
@@ -91,6 +91,21 @@ static inline void write_tsc(u32 val1, u
 
 extern unsigned int mips_hpt_frequency;
 
+/*
+ * Currently unused, should update internal tsc-related timekeeping sources.
+ */
+static inline void mark_tsc_unstable(char *reason)
+{
+}
+
+/*
+ * Currently simply uses the tsc_is_sync value.
+ */
+static inline int unsynchronized_tsc(void)
+{
+	return !tsc_is_sync();
+}
+
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_TIMEX_H */
Index: linux.trees.git/arch/mips/kernel/Makefile
===================================================================
--- linux.trees.git.orig/arch/mips/kernel/Makefile	2008-10-30 20:22:50.000000000 -0400
+++ linux.trees.git/arch/mips/kernel/Makefile	2008-11-07 00:16:42.000000000 -0500
@@ -85,6 +85,8 @@ obj-$(CONFIG_GPIO_TXX9)	+= gpio_txx9.o
 obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 
+obj-$(CONFIG_HAVE_GET_CYCLES_32) += trace-clock.o
+
 CFLAGS_cpu-bugs64.o	= $(shell if $(CC) $(KBUILD_CFLAGS) -Wa,-mdaddi -c -o /dev/null -xc /dev/null >/dev/null 2>&1; then echo "-DHAVE_AS_SET_DADDI"; fi)
 
 obj-$(CONFIG_HAVE_STD_PC_SERIAL_PORT)	+= 8250-platform.o

-- 
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68