Debug instrumentation for catching buggy clocksource reads: track a
per-timekeeper "cycle_error" when the delta since cycle_last gets
suspiciously close to wrapping (> mask/8), capture a raw frame-pointer
backtrace at the point of detection, and report it (with the saved
trace) from the timekeeper update paths.

NOTE(review): this is debug/tracing code, not production-quality —
tracebuffer is a single global (concurrent faults overwrite each
other), and do_save_back_trace() walks raw %rbp frames without
exception-table protection; confirm this is only ever used as a
temporary diagnostic.

 arch/x86/kernel/entry_64.S          |  5 +++
 include/linux/timekeeper_internal.h |  1 +
 kernel/time/timekeeping.c           | 78 +++++++++++++++++++++++++++++++++++--
 3 files changed, 81 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 9ebaf63ba182..0a4c34b4658e 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -312,6 +312,11 @@ ENTRY(save_paranoid)
 	CFI_ENDPROC
 END(save_paranoid)
 
+ENTRY(save_back_trace)
+	movq %rbp,%rdi
+	jmp do_save_back_trace
+END(save_back_trace)
+
 /*
  * A newly forked process directly context switches into this address.
  *
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 05af9a334893..0fcb60d77079 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -32,6 +32,7 @@ struct tk_read_base {
 	cycle_t			(*read)(struct clocksource *cs);
 	cycle_t			mask;
 	cycle_t			cycle_last;
+	cycle_t			cycle_error;
 	u32			mult;
 	u32			shift;
 	u64			xtime_nsec;
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 6a931852082f..1c924c80b462 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -140,6 +140,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
 	tk->tkr.read = clock->read;
 	tk->tkr.mask = clock->mask;
 	tk->tkr.cycle_last = tk->tkr.read(clock);
+	tk->tkr.cycle_error = 0;
 
 	/* Do the ns -> cycle conversion first, using original mult */
 	tmp = NTP_INTERVAL_LENGTH;
@@ -191,16 +192,59 @@ u32 (*arch_gettimeoffset)(void) = default_arch_gettimeoffset;
 static inline u32 arch_gettimeoffset(void) { return 0; }
 #endif
 
+unsigned long tracebuffer[16];
+
+extern void save_back_trace(long dummy, void *ptr);
+
+void do_save_back_trace(long rbp, void *ptr)
+{
+	int i;
+	unsigned long frame = rbp;
+
+	for (i = 0; i < 15; i++) {
+		unsigned long nextframe = ((unsigned long *)frame)[0];
+		unsigned long rip = ((unsigned long *)frame)[1];
+		tracebuffer[i] = rip;
+		if ((nextframe ^ frame) >> 13)
+			break;
+		if (nextframe <= frame)
+			break;
+		frame = nextframe;
+	}
+	tracebuffer[i] = 0;
+}
+
+/*
+ * At read time, we read "cycle_last" *before* we read
+ * the clock.
+ *
+ * At write time, we read the clock before we update
+ * 'cycle_last'.
+ *
+ * Thus, any 'cycle_last' value read here *must* be smaller
+ * than the clock read. Unless the clock is buggy.
+ */
 static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
 {
-	cycle_t cycle_now, delta;
+	cycle_t cycle_last, cycle_now, delta;
 	s64 nsec;
 
+	/* Read previous cycle - *before* reading clocksource */
+	cycle_last = smp_load_acquire(&tkr->cycle_last);
+
 	/* read clocksource: */
-	cycle_now = tkr->read(tkr->clock);
+	cycle_now = smp_load_acquire(&tkr->cycle_error);
+	cycle_now += tkr->read(tkr->clock);
 
 	/* calculate the delta since the last update_wall_time: */
-	delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
+	delta = clocksource_delta(cycle_now, cycle_last, tkr->mask);
+
+	/* Hmm? This is really not good, we're too close to overflowing */
+	if (unlikely(delta > (tkr->mask >> 3))) {
+		smp_store_release(&tkr->cycle_error, delta);
+		delta = 0;
+		save_back_trace(0, tracebuffer);
+	}
 
 	nsec = delta * tkr->mult + tkr->xtime_nsec;
 	nsec >>= tkr->shift;
@@ -465,6 +509,28 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
 	update_fast_timekeeper(tk);
 }
 
+static void check_cycle_error(struct tk_read_base *tkr)
+{
+	cycle_t error = tkr->cycle_error;
+
+	if (unlikely(error)) {
+		int i;
+		const char *sign = "";
+		tkr->cycle_error = 0;
+		if (error > tkr->mask/2) {
+			error = tkr->mask - error + 1;
+			sign = "-";
+		}
+		pr_err("Clocksource %s had cycles off by %s%llu\n", tkr->clock->name, sign, error);
+		for (i = 0; i < 16; i++) {
+			unsigned long rip = tracebuffer[i];
+			if (!rip)
+				break;
+			pr_err("  %pS\n", (void *)rip);
+		}
+	}
+}
+
 /**
  * timekeeping_forward_now - update clock to the current time
  *
@@ -481,6 +547,7 @@ static void timekeeping_forward_now(struct timekeeper *tk)
 	cycle_now = tk->tkr.read(clock);
 	delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask);
 	tk->tkr.cycle_last = cycle_now;
+	check_cycle_error(&tk->tkr);
 
 	tk->tkr.xtime_nsec += delta * tk->tkr.mult;
 
@@ -1237,6 +1304,7 @@ static void timekeeping_resume(void)
 
 	/* Re-base the last cycle value */
 	tk->tkr.cycle_last = cycle_now;
+	tk->tkr.cycle_error = 0;
 	tk->ntp_error = 0;
 	timekeeping_suspended = 0;
 	timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
@@ -1591,11 +1659,15 @@ void update_wall_time(void)
 	if (unlikely(timekeeping_suspended))
 		goto out;
 
+	check_cycle_error(&real_tk->tkr);
+
 #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
 	offset = real_tk->cycle_interval;
 #else
 	offset = clocksource_delta(tk->tkr.read(tk->tkr.clock),
 				   tk->tkr.cycle_last, tk->tkr.mask);
+	if (unlikely(offset > (tk->tkr.mask >> 3)))
+		pr_err("Cutting it too close for %s in update_wall_time (offset = %llu)\n", tk->tkr.clock->name, offset);
 #endif
 
 	/* Check if there's really nothing to do */