catchup_timer_jiffies() has been applied blindly to several functions without looking for possible better ways to do it. 1) internal_add_timer() Move the update to base->all_timers before we actually insert the timer into the wheel. 2) detach_if_pending() Again the update to base->all_timers allows us to explicitly do the timer_jiffies update in place, if this was the last timer which got removed. 3) __run_timers() We only check on entry, which is silly, because base->timer_jiffies can be behind - especially on NOHZ kernels - and if there is a single deferrable timer somewhere between base->timer_jiffies and jiffies we expire it and then loop until base->timer_jiffies == jiffies. Move it into the loop. Signed-off-by: Thomas Gleixner --- kernel/time/timer.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) Index: tip/kernel/time/timer.c =================================================================== --- tip.orig/kernel/time/timer.c +++ tip/kernel/time/timer.c @@ -356,7 +356,7 @@ EXPORT_SYMBOL_GPL(set_timer_slack); * The caller must hold the tvec_base lock. Returns true if the list * was empty and therefore ->timer_jiffies was updated. 
*/ -static bool catchup_timer_jiffies(struct tvec_base *base) +static inline bool catchup_timer_jiffies(struct tvec_base *base) { if (!base->all_timers) { base->timer_jiffies = jiffies; @@ -411,7 +411,10 @@ __internal_add_timer(struct tvec_base *b static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) { - (void)catchup_timer_jiffies(base); + /* Advance base->jiffies, if the base is empty */ + if (!base->all_timers++) + base->timer_jiffies = jiffies; + __internal_add_timer(base, timer); /* * Update base->active_timers and base->next_timer @@ -421,7 +424,6 @@ static void internal_add_timer(struct tv time_before(timer->expires, base->next_timer)) base->next_timer = timer->expires; } - base->all_timers++; /* * Check whether the other CPU is in dynticks mode and needs @@ -719,7 +721,6 @@ detach_expired_timer(struct timer_list * if (!tbase_get_deferrable(timer->base)) base->active_timers--; base->all_timers--; - (void)catchup_timer_jiffies(base); } static int detach_if_pending(struct timer_list *timer, struct tvec_base *base, @@ -734,8 +735,9 @@ static int detach_if_pending(struct time if (timer->expires == base->next_timer) base->next_timer = base->timer_jiffies; } - base->all_timers--; - (void)catchup_timer_jiffies(base); + /* If this was the last timer, advance base->jiffies */ + if (!--base->all_timers) + base->timer_jiffies = jiffies; return 1; } @@ -1185,14 +1187,16 @@ static inline void __run_timers(struct t struct timer_list *timer; spin_lock_irq(&base->lock); - if (catchup_timer_jiffies(base)) { - spin_unlock_irq(&base->lock); - return; - } + while (time_after_eq(jiffies, base->timer_jiffies)) { struct list_head work_list; struct list_head *head = &work_list; - int index = base->timer_jiffies & TVR_MASK; + int index; + + if (catchup_timer_jiffies(base)) + break; + + index = base->timer_jiffies & TVR_MASK; /* * Cascade timers: -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to 
majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/