[ Impact: implement TIMER feature to diminish the latencies induced by wakeups performed by timer callbacks ] Ensure that timer callbacks triggering wakeups get served ASAP by giving timer-driven wakeups next-buddy affinity. My test program is wakeup-latency.c, originally provided by Nokia. A 10 ms timer spawns a thread which reads the time and shows a warning if the expected deadline has been missed by too much. It also warns about timer overruns. Without the TIMER and TIMER_FORK_EXPEDITED features (TIMER_FORK_EXPEDITED is not introduced by this patch): min priority: 0, max priority: 0 [...] maximum latency: 41453.6 µs, average latency: 4127.0 µs, missed timer events: 0. With the features enabled: min priority: 0, max priority: 0 [...] maximum latency: 10013.5 µs, average latency: 162.9 µs, missed timer events: 0. Signed-off-by: Mathieu Desnoyers CC: Peter Zijlstra --- include/linux/sched.h | 16 +++++++++++++++- kernel/hrtimer.c | 2 ++ kernel/itimer.c | 2 ++ kernel/posix-cpu-timers.c | 2 ++ kernel/posix-timers.c | 2 ++ kernel/sched.c | 9 +++++++++ kernel/sched_fair.c | 11 ++++++++--- kernel/sched_features.h | 4 ++++ kernel/timer.c | 2 ++ 9 files changed, 46 insertions(+), 4 deletions(-) Index: linux-2.6-lttng.laptop/include/linux/sched.h =================================================================== --- linux-2.6-lttng.laptop.orig/include/linux/sched.h +++ linux-2.6-lttng.laptop/include/linux/sched.h @@ -1027,12 +1027,14 @@ struct sched_domain; #define WF_SYNC (1 << 0) /* waker goes to sleep after wakup */ #define WF_FORK (1 << 1) /* child wakeup after fork */ #define WF_INTERACTIVE (1 << 2) /* interactivity-driven wakeup */ +#define WF_TIMER (1 << 3) /* timer-driven wakeup */ #define ENQUEUE_WAKEUP (1 << 0) #define ENQUEUE_WAKING (1 << 1) #define ENQUEUE_HEAD (1 << 2) #define ENQUEUE_IO (1 << 3) #define ENQUEUE_LATENCY (1 << 4) +#define ENQUEUE_TIMER (1 << 5) #define DEQUEUE_SLEEP (1 << 0) @@ -1128,7 +1130,8 @@ struct sched_entity { struct rb_node run_node; struct list_head group_node; unsigned int on_rq:1, - 
interactive:1; + interactive:1, + timer:1; u64 exec_start; u64 sum_exec_runtime; @@ -1242,6 +1245,7 @@ struct task_struct { unsigned sched_reset_on_fork:1; /* Revert to default * priority/policy on fork */ unsigned sched_wake_interactive:4; /* User-driven wakeup */ + unsigned sched_wake_timer:4; /* Timer-driven wakeup */ pid_t pid; pid_t tgid; @@ -1517,6 +1521,16 @@ static inline void sched_wake_interactiv current->sched_wake_interactive--; } +static inline void sched_wake_timer_enable(void) +{ + current->sched_wake_timer++; +} + +static inline void sched_wake_timer_disable(void) +{ + current->sched_wake_timer--; +} + /* Future-safe accessor for struct task_struct's cpus_allowed. */ #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) Index: linux-2.6-lttng.laptop/kernel/sched_features.h =================================================================== --- linux-2.6-lttng.laptop.orig/kernel/sched_features.h +++ linux-2.6-lttng.laptop/kernel/sched_features.h @@ -58,6 +58,10 @@ SCHED_FEAT(DYN_MIN_VRUNTIME, 0) * Input subsystem next buddy affinity. Not transitive across new task wakeups. */ SCHED_FEAT(INTERACTIVE, 0) +/* + * Timer subsystem next buddy affinity. Not transitive across new task wakeups. 
+ */ +SCHED_FEAT(TIMER, 0) /* * Spin-wait on mutex acquisition when the mutex owner is running on Index: linux-2.6-lttng.laptop/kernel/sched.c =================================================================== --- linux-2.6-lttng.laptop.orig/kernel/sched.c +++ linux-2.6-lttng.laptop/kernel/sched.c @@ -2295,6 +2295,13 @@ static int try_to_wake_up(struct task_st en_flags |= ENQUEUE_LATENCY; } + if (sched_feat(TIMER) && !(wake_flags & WF_FORK)) { + if (current->sched_wake_timer || + wake_flags & WF_TIMER || + current->se.timer) + en_flags |= ENQUEUE_TIMER; + } + this_cpu = get_cpu(); smp_wmb(); @@ -3623,6 +3630,8 @@ need_resched_nonpreemptible: else { if (sched_feat(INTERACTIVE)) prev->se.interactive = 0; + if (sched_feat(TIMER)) + prev->se.timer = 0; deactivate_task(rq, prev, DEQUEUE_SLEEP); } switch_count = &prev->nvcsw; Index: linux-2.6-lttng.laptop/kernel/sched_fair.c =================================================================== --- linux-2.6-lttng.laptop.orig/kernel/sched_fair.c +++ linux-2.6-lttng.laptop/kernel/sched_fair.c @@ -777,6 +777,9 @@ enqueue_entity(struct cfs_rq *cfs_rq, st if (sched_feat(INTERACTIVE) && flags & ENQUEUE_LATENCY && !(flags & ENQUEUE_IO)) se->interactive = 1; + if (sched_feat(TIMER) + && flags & ENQUEUE_TIMER && !(flags & ENQUEUE_IO)) + se->timer = 1; place_entity(cfs_rq, se, 0); enqueue_sleeper(cfs_rq, se); } @@ -923,7 +926,8 @@ static struct sched_entity *pick_next_en se = cfs_rq->last; /* - * Prefer the next buddy, only set through the interactivity logic. + * Prefer the next buddy, only set through the interactivity and timer + * logic. 
*/ if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1) se = cfs_rq->next; @@ -1674,8 +1678,9 @@ static void check_preempt_wakeup(struct if (unlikely(se == pse)) return; - if (sched_feat(INTERACTIVE) - && !(wake_flags & WF_FORK) && pse->interactive) { + if (!(wake_flags & WF_FORK) + && ((sched_feat(INTERACTIVE) && pse->interactive) + || (sched_feat(TIMER) && pse->timer))) { clear_buddies(cfs_rq, NULL); set_next_buddy(pse); preempt = 1; Index: linux-2.6-lttng.laptop/kernel/posix-timers.c =================================================================== --- linux-2.6-lttng.laptop.orig/kernel/posix-timers.c +++ linux-2.6-lttng.laptop/kernel/posix-timers.c @@ -402,6 +402,7 @@ static enum hrtimer_restart posix_timer_ int si_private = 0; enum hrtimer_restart ret = HRTIMER_NORESTART; + sched_wake_timer_enable(); timr = container_of(timer, struct k_itimer, it.real.timer); spin_lock_irqsave(&timr->it_lock, flags); @@ -456,6 +457,7 @@ static enum hrtimer_restart posix_timer_ } unlock_timer(timr, flags); + sched_wake_timer_disable(); return ret; } Index: linux-2.6-lttng.laptop/kernel/timer.c =================================================================== --- linux-2.6-lttng.laptop.orig/kernel/timer.c +++ linux-2.6-lttng.laptop/kernel/timer.c @@ -1038,6 +1038,7 @@ static void call_timer_fn(struct timer_l */ struct lockdep_map lockdep_map = timer->lockdep_map; #endif + sched_wake_timer_enable(); /* * Couple the lock chain with the lock chain at * del_timer_sync() by acquiring the lock_map around the fn() @@ -1062,6 +1063,7 @@ static void call_timer_fn(struct timer_l */ preempt_count() = preempt_count; } + sched_wake_timer_disable(); } #define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK) Index: linux-2.6-lttng.laptop/kernel/hrtimer.c =================================================================== --- linux-2.6-lttng.laptop.orig/kernel/hrtimer.c +++ linux-2.6-lttng.laptop/kernel/hrtimer.c @@ -1212,6 +1212,7 @@ static void 
__run_hrtimer(struct hrtimer WARN_ON(!irqs_disabled()); + sched_wake_timer_enable(); debug_deactivate(timer); __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0); timer_stats_account_hrtimer(timer); @@ -1238,6 +1239,7 @@ static void __run_hrtimer(struct hrtimer enqueue_hrtimer(timer, base); } timer->state &= ~HRTIMER_STATE_CALLBACK; + sched_wake_timer_disable(); } #ifdef CONFIG_HIGH_RES_TIMERS Index: linux-2.6-lttng.laptop/kernel/itimer.c =================================================================== --- linux-2.6-lttng.laptop.orig/kernel/itimer.c +++ linux-2.6-lttng.laptop/kernel/itimer.c @@ -129,7 +129,9 @@ enum hrtimer_restart it_real_fn(struct h trace_itimer_expire(ITIMER_REAL, sig->leader_pid, 0); trace_timer_itimer_expired(sig); + sched_wake_timer_enable(); kill_pid_info(SIGALRM, SEND_SIG_PRIV, sig->leader_pid); + sched_wake_timer_disable(); return HRTIMER_NORESTART; } Index: linux-2.6-lttng.laptop/kernel/posix-cpu-timers.c =================================================================== --- linux-2.6-lttng.laptop.orig/kernel/posix-cpu-timers.c +++ linux-2.6-lttng.laptop/kernel/posix-cpu-timers.c @@ -610,6 +610,7 @@ static void arm_timer(struct k_itimer *t */ static void cpu_timer_fire(struct k_itimer *timer) { + sched_wake_timer_enable(); if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) { /* * User don't want any signal. @@ -637,6 +638,7 @@ static void cpu_timer_fire(struct k_itim */ posix_cpu_timer_schedule(timer); } + sched_wake_timer_disable(); } /* -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/