We didn't balance the runtime when throttled, this can cause large wakeup latencies. Suppose a task is migrated to another cpu right before the group quota runs out - its likely that the previuos cpu had a large amount of the group runtime, whereas the new cpu would be almost depleted (due to it being handed the other cpu). Now we exceed the runtime and get throttled - the period rollover tick will subtract the cpu quota from the runtime and check if we're below quota. However with this cpu having a very small portion of the runtime it will not refresh as fast as it should. Therefore, also rebalance the runtime when we're throttled. Signed-off-by: Peter Zijlstra --- kernel/sched_rt.c | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) Index: linux-2.6/kernel/sched_rt.c =================================================================== --- linux-2.6.orig/kernel/sched_rt.c +++ linux-2.6/kernel/sched_rt.c @@ -222,6 +222,28 @@ static inline struct rt_bandwidth *sched #endif +#ifdef CONFIG_SMP +static int do_balance_runtime(struct rt_rq *rt_rq); + +static int balance_runtime(struct rt_rq *rt_rq) +{ + int more = 0; + + if (rt_rq->rt_time > rt_rq->rt_runtime) { + spin_unlock(&rt_rq->rt_runtime_lock); + more = do_balance_runtime(rt_rq); + spin_lock(&rt_rq->rt_runtime_lock); + } + + return more; +} +#else +static inline int balance_runtime(struct rt_rq *rt_rq) +{ + return 0; +} +#endif + static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) { int i, idle = 1; @@ -241,6 +263,8 @@ static int do_sched_rt_period_timer(stru u64 runtime; spin_lock(&rt_rq->rt_runtime_lock); + if (rt_rq->rt_throttled) + balance_runtime(rt_rq); runtime = rt_rq->rt_runtime; rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime); if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) { @@ -261,7 +285,7 @@ static int do_sched_rt_period_timer(stru } #ifdef CONFIG_SMP -static int balance_runtime(struct rt_rq *rt_rq) +static int do_balance_runtime(struct rt_rq *rt_rq) { struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); struct root_domain *rd = cpu_rq(smp_processor_id())->rd; @@ -422,17 +446,10 @@ static int sched_rt_runtime_exceeded(str if (sched_rt_runtime(rt_rq) >= sched_rt_period(rt_rq)) return 0; -#ifdef CONFIG_SMP - if (rt_rq->rt_time > runtime) { - spin_unlock(&rt_rq->rt_runtime_lock); - balance_runtime(rt_rq); - spin_lock(&rt_rq->rt_runtime_lock); - - runtime = sched_rt_runtime(rt_rq); - if (runtime == RUNTIME_INF) - return 0; - } -#endif + balance_runtime(rt_rq); + runtime = sched_rt_runtime(rt_rq); + if (runtime == RUNTIME_INF) + return 0; if (rt_rq->rt_time > runtime) { rt_rq->rt_throttled = 1; -- -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/