We didn't balance the runtime when throttled, this can cause large wakeup
latencies. Suppose a task is migrated to another cpu right before the group
quota runs out - its likely that the previuos cpu had a large amount of the
group runtime, whereas the new cpu would be almost depleted (due to it being 
handed the other cpu).

Now we exceed the runtime and get throttled - the period rollover tick will
subtract the cpu quota from the runtime and check if we're below quota. However
with this cpu having a very small portion of the runtime it will not refresh
as fast as it should.

Therefore, also rebalance the runtime when we're throttled.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 kernel/sched_rt.c |   41 +++++++++++++++++++++++++++++------------
 1 file changed, 29 insertions(+), 12 deletions(-)

Index: linux-2.6/kernel/sched_rt.c
===================================================================
--- linux-2.6.orig/kernel/sched_rt.c
+++ linux-2.6/kernel/sched_rt.c
@@ -222,6 +222,28 @@ static inline struct rt_bandwidth *sched
 
 #endif
 
+#ifdef CONFIG_SMP
+static int do_balance_runtime(struct rt_rq *rt_rq);
+
+static int balance_runtime(struct rt_rq *rt_rq)
+{
+	int more = 0;
+
+	if (rt_rq->rt_time > rt_rq->rt_runtime) {
+		spin_unlock(&rt_rq->rt_runtime_lock);
+		more = do_balance_runtime(rt_rq);
+		spin_lock(&rt_rq->rt_runtime_lock);
+	}
+
+	return more;
+}
+#else
+static inline int balance_runtime(struct rt_rq *rt_rq)
+{
+	return 0;
+}
+#endif
+
 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 {
 	int i, idle = 1;
@@ -241,6 +263,8 @@ static int do_sched_rt_period_timer(stru
 			u64 runtime;
 
 			spin_lock(&rt_rq->rt_runtime_lock);
+			if (rt_rq->rt_throttled)
+				balance_runtime(rt_rq);
 			runtime = rt_rq->rt_runtime;
 			rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
 			if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
@@ -261,7 +285,7 @@ static int do_sched_rt_period_timer(stru
 }
 
 #ifdef CONFIG_SMP
-static int balance_runtime(struct rt_rq *rt_rq)
+static int do_balance_runtime(struct rt_rq *rt_rq)
 {
 	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
 	struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
@@ -422,17 +446,10 @@ static int sched_rt_runtime_exceeded(str
 	if (sched_rt_runtime(rt_rq) >= sched_rt_period(rt_rq))
 		return 0;
 
-#ifdef CONFIG_SMP
-	if (rt_rq->rt_time > runtime) {
-		spin_unlock(&rt_rq->rt_runtime_lock);
-		balance_runtime(rt_rq);
-		spin_lock(&rt_rq->rt_runtime_lock);
-
-		runtime = sched_rt_runtime(rt_rq);
-		if (runtime == RUNTIME_INF)
-			return 0;
-	}
-#endif
+	balance_runtime(rt_rq);
+	runtime = sched_rt_runtime(rt_rq);
+	if (runtime == RUNTIME_INF)
+		return 0;
 
 	if (rt_rq->rt_time > runtime) {
 		rt_rq->rt_throttled = 1;

-- 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/