Message-ID: <1333792489.12677.58.camel@marge.simpson.net>
Date:	Sat, 07 Apr 2012 11:54:49 +0200
From:	Mike Galbraith <efault@....de>
To:	LKML <linux-kernel@...r.kernel.org>
Cc:	Peter Zijlstra <a.p.zijlstra@...llo.nl>,
	Ingo Molnar <mingo@...hat.com>
Subject: RFC [patch] sched,cgroup_sched: convince RT_GROUP_SCHED throttle to
 work

Greetings,

I'm having trouble with the RT_GROUP_SCHED throttle kicking in and
staying engaged (timer troubles).  Either groups execute one after the
other (frob the timer above you), or bandwidth is wrong, or the thing
that got me squabbling with this in the first place happens: one group
or the other gets stuck, even with only two groups, with the root task
group throttled, and the victim stays marooned until I kill the cgroup
setup.  If (say) grp1 starts first, grp2 is screwed, or the other way
around.

With this patch, the thing appears to work perfectly, but it doesn't
look correct, since I'm futzing with ->rt_time where I should not.

Not-so-pretty ascii art:

/----------/system cpu 0-2, rt 300000-----/foo cpu 2, rt 100000
    \
     \
      \----/rtcpus cpu 3, rt 300000---\---/bar cpu 3, rt 100000
                                       \
                                        \-/baz cpu 3, rt 100000

It only takes two groups to reproduce, grp1 containing most of the
system, the other rt only.  With the patch, the above setup (the last
one I prodded the box with) works, and bandwidth looked fine whether I
twiddled the budgets or not.
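
For reference, a minimal sketch of how a hierarchy like the above can
be created (assuming cgroup v1 with the cpuset and cpu controllers
co-mounted at /cgroup, and a single memory node; paths, mems value and
budget numbers here are illustrative only, not necessarily the exact
setup I ran):

/*
 * Sketch: build the hierarchy from the picture above.  Parents are
 * created before children so each child's cpuset.cpus stays a subset
 * of its parent's, and child rt_runtime_us budgets fit inside the
 * parent's budget.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>

static void put(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		exit(1);
	}
	fprintf(f, "%s\n", val);
	fclose(f);
}

static void grp(const char *dir, const char *cpus, const char *rt_us)
{
	char path[256];

	mkdir(dir, 0755);
	snprintf(path, sizeof(path), "%s/cpuset.cpus", dir);
	put(path, cpus);
	snprintf(path, sizeof(path), "%s/cpuset.mems", dir);
	put(path, "0");	/* assumes a single memory node */
	snprintf(path, sizeof(path), "%s/cpu.rt_runtime_us", dir);
	put(path, rt_us);
}

int main(void)
{
	grp("/cgroup/system", "0-2", "300000");
	grp("/cgroup/system/foo", "2", "100000");
	grp("/cgroup/rtcpus", "3", "300000");
	grp("/cgroup/rtcpus/bar", "3", "100000");
	grp("/cgroup/rtcpus/baz", "3", "100000");
	return 0;
}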

I just happened to notice the throttle wasn't doing its thing right
after discovering that isolcpus is busted with RT_GROUP_SCHED.  Thought
I should probably beat on it a little.  The darn thing beat me back :)

---
 kernel/sched/rt.c |   76 +++++++++++++++++++++++++++++++-----------------------
 1 file changed, 45 insertions(+), 31 deletions(-)

--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -776,55 +776,69 @@ static inline int balance_runtime(struct
 }
 #endif /* CONFIG_SMP */
 
+#ifdef CONFIG_RT_GROUP_SCHED
+#define for_each_rt_rq_up_from(rt_rq, iter, rq)			\
+	for (iter = rt_rq->tg; iter; iter = iter->parent,	\
+		rt_rq = iter ? iter->rt_rq[cpu_of(rq)] : NULL)
+#else
+#define for_each_rt_rq_up_from(rt_rq, iter, rq)			\
+	for (iter = rt_rq; iter; iter = NULL)
+#endif
+
 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 {
 	int i, idle = 1, throttled = 0;
 	const struct cpumask *span;
+	rt_rq_iter_t iter;
 
 	span = sched_rt_period_mask();
 	for_each_cpu(i, span) {
-		int enqueue = 0;
+		int enqueue = 0, depth = 0;
 		struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
-		struct rq *rq = rq_of_rt_rq(rt_rq);
+		struct rq *rq = cpu_rq(i);
 
 		raw_spin_lock(&rq->lock);
-		if (rt_rq->rt_time) {
-			u64 runtime;
-
-			raw_spin_lock(&rt_rq->rt_runtime_lock);
-			if (rt_rq->rt_throttled)
-				balance_runtime(rt_rq);
-			runtime = rt_rq->rt_runtime;
-			rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
-			if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
-				rt_rq->rt_throttled = 0;
+		for_each_rt_rq_up_from(rt_rq, iter, rq) {
+			if (rt_rq->rt_time) {
+				u64 runtime;
+
+				raw_spin_lock(&rt_rq->rt_runtime_lock);
+				if (rt_rq->rt_throttled)
+					balance_runtime(rt_rq);
+				runtime = rt_rq->rt_runtime;
+				rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
+				if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
+					rt_rq->rt_throttled = 0;
+					enqueue = 1;
+
+					/*
+					 * Force a clock update if the CPU was idle,
+					 * lest wakeup -> unthrottle time accumulate.
+					 */
+					if (rt_rq->rt_nr_running && rq->curr == rq->idle)
+						rq->skip_clock_update = -1;
+				}
+				raw_spin_unlock(&rt_rq->rt_runtime_lock);
+			} else if (!rt_rq_throttled(rt_rq))
 				enqueue = 1;
 
-				/*
-				 * Force a clock update if the CPU was idle,
-				 * lest wakeup -> unthrottle time accumulate.
-				 */
-				if (rt_rq->rt_nr_running && rq->curr == rq->idle)
-					rq->skip_clock_update = -1;
+			if (enqueue)
+				sched_rt_rq_enqueue(rt_rq);
+
+			if (!depth++) {
+				if (rt_rq->rt_throttled) {
+					throttled = 1;
+					idle = 0;
+				} else if (rt_rq->rt_time || rt_rq->rt_nr_running)
+					idle = 0;
 			}
-			if (rt_rq->rt_time || rt_rq->rt_nr_running)
-				idle = 0;
-			raw_spin_unlock(&rt_rq->rt_runtime_lock);
-		} else if (rt_rq->rt_nr_running) {
-			idle = 0;
-			if (!rt_rq_throttled(rt_rq))
-				enqueue = 1;
-		}
-		if (rt_rq->rt_throttled)
-			throttled = 1;
 
-		if (enqueue)
-			sched_rt_rq_enqueue(rt_rq);
+		}
 		raw_spin_unlock(&rq->lock);
 	}
 
 	if (!throttled && (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF))
-		return 1;
+		idle = 1;
 
 	return idle;
 }


