linux-kernel - Re: [PATCH] sched/rt: don't try to balance rt

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite for Android: free password hash cracker in your pocket

[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]

Message-ID: <20140519124052.GA30445@twins.programming.kicks-ass.net>
Date:	Mon, 19 May 2014 14:40:52 +0200
From:	Peter Zijlstra <peterz@...radead.org>
To:	Paul Gortmaker <paul.gortmaker@...driver.com>
Cc:	linux-kernel@...r.kernel.org, linux-rt-users@...r.kernel.org,
	Ingo Molnar <mingo@...hat.com>,
	Steven Rostedt <rostedt@...dmis.org>,
	Thomas Gleixner <tglx@...utronix.de>,
	"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>
Subject: Re: [PATCH] sched/rt: don't try to balance rt_runtime when it is
 futile

On Wed, May 14, 2014 at 11:08:35AM -0400, Paul Gortmaker wrote:
> As of the old commit ac086bc22997a2be24fc40fc8d46522fe7e03d11
> ("sched: rt-group: smp balancing") the concept of borrowing per
> cpu rt_runtime from one core to another was introduced.
> 
> However, this prevents the RT throttling message from ever being
> emitted when someone does a common (but mistaken) attempt at
> using too much CPU in RT context.  Consider the following test:


So the alternative approach is something like the below, where we do not
let an rt_rq borrow more than the global bandwidth per CPU.

This whole runtime-sharing scheme is completely broken anyway, but I really
don't know what else we could do while still allowing RT tasks to set
arbitrary CPU affinities.


---
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7386,10 +7386,59 @@ static int __rt_schedulable(struct task_
 	return ret;
 }
 
+/*
+ * ret := (a * b) / d, with the product a * b computed in 128 bits
+ */
+static u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 d)
+{
+	/*
+	 * Compute the 128bit product:
+	 *   a * b ->
+	 *     [ a = (ah * 2^32 + al),  b = (bh * 2^32 + bl) ]
+	 *   -> (ah * bh) * 2^64 + (ah * bl + al * bh) * 2^32 + al * bl
+	 */
+	u32 ah = (a >> 32);
+	u32 bh = (b >> 32);
+	u32 al = a;
+	u32 bl = b;
+
+	u64 mh, mm, ml; /* high 64 bits, middle (2^32) term, low 64 bits */
+
+	mh = (u64)ah * bh;
+	mm = (u64)ah * bl + (u64)al * bh; /* NOTE(review): sum can wrap, carry lost */
+	ml = (u64)al * bl;
+
+	mh += mm >> 32; /* upper half of the middle term belongs to mh */
+	mm <<= 32; /* lower half, now aligned with ml */
+
+	ml += mm;
+	if (ml < mm) /* unsigned wrap in the addition: carry into mh */
+		mh++;
+
+	/*
+	 * Reduce the 128bit result to fit in a 64bit dividend; the low bits of
+	 * m and d are shifted out (inexact): m / d -> (m / 2^n) / (d / 2^n)
+	 */
+	while (mh) {
+		ml >>= 1;
+		if (mh & 1) /* the bit leaving mh enters the top of ml */
+			ml |= 1ULL << 63;
+		mh >>= 1;
+		d >>= 1;
+	}
+
+	if (unlikely(!d)) /* d was passed as 0 or was shifted down to 0 above */
+		return ml; /* skip the division; ml is returned undivided */
+
+	return div64_u64(ml, d);
+}
+
 static int tg_set_rt_bandwidth(struct task_group *tg,
 		u64 rt_period, u64 rt_runtime)
 {
 	int i, err = 0;
+	u64 g_period = global_rt_period();
+	u64 g_runtime = global_rt_runtime();
 
 	mutex_lock(&rt_constraints_mutex);
 	read_lock(&tasklist_lock);
@@ -7400,6 +7449,9 @@ static int tg_set_rt_bandwidth(struct ta
 	raw_spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
 	tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
 	tg->rt_bandwidth.rt_runtime = rt_runtime;
+	tg->rt_bandwidth.rt_max_runtime = (g_runtime == RUNTIME_INF) ?
+		rt_period :
+		mul_u64_u64_div_u64(rt_period, g_runtime, g_period);
 
 	for_each_possible_cpu(i) {
 		struct rt_rq *rt_rq = tg->rt_rq[i];
@@ -7577,6 +7629,7 @@ static int sched_rt_global_validate(void
 static void sched_rt_do_global(void)
 {
 	def_rt_bandwidth.rt_runtime = global_rt_runtime();
+	def_rt_bandwidth.rt_max_runtime = global_rt_runtime();
 	def_rt_bandwidth.rt_period = ns_to_ktime(global_rt_period());
 }
 
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -614,12 +614,12 @@ static int do_balance_runtime(struct rt_
 	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
 	struct root_domain *rd = rq_of_rt_rq(rt_rq)->rd;
 	int i, weight, more = 0;
-	u64 rt_period;
+	u64 rt_max_runtime;
 
 	weight = cpumask_weight(rd->span);
 
 	raw_spin_lock(&rt_b->rt_runtime_lock);
-	rt_period = ktime_to_ns(rt_b->rt_period);
+	rt_max_runtime = rt_b->rt_max_runtime;
 	for_each_cpu(i, rd->span) {
 		struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
 		s64 diff;
@@ -643,12 +643,12 @@ static int do_balance_runtime(struct rt_
 		diff = iter->rt_runtime - iter->rt_time;
 		if (diff > 0) {
 			diff = div_u64((u64)diff, weight);
-			if (rt_rq->rt_runtime + diff > rt_period)
-				diff = rt_period - rt_rq->rt_runtime;
+			if (rt_rq->rt_runtime + diff > rt_max_runtime)
+				diff = rt_max_runtime - rt_rq->rt_runtime;
 			iter->rt_runtime -= diff;
 			rt_rq->rt_runtime += diff;
 			more = 1;
-			if (rt_rq->rt_runtime == rt_period) {
+			if (rt_rq->rt_runtime == rt_max_runtime) {
 				raw_spin_unlock(&iter->rt_runtime_lock);
 				break;
 			}
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -124,6 +124,7 @@ struct rt_bandwidth {
 	raw_spinlock_t		rt_runtime_lock;
 	ktime_t			rt_period;
 	u64			rt_runtime;
+	u64			rt_max_runtime;
 	struct hrtimer		rt_period_timer;
 };
 /*

Content of type "application/pgp-signature" skipped