Message-Id: <1496949992-629076-1-git-send-email-subhra.mazumdar@oracle.com>
Date:   Thu,  8 Jun 2017 15:26:32 -0400
From:   Subhra Mazumdar <subhra.mazumdar@...cle.com>
To:     linux-kernel@...r.kernel.org
Cc:     peterz@...radead.org, mingo@...nel.org
Subject: [RFC PATCH] sched: select_idle_core should select least utilized core

Currently select_idle_core tries to find a fully idle core, and if that fails
select_idle_cpu falls back to returning any idle cpu in the LLC domain. This is
not optimal for architectures with many (more than 2) hyperthreads per core.
This patch changes select_idle_core to find the core with the fewest busy
hyperthreads and return an idle cpu in that core.
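
For illustration, the selection policy boils down to the following minimal,
self-contained userspace sketch (illustration only, not kernel code; the toy
topology, the idle[] array and pick_cpu() are made-up stand-ins for the
kernel's cpu_smt_mask()/idle_cpu() machinery, and task affinity is ignored
for brevity):

/*
 * Sketch of the policy: pick a fully idle core if one exists, otherwise pick
 * an idle strand on the core with the fewest busy strands.
 */
#include <limits.h>
#include <stdio.h>

#define NR_CORES	4
#define SMT_PER_CORE	8		/* e.g. 8 strands per core */

static int idle[NR_CORES][SMT_PER_CORE];	/* 1 = strand is idle */

static int pick_cpu(void)
{
	int min_util = INT_MAX, min_cpu = -1;

	for (int core = 0; core < NR_CORES; core++) {
		int util = 0, cp = -1;

		for (int s = 0; s < SMT_PER_CORE; s++) {
			if (!idle[core][s])
				util++;			/* busy strand */
			else if (cp == -1)
				cp = core * SMT_PER_CORE + s;
		}

		if (util == 0)				/* fully idle core */
			return core * SMT_PER_CORE;

		if (cp != -1 && util < min_util) {
			min_util = util;
			min_cpu = cp;
		}
	}

	return min_cpu;			/* -1 if no idle strand anywhere */
}

int main(void)
{
	/* Example load: cores 0 and 3 fully busy, core 1 has 2 busy
	 * strands, core 2 has 5 busy strands. */
	for (int c = 0; c < NR_CORES; c++)
		for (int s = 0; s < SMT_PER_CORE; s++)
			idle[c][s] = 1;
	for (int s = 0; s < SMT_PER_CORE; s++)
		idle[0][s] = idle[3][s] = 0;
	idle[1][0] = idle[1][1] = 0;
	for (int s = 0; s < 5; s++)
		idle[2][s] = 0;

	/* Expect a cpu on core 1, the least utilized core. */
	printf("picked cpu %d\n", pick_cpu());
	return 0;
}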

Signed-off-by: Subhra Mazumdar <subhra.mazumdar@...cle.com>
---
 kernel/sched/fair.c      |  113 +++++++++-------------------------------------
 kernel/sched/idle_task.c |    1 -
 kernel/sched/sched.h     |   10 ----
 3 files changed, 21 insertions(+), 103 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d711093..eb2c33c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5679,111 +5679,49 @@ static int cpumask_next_wrap(int n, const struct cpumask *mask, int start, int *
 
 #ifdef CONFIG_SCHED_SMT
 
-static inline void set_idle_cores(int cpu, int val)
-{
-	struct sched_domain_shared *sds;
-
-	sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
-	if (sds)
-		WRITE_ONCE(sds->has_idle_cores, val);
-}
-
-static inline bool test_idle_cores(int cpu, bool def)
-{
-	struct sched_domain_shared *sds;
-
-	sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
-	if (sds)
-		return READ_ONCE(sds->has_idle_cores);
-
-	return def;
-}
-
 /*
- * Scans the local SMT mask to see if the entire core is idle, and records this
- * information in sd_llc_shared->has_idle_cores.
- *
- * Since SMT siblings share all cache levels, inspecting this limited remote
- * state should be fairly cheap.
- */
-void __update_idle_core(struct rq *rq)
-{
-	int core = cpu_of(rq);
-	int cpu;
-
-	rcu_read_lock();
-	if (test_idle_cores(core, true))
-		goto unlock;
-
-	for_each_cpu(cpu, cpu_smt_mask(core)) {
-		if (cpu == core)
-			continue;
-
-		if (!idle_cpu(cpu))
-			goto unlock;
-	}
-
-	set_idle_cores(core, 1);
-unlock:
-	rcu_read_unlock();
-}
-
-/*
- * Scan the entire LLC domain for idle cores; this dynamically switches off if
- * there are no idle cores left in the system; tracked through
- * sd_llc->shared->has_idle_cores and enabled through update_idle_core() above.
+ * Scan the entire LLC domain for idle cores; find the core with the fewest
+ * busy strands and return an idle strand in that core.
  */
 static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target)
 {
 	struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
-	int core, cpu, wrap;
+	int core, cpu, wrap, min_util = INT_MAX, min_cpu = -1;
 
 	if (!static_branch_likely(&sched_smt_present))
 		return -1;
 
-	if (!test_idle_cores(target, false))
-		return -1;
-
 	cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed);
 
 	for_each_cpu_wrap(core, cpus, target, wrap) {
 		bool idle = true;
+		int util = 0;
+		int cp = -1;
 
 		for_each_cpu(cpu, cpu_smt_mask(core)) {
 			cpumask_clear_cpu(cpu, cpus);
-			if (!idle_cpu(cpu))
+			if (!idle_cpu(cpu)) {
 				idle = false;
+				util++;
+			} else if (cpumask_test_cpu(cpu, &p->cpus_allowed)) {
+				cp = cpu;
+			}
 		}
 
 		if (idle)
 			return core;
-	}
 
-	/*
-	 * Failed to find an idle core; stop looking for one.
-	 */
-	set_idle_cores(target, 0);
+		if (util < min_util && cp != -1) {
+			min_util = util;
+			min_cpu = cp;
+		}
+	}
 
-	return -1;
+	return min_cpu;
 }
 
-/*
- * Scan the local SMT mask for idle CPUs.
- */
-static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
+static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
 {
-	int cpu;
-
-	if (!static_branch_likely(&sched_smt_present))
-		return -1;
-
-	for_each_cpu(cpu, cpu_smt_mask(target)) {
-		if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
-			continue;
-		if (idle_cpu(cpu))
-			return cpu;
-	}
-
 	return -1;
 }
 
@@ -5794,13 +5732,6 @@ static inline int select_idle_core(struct task_struct *p, struct sched_domain *s
 	return -1;
 }
 
-static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
-{
-	return -1;
-}
-
-#endif /* CONFIG_SCHED_SMT */
-
 /*
  * Scan the LLC domain for idle CPUs; this is dynamically regulated by
  * comparing the average scan cost (tracked in sd->avg_scan_cost) against the
@@ -5830,8 +5761,8 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 	time = local_clock();
 
 	for_each_cpu_wrap(cpu, sched_domain_span(sd), target, wrap) {
-		if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
-			continue;
+		if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
+			continue;
 		if (idle_cpu(cpu))
 			break;
 	}
@@ -5844,6 +5775,8 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 	return cpu;
 }
 
+#endif /* CONFIG_SCHED_SMT */
+
 /*
  * Try and locate an idle core/thread in the LLC cache domain.
  */
@@ -5873,10 +5806,6 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	if ((unsigned)i < nr_cpumask_bits)
 		return i;
 
-	i = select_idle_smt(p, sd, target);
-	if ((unsigned)i < nr_cpumask_bits)
-		return i;
-
 	return target;
 }
 
diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c
index 0c00172..a3d5a7c 100644
--- a/kernel/sched/idle_task.c
+++ b/kernel/sched/idle_task.c
@@ -27,7 +27,6 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl
 pick_next_task_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 {
 	put_prev_task(rq, prev);
-	update_idle_core(rq);
 	schedstat_inc(rq->sched_goidle);
 	return rq->idle;
 }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 6dda2aa..96ef012 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -772,16 +772,6 @@ static inline int cpu_of(struct rq *rq)
 
 extern struct static_key_false sched_smt_present;
 
-extern void __update_idle_core(struct rq *rq);
-
-static inline void update_idle_core(struct rq *rq)
-{
-	if (static_branch_unlikely(&sched_smt_present))
-		__update_idle_core(rq);
-}
-
-#else
-static inline void update_idle_core(struct rq *rq) { }
 #endif
 
 DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
-- 
1.7.1
