lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1506384126-2862-3-git-send-email-rohit.k.jain@oracle.com>
Date:   Mon, 25 Sep 2017 17:02:05 -0700
From:   Rohit Jain <rohit.k.jain@...cle.com>
To:     linux-kernel@...r.kernel.org, eas-dev@...ts.linaro.org
Cc:     peterz@...radead.org, mingo@...hat.com, joelaf@...gle.com,
        atish.patra@...cle.com, vincent.guittot@...aro.org,
        dietmar.eggemann@....com, morten.rasmussen@....com
Subject: [PATCH 2/3] sched/fair: Introduce scaled capacity awareness in select_idle_sibling code path

While looking for CPUs to place tasks on, the scheduler completely
ignores the capacity stolen away by RT tasks and IRQ handling.

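Fix that in the select_idle_sibling() code path: select_idle_core(),
select_idle_smt() and select_idle_cpu() now prefer an idle CPU for which
full_capacity() is true and otherwise fall back to the idle CPU with the
highest capacity_of(). select_idle_sibling() itself only short-circuits
to target or prev when that CPU is both idle and at full capacity.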

Signed-off-by: Rohit Jain <rohit.k.jain@...cle.com>
---
 kernel/sched/fair.c | 54 ++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 43 insertions(+), 11 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index afb701f..19ff2c3 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6040,7 +6040,10 @@ void __update_idle_core(struct rq *rq)
 static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target)
 {
 	struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
-	int core, cpu;
+	int core, cpu, rcpu, rcpu_backup;
+	unsigned int backup_cap = 0;
+
+	rcpu = rcpu_backup = -1;
 
 	if (!static_branch_likely(&sched_smt_present))
 		return -1;
@@ -6057,10 +6060,20 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
 			cpumask_clear_cpu(cpu, cpus);
 			if (!idle_cpu(cpu))
 				idle = false;
+
+			if (full_capacity(cpu)) {
+				rcpu = cpu;
+			} else if ((rcpu == -1) && (capacity_of(cpu) > backup_cap)) {
+				backup_cap = capacity_of(cpu);
+				rcpu_backup = cpu;
+			}
 		}
 
-		if (idle)
-			return core;
+		if (idle) {
+			if (rcpu == -1)
+				return (rcpu_backup != -1 ? rcpu_backup : core);
+			return rcpu;
+		}
 	}
 
 	/*
@@ -6076,7 +6089,8 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
  */
 static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
 {
-	int cpu;
+	int cpu, backup_cpu = -1;
+	unsigned int backup_cap = 0;
 
 	if (!static_branch_likely(&sched_smt_present))
 		return -1;
@@ -6084,11 +6098,17 @@ static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int t
 	for_each_cpu(cpu, cpu_smt_mask(target)) {
 		if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
 			continue;
-		if (idle_cpu(cpu))
-			return cpu;
+		if (idle_cpu(cpu)) {
+			if (full_capacity(cpu))
+				return cpu;
+			if (capacity_of(cpu) > backup_cap) {
+				backup_cap = capacity_of(cpu);
+				backup_cpu = cpu;
+			}
+		}
 	}
 
-	return -1;
+	return backup_cpu;
 }
 
 #else /* CONFIG_SCHED_SMT */
@@ -6117,6 +6137,8 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 	u64 time, cost;
 	s64 delta;
 	int cpu, nr = INT_MAX;
+	int backup_cpu = -1;
+	unsigned int backup_cap = 0;
 
 	this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
 	if (!this_sd)
@@ -6147,10 +6169,19 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 			return -1;
 		if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
 			continue;
-		if (idle_cpu(cpu))
-			break;
+		if (idle_cpu(cpu)) {
+			if (full_capacity(cpu)) {
+				backup_cpu = -1;
+				break;
+			} else if (capacity_of(cpu) > backup_cap) {
+				backup_cap = capacity_of(cpu);
+				backup_cpu = cpu;
+			}
+		}
 	}
 
+	if (backup_cpu >= 0)
+		cpu = backup_cpu;
 	time = local_clock() - time;
 	cost = this_sd->avg_scan_cost;
 	delta = (s64)(time - cost) / 8;
@@ -6167,13 +6198,14 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	struct sched_domain *sd;
 	int i;
 
-	if (idle_cpu(target))
+	if (idle_cpu(target) && full_capacity(target))
 		return target;
 
 	/*
 	 * If the previous cpu is cache affine and idle, don't be stupid.
 	 */
-	if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev))
+	if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev)
+	    && full_capacity(prev))
 		return prev;
 
 	sd = rcu_dereference(per_cpu(sd_llc, target));
-- 
2.7.4

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ