lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <71a578a0c5b39169fe74ad378ee41eaf546844ac.1543229820.git.viresh.kumar@linaro.org>
Date:   Mon, 26 Nov 2018 16:50:24 +0530
From:   Viresh Kumar <viresh.kumar@...aro.org>
To:     Ingo Molnar <mingo@...hat.com>,
        Peter Zijlstra <peterz@...radead.org>
Cc:     Viresh Kumar <viresh.kumar@...aro.org>,
        linux-kernel@...r.kernel.org,
        Vincent Guittot <vincent.guittot@...aro.org>, tkjos@...gle.com,
        Daniel Lezcano <daniel.lezcano@...aro.org>,
        quentin.perret@...aro.org, chris.redpath@....com,
        Dietmar.Eggemann@....com
Subject: [RFC][PATCH 2/2] sched: Enqueue tasks on a cpu with only SCHED_IDLE tasks

The scheduler tries to schedule a newly woken task on an idle CPU to
make sure the new task gets a chance to run as soon as possible, for
performance reasons.

The SCHED_IDLE scheduling policy is used for tasks which have the lowest
priority and there is no hurry in running them. If all the tasks
currently enqueued on a CPU have their policy set to SCHED_IDLE, then
any new (non-SCHED_IDLE) task enqueued on that CPU should normally get a
chance to run immediately. This patch takes advantage of this to save
power in some cases by avoiding waking up an idle CPU (which may be in
some deep idle state) and enqueuing the new task on a CPU which only has
SCHED_IDLE tasks.

Signed-off-by: Viresh Kumar <viresh.kumar@...aro.org>
---
 kernel/sched/core.c  | 23 ++++++++++++++++++++
 kernel/sched/fair.c  | 50 +++++++++++++++++++++++++++++++-------------
 kernel/sched/sched.h |  3 +++
 3 files changed, 62 insertions(+), 14 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3d87a28da378..176eed77b18e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4020,6 +4020,29 @@ int available_idle_cpu(int cpu)
 	return 1;
 }
 
+/* CPU only has SCHED_IDLE tasks enqueued */
+int cpu_only_has_sched_idle_tasks(int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+	return unlikely(rq->nr_running &&
+			rq->nr_running == rq->cfs.idle_h_nr_running);
+}
+
+int available_sched_idle_cpu(int cpu)
+{
+	if (vcpu_is_preempted(cpu))
+		return 0;
+
+	if (idle_cpu(cpu))
+		return 1;
+
+	if (cpu_only_has_sched_idle_tasks(cpu))
+		return 1;
+
+	return 0;
+}
+
 /**
  * idle_task - return the idle task for a given CPU.
  * @cpu: the processor in question.
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ad0b09ddddc0..3a029c740d51 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5623,9 +5623,10 @@ wake_affine_idle(int this_cpu, int prev_cpu, int sync)
 	 * on one CPU.
 	 */
 	if (available_idle_cpu(this_cpu) && cpus_share_cache(this_cpu, prev_cpu))
-		return available_idle_cpu(prev_cpu) ? prev_cpu : this_cpu;
+		return available_sched_idle_cpu(prev_cpu) ? prev_cpu : this_cpu;
 
-	if (sync && cpu_rq(this_cpu)->nr_running == 1)
+	if ((sync && cpu_rq(this_cpu)->nr_running == 1) ||
+	    cpu_only_has_sched_idle_tasks(this_cpu))
 		return this_cpu;
 
 	return nr_cpumask_bits;
@@ -5888,6 +5889,9 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
 				latest_idle_timestamp = rq->idle_stamp;
 				shallowest_idle_cpu = i;
 			}
+		} else if (cpu_only_has_sched_idle_tasks(i) && !vcpu_is_preempted(i)) {
+			/* Prefer CPU with only SCHED_IDLE tasks */
+			return i;
 		} else if (shallowest_idle_cpu == -1) {
 			load = weighted_cpuload(cpu_rq(i));
 			if (load < min_load) {
@@ -6049,7 +6053,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
  */
 static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
 {
-	int cpu;
+	int cpu, last_idle_cpu = -1;
 
 	if (!static_branch_likely(&sched_smt_present))
 		return -1;
@@ -6057,11 +6061,18 @@ static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int t
 	for_each_cpu(cpu, cpu_smt_mask(target)) {
 		if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
 			continue;
-		if (available_idle_cpu(cpu))
-			return cpu;
+		if (!vcpu_is_preempted(cpu)) {
+			if (idle_cpu(cpu)) {
+				/* Prefer CPU with only SCHED_IDLE tasks */
+				last_idle_cpu = cpu;
+				continue;
+			}
+			if (cpu_only_has_sched_idle_tasks(cpu))
+				return cpu;
+		}
 	}
 
-	return -1;
+	return last_idle_cpu;
 }
 
 #else /* CONFIG_SCHED_SMT */
@@ -6089,7 +6100,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 	u64 avg_cost, avg_idle;
 	u64 time, cost;
 	s64 delta;
-	int cpu, nr = INT_MAX;
+	int cpu, nr = INT_MAX, last_idle_cpu = -1;
 
 	this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
 	if (!this_sd)
@@ -6116,12 +6127,23 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 	time = local_clock();
 
 	for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
-		if (!--nr)
-			return -1;
+		if (!--nr) {
+			if (last_idle_cpu == -1)
+				return -1;
+			cpu = last_idle_cpu;
+			break;
+		}
 		if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
 			continue;
-		if (available_idle_cpu(cpu))
-			break;
+		if (!vcpu_is_preempted(cpu)) {
+			if (idle_cpu(cpu)) {
+				/* Prefer CPU with only SCHED_IDLE tasks */
+				last_idle_cpu = cpu;
+				continue;
+			}
+			if (cpu_only_has_sched_idle_tasks(cpu))
+				break;
+		}
 	}
 
 	time = local_clock() - time;
@@ -6140,13 +6162,13 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	struct sched_domain *sd;
 	int i, recent_used_cpu;
 
-	if (available_idle_cpu(target))
+	if (available_sched_idle_cpu(target))
 		return target;
 
 	/*
 	 * If the previous CPU is cache affine and idle, don't be stupid:
 	 */
-	if (prev != target && cpus_share_cache(prev, target) && available_idle_cpu(prev))
+	if (prev != target && cpus_share_cache(prev, target) && available_sched_idle_cpu(prev))
 		return prev;
 
 	/* Check a recently used CPU as a potential idle candidate: */
@@ -6154,7 +6176,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	if (recent_used_cpu != prev &&
 	    recent_used_cpu != target &&
 	    cpus_share_cache(recent_used_cpu, target) &&
-	    available_idle_cpu(recent_used_cpu) &&
+	    available_sched_idle_cpu(recent_used_cpu) &&
 	    cpumask_test_cpu(p->recent_used_cpu, &p->cpus_allowed)) {
 		/*
 		 * Replace recent_used_cpu with prev as it is a potential
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 86a388c506ac..ecd016c64ee2 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1828,6 +1828,9 @@ extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
 extern const_debug unsigned int sysctl_sched_nr_migrate;
 extern const_debug unsigned int sysctl_sched_migration_cost;
 
+extern int cpu_only_has_sched_idle_tasks(int cpu);
+extern int available_sched_idle_cpu(int cpu);
+
 #ifdef CONFIG_SCHED_HRTICK
 
 /*
-- 
2.19.1.568.g152ad8e3369a

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ