Message-Id: <1505860632-11275-3-git-send-email-rohit.k.jain@oracle.com>
Date:   Tue, 19 Sep 2017 15:37:12 -0700
From:   Rohit Jain <rohit.k.jain@...cle.com>
To:     linux-kernel@...r.kernel.org, eas-dev@...ts.linaro.org
Cc:     peterz@...radead.org, mingo@...hat.com, joelaf@...gle.com
Subject: [PATCH 2/2] sched: Scheduler changes to make SCHED_SOFT_AFFINITY take effect

These are the changes that make the scheduler act on the cpus_preferred
mask. Keep in mind that when the system call changes the cpus_allowed
mask, cpus_preferred and cpus_allowed become the same.
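
To illustrate the idea, here is a minimal sketch (not part of the patch
itself) of the two-step search that the select_idle_*() paths below
perform: scan the preferred CPUs first, and fall back to the remaining
allowed CPUs only when no idle CPU was found there. scan_for_idle() and
search_preferred_then_allowed() are hypothetical stand-ins for the
scan_cpu_mask_for_idle_{cores,smt,cpu}() helpers added in this patch.

static int search_preferred_then_allowed(struct task_struct *p,
					 struct cpumask *scratch, int target)
{
	int cpu;

	/* Step 1: look for an idle CPU among the preferred CPUs only. */
	cpu = scan_for_idle(&p->cpus_preferred, target);
	if (cpu >= 0)
		return cpu;

	/* No soft affinity set: the two masks are equal, nothing left to scan. */
	if (cpumask_equal(&p->cpus_preferred, &p->cpus_allowed))
		return -1;

	/* Step 2: fall back to the allowed-but-not-preferred CPUs. */
	cpumask_andnot(scratch, &p->cpus_allowed, &p->cpus_preferred);
	return scan_for_idle(scratch, target);
}

The hunks below apply this pattern inside select_idle_core(),
select_idle_smt() and select_idle_cpu().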

Signed-off-by: Rohit Jain <rohit.k.jain@...cle.com>
---
 kernel/sched/cpudeadline.c |   4 +-
 kernel/sched/cpupri.c      |   4 +-
 kernel/sched/fair.c        | 116 +++++++++++++++++++++++++++++++++------------
 3 files changed, 91 insertions(+), 33 deletions(-)

diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index 8d9562d..32135b9 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -127,13 +127,13 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
 	const struct sched_dl_entity *dl_se = &p->dl;
 
 	if (later_mask &&
-	    cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed)) {
+	    cpumask_and(later_mask, cp->free_cpus, &p->cpus_preferred)) {
 		return 1;
 	} else {
 		int best_cpu = cpudl_maximum(cp);
 		WARN_ON(best_cpu != -1 && !cpu_present(best_cpu));
 
-		if (cpumask_test_cpu(best_cpu, &p->cpus_allowed) &&
+		if (cpumask_test_cpu(best_cpu, &p->cpus_preferred) &&
 		    dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
 			if (later_mask)
 				cpumask_set_cpu(best_cpu, later_mask);
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index 2511aba..9641b8d 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -103,11 +103,11 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
 		if (skip)
 			continue;
 
-		if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
+		if (cpumask_any_and(&p->cpus_preferred, vec->mask) >= nr_cpu_ids)
 			continue;
 
 		if (lowest_mask) {
-			cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
+			cpumask_and(lowest_mask, &p->cpus_preferred, vec->mask);
 
 			/*
 			 * We have to ensure that we have at least one bit
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index eca6a57..35e73c7 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5805,7 +5805,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
 
 		/* Skip over this group if it has no CPUs allowed */
 		if (!cpumask_intersects(sched_group_span(group),
-					&p->cpus_allowed))
+					&p->cpus_preferred))
 			continue;
 
 		local_group = cpumask_test_cpu(this_cpu,
@@ -5925,7 +5925,7 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
 		return cpumask_first(sched_group_span(group));
 
 	/* Traverse only the allowed CPUs */
-	for_each_cpu_and(i, sched_group_span(group), &p->cpus_allowed) {
+	for_each_cpu_and(i, sched_group_span(group), &p->cpus_preferred) {
 		if (idle_cpu(i)) {
 			struct rq *rq = cpu_rq(i);
 			struct cpuidle_state *idle = idle_get_state(rq);
@@ -6011,6 +6011,27 @@ void __update_idle_core(struct rq *rq)
 	rcu_read_unlock();
 }
 
+static inline int
+scan_cpu_mask_for_idle_cores(struct cpumask *cpus, int target)
+{
+	int core, cpu;
+
+	for_each_cpu_wrap(core, cpus, target) {
+		bool idle = true;
+
+		for_each_cpu(cpu, cpu_smt_mask(core)) {
+			cpumask_clear_cpu(cpu, cpus);
+			if (!idle_cpu(cpu))
+				idle = false;
+		}
+
+		if (idle)
+			return core;
+	}
+
+	return -1;
+}
+
 /*
  * Scan the entire LLC domain for idle cores; this dynamically switches off if
  * there are no idle cores left in the system; tracked through
@@ -6019,7 +6040,8 @@ void __update_idle_core(struct rq *rq)
 static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target)
 {
 	struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
-	int core, cpu;
+	struct cpumask *pcpus = this_cpu_cpumask_var_ptr(select_idle_mask);
+	int core;
 
 	if (!static_branch_likely(&sched_smt_present))
 		return -1;
@@ -6028,20 +6050,21 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
 		return -1;
 
 	cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed);
+	cpumask_and(pcpus, cpus, &p->cpus_preferred);
+	core = scan_cpu_mask_for_idle_cores(pcpus, target);
 
-	for_each_cpu_wrap(core, cpus, target) {
-		bool idle = true;
+	if (core >= 0)
+		return core;
 
-		for_each_cpu(cpu, cpu_smt_mask(core)) {
-			cpumask_clear_cpu(cpu, cpus);
-			if (!idle_cpu(cpu))
-				idle = false;
-		}
+	if (cpumask_equal(cpus, pcpus))
+		goto out;
 
-		if (idle)
-			return core;
-	}
+	cpumask_andnot(cpus, cpus, pcpus);
+	core = scan_cpu_mask_for_idle_cores(cpus, target);
 
+	if (core >= 0)
+		return core;
+out:
 	/*
 	 * Failed to find an idle core; stop looking for one.
 	 */
@@ -6050,24 +6073,40 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
 	return -1;
 }
 
+static inline int
+scan_cpu_mask_for_idle_smt(struct cpumask *cpus, int target)
+{
+	int cpu;
+
+	for_each_cpu(cpu, cpu_smt_mask(target)) {
+		if (!cpumask_test_cpu(cpu, cpus))
+			continue;
+		if (idle_cpu(cpu))
+			return cpu;
+	}
+
+	return -1;
+}
+
 /*
  * Scan the local SMT mask for idle CPUs.
  */
 static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
 {
+	struct cpumask *cpus = &p->cpus_allowed;
 	int cpu;
 
 	if (!static_branch_likely(&sched_smt_present))
 		return -1;
 
-	for_each_cpu(cpu, cpu_smt_mask(target)) {
-		if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
-			continue;
-		if (idle_cpu(cpu))
-			return cpu;
-	}
+	cpu = scan_cpu_mask_for_idle_smt(&p->cpus_preferred, target);
 
-	return -1;
+	if (cpu >= 0 || cpumask_equal(&p->cpus_preferred, cpus))
+		return cpu;
+
+	cpumask_andnot(cpus, cpus, &p->cpus_preferred);
+
+	return scan_cpu_mask_for_idle_smt(cpus, target);
 }
 
 #else /* CONFIG_SCHED_SMT */
@@ -6084,6 +6123,24 @@ static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd
 
 #endif /* CONFIG_SCHED_SMT */
 
+static inline int
+scan_cpu_mask_for_idle_cpu(struct cpumask *cpus, int target,
+			   struct sched_domain *sd, int nr)
+{
+	int cpu;
+
+	for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
+		if (!--nr)
+			return -1;
+		if (!cpumask_test_cpu(cpu, cpus))
+			continue;
+		if (idle_cpu(cpu))
+			break;
+	}
+
+	return cpu;
+}
+
 /*
  * Scan the LLC domain for idle CPUs; this is dynamically regulated by
  * comparing the average scan cost (tracked in sd->avg_scan_cost) against the
@@ -6092,6 +6149,7 @@ static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd
 static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
 {
 	struct sched_domain *this_sd;
+	struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
 	u64 avg_cost, avg_idle;
 	u64 time, cost;
 	s64 delta;
@@ -6121,15 +6179,15 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 
 	time = local_clock();
 
-	for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
-		if (!--nr)
-			return -1;
-		if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
-			continue;
-		if (idle_cpu(cpu))
-			break;
-	}
+	cpu = scan_cpu_mask_for_idle_cpu(&p->cpus_preferred, target, sd, nr);
+
+	if (cpu >= 0 || cpumask_equal(&p->cpus_preferred, &p->cpus_allowed))
+		goto out;
 
+	cpumask_andnot(cpus, &p->cpus_allowed, &p->cpus_preferred);
+
+	cpu = scan_cpu_mask_for_idle_cpu(cpus, target, sd, nr);
+out:
 	time = local_clock() - time;
 	cost = this_sd->avg_scan_cost;
 	delta = (s64)(time - cost) / 8;
@@ -6279,7 +6337,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 	if (sd_flag & SD_BALANCE_WAKE) {
 		record_wakee(p);
 		want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu)
-			      && cpumask_test_cpu(cpu, &p->cpus_allowed);
+			      && cpumask_test_cpu(cpu, &p->cpus_preferred);
 	}
 
 	rcu_read_lock();
-- 
2.7.4
