lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20200311181601.18314-10-valentin.schneider@arm.com>
Date:   Wed, 11 Mar 2020 18:16:01 +0000
From:   Valentin Schneider <valentin.schneider@....com>
To:     linux-kernel@...r.kernel.org
Cc:     mingo@...nel.org, peterz@...radead.org, vincent.guittot@...aro.org,
        dietmar.eggemann@....com
Subject: [PATCH v2 9/9] sched/topology: Define and use shortcut pointers for wakeup sd_flag scan

Reworking select_task_rq_fair()'s domain walk exposed that !want_affine
wakeups only look for highest sched_domain with the required sd_flag
set. This is something we can cache at sched domain build time to slightly
optimize select_task_rq_fair(). Note that this isn't a "free" optimization:
it costs us 3 pointers per CPU.

Add cached per-CPU pointers for the highest domains with SD_BALANCE_WAKE,
SD_BALANCE_EXEC and SD_BALANCE_FORK. Use them in select_task_rq_fair().

Signed-off-by: Valentin Schneider <valentin.schneider@....com>
---
 kernel/sched/fair.c     | 25 +++++++++++++------------
 kernel/sched/sched.h    |  3 +++
 kernel/sched/topology.c | 12 ++++++++++++
 3 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a6fca6817e92..40fb97062157 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6595,17 +6595,6 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
 	int want_affine = 0;
 	int sd_flag;
 
-	switch (wake_flags & (WF_TTWU | WF_FORK | WF_EXEC)) {
-	case WF_TTWU:
-		sd_flag = SD_BALANCE_WAKE;
-		break;
-	case WF_FORK:
-		sd_flag = SD_BALANCE_FORK;
-		break;
-	default:
-		sd_flag = SD_BALANCE_EXEC;
-	}
-
 	if (wake_flags & WF_TTWU) {
 		record_wakee(p);
 
@@ -6621,7 +6610,19 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
 
 	rcu_read_lock();
 
-	sd = highest_flag_domain(cpu, sd_flag);
+	switch (wake_flags & (WF_TTWU | WF_FORK | WF_EXEC)) {
+	case WF_TTWU:
+		sd_flag = SD_BALANCE_WAKE;
+		sd = rcu_dereference(per_cpu(sd_balance_wake, cpu));
+		break;
+	case WF_FORK:
+		sd_flag = SD_BALANCE_FORK;
+		sd = rcu_dereference(per_cpu(sd_balance_fork, cpu));
+		break;
+	default:
+		sd_flag = SD_BALANCE_EXEC;
+		sd = rcu_dereference(per_cpu(sd_balance_exec, cpu));
+	}
 
 	/*
 	 * If !want_affine, we just look for the highest domain where
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ad2106245e12..3a0e38f2f713 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1393,6 +1393,9 @@ DECLARE_PER_CPU(int, sd_llc_size);
 DECLARE_PER_CPU(int, sd_llc_id);
 DECLARE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared);
 DECLARE_PER_CPU(struct sched_domain __rcu *, sd_numa);
+DECLARE_PER_CPU(struct sched_domain __rcu *, sd_balance_wake);
+DECLARE_PER_CPU(struct sched_domain __rcu *, sd_balance_fork);
+DECLARE_PER_CPU(struct sched_domain __rcu *, sd_balance_exec);
 DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
 DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
 extern struct static_key_false sched_asym_cpucapacity;
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 6077b23f9723..0270252a964b 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -610,6 +610,9 @@ DEFINE_PER_CPU(int, sd_llc_size);
 DEFINE_PER_CPU(int, sd_llc_id);
 DEFINE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared);
 DEFINE_PER_CPU(struct sched_domain __rcu *, sd_numa);
+DEFINE_PER_CPU(struct sched_domain __rcu *, sd_balance_wake);
+DEFINE_PER_CPU(struct sched_domain __rcu *, sd_balance_fork);
+DEFINE_PER_CPU(struct sched_domain __rcu *, sd_balance_exec);
 DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
 DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
 DEFINE_STATIC_KEY_FALSE(sched_asym_cpucapacity);
@@ -636,6 +639,15 @@ static void update_top_cache_domain(int cpu)
 	sd = lowest_flag_domain(cpu, SD_NUMA);
 	rcu_assign_pointer(per_cpu(sd_numa, cpu), sd);
 
+	sd = highest_flag_domain(cpu, SD_BALANCE_WAKE);
+	rcu_assign_pointer(per_cpu(sd_balance_wake, cpu), sd);
+
+	sd = highest_flag_domain(cpu, SD_BALANCE_FORK);
+	rcu_assign_pointer(per_cpu(sd_balance_fork, cpu), sd);
+
+	sd = highest_flag_domain(cpu, SD_BALANCE_EXEC);
+	rcu_assign_pointer(per_cpu(sd_balance_exec, cpu), sd);
+
 	sd = highest_flag_domain(cpu, SD_ASYM_PACKING);
 	rcu_assign_pointer(per_cpu(sd_asym_packing, cpu), sd);
 
-- 
2.24.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ