Message-Id: <20190830174944.21741-7-subhra.mazumdar@oracle.com>
Date:   Fri, 30 Aug 2019 10:49:41 -0700
From:   subhra mazumdar <subhra.mazumdar@...cle.com>
To:     linux-kernel@...r.kernel.org
Cc:     peterz@...radead.org, mingo@...hat.com, tglx@...utronix.de,
        steven.sistare@...cle.com, dhaval.giani@...cle.com,
        daniel.lezcano@...aro.org, vincent.guittot@...aro.org,
        viresh.kumar@...aro.org, tim.c.chen@...ux.intel.com,
        mgorman@...hsingularity.net, parth@...ux.ibm.com,
        patrick.bellasi@....com
Subject: [RFC PATCH 6/9] x86/smpboot: Optimize cpumask_weight_sibling macro for x86

Back the cpumask_weight_sibling macro with a per-CPU variable on x86 so
that select_idle_cpu can look up the sibling count quickly. This avoids
reading multiple cache lines on systems with large numbers of CPUs,
where the sibling bitmask can span several cache lines. Even when the
bitmask fits in a single cache line, it avoids looping over the bits to
count them: the cached weight is read in O(1).
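
For illustration only (not part of this patch): a later patch in this
series is expected to switch select_idle_cpu over to the cached weight.
A minimal sketch of that substitution, using a hypothetical helper name
sibling_weight, could look like this:

/*
 * Illustrative sketch, not from this patch: use the O(1) cached weight
 * where an architecture provides the topology_sibling_weight() override
 * added below, and fall back to counting bits in the sibling mask
 * otherwise.
 */
static inline unsigned int sibling_weight(int cpu)
{
#ifdef topology_sibling_weight
	/* x86 after this patch: a single per-CPU load, O(1) */
	return topology_sibling_weight(cpu);
#else
	/* generic: walk the bitmask, possibly several cache lines */
	return cpumask_weight(topology_sibling_cpumask(cpu));
#endif
}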

Signed-off-by: subhra mazumdar <subhra.mazumdar@...cle.com>
---
 arch/x86/include/asm/smp.h      |  1 +
 arch/x86/include/asm/topology.h |  1 +
 arch/x86/kernel/smpboot.c       | 17 ++++++++++++++++-
 3 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index da545df..1e90cbd 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -22,6 +22,7 @@ extern int smp_num_siblings;
 extern unsigned int num_processors;
 
 DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
+DECLARE_PER_CPU_READ_MOSTLY(unsigned int, cpumask_weight_sibling);
 DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
 /* cpus sharing the last level cache: */
 DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 453cf38..dd19c71 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -111,6 +111,7 @@ extern const struct cpumask *cpu_coregroup_mask(int cpu);
 #ifdef CONFIG_SMP
 #define topology_core_cpumask(cpu)		(per_cpu(cpu_core_map, cpu))
 #define topology_sibling_cpumask(cpu)		(per_cpu(cpu_sibling_map, cpu))
+#define topology_sibling_weight(cpu)	(per_cpu(cpumask_weight_sibling, cpu))
 
 extern unsigned int __max_logical_packages;
 #define topology_max_packages()			(__max_logical_packages)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 362dd89..57ad88d 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -85,6 +85,9 @@
 DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
 EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
 
+/* representing number of HT siblings of each CPU */
+DEFINE_PER_CPU_READ_MOSTLY(unsigned int, cpumask_weight_sibling);
+
 /* representing HT and core siblings of each logical CPU */
 DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
 EXPORT_PER_CPU_SYMBOL(cpu_core_map);
@@ -520,6 +523,8 @@ void set_cpu_sibling_map(int cpu)
 
 	if (!has_mp) {
 		cpumask_set_cpu(cpu, topology_sibling_cpumask(cpu));
+		per_cpu(cpumask_weight_sibling, cpu) =
+		    cpumask_weight(topology_sibling_cpumask(cpu));
 		cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
 		cpumask_set_cpu(cpu, topology_core_cpumask(cpu));
 		c->booted_cores = 1;
@@ -529,8 +534,13 @@ void set_cpu_sibling_map(int cpu)
 	for_each_cpu(i, cpu_sibling_setup_mask) {
 		o = &cpu_data(i);
 
-		if ((i == cpu) || (has_smt && match_smt(c, o)))
+		if ((i == cpu) || (has_smt && match_smt(c, o))) {
 			link_mask(topology_sibling_cpumask, cpu, i);
+			per_cpu(cpumask_weight_sibling, cpu) =
+			    cpumask_weight(topology_sibling_cpumask(cpu));
+			per_cpu(cpumask_weight_sibling, i) =
+			    cpumask_weight(topology_sibling_cpumask(i));
+		}
 
 		if ((i == cpu) || (has_mp && match_llc(c, o)))
 			link_mask(cpu_llc_shared_mask, cpu, i);
@@ -1173,6 +1183,8 @@ static __init void disable_smp(void)
 	else
 		physid_set_mask_of_physid(0, &phys_cpu_present_map);
 	cpumask_set_cpu(0, topology_sibling_cpumask(0));
+	per_cpu(cpumask_weight_sibling, 0) =
+	    cpumask_weight(topology_sibling_cpumask(0));
 	cpumask_set_cpu(0, topology_core_cpumask(0));
 }
 
@@ -1482,6 +1494,8 @@ static void remove_siblinginfo(int cpu)
 
 	for_each_cpu(sibling, topology_core_cpumask(cpu)) {
 		cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
+		per_cpu(cpumask_weight_sibling, sibling) =
+		    cpumask_weight(topology_sibling_cpumask(sibling));
 		/*
 		 * last thread sibling in this cpu core going down
 		 */
@@ -1495,6 +1509,7 @@ static void remove_siblinginfo(int cpu)
 		cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling));
 	cpumask_clear(cpu_llc_shared_mask(cpu));
 	cpumask_clear(topology_sibling_cpumask(cpu));
+	per_cpu(cpumask_weight_sibling, cpu) = 0;
 	cpumask_clear(topology_core_cpumask(cpu));
 	c->cpu_core_id = 0;
 	c->booted_cores = 0;
-- 
2.9.3
