linux-kernel - Re: hotplug support for arch/arc/plat-eznps platform

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20170810154518.gl2w3llfabnszusr@hirez.programming.kicks-ass.net>
Date:   Thu, 10 Aug 2017 17:45:18 +0200
From:   Peter Zijlstra <peterz@...radead.org>
To:     "Ofer Levi(SW)" <oferle@...lanox.com>
Cc:     "rusty@...tcorp.com.au" <rusty@...tcorp.com.au>,
        "mingo@...hat.com" <mingo@...hat.com>,
        "Vineet.Gupta1@...opsys.com" <Vineet.Gupta1@...opsys.com>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
        Tejun Heo <tj@...nel.org>
Subject: Re: hotplug support for arch/arc/plat-eznps platform

On Thu, Aug 10, 2017 at 11:19:05AM +0200, Peter Zijlstra wrote:
> On Thu, Aug 10, 2017 at 07:40:16AM +0000, Ofer Levi(SW) wrote:
> > Well, this definitely have pleased the little toy :)
> > Thank you. I really appreciate your time and effort.
> > 
> > If I may, one more newbie question. What do I need to do for the two patches to find 
> > their way into formal kernel code?
> 
> I'll split the first patch into two separate patches and line them up.
> 
> I'm not sure about this last patch, I'll speak with Ingo once he's back
> to see what would be the thing to do here.
> 
> I suspect we can make it work, that sysctl stuff is only debug crud
> after all and that should never get in the way of getting work done.

Can you test this instead of the second patch? It should have the same
effect.

BTW, what physical size does your toy have? I'm thinking its less than
multiple racks worth like the SGI systems were.

---
Subject: sched/debug: Optimize sched_domain sysctl generation
From: Peter Zijlstra <peterz@...radead.org>
Date: Thu Aug 10 17:10:26 CEST 2017

Currently we unconditionally destroy all sysctl bits and regenerate
them after we've rebuild the domains (even if that rebuild is a
no-op).

And since we unconditionally (re)build the sysctl for all possible
CPUs, onlining all CPUs gets us O(n^2) time. Instead change this to
only rebuild the bits for CPUs we've actually installed new domains
on.

Reported-by: "Ofer Levi(SW)" <oferle@...lanox.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
---
 kernel/sched/debug.c    |   68 ++++++++++++++++++++++++++++++++++++++----------
 kernel/sched/sched.h    |    4 ++
 kernel/sched/topology.c |    1 
 3 files changed, 59 insertions(+), 14 deletions(-)

--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -327,38 +327,78 @@ static struct ctl_table *sd_alloc_ctl_cp
 	return table;
 }
 
+static cpumask_var_t sd_sysctl_cpus;
 static struct ctl_table_header *sd_sysctl_header;
+
 void register_sched_domain_sysctl(void)
 {
-	int i, cpu_num = num_possible_cpus();
-	struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
+	static struct ctl_table *cpu_entries;
+	static struct ctl_table **cpu_idx;
 	char buf[32];
+	int i;
+
+	if (!cpu_entries) {
+		cpu_entries = sd_alloc_ctl_entry(num_possible_cpus() + 1);
+		if (!cpu_entries)
+			return;
+
+		WARN_ON(sd_ctl_dir[0].child);
+		sd_ctl_dir[0].child = cpu_entries;
+	}
+
+	if (!cpu_idx) {
+		struct ctl_table *e = cpu_entries;
+
+		cpu_idx = kcalloc(nr_cpu_ids, sizeof(struct ctl_table*), GFP_KERNEL);
+		if (!cpu_idx)
+			return;
+
+		/* deal with sparse possible map */
+		for_each_possible_cpu(i) {
+			cpu_idx[i] = e;
+			e++;
+		}
+	}
 
-	WARN_ON(sd_ctl_dir[0].child);
-	sd_ctl_dir[0].child = entry;
+	if (!cpumask_available(sd_sysctl_cpus)) {
+		if (!alloc_cpumask_var(&sd_sysctl_cpus, GFP_KERNEL))
+			return;
 
-	if (entry == NULL)
-		return;
+		/* init to possible to not have holes in @cpu_entries */
+		cpumask_copy(sd_sysctl_cpus, cpu_possible_mask);
+	}
+
+	for_each_cpu(i, sd_sysctl_cpus) {
+		struct ctl_table *e = cpu_idx[i];
+
+		if (e->child)
+			sd_free_ctl_entry(&e->child);
+
+		if (!e->procname) {
+			snprintf(buf, 32, "cpu%d", i);
+			e->procname = kstrdup(buf, GFP_KERNEL);
+		}
+		e->mode = 0555;
+		e->child = sd_alloc_ctl_cpu_table(i);
 
-	for_each_possible_cpu(i) {
-		snprintf(buf, 32, "cpu%d", i);
-		entry->procname = kstrdup(buf, GFP_KERNEL);
-		entry->mode = 0555;
-		entry->child = sd_alloc_ctl_cpu_table(i);
-		entry++;
+		__cpumask_clear_cpu(i, sd_sysctl_cpus);
 	}
 
 	WARN_ON(sd_sysctl_header);
 	sd_sysctl_header = register_sysctl_table(sd_ctl_root);
 }
 
+void dirty_sched_domain_sysctl(int cpu)
+{
+	if (cpumask_available(sd_sysctl_cpus))
+		__cpumask_set_cpu(cpu, sd_sysctl_cpus);
+}
+
 /* may be called multiple times per register */
 void unregister_sched_domain_sysctl(void)
 {
 	unregister_sysctl_table(sd_sysctl_header);
 	sd_sysctl_header = NULL;
-	if (sd_ctl_dir[0].child)
-		sd_free_ctl_entry(&sd_ctl_dir[0].child);
 }
 #endif /* CONFIG_SYSCTL */
 #endif /* CONFIG_SMP */
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1120,11 +1120,15 @@ extern int group_balance_cpu(struct sche
 
 #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
 void register_sched_domain_sysctl(void);
+void dirty_sched_domain_sysctl(int cpu);
 void unregister_sched_domain_sysctl(void);
 #else
 static inline void register_sched_domain_sysctl(void)
 {
 }
+static inline void dirty_sched_domain_sysctl(int cpu)
+{
+}
 static inline void unregister_sched_domain_sysctl(void)
 {
 }
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -461,6 +461,7 @@ cpu_attach_domain(struct sched_domain *s
 	rq_attach_root(rq, rd);
 	tmp = rq->sd;
 	rcu_assign_pointer(rq->sd, sd);
+	dirty_sched_domain_sysctl(cpu);
 	destroy_sched_domains(tmp);
 
 	update_top_cache_domain(cpu);