lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Date:   Mon, 14 Aug 2017 07:54:57 +0000
From:   "Ofer Levi(SW)" <oferle@...lanox.com>
To:     Peter Zijlstra <peterz@...radead.org>
CC:     "rusty@...tcorp.com.au" <rusty@...tcorp.com.au>,
        "mingo@...hat.com" <mingo@...hat.com>,
        "Vineet.Gupta1@...opsys.com" <Vineet.Gupta1@...opsys.com>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
        Tejun Heo <tj@...nel.org>
Subject: RE: hotplug support for arch/arc/plat-eznps platform

Sorry for the late response, but this patch is a regression: it's back to about 0.4 sec per CPU bring-up.
This is with the possible, present and isolcpus masks all set to 16-4095.
Most time is spent at:
register_sched_domain_sysctl() calling sd_sysctl_header = register_sysctl_table(sd_ctl_root);

[   22.150000] ## CPU16 LIVE ##: Executing Code...
[   22.170000] partition_sched_domains start
[   22.220000] register_sched_domain_sysctl start
[   22.580000] register_sched_domain_sysctl end
[   22.580000] partition_sched_domains end


> BTW, what physical size does your toy have? I'm thinking it's less than
> multiple racks worth like the SGI systems were.
It's a single chip with 4K cpus, capable of 400Gbps duplex. Evaluation board is pizza box size. 

Thanks


> -----Original Message-----
> From: Peter Zijlstra [mailto:peterz@...radead.org]
> Sent: Thursday, August 10, 2017 6:45 PM
> To: Ofer Levi(SW) <oferle@...lanox.com>
> Cc: rusty@...tcorp.com.au; mingo@...hat.com;
> Vineet.Gupta1@...opsys.com; linux-kernel@...r.kernel.org; Tejun Heo
> <tj@...nel.org>
> Subject: Re: hotplug support for arch/arc/plat-eznps platform
> 
> On Thu, Aug 10, 2017 at 11:19:05AM +0200, Peter Zijlstra wrote:
> > On Thu, Aug 10, 2017 at 07:40:16AM +0000, Ofer Levi(SW) wrote:
> > > Well, this definitely has pleased the little toy :) Thank you. I
> > > really appreciate your time and effort.
> > >
> > > If I may, one more newbie question. What do I need to do for the two
> > > patches to find their way into formal kernel code?
> >
> > I'll split the first patch into two separate patches and line them up.
> >
> > I'm not sure about this last patch, I'll speak with Ingo once he's
> > back to see what would be the thing to do here.
> >
> > I suspect we can make it work, that sysctl stuff is only debug crud
> > after all and that should never get in the way of getting work done.
> 
> Can you test this instead of the second patch? It should have the same
> effect.
> 
> 
> ---
> Subject: sched/debug: Optimize sched_domain sysctl generation
> From: Peter Zijlstra <peterz@...radead.org>
> Date: Thu Aug 10 17:10:26 CEST 2017
> 
> Currently we unconditionally destroy all sysctl bits and regenerate them after
> we've rebuilt the domains (even if that rebuild is a no-op).
> 
> And since we unconditionally (re)build the sysctl for all possible CPUs,
> onlining all CPUs gets us O(n^2) time. Instead change this to only rebuild the
> bits for CPUs we've actually installed new domains on.
> 
> Reported-by: "Ofer Levi(SW)" <oferle@...lanox.com>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
> ---
>  kernel/sched/debug.c    |   68 ++++++++++++++++++++++++++++++++++++++----------
>  kernel/sched/sched.h    |    4 ++
>  kernel/sched/topology.c |    1
>  3 files changed, 59 insertions(+), 14 deletions(-)
> 
> --- a/kernel/sched/debug.c
> +++ b/kernel/sched/debug.c
> @@ -327,38 +327,78 @@ static struct ctl_table *sd_alloc_ctl_cp
>  	return table;
>  }
> 
> +static cpumask_var_t sd_sysctl_cpus;
>  static struct ctl_table_header *sd_sysctl_header;
> +
>  void register_sched_domain_sysctl(void)
>  {
> -	int i, cpu_num = num_possible_cpus();
> -	struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
> +	static struct ctl_table *cpu_entries;
> +	static struct ctl_table **cpu_idx;
>  	char buf[32];
> +	int i;
> +
> +	if (!cpu_entries) {
> +		cpu_entries = sd_alloc_ctl_entry(num_possible_cpus() + 1);
> +		if (!cpu_entries)
> +			return;
> +
> +		WARN_ON(sd_ctl_dir[0].child);
> +		sd_ctl_dir[0].child = cpu_entries;
> +	}
> +
> +	if (!cpu_idx) {
> +		struct ctl_table *e = cpu_entries;
> +
> +		cpu_idx = kcalloc(nr_cpu_ids, sizeof(struct ctl_table*),
> GFP_KERNEL);
> +		if (!cpu_idx)
> +			return;
> +
> +		/* deal with sparse possible map */
> +		for_each_possible_cpu(i) {
> +			cpu_idx[i] = e;
> +			e++;
> +		}
> +	}
> 
> -	WARN_ON(sd_ctl_dir[0].child);
> -	sd_ctl_dir[0].child = entry;
> +	if (!cpumask_available(sd_sysctl_cpus)) {
> +		if (!alloc_cpumask_var(&sd_sysctl_cpus, GFP_KERNEL))
> +			return;
> 
> -	if (entry == NULL)
> -		return;
> +		/* init to possible to not have holes in @cpu_entries */
> +		cpumask_copy(sd_sysctl_cpus, cpu_possible_mask);
> +	}
> +
> +	for_each_cpu(i, sd_sysctl_cpus) {
> +		struct ctl_table *e = cpu_idx[i];
> +
> +		if (e->child)
> +			sd_free_ctl_entry(&e->child);
> +
> +		if (!e->procname) {
> +			snprintf(buf, 32, "cpu%d", i);
> +			e->procname = kstrdup(buf, GFP_KERNEL);
> +		}
> +		e->mode = 0555;
> +		e->child = sd_alloc_ctl_cpu_table(i);
> 
> -	for_each_possible_cpu(i) {
> -		snprintf(buf, 32, "cpu%d", i);
> -		entry->procname = kstrdup(buf, GFP_KERNEL);
> -		entry->mode = 0555;
> -		entry->child = sd_alloc_ctl_cpu_table(i);
> -		entry++;
> +		__cpumask_clear_cpu(i, sd_sysctl_cpus);
>  	}
> 
>  	WARN_ON(sd_sysctl_header);
>  	sd_sysctl_header = register_sysctl_table(sd_ctl_root);
>  }
> 
> +void dirty_sched_domain_sysctl(int cpu) {
> +	if (cpumask_available(sd_sysctl_cpus))
> +		__cpumask_set_cpu(cpu, sd_sysctl_cpus); }
> +
>  /* may be called multiple times per register */  void
> unregister_sched_domain_sysctl(void)
>  {
>  	unregister_sysctl_table(sd_sysctl_header);
>  	sd_sysctl_header = NULL;
> -	if (sd_ctl_dir[0].child)
> -		sd_free_ctl_entry(&sd_ctl_dir[0].child);
>  }
>  #endif /* CONFIG_SYSCTL */
>  #endif /* CONFIG_SMP */
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -1120,11 +1120,15 @@ extern int group_balance_cpu(struct sche
> 
>  #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)  void
> register_sched_domain_sysctl(void);
> +void dirty_sched_domain_sysctl(int cpu);
>  void unregister_sched_domain_sysctl(void);
>  #else
>  static inline void register_sched_domain_sysctl(void)
>  {
>  }
> +static inline void dirty_sched_domain_sysctl(int cpu) { }
>  static inline void unregister_sched_domain_sysctl(void)
>  {
>  }
> --- a/kernel/sched/topology.c
> +++ b/kernel/sched/topology.c
> @@ -461,6 +461,7 @@ cpu_attach_domain(struct sched_domain *s
>  	rq_attach_root(rq, rd);
>  	tmp = rq->sd;
>  	rcu_assign_pointer(rq->sd, sd);
> +	dirty_sched_domain_sysctl(cpu);
>  	destroy_sched_domains(tmp);
> 
>  	update_top_cache_domain(cpu);

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ