linux-kernel - Re: [PATCH v2 7/9] sched,debug: Convert sysctl sched_domains to debugfs

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <87czuvn2uk.mognet@arm.com>
Date:   Thu, 15 Apr 2021 13:34:27 +0100
From:   Valentin Schneider <valentin.schneider@....com>
To:     Peter Zijlstra <peterz@...radead.org>
Cc:     mingo@...nel.org, mgorman@...e.de, juri.lelli@...hat.com,
        vincent.guittot@...aro.org, dietmar.eggemann@....com,
        rostedt@...dmis.org, bsegall@...gle.com, bristot@...hat.com,
        joshdon@...gle.com, linux-kernel@...r.kernel.org, greg@...ah.com,
        linux@...musvillemoes.dk
Subject: Re: [PATCH v2 7/9] sched,debug: Convert sysctl sched_domains to debugfs

On 15/04/21 11:06, Peter Zijlstra wrote:
> On Tue, Apr 13, 2021 at 03:55:15PM +0100, Valentin Schneider wrote:
>> On 12/04/21 12:14, Peter Zijlstra wrote:
>> > Stop polluting sysctl, move to debugfs for SCHED_DEBUG stuff.
>> >
>> > Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
>> > Reviewed-by: Dietmar Eggemann <dietmar.eggemann@....com>
>>
>> On my Juno (2+4 big.LITTLE), /sys/kernel/debug/sched/domains/ is now empty.
>>
>> I think that's because of unregister_sched_domain_sysctl() -
>> debugfs_remove() is recursive, and I do get a case where we rebuild the
>> domains but no CPU has been added or removed (we rebuild the domains when
>> cpufreq kicks in, it's part of the big.LITTLE ponies).
>>
>> Do we actually still need that unregister? From a brief glance it looks
>> like we could throw it out.
>
> Yeah, I can't think of anything either. AFAICT it hasn't done anything
> useful since that cpumask optimization. Consider it gone.
>
> I'll let it soak for another day or so, but then I was planning on
> merging this series.
>
> Updated patch has been in queue.git/sched/debug since yesterday.
>
> ---
> Subject: sched,debug: Convert sysctl sched_domains to debugfs
> From: Peter Zijlstra <peterz@...radead.org>
> Date: Thu Mar 25 11:31:20 CET 2021
>
> Stop polluting sysctl, move to debugfs for SCHED_DEBUG stuff.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
> Reviewed-by: Dietmar Eggemann <dietmar.eggemann@....com>
> ---
>  kernel/sched/debug.c    |  254 ++++++++++--------------------------------------
>  kernel/sched/sched.h    |    6 -
>  kernel/sched/topology.c |    6 -
>  3 files changed, 56 insertions(+), 210 deletions(-)
>
> --- a/kernel/sched/debug.c
> +++ b/kernel/sched/debug.c
> @@ -299,6 +299,10 @@ static __init int sched_init_debug(void)
>       debugfs_create_file("tunable_scaling", 0644, debugfs_sched, NULL, &sched_scaling_fops);
>       debugfs_create_u32("migration_cost_ns", 0644, debugfs_sched, &sysctl_sched_migration_cost);
>       debugfs_create_u32("nr_migrate", 0644, debugfs_sched, &sysctl_sched_nr_migrate);
> +
> +	mutex_lock(&sched_domains_mutex);
> +	register_sched_domain_sysctl();
> +	mutex_unlock(&sched_domains_mutex);
>  #endif
>
>  #ifdef CONFIG_NUMA_BALANCING
> @@ -316,229 +320,88 @@ late_initcall(sched_init_debug);
>
>  #ifdef CONFIG_SMP
>
> -#ifdef CONFIG_SYSCTL
> -
> -static struct ctl_table sd_ctl_dir[] = {
> -	{
> -		.procname	= "sched_domain",
> -		.mode		= 0555,
> -	},
> -	{}
> -};
> -
> -static struct ctl_table sd_ctl_root[] = {
> -	{
> -		.procname	= "kernel",
> -		.mode		= 0555,
> -		.child		= sd_ctl_dir,
> -	},
> -	{}
> -};
> -
> -static struct ctl_table *sd_alloc_ctl_entry(int n)
> -{
> -	struct ctl_table *entry =
> -		kcalloc(n, sizeof(struct ctl_table), GFP_KERNEL);
> -
> -	return entry;
> -}
> -
> -static void sd_free_ctl_entry(struct ctl_table **tablep)
> -{
> -	struct ctl_table *entry;
> -
> -	/*
> -	 * In the intermediate directories, both the child directory and
> -	 * procname are dynamically allocated and could fail but the mode
> -	 * will always be set. In the lowest directory the names are
> -	 * static strings and all have proc handlers.
> -	 */
> -	for (entry = *tablep; entry->mode; entry++) {
> -		if (entry->child)
> -			sd_free_ctl_entry(&entry->child);
> -		if (entry->proc_handler == NULL)
> -			kfree(entry->procname);
> -	}
> -
> -	kfree(*tablep);
> -	*tablep = NULL;
> -}
> -
> -static void
> -set_table_entry(struct ctl_table *entry,
> -		const char *procname, void *data, int maxlen,
> -		umode_t mode, proc_handler *proc_handler)
> -{
> -	entry->procname = procname;
> -	entry->data = data;
> -	entry->maxlen = maxlen;
> -	entry->mode = mode;
> -	entry->proc_handler = proc_handler;
> -}
> +static cpumask_var_t		sd_sysctl_cpus;
> +static struct dentry		*sd_dentry;
>
> -static int sd_ctl_doflags(struct ctl_table *table, int write,
> -			  void *buffer, size_t *lenp, loff_t *ppos)
> +static int sd_flags_show(struct seq_file *m, void *v)
>  {
> -	unsigned long flags = *(unsigned long *)table->data;
> -	size_t data_size = 0;
> -	size_t len = 0;
> -	char *tmp, *buf;
> +	unsigned long flags = *(unsigned int *)m->private;
>       int idx;
>
> -	if (write)
> -		return 0;
> -
> -	for_each_set_bit(idx, &flags, __SD_FLAG_CNT) {
> -		char *name = sd_flag_debug[idx].name;
> -
> -		/* Name plus whitespace */
> -		data_size += strlen(name) + 1;
> -	}
> -
> -	if (*ppos > data_size) {
> -		*lenp = 0;
> -		return 0;
> -	}
> -
> -	buf = kcalloc(data_size + 1, sizeof(*buf), GFP_KERNEL);
> -	if (!buf)
> -		return -ENOMEM;
> -
>       for_each_set_bit(idx, &flags, __SD_FLAG_CNT) {
> -		char *name = sd_flag_debug[idx].name;
> -
> -		len += snprintf(buf + len, strlen(name) + 2, "%s ", name);
> +		seq_puts(m, sd_flag_debug[idx].name);
> +		seq_puts(m, " ");
>       }
> -
> -	tmp = buf + *ppos;
> -	len -= *ppos;
> -
> -	if (len > *lenp)
> -		len = *lenp;
> -	if (len)
> -		memcpy(buffer, tmp, len);
> -	if (len < *lenp) {
> -		((char *)buffer)[len] = '\n';
> -		len++;
> -	}
> -
> -	*lenp = len;
> -	*ppos += len;
> -
> -	kfree(buf);
> +	seq_puts(m, "\n");
>
>       return 0;
>  }
>
> -static struct ctl_table *
> -sd_alloc_ctl_domain_table(struct sched_domain *sd)
> -{
> -	struct ctl_table *table = sd_alloc_ctl_entry(9);
> -
> -	if (table == NULL)
> -		return NULL;
> -
> -	set_table_entry(&table[0], "min_interval",	  &sd->min_interval,	    sizeof(long), 0644, proc_doulongvec_minmax);
> -	set_table_entry(&table[1], "max_interval",	  &sd->max_interval,	    sizeof(long), 0644, proc_doulongvec_minmax);
> -	set_table_entry(&table[2], "busy_factor",	  &sd->busy_factor,	    sizeof(int),  0644, proc_dointvec_minmax);
> -	set_table_entry(&table[3], "imbalance_pct",	  &sd->imbalance_pct,	    sizeof(int),  0644, proc_dointvec_minmax);
> -	set_table_entry(&table[4], "cache_nice_tries",	  &sd->cache_nice_tries,    sizeof(int),  0644, proc_dointvec_minmax);
> -	set_table_entry(&table[5], "flags",		  &sd->flags,		    sizeof(int),  0444, sd_ctl_doflags);
> -	set_table_entry(&table[6], "max_newidle_lb_cost", &sd->max_newidle_lb_cost, sizeof(long), 0644, proc_doulongvec_minmax);
> -	set_table_entry(&table[7], "name",		  sd->name,	       CORENAME_MAX_SIZE, 0444, proc_dostring);
> -	/* &table[8] is terminator */
> -
> -	return table;
> -}
> -
> -static struct ctl_table *sd_alloc_ctl_cpu_table(int cpu)
> +static int sd_flags_open(struct inode *inode, struct file *file)
>  {
> -	struct ctl_table *entry, *table;
> -	struct sched_domain *sd;
> -	int domain_num = 0, i;
> -	char buf[32];
> -
> -	for_each_domain(cpu, sd)
> -		domain_num++;
> -	entry = table = sd_alloc_ctl_entry(domain_num + 1);
> -	if (table == NULL)
> -		return NULL;
> -
> -	i = 0;
> -	for_each_domain(cpu, sd) {
> -		snprintf(buf, 32, "domain%d", i);
> -		entry->procname = kstrdup(buf, GFP_KERNEL);
> -		entry->mode = 0555;
> -		entry->child = sd_alloc_ctl_domain_table(sd);
> -		entry++;
> -		i++;
> -	}
> -	return table;
> +	return single_open(file, sd_flags_show, inode->i_private);
>  }
>
> -static cpumask_var_t		sd_sysctl_cpus;
> -static struct ctl_table_header	*sd_sysctl_header;
> +static const struct file_operations sd_flags_fops = {
> +	.open		= sd_flags_open,
> +	.read		= seq_read,
> +	.llseek		= seq_lseek,
> +	.release	= single_release,
> +};
>
> -void register_sched_domain_sysctl(void)
> +static void register_sd(struct sched_domain *sd, struct dentry *parent)
>  {
> -	static struct ctl_table *cpu_entries;
> -	static struct ctl_table **cpu_idx;
> -	static bool init_done = false;
> -	char buf[32];
> -	int i;
> -
> -	if (!cpu_entries) {
> -		cpu_entries = sd_alloc_ctl_entry(num_possible_cpus() + 1);
> -		if (!cpu_entries)
> -			return;
> +#define SDM(type, mode, member)	\
> +	debugfs_create_##type(#member, mode, parent, &sd->member)
>
> -		WARN_ON(sd_ctl_dir[0].child);
> -		sd_ctl_dir[0].child = cpu_entries;
> -	}
> +	SDM(ulong, 0644, min_interval);
> +	SDM(ulong, 0644, max_interval);
> +	SDM(u64,   0644, max_newidle_lb_cost);
> +	SDM(u32,   0644, busy_factor);
> +	SDM(u32,   0644, imbalance_pct);
> +	SDM(u32,   0644, cache_nice_tries);
> +	SDM(str,   0444, name);
>
> -	if (!cpu_idx) {
> -		struct ctl_table *e = cpu_entries;
> +#undef SDM
>
> -		cpu_idx = kcalloc(nr_cpu_ids, sizeof(struct ctl_table*), GFP_KERNEL);
> -		if (!cpu_idx)
> -			return;
> +	debugfs_create_file("flags", 0444, parent, &sd->flags, &sd_flags_fops);
> +}
>
> -		/* deal with sparse possible map */
> -		for_each_possible_cpu(i) {
> -			cpu_idx[i] = e;
> -			e++;
> -		}
> -	}
> +void register_sched_domain_sysctl(void)
> +{
> +	int cpu, i;
>
>       if (!cpumask_available(sd_sysctl_cpus)) {
>               if (!alloc_cpumask_var(&sd_sysctl_cpus, GFP_KERNEL))
>                       return;
> -	}
> -
> -	if (!init_done) {
> -		init_done = true;
> -		/* init to possible to not have holes in @cpu_entries */
>               cpumask_copy(sd_sysctl_cpus, cpu_possible_mask);
>       }
>
> -	for_each_cpu(i, sd_sysctl_cpus) {
> -		struct ctl_table *e = cpu_idx[i];
> +	if (!sd_dentry)
> +		sd_dentry = debugfs_create_dir("domains", debugfs_sched);
> +
> +	for_each_cpu(cpu, sd_sysctl_cpus) {
> +		struct sched_domain *sd;
> +		struct dentry *d_cpu;
> +		char buf[32];
> +
> +		snprintf(buf, sizeof(buf), "cpu%d", cpu);
> +		debugfs_remove(debugfs_lookup(buf, sd_dentry));
> +		d_cpu = debugfs_create_dir(buf, sd_dentry);
> +
> +		i = 0;
> +		for_each_domain(cpu, sd) {
> +			struct dentry *d_sd;
>
> -		if (e->child)
> -			sd_free_ctl_entry(&e->child);
> +			snprintf(buf, sizeof(buf), "domain%d", i);
> +			d_sd = debugfs_create_dir(buf, d_cpu);
>
> -		if (!e->procname) {
> -			snprintf(buf, 32, "cpu%d", i);
> -			e->procname = kstrdup(buf, GFP_KERNEL);
> +			register_sd(sd, d_sd);
> +			i++;
>               }
> -		e->mode = 0555;
> -		e->child = sd_alloc_ctl_cpu_table(i);
>
> -		__cpumask_clear_cpu(i, sd_sysctl_cpus);
> +		__cpumask_clear_cpu(cpu, sd_sysctl_cpus);
>       }
> -
> -	WARN_ON(sd_sysctl_header);
> -	sd_sysctl_header = register_sysctl_table(sd_ctl_root);
>  }
>
>  void dirty_sched_domain_sysctl(int cpu)
> @@ -547,13 +410,6 @@ void dirty_sched_domain_sysctl(int cpu)
>               __cpumask_set_cpu(cpu, sd_sysctl_cpus);
>  }
>
> -/* may be called multiple times per register */
> -void unregister_sched_domain_sysctl(void)
> -{
> -	unregister_sysctl_table(sd_sysctl_header);
> -	sd_sysctl_header = NULL;
> -}
> -#endif /* CONFIG_SYSCTL */
>  #endif /* CONFIG_SMP */
>
>  #ifdef CONFIG_FAIR_GROUP_SCHED
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -1553,10 +1553,9 @@ static inline unsigned int group_first_c
>
>  extern int group_balance_cpu(struct sched_group *sg);
>
> -#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
> +#ifdef CONFIG_SCHED_DEBUG
>  void register_sched_domain_sysctl(void);
>  void dirty_sched_domain_sysctl(int cpu);
> -void unregister_sched_domain_sysctl(void);
>  #else
>  static inline void register_sched_domain_sysctl(void)
>  {
> @@ -1564,9 +1563,6 @@ static inline void register_sched_domain
>  static inline void dirty_sched_domain_sysctl(int cpu)
>  {
>  }
> -static inline void unregister_sched_domain_sysctl(void)
> -{
> -}
>  #endif
>
>  extern int sched_update_scaling(void);
> --- a/kernel/sched/topology.c
> +++ b/kernel/sched/topology.c
> @@ -2223,7 +2223,6 @@ int sched_init_domains(const struct cpum
>               doms_cur = &fallback_doms;
>       cpumask_and(doms_cur[0], cpu_map, housekeeping_cpumask(HK_FLAG_DOMAIN));
>       err = build_sched_domains(doms_cur[0], NULL);
> -	register_sched_domain_sysctl();
>
>       return err;
>  }
> @@ -2298,9 +2297,6 @@ void partition_sched_domains_locked(int
>
>       lockdep_assert_held(&sched_domains_mutex);
>
> -	/* Always unregister in case we don't destroy any domains: */
> -	unregister_sched_domain_sysctl();
> -
>       /* Let the architecture update CPU core mappings: */
>       new_topology = arch_update_cpu_topology();
>
> @@ -2388,8 +2384,6 @@ void partition_sched_domains_locked(int
>       doms_cur = doms_new;
>       dattr_cur = dattr_new;
>       ndoms_cur = ndoms_new;
> -
> -	register_sched_domain_sysctl();
>  }
>

This register_sched_domain_sysctl() call has to stay; otherwise the debugfs files are never updated when the domains are rebuilt.

Other than that:
Reviewed-by: Valentin Schneider <valentin.schneider@....com>

And for the whole series:
Tested-by: Valentin Schneider <valentin.schneider@....com>

>  /*