[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <87czuvn2uk.mognet@arm.com>
Date: Thu, 15 Apr 2021 13:34:27 +0100
From: Valentin Schneider <valentin.schneider@....com>
To: Peter Zijlstra <peterz@...radead.org>
Cc: mingo@...nel.org, mgorman@...e.de, juri.lelli@...hat.com,
vincent.guittot@...aro.org, dietmar.eggemann@....com,
rostedt@...dmis.org, bsegall@...gle.com, bristot@...hat.com,
joshdon@...gle.com, linux-kernel@...r.kernel.org, greg@...ah.com,
linux@...musvillemoes.dk
Subject: Re: [PATCH v2 7/9] sched,debug: Convert sysctl sched_domains to debugfs
On 15/04/21 11:06, Peter Zijlstra wrote:
> On Tue, Apr 13, 2021 at 03:55:15PM +0100, Valentin Schneider wrote:
>> On 12/04/21 12:14, Peter Zijlstra wrote:
>> > Stop polluting sysctl, move to debugfs for SCHED_DEBUG stuff.
>> >
>> > Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
>> > Reviewed-by: Dietmar Eggemann <dietmar.eggemann@....com>
>>
>> On my Juno (2+4 big.LITTLE), sys/kernel/debug/sched/domains/ is now empty.
>>
>> I think that's because of unregister_sched_domain_sysctl() -
>> debugfs_remove() is recursive, and I do get a case where we rebuild the
>> domains but no CPU has been added or removed (we rebuild the domains when
>> cpufreq kicks in, it's part of the big.LITTLE ponies).
>>
>> Do we actually still need that unregister? From a brief glance it looks
>> like we could throw it out.
>
> Yeah, I can't think of anything either. AFAICT it hasn't done anything
> useful since that cpumask optimization. Consider it gone.
>
> I'll let it soak for another day or so, but then I was planning on
> merging this series.
>
> Updated patch has been in queue.git/sched/debug since yesterday.
>
> ---
> Subject: sched,debug: Convert sysctl sched_domains to debugfs
> From: Peter Zijlstra <peterz@...radead.org>
> Date: Thu Mar 25 11:31:20 CET 2021
>
> Stop polluting sysctl, move to debugfs for SCHED_DEBUG stuff.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
> Reviewed-by: Dietmar Eggemann <dietmar.eggemann@....com>
> ---
> kernel/sched/debug.c | 254 ++++++++++--------------------------------------
> kernel/sched/sched.h | 6 -
> kernel/sched/topology.c | 6 -
> 3 files changed, 56 insertions(+), 210 deletions(-)
>
> --- a/kernel/sched/debug.c
> +++ b/kernel/sched/debug.c
> @@ -299,6 +299,10 @@ static __init int sched_init_debug(void)
> debugfs_create_file("tunable_scaling", 0644, debugfs_sched, NULL, &sched_scaling_fops);
> debugfs_create_u32("migration_cost_ns", 0644, debugfs_sched, &sysctl_sched_migration_cost);
> debugfs_create_u32("nr_migrate", 0644, debugfs_sched, &sysctl_sched_nr_migrate);
> +
> + mutex_lock(&sched_domains_mutex);
> + register_sched_domain_sysctl();
> + mutex_unlock(&sched_domains_mutex);
> #endif
>
> #ifdef CONFIG_NUMA_BALANCING
> @@ -316,229 +320,88 @@ late_initcall(sched_init_debug);
>
> #ifdef CONFIG_SMP
>
> -#ifdef CONFIG_SYSCTL
> -
> -static struct ctl_table sd_ctl_dir[] = {
> - {
> - .procname = "sched_domain",
> - .mode = 0555,
> - },
> - {}
> -};
> -
> -static struct ctl_table sd_ctl_root[] = {
> - {
> - .procname = "kernel",
> - .mode = 0555,
> - .child = sd_ctl_dir,
> - },
> - {}
> -};
> -
> -static struct ctl_table *sd_alloc_ctl_entry(int n)
> -{
> - struct ctl_table *entry =
> - kcalloc(n, sizeof(struct ctl_table), GFP_KERNEL);
> -
> - return entry;
> -}
> -
> -static void sd_free_ctl_entry(struct ctl_table **tablep)
> -{
> - struct ctl_table *entry;
> -
> - /*
> - * In the intermediate directories, both the child directory and
> - * procname are dynamically allocated and could fail but the mode
> - * will always be set. In the lowest directory the names are
> - * static strings and all have proc handlers.
> - */
> - for (entry = *tablep; entry->mode; entry++) {
> - if (entry->child)
> - sd_free_ctl_entry(&entry->child);
> - if (entry->proc_handler == NULL)
> - kfree(entry->procname);
> - }
> -
> - kfree(*tablep);
> - *tablep = NULL;
> -}
> -
> -static void
> -set_table_entry(struct ctl_table *entry,
> - const char *procname, void *data, int maxlen,
> - umode_t mode, proc_handler *proc_handler)
> -{
> - entry->procname = procname;
> - entry->data = data;
> - entry->maxlen = maxlen;
> - entry->mode = mode;
> - entry->proc_handler = proc_handler;
> -}
> +static cpumask_var_t sd_sysctl_cpus;
> +static struct dentry *sd_dentry;
>
> -static int sd_ctl_doflags(struct ctl_table *table, int write,
> - void *buffer, size_t *lenp, loff_t *ppos)
> +static int sd_flags_show(struct seq_file *m, void *v)
> {
> - unsigned long flags = *(unsigned long *)table->data;
> - size_t data_size = 0;
> - size_t len = 0;
> - char *tmp, *buf;
> + unsigned long flags = *(unsigned int *)m->private;
> int idx;
>
> - if (write)
> - return 0;
> -
> - for_each_set_bit(idx, &flags, __SD_FLAG_CNT) {
> - char *name = sd_flag_debug[idx].name;
> -
> - /* Name plus whitespace */
> - data_size += strlen(name) + 1;
> - }
> -
> - if (*ppos > data_size) {
> - *lenp = 0;
> - return 0;
> - }
> -
> - buf = kcalloc(data_size + 1, sizeof(*buf), GFP_KERNEL);
> - if (!buf)
> - return -ENOMEM;
> -
> for_each_set_bit(idx, &flags, __SD_FLAG_CNT) {
> - char *name = sd_flag_debug[idx].name;
> -
> - len += snprintf(buf + len, strlen(name) + 2, "%s ", name);
> + seq_puts(m, sd_flag_debug[idx].name);
> + seq_puts(m, " ");
> }
> -
> - tmp = buf + *ppos;
> - len -= *ppos;
> -
> - if (len > *lenp)
> - len = *lenp;
> - if (len)
> - memcpy(buffer, tmp, len);
> - if (len < *lenp) {
> - ((char *)buffer)[len] = '\n';
> - len++;
> - }
> -
> - *lenp = len;
> - *ppos += len;
> -
> - kfree(buf);
> + seq_puts(m, "\n");
>
> return 0;
> }
>
> -static struct ctl_table *
> -sd_alloc_ctl_domain_table(struct sched_domain *sd)
> -{
> - struct ctl_table *table = sd_alloc_ctl_entry(9);
> -
> - if (table == NULL)
> - return NULL;
> -
> - set_table_entry(&table[0], "min_interval", &sd->min_interval, sizeof(long), 0644, proc_doulongvec_minmax);
> - set_table_entry(&table[1], "max_interval", &sd->max_interval, sizeof(long), 0644, proc_doulongvec_minmax);
> - set_table_entry(&table[2], "busy_factor", &sd->busy_factor, sizeof(int), 0644, proc_dointvec_minmax);
> - set_table_entry(&table[3], "imbalance_pct", &sd->imbalance_pct, sizeof(int), 0644, proc_dointvec_minmax);
> - set_table_entry(&table[4], "cache_nice_tries", &sd->cache_nice_tries, sizeof(int), 0644, proc_dointvec_minmax);
> - set_table_entry(&table[5], "flags", &sd->flags, sizeof(int), 0444, sd_ctl_doflags);
> - set_table_entry(&table[6], "max_newidle_lb_cost", &sd->max_newidle_lb_cost, sizeof(long), 0644, proc_doulongvec_minmax);
> - set_table_entry(&table[7], "name", sd->name, CORENAME_MAX_SIZE, 0444, proc_dostring);
> - /* &table[8] is terminator */
> -
> - return table;
> -}
> -
> -static struct ctl_table *sd_alloc_ctl_cpu_table(int cpu)
> +static int sd_flags_open(struct inode *inode, struct file *file)
> {
> - struct ctl_table *entry, *table;
> - struct sched_domain *sd;
> - int domain_num = 0, i;
> - char buf[32];
> -
> - for_each_domain(cpu, sd)
> - domain_num++;
> - entry = table = sd_alloc_ctl_entry(domain_num + 1);
> - if (table == NULL)
> - return NULL;
> -
> - i = 0;
> - for_each_domain(cpu, sd) {
> - snprintf(buf, 32, "domain%d", i);
> - entry->procname = kstrdup(buf, GFP_KERNEL);
> - entry->mode = 0555;
> - entry->child = sd_alloc_ctl_domain_table(sd);
> - entry++;
> - i++;
> - }
> - return table;
> + return single_open(file, sd_flags_show, inode->i_private);
> }
>
> -static cpumask_var_t sd_sysctl_cpus;
> -static struct ctl_table_header *sd_sysctl_header;
> +static const struct file_operations sd_flags_fops = {
> + .open = sd_flags_open,
> + .read = seq_read,
> + .llseek = seq_lseek,
> + .release = single_release,
> +};
>
> -void register_sched_domain_sysctl(void)
> +static void register_sd(struct sched_domain *sd, struct dentry *parent)
> {
> - static struct ctl_table *cpu_entries;
> - static struct ctl_table **cpu_idx;
> - static bool init_done = false;
> - char buf[32];
> - int i;
> -
> - if (!cpu_entries) {
> - cpu_entries = sd_alloc_ctl_entry(num_possible_cpus() + 1);
> - if (!cpu_entries)
> - return;
> +#define SDM(type, mode, member) \
> + debugfs_create_##type(#member, mode, parent, &sd->member)
>
> - WARN_ON(sd_ctl_dir[0].child);
> - sd_ctl_dir[0].child = cpu_entries;
> - }
> + SDM(ulong, 0644, min_interval);
> + SDM(ulong, 0644, max_interval);
> + SDM(u64, 0644, max_newidle_lb_cost);
> + SDM(u32, 0644, busy_factor);
> + SDM(u32, 0644, imbalance_pct);
> + SDM(u32, 0644, cache_nice_tries);
> + SDM(str, 0444, name);
>
> - if (!cpu_idx) {
> - struct ctl_table *e = cpu_entries;
> +#undef SDM
>
> - cpu_idx = kcalloc(nr_cpu_ids, sizeof(struct ctl_table*), GFP_KERNEL);
> - if (!cpu_idx)
> - return;
> + debugfs_create_file("flags", 0444, parent, &sd->flags, &sd_flags_fops);
> +}
>
> - /* deal with sparse possible map */
> - for_each_possible_cpu(i) {
> - cpu_idx[i] = e;
> - e++;
> - }
> - }
> +void register_sched_domain_sysctl(void)
> +{
> + int cpu, i;
>
> if (!cpumask_available(sd_sysctl_cpus)) {
> if (!alloc_cpumask_var(&sd_sysctl_cpus, GFP_KERNEL))
> return;
> - }
> -
> - if (!init_done) {
> - init_done = true;
> - /* init to possible to not have holes in @cpu_entries */
> cpumask_copy(sd_sysctl_cpus, cpu_possible_mask);
> }
>
> - for_each_cpu(i, sd_sysctl_cpus) {
> - struct ctl_table *e = cpu_idx[i];
> + if (!sd_dentry)
> + sd_dentry = debugfs_create_dir("domains", debugfs_sched);
> +
> + for_each_cpu(cpu, sd_sysctl_cpus) {
> + struct sched_domain *sd;
> + struct dentry *d_cpu;
> + char buf[32];
> +
> + snprintf(buf, sizeof(buf), "cpu%d", cpu);
> + debugfs_remove(debugfs_lookup(buf, sd_dentry));
> + d_cpu = debugfs_create_dir(buf, sd_dentry);
> +
> + i = 0;
> + for_each_domain(cpu, sd) {
> + struct dentry *d_sd;
>
> - if (e->child)
> - sd_free_ctl_entry(&e->child);
> + snprintf(buf, sizeof(buf), "domain%d", i);
> + d_sd = debugfs_create_dir(buf, d_cpu);
>
> - if (!e->procname) {
> - snprintf(buf, 32, "cpu%d", i);
> - e->procname = kstrdup(buf, GFP_KERNEL);
> + register_sd(sd, d_sd);
> + i++;
> }
> - e->mode = 0555;
> - e->child = sd_alloc_ctl_cpu_table(i);
>
> - __cpumask_clear_cpu(i, sd_sysctl_cpus);
> + __cpumask_clear_cpu(cpu, sd_sysctl_cpus);
> }
> -
> - WARN_ON(sd_sysctl_header);
> - sd_sysctl_header = register_sysctl_table(sd_ctl_root);
> }
>
> void dirty_sched_domain_sysctl(int cpu)
> @@ -547,13 +410,6 @@ void dirty_sched_domain_sysctl(int cpu)
> __cpumask_set_cpu(cpu, sd_sysctl_cpus);
> }
>
> -/* may be called multiple times per register */
> -void unregister_sched_domain_sysctl(void)
> -{
> - unregister_sysctl_table(sd_sysctl_header);
> - sd_sysctl_header = NULL;
> -}
> -#endif /* CONFIG_SYSCTL */
> #endif /* CONFIG_SMP */
>
> #ifdef CONFIG_FAIR_GROUP_SCHED
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -1553,10 +1553,9 @@ static inline unsigned int group_first_c
>
> extern int group_balance_cpu(struct sched_group *sg);
>
> -#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
> +#ifdef CONFIG_SCHED_DEBUG
> void register_sched_domain_sysctl(void);
> void dirty_sched_domain_sysctl(int cpu);
> -void unregister_sched_domain_sysctl(void);
> #else
> static inline void register_sched_domain_sysctl(void)
> {
> @@ -1564,9 +1563,6 @@ static inline void register_sched_domain
> static inline void dirty_sched_domain_sysctl(int cpu)
> {
> }
> -static inline void unregister_sched_domain_sysctl(void)
> -{
> -}
> #endif
>
> extern int sched_update_scaling(void);
> --- a/kernel/sched/topology.c
> +++ b/kernel/sched/topology.c
> @@ -2223,7 +2223,6 @@ int sched_init_domains(const struct cpum
> doms_cur = &fallback_doms;
> cpumask_and(doms_cur[0], cpu_map, housekeeping_cpumask(HK_FLAG_DOMAIN));
> err = build_sched_domains(doms_cur[0], NULL);
> - register_sched_domain_sysctl();
>
> return err;
> }
> @@ -2298,9 +2297,6 @@ void partition_sched_domains_locked(int
>
> lockdep_assert_held(&sched_domains_mutex);
>
> - /* Always unregister in case we don't destroy any domains: */
> - unregister_sched_domain_sysctl();
> -
> /* Let the architecture update CPU core mappings: */
> new_topology = arch_update_cpu_topology();
>
> @@ -2388,8 +2384,6 @@ void partition_sched_domains_locked(int
> doms_cur = doms_new;
> dattr_cur = dattr_new;
> ndoms_cur = ndoms_new;
> -
> - register_sched_domain_sysctl();
> }
>
This has to stay, otherwise we never update the files.
Other than that:
Reviewed-by: Valentin Schneider <valentin.schneider@....com>
And for the whole series:
Tested-by: Valentin Schneider <valentin.schneider@....com>
> /*
Powered by blists - more mailing lists