Message-Id: <AE1F6825-E70D-41C0-90BE-222B22ADB2BC@gmail.com>
Date: Thu, 5 Jun 2025 16:34:19 +0800
From: Jemmy Wong <jemmywong512@...il.com>
To: K Prateek Nayak <kprateek.nayak@....com>
Cc: Jemmy <jemmywong512@...il.com>,
Ingo Molnar <mingo@...hat.com>,
Peter Zijlstra <peterz@...radead.org>,
Juri Lelli <juri.lelli@...hat.com>,
Vincent Guittot <vincent.guittot@...aro.org>,
Dietmar Eggemann <dietmar.eggemann@....com>,
Steven Rostedt <rostedt@...dmis.org>,
Ben Segall <bsegall@...gle.com>,
Mel Gorman <mgorman@...e.de>,
Valentin Schneider <vschneid@...hat.com>,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH v0] sched/topology: Add lock guard support
Hi Prateek,
Thank you for your review and comments regarding guard vs. scoped_guard.
The primary purpose of guard is to address the notorious "goto" problem,
preventing resource leaks with compiler assistance.
Additionally, I think it serves a second key purpose:
clearly defining the boundaries of an object's lifecycle,
which enhances code readability and maintainability.
This role aligns with how guards are used in other languages, e.g., C++.
To clarify my preference (a minimal sketch follows this list):
- For critical sections that cover only part of a function, I prefer scoped_guard,
as it explicitly delineates the boundaries of the critical section.
- For critical sections spanning an entire function, I prefer guard,
as it better suits the broader scope.
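For illustration, here is a minimal sketch with a hypothetical mutex and data
(not code from this patch), showing the two styles side by side:

static DEFINE_MUTEX(example_lock);
static int shared_val;

static int example_update(int new_val)
{
	int old;

	/*
	 * The critical section covers only part of the function:
	 * scoped_guard() makes its boundary explicit, and the mutex
	 * is dropped at the closing brace.
	 */
	scoped_guard(mutex, &example_lock) {
		old = shared_val;
		shared_val = new_val;
	}

	/* example_lock is no longer held here */
	return old;
}

static void example_reset(void)
{
	/*
	 * The critical section spans the whole function: a plain
	 * guard() releases the mutex on every return path.
	 */
	guard(mutex)(&example_lock);

	shared_val = 0;
}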
I agree, and will convert most of the scoped_guard() uses to guard() per your comments, with a few exceptions.
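For example, a converted call site using the zero-argument guard defined by the
patch could look like this (the wrapper function here is illustrative only):

static int example_rebuild(const struct cpumask *cpu_map)
{
	/* Released automatically on return, covering the rest of the function */
	guard(sched_domains_mutex)();

	return sched_init_domains(cpu_map);
}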
Let me know if you have further thoughts or suggestions!
> On Jun 5, 2025, at 11:41 AM, K Prateek Nayak <kprateek.nayak@....com> wrote:
>
> Hello Jemmy,
>
> On 6/5/2025 12:20 AM, Jemmy Wong wrote:
>> This change replaces manual lock acquisition and release with lock guards
>> to improve code robustness and reduce the risk of lock mismanagement.
>> No functional changes to the scheduler topology logic are introduced.
>> Signed-off-by: Jemmy Wong <jemmywong512@...il.com>
>> ---
>> include/linux/sched.h | 11 +--
>> kernel/sched/core.c | 6 +-
>> kernel/sched/debug.c | 28 ++++---
>> kernel/sched/rt.c | 46 ++++++------
>> kernel/sched/topology.c | 162 +++++++++++++++++++---------------------
>> 5 files changed, 120 insertions(+), 133 deletions(-)
>> diff --git a/include/linux/sched.h b/include/linux/sched.h
>> index 4f78a64beb52..10a9d6083b72 100644
>> --- a/include/linux/sched.h
>> +++ b/include/linux/sched.h
>> @@ -46,6 +46,7 @@
>> #include <linux/rv.h>
>> #include <linux/uidgid_types.h>
>> #include <linux/tracepoint-defs.h>
>> +#include <linux/mutex.h>
>> #include <asm/kmap_size.h>
>> /* task_struct member predeclarations (sorted alphabetically): */
>> @@ -395,14 +396,14 @@ enum uclamp_id {
>> UCLAMP_CNT
>> };
>> +extern struct mutex sched_domains_mutex;
>> #ifdef CONFIG_SMP
>> extern struct root_domain def_root_domain;
>> -extern struct mutex sched_domains_mutex;
>> -extern void sched_domains_mutex_lock(void);
>> -extern void sched_domains_mutex_unlock(void);
>> +DEFINE_LOCK_GUARD_0(sched_domains_mutex,
>> + mutex_lock(&sched_domains_mutex),
>> + mutex_unlock(&sched_domains_mutex))
>> #else
>> -static inline void sched_domains_mutex_lock(void) { }
>> -static inline void sched_domains_mutex_unlock(void) { }
>> +DEFINE_LOCK_GUARD_0(sched_domains_mutex, ,)
>> #endif
>> struct sched_param {
>> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
>> index dce50fa57471..b2b7a0cae95a 100644
>> --- a/kernel/sched/core.c
>> +++ b/kernel/sched/core.c
>> @@ -8457,9 +8457,9 @@ void __init sched_init_smp(void)
>> * CPU masks are stable and all blatant races in the below code cannot
>> * happen.
>> */
>> - sched_domains_mutex_lock();
>> - sched_init_domains(cpu_active_mask);
>> - sched_domains_mutex_unlock();
>> + scoped_guard(sched_domains_mutex) {
>> + sched_init_domains(cpu_active_mask);
>> + }
>> /* Move init over to a non-isolated CPU */
>> if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_TYPE_DOMAIN)) < 0)
>> diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
>> index 9d71baf08075..f56401725ef6 100644
>> --- a/kernel/sched/debug.c
>> +++ b/kernel/sched/debug.c
>> @@ -294,19 +294,17 @@ static ssize_t sched_verbose_write(struct file *filp, const char __user *ubuf,
>> bool orig;
>> cpus_read_lock();
>
> cpus_read_{un}lock() have guards too. You can just have:
>
> guard(cpus_read_lock)();
> guard(sched_domains_mutex)();
>
> no need for scoped guard. Compiler will take care of unlocking
> ordering before return.
>
>> - sched_domains_mutex_lock();
>> -
>> - orig = sched_debug_verbose;
>> - result = debugfs_write_file_bool(filp, ubuf, cnt, ppos);
>> -
>> - if (sched_debug_verbose && !orig)
>> - update_sched_domain_debugfs();
>> - else if (!sched_debug_verbose && orig) {
>> - debugfs_remove(sd_dentry);
>> - sd_dentry = NULL;
>> + scoped_guard(sched_domains_mutex) {
>> + orig = sched_debug_verbose;
>> + result = debugfs_write_file_bool(filp, ubuf, cnt, ppos);
>> +
>> + if (sched_debug_verbose && !orig)
>> + update_sched_domain_debugfs();
>> + else if (!sched_debug_verbose && orig) {
>> + debugfs_remove(sd_dentry);
>> + sd_dentry = NULL;
>> + }
>> }
>> -
>> - sched_domains_mutex_unlock();
>> cpus_read_unlock();
>> return result;
>
> General comment, it is okay to convert the following pattern:
>
> func()
> {
> ...
> lock();
> ... /* critical section */
> unlock:
> unlock();
>
> return ret;
> }
>
> to:
> func()
> {
> ...
> guard();
> ... /* critical section with s/goto unlock/return ret/ */
>
> return ret;
> }
>
> You don't need a scoped_guard() if the critical section is at the end of
> the function.
>
>> @@ -517,9 +515,9 @@ static __init int sched_init_debug(void)
>> debugfs_create_u32("migration_cost_ns", 0644, debugfs_sched, &sysctl_sched_migration_cost);
>> debugfs_create_u32("nr_migrate", 0644, debugfs_sched, &sysctl_sched_nr_migrate);
>> - sched_domains_mutex_lock();
>> - update_sched_domain_debugfs();
>> - sched_domains_mutex_unlock();
>> + scoped_guard(sched_domains_mutex) {
>> + update_sched_domain_debugfs();
>> + }
>> #endif
>> #ifdef CONFIG_NUMA_BALANCING
>> diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
>> index e40422c37033..3f6f181de387 100644
>> --- a/kernel/sched/rt.c
>> +++ b/kernel/sched/rt.c
>> @@ -2920,36 +2920,36 @@ static int sched_rt_handler(const struct ctl_table *table, int write, void *buff
>> static DEFINE_MUTEX(mutex);
>> int ret;
>> - mutex_lock(&mutex);
>> - sched_domains_mutex_lock();
>> - old_period = sysctl_sched_rt_period;
>> - old_runtime = sysctl_sched_rt_runtime;
>> + guard(mutex)(&mutex);
>> - ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
>> + scoped_guard(sched_domains_mutex) {
>
> No need for scoped guard, "guard(sched_domains_mutex)();" should be
> enough.
>
>> + old_period = sysctl_sched_rt_period;
>> + old_runtime = sysctl_sched_rt_runtime;
>> - if (!ret && write) {
>> - ret = sched_rt_global_validate();
>> - if (ret)
>> - goto undo;
>> + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
>> - ret = sched_dl_global_validate();
>> - if (ret)
>> - goto undo;
>> + if (!ret && write) {
>> + ret = sched_rt_global_validate();
>> + if (ret)
>> + goto undo;
>> - ret = sched_rt_global_constraints();
>> - if (ret)
>> - goto undo;
>> + ret = sched_dl_global_validate();
>> + if (ret)
>> + goto undo;
>> - sched_rt_do_global();
>> - sched_dl_do_global();
>> - }
>> - if (0) {
>> + ret = sched_rt_global_constraints();
>> + if (ret)
>> + goto undo;
>> +
>> + sched_rt_do_global();
>> + sched_dl_do_global();
>> + }
>> + if (0) {
>> undo:
>
> On a sidenote, include/linux/cleanup.h has the following comment:
>
> Lastly, given that the benefit of cleanup helpers is removal of
> "goto", and that the "goto" statement can jump between scopes, the
> expectation is that usage of "goto" and cleanup helpers is never
> mixed in the same function. I.e. for a given routine, convert all
> resources that need a "goto" cleanup to scope-based cleanup, or
> convert none of them.
>
> Although the compiler generates the correct code currently, I think
> you should just replicate the undo chunk inplace of "goto undo" just
> to be safe like:
>
> if (ret) {
> sysctl_sched_rt_period = old_period;
> sysctl_sched_rt_runtime = old_runtime;
>
> return ret;
> }
>
>> - sysctl_sched_rt_period = old_period;
>> - sysctl_sched_rt_runtime = old_runtime;
>> + sysctl_sched_rt_period = old_period;
>> + sysctl_sched_rt_runtime = old_runtime;
>> + }
>> }
>> - sched_domains_mutex_unlock();
>> - mutex_unlock(&mutex);
>> return ret;
>> }
>> diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
>> index b958fe48e020..dac1dd5a6eca 100644
>> --- a/kernel/sched/topology.c
>> +++ b/kernel/sched/topology.c
>> @@ -6,14 +6,6 @@
>> #include <linux/bsearch.h>
>> DEFINE_MUTEX(sched_domains_mutex);
>> -void sched_domains_mutex_lock(void)
>> -{
>> - mutex_lock(&sched_domains_mutex);
>> -}
>> -void sched_domains_mutex_unlock(void)
>> -{
>> - mutex_unlock(&sched_domains_mutex);
>> -}
>> /* Protected by sched_domains_mutex: */
>> static cpumask_var_t sched_domains_tmpmask;
>> @@ -470,44 +462,41 @@ static void free_rootdomain(struct rcu_head *rcu)
>> void rq_attach_root(struct rq *rq, struct root_domain *rd)
>> {
>> struct root_domain *old_rd = NULL;
>> - struct rq_flags rf;
>> - rq_lock_irqsave(rq, &rf);
>> + scoped_guard(rq_lock_irqsave, rq) {
>
> I'm not a big fan of this added indentation. Perhaps you can move the
> rq_lock guarded bit into a separate function?
The added indentation clearly marks the boundary of the lock scope.
Moving the guarded section into a separate function would leave the caller
almost empty, since the critical section makes up the bulk of the function.
>> + if (rq->rd) {
>> + old_rd = rq->rd;
>> - if (rq->rd) {
>> - old_rd = rq->rd;
>> + if (cpumask_test_cpu(rq->cpu, old_rd->online))
>> + set_rq_offline(rq);
>> +
>> + cpumask_clear_cpu(rq->cpu, old_rd->span);
>> +
>> + /*
>> + * If we don't want to free the old_rd yet then
>> + * set old_rd to NULL to skip the freeing later
>> + * in this function:
>> + */
>> + if (!atomic_dec_and_test(&old_rd->refcount))
>> + old_rd = NULL;
>> + }
>> - if (cpumask_test_cpu(rq->cpu, old_rd->online))
>> - set_rq_offline(rq);
>> + atomic_inc(&rd->refcount);
>> + rq->rd = rd;
>> - cpumask_clear_cpu(rq->cpu, old_rd->span);
>> + cpumask_set_cpu(rq->cpu, rd->span);
>> + if (cpumask_test_cpu(rq->cpu, cpu_active_mask))
>> + set_rq_online(rq);
>> /*
>> - * If we don't want to free the old_rd yet then
>> - * set old_rd to NULL to skip the freeing later
>> - * in this function:
>> + * Because the rq is not a task, dl_add_task_root_domain() did not
>> + * move the fair server bw to the rd if it already started.
>> + * Add it now.
>> */
>> - if (!atomic_dec_and_test(&old_rd->refcount))
>> - old_rd = NULL;
>> + if (rq->fair_server.dl_server)
>> + __dl_server_attach_root(&rq->fair_server, rq);
>> }
>> - atomic_inc(&rd->refcount);
>> - rq->rd = rd;
>> -
>> - cpumask_set_cpu(rq->cpu, rd->span);
>> - if (cpumask_test_cpu(rq->cpu, cpu_active_mask))
>> - set_rq_online(rq);
>> -
>> - /*
>> - * Because the rq is not a task, dl_add_task_root_domain() did not
>> - * move the fair server bw to the rd if it already started.
>> - * Add it now.
>> - */
>> - if (rq->fair_server.dl_server)
>> - __dl_server_attach_root(&rq->fair_server, rq);
>> -
>> - rq_unlock_irqrestore(rq, &rf);
>> -
>> if (old_rd)
>> call_rcu(&old_rd->rcu, free_rootdomain);
>> }
>> @@ -1809,18 +1798,17 @@ bool find_numa_distance(int distance)
>> if (distance == node_distance(0, 0))
>> return true;
>> - rcu_read_lock();
>> - distances = rcu_dereference(sched_domains_numa_distance);
>> - if (!distances)
>> - goto unlock;
>> - for (i = 0; i < sched_domains_numa_levels; i++) {
>> - if (distances[i] == distance) {
>> - found = true;
>> + scoped_guard(rcu) {
>
> guard(rcu)() should be enough. No need for scoped guard. Instead
> of breaks, you can "return found" directly ...
>
>> + distances = rcu_dereference(sched_domains_numa_distance);
>> + if (!distances)
>> break;
>> + for (i = 0; i < sched_domains_numa_levels; i++) {
>> + if (distances[i] == distance) {
>> + found = true;
>> + break;
>> + }
>> }
>> }
>> -unlock:
>> - rcu_read_unlock();
>> return found;
>> }
>> @@ -2134,21 +2122,20 @@ int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
>> int i, j = cpu_to_node(cpu), found = nr_cpu_ids;
>> struct cpumask ***masks;
>> - rcu_read_lock();
>> - masks = rcu_dereference(sched_domains_numa_masks);
>> - if (!masks)
>> - goto unlock;
>> - for (i = 0; i < sched_domains_numa_levels; i++) {
>> - if (!masks[i][j])
>> - break;
>> - cpu = cpumask_any_and_distribute(cpus, masks[i][j]);
>> - if (cpu < nr_cpu_ids) {
>> - found = cpu;
>> + scoped_guard(rcu) {
>
> Same as last comment, plain guard(rcu)(); should be fine ...
>
>> + masks = rcu_dereference(sched_domains_numa_masks);
>> + if (!masks)
>> break;
>> + for (i = 0; i < sched_domains_numa_levels; i++) {
>> + if (!masks[i][j])
>> + break;
>> + cpu = cpumask_any_and_distribute(cpus, masks[i][j]);
>> + if (cpu < nr_cpu_ids) {
>> + found = cpu;
>> + break;
>> + }
>> }
>> }
>> -unlock:
>> - rcu_read_unlock();
>> return found;
>> }
>> @@ -2201,24 +2188,25 @@ int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node)
>> if (node == NUMA_NO_NODE)
>> return cpumask_nth_and(cpu, cpus, cpu_online_mask);
>> - rcu_read_lock();
>> + scoped_guard(rcu) {
>
> Same as last comment ...
>
>> + /* CPU-less node entries are uninitialized in sched_domains_numa_masks */
>> + node = numa_nearest_node(node, N_CPU);
>> + k.node = node;
>> - /* CPU-less node entries are uninitialized in sched_domains_numa_masks */
>> - node = numa_nearest_node(node, N_CPU);
>> - k.node = node;
>> + k.masks = rcu_dereference(sched_domains_numa_masks);
>> + if (!k.masks)
>> + break;
>> - k.masks = rcu_dereference(sched_domains_numa_masks);
>> - if (!k.masks)
>> - goto unlock;
>> + hop_masks = bsearch(&k, k.masks, sched_domains_numa_levels,
>> + sizeof(k.masks[0]), hop_cmp);
>> + hop = hop_masks - k.masks;
>> - hop_masks = bsearch(&k, k.masks, sched_domains_numa_levels, sizeof(k.masks[0]), hop_cmp);
>> - hop = hop_masks - k.masks;
>> + ret = hop ?
>> + cpumask_nth_and_andnot(cpu - k.w, cpus, k.masks[hop][node],
>> + k.masks[hop-1][node]) :
>> + cpumask_nth_and(cpu, cpus, k.masks[0][node]);
>> + }
>> - ret = hop ?
>> - cpumask_nth_and_andnot(cpu - k.w, cpus, k.masks[hop][node], k.masks[hop-1][node]) :
>> - cpumask_nth_and(cpu, cpus, k.masks[0][node]);
>> -unlock:
>> - rcu_read_unlock();
>> return ret;
>> }
>> EXPORT_SYMBOL_GPL(sched_numa_find_nth_cpu);
>> @@ -2570,17 +2558,17 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
>> }
>> /* Attach the domains */
>> - rcu_read_lock();
>> - for_each_cpu(i, cpu_map) {
>> - rq = cpu_rq(i);
>> - sd = *per_cpu_ptr(d.sd, i);
>> + scoped_guard(rcu) {
>> + for_each_cpu(i, cpu_map) {
>> + rq = cpu_rq(i);
>> + sd = *per_cpu_ptr(d.sd, i);
>> - cpu_attach_domain(sd, d.rd, i);
>> + cpu_attach_domain(sd, d.rd, i);
>> - if (lowest_flag_domain(i, SD_CLUSTER))
>> - has_cluster = true;
>> + if (lowest_flag_domain(i, SD_CLUSTER))
>> + has_cluster = true;
>> + }
>> }
>> - rcu_read_unlock();
>> if (has_asym)
>> static_branch_inc_cpuslocked(&sched_asym_cpucapacity);
>> @@ -2688,10 +2676,10 @@ static void detach_destroy_domains(const struct cpumask *cpu_map)
>> if (static_branch_unlikely(&sched_cluster_active))
>> static_branch_dec_cpuslocked(&sched_cluster_active);
>> - rcu_read_lock();
>
> Same as last comment ...
The RCU critical section covers only part of the function, so I think scoped_guard is more suitable than guard here.
>
>> - for_each_cpu(i, cpu_map)
>> - cpu_attach_domain(NULL, &def_root_domain, i);
>> - rcu_read_unlock();
>> + scoped_guard(rcu) {
>> + for_each_cpu(i, cpu_map)
>> + cpu_attach_domain(NULL, &def_root_domain, i);
>> + }
>> }
>> /* handle null as "default" */
>> @@ -2836,7 +2824,7 @@ static void partition_sched_domains_locked(int ndoms_new, cpumask_var_t doms_new
>> void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
>> struct sched_domain_attr *dattr_new)
>> {
>> - sched_domains_mutex_lock();
>> - partition_sched_domains_locked(ndoms_new, doms_new, dattr_new);
>> - sched_domains_mutex_unlock();
>> + scoped_guard(sched_domains_mutex) {
>
> Similar to last comment, plain guard(sched_domains_mutex)(); should be fine.
>
>> + partition_sched_domains_locked(ndoms_new, doms_new, dattr_new);
>> + }
>> }
>
> --
> Thanks and Regards,
> Prateek
Best Regards,
Jemmy