Message-ID: <4d156476-af35-4f0e-ab68-a3119b9a5a2e@amd.com>
Date: Thu, 12 Jun 2025 11:05:37 +0530
From: K Prateek Nayak <kprateek.nayak@....com>
To: Jemmy Wong <jemmywong512@...il.com>
Cc: Ingo Molnar <mingo@...hat.com>, Peter Zijlstra <peterz@...radead.org>,
Juri Lelli <juri.lelli@...hat.com>,
Vincent Guittot <vincent.guittot@...aro.org>,
Dietmar Eggemann <dietmar.eggemann@....com>,
Steven Rostedt <rostedt@...dmis.org>, Ben Segall <bsegall@...gle.com>,
Mel Gorman <mgorman@...e.de>, Valentin Schneider <vschneid@...hat.com>,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH v1 1/1] sched/topology: Add lock guard support
Hello Jemmy,
Sorry for the delay!
On 6/5/2025 5:34 PM, Jemmy Wong wrote:
> This change replaces manual lock acquisition and release with lock guards
> to improve code robustness and reduce the risk of lock mismanagement.
> No functional changes to the scheduler topology logic are introduced.
>
> Signed-off-by: Jemmy Wong <jemmywong512@...il.com>
I have a few comments below, but other than that, I tested the patch with a
combination of hotplug and cpusets running concurrently and did not see
any issues. Feel free to add:
Tested-by: K Prateek Nayak <kprateek.nayak@....com>
>
> ---
> include/linux/sched.h | 11 ++--
> kernel/sched/core.c | 6 +--
> kernel/sched/debug.c | 13 ++---
> kernel/sched/rt.c | 35 ++++++------
> kernel/sched/topology.c | 115 +++++++++++++++++-----------------------
> 5 files changed, 81 insertions(+), 99 deletions(-)
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 4f78a64beb52..10a9d6083b72 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -46,6 +46,7 @@
> #include <linux/rv.h>
> #include <linux/uidgid_types.h>
> #include <linux/tracepoint-defs.h>
> +#include <linux/mutex.h>
> #include <asm/kmap_size.h>
>
> /* task_struct member predeclarations (sorted alphabetically): */
> @@ -395,14 +396,14 @@ enum uclamp_id {
> UCLAMP_CNT
> };
>
> +extern struct mutex sched_domains_mutex;
> #ifdef CONFIG_SMP
> extern struct root_domain def_root_domain;
> -extern struct mutex sched_domains_mutex;
> -extern void sched_domains_mutex_lock(void);
> -extern void sched_domains_mutex_unlock(void);
> +DEFINE_LOCK_GUARD_0(sched_domains_mutex,
> + mutex_lock(&sched_domains_mutex),
> + mutex_unlock(&sched_domains_mutex))
> #else
> -static inline void sched_domains_mutex_lock(void) { }
> -static inline void sched_domains_mutex_unlock(void) { }
> +DEFINE_LOCK_GUARD_0(sched_domains_mutex, ,)
> #endif
>
> struct sched_param {
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index dce50fa57471..b2b7a0cae95a 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -8457,9 +8457,9 @@ void __init sched_init_smp(void)
> * CPU masks are stable and all blatant races in the below code cannot
> * happen.
> */
> - sched_domains_mutex_lock();
> - sched_init_domains(cpu_active_mask);
> - sched_domains_mutex_unlock();
> + scoped_guard(sched_domains_mutex) {
> + sched_init_domains(cpu_active_mask);
> + }
Since sched_init_domains() is only called once during init from here,
perhaps you can move the guard() into sched_init_domains() and avoid
the scoped_guard() here in the caller.
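Something like this, perhaps (untested sketch with the existing body
elided, assuming sched_init_domains() keeps its current signature):

int __init sched_init_domains(const struct cpumask *cpu_map)
{
	int err;

	/* Serialize the initial domain build against concurrent updates. */
	guard(sched_domains_mutex)();

	/* ... existing body building the default domains stays unchanged ... */

	return err;
}

The caller in sched_init_smp() then reduces to a plain
sched_init_domains(cpu_active_mask); call without the scoped_guard() block.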
>
> /* Move init over to a non-isolated CPU */
> if (set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_TYPE_DOMAIN)) < 0)
> diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
> index 9d71baf08075..1c00016fd54c 100644
> --- a/kernel/sched/debug.c
> +++ b/kernel/sched/debug.c
> @@ -293,8 +293,8 @@ static ssize_t sched_verbose_write(struct file *filp, const char __user *ubuf,
> ssize_t result;
> bool orig;
>
> - cpus_read_lock();
> - sched_domains_mutex_lock();
> + guard(cpus_read_lock)();
> + guard(sched_domains_mutex)();
>
> orig = sched_debug_verbose;
> result = debugfs_write_file_bool(filp, ubuf, cnt, ppos);
> @@ -306,9 +306,6 @@ static ssize_t sched_verbose_write(struct file *filp, const char __user *ubuf,
> sd_dentry = NULL;
> }
>
> - sched_domains_mutex_unlock();
> - cpus_read_unlock();
> -
> return result;
> }
> #else
> @@ -517,9 +514,9 @@ static __init int sched_init_debug(void)
> debugfs_create_u32("migration_cost_ns", 0644, debugfs_sched, &sysctl_sched_migration_cost);
> debugfs_create_u32("nr_migrate", 0644, debugfs_sched, &sysctl_sched_nr_migrate);
>
> - sched_domains_mutex_lock();
> - update_sched_domain_debugfs();
> - sched_domains_mutex_unlock();
> + scoped_guard(sched_domains_mutex) {
> + update_sched_domain_debugfs();
> + }
> #endif
>
> #ifdef CONFIG_NUMA_BALANCING
> diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
> index e40422c37033..3533b471b015 100644
> --- a/kernel/sched/rt.c
> +++ b/kernel/sched/rt.c
> @@ -2920,36 +2920,37 @@ static int sched_rt_handler(const struct ctl_table *table, int write, void *buff
> static DEFINE_MUTEX(mutex);
> int ret;
>
> - mutex_lock(&mutex);
> - sched_domains_mutex_lock();
> + guard(mutex)(&mutex);
> + guard(sched_domains_mutex)();
> +
> old_period = sysctl_sched_rt_period;
> old_runtime = sysctl_sched_rt_runtime;
>
> ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
>
> if (!ret && write) {
> - ret = sched_rt_global_validate();
> - if (ret)
> - goto undo;
> + do {
Instead of this do-while loop that runs once, how about:
static int sched_validate_constraints(void)
{
	int ret;

	ret = sched_rt_global_validate();
	if (ret)
		return ret;

	... /* similarly for dl_global_validate, rt_global_constraints */

	sched_rt_do_global();
	sched_dl_do_global();

	return 0;
}

static int sched_rt_handler()
{
	...
	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (ret || !write)
		return ret;

	ret = sched_validate_constraints();
	if (ret) {
		sysctl_sched_rt_period = old_period;
		sysctl_sched_rt_runtime = old_runtime;
	}

	return ret;
}
> + ret = sched_rt_global_validate();
> + if (ret)
> + break;
>
> - ret = sched_dl_global_validate();
> - if (ret)
> - goto undo;
> + ret = sched_dl_global_validate();
> + if (ret)
> + break;
>
> - ret = sched_rt_global_constraints();
> - if (ret)
> - goto undo;
> + ret = sched_rt_global_constraints();
> + if (ret)
> + break;
>
> - sched_rt_do_global();
> - sched_dl_do_global();
> + sched_rt_do_global();
> + sched_dl_do_global();
> + } while (0);
> }
> - if (0) {
> -undo:
> +
> + if (ret) {
Previously "!write" cases and early ret from proc_dointvec_minmax()
wouldn't do this. It doesn't hurt but it is wasteful and can be avoided
using the pattern from the above suggestion.
> sysctl_sched_rt_period = old_period;
> sysctl_sched_rt_runtime = old_runtime;
> }
> - sched_domains_mutex_unlock();
> - mutex_unlock(&mutex);
>
> return ret;
> }
> diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
> index b958fe48e020..3f89f969666c 100644
> --- a/kernel/sched/topology.c
> +++ b/kernel/sched/topology.c
> @@ -6,14 +6,6 @@
> #include <linux/bsearch.h>
>
> DEFINE_MUTEX(sched_domains_mutex);
> -void sched_domains_mutex_lock(void)
> -{
> - mutex_lock(&sched_domains_mutex);
> -}
> -void sched_domains_mutex_unlock(void)
> -{
> - mutex_unlock(&sched_domains_mutex);
> -}
>
> /* Protected by sched_domains_mutex: */
> static cpumask_var_t sched_domains_tmpmask;
> @@ -470,44 +462,41 @@ static void free_rootdomain(struct rcu_head *rcu)
> void rq_attach_root(struct rq *rq, struct root_domain *rd)
> {
> struct root_domain *old_rd = NULL;
> - struct rq_flags rf;
>
> - rq_lock_irqsave(rq, &rf);
> + scoped_guard(rq_lock_irqsave, rq) {
> + if (rq->rd) {
> + old_rd = rq->rd;
>
> - if (rq->rd) {
> - old_rd = rq->rd;
> + if (cpumask_test_cpu(rq->cpu, old_rd->online))
> + set_rq_offline(rq);
> +
> + cpumask_clear_cpu(rq->cpu, old_rd->span);
> +
> + /*
> + * If we don't want to free the old_rd yet then
> + * set old_rd to NULL to skip the freeing later
> + * in this function:
> + */
> + if (!atomic_dec_and_test(&old_rd->refcount))
> + old_rd = NULL;
> + }
>
> - if (cpumask_test_cpu(rq->cpu, old_rd->online))
> - set_rq_offline(rq);
> + atomic_inc(&rd->refcount);
> + rq->rd = rd;
>
> - cpumask_clear_cpu(rq->cpu, old_rd->span);
> + cpumask_set_cpu(rq->cpu, rd->span);
> + if (cpumask_test_cpu(rq->cpu, cpu_active_mask))
> + set_rq_online(rq);
>
> /*
> - * If we don't want to free the old_rd yet then
> - * set old_rd to NULL to skip the freeing later
> - * in this function:
> + * Because the rq is not a task, dl_add_task_root_domain() did not
nit: Perhaps that comment can now be re-wrapped at 80 characters. No strong
feelings; it just stood out as being pretty long when reviewing.
> + * move the fair server bw to the rd if it already started.
> + * Add it now.
> */
> - if (!atomic_dec_and_test(&old_rd->refcount))
> - old_rd = NULL;
> + if (rq->fair_server.dl_server)
> + __dl_server_attach_root(&rq->fair_server, rq);
> }
>
--
Thanks and Regards,
Prateek