[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <5c932b64-82f6-4374-b48f-e3983013b759@igalia.com>
Date: Fri, 21 Mar 2025 19:15:37 +0900
From: changwoo <changwoo@...lia.com>
To: Andrea Righi <arighi@...dia.com>, Tejun Heo <tj@...nel.org>, David Vernet <void@...ifault.com>
Cc: Joel Fernandes <joelagnelf@...dia.com>, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 2/6] sched_ext: idle: Explicitly pass allowed cpumask to scx_select_cpu_dfl()
Hi Andrea,
On 3/20/25 16:36, Andrea Righi wrote:
> Modify scx_select_cpu_dfl() to take the allowed cpumask as an explicit
> argument, instead of implicitly using @p->cpus_ptr.
>
> This prepares for future changes where arbitrary cpumasks may be passed
> to the built-in idle CPU selection policy.
>
> This is a pure refactoring with no functional changes.
>
> Signed-off-by: Andrea Righi <arighi@...dia.com>
> ---
> kernel/sched/ext.c | 2 +-
> kernel/sched/ext_idle.c | 45 ++++++++++++++++++++++++++---------------
> kernel/sched/ext_idle.h | 3 ++-
> 3 files changed, 32 insertions(+), 18 deletions(-)
>
> diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
> index 06561d6717c9a..f42352e8d889e 100644
> --- a/kernel/sched/ext.c
> +++ b/kernel/sched/ext.c
> @@ -3395,7 +3395,7 @@ static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flag
> } else {
> s32 cpu;
>
> - cpu = scx_select_cpu_dfl(p, prev_cpu, wake_flags, 0);
> + cpu = scx_select_cpu_dfl(p, prev_cpu, wake_flags, p->cpus_ptr, 0);
> if (cpu >= 0) {
> p->scx.slice = SCX_SLICE_DFL;
> p->scx.ddsp_dsq_id = SCX_DSQ_LOCAL;
> diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c
> index e1e020c27c07c..a90d85bce1ccb 100644
> --- a/kernel/sched/ext_idle.c
> +++ b/kernel/sched/ext_idle.c
> @@ -397,11 +397,19 @@ void scx_idle_update_selcpu_topology(struct sched_ext_ops *ops)
> static_branch_disable_cpuslocked(&scx_selcpu_topo_numa);
> }
>
> +static inline bool task_allowed_all_cpus(const struct task_struct *p)
> +{
> + return p->nr_cpus_allowed >= num_possible_cpus();
> +}
This function is renamed to task_affinity_all() in patch #3.
Could we use that name from the beginning?
That would make the commits easier to read.
> +
> /*
> - * Return the subset of @cpus that task @p can use or NULL if none of the
> - * CPUs in the @cpus cpumask can be used.
> + * Return the subset of @cpus that task @p can use, according to
> + * @cpus_allowed, or NULL if none of the CPUs in the @cpus cpumask can be
> + * used.
> */
> -static const struct cpumask *task_cpumask(const struct task_struct *p, const struct cpumask *cpus,
> +static const struct cpumask *task_cpumask(const struct task_struct *p,
> + const struct cpumask *cpus_allowed,
> + const struct cpumask *cpus,
> struct cpumask *local_cpus)
> {
> /*
> @@ -410,12 +418,10 @@ static const struct cpumask *task_cpumask(const struct task_struct *p, const str
> * intersection of the architecture's cpumask and the task's
> * allowed cpumask.
> */
> - if (!cpus || p->nr_cpus_allowed >= num_possible_cpus() ||
> - cpumask_subset(cpus, p->cpus_ptr))
> + if (!cpus || task_allowed_all_cpus(p) || cpumask_subset(cpus, cpus_allowed))
> return cpus;
>
> - if (!cpumask_equal(cpus, p->cpus_ptr) &&
> - cpumask_and(local_cpus, cpus, p->cpus_ptr))
> + if (cpumask_and(local_cpus, cpus, cpus_allowed))
> return local_cpus;
>
> return NULL;
> @@ -454,7 +460,8 @@ static const struct cpumask *task_cpumask(const struct task_struct *p, const str
> * NOTE: tasks that can only run on 1 CPU are excluded by this logic, because
> * we never call ops.select_cpu() for them, see select_task_rq().
> */
> -s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64 flags)
> +s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
> + const struct cpumask *cpus_allowed, u64 flags)
> {
> const struct cpumask *llc_cpus = NULL, *numa_cpus = NULL;
> int node = scx_cpu_node_if_enabled(prev_cpu);
> @@ -469,13 +476,19 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64
> * Determine the subset of CPUs that the task can use in its
> * current LLC and node.
> */
> - if (static_branch_maybe(CONFIG_NUMA, &scx_selcpu_topo_numa))
> - numa_cpus = task_cpumask(p, numa_span(prev_cpu),
> + if (static_branch_maybe(CONFIG_NUMA, &scx_selcpu_topo_numa)) {
> + numa_cpus = task_cpumask(p, cpus_allowed, numa_span(prev_cpu),
> this_cpu_cpumask_var_ptr(local_numa_idle_cpumask));
> + if (cpumask_equal(numa_cpus, cpus_allowed))
Since task_cpumask() can return NULL, numa_cpus would be passed to
cpumask_equal() as a NULL pointer here. I think we should check it first, something like this:
if (numa_cpus && cpumask_equal(numa_cpus, cpus_allowed))
> + numa_cpus = NULL;
> + }
>
> - if (static_branch_maybe(CONFIG_SCHED_MC, &scx_selcpu_topo_llc))
> - llc_cpus = task_cpumask(p, llc_span(prev_cpu),
> + if (static_branch_maybe(CONFIG_SCHED_MC, &scx_selcpu_topo_llc)) {
> + llc_cpus = task_cpumask(p, cpus_allowed, llc_span(prev_cpu),
> this_cpu_cpumask_var_ptr(local_llc_idle_cpumask));
> + if (cpumask_equal(llc_cpus, cpus_allowed))
Same here: task_cpumask() can return NULL, so llc_cpus should be checked before calling cpumask_equal():
if (llc_cpus && cpumask_equal(llc_cpus, cpus_allowed))
> + llc_cpus = NULL;
> + }
>
> /*
> * If WAKE_SYNC, try to migrate the wakee to the waker's CPU.
> @@ -512,7 +525,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64
> cpu_rq(cpu)->scx.local_dsq.nr == 0 &&
> (!(flags & SCX_PICK_IDLE_IN_NODE) || (waker_node == node)) &&
> !cpumask_empty(idle_cpumask(waker_node)->cpu)) {
> - if (cpumask_test_cpu(cpu, p->cpus_ptr))
> + if (cpumask_test_cpu(cpu, cpus_allowed))
> goto out_unlock;
> }
> }
> @@ -557,7 +570,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64
> * begin in prev_cpu's node and proceed to other nodes in
> * order of increasing distance.
> */
> - cpu = scx_pick_idle_cpu(p->cpus_ptr, node, flags | SCX_PICK_IDLE_CORE);
> + cpu = scx_pick_idle_cpu(cpus_allowed, node, flags | SCX_PICK_IDLE_CORE);
> if (cpu >= 0)
> goto out_unlock;
>
> @@ -605,7 +618,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64
> * in prev_cpu's node and proceed to other nodes in order of
> * increasing distance.
> */
> - cpu = scx_pick_idle_cpu(p->cpus_ptr, node, flags);
> + cpu = scx_pick_idle_cpu(cpus_allowed, node, flags);
> if (cpu >= 0)
> goto out_unlock;
>
> @@ -861,7 +874,7 @@ __bpf_kfunc s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,
> goto prev_cpu;
>
> #ifdef CONFIG_SMP
> - cpu = scx_select_cpu_dfl(p, prev_cpu, wake_flags, 0);
> + cpu = scx_select_cpu_dfl(p, prev_cpu, wake_flags, p->cpus_ptr, 0);
> if (cpu >= 0) {
> *is_idle = true;
> return cpu;
> diff --git a/kernel/sched/ext_idle.h b/kernel/sched/ext_idle.h
> index 511cc2221f7a8..37be78a7502b3 100644
> --- a/kernel/sched/ext_idle.h
> +++ b/kernel/sched/ext_idle.h
> @@ -27,7 +27,8 @@ static inline s32 scx_pick_idle_cpu(const struct cpumask *cpus_allowed, int node
> }
> #endif /* CONFIG_SMP */
>
> -s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64 flags);
> +s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
> + const struct cpumask *cpus_allowed, u64 flags);
> void scx_idle_enable(struct sched_ext_ops *ops);
> void scx_idle_disable(void);
> int scx_idle_init(void);
Regards,
Changwoo Min
Powered by blists - more mailing lists