linux-kernel - Re: [PATCH 2/6] sched_ext: idle: Explicitly pass allowed cpumask to scx_select_cpu_dfl()

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <5c932b64-82f6-4374-b48f-e3983013b759@igalia.com>
Date: Fri, 21 Mar 2025 19:15:37 +0900
From: changwoo <changwoo@...lia.com>
To: Andrea Righi <arighi@...dia.com>, Tejun Heo <tj@...nel.org>, David
 Vernet <void@...ifault.com>
Cc: Joel Fernandes <joelagnelf@...dia.com>, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 2/6] sched_ext: idle: Explicitly pass allowed cpumask to
 scx_select_cpu_dfl()

Hi Andrea,

On 3/20/25 16:36, Andrea Righi wrote:
> Modify scx_select_cpu_dfl() to take the allowed cpumask as an explicit
> argument, instead of implicitly using @p->cpus_ptr.
> 
> This prepares for future changes where arbitrary cpumasks may be passed
> to the built-in idle CPU selection policy.
> 
> This is a pure refactoring with no functional changes.
> 
> Signed-off-by: Andrea Righi <arighi@...dia.com>
> ---
>   kernel/sched/ext.c      |  2 +-
>   kernel/sched/ext_idle.c | 45 ++++++++++++++++++++++++++---------------
>   kernel/sched/ext_idle.h |  3 ++-
>   3 files changed, 32 insertions(+), 18 deletions(-)
> 
> diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
> index 06561d6717c9a..f42352e8d889e 100644
> --- a/kernel/sched/ext.c
> +++ b/kernel/sched/ext.c
> @@ -3395,7 +3395,7 @@ static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flag
>   	} else {
>   		s32 cpu;
>   
> -		cpu = scx_select_cpu_dfl(p, prev_cpu, wake_flags, 0);
> +		cpu = scx_select_cpu_dfl(p, prev_cpu, wake_flags, p->cpus_ptr, 0);
>   		if (cpu >= 0) {
>   			p->scx.slice = SCX_SLICE_DFL;
>   			p->scx.ddsp_dsq_id = SCX_DSQ_LOCAL;
> diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c
> index e1e020c27c07c..a90d85bce1ccb 100644
> --- a/kernel/sched/ext_idle.c
> +++ b/kernel/sched/ext_idle.c
> @@ -397,11 +397,19 @@ void scx_idle_update_selcpu_topology(struct sched_ext_ops *ops)
>   		static_branch_disable_cpuslocked(&scx_selcpu_topo_numa);
>   }
>   
> +static inline bool task_allowed_all_cpus(const struct task_struct *p)
> +{
> +	return p->nr_cpus_allowed >= num_possible_cpus();
> +}

This function will be renamed to task_affinity_all() in patch #3.
Can we use the same name from the beginning?
That will make the commits easier to read.

> +
>   /*
> - * Return the subset of @cpus that task @p can use or NULL if none of the
> - * CPUs in the @cpus cpumask can be used.
> + * Return the subset of @cpus that task @p can use, according to
> + * @cpus_allowed, or NULL if none of the CPUs in the @cpus cpumask can be
> + * used.
>    */
> -static const struct cpumask *task_cpumask(const struct task_struct *p, const struct cpumask *cpus,
> +static const struct cpumask *task_cpumask(const struct task_struct *p,
> +					  const struct cpumask *cpus_allowed,
> +					  const struct cpumask *cpus,
>   					  struct cpumask *local_cpus)
>   {
>   	/*
> @@ -410,12 +418,10 @@ static const struct cpumask *task_cpumask(const struct task_struct *p, const str
>   	 * intersection of the architecture's cpumask and the task's
>   	 * allowed cpumask.
>   	 */
> -	if (!cpus || p->nr_cpus_allowed >= num_possible_cpus() ||
> -	    cpumask_subset(cpus, p->cpus_ptr))
> +	if (!cpus || task_allowed_all_cpus(p) || cpumask_subset(cpus, cpus_allowed))
>   		return cpus;
>   
> -	if (!cpumask_equal(cpus, p->cpus_ptr) &&
> -	    cpumask_and(local_cpus, cpus, p->cpus_ptr))
> +	if (cpumask_and(local_cpus, cpus, cpus_allowed))
>   		return local_cpus;
>   
>   	return NULL;
> @@ -454,7 +460,8 @@ static const struct cpumask *task_cpumask(const struct task_struct *p, const str
>    * NOTE: tasks that can only run on 1 CPU are excluded by this logic, because
>    * we never call ops.select_cpu() for them, see select_task_rq().
>    */
> -s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64 flags)
> +s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
> +		       const struct cpumask *cpus_allowed, u64 flags)
>   {
>   	const struct cpumask *llc_cpus = NULL, *numa_cpus = NULL;
>   	int node = scx_cpu_node_if_enabled(prev_cpu);
> @@ -469,13 +476,19 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64
>   	 * Determine the subset of CPUs that the task can use in its
>   	 * current LLC and node.
>   	 */
> -	if (static_branch_maybe(CONFIG_NUMA, &scx_selcpu_topo_numa))
> -		numa_cpus = task_cpumask(p, numa_span(prev_cpu),
> +	if (static_branch_maybe(CONFIG_NUMA, &scx_selcpu_topo_numa)) {
> +		numa_cpus = task_cpumask(p, cpus_allowed, numa_span(prev_cpu),
>   					 this_cpu_cpumask_var_ptr(local_numa_idle_cpumask));
> +		if (cpumask_equal(numa_cpus, cpus_allowed))

Since task_cpumask() can return NULL, I think we should check that
numa_cpus is non-NULL before passing it to cpumask_equal(), something like this:

if (numa_cpus && cpumask_equal(numa_cpus, cpus_allowed))

> +			numa_cpus = NULL;
> +	}
>   
> -	if (static_branch_maybe(CONFIG_SCHED_MC, &scx_selcpu_topo_llc))
> -		llc_cpus = task_cpumask(p, llc_span(prev_cpu),
> +	if (static_branch_maybe(CONFIG_SCHED_MC, &scx_selcpu_topo_llc)) {
> +		llc_cpus = task_cpumask(p, cpus_allowed, llc_span(prev_cpu),
>   					this_cpu_cpumask_var_ptr(local_llc_idle_cpumask));
> +		if (cpumask_equal(llc_cpus, cpus_allowed))

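Same here: task_cpumask() can return NULL, so llc_cpus should be checked
before it is passed to cpumask_equal().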

		if (llc_cpus && cpumask_equal(llc_cpus, cpus_allowed))

> +			llc_cpus = NULL;
> +	}
>   
>   	/*
>   	 * If WAKE_SYNC, try to migrate the wakee to the waker's CPU.
> @@ -512,7 +525,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64
>   		    cpu_rq(cpu)->scx.local_dsq.nr == 0 &&
>   		    (!(flags & SCX_PICK_IDLE_IN_NODE) || (waker_node == node)) &&
>   		    !cpumask_empty(idle_cpumask(waker_node)->cpu)) {
> -			if (cpumask_test_cpu(cpu, p->cpus_ptr))
> +			if (cpumask_test_cpu(cpu, cpus_allowed))
>   				goto out_unlock;
>   		}
>   	}
> @@ -557,7 +570,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64
>   		 * begin in prev_cpu's node and proceed to other nodes in
>   		 * order of increasing distance.
>   		 */
> -		cpu = scx_pick_idle_cpu(p->cpus_ptr, node, flags | SCX_PICK_IDLE_CORE);
> +		cpu = scx_pick_idle_cpu(cpus_allowed, node, flags | SCX_PICK_IDLE_CORE);
>   		if (cpu >= 0)
>   			goto out_unlock;
>   
> @@ -605,7 +618,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64
>   	 * in prev_cpu's node and proceed to other nodes in order of
>   	 * increasing distance.
>   	 */
> -	cpu = scx_pick_idle_cpu(p->cpus_ptr, node, flags);
> +	cpu = scx_pick_idle_cpu(cpus_allowed, node, flags);
>   	if (cpu >= 0)
>   		goto out_unlock;
>   
> @@ -861,7 +874,7 @@ __bpf_kfunc s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,
>   		goto prev_cpu;
>   
>   #ifdef CONFIG_SMP
> -	cpu = scx_select_cpu_dfl(p, prev_cpu, wake_flags, 0);
> +	cpu = scx_select_cpu_dfl(p, prev_cpu, wake_flags, p->cpus_ptr, 0);
>   	if (cpu >= 0) {
>   		*is_idle = true;
>   		return cpu;
> diff --git a/kernel/sched/ext_idle.h b/kernel/sched/ext_idle.h
> index 511cc2221f7a8..37be78a7502b3 100644
> --- a/kernel/sched/ext_idle.h
> +++ b/kernel/sched/ext_idle.h
> @@ -27,7 +27,8 @@ static inline s32 scx_pick_idle_cpu(const struct cpumask *cpus_allowed, int node
>   }
>   #endif /* CONFIG_SMP */
>   
> -s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64 flags);
> +s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
> +		       const struct cpumask *cpus_allowed, u64 flags);
>   void scx_idle_enable(struct sched_ext_ops *ops);
>   void scx_idle_disable(void);
>   int scx_idle_init(void);

Regards,
Changwoo Min