lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250317175717.163267-3-arighi@nvidia.com>
Date: Mon, 17 Mar 2025 18:53:25 +0100
From: Andrea Righi <arighi@...dia.com>
To: Tejun Heo <tj@...nel.org>,
	David Vernet <void@...ifault.com>,
	Changwoo Min <changwoo@...lia.com>
Cc: Joel Fernandes <joelagnelf@...dia.com>,
	bpf@...r.kernel.org,
	linux-kernel@...r.kernel.org
Subject: [PATCH 2/6] sched_ext: idle: Explicitly pass allowed cpumask to scx_select_cpu_dfl()

Modify scx_select_cpu_dfl() to take the allowed cpumask as an explicit
argument (@cpus_allowed), instead of implicitly using @p->cpus_ptr.
Existing callers pass p->cpus_ptr.

This prepares for future changes where arbitrary cpumasks may be passed
to the built-in idle CPU selection policy.

This is a pure refactoring with no functional changes.

Signed-off-by: Andrea Righi <arighi@...dia.com>
---
 kernel/sched/ext.c      |  2 +-
 kernel/sched/ext_idle.c | 45 ++++++++++++++++++++++++++---------------
 kernel/sched/ext_idle.h |  3 ++-
 3 files changed, 32 insertions(+), 18 deletions(-)

diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 06561d6717c9a..f42352e8d889e 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -3395,7 +3395,7 @@ static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flag
 	} else {
 		s32 cpu;
 
-		cpu = scx_select_cpu_dfl(p, prev_cpu, wake_flags, 0);
+		cpu = scx_select_cpu_dfl(p, prev_cpu, wake_flags, p->cpus_ptr, 0);
 		if (cpu >= 0) {
 			p->scx.slice = SCX_SLICE_DFL;
 			p->scx.ddsp_dsq_id = SCX_DSQ_LOCAL;
diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c
index e1e020c27c07c..a90d85bce1ccb 100644
--- a/kernel/sched/ext_idle.c
+++ b/kernel/sched/ext_idle.c
@@ -397,11 +397,19 @@ void scx_idle_update_selcpu_topology(struct sched_ext_ops *ops)
 		static_branch_disable_cpuslocked(&scx_selcpu_topo_numa);
 }
 
+static inline bool task_allowed_all_cpus(const struct task_struct *p)
+{
+	return p->nr_cpus_allowed >= num_possible_cpus();
+}
+
 /*
- * Return the subset of @cpus that task @p can use or NULL if none of the
- * CPUs in the @cpus cpumask can be used.
+ * Return the subset of @cpus that task @p can use, according to
+ * @cpus_allowed, or NULL if none of the CPUs in the @cpus cpumask can be
+ * used.
  */
-static const struct cpumask *task_cpumask(const struct task_struct *p, const struct cpumask *cpus,
+static const struct cpumask *task_cpumask(const struct task_struct *p,
+					  const struct cpumask *cpus_allowed,
+					  const struct cpumask *cpus,
 					  struct cpumask *local_cpus)
 {
 	/*
@@ -410,12 +418,10 @@ static const struct cpumask *task_cpumask(const struct task_struct *p, const str
 	 * intersection of the architecture's cpumask and the task's
 	 * allowed cpumask.
 	 */
-	if (!cpus || p->nr_cpus_allowed >= num_possible_cpus() ||
-	    cpumask_subset(cpus, p->cpus_ptr))
+	if (!cpus || task_allowed_all_cpus(p) || cpumask_subset(cpus, cpus_allowed))
 		return cpus;
 
-	if (!cpumask_equal(cpus, p->cpus_ptr) &&
-	    cpumask_and(local_cpus, cpus, p->cpus_ptr))
+	if (cpumask_and(local_cpus, cpus, cpus_allowed))
 		return local_cpus;
 
 	return NULL;
@@ -454,7 +460,8 @@ static const struct cpumask *task_cpumask(const struct task_struct *p, const str
  * NOTE: tasks that can only run on 1 CPU are excluded by this logic, because
  * we never call ops.select_cpu() for them, see select_task_rq().
  */
-s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64 flags)
+s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
+		       const struct cpumask *cpus_allowed, u64 flags)
 {
 	const struct cpumask *llc_cpus = NULL, *numa_cpus = NULL;
 	int node = scx_cpu_node_if_enabled(prev_cpu);
@@ -469,13 +476,19 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64
 	 * Determine the subset of CPUs that the task can use in its
 	 * current LLC and node.
 	 */
-	if (static_branch_maybe(CONFIG_NUMA, &scx_selcpu_topo_numa))
-		numa_cpus = task_cpumask(p, numa_span(prev_cpu),
+	if (static_branch_maybe(CONFIG_NUMA, &scx_selcpu_topo_numa)) {
+		numa_cpus = task_cpumask(p, cpus_allowed, numa_span(prev_cpu),
 					 this_cpu_cpumask_var_ptr(local_numa_idle_cpumask));
+		if (cpumask_equal(numa_cpus, cpus_allowed))
+			numa_cpus = NULL;
+	}
 
-	if (static_branch_maybe(CONFIG_SCHED_MC, &scx_selcpu_topo_llc))
-		llc_cpus = task_cpumask(p, llc_span(prev_cpu),
+	if (static_branch_maybe(CONFIG_SCHED_MC, &scx_selcpu_topo_llc)) {
+		llc_cpus = task_cpumask(p, cpus_allowed, llc_span(prev_cpu),
 					this_cpu_cpumask_var_ptr(local_llc_idle_cpumask));
+		if (cpumask_equal(llc_cpus, cpus_allowed))
+			llc_cpus = NULL;
+	}
 
 	/*
 	 * If WAKE_SYNC, try to migrate the wakee to the waker's CPU.
@@ -512,7 +525,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64
 		    cpu_rq(cpu)->scx.local_dsq.nr == 0 &&
 		    (!(flags & SCX_PICK_IDLE_IN_NODE) || (waker_node == node)) &&
 		    !cpumask_empty(idle_cpumask(waker_node)->cpu)) {
-			if (cpumask_test_cpu(cpu, p->cpus_ptr))
+			if (cpumask_test_cpu(cpu, cpus_allowed))
 				goto out_unlock;
 		}
 	}
@@ -557,7 +570,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64
 		 * begin in prev_cpu's node and proceed to other nodes in
 		 * order of increasing distance.
 		 */
-		cpu = scx_pick_idle_cpu(p->cpus_ptr, node, flags | SCX_PICK_IDLE_CORE);
+		cpu = scx_pick_idle_cpu(cpus_allowed, node, flags | SCX_PICK_IDLE_CORE);
 		if (cpu >= 0)
 			goto out_unlock;
 
@@ -605,7 +618,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64
 	 * in prev_cpu's node and proceed to other nodes in order of
 	 * increasing distance.
 	 */
-	cpu = scx_pick_idle_cpu(p->cpus_ptr, node, flags);
+	cpu = scx_pick_idle_cpu(cpus_allowed, node, flags);
 	if (cpu >= 0)
 		goto out_unlock;
 
@@ -861,7 +874,7 @@ __bpf_kfunc s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,
 		goto prev_cpu;
 
 #ifdef CONFIG_SMP
-	cpu = scx_select_cpu_dfl(p, prev_cpu, wake_flags, 0);
+	cpu = scx_select_cpu_dfl(p, prev_cpu, wake_flags, p->cpus_ptr, 0);
 	if (cpu >= 0) {
 		*is_idle = true;
 		return cpu;
diff --git a/kernel/sched/ext_idle.h b/kernel/sched/ext_idle.h
index 511cc2221f7a8..37be78a7502b3 100644
--- a/kernel/sched/ext_idle.h
+++ b/kernel/sched/ext_idle.h
@@ -27,7 +27,8 @@ static inline s32 scx_pick_idle_cpu(const struct cpumask *cpus_allowed, int node
 }
 #endif /* CONFIG_SMP */
 
-s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64 flags);
+s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags,
+		       const struct cpumask *cpus_allowed, u64 flags);
 void scx_idle_enable(struct sched_ext_ops *ops);
 void scx_idle_disable(void);
 int scx_idle_init(void);
-- 
2.48.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ