Message-Id: <178bf43d7cbc9b2c9aea408dd56b87391067df37.1754712565.git.tim.c.chen@linux.intel.com>
Date: Sat, 9 Aug 2025 13:08:11 +0800
From: Chen Yu <yu.c.chen@...el.com>
To: Peter Zijlstra <peterz@...radead.org>,
Ingo Molnar <mingo@...hat.com>,
K Prateek Nayak <kprateek.nayak@....com>,
"Gautham R . Shenoy" <gautham.shenoy@....com>
Cc: Vincent Guittot <vincent.guittot@...aro.org>,
Juri Lelli <juri.lelli@...hat.com>,
Dietmar Eggemann <dietmar.eggemann@....com>,
Steven Rostedt <rostedt@...dmis.org>,
Ben Segall <bsegall@...gle.com>,
Mel Gorman <mgorman@...e.de>,
Valentin Schneider <vschneid@...hat.com>,
Libo Chen <libo.chen@...cle.com>,
Madadi Vineeth Reddy <vineethr@...ux.ibm.com>,
Hillf Danton <hdanton@...a.com>,
Shrikanth Hegde <sshegde@...ux.ibm.com>,
Jianyong Wu <jianyong.wu@...look.com>,
Yangyu Chen <cyy@...self.name>,
Tingyin Duan <tingyin.duan@...il.com>,
Vern Hao <vernhao@...cent.com>,
Len Brown <len.brown@...el.com>,
Tim Chen <tim.c.chen@...ux.intel.com>,
Aubrey Li <aubrey.li@...el.com>,
Zhao Liu <zhao1.liu@...el.com>,
Chen Yu <yu.chen.surf@...il.com>,
Chen Yu <yu.c.chen@...el.com>,
linux-kernel@...r.kernel.org
Subject: [RFC PATCH v4 23/28] sched: Scan a task's preferred node for preferred LLC

When sched_cache is enabled, scanning all online CPUs to find the
hottest LLC is very costly. As a first step, restrict the scan to the
CPUs within the task's preferred NUMA node. If the node containing
the task's preferred LLC is not covered by that scan mask, add it as
well. Finally, add the node the task is currently running on if it is
not already covered.
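
For illustration, the mask construction boils down to a three-step
union of per-node CPU masks. Below is a minimal, self-contained
userspace sketch of that logic; it is an assumption-laden toy, not
the kernel code: a 64-CPU machine with 16 CPUs per contiguous node,
uint64_t bitmasks standing in for cpumask_t, and node_mask() /
the local cpu_to_node() as hypothetical stand-ins for the kernel's
cpumask_of_node() and cpu_to_node():

#include <stdint.h>
#include <stdio.h>

#define CPUS_PER_NODE	16
#define NUMA_NO_NODE	(-1)

/* toy stand-in for cpumask_of_node(): a node's CPUs as one bitmask */
static uint64_t node_mask(int nid)
{
	return ((1ULL << CPUS_PER_NODE) - 1) << (nid * CPUS_PER_NODE);
}

/* toy stand-in for the kernel's cpu_to_node() */
static int cpu_to_node(int cpu)
{
	return cpu / CPUS_PER_NODE;
}

static uint64_t get_scan_mask(int cache_cpu, int pref_nid, int curr_cpu)
{
	uint64_t cpus = 0;

	/* 1) the task's preferred node, if NUMA balancing chose one */
	if (pref_nid != NUMA_NO_NODE)
		cpus |= node_mask(pref_nid);

	/* 2) the node of the preferred-LLC CPU, if not covered yet */
	if (cache_cpu != -1 && !(cpus & (1ULL << cache_cpu)))
		cpus |= node_mask(cpu_to_node(cache_cpu));

	/* 3) the node the task currently runs on, as the last resort */
	if (!(cpus & (1ULL << curr_cpu)))
		cpus |= node_mask(cpu_to_node(curr_cpu));

	return cpus;
}

int main(void)
{
	/* preferred node 1, cached LLC on CPU 35 (node 2), running on CPU 3 */
	printf("scan mask: %#llx\n",
	       (unsigned long long)get_scan_mask(35, 1, 3));
	return 0;
}

With those inputs the result covers nodes 0, 1 and 2
(0xffffffffffff), matching the fallback order described above.
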
Suggested-by: Jianyong Wu <jianyong.wu@...look.com>
Suggested-by: Shrikanth Hegde <sshegde@...ux.ibm.com>
Co-developed-by: Tim Chen <tim.c.chen@...ux.intel.com>
Signed-off-by: Tim Chen <tim.c.chen@...ux.intel.com>
Signed-off-by: Chen Yu <yu.c.chen@...el.com>
---
 kernel/sched/fair.c | 36 +++++++++++++++++++++++++++++++++---
 1 file changed, 33 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 64f757ad39fc..420d3a080990 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1390,13 +1390,36 @@ static void task_tick_cache(struct rq *rq, struct task_struct *p)
 	}
 }
 
+static void get_scan_cpumasks(cpumask_var_t cpus, int cache_cpu,
+			      int pref_nid, int curr_cpu)
+{
+#ifdef CONFIG_NUMA_BALANCING
+	/* first honor the task's preferred node */
+	if (pref_nid != NUMA_NO_NODE)
+		cpumask_or(cpus, cpus, cpumask_of_node(pref_nid));
+#endif
+
+	/* secondly honor the task's cache CPU if it is not included */
+	if (cache_cpu != -1 && !cpumask_test_cpu(cache_cpu, cpus))
+		cpumask_or(cpus, cpus,
+			   cpumask_of_node(cpu_to_node(cache_cpu)));
+
+	/*
+	 * Thirdly honor the task's current running node
+	 * as the last resort.
+	 */
+	if (!cpumask_test_cpu(curr_cpu, cpus))
+		cpumask_or(cpus, cpus, cpumask_of_node(cpu_to_node(curr_cpu)));
+}
+
 static void __no_profile task_cache_work(struct callback_head *work)
 {
 	struct task_struct *p = current;
 	struct mm_struct *mm = p->mm;
 	unsigned long m_a_occ = 0;
 	unsigned long last_m_a_occ = 0;
-	int cpu, m_a_cpu = -1;
+	int cpu, m_a_cpu = -1, cache_cpu,
+	    pref_nid = NUMA_NO_NODE, curr_cpu = smp_processor_id();
 	cpumask_var_t cpus;
 
 	WARN_ON_ONCE(work != &p->cache_work);
@@ -1406,11 +1429,18 @@ static void __no_profile task_cache_work(struct callback_head *work)
 	if (p->flags & PF_EXITING)
 		return;
 
-	if (!alloc_cpumask_var(&cpus, GFP_KERNEL))
+	if (!zalloc_cpumask_var(&cpus, GFP_KERNEL))
 		return;
 
+	cache_cpu = mm->mm_sched_cpu;
+#ifdef CONFIG_NUMA_BALANCING
+	if (static_branch_likely(&sched_numa_balancing))
+		pref_nid = p->numa_preferred_nid;
+#endif
+
 	scoped_guard (cpus_read_lock) {
-		cpumask_copy(cpus, cpu_online_mask);
+		get_scan_cpumasks(cpus, cache_cpu,
+				  pref_nid, curr_cpu);
 
 		for_each_cpu(cpu, cpus) {
 			/* XXX sched_cluster_active */
--
2.25.1