Message-Id: <63091f7ca7bb473fbc176af86a87d27a07a6e149.1764801860.git.tim.c.chen@linux.intel.com>
Date: Wed, 3 Dec 2025 15:07:26 -0800
From: Tim Chen <tim.c.chen@...ux.intel.com>
To: Peter Zijlstra <peterz@...radead.org>,
Ingo Molnar <mingo@...hat.com>,
K Prateek Nayak <kprateek.nayak@....com>,
"Gautham R . Shenoy" <gautham.shenoy@....com>,
Vincent Guittot <vincent.guittot@...aro.org>
Cc: Tim Chen <tim.c.chen@...ux.intel.com>,
Juri Lelli <juri.lelli@...hat.com>,
Dietmar Eggemann <dietmar.eggemann@....com>,
Steven Rostedt <rostedt@...dmis.org>,
Ben Segall <bsegall@...gle.com>,
Mel Gorman <mgorman@...e.de>,
Valentin Schneider <vschneid@...hat.com>,
Madadi Vineeth Reddy <vineethr@...ux.ibm.com>,
Hillf Danton <hdanton@...a.com>,
Shrikanth Hegde <sshegde@...ux.ibm.com>,
Jianyong Wu <jianyong.wu@...look.com>,
Yangyu Chen <cyy@...self.name>,
Tingyin Duan <tingyin.duan@...il.com>,
Vern Hao <vernhao@...cent.com>,
Vern Hao <haoxing990@...il.com>,
Len Brown <len.brown@...el.com>,
Aubrey Li <aubrey.li@...el.com>,
Zhao Liu <zhao1.liu@...el.com>,
Chen Yu <yu.chen.surf@...il.com>,
Chen Yu <yu.c.chen@...el.com>,
Adam Li <adamli@...amperecomputing.com>,
Aaron Lu <ziqianlu@...edance.com>,
Tim Chen <tim.c.chen@...el.com>,
linux-kernel@...r.kernel.org
Subject: [PATCH v2 07/23] sched/cache: Introduce per runqueue task LLC preference counter
Each runqueue is assigned an array where each element tracks
the number of tasks preferring a given LLC, indexed from 0 to
max_llcs - 1.
For example, rq->nr_pref_llc[3] = 2 signifies that there are 2 tasks on
this runqueue which prefer to run within LLC3.
The load balancer can use this information to identify busy
runqueues and migrate tasks to their preferred LLC domains.
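Purely as an illustration of how these counters are meant to be read
(the actual load balancing changes come in later patches), a helper
along the lines below could report how many tasks on a busy runqueue
prefer a given LLC. The function name is hypothetical and not part of
this series:

  static unsigned int nr_tasks_preferring_llc(struct rq *busiest, int llc)
  {
          /* The array may not be allocated yet, or llc may be out of range. */
          if (!busiest->nr_pref_llc || llc < 0 || llc >= max_llcs)
                  return 0;

          return busiest->nr_pref_llc[llc];
  }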
This array is reallocated at runtime if the number of LLCs
increases due to CPU hotplug. Only growing the buffer (rather
than shrinking it) is supported, to keep the implementation
simple.
Introduce the buffer allocation mechanism here; the statistics
themselves will be calculated in the subsequent patch.
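For reference only, that accounting can be pictured roughly as in the
sketch below. task_pref_llc() is an assumed helper returning the
task's preferred LLC index (or -1 for no preference); neither it nor
these functions are part of this patch:

  /* Hypothetical sketch, not the code introduced by the later patch. */
  static void account_llc_enqueue(struct rq *rq, struct task_struct *p)
  {
          int llc = task_pref_llc(p);    /* assumed helper */

          if (rq->nr_pref_llc && llc >= 0 && llc < max_llcs)
                  rq->nr_pref_llc[llc]++;
  }

  static void account_llc_dequeue(struct rq *rq, struct task_struct *p)
  {
          int llc = task_pref_llc(p);    /* assumed helper */

          if (rq->nr_pref_llc && llc >= 0 && llc < max_llcs)
                  rq->nr_pref_llc[llc]--;
  }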
Co-developed-by: Chen Yu <yu.c.chen@...el.com>
Signed-off-by: Chen Yu <yu.c.chen@...el.com>
Signed-off-by: Tim Chen <tim.c.chen@...ux.intel.com>
---
Notes:
v1->v2:
Remove static allocation of per runqueue LLC preference arrays.
Size the array to the actual number of LLCs online. (Peter Zijlstra, Madadi Vineeth Reddy)
kernel/sched/core.c | 1 +
kernel/sched/sched.h | 1 +
kernel/sched/topology.c | 117 +++++++++++++++++++++++++++++++++++++++-
3 files changed, 118 insertions(+), 1 deletion(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 48626c81ba8e..ce533dc485f5 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8800,6 +8800,7 @@ void __init sched_init(void)
#ifdef CONFIG_SCHED_CACHE
raw_spin_lock_init(&rq->cpu_epoch_lock);
rq->cpu_epoch_next = jiffies;
+ rq->nr_pref_llc = NULL;
#endif
zalloc_cpumask_var_node(&rq->scratch_mask, GFP_KERNEL, cpu_to_node(i));
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ee8b70647835..8f2a779825e4 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1129,6 +1129,7 @@ struct rq {
#ifdef CONFIG_SCHED_CACHE
unsigned int nr_pref_llc_running;
unsigned int nr_llc_running;
+ unsigned int *nr_pref_llc;
#endif
#ifdef CONFIG_NO_HZ_COMMON
unsigned long last_blocked_load_update_tick;
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index f25d950ab015..d583399fc6a1 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -17,8 +17,121 @@ void sched_domains_mutex_unlock(void)
mutex_unlock(&sched_domains_mutex);
}
+/* the max number of LLCs detected so far */
+static int new_max_llcs;
+/* the max number of LLCs currently in use */
int max_llcs;
+#ifdef CONFIG_SCHED_CACHE
+
+static unsigned int *alloc_new_pref_llcs(unsigned int *old, unsigned int **gc)
+{
+ unsigned int *new = NULL;
+
+ new = kcalloc(new_max_llcs, sizeof(unsigned int),
+ GFP_KERNEL | __GFP_NOWARN);
+
+ if (!new) {
+ *gc = NULL;
+ } else {
+ /*
+ * Place old entry in garbage collector
+ * for later disposal.
+ */
+ *gc = old;
+ }
+ return new;
+}
+
+static void populate_new_pref_llcs(unsigned int *old, unsigned int *new)
+{
+ int i;
+
+ if (!old)
+ return;
+
+ for (i = 0; i < max_llcs; i++)
+ new[i] = old[i];
+}
+
+static int resize_llc_pref(void)
+{
+ unsigned int *__percpu *tmp_llc_pref;
+ int i, ret = 0;
+
+ if (new_max_llcs <= max_llcs)
+ return 0;
+
+ /*
+ * Use a temporary percpu array to stash each rq's old
+ * nr_pref_llc buffer, so it can be freed after all
+ * runqueues have switched to their new buffers.
+ */
+ tmp_llc_pref = alloc_percpu_noprof(unsigned int *);
+ if (!tmp_llc_pref)
+ return -ENOMEM;
+
+ for_each_present_cpu(i)
+ *per_cpu_ptr(tmp_llc_pref, i) = NULL;
+
+ /*
+ * Resize the per rq nr_pref_llc buffer and
+ * switch to this new buffer.
+ */
+ for_each_present_cpu(i) {
+ struct rq_flags rf;
+ unsigned int *new;
+ struct rq *rq;
+
+ rq = cpu_rq(i);
+ new = alloc_new_pref_llcs(rq->nr_pref_llc, per_cpu_ptr(tmp_llc_pref, i));
+ if (!new) {
+ ret = -ENOMEM;
+
+ goto release_old;
+ }
+
+ /*
+ * Locking rq ensures that rq->nr_pref_llc values
+ * don't change with new task enqueue/dequeue
+ * when we repopulate the newly enlarged array.
+ */
+ rq_lock_irqsave(rq, &rf);
+ populate_new_pref_llcs(rq->nr_pref_llc, new);
+ rq->nr_pref_llc = new;
+ rq_unlock_irqrestore(rq, &rf);
+ }
+
+release_old:
+ /*
+ * Load balancing runs under rcu_read_lock() and may still
+ * reference the old nr_pref_llc[] buffers. Wait for any
+ * load balancing that started before or during the resize
+ * to finish before freeing them.
+ */
+ synchronize_rcu();
+ for_each_present_cpu(i)
+ kfree(*per_cpu_ptr(tmp_llc_pref, i));
+
+ free_percpu(tmp_llc_pref);
+
+ /* Success: publish the new LLC count. */
+ if (!ret)
+ max_llcs = new_max_llcs;
+
+ return ret;
+}
+
+#else
+
+static int resize_llc_pref(void)
+{
+ max_llcs = new_max_llcs;
+ return 0;
+}
+
+#endif
+
/* Protected by sched_domains_mutex: */
static cpumask_var_t sched_domains_tmpmask;
static cpumask_var_t sched_domains_tmpmask2;
@@ -714,7 +827,7 @@ static int update_llc_id(struct sched_domain *sd,
*
* For both cases, we want to increase the number of LLCs.
*/
- per_cpu(sd_llc_id, cpu) = max_llcs++;
+ per_cpu(sd_llc_id, cpu) = new_max_llcs++;
return per_cpu(sd_llc_id, cpu);
}
@@ -2674,6 +2787,8 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
if (has_cluster)
static_branch_inc_cpuslocked(&sched_cluster_active);
+ resize_llc_pref();
+
if (rq && sched_debug_verbose)
pr_info("root domain span: %*pbl\n", cpumask_pr_args(cpu_map));
--
2.32.0