[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <0eaf9b9f89f0d97dbf46b760421f65aee3ffe063.1764801860.git.tim.c.chen@linux.intel.com>
Date: Wed, 3 Dec 2025 15:07:42 -0800
From: Tim Chen <tim.c.chen@...ux.intel.com>
To: Peter Zijlstra <peterz@...radead.org>,
Ingo Molnar <mingo@...hat.com>,
K Prateek Nayak <kprateek.nayak@....com>,
"Gautham R . Shenoy" <gautham.shenoy@....com>,
Vincent Guittot <vincent.guittot@...aro.org>
Cc: Chen Yu <yu.c.chen@...el.com>,
Juri Lelli <juri.lelli@...hat.com>,
Dietmar Eggemann <dietmar.eggemann@....com>,
Steven Rostedt <rostedt@...dmis.org>,
Ben Segall <bsegall@...gle.com>,
Mel Gorman <mgorman@...e.de>,
Valentin Schneider <vschneid@...hat.com>,
Madadi Vineeth Reddy <vineethr@...ux.ibm.com>,
Hillf Danton <hdanton@...a.com>,
Shrikanth Hegde <sshegde@...ux.ibm.com>,
Jianyong Wu <jianyong.wu@...look.com>,
Yangyu Chen <cyy@...self.name>,
Tingyin Duan <tingyin.duan@...il.com>,
Vern Hao <vernhao@...cent.com>,
Vern Hao <haoxing990@...il.com>,
Len Brown <len.brown@...el.com>,
Tim Chen <tim.c.chen@...ux.intel.com>,
Aubrey Li <aubrey.li@...el.com>,
Zhao Liu <zhao1.liu@...el.com>,
Chen Yu <yu.chen.surf@...il.com>,
Adam Li <adamli@...amperecomputing.com>,
Aaron Lu <ziqianlu@...edance.com>,
Tim Chen <tim.c.chen@...el.com>,
linux-kernel@...r.kernel.org
Subject: [PATCH v2 23/23] -- DO NOT APPLY!!! -- sched/cache/debug: Display the per LLC occupancy for each process via proc fs
From: Chen Yu <yu.c.chen@...el.com>
Debug patch only.
Show the per-LLC occupancy in /proc/{PID}/schedstat, with each column
corresponding to one LLC. This can be used to verify if the cache-aware
load balancer works as expected by aggregating threads onto dedicated LLCs.
Suppose there are 2 LLCs and the sampling duration is 10 seconds:
With cache-aware load balancing enabled:
0 12281 <--- LLC0 residency delta is 0 ms, LLC1 is about 12 seconds (12281 ms)
0 18881
0 16217
With cache-aware load balancing disabled:
6497 15802
9299 5435
17811 8278
Signed-off-by: Chen Yu <yu.c.chen@...el.com>
Signed-off-by: Tim Chen <tim.c.chen@...ux.intel.com>
---
fs/proc/base.c | 22 ++++++++++++++++++++++
include/linux/mm_types.h | 19 +++++++++++++++++--
include/linux/sched.h | 3 +++
kernel/sched/fair.c | 40 ++++++++++++++++++++++++++++++++++++++--
4 files changed, 80 insertions(+), 4 deletions(-)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 6299878e3d97..f4be96f4bd01 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -518,6 +518,28 @@ static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns,
(unsigned long long)task->se.sum_exec_runtime,
(unsigned long long)task->sched_info.run_delay,
task->sched_info.pcount);
+#ifdef CONFIG_SCHED_CACHE
+ if (sched_cache_enabled()) {
+ struct mm_struct *mm = task->mm;
+ u64 *llc_runtime;
+
+ if (!mm)
+ return 0;
+
+ llc_runtime = kcalloc(max_llcs, sizeof(u64), GFP_KERNEL);
+ if (!llc_runtime)
+ return 0;
+
+ if (get_mm_per_llc_runtime(task, llc_runtime))
+ goto out;
+
+ for (int i = 0; i < max_llcs; i++)
+ seq_printf(m, "%llu ", llc_runtime[i]);
+ seq_puts(m, "\n");
+out:
+ kfree(llc_runtime);
+ }
+#endif
return 0;
}
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 04743983de4d..255c22be7312 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -944,6 +944,10 @@ struct mm_sched {
unsigned long epoch;
};
+/*
+ * Per-CPU runtime counter of an mm. account_mm_sched() adds every
+ * delta_exec to it without applying any decay, mm_init_sched() zeroes it,
+ * and get_mm_per_llc_runtime() reads it.
+ */
+struct mm_time {
+ u64 runtime_ns; /* accumulated delta_exec; readers divide by NSEC_PER_MSEC */
+};
+
struct kioctx_table;
struct iommu_mm_data;
struct mm_struct {
@@ -1040,6 +1044,7 @@ struct mm_struct {
* See account_mm_sched() and ...
*/
struct mm_sched __percpu *pcpu_sched;
+ struct mm_time __percpu *pcpu_time;
raw_spinlock_t mm_sched_lock;
unsigned long mm_sched_epoch;
int mm_sched_cpu;
@@ -1505,16 +1510,24 @@ static inline void mm_set_cpus_allowed(struct mm_struct *mm, const struct cpumas
#endif /* CONFIG_SCHED_MM_CID */
#ifdef CONFIG_SCHED_CACHE
-void mm_init_sched(struct mm_struct *mm, struct mm_sched __percpu *pcpu_sched);
+void mm_init_sched(struct mm_struct *mm, struct mm_sched __percpu *pcpu_sched,
+ struct mm_time __percpu *pcpu_time);
+/*
+ * Allocate the per-CPU struct mm_sched and struct mm_time areas for @mm and
+ * hand both to mm_init_sched().
+ *
+ * Returns 0 on success, or -ENOMEM if either per-CPU allocation fails; on
+ * failure no allocation made here is left behind.
+ */
static inline int mm_alloc_sched_noprof(struct mm_struct *mm)
{
struct mm_sched __percpu *pcpu_sched = alloc_percpu_noprof(struct mm_sched);
+ struct mm_time __percpu *pcpu_time;
if (!pcpu_sched)
return -ENOMEM;
- mm_init_sched(mm, pcpu_sched);
+ pcpu_time = alloc_percpu_noprof(struct mm_time);
+ if (!pcpu_time) {
+ /*
+ * mm->pcpu_sched is only assigned by mm_init_sched(), which has not
+ * run yet, so release the local allocation rather than the unset
+ * mm field.
+ */
+ free_percpu(pcpu_sched);
+ return -ENOMEM;
+ }
+
+ mm_init_sched(mm, pcpu_sched, pcpu_time);
return 0;
}
@@ -1523,7 +1536,9 @@ static inline int mm_alloc_sched_noprof(struct mm_struct *mm)
+/* Free both per-CPU areas of @mm and reset their pointers to NULL. */
static inline void mm_destroy_sched(struct mm_struct *mm)
{
free_percpu(mm->pcpu_sched);
+ free_percpu(mm->pcpu_time);
mm->pcpu_sched = NULL;
+ mm->pcpu_time = NULL;
}
#else /* !CONFIG_SCHED_CACHE */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 95bf080bbbf0..875ac3f4208b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2442,6 +2442,9 @@ static inline bool sched_cache_enabled(void)
{
return static_branch_unlikely(&sched_cache_on);
}
+
+int get_mm_per_llc_runtime(struct task_struct *p, u64 *buf);
+extern int max_llcs;
#endif
#endif
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e47b4096f0a6..205208f061bb 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1355,16 +1355,19 @@ static void account_llc_dequeue(struct rq *rq, struct task_struct *p)
p->sched_llc_active = false;
}
-void mm_init_sched(struct mm_struct *mm, struct mm_sched __percpu *_pcpu_sched)
+void mm_init_sched(struct mm_struct *mm, struct mm_sched __percpu *_pcpu_sched,
+ struct mm_time __percpu *_pcpu_time)
{
unsigned long epoch;
int i;
for_each_possible_cpu(i) {
struct mm_sched *pcpu_sched = per_cpu_ptr(_pcpu_sched, i);
+ struct mm_time *pcpu_time = per_cpu_ptr(_pcpu_time, i);
struct rq *rq = cpu_rq(i);
pcpu_sched->runtime = 0;
+ pcpu_time->runtime_ns = 0;
pcpu_sched->epoch = rq->cpu_epoch;
epoch = rq->cpu_epoch;
}
@@ -1379,6 +1382,8 @@ void mm_init_sched(struct mm_struct *mm, struct mm_sched __percpu *_pcpu_sched)
* the readers may get invalid mm_sched_epoch, etc.
*/
smp_store_release(&mm->pcpu_sched, _pcpu_sched);
+ /* same as above */
+ smp_store_release(&mm->pcpu_time, _pcpu_time);
}
/* because why would C be fully specified */
@@ -1428,11 +1433,39 @@ static unsigned long __no_profile fraction_mm_sched(struct rq *rq, struct mm_sch
static unsigned int task_running_on_cpu(int cpu, struct task_struct *p);
+/*
+ * Sum the undecayed per-CPU runtime of @p's mm into per-LLC buckets.
+ *
+ * @p: task whose mm is inspected.
+ * @buf: array of at least max_llcs entries, indexed by LLC id. The runtime
+ * of each online CPU, converted to milliseconds, is added to the slot
+ * of that CPU's LLC, so the caller must pass in a zeroed array.
+ *
+ * Returns 0 on success, -EINVAL if @p has no mm or the mm has no per-CPU
+ * runtime counters.
+ *
+ * NOTE(review): an earlier comment claimed p->pi_lock is held here; the
+ * /proc/<pid>/schedstat caller does not appear to take it -- confirm. Also,
+ * nothing in this function pins p->mm for the duration of the walk; verify
+ * the mm lifetime (e.g. get_task_mm()/mmput()) before using this outside of
+ * debugging.
+ */
+int get_mm_per_llc_runtime(struct task_struct *p, u64 *buf)
+{
+ struct mm_struct *mm = p->mm;
+ struct mm_time __percpu *mm_pcpu_time;
+ int cpu;
+
+ if (!mm)
+ return -EINVAL;
+
+ /* Same guard as account_mm_sched(): the counters may not be set up. */
+ mm_pcpu_time = mm->pcpu_time;
+ if (!mm_pcpu_time)
+ return -EINVAL;
+
+ rcu_read_lock();
+ for_each_online_cpu(cpu) {
+ int llc = llc_id(cpu);
+ struct mm_time *pcpu_time;
+
+ /* Skip CPUs without an LLC id and ids that would overrun @buf. */
+ if (llc < 0 || llc >= max_llcs)
+ continue;
+
+ pcpu_time = per_cpu_ptr(mm_pcpu_time, cpu);
+ buf[llc] += div_u64(pcpu_time->runtime_ns, NSEC_PER_MSEC);
+ }
+ rcu_read_unlock();
+
+ return 0;
+}
+
static inline
void account_mm_sched(struct rq *rq, struct task_struct *p, s64 delta_exec)
{
struct mm_struct *mm = p->mm;
struct mm_sched *pcpu_sched;
+ struct mm_time *pcpu_time;
unsigned long epoch;
int mm_sched_llc = -1;
@@ -1444,14 +1477,17 @@ void account_mm_sched(struct rq *rq, struct task_struct *p, s64 delta_exec)
/*
* init_task and kthreads don't having mm
*/
- if (!mm || !mm->pcpu_sched)
+ if (!mm || !mm->pcpu_sched || !mm->pcpu_time)
return;
pcpu_sched = per_cpu_ptr(p->mm->pcpu_sched, cpu_of(rq));
+ pcpu_time = per_cpu_ptr(p->mm->pcpu_time, cpu_of(rq));
scoped_guard (raw_spinlock, &rq->cpu_epoch_lock) {
__update_mm_sched(rq, pcpu_sched);
pcpu_sched->runtime += delta_exec;
+ /* pure runtime without decay */
+ pcpu_time->runtime_ns += delta_exec;
rq->cpu_runtime += delta_exec;
epoch = rq->cpu_epoch;
}
--
2.32.0
Powered by blists - more mailing lists