Message-Id: <5d5a6e243b88d47a744f3c84d2a3a74832a6ef35.1745199017.git.yu.c.chen@intel.com>
Date: Mon, 21 Apr 2025 11:25:33 +0800
From: Chen Yu <yu.c.chen@...el.com>
To: Peter Zijlstra <peterz@...radead.org>,
Ingo Molnar <mingo@...hat.com>,
K Prateek Nayak <kprateek.nayak@....com>,
"Gautham R . Shenoy" <gautham.shenoy@....com>
Cc: Juri Lelli <juri.lelli@...hat.com>,
Dietmar Eggemann <dietmar.eggemann@....com>,
Steven Rostedt <rostedt@...dmis.org>,
Ben Segall <bsegall@...gle.com>,
Mel Gorman <mgorman@...e.de>,
Valentin Schneider <vschneid@...hat.com>,
Tim Chen <tim.c.chen@...el.com>,
Vincent Guittot <vincent.guittot@...aro.org>,
Libo Chen <libo.chen@...cle.com>,
Abel Wu <wuyun.abel@...edance.com>,
Madadi Vineeth Reddy <vineethr@...ux.ibm.com>,
Hillf Danton <hdanton@...a.com>,
linux-kernel@...r.kernel.org,
Chen Yu <yu.c.chen@...el.com>
Subject: [RFC PATCH 5/5] sched: Add ftrace events to track task migration and load balancing within and across LLCs

[Not for upstream]

Introduce these ftrace events for debugging purposes: task migration
activity is an important indicator when investigating performance
regressions.

Use the following bpftrace script to capture the task migrations:

tracepoint:sched:sched_attach_task
{
	$src_cpu = args->src_cpu;
	$dst_cpu = args->dst_cpu;
	$src_llc = args->src_llc;
	$dst_llc = args->dst_llc;
	$idle = args->idle;

	if ($src_llc == $dst_llc) {
		@lb_mig_1llc[$idle] = count();
	} else {
		@lb_mig_2llc[$idle] = count();
	}
}

tracepoint:sched:sched_select_task_rq
{
	$new_cpu = args->new_cpu;
	$old_cpu = args->old_cpu;
	$new_llc = args->new_llc;
	$old_llc = args->old_llc;

	if ($new_cpu != $old_cpu) {
		if ($new_llc == $old_llc) {
			@wake_mig_1llc[$new_llc] = count();
		} else {
			@wake_mig_2llc = count();
		}
	}
}

interval:s:10
{
	time("\n%H:%M:%S scheduler statistics: \n");
	print(@lb_mig_1llc);
	clear(@lb_mig_1llc);
	print(@lb_mig_2llc);
	clear(@lb_mig_2llc);
	print(@wake_mig_1llc);
	clear(@wake_mig_1llc);
	print(@wake_mig_2llc);
	clear(@wake_mig_2llc);
}
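
The script can be saved to a file (the name mig.bt below is only an
example) and started with:

  # bpftrace mig.bt

Every 10 seconds it prints and clears four counters: load-balance
migrations within/across an LLC, keyed by the idle type of the
balance (enum cpu_idle_type), and wakeup migrations within/across
an LLC.
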
Signed-off-by: Chen Yu <yu.c.chen@...el.com>
---
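Besides bpftrace, the two events can also be enabled directly via
tracefs for a quick check (the paths below assume tracefs is mounted
at /sys/kernel/tracing):

  # echo 1 > /sys/kernel/tracing/events/sched/sched_attach_task/enable
  # echo 1 > /sys/kernel/tracing/events/sched/sched_select_task_rq/enable
  # cat /sys/kernel/tracing/trace_pipe

Each record is emitted in the TP_printk() format defined by the
events below.
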
 include/trace/events/sched.h | 51 ++++++++++++++++++++++++++++++++++++
 kernel/sched/fair.c          | 24 ++++++++++++-----
 2 files changed, 69 insertions(+), 6 deletions(-)

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 3bec9fb73a36..9995e09525ed 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -10,6 +10,57 @@
 #include <linux/tracepoint.h>
 #include <linux/binfmts.h>
 
+TRACE_EVENT(sched_attach_task,
+
+	TP_PROTO(int src_cpu, int dst_cpu, int src_llc, int dst_llc, int idle),
+
+	TP_ARGS(src_cpu, dst_cpu, src_llc, dst_llc, idle),
+
+	TP_STRUCT__entry(
+		__field(	int,	src_cpu	)
+		__field(	int,	dst_cpu	)
+		__field(	int,	src_llc	)
+		__field(	int,	dst_llc	)
+		__field(	int,	idle	)
+	),
+
+	TP_fast_assign(
+		__entry->src_cpu = src_cpu;
+		__entry->dst_cpu = dst_cpu;
+		__entry->src_llc = src_llc;
+		__entry->dst_llc = dst_llc;
+		__entry->idle = idle;
+	),
+
+	TP_printk("src_cpu=%d dst_cpu=%d src_llc=%d dst_llc=%d idle=%d",
+		  __entry->src_cpu, __entry->dst_cpu, __entry->src_llc,
+		  __entry->dst_llc, __entry->idle)
+);
+
+TRACE_EVENT(sched_select_task_rq,
+
+	TP_PROTO(int new_cpu, int old_cpu, int new_llc, int old_llc),
+
+	TP_ARGS(new_cpu, old_cpu, new_llc, old_llc),
+
+	TP_STRUCT__entry(
+		__field(	int,	new_cpu	)
+		__field(	int,	old_cpu	)
+		__field(	int,	new_llc	)
+		__field(	int,	old_llc	)
+	),
+
+	TP_fast_assign(
+		__entry->new_cpu = new_cpu;
+		__entry->old_cpu = old_cpu;
+		__entry->new_llc = new_llc;
+		__entry->old_llc = old_llc;
+	),
+
+	TP_printk("new_cpu=%d old_cpu=%d new_llc=%d old_llc=%d",
+		  __entry->new_cpu, __entry->old_cpu, __entry->new_llc, __entry->old_llc)
+);
+
 /*
  * Tracepoint for calling kthread_stop, performed to end a kthread:
  */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f74d8773c811..635fd3a6009c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8902,7 +8902,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
 	int sync = (wake_flags & WF_SYNC) && !(current->flags & PF_EXITING);
 	struct sched_domain *tmp, *sd = NULL;
 	int cpu = smp_processor_id();
-	int new_cpu = prev_cpu;
+	int new_cpu = prev_cpu, orig_prev_cpu = prev_cpu;
 	int want_affine = 0;
 	/* SD_flags and WF_flags share the first nibble */
 	int sd_flag = wake_flags & 0xF;
@@ -8965,6 +8965,10 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
 		new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
 	}
 
+	trace_sched_select_task_rq(new_cpu, orig_prev_cpu,
+				   per_cpu(sd_llc_id, new_cpu),
+				   per_cpu(sd_llc_id, orig_prev_cpu));
+
 	return new_cpu;
 }
@@ -10026,11 +10030,17 @@ static int detach_tasks(struct lb_env *env)
 
 /*
  * attach_task() -- attach the task detached by detach_task() to its new rq.
  */
-static void attach_task(struct rq *rq, struct task_struct *p)
+static void attach_task(struct rq *rq, struct task_struct *p, struct lb_env *env)
 {
 	lockdep_assert_rq_held(rq);
 	WARN_ON_ONCE(task_rq(p) != rq);
+
+	if (env)
+		trace_sched_attach_task(env->src_cpu, env->dst_cpu,
+					per_cpu(sd_llc_id, env->src_cpu),
+					per_cpu(sd_llc_id, env->dst_cpu),
+					env->idle);
 	activate_task(rq, p, ENQUEUE_NOCLOCK);
 	wakeup_preempt(rq, p, 0);
 }
@@ -10039,13 +10049,13 @@ static void attach_task(struct rq *rq, struct task_struct *p)
  * attach_one_task() -- attaches the task returned from detach_one_task() to
  * its new rq.
  */
-static void attach_one_task(struct rq *rq, struct task_struct *p)
+static void attach_one_task(struct rq *rq, struct task_struct *p, struct lb_env *env)
 {
 	struct rq_flags rf;
 
 	rq_lock(rq, &rf);
 	update_rq_clock(rq);
-	attach_task(rq, p);
+	attach_task(rq, p, env);
 	rq_unlock(rq, &rf);
 }
@@ -10066,7 +10076,7 @@ static void attach_tasks(struct lb_env *env)
 		p = list_first_entry(tasks, struct task_struct, se.group_node);
 		list_del_init(&p->se.group_node);
 
-		attach_task(env->dst_rq, p);
+		attach_task(env->dst_rq, p, env);
 	}
 
 	rq_unlock(env->dst_rq, &rf);
@@ -12457,6 +12467,7 @@ static int active_load_balance_cpu_stop(void *data)
 	struct sched_domain *sd;
 	struct task_struct *p = NULL;
 	struct rq_flags rf;
+	struct lb_env env_tmp;
 
 	rq_lock_irq(busiest_rq, &rf);
 	/*
@@ -12512,6 +12523,7 @@ static int active_load_balance_cpu_stop(void *data)
 		} else {
 			schedstat_inc(sd->alb_failed);
 		}
+		memcpy(&env_tmp, &env, sizeof(env));
 	}
 	rcu_read_unlock();
 out_unlock:
@@ -12519,7 +12531,7 @@ static int active_load_balance_cpu_stop(void *data)
 	rq_unlock(busiest_rq, &rf);
 
 	if (p)
-		attach_one_task(target_rq, p);
+		attach_one_task(target_rq, p, sd ? &env_tmp : NULL);
 
 	local_irq_enable();
--
2.25.1