Message-Id: <5d5a6e243b88d47a744f3c84d2a3a74832a6ef35.1745199017.git.yu.c.chen@intel.com>
Date: Mon, 21 Apr 2025 11:25:33 +0800
From: Chen Yu <yu.c.chen@...el.com>
To: Peter Zijlstra <peterz@...radead.org>,
	Ingo Molnar <mingo@...hat.com>,
	K Prateek Nayak <kprateek.nayak@....com>,
	"Gautham R . Shenoy" <gautham.shenoy@....com>
Cc: Juri Lelli <juri.lelli@...hat.com>,
	Dietmar Eggemann <dietmar.eggemann@....com>,
	Steven Rostedt <rostedt@...dmis.org>,
	Ben Segall <bsegall@...gle.com>,
	Mel Gorman <mgorman@...e.de>,
	Valentin Schneider <vschneid@...hat.com>,
	Tim Chen <tim.c.chen@...el.com>,
	Vincent Guittot <vincent.guittot@...aro.org>,
	Libo Chen <libo.chen@...cle.com>,
	Abel Wu <wuyun.abel@...edance.com>,
	Madadi Vineeth Reddy <vineethr@...ux.ibm.com>,
	Hillf Danton <hdanton@...a.com>,
	linux-kernel@...r.kernel.org,
	Chen Yu <yu.c.chen@...el.com>
Subject: [RFC PATCH 5/5] sched: Add ftrace events to track task migration and load balance within and across LLC

[Not for upstream]
Introduce two ftrace events for debugging purposes. Task migration
activity is an important indicator when investigating performance
regressions.

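With the patch applied, the new events can also be consumed directly
through tracefs (a sketch for reference; paths assume tracefs is
mounted at /sys/kernel/tracing):

  echo 1 > /sys/kernel/tracing/events/sched/sched_attach_task/enable
  echo 1 > /sys/kernel/tracing/events/sched/sched_select_task_rq/enable
  cat /sys/kernel/tracing/trace_pipe

Each record then carries the TP_printk format defined below, e.g.
"src_cpu=3 dst_cpu=12 src_llc=0 dst_llc=8 idle=1" (values illustrative).
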
Alternatively, use the following bpftrace script to aggregate the task
migrations:

tracepoint:sched:sched_attach_task
{
  /* Load-balance migrations, keyed by the idle type of the balancing pass. */
  if (args->src_llc == args->dst_llc) {
    @lb_mig_1llc[args->idle] = count();
  } else {
    @lb_mig_2llc[args->idle] = count();
  }
}

tracepoint:sched:sched_select_task_rq
{
  /* Wakeup migrations: only count when the task changes CPU. */
  if (args->new_cpu != args->old_cpu) {
    if (args->new_llc == args->old_llc) {
      /* Within-LLC migrations, keyed by the destination LLC id. */
      @wake_mig_1llc[args->new_llc] = count();
    } else {
      /* Cross-LLC wakeup migrations, counted in total. */
      @wake_mig_2llc = count();
    }
  }
}

interval:s:10
{
  time("\n%H:%M:%S scheduler statistics:\n");
  print(@lb_mig_1llc);
  clear(@lb_mig_1llc);
  print(@lb_mig_2llc);
  clear(@lb_mig_2llc);
  print(@wake_mig_1llc);
  clear(@wake_mig_1llc);
  print(@wake_mig_2llc);
  clear(@wake_mig_2llc);
}
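
For reference, assuming the script is saved as task_mig.bt (the
filename is illustrative), run it with "bpftrace task_mig.bt". Every
10 seconds it prints and resets the maps in bpftrace's usual
@map[key]: count form, e.g. (values illustrative):

  22:10:05 scheduler statistics:
  @lb_mig_1llc[1]: 123
  @lb_mig_2llc[2]: 17
  @wake_mig_1llc[8]: 456
  @wake_mig_2llc: 42

where the key of the @lb_mig_* maps is the idle type of the balancing
pass and the key of @wake_mig_1llc is the destination LLC id.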

Signed-off-by: Chen Yu <yu.c.chen@...el.com>
---
 include/trace/events/sched.h | 51 ++++++++++++++++++++++++++++++++++++
 kernel/sched/fair.c          | 24 ++++++++++++-----
 2 files changed, 69 insertions(+), 6 deletions(-)

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 3bec9fb73a36..9995e09525ed 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -10,6 +10,57 @@
 #include <linux/tracepoint.h>
 #include <linux/binfmts.h>
 
+TRACE_EVENT(sched_attach_task,
+
+	TP_PROTO(int src_cpu, int dst_cpu, int src_llc, int dst_llc, int idle),
+
+	TP_ARGS(src_cpu, dst_cpu, src_llc, dst_llc, idle),
+
+	TP_STRUCT__entry(
+		__field(	int,	src_cpu		)
+		__field(	int,	dst_cpu		)
+		__field(	int,	src_llc		)
+		__field(	int,	dst_llc		)
+		__field(	int,	idle		)
+	),
+
+	TP_fast_assign(
+		__entry->src_cpu	= src_cpu;
+		__entry->dst_cpu	= dst_cpu;
+		__entry->src_llc	= src_llc;
+		__entry->dst_llc	= dst_llc;
+		__entry->idle		= idle;
+	),
+
+	TP_printk("src_cpu=%d dst_cpu=%d src_llc=%d dst_llc=%d idle=%d",
+		  __entry->src_cpu, __entry->dst_cpu, __entry->src_llc,
+		  __entry->dst_llc, __entry->idle)
+);
+
+TRACE_EVENT(sched_select_task_rq,
+
+	TP_PROTO(int new_cpu, int old_cpu, int new_llc, int old_llc),
+
+	TP_ARGS(new_cpu, old_cpu, new_llc, old_llc),
+
+	TP_STRUCT__entry(
+		__field(	int,	new_cpu		)
+		__field(	int,	old_cpu		)
+		__field(	int,	new_llc		)
+		__field(	int,	old_llc		)
+	),
+
+	TP_fast_assign(
+		__entry->new_cpu	= new_cpu;
+		__entry->old_cpu	= old_cpu;
+		__entry->new_llc	= new_llc;
+		__entry->old_llc	= old_llc;
+	),
+
+	TP_printk("new_cpu=%d old_cpu=%d new_llc=%d old_llc=%d",
+		  __entry->new_cpu, __entry->old_cpu, __entry->new_llc, __entry->old_llc)
+);
+
 /*
  * Tracepoint for calling kthread_stop, performed to end a kthread:
  */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f74d8773c811..635fd3a6009c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8902,7 +8902,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
 	int sync = (wake_flags & WF_SYNC) && !(current->flags & PF_EXITING);
 	struct sched_domain *tmp, *sd = NULL;
 	int cpu = smp_processor_id();
-	int new_cpu = prev_cpu;
+	int new_cpu = prev_cpu, orig_prev_cpu = prev_cpu;
 	int want_affine = 0;
 	/* SD_flags and WF_flags share the first nibble */
 	int sd_flag = wake_flags & 0xF;
@@ -8965,6 +8965,10 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
 		new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
 	}
 
+	trace_sched_select_task_rq(new_cpu, orig_prev_cpu,
+				   per_cpu(sd_llc_id, new_cpu),
+				   per_cpu(sd_llc_id, orig_prev_cpu));
+
 	return new_cpu;
 }
 
@@ -10026,11 +10030,17 @@ static int detach_tasks(struct lb_env *env)
 /*
  * attach_task() -- attach the task detached by detach_task() to its new rq.
  */
-static void attach_task(struct rq *rq, struct task_struct *p)
+static void attach_task(struct rq *rq, struct task_struct *p, struct lb_env *env)
 {
 	lockdep_assert_rq_held(rq);
 
 	WARN_ON_ONCE(task_rq(p) != rq);
+
+	if (env)
+		trace_sched_attach_task(env->src_cpu, env->dst_cpu,
+					per_cpu(sd_llc_id, env->src_cpu),
+					per_cpu(sd_llc_id, env->dst_cpu),
+					env->idle);
 	activate_task(rq, p, ENQUEUE_NOCLOCK);
 	wakeup_preempt(rq, p, 0);
 }
@@ -10039,13 +10049,13 @@ static void attach_task(struct rq *rq, struct task_struct *p)
  * attach_one_task() -- attaches the task returned from detach_one_task() to
  * its new rq.
  */
-static void attach_one_task(struct rq *rq, struct task_struct *p)
+static void attach_one_task(struct rq *rq, struct task_struct *p, struct lb_env *env)
 {
 	struct rq_flags rf;
 
 	rq_lock(rq, &rf);
 	update_rq_clock(rq);
-	attach_task(rq, p);
+	attach_task(rq, p, env);
 	rq_unlock(rq, &rf);
 }
 
@@ -10066,7 +10076,7 @@ static void attach_tasks(struct lb_env *env)
 		p = list_first_entry(tasks, struct task_struct, se.group_node);
 		list_del_init(&p->se.group_node);
 
-		attach_task(env->dst_rq, p);
+		attach_task(env->dst_rq, p, env);
 	}
 
 	rq_unlock(env->dst_rq, &rf);
@@ -12457,6 +12467,7 @@ static int active_load_balance_cpu_stop(void *data)
 	struct sched_domain *sd;
 	struct task_struct *p = NULL;
 	struct rq_flags rf;
+	struct lb_env env_tmp;
 
 	rq_lock_irq(busiest_rq, &rf);
 	/*
@@ -12512,6 +12523,7 @@ static int active_load_balance_cpu_stop(void *data)
 		} else {
 			schedstat_inc(sd->alb_failed);
 		}
+		memcpy(&env_tmp, &env, sizeof(env));
 	}
 	rcu_read_unlock();
 out_unlock:
@@ -12519,7 +12531,7 @@ static int active_load_balance_cpu_stop(void *data)
 	rq_unlock(busiest_rq, &rf);
 
 	if (p)
-		attach_one_task(target_rq, p);
+		attach_one_task(target_rq, p, sd ? &env_tmp : NULL);
 
 	local_irq_enable();
 
-- 
2.25.1

