linux-kernel - [RFC PATCH 22/22] [DEBUG] sched/fair: Debug pick

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250220093257.9380-23-kprateek.nayak@amd.com>
Date: Thu, 20 Feb 2025 09:32:57 +0000
From: K Prateek Nayak <kprateek.nayak@....com>
To: Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>,
	Juri Lelli <juri.lelli@...hat.com>, Vincent Guittot
	<vincent.guittot@...aro.org>, Valentin Schneider <vschneid@...hat.com>, "Ben
 Segall" <bsegall@...gle.com>, Thomas Gleixner <tglx@...utronix.de>, "Andy
 Lutomirski" <luto@...nel.org>, <linux-kernel@...r.kernel.org>
CC: Dietmar Eggemann <dietmar.eggemann@....com>, Steven Rostedt
	<rostedt@...dmis.org>, Mel Gorman <mgorman@...e.de>, "Sebastian Andrzej
 Siewior" <bigeasy@...utronix.de>, Clark Williams <clrkwllms@...nel.org>,
	<linux-rt-devel@...ts.linux.dev>, Tejun Heo <tj@...nel.org>, "Frederic
 Weisbecker" <frederic@...nel.org>, Barret Rhoden <brho@...gle.com>, "Petr
 Mladek" <pmladek@...e.com>, Josh Don <joshdon@...gle.com>, Qais Yousef
	<qyousef@...alina.io>, "Paul E. McKenney" <paulmck@...nel.org>, David Vernet
	<dvernet@...a.com>, K Prateek Nayak <kprateek.nayak@....com>, "Gautham R.
 Shenoy" <gautham.shenoy@....com>, Swapnil Sapkal <swapnil.sapkal@....com>
Subject: [RFC PATCH 22/22] [DEBUG] sched/fair: Debug pick_eevdf() returning NULL!

Dump the stats of the cfs_rq and the entities queued on it when pick_eevdf()
fails to find a runnable entity. Let the panic that follows proceed, since
this scenario implies a breakdown of the scheduling algorithm.

XXX: This will only build with CONFIG_CFS_BANDWIDTH enabled.

Signed-off-by: K Prateek Nayak <kprateek.nayak@....com>
---
 kernel/sched/fair.c | 75 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 97566a043398..270e5f4b2741 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5610,6 +5610,78 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 
 static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags);
 
+static void debug_print_se(int cpu, struct sched_entity *se, bool h_throttled)
+{	/* Log two pr_warn() lines ("se:" and "se kcs:") for se; cpu is only the log prefix, se is dereferenced unconditionally. */
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);	/* queue that entity_key() and pick_entity() are evaluated against */
+
+	pr_warn("CPU%d: se: load(%lu) vruntime(%lld) entity_key(%lld) deadline(%lld) min_vruntime(%lld) on_rq(%d)\n", cpu, scale_load_down(se->load.weight), se->vruntime, entity_key(cfs_rq, se), se->deadline, se->min.vruntime, se->on_rq);
+	pr_warn("CPU%d: se kcs: kernel_cs_count(%d) min_kcs_vruntime(%lld) pick_entity(%d)\n", cpu, se->kernel_cs_count, se->min.kcs_vruntime, pick_entity(cfs_rq, se, h_throttled));
+}
+
+static void debug_print_cfs_rq(int cpu, struct cfs_rq *cfs_rq, struct sched_entity *se, bool h_throttled)
+{	/* Log h_throttled, cfs_rq->throttled, the queue counters and the avg_* fields of cfs_rq; se may be NULL (logged as -1). */
+	pr_warn("CPU%d: ----- cfs_rq ----\n", cpu);
+	pr_warn("CPU%d: cfs_rq: throttled?(%d) cfs_rq->throttled(%d) h_nr_queued(%d) h_nr_runnable(%d) nr_queued(%d) gse->kernel_cs_count(%d)\n", cpu, h_throttled, cfs_rq->throttled, cfs_rq->h_nr_queued, cfs_rq->h_nr_runnable, cfs_rq->nr_queued, (se)? se->kernel_cs_count: -1);
+	pr_warn("CPU%d: cfs_rq EEVDF: avg_vruntime(%lld) avg_load(%lld) avg_kcs_vruntime(%lld) avg_kcs_load(%lld) \n", cpu, cfs_rq->avg_vruntime, cfs_rq->avg_load, cfs_rq->avg_kcs_vruntime, cfs_rq->avg_kcs_load);
+	/* This function never walks tasks_timeline; cfs_rq->curr is the only entity it dumps. */
+	if (cfs_rq->curr) {
+		pr_warn("CPU%d: ----- cfs_rq->curr ----\n", cpu);
+		debug_print_se(cpu, cfs_rq->curr, h_throttled);
+	}
+	pr_warn("CPU%d: ----- cfs_rq done ----\n", cpu);
+}
+
+static void debug_recursive(int cpu, struct rb_node *node, bool h_throttled)
+{	/* Pre-order dump of the rbtree rooted at node: the node's entity first, then its left subtree, then its right subtree. */
+	debug_print_se(cpu, __node_2_se(node), h_throttled);
+	/* node is dereferenced unconditionally, so it must be non-NULL; each child is checked before recursing into it. */
+	if (node->rb_left) {
+		pr_warn("CPU%d: ----- Left Subtree ----\n", cpu);
+		debug_recursive(cpu, node->rb_left, h_throttled);
+		pr_warn("CPU%d: ----- Left Subtree Done ----\n", cpu);
+	}
+
+	if (node->rb_right) {
+		pr_warn("CPU%d: ----- Right Subtree ----\n", cpu);
+		debug_recursive(cpu, node->rb_right, h_throttled);
+		pr_warn("CPU%d: ----- Right Subtree Done ----\n", cpu);
+	}
+}
+
+static void debug_pick_next_entity(struct cfs_rq *cfs_rq, bool h_throttled)
+{	/* Dump the rq's current task, cfs_rq and its rbtree, then every ancestor cfs_rq up to the root. */
+	struct sched_entity *se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];
+	struct rb_node *node = cfs_rq->tasks_timeline.rb_root.rb_node;
+	struct task_struct *p = rq_of(cfs_rq)->curr;
+	int cpu = smp_processor_id();
+
+	if (p) {
+		pr_warn("CPU%d: ----- current task ----\n", cpu);
+		pr_warn("CPU%d: pid(%d) comm(%s) task_cpu(%d) task_on_rq_queued(%d) task_on_rq_migrating(%d) normal_policy(%d) idle_policy(%d)\n", cpu, p->pid, p->comm, task_cpu(p), task_on_rq_queued(p), task_on_rq_migrating(p), normal_policy(p->policy), idle_policy(p->policy));
+		pr_warn("CPU%d: ----- current task done ----\n", cpu);
+	}
+
+	debug_print_cfs_rq(cpu, cfs_rq, se, h_throttled);
+	if (node) {
+		pr_warn("CPU%d: ----- rbtree traversal: root ----\n", cpu);
+		debug_recursive(cpu, node, h_throttled);
+		pr_warn("CPU%d: ----- rbtree done ----\n", cpu);
+	}
+
+	/* se is NULL when cfs_rq has no group entity (root task group): no parent to walk, and cfs_rq_of(se) would oops. */
+	if (!se)
+		return;
+
+	cfs_rq = cfs_rq_of(se);
+	se = parent_entity(se);
+	for_each_sched_entity(se) {
+		pr_warn("CPU%d: ----- parent cfs_rq ----\n", cpu);
+		debug_print_cfs_rq(cpu, cfs_rq, se, h_throttled);
+		cfs_rq = cfs_rq_of(se);
+	}
+	debug_print_cfs_rq(cpu, cfs_rq, NULL, false);
+}
+
 /*
  * Pick the next process, keeping these things in mind, in this order:
  * 1) keep things fair between processes/task groups
@@ -5633,6 +5705,9 @@ pick_next_entity(struct rq *rq, struct cfs_rq *cfs_rq, bool h_throttled)
 	}
 
 	se = pick_eevdf(cfs_rq, h_throttled);
+	if (!se)
+		debug_pick_next_entity(cfs_rq, h_throttled);
+
 	if (se->sched_delayed) {
 		dequeue_entities(rq, se, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
 		/*
-- 
2.43.0