Message-ID: <20241209094941.GF21636@noisy.programming.kicks-ass.net>
Date: Mon, 9 Dec 2024 10:49:41 +0100
From: Peter Zijlstra <peterz@...radead.org>
To: Marcel Ziswiler <marcel.ziswiler@...ethink.co.uk>
Cc: mingo@...hat.com, juri.lelli@...hat.com, vincent.guittot@...aro.org,
dietmar.eggemann@....com, rostedt@...dmis.org, bsegall@...gle.com,
mgorman@...e.de, vschneid@...hat.com, linux-kernel@...r.kernel.org,
kprateek.nayak@....com, wuyun.abel@...edance.com,
youssefesmat@...omium.org, tglx@...utronix.de, efault@....de
Subject: Re: [REGRESSION] Re: [PATCH 00/24] Complete EEVDF
Sorry for the delay, I got laid low by snot monsters :/
On Mon, Dec 02, 2024 at 07:46:21PM +0100, Marcel Ziswiler wrote:
> Unfortunately, once I trigger the failure the system is completely dead and won't allow me to dump the trace
> buffer any longer. So I did the following instead on the serial console terminal:
>
> tail -f /sys/kernel/debug/tracing/trace
>
> Not sure whether there is any better way to go about this. Plus, even though we run the serial console at 1.5
> megabaud, I am not fully sure whether it was able to keep up logging what you are looking for.
Ah, that is unfortunate. There is an ftrace_dump_on_oops option that
might be of help. And yes, dumping trace buffers over a 1.5M baud serial
line is tedious -- been there, done that, got a t-shirt and all that.
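In case it helps, ftrace_dump_on_oops can be armed either on the kernel
command line or at runtime through the sysctl; roughly something like:

  ftrace_dump_on_oops                               (boot parameter)

  echo 1 > /proc/sys/kernel/ftrace_dump_on_oops     (at runtime)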
Still, let me see if perhaps making that WARN in enqueue_dl_entity()
return makes the whole thing less fatal.
I've included traceoff_on_warning and ftrace_dump in the code, so all
you still need to do is enable the stacktrace option:
echo 1 > /sys/kernel/debug/tracing/options/stacktrace
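Since the ftrace_dump will spew the entire buffer over the console when
the WARN hits, it is probably worth capturing the serial output to a
file on the host side as well; something along these lines (device node
and file name are just examples, use whatever your setup has):

  minicom -b 1500000 -D /dev/ttyUSB0 -C eevdf-trace.log

or screen -L with the same device and speed should do.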
> Yes, and do not hesitate to ask for any additional information and so on; we are happy to help. Thanks!
Could I bother you to try again with the below patch?
There are two new hunks vs the previous patch. The hunk in
enqueue_dl_entity() (the very last bit) will stop tracing and dump the
buffers when that condition is hit, in addition to aborting the double
enqueue, hopefully leaving the system in a slightly better state.
The other new hunk is the one for dl_server_stop() (the second hunk).
While going over the code last week I found that this might be a
possible hole leading to the observed double enqueue, so fingers
crossed.
---
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 33b4646f8b24..bd1df7612482 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1223,6 +1223,11 @@ static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_
scoped_guard (rq_lock, rq) {
struct rq_flags *rf = &scope.rf;
+ if (dl_se == &rq->fair_server) {
+ trace_printk("timer fair server %d throttled %d\n",
+ cpu_of(rq), dl_se->dl_throttled);
+ }
+
if (!dl_se->dl_throttled || !dl_se->dl_runtime)
return HRTIMER_NORESTART;
@@ -1674,6 +1679,12 @@ void dl_server_start(struct sched_dl_entity *dl_se)
void dl_server_stop(struct sched_dl_entity *dl_se)
{
+ if (current->dl_server == dl_se) {
+ struct rq *rq = rq_of_dl_se(dl_se);
+ trace_printk("stop fair server %d\n", cpu_of(rq));
+ current->dl_server = NULL;
+ }
+
if (!dl_se->dl_runtime)
return;
@@ -1792,6 +1803,9 @@ static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
rq_lock(rq, &rf);
}
+ if (dl_se == &rq->fair_server)
+ trace_printk("inactive fair server %d\n", cpu_of(rq));
+
sched_clock_tick();
update_rq_clock(rq);
@@ -1987,6 +2001,12 @@ update_stats_dequeue_dl(struct dl_rq *dl_rq, struct sched_dl_entity *dl_se,
static void __enqueue_dl_entity(struct sched_dl_entity *dl_se)
{
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
+ struct rq *rq = rq_of_dl_se(dl_se);
+
+ if (dl_se == &rq->fair_server) {
+ trace_printk("enqueue fair server %d h_nr_running %d\n",
+ cpu_of(rq), rq->cfs.h_nr_running);
+ }
WARN_ON_ONCE(!RB_EMPTY_NODE(&dl_se->rb_node));
@@ -1998,6 +2018,12 @@ static void __enqueue_dl_entity(struct sched_dl_entity *dl_se)
static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)
{
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
+ struct rq *rq = rq_of_dl_se(dl_se);
+
+ if (dl_se == &rq->fair_server) {
+ trace_printk("dequeue fair server %d h_nr_running %d\n",
+ cpu_of(rq), rq->cfs.h_nr_running);
+ }
if (RB_EMPTY_NODE(&dl_se->rb_node))
return;
@@ -2012,7 +2038,11 @@ static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)
static void
enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
{
- WARN_ON_ONCE(on_dl_rq(dl_se));
+ if (WARN_ON_ONCE(on_dl_rq(dl_se))) {
+ tracing_off();
+ ftrace_dump(DUMP_ALL);
+ return;
+ }
update_stats_enqueue_dl(dl_rq_of_se(dl_se), dl_se, flags);