lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Fri, 12 May 2017 11:32:09 +0800
From:   Xunlei Pang <xlpang@...hat.com>
To:     linux-kernel@...r.kernel.org
Cc:     Peter Zijlstra <peterz@...radead.org>,
        Juri Lelli <juri.lelli@....com>,
        Ingo Molnar <mingo@...hat.com>,
        Steven Rostedt <rostedt@...dmis.org>,
        Luca Abeni <luca.abeni@...tannapisa.it>,
        Daniel Bristot de Oliveira <bristot@...hat.com>,
        Xunlei Pang <xlpang@...hat.com>
Subject: [PATCH v2 3/3] sched/deadline: Add statistics to track runtime underruns

Add accounting to track the cases in which a task's runtime is not
fully consumed (runtime underruns), and export the information in
"/proc/<pid>/sched".

Specifically, the patch adds three members "nr_underrun_sched",
"nr_underrun_block", and "nr_underrun_yield" in sched_dl_entity:
 -@nr_underrun_sched hints some scheduling issue.
 -@nr_underrun_block hints some block reason. E.g. long sleep.
 -@nr_underrun_yield hints the yield reason.

This is helpful to spot/debug deadline issues, for example,
I launched three 50% dl tasks on my dual-core machine, plus
several buggy constrained dl tasks that Daniel is trying to
address in "sched/deadline: Use the revised wakeup rule for
suspending constrained dl tasks", then I observed one 50%
deadline task's proc sched output:
$ cat /proc/3389/sched |grep underrun
dl.nr_underrun_sched                :        981
dl.nr_underrun_block                :          0
dl.nr_underrun_yield                :          0

A very large "dl.nr_underrun_sched" value hints that there is
very likely some underlying scheduling issue.

Note that we don't gate this behind CONFIG_SCHED_DEBUG, as the
accounting added has little overhead (and also happens infrequently).

Suggested-by: Steven Rostedt <rostedt@...dmis.org>
Signed-off-by: Xunlei Pang <xlpang@...hat.com>
---
 include/linux/sched.h   | 10 ++++++++++
 kernel/sched/core.c     |  3 +++
 kernel/sched/deadline.c | 12 +++++++++---
 kernel/sched/debug.c    |  3 +++
 4 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ba080e5..e17928f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -455,6 +455,16 @@ struct sched_dl_entity {
 	 * own bandwidth to be enforced, thus we need one timer per task.
 	 */
 	struct hrtimer			dl_timer;
+
+	/*
+	 * Accounting for periods that run less than @dl_runtime:
+	 * @nr_underrun_sched hints some scheduling issue.
+	 * @nr_underrun_block hints some block reason. E.g. long sleep.
+	 * @nr_underrun_yield hints the yield reason.
+	 */
+	u64				nr_underrun_sched;
+	u64				nr_underrun_block;
+	u64				nr_underrun_yield;
 };
 
 union rcu_special {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index bccd819..6214ada 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4004,6 +4004,9 @@ static struct task_struct *find_process_by_pid(pid_t pid)
 	dl_se->dl_period = attr->sched_period ?: dl_se->dl_deadline;
 	dl_se->flags = attr->sched_flags;
 	dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
+	dl_se->nr_underrun_sched = 0;
+	dl_se->nr_underrun_block = 0;
+	dl_se->nr_underrun_yield = 0;
 
 	/*
 	 * Changing the parameters of a task is 'tricky' and we're not doing
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 5691149..a7ddc03 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -394,8 +394,10 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
 		dl_se->runtime = pi_se->dl_runtime;
 	}
 
-	if (dl_se->dl_yielded && dl_se->runtime > 0)
+	if (dl_se->dl_yielded && dl_se->runtime > 0) {
 		dl_se->runtime = 0;
+		++dl_se->nr_underrun_yield;
+	}
 
 	/*
 	 * We keep moving the deadline away until we get some
@@ -723,8 +725,10 @@ static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se)
 		if (unlikely(dl_se->dl_boosted || !start_dl_timer(p)))
 			return;
 		dl_se->dl_throttled = 1;
-		if (dl_se->runtime > 0)
+		if (dl_se->runtime > 0) {
 			dl_se->runtime = 0;
+			++dl_se->nr_underrun_block;
+		}
 	}
 }
 
@@ -733,8 +737,10 @@ int dl_runtime_exceeded(struct rq *rq, struct sched_dl_entity *dl_se)
 {
 	bool dmiss = dl_time_before(dl_se->deadline, rq_clock(rq));
 
-	if (dmiss && dl_se->runtime > 0)
+	if (dmiss && dl_se->runtime > 0) {
 		dl_se->runtime = 0;
+		++dl_se->nr_underrun_sched;
+	}
 
 	return (dl_se->runtime <= 0);
 }
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 38f0193..904b43f 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -957,6 +957,9 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 	if (p->policy == SCHED_DEADLINE) {
 		P(dl.runtime);
 		P(dl.deadline);
+		P(dl.nr_underrun_sched);
+		P(dl.nr_underrun_block);
+		P(dl.nr_underrun_yield);
 	}
 #undef PN_SCHEDSTAT
 #undef PN
-- 
1.8.3.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ