lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <1467661564-7516-1-git-send-email-jdesfossez@efficios.com>
Date:	Mon,  4 Jul 2016 15:46:04 -0400
From:	Julien Desfossez <jdesfossez@...icios.com>
To:	peterz@...radead.org, tglx@...utronix.de, rostedt@...dmis.org,
	mingo@...nel.org, daolivei@...hat.com
Cc:	mathieu.desnoyers@...icios.com, linux-kernel@...r.kernel.org,
	Julien Desfossez <jdesfossez@...icios.com>
Subject: [RFC PATCH] tracing: add sched_prio_update

This tracepoint allows to keep track of all priority changes of a task.
It outputs the scheduling policy, the nice value, the rt_priority and
the deadline-related attributes (dl_runtime, dl_deadline and dl_period).

It is emitted in the code path of the sched_setscheduler, sched_setattr,
sched_setparam, nice and the fork system calls. For fork, it is emitted
after the sched_process_fork tracepoint for timeline consistency and
because the PID is not yet set when sched_fork() is called.

This allows the analysis of real-time scheduling delays based on the
configured scheduling priorities and policies, which cannot be performed
with the current instrumentation in sched_switch. Also, instead of
exposing the internal kernel prio field, this tracepoint only outputs
the user-visible priority attributes.

The effective priority of running threads can also be temporarily
changed in the PI code, but a dedicated tracepoint is already in place
to cover this case.

Here are a few output examples:
After fork of a normal task:
sched_prio_update: comm=bash pid=2104, policy=SCHED_NORMAL, nice=0,
	rt_priority=0, dl_runtime=0, dl_deadline=0, dl_period=0

renice -n 10 of a normal task:
sched_prio_update: comm=sleep pid=2130, policy=SCHED_NORMAL, nice=10,
	rt_priority=0, dl_runtime=0, dl_deadline=0, dl_period=0

SCHED_FIFO 60:
sched_prio_update: comm=chrt pid=2105, policy=SCHED_FIFO, nice=0,
	rt_priority=60, dl_runtime=0, dl_deadline=0, dl_period=0

SCHED_RR 60:
sched_prio_update: comm=chrt pid=2109, policy=SCHED_RR, nice=0,
	rt_priority=60, dl_runtime=0, dl_deadline=0, dl_period=0

SCHED_DEADLINE:
sched_prio_update: comm=b pid=2110, policy=SCHED_DEADLING, nice=0,
	rt_priority=0, dl_runtime=10000000, dl_deadline=30000000,
	dl_period=30000000

Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
Signed-off-by: Julien Desfossez <jdesfossez@...icios.com>
---
 include/trace/events/sched.h | 68 ++++++++++++++++++++++++++++++++++++++++++++
 kernel/fork.c                |  1 +
 kernel/sched/core.c          |  3 ++
 3 files changed, 72 insertions(+)

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 9b90c57..fcb0f29 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -8,6 +8,34 @@
 #include <linux/tracepoint.h>
 #include <linux/binfmts.h>
 
+#define SCHEDULING_POLICY				\
+	EM( SCHED_NORMAL,	"SCHED_NORMAL")		\
+	EM( SCHED_FIFO,		"SCHED_FIFO")		\
+	EM( SCHED_RR,		"SCHED_RR")		\
+	EM( SCHED_BATCH,	"SCHED_BATCH")		\
+	EM( SCHED_IDLE,		"SCHED_IDLE")		\
+	EMe(SCHED_DEADLINE,	"SCHED_DEADLINE")
+
+/*
+ * First define the enums in the above macros to be exported to userspace
+ * via TRACE_DEFINE_ENUM().
+ */
+#undef EM
+#undef EMe
+#define EM(a, b)	TRACE_DEFINE_ENUM(a);
+#define EMe(a, b)	TRACE_DEFINE_ENUM(a);
+
+SCHEDULING_POLICY
+
+/*
+ * Now redefine the EM() and EMe() macros to map the enums to the strings
+ * that will be printed in the output.
+ */
+#undef EM
+#undef EMe
+#define EM(a, b)	{a, b},
+#define EMe(a, b)	{a, b}
+
 /*
  * Tracepoint for calling kthread_stop, performed to end a kthread:
  */
@@ -562,6 +590,46 @@ TRACE_EVENT(sched_wake_idle_without_ipi,
 
 	TP_printk("cpu=%d", __entry->cpu)
 );
+
+/*
+ * Tracepoint for showing scheduling priority changes.
+ */
+TRACE_EVENT(sched_prio_update,
+
+	TP_PROTO(struct task_struct *tsk),
+
+	TP_ARGS(tsk),
+
+	TP_STRUCT__entry(
+		__array( char,	comm,	TASK_COMM_LEN	)
+		__field( pid_t,	pid			)
+		__field( unsigned int,	policy		)
+		__field( int,	nice			)
+		__field( unsigned int,	rt_priority	)
+		__field( u64,	dl_runtime		)
+		__field( u64,	dl_deadline		)
+		__field( u64,	dl_period		)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
+		__entry->pid		= tsk->pid;
+		__entry->policy		= tsk->policy;
+		__entry->nice		= task_nice(tsk);
+		__entry->rt_priority	= tsk->rt_priority;
+		__entry->dl_runtime	= tsk->dl.dl_runtime;
+		__entry->dl_deadline	= tsk->dl.dl_deadline;
+		__entry->dl_period	= tsk->dl.dl_period;
+	),
+
+	TP_printk("comm=%s pid=%d, policy=%s, nice=%d, rt_priority=%u, "
+			"dl_runtime=%Lu, dl_deadline=%Lu, dl_period=%Lu",
+			__entry->comm, __entry->pid,
+			__print_symbolic(__entry->policy, SCHEDULING_POLICY),
+			__entry->nice, __entry->rt_priority,
+			__entry->dl_runtime, __entry->dl_deadline,
+			__entry->dl_period)
+);
 #endif /* _TRACE_SCHED_H */
 
 /* This part must be outside protection */
diff --git a/kernel/fork.c b/kernel/fork.c
index 7926993..ac4294a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1773,6 +1773,7 @@ long _do_fork(unsigned long clone_flags,
 		struct pid *pid;
 
 		trace_sched_process_fork(current, p);
+		trace_sched_prio_update(p);
 
 		pid = get_task_pid(p, PIDTYPE_PID);
 		nr = pid_vnr(pid);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ce83e39..c729425 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3708,6 +3708,7 @@ void set_user_nice(struct task_struct *p, long nice)
 			resched_curr(rq);
 	}
 out_unlock:
+	trace_sched_prio_update(p);
 	task_rq_unlock(rq, p, &rf);
 }
 EXPORT_SYMBOL(set_user_nice);
@@ -3912,6 +3913,8 @@ static void __setscheduler(struct rq *rq, struct task_struct *p,
 		p->sched_class = &rt_sched_class;
 	else
 		p->sched_class = &fair_sched_class;
+
+	trace_sched_prio_update(p);
 }
 
 static void
-- 
1.9.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ