lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1484327993-5036-3-git-send-email-jdesfossez@efficios.com>
Date:   Fri, 13 Jan 2017 12:19:53 -0500
From:   Julien Desfossez <jdesfossez@...icios.com>
To:     peterz@...radead.org, rostedt@...dmis.org, tglx@...utronix.de,
        mingo@...hat.com, bristot@...hat.com,
        mathieu.desnoyers@...icios.com
Cc:     linux-kernel@...r.kernel.org,
        Julien Desfossez <jdesfossez@...icios.com>
Subject: [RFC PATCH v3 2/2] tracing: add policy-based sched_switch events

Add 3 new tracepoints: sched_switch_fair, sched_switch_rt and
sched_switch_dl.

These conditional tracepoints are emitted based on the scheduling class
of the next task. Each of these tracepoints gets rid of the prio field
from the original sched_switch and replaces it with fields that are
relevant to the policy of the next task:
  - for a fair task: the nice value,
  - for an rt task: the nice and rt_priority values,
  - for a dl task: the runtime, deadline and period values.

The original sched_switch event is left unmodified, so these new events
can be enabled at the same time (though, because they are emitted
consecutively, a small timestamp offset is visible between them).

Example output from the 3 new events:
sched_switch_fair: prev_comm=cat prev_pid=2179 prev_state=R+ ==> next_comm=b
                   next_pid=874 next_policy=SCHED_NORMAL next_nice=0

sched_switch_rt: prev_comm=swapper/10 prev_pid=0 prev_state=R ==> next_comm=b
                 next_pid=2215 next_policy=SCHED_FIFO next_nice=0
		 next_rt_priority=100

sched_switch_dl: prev_comm=swapper/10 prev_pid=0 prev_state=R ==> next_comm=b
                 next_pid=2215 next_policy=SCHED_DEADLINE
		 next_dl_runtime=10000000 next_dl_deadline=30000000
		 next_dl_period=30000000

Cc: Peter Zijlstra <peterz@...radead.org>
Cc: Steven Rostedt (Red Hat) <rostedt@...dmis.org>
Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: Ingo Molnar <mingo@...hat.com>
Cc: Daniel Bristot de Oliveira <bristot@...hat.com>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
Signed-off-by: Julien Desfossez <jdesfossez@...icios.com>
---
 include/trace/events/sched.h | 192 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 192 insertions(+)

diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 9b90c57..c506ed1 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -5,9 +5,39 @@
 #define _TRACE_SCHED_H
 
 #include <linux/sched.h>
+#include <linux/sched/deadline.h>
+#include <linux/sched/rt.h>
 #include <linux/tracepoint.h>
 #include <linux/binfmts.h>
 
+#define SCHEDULING_POLICY				\
+	EM( SCHED_NORMAL,	"SCHED_NORMAL")		\
+	EM( SCHED_FIFO,		"SCHED_FIFO")		\
+	EM( SCHED_RR,		"SCHED_RR")		\
+	EM( SCHED_BATCH,	"SCHED_BATCH")		\
+	EM( SCHED_IDLE,		"SCHED_IDLE")		\
+	EMe(SCHED_DEADLINE,	"SCHED_DEADLINE")
+
+/*
+ * First define the enums in the above macros to be exported to userspace
+ * via TRACE_DEFINE_ENUM().
+ */
+#undef EM
+#undef EMe
+#define EM(a, b)       TRACE_DEFINE_ENUM(a);
+#define EMe(a, b)      TRACE_DEFINE_ENUM(a);
+
+SCHEDULING_POLICY
+
+/*
+ * Now redefine the EM() and EMe() macros to map the enums to the strings
+ * that will be printed in the output.
+ */
+#undef EM
+#undef EMe
+#define EM(a, b)       {a, b},
+#define EMe(a, b)      {a, b}
+
 /*
  * Tracepoint for calling kthread_stop, performed to end a kthread:
  */
@@ -162,6 +192,168 @@ static inline long __trace_sched_switch_state(bool preempt, struct task_struct *
 );
 
 /*
+ * Tracepoint for task switches, performed by the scheduler where the next
+ * task has a fair scheduling policy.
+ */
+TRACE_EVENT_MAP_COND(sched_switch, sched_switch_fair,
+
+	TP_PROTO(bool preempt,
+		 struct task_struct *prev,
+		 struct task_struct *next),
+
+	TP_ARGS(preempt, prev, next),
+
+	TP_CONDITION(!dl_prio(next->prio) && !rt_prio(next->prio)),
+
+	TP_STRUCT__entry(
+		__array(	char,	prev_comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	prev_pid			)
+		__field(	long,	prev_state			)
+		__array(	char,	next_comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	next_pid			)
+		__field(	unsigned int, next_policy		)
+		__field(	int,	next_nice			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
+		__entry->prev_pid	= prev->pid;
+		__entry->prev_state	= __trace_sched_switch_state(preempt, prev);
+		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
+		__entry->next_pid	= next->pid;
+		__entry->next_policy	= next->policy;
+		__entry->next_nice	= task_nice(next);
+	),
+
+	TP_printk("prev_comm=%s prev_pid=%d prev_state=%s%s ==> next_comm=%s "
+			"next_pid=%d next_policy=%s next_nice=%d",
+		__entry->prev_comm, __entry->prev_pid,
+		__entry->prev_state & (TASK_STATE_MAX-1) ?
+		  __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|",
+				{ 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
+				{ 16, "Z" }, { 32, "X" }, { 64, "x" },
+				{ 128, "K" }, { 256, "W" }, { 512, "P" },
+				{ 1024, "N" }) : "R",
+		__entry->prev_state & TASK_STATE_MAX ? "+" : "",
+		__entry->next_comm, __entry->next_pid,
+		__print_symbolic(__entry->next_policy, SCHEDULING_POLICY),
+		__entry->next_nice)
+);
+
+/*
+ * Tracepoint for task switches, performed by the scheduler where the next
+ * task has a rt scheduling policy.
+ */
+TRACE_EVENT_MAP_COND(sched_switch, sched_switch_rt,
+
+	TP_PROTO(bool preempt,
+		 struct task_struct *prev,
+		 struct task_struct *next),
+
+	TP_ARGS(preempt, prev, next),
+
+	TP_CONDITION(rt_prio(next->prio)),
+
+	TP_STRUCT__entry(
+		__array(	char,	prev_comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	prev_pid			)
+		__field(	long,	prev_state			)
+		__array(	char,	next_comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	next_pid			)
+		__field(	unsigned int, next_policy		)
+		__field(	int,	next_nice			)
+		__field(	unsigned int,	next_rt_priority	)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
+		__entry->prev_pid	= prev->pid;
+		__entry->prev_state	= __trace_sched_switch_state(preempt, prev);
+		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
+		__entry->next_pid	= next->pid;
+		/*
+		 * With PI, a real RT policy might not be set and the default
+		 * RT policy is SCHED_FIFO.
+		 */
+		__entry->next_policy	= (next->policy == SCHED_RR) ?
+						SCHED_RR : SCHED_FIFO;
+		__entry->next_nice	= task_nice(next);
+		__entry->next_rt_priority = MAX_RT_PRIO - 1 - next->prio;
+	),
+
+	TP_printk("prev_comm=%s prev_pid=%d prev_state=%s%s ==> next_comm=%s "
+			"next_pid=%d next_policy=%s next_nice=%d "
+			"next_rt_priority=%u",
+		__entry->prev_comm, __entry->prev_pid,
+		__entry->prev_state & (TASK_STATE_MAX-1) ?
+		  __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|",
+				{ 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
+				{ 16, "Z" }, { 32, "X" }, { 64, "x" },
+				{ 128, "K" }, { 256, "W" }, { 512, "P" },
+				{ 1024, "N" }) : "R",
+		__entry->prev_state & TASK_STATE_MAX ? "+" : "",
+		__entry->next_comm, __entry->next_pid,
+		__print_symbolic(__entry->next_policy, SCHEDULING_POLICY),
+		__entry->next_nice, __entry->next_rt_priority)
+);
+
+/*
+ * Tracepoint for task switches, performed by the scheduler where the next
+ * task has a deadline scheduling policy.
+ */
+TRACE_EVENT_MAP_COND(sched_switch, sched_switch_dl,
+
+	TP_PROTO(bool preempt,
+		 struct task_struct *prev,
+		 struct task_struct *next),
+
+	TP_ARGS(preempt, prev, next),
+
+	TP_CONDITION(dl_prio(next->prio)),
+
+	TP_STRUCT__entry(
+		__array(	char,	prev_comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	prev_pid			)
+		__field(	long,	prev_state			)
+		__array(	char,	next_comm,	TASK_COMM_LEN	)
+		__field(	pid_t,	next_pid			)
+		__field(	unsigned int, next_policy		)
+		__field( 	u64,	next_dl_runtime			)
+		__field( 	u64,	next_dl_deadline		)
+		__field( 	u64,	next_dl_period			)
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
+		__entry->prev_pid	= prev->pid;
+		__entry->prev_state	= __trace_sched_switch_state(preempt, prev);
+		memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
+		__entry->next_pid	= next->pid;
+		__entry->next_policy	= SCHED_DEADLINE;
+		__entry->next_dl_runtime	= next->dl.dl_runtime;
+		__entry->next_dl_deadline	= next->dl.dl_deadline;
+		__entry->next_dl_period		= next->dl.dl_period;
+	),
+
+	TP_printk("prev_comm=%s prev_pid=%d prev_state=%s%s ==> next_comm=%s "
+			"next_pid=%d next_policy=%s next_dl_runtime=%Lu "
+			"next_dl_deadline=%Lu next_dl_period=%Lu",
+		__entry->prev_comm, __entry->prev_pid,
+		__entry->prev_state & (TASK_STATE_MAX-1) ?
+		  __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|",
+				{ 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
+				{ 16, "Z" }, { 32, "X" }, { 64, "x" },
+				{ 128, "K" }, { 256, "W" }, { 512, "P" },
+				{ 1024, "N" }) : "R",
+		__entry->prev_state & TASK_STATE_MAX ? "+" : "",
+		__entry->next_comm, __entry->next_pid,
+		__print_symbolic(__entry->next_policy, SCHEDULING_POLICY),
+		__entry->next_dl_runtime, __entry->next_dl_deadline,
+		__entry->next_dl_period)
+
+);
+
+/*
  * Tracepoint for a task being migrated:
  */
 TRACE_EVENT(sched_migrate_task,
-- 
1.9.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ