linux-kernel - [RFC/PATCH] perf sched: Implement 'perf sched runtime' command

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <1440483083-19376-1-git-send-email-namhyung@kernel.org>
Date:	Tue, 25 Aug 2015 15:11:23 +0900
From:	Namhyung Kim <namhyung@...nel.org>
To:	Arnaldo Carvalho de Melo <acme@...nel.org>
Cc:	Ingo Molnar <mingo@...nel.org>,
	Peter Zijlstra <a.p.zijlstra@...llo.nl>,
	Jiri Olsa <jolsa@...hat.com>,
	LKML <linux-kernel@...r.kernel.org>,
	David Ahern <dsahern@...il.com>
Subject: [RFC/PATCH] perf sched: Implement 'perf sched runtime' command

Currently 'perf sched record' commandsrecords various task stat info but
other perf sched commands don't use them.  This is a very simple
implementation of showing those info to users.

  $ sudo perf sched record -a sleep 1
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.346 MB perf.data (774 samples) ]

  $ sudo perf sched runtime | head
    TID:COMM                  Run-time    Sleep-time     Wait-time   IOWait-time  Switch  Migrate
  ----------------------  ------------  ------------  ------------  ------------  ------  -------
   1347:Socket Thread         20901793    4895322293             0             0       2        2
  14855:sshd                   7404420     660792968             0             0       4        0
   1342:firefox                3657944     998405339         82358             0      11        0
   1360:Timer                  1429014    1002239227             0             0      11        0
    170:systemd-journal        1209607     644878917             0             0       1        0
      7:rcu_preempt            1081471     958866004        416514             0      26        6
  14856:sshd                    973632     506809918             0             0       6        0
  14859:sleep                   887558    1030418783          4055             0       3        1

Cc: David Ahern <dsahern@...il.com>
Signed-off-by: Namhyung Kim <namhyung@...nel.org>
---
 tools/perf/builtin-sched.c | 361 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 359 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 33962612a5e9..112596121242 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -98,6 +98,18 @@ struct work_atoms {
 	int			num_merged;
 };
 
+struct task_runtime {
+	struct list_head	list;
+	struct rb_node		node;
+	struct thread		*thread;
+	u64			run_time;
+	u64			sleep_time;
+	u64			wait_time;
+	u64			iowait_time;
+	int			num_switch;
+	int			num_migrate;
+};
+
 typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *);
 
 struct perf_sched;
@@ -109,6 +121,15 @@ struct trace_sched_handler {
 	int (*runtime_event)(struct perf_sched *sched, struct perf_evsel *evsel,
 			     struct perf_sample *sample, struct machine *machine);
 
+	int (*sleep_event)(struct perf_sched *sched, struct perf_evsel *evsel,
+			   struct perf_sample *sample, struct machine *machine);
+
+	int (*wait_event)(struct perf_sched *sched, struct perf_evsel *evsel,
+			  struct perf_sample *sample, struct machine *machine);
+
+	int (*iowait_event)(struct perf_sched *sched, struct perf_evsel *evsel,
+			    struct perf_sample *sample, struct machine *machine);
+
 	int (*wakeup_event)(struct perf_sched *sched, struct perf_evsel *evsel,
 			    struct perf_sample *sample, struct machine *machine);
 
@@ -170,7 +191,7 @@ struct perf_sched {
 	u64		 all_count;
 	u64		 cpu_last_switched[MAX_CPUS];
 	struct rb_root	 atom_root, sorted_atom_root, merged_atom_root;
-	struct list_head sort_list, cmp_pid;
+	struct list_head sort_list, cmp_pid, runtime_list;
 	bool force;
 	bool skip_merge;
 };
@@ -1424,6 +1445,120 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
 	return 0;
 }
 
+static struct task_runtime *get_task_runtime(struct perf_sched *sched,
+					     struct machine *machine, int pid)
+{
+	struct thread *th;
+	struct task_runtime *tr;
+
+	th = machine__findnew_thread(machine, -1, pid);
+	if (th == NULL)
+		return NULL;
+
+	tr = thread__priv(th);
+	if (tr == NULL) {
+		tr = zalloc(sizeof(*tr));
+		if (tr == NULL)
+			return NULL;
+
+		thread__set_priv(th, tr);
+
+		/* hold a refcnt not to free the thread */
+		tr->thread = thread__get(th);
+
+		list_add(&tr->list, &sched->runtime_list);
+	}
+	return tr;
+}
+
+static int runtime_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
+				struct perf_sample *sample, struct machine *machine)
+{
+	const u32 pid = perf_evsel__intval(evsel, sample, "prev_pid");
+	struct task_runtime *tr;
+
+	tr = get_task_runtime(sched, machine, pid);
+	if (tr == NULL)
+		return -1;
+
+	tr->num_switch++;
+	return 0;
+}
+
+static int runtime_runtime_event(struct perf_sched *sched, struct perf_evsel *evsel,
+				 struct perf_sample *sample, struct machine *machine)
+{
+	const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+	const u64 time = perf_evsel__intval(evsel, sample, "runtime");
+	struct task_runtime *tr;
+
+	tr = get_task_runtime(sched, machine, pid);
+	if (tr == NULL)
+		return -1;
+
+	tr->run_time += time;
+	return 0;
+}
+
+static int runtime_sleep_event(struct perf_sched *sched, struct perf_evsel *evsel,
+			       struct perf_sample *sample, struct machine *machine)
+{
+	const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+	const u64 time = perf_evsel__intval(evsel, sample, "delay");
+	struct task_runtime *tr;
+
+	tr = get_task_runtime(sched, machine, pid);
+	if (tr == NULL)
+		return -1;
+
+	tr->sleep_time += time;
+	return 0;
+}
+
+static int runtime_wait_event(struct perf_sched *sched, struct perf_evsel *evsel,
+			      struct perf_sample *sample, struct machine *machine)
+{
+	const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+	const u64 time = perf_evsel__intval(evsel, sample, "delay");
+	struct task_runtime *tr;
+
+	tr = get_task_runtime(sched, machine, pid);
+	if (tr == NULL)
+		return -1;
+
+	tr->wait_time += time;
+	return 0;
+}
+
+static int runtime_iowait_event(struct perf_sched *sched, struct perf_evsel *evsel,
+				struct perf_sample *sample, struct machine *machine)
+{
+	const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+	const u64 time = perf_evsel__intval(evsel, sample, "delay");
+	struct task_runtime *tr;
+
+	tr = get_task_runtime(sched, machine, pid);
+	if (tr == NULL)
+		return -1;
+
+	tr->iowait_time += time;
+	return 0;
+}
+
+static int runtime_migrate_event(struct perf_sched *sched, struct perf_evsel *evsel,
+				 struct perf_sample *sample, struct machine *machine)
+{
+	const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+	struct task_runtime *tr;
+
+	tr = get_task_runtime(sched, machine, pid);
+	if (tr == NULL)
+		return -1;
+
+	tr->num_migrate++;
+	return 0;
+}
+
 static int process_sched_switch_event(struct perf_tool *tool,
 				      struct perf_evsel *evsel,
 				      struct perf_sample *sample,
@@ -1463,6 +1598,45 @@ static int process_sched_runtime_event(struct perf_tool *tool,
 	return 0;
 }
 
+static int process_sched_sleep_event(struct perf_tool *tool,
+				     struct perf_evsel *evsel,
+				     struct perf_sample *sample,
+				     struct machine *machine)
+{
+	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
+
+	if (sched->tp_handler->sleep_event)
+		return sched->tp_handler->sleep_event(sched, evsel, sample, machine);
+
+	return 0;
+}
+
+static int process_sched_wait_event(struct perf_tool *tool,
+				    struct perf_evsel *evsel,
+				    struct perf_sample *sample,
+				    struct machine *machine)
+{
+	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
+
+	if (sched->tp_handler->wait_event)
+		return sched->tp_handler->wait_event(sched, evsel, sample, machine);
+
+	return 0;
+}
+
+static int process_sched_iowait_event(struct perf_tool *tool,
+				      struct perf_evsel *evsel,
+				      struct perf_sample *sample,
+				      struct machine *machine)
+{
+	struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
+
+	if (sched->tp_handler->iowait_event)
+		return sched->tp_handler->iowait_event(sched, evsel, sample, machine);
+
+	return 0;
+}
+
 static int perf_sched__process_fork_event(struct perf_tool *tool,
 					  union perf_event *event,
 					  struct perf_sample *sample,
@@ -1560,6 +1734,143 @@ out_delete:
 	return rc;
 }
 
+static int perf_sched__read_runtime_events(struct perf_sched *sched)
+{
+	const struct perf_evsel_str_handler handlers[] = {
+		{ "sched:sched_switch",	      process_sched_switch_event, },
+		{ "sched:sched_stat_runtime", process_sched_runtime_event, },
+		{ "sched:sched_stat_sleep",   process_sched_sleep_event, },
+		{ "sched:sched_stat_wait",    process_sched_wait_event, },
+		{ "sched:sched_stat_iowait",  process_sched_iowait_event, },
+		{ "sched:sched_migrate_task", process_sched_migrate_task_event, },
+	};
+	struct perf_session *session;
+	struct perf_data_file file = {
+		.path = input_name,
+		.mode = PERF_DATA_MODE_READ,
+		.force = sched->force,
+	};
+	int rc = -1;
+
+	session = perf_session__new(&file, false, &sched->tool);
+	if (session == NULL) {
+		pr_debug("No Memory for session\n");
+		return -1;
+	}
+
+	symbol__init(&session->header.env);
+
+	if (perf_session__set_tracepoints_handlers(session, handlers))
+		goto out_delete;
+
+	if (perf_session__has_traces(session, "record -R")) {
+		int err = perf_session__process_events(session);
+		if (err) {
+			pr_err("Failed to process events, error %d", err);
+			goto out_delete;
+		}
+
+		sched->nr_events      = session->evlist->stats.nr_events[0];
+		sched->nr_lost_events = session->evlist->stats.total_lost;
+		sched->nr_lost_chunks = session->evlist->stats.nr_events[PERF_RECORD_LOST];
+	}
+
+	rc = 0;
+out_delete:
+	perf_session__delete(session);
+	return rc;
+}
+
+static int task_runtime_cmp(struct perf_sched *sched __maybe_unused,
+			    struct task_runtime *a, struct task_runtime *b)
+{
+	return b->run_time - a->run_time;
+}
+
+static void sort_task_runtime(struct perf_sched *sched, struct task_runtime *tr)
+{
+	struct rb_root *root = &sched->sorted_atom_root;
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*p) {
+		struct task_runtime *iter;
+		int cmp;
+
+		parent = *p;
+		iter = rb_entry(parent, struct task_runtime, node);
+
+		cmp = task_runtime_cmp(sched, iter, tr);
+
+		if (cmp > 0)
+			p = &parent->rb_left;
+		else
+			p = &parent->rb_right;
+	}
+
+	rb_link_node(&tr->node, parent, p);
+	rb_insert_color(&tr->node, root);
+}
+
+static void perf_sched__sort_runtime(struct perf_sched *sched)
+{
+	while (!list_empty(&sched->runtime_list)) {
+		struct task_runtime *tr;
+
+		tr = list_first_entry(&sched->runtime_list,
+				      struct task_runtime, list);
+		list_del(&tr->list);
+
+		sort_task_runtime(sched, tr);
+	}
+}
+
+static void print_task_runtime(struct task_runtime *tr)
+{
+	struct thread *th = tr->thread;
+
+	/*      tid:comm   run_time     sleep_time   wait_time    iowait_time  switch  migrate*/
+	printf("%5d:%-16s  %12"PRIu64"  %12"PRIu64"  %12"PRIu64"  %12"PRIu64"  %6d  %7d\n",
+	       th->tid, thread__comm_str(th), tr->run_time, tr->sleep_time,
+	       tr->wait_time, tr->iowait_time, tr->num_switch, tr->num_migrate);
+}
+
+static void perf_sched__print_runtime(struct perf_sched *sched)
+{
+	struct task_runtime *tr;
+	struct rb_root *root = &sched->sorted_atom_root;
+	struct rb_node *node = rb_first(root);
+
+	printf("%5s:%-16s  %12s  %12s  %12s  %12s  %6s  %7s\n",
+	       "TID", "COMM", "Run-time", "Sleep-time", "Wait-time",
+	       "IOWait-time", "Switch", "Migrate");
+	printf("%.22s  %.12s  %.12s  %.12s  %.12s  %.6s  %.7s\n",
+	       graph_dotted_line, graph_dotted_line, graph_dotted_line,
+	       graph_dotted_line, graph_dotted_line, graph_dotted_line,
+	       graph_dotted_line);
+
+	while (node) {
+		tr = rb_entry(node, struct task_runtime, node);
+		print_task_runtime(tr);
+
+		node = rb_next(node);
+	}
+
+	/* release thread refcnt */
+	node = rb_first(root);
+	while (node) {
+		struct rb_node *next = rb_next(node);
+		rb_erase(node, root);
+
+		tr = rb_entry(node, struct task_runtime, node);
+		thread__set_priv(tr->thread, NULL);
+		thread__put(tr->thread);
+		free(tr);
+
+		node = next;
+	}
+}
+
 static void print_bad_events(struct perf_sched *sched)
 {
 	if (sched->nr_unordered_timestamps && sched->nr_timestamps) {
@@ -1720,6 +2031,23 @@ static int perf_sched__replay(struct perf_sched *sched)
 	return 0;
 }
 
+static int perf_sched__runtime(struct perf_sched *sched)
+{
+	setup_pager();
+
+	INIT_LIST_HEAD(&sched->runtime_list);
+	if (perf_sched__read_runtime_events(sched))
+		return -1;
+
+	perf_sched__sort_runtime(sched);
+	perf_sched__print_runtime(sched);
+
+	print_bad_events(sched);
+	printf("\n");
+
+	return 0;
+}
+
 static void setup_sorting(struct perf_sched *sched, const struct option *options,
 			  const char * const usage_msg[])
 {
@@ -1821,6 +2149,14 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"),
 	OPT_END()
 	};
+	const struct option runtime_options[] = {
+	OPT_INCR('v', "verbose", &verbose,
+		    "be more verbose (show symbol address, etc)"),
+	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
+		    "dump raw trace in ASCII"),
+	OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"),
+	OPT_END()
+	};
 	const struct option sched_options[] = {
 	OPT_STRING('i', "input", &input_name, "file",
 		    "input file name"),
@@ -1838,8 +2174,13 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
 		"perf sched replay [<options>]",
 		NULL
 	};
+	const char * const runtime_usage[] = {
+		"perf sched runtime [<options>]",
+		NULL
+	};
 	const char *const sched_subcommands[] = { "record", "latency", "map",
-						  "replay", "script", NULL };
+						  "replay", "script", "runtime",
+						  NULL };
 	const char *sched_usage[] = {
 		NULL,
 		NULL
@@ -1858,6 +2199,14 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
 		.switch_event	    = replay_switch_event,
 		.fork_event	    = replay_fork_event,
 	};
+	struct trace_sched_handler runtime_ops  = {
+		.switch_event	    = runtime_switch_event,
+		.runtime_event	    = runtime_runtime_event,
+		.sleep_event	    = runtime_sleep_event,
+		.wait_event	    = runtime_wait_event,
+		.iowait_event	    = runtime_iowait_event,
+		.migrate_task_event = runtime_migrate_event,
+	};
 	unsigned int i;
 
 	for (i = 0; i < ARRAY_SIZE(sched.curr_pid); i++)
@@ -1897,6 +2246,14 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
 				usage_with_options(replay_usage, replay_options);
 		}
 		return perf_sched__replay(&sched);
+	} else if (!strncmp(argv[0], "run", 3)) {
+		sched.tp_handler = &runtime_ops;
+		if (argc) {
+			argc = parse_options(argc, argv, runtime_options, runtime_usage, 0);
+			if (argc)
+				usage_with_options(runtime_usage, runtime_options);
+		}
+		return perf_sched__runtime(&sched);
 	} else {
 		usage_with_options(sched_usage, sched_options);
 	}
-- 
2.5.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/