linux-kernel - [PATCH 5/9] perf trace: Handle unpaired raw_syscalls:sys

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20170401021101.10198-6-acme@kernel.org>
Date:   Fri, 31 Mar 2017 23:10:57 -0300
From:   Arnaldo Carvalho de Melo <acme@...nel.org>
To:     Ingo Molnar <mingo@...nel.org>
Cc:     linux-kernel@...r.kernel.org,
        Arnaldo Carvalho de Melo <acme@...hat.com>,
        Adrian Hunter <adrian.hunter@...el.com>,
        David Ahern <dsahern@...il.com>, Jiri Olsa <jolsa@...nel.org>,
        Luis Claudio Gonçalves <lclaudio@...hat.com>,
        Namhyung Kim <namhyung@...nel.org>,
        Wang Nan <wangnan0@...wei.com>
Subject: [PATCH 5/9] perf trace: Handle unpaired raw_syscalls:sys_exit event

From: Arnaldo Carvalho de Melo <acme@...hat.com>

Which may happen when we start a tracing session and a thread is waiting
for something like "poll" to return, in which case we better print "?"
both for the syscall entry timestamp and for the duration.

E.g.:

Tracing existing mutt session:

  # perf trace -p `pidof mutt`
          ? (     ?   ): mutt/17135  ... [continued]: poll()) = 1
      0.027 ( 0.013 ms): mutt/17135 read(buf: 0x7ffcb3c42cef, count: 1) = 1
      0.047 ( 0.008 ms): mutt/17135 poll(ufds: 0x7ffcb3c42c50, nfds: 1, timeout_msecs: 1000) = 1
      0.059 ( 0.008 ms): mutt/17135 read(buf: 0x7ffcb3c42cef, count: 1) = 1
  <SNIP>

Before it would print a large number because we'd do:

  ttrace->entry_time - trace->base_time

And entry_time would be 0, while base_time would be the timestamp for
the first event 'perf trace' reads, oops.

Cc: Adrian Hunter <adrian.hunter@...el.com>
Cc: David Ahern <dsahern@...il.com>
Cc: Jiri Olsa <jolsa@...nel.org>
Cc: Luis Claudio Gonçalves <lclaudio@...hat.com>
Cc: Namhyung Kim <namhyung@...nel.org>
Cc: Wang Nan <wangnan0@...wei.com>
Link: http://lkml.kernel.org/n/tip-wbcb93ofva2qdjd5ltn5eeqq@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@...hat.com>
---
 tools/perf/builtin-trace.c | 43 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 34 insertions(+), 9 deletions(-)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index c88f9f215e6f..7379792a6504 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -821,12 +821,21 @@ struct syscall {
 	void		    **arg_parm;
 };
 
-static size_t fprintf_duration(unsigned long t, FILE *fp)
+/*
+ * We need to have this 'calculated' boolean because in some cases we really
+ * don't know what is the duration of a syscall, for instance, when we start
+ * a session and some threads are waiting for a syscall to finish, say 'poll',
+ * in which case all we can do is to print "( ? ) for duration and for the
+ * start timestamp.
+ */
+static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
 {
 	double duration = (double)t / NSEC_PER_MSEC;
 	size_t printed = fprintf(fp, "(");
 
-	if (duration >= 1.0)
+	if (!calculated)
+		printed += fprintf(fp, "     ?   ");
+	else if (duration >= 1.0)
 		printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
 	else if (duration >= 0.01)
 		printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
@@ -1028,13 +1037,27 @@ static bool trace__filter_duration(struct trace *trace, double t)
 	return t < (trace->duration_filter * NSEC_PER_MSEC);
 }
 
-static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
+static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
 {
 	double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
 
 	return fprintf(fp, "%10.3f ", ts);
 }
 
+/*
+ * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are
+ * using ttrace->entry_time for a thread that receives a sys_exit without
+ * first having received a sys_enter ("poll" issued before tracing session
+ * starts, lost sys_enter exit due to ring buffer overflow).
+ */
+static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
+{
+	if (tstamp > 0)
+		return __trace__fprintf_tstamp(trace, tstamp, fp);
+
+	return fprintf(fp, "         ? ");
+}
+
 static bool done = false;
 static bool interrupted = false;
 
@@ -1045,10 +1068,10 @@ static void sig_handler(int sig)
 }
 
 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
-					u64 duration, u64 tstamp, FILE *fp)
+					u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
 {
 	size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
-	printed += fprintf_duration(duration, fp);
+	printed += fprintf_duration(duration, duration_calculated, fp);
 
 	if (trace->multiple_threads) {
 		if (trace->show_comm)
@@ -1450,7 +1473,7 @@ static int trace__printf_interrupted_entry(struct trace *trace, struct perf_samp
 
 	duration = sample->time - ttrace->entry_time;
 
-	printed  = trace__fprintf_entry_head(trace, trace->current, duration, ttrace->entry_time, trace->output);
+	printed  = trace__fprintf_entry_head(trace, trace->current, duration, true, ttrace->entry_time, trace->output);
 	printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
 	ttrace->entry_pending = false;
 
@@ -1497,7 +1520,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
 
 	if (sc->is_exit) {
 		if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
-			trace__fprintf_entry_head(trace, thread, 1, ttrace->entry_time, trace->output);
+			trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
 			fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
 		}
 	} else {
@@ -1545,6 +1568,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
 {
 	long ret;
 	u64 duration = 0;
+	bool duration_calculated = false;
 	struct thread *thread;
 	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
 	struct syscall *sc = trace__syscall_info(trace, evsel, id);
@@ -1573,6 +1597,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
 		duration = sample->time - ttrace->entry_time;
 		if (trace__filter_duration(trace, duration))
 			goto out;
+		duration_calculated = true;
 	} else if (trace->duration_filter)
 		goto out;
 
@@ -1588,7 +1613,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
 	if (trace->summary_only)
 		goto out;
 
-	trace__fprintf_entry_head(trace, thread, duration, ttrace->entry_time, trace->output);
+	trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
 
 	if (ttrace->entry_pending) {
 		fprintf(trace->output, "%-70s", ttrace->entry_str);
@@ -1855,7 +1880,7 @@ static int trace__pgfault(struct trace *trace,
 	thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
 			      sample->ip, &al);
 
-	trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
+	trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
 
 	fprintf(trace->output, "%sfault [",
 		evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
-- 
2.9.3