lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20161118002138.29150-3-alexis.berlemont@gmail.com>
Date:   Fri, 18 Nov 2016 01:21:38 +0100
From:   Alexis Berlemont <alexis.berlemont@...il.com>
To:     linux-kernel@...r.kernel.org
Cc:     Alexis Berlemont <alexis.berlemont@...il.com>,
        peterz@...radead.org, mingo@...hat.com, acme@...nel.org,
        alexander.shishkin@...ux.intel.com
Subject: [PATCH v3 2/2] perf: add page fault duration measures in perf trace

Thanks to the tracepoints exceptions:page_fault_kernel_exit and
exceptions:page_fault_user_exit, the subcommand perf-trace can now
print the duration of page-fault exception handling just like it is
done for syscalls.

Here is the result; the page fault durations are indicated in
parenthesis (like syscalls):

Here is an example; the durations are indicated in parenthesis just
like with syscalls:

 0.053 ( 0.053 ms): ls/152 minfault [__clear_user+0x25] => 0x694e7e (?k)
 0.114 ( 0.027 ms): ls/152 minfault [__clear_user+0x25] => 0x7fb912eaafc0 (?k)
 0.201 ( 0.030 ms): ls/152 minfault [_start+0x0] => /lib/ld-2.23.so@...70 (x.)
...

Signed-off-by: Alexis Berlemont <alexis.berlemont@...il.com>
---
 tools/perf/Documentation/perf-trace.txt |   4 +-
 tools/perf/builtin-trace.c              | 231 ++++++++++++++++++++++++++++----
 2 files changed, 208 insertions(+), 27 deletions(-)

diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 781b019..53c103c 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -117,7 +117,9 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
 -F=[all|min|maj]::
 --pf=[all|min|maj]::
 	Trace pagefaults. Optionally, you can specify whether you want minor,
-	major or all pagefaults. Default value is maj.
+	major or all pagefaults. Default value is maj. Durations of
+	page-fault handling will be printed if possible (need for some
+	architecture-dependent tracepoints).
 
 --syscalls::
 	Trace system calls. This options is enabled by default.
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 5f45166..f0957a8 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -861,6 +861,8 @@ struct thread_trace {
 	} paths;
 
 	struct intlist *syscall_stats;
+	u64	       pgfault_entry_time;
+	char           *pgfault_entry_str;
 };
 
 static struct thread_trace *thread_trace__new(void)
@@ -1797,21 +1799,56 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
 	return 0;
 }
 
-static void print_location(FILE *f, struct perf_sample *sample,
-			   struct addr_location *al,
-			   bool print_dso, bool print_sym)
+static int trace__pgfault_enter(struct trace *trace,
+				struct perf_evsel *evsel __maybe_unused,
+				union perf_event *event __maybe_unused,
+				struct perf_sample *sample)
 {
+	struct thread *thread;
+	struct thread_trace *ttrace;
+	int err = -1;
+
+	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+	if (!thread)
+		goto out;
+
+	ttrace = thread__trace(thread, trace->output);
+	if (ttrace == NULL)
+		goto out_put;
+
+	ttrace->pgfault_entry_time = sample->time;
+
+out:
+	err = 0;
+out_put:
+	thread__put(thread);
+	return err;
+}
+
+static size_t scnprintf_location(char *bf, size_t size,
+				struct perf_sample *sample,
+				struct addr_location *al,
+				bool print_dso, bool print_sym)
+{
+	size_t printed = 0;
 
 	if ((verbose || print_dso) && al->map)
-		fprintf(f, "%s@", al->map->dso->long_name);
+		printed += scnprintf(bf + printed,
+				size - printed, "%s@", al->map->dso->long_name);
 
 	if ((verbose || print_sym) && al->sym)
-		fprintf(f, "%s+0x%" PRIx64, al->sym->name,
-			al->addr - al->sym->start);
+		printed += scnprintf(bf + printed,
+				size - printed,
+				"%s+0x%" PRIx64, al->sym->name,
+				al->addr - al->sym->start);
 	else if (al->map)
-		fprintf(f, "0x%" PRIx64, al->addr);
+		printed += scnprintf(bf + printed,
+				size - printed, "0x%" PRIx64, al->addr);
 	else
-		fprintf(f, "0x%" PRIx64, sample->addr);
+		printed += scnprintf(bf + printed,
+				size - printed, "0x%" PRIx64, sample->addr);
+
+	return printed;
 }
 
 static int trace__pgfault(struct trace *trace,
@@ -1823,13 +1860,22 @@ static int trace__pgfault(struct trace *trace,
 	struct addr_location al;
 	char map_type = 'd';
 	struct thread_trace *ttrace;
+	size_t printed = 0;
 	int err = -1;
 	int callchain_ret = 0;
 
 	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+	if (!thread)
+		goto out;
 
-	if (sample->callchain) {
-		callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
+	ttrace = thread__trace(thread, trace->output);
+	if (ttrace == NULL)
+		goto out_put;
+
+	if (!ttrace->pgfault_entry_time && sample->callchain) {
+		callchain_ret =
+			trace__resolve_callchain(trace, evsel,
+						sample, &callchain_cursor);
 		if (callchain_ret == 0) {
 			if (callchain_cursor.nr < trace->min_stack)
 				goto out_put;
@@ -1837,10 +1883,6 @@ static int trace__pgfault(struct trace *trace,
 		}
 	}
 
-	ttrace = thread__trace(thread, trace->output);
-	if (ttrace == NULL)
-		goto out_put;
-
 	if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
 		ttrace->pfmaj++;
 	else
@@ -1849,18 +1891,27 @@ static int trace__pgfault(struct trace *trace,
 	if (trace->summary_only)
 		goto out;
 
+	if (ttrace->pgfault_entry_str == NULL) {
+		ttrace->pgfault_entry_str = malloc(trace__entry_str_size);
+		if (!ttrace->pgfault_entry_str)
+			goto out_put;
+		ttrace->pgfault_entry_str[0] = '\0';
+	}
+
 	thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
 			      sample->ip, &al);
 
-	trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
-
-	fprintf(trace->output, "%sfault [",
-		evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
-		"maj" : "min");
+	printed += scnprintf(ttrace->pgfault_entry_str + printed,
+			trace__entry_str_size - printed, "%sfault [",
+			evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
+			"maj" : "min");
 
-	print_location(trace->output, sample, &al, false, true);
+	printed += scnprintf_location(ttrace->pgfault_entry_str + printed,
+				trace__entry_str_size - printed,
+				sample, &al, false, true);
 
-	fprintf(trace->output, "] => ");
+	printed += scnprintf(ttrace->pgfault_entry_str + printed,
+			trace__entry_str_size - printed, "] => ");
 
 	thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
 				   sample->addr, &al);
@@ -1875,14 +1926,97 @@ static int trace__pgfault(struct trace *trace,
 			map_type = '?';
 	}
 
-	print_location(trace->output, sample, &al, true, false);
+	printed += scnprintf_location(ttrace->pgfault_entry_str + printed,
+				trace__entry_str_size - printed,
+				sample, &al, true, false);
+
+	printed += scnprintf(ttrace->pgfault_entry_str + printed,
+			trace__entry_str_size - printed,
+			" (%c%c)", map_type, al.level);
+
+	if (!ttrace->pgfault_entry_time) {
+		trace__fprintf_entry_head(trace, thread,
+					0, sample->time, trace->output);
+		fprintf(trace->output, "%-70s\n", ttrace->pgfault_entry_str);
+
+		if (callchain_ret > 0)
+			trace__fprintf_callchain(trace, sample);
+		else if (callchain_ret < 0)
+			pr_err("Problem processing %s callchain, skipping...\n",
+				perf_evsel__name(evsel));
+	}
+
+out:
+	err = 0;
+out_put:
+	thread__put(thread);
+	return err;
+}
+
+static int trace__pgfault_exit(struct trace *trace, struct perf_evsel *evsel,
+			      union perf_event *event __maybe_unused,
+			      struct perf_sample *sample)
+{
+	struct thread *thread;
+	struct thread_trace *ttrace;
+	u64 duration = 0;
+	int err = -1, callchain_ret = 0;
+
+	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+
+	ttrace = thread__priv(thread);
+	if (!ttrace)
+		goto out_put;
+
+	if (!ttrace->pgfault_entry_time)
+		goto out;
+
+	/*
+	 * The check below is necessary for a specific case: it is
+	 * possible to enable only major (or minor) page faults
+	 * software events but it is impossible to filter page-fault
+	 * related tracepoints according to the major / minor
+	 * characteristic.
+	 */
+
+	if (!ttrace->pgfault_entry_str ||
+		strlen(ttrace->pgfault_entry_str) == 0)
+		goto out;
+
+	if (sample->callchain) {
+		callchain_ret =
+			trace__resolve_callchain(trace, evsel,
+						sample, &callchain_cursor);
+		if (callchain_ret == 0) {
+			if (callchain_cursor.nr < trace->min_stack)
+				goto out;
+			callchain_ret = 1;
+		}
+	}
+
+	if (ttrace->pgfault_entry_time) {
+		duration = sample->time - ttrace->pgfault_entry_time;
+		if (trace__filter_duration(trace, duration))
+			goto out;
+	}
+
+	trace__fprintf_entry_head(trace, thread,
+				duration, sample->time, trace->output);
 
-	fprintf(trace->output, " (%c%c)\n", map_type, al.level);
+	fprintf(trace->output, "%-70s\n", ttrace->pgfault_entry_str);
+
+	/*
+	 * Once the string is printed; clear it just in case the next
+	 * software events is filtered (because of major / minor.
+	 */
+	ttrace->pgfault_entry_str[0] = '\0';
 
 	if (callchain_ret > 0)
 		trace__fprintf_callchain(trace, sample);
 	else if (callchain_ret < 0)
-		pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
+		pr_err("Problem processing %s callchain, skipping...\n",
+			perf_evsel__name(evsel));
+
 out:
 	err = 0;
 out_put:
@@ -2062,6 +2196,43 @@ static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
 	return evsel;
 }
 
+static int trace__add_pgfault_newtp(struct trace *trace)
+{
+	int err = -1;
+	struct perf_evlist *evlist = trace->evlist;
+
+	/*
+	 * The tracepoints exceptions::page_fault_* are not available
+	 * on all architecture.; so, we consider it is not an error if
+	 * the 1st one's initialization is KO...
+	 */
+	err = perf_evlist__add_newtp(evlist, "exceptions",
+				"page_fault_kernel_exit", trace__pgfault_exit);
+	if (err < 0)
+		return 0;
+
+	/*
+	 * ...however, if the 2nd, 3rd or 4th init is KO; there is
+	 * definitely something wrong not related with the absence of
+	 * tracepoints
+	 */
+	err = perf_evlist__add_newtp(evlist, "exceptions",
+				"page_fault_user_exit", trace__pgfault_exit);
+	if (err < 0)
+		return -1;
+
+	err = perf_evlist__add_newtp(evlist, "exceptions",
+				"page_fault_kernel_entry",
+				trace__pgfault_enter);
+	if (err < 0)
+		return -1;
+
+	err = perf_evlist__add_newtp(evlist, "exceptions",
+				"page_fault_user_entry", trace__pgfault_enter);
+
+	return err;
+}
+
 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
 {
 	const u32 type = event->header.type;
@@ -2193,9 +2364,12 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 		perf_evlist__add(evlist, pgfault_min);
 	}
 
+	if (trace->trace_pgfaults && trace__add_pgfault_newtp(trace))
+		goto out_error_pgfault_tp;
+
 	if (trace->sched &&
-	    perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
-				   trace__sched_stat_runtime))
+		perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
+				trace__sched_stat_runtime))
 		goto out_error_sched_stat_runtime;
 
 	err = perf_evlist__create_maps(evlist, &trace->opts.target);
@@ -2399,6 +2573,11 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
 	goto out_error;
 
+out_error_pgfault_tp:
+	tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf),
+				"exceptions", "page_fault_(kernel|user)");
+	goto out_error;
+
 out_error_mmap:
 	perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
 	goto out_error;
-- 
2.10.2

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ