lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-ID: <20140322020156.GA12474@us.ibm.com>
Date:	Fri, 21 Mar 2014 19:01:56 -0700
From:	Sukadev Bhattiprolu <sukadev@...ux.vnet.ibm.com>
To:	Arnaldo Carvalho de Melo <acme@...stprotocols.net>
Cc:	Jiri Olsa <jolsa@...hat.com>, Paul Mackerras <paulus@...ba.org>,
	Peter Zijlstra <a.p.zijlstra@...llo.nl>,
	Anton Blanchard <anton@....ibm.com>, linuxppc-dev@...abs.org,
	linux-kernel@...r.kernel.org
Subject: [RFC][PATCH] perf: Add 'merge-recursive' callchain option


>From 9ad9432dab2bf4d1c8e6ff9201e88d5ae9f3994a Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@...ux.vnet.ibm.com>
Date: Wed, 19 Mar 2014 20:24:22 -0500
Subject: [PATCH 1/1] perf: Add 'merge-recursive' callchain option

Powerpc saves the link register (LR) with each sample to help resolve
callchains for programs involving tail calls. Sometimes the value in the
LR is same as the NIP register. Other times it is not. This results in
callchains samples like these:

3547953334790 0x1ec0 [0x78]: PERF_RECORD_SAMPLE(IP, 2): 4667/4667: 0x80a7be3c58 period: 1773985 addr: 0
... chain: nr:9
.....  0: fffffffffffffe00
.....  1: 00000080a7be3c58	__random
.....  2: 00000080a7be3c40	__random
.....  3: 00000080a7be4270	rand
.....  4: 0000000010000784	do_my_sprintf
.....  5: 00000000100009d8	main
.....  6: 00000080a7bc482c
.....  7: 00000080a7bc4a34
.....  8: 0000000000000000
 ... thread: sprintft:4667
 ...... dso: /usr/lib64/libc-2.18.so

67470723107802 0x2f10 [0x78]: PERF_RECORD_SAMPLE(IP, 0x2): 5706/5706: 0x80a7be3c20 period: 872629 addr: 0
... chain: nr:9
.....  0: fffffffffffffe00
.....  1: 00000080a7be3c20	__random
.....  2: 00000080a7be4270	rand
.....  3: 00000080a7be4270	rand
.....  4: 0000000010000784
.....  5: 00000000100009d8
.....  6: 00000080a7bc482c
.....  7: 00000080a7bc4a34
.....  8: 0000000000000000
 ... thread: sprintft:5706
 ...... dso: /usr/lib64/libc-2.18.so

67470738362072 0x4cf8 [0x78]: PERF_RECORD_SAMPLE(IP, 0x2): 5706/5706: 0x80a7be3c14 period: 869774 addr: 0
... chain: nr:9
.....  0: fffffffffffffe00
.....  1: 00000080a7be3c14	__random
.....  2: 00000080a7be4270	rand
.....  3: 00000080a7be4270	rand
.....  4: 0000000010000784	do_my_sprintf
.....  5: 00000000100009d8	main
.....  6: 00000080a7bc482c
.....  7: 00000080a7bc4a34
.....  8: 0000000000000000
 ... thread: sprintft:5706
 ...... dso: /usr/lib64/libc-2.18.so

In the perf tool, these samples end up on different branches of the RB-Tree
resulting in duplicated arcs in the final call-graph.

    15.02%  sprintft  libc-2.18.so       [.] __random
            |
            --- __random
               |
               |--58.45%-- rand
               |          rand
               |          do_my_sprintf
               |          main
               |          generic_start_main.isra.0
               |          __libc_start_main
               |          0x0
               |
                --41.55%-- __random
                          rand
                          do_my_sprintf
                          main
                          generic_start_main.isra.0
                          __libc_start_main
                          0x0

This is an RFC for adding a 'merge-recursive' call-graph option that would
discard the duplicate entries resulting in a more compact call-graph. The
duplicates can be either identical instruction pointer values or IP values
within the same function.

	perf report --call-graph=fractal,0.5,callee,function,merge-recursive

    15.02%  sprintft  libc-2.18.so       [.] __random
            |
            ---__random
               rand
               do_my_sprintf
               main
               generic_start_main.isra.0
               __libc_start_main
                (nil)

This option could also be used to collapse call-graphs of recursive functions.

AFAICT, the existing CCKEY_FUNCTION does not address this case because
the callchain lengths can vary across samples.

Signed-off-by: Sukadev Bhattiprolu <sukadev@...ux.vnet.ibm.com>
---
 tools/perf/builtin-report.c |   14 ++++++++++++--
 tools/perf/util/callchain.h |    1 +
 tools/perf/util/machine.c   |   39 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index d882b6f..0b68749 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -647,6 +647,16 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset)
 		callchain_param.key = CCKEY_ADDRESS;
 	else
 		return -1;
+
+	tok2 = strtok(NULL, ",");
+	if (!tok2)
+		goto setup;
+
+	if (!strncmp(tok2, "merge-recursive", strlen("merge-recursive")))
+		callchain_param.merge_recursive = 1;
+	else
+		return -1;
+
 setup:
 	if (callchain_register_param(&callchain_param) < 0) {
 		pr_err("Can't register callchain params\n");
@@ -762,8 +772,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 		   "regex filter to identify parent, see: '--sort parent'"),
 	OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other,
 		    "Only display entries with parent-match"),
-	OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order",
-		     "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). "
+	OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order,merge-recursive",
+		     "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address), merge or don't merge recursive calls"
 		     "Default: fractal,0.5,callee,function", &parse_callchain_opt, callchain_default_opt),
 	OPT_INTEGER(0, "max-stack", &report.max_stack,
 		    "Set the maximum stack depth when parsing the callchain, "
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 8ad97e9..d8d0822 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -53,6 +53,7 @@ struct callchain_param {
 	sort_chain_func_t	sort;
 	enum chain_order	order;
 	enum chain_key		key;
+	u32			merge_recursive;
 };
 
 struct callchain_list {
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index eb26544..e74ad95 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1272,6 +1272,32 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
 	return bi;
 }
 
+static int duplicate_symbol(struct symbol *sym, char **prev_name)
+{
+	char *prev = *prev_name;
+
+	if (sym) {
+		if (prev) {
+			if (!strcmp(sym->name, prev))
+				return 1;		/* duplicate */
+			free(prev);
+		}
+
+		*prev_name = prev = strdup(sym->name);
+		if (!prev) {
+			pr_debug("%s: Unable to alloc memory\n", __func__);
+			return -ENOMEM;
+		}
+	} else if (prev) {
+		/* new symbol is NULL, so forget prev name */
+		free(prev);
+		*prev_name = NULL;
+	}
+	
+	/* Not duplicate */
+	return 0;
+}
+
 static int machine__resolve_callchain_sample(struct machine *machine,
 					     struct thread *thread,
 					     struct ip_callchain *chain,
@@ -1283,6 +1309,8 @@ static int machine__resolve_callchain_sample(struct machine *machine,
 	int chain_nr = min(max_stack, (int)chain->nr);
 	int i;
 	int err;
+	u64 prev_ip = 0;
+	char *prev_name = NULL;
 
 	callchain_cursor_reset(&callchain_cursor);
 
@@ -1324,6 +1352,10 @@ static int machine__resolve_callchain_sample(struct machine *machine,
 			continue;
 		}
 
+		if (callchain_param.merge_recursive && prev_ip == ip)
+			continue;
+		prev_ip = ip;
+
 		al.filtered = false;
 		thread__find_addr_location(thread, machine, cpumode,
 					   MAP__FUNCTION, ip, &al);
@@ -1340,6 +1372,13 @@ static int machine__resolve_callchain_sample(struct machine *machine,
 			}
 		}
 
+		if (callchain_param.merge_recursive) {
+			if ((err = duplicate_symbol(al.sym, &prev_name)) > 0)
+				continue;	/* duplicate */
+			else if (err < 0)
+				return err;
+		}
+
 		err = callchain_cursor_append(&callchain_cursor,
 					      ip, al.map, al.sym);
 		if (err)
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ