lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <tip-f14445ee72c59f32aa5cbf4d0f0330a5f62a752d@git.kernel.org>
Date:	Tue, 29 Sep 2015 01:45:56 -0700
From:	tip-bot for Adrian Hunter <tipbot@...or.com>
To:	linux-tip-commits@...r.kernel.org
Cc:	jolsa@...hat.com, hpa@...or.com, ak@...ux.intel.com,
	acme@...hat.com, adrian.hunter@...el.com, mingo@...nel.org,
	tglx@...utronix.de, linux-kernel@...r.kernel.org
Subject: [tip:perf/core] perf intel-pt: Support generating branch stack

Commit-ID:  f14445ee72c59f32aa5cbf4d0f0330a5f62a752d
Gitweb:     http://git.kernel.org/tip/f14445ee72c59f32aa5cbf4d0f0330a5f62a752d
Author:     Adrian Hunter <adrian.hunter@...el.com>
AuthorDate: Fri, 25 Sep 2015 16:15:45 +0300
Committer:  Arnaldo Carvalho de Melo <acme@...hat.com>
CommitDate: Mon, 28 Sep 2015 16:59:14 -0300

perf intel-pt: Support generating branch stack

Add support for generating branch stack context for PT samples.  The
decoder reports a configurable number of branches as branch context for
each sample. Internally it keeps track of them by using a simple sliding
window.  We also flush the last branch buffer on each sample to avoid
overlapping intervals.

This is useful for:

- Reporting accurate basic block edge frequencies through the perf
  report branch view
- Using with --branch-history to get the wider context of samples
- Other users of LBRs

Also the Documentation is updated.

Examples:

	Record with Intel PT:

		perf record -e intel_pt//u ls

	Branch stacks are used by default if synthesized so:

		perf report --itrace=ile

	is the same as:

		perf report --itrace=ile -b

	Branch history can be requested also:

		perf report --itrace=igle --branch-history

Based-on-patch-by: Andi Kleen <ak@...ux.intel.com>
Signed-off-by: Adrian Hunter <adrian.hunter@...el.com>
Cc: Jiri Olsa <jolsa@...hat.com>
Link: http://lkml.kernel.org/r/1443186956-18718-15-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@...hat.com>
---
 tools/perf/Documentation/intel-pt.txt |  10 +++
 tools/perf/util/intel-pt.c            | 115 ++++++++++++++++++++++++++++++++++
 2 files changed, 125 insertions(+)

diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
index 886612b..a0fbb5d 100644
--- a/tools/perf/Documentation/intel-pt.txt
+++ b/tools/perf/Documentation/intel-pt.txt
@@ -671,6 +671,7 @@ The letters are:
 	e	synthesize tracing error events
 	d	create a debug log
 	g	synthesize a call chain (use with i or x)
+	l	synthesize last branch entries (use with i or x)
 
 "Instructions" events look like they were recorded by "perf record -e
 instructions".
@@ -718,6 +719,15 @@ transactions events can be specified. e.g.
 	--itrace=ig32
 	--itrace=xg32
 
+Also the number of last branch entries (default 64, max. 1024) for instructions or
+transactions events can be specified. e.g.
+
+       --itrace=il10
+       --itrace=xl10
+
+Note that last branch entries are cleared for each sample, so there is no overlap
+from one sample to the next.
+
 To disable trace decoding entirely, use the option --no-itrace.
 
 
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 2c01e72..05e8fcc51 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -22,6 +22,7 @@
 #include "../perf.h"
 #include "session.h"
 #include "machine.h"
+#include "sort.h"
 #include "tool.h"
 #include "event.h"
 #include "evlist.h"
@@ -115,6 +116,9 @@ struct intel_pt_queue {
 	void *decoder;
 	const struct intel_pt_state *state;
 	struct ip_callchain *chain;
+	struct branch_stack *last_branch;
+	struct branch_stack *last_branch_rb;
+	size_t last_branch_pos;
 	union perf_event *event_buf;
 	bool on_heap;
 	bool stop;
@@ -675,6 +679,19 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
 			goto out_free;
 	}
 
+	if (pt->synth_opts.last_branch) {
+		size_t sz = sizeof(struct branch_stack);
+
+		sz += pt->synth_opts.last_branch_sz *
+		      sizeof(struct branch_entry);
+		ptq->last_branch = zalloc(sz);
+		if (!ptq->last_branch)
+			goto out_free;
+		ptq->last_branch_rb = zalloc(sz);
+		if (!ptq->last_branch_rb)
+			goto out_free;
+	}
+
 	ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
 	if (!ptq->event_buf)
 		goto out_free;
@@ -732,6 +749,8 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
 
 out_free:
 	zfree(&ptq->event_buf);
+	zfree(&ptq->last_branch);
+	zfree(&ptq->last_branch_rb);
 	zfree(&ptq->chain);
 	free(ptq);
 	return NULL;
@@ -746,6 +765,8 @@ static void intel_pt_free_queue(void *priv)
 	thread__zput(ptq->thread);
 	intel_pt_decoder_free(ptq->decoder);
 	zfree(&ptq->event_buf);
+	zfree(&ptq->last_branch);
+	zfree(&ptq->last_branch_rb);
 	zfree(&ptq->chain);
 	free(ptq);
 }
@@ -876,6 +897,57 @@ static int intel_pt_setup_queues(struct intel_pt *pt)
 	return 0;
 }
 
+static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq)
+{
+	struct branch_stack *bs_src = ptq->last_branch_rb;
+	struct branch_stack *bs_dst = ptq->last_branch;
+	size_t nr = 0;
+
+	bs_dst->nr = bs_src->nr;
+
+	if (!bs_src->nr)
+		return;
+
+	nr = ptq->pt->synth_opts.last_branch_sz - ptq->last_branch_pos;
+	memcpy(&bs_dst->entries[0],
+	       &bs_src->entries[ptq->last_branch_pos],
+	       sizeof(struct branch_entry) * nr);
+
+	if (bs_src->nr >= ptq->pt->synth_opts.last_branch_sz) {
+		memcpy(&bs_dst->entries[nr],
+		       &bs_src->entries[0],
+		       sizeof(struct branch_entry) * ptq->last_branch_pos);
+	}
+}
+
+static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq)
+{
+	ptq->last_branch_pos = 0;
+	ptq->last_branch_rb->nr = 0;
+}
+
+static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq)
+{
+	const struct intel_pt_state *state = ptq->state;
+	struct branch_stack *bs = ptq->last_branch_rb;
+	struct branch_entry *be;
+
+	if (!ptq->last_branch_pos)
+		ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz;
+
+	ptq->last_branch_pos -= 1;
+
+	be              = &bs->entries[ptq->last_branch_pos];
+	be->from        = state->from_ip;
+	be->to          = state->to_ip;
+	be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX);
+	be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX);
+	/* No support for mispredict */
+
+	if (bs->nr < ptq->pt->synth_opts.last_branch_sz)
+		bs->nr += 1;
+}
+
 static int intel_pt_inject_event(union perf_event *event,
 				 struct perf_sample *sample, u64 type,
 				 bool swapped)
@@ -890,6 +962,10 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
 	struct intel_pt *pt = ptq->pt;
 	union perf_event *event = ptq->event_buf;
 	struct perf_sample sample = { .ip = 0, };
+	struct dummy_branch_stack {
+		u64			nr;
+		struct branch_entry	entries;
+	} dummy_bs;
 
 	if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
 		return 0;
@@ -912,6 +988,21 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
 	sample.flags = ptq->flags;
 	sample.insn_len = ptq->insn_len;
 
+	/*
+	 * perf report cannot handle events without a branch stack when using
+	 * SORT_MODE__BRANCH so make a dummy one.
+	 */
+	if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) {
+		dummy_bs = (struct dummy_branch_stack){
+			.nr = 1,
+			.entries = {
+				.from = sample.ip,
+				.to = sample.addr,
+			},
+		};
+		sample.branch_stack = (struct branch_stack *)&dummy_bs;
+	}
+
 	if (pt->synth_opts.inject) {
 		ret = intel_pt_inject_event(event, &sample,
 					    pt->branches_sample_type,
@@ -961,6 +1052,11 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
 		sample.callchain = ptq->chain;
 	}
 
+	if (pt->synth_opts.last_branch) {
+		intel_pt_copy_last_branch_rb(ptq);
+		sample.branch_stack = ptq->last_branch;
+	}
+
 	if (pt->synth_opts.inject) {
 		ret = intel_pt_inject_event(event, &sample,
 					    pt->instructions_sample_type,
@@ -974,6 +1070,9 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
 		pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n",
 		       ret);
 
+	if (pt->synth_opts.last_branch)
+		intel_pt_reset_last_branch_rb(ptq);
+
 	return ret;
 }
 
@@ -1008,6 +1107,11 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
 		sample.callchain = ptq->chain;
 	}
 
+	if (pt->synth_opts.last_branch) {
+		intel_pt_copy_last_branch_rb(ptq);
+		sample.branch_stack = ptq->last_branch;
+	}
+
 	if (pt->synth_opts.inject) {
 		ret = intel_pt_inject_event(event, &sample,
 					    pt->transactions_sample_type,
@@ -1021,6 +1125,9 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
 		pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n",
 		       ret);
 
+	if (pt->synth_opts.callchain)
+		intel_pt_reset_last_branch_rb(ptq);
+
 	return ret;
 }
 
@@ -1116,6 +1223,9 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
 			return err;
 	}
 
+	if (pt->synth_opts.last_branch)
+		intel_pt_update_last_branch_rb(ptq);
+
 	if (!pt->sync_switch)
 		return 0;
 
@@ -1763,6 +1873,8 @@ static int intel_pt_synth_events(struct intel_pt *pt,
 		pt->instructions_sample_period = attr.sample_period;
 		if (pt->synth_opts.callchain)
 			attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
+		if (pt->synth_opts.last_branch)
+			attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
 		pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
 			 id, (u64)attr.sample_type);
 		err = intel_pt_synth_event(session, &attr, id);
@@ -1782,6 +1894,8 @@ static int intel_pt_synth_events(struct intel_pt *pt,
 		attr.sample_period = 1;
 		if (pt->synth_opts.callchain)
 			attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
+		if (pt->synth_opts.last_branch)
+			attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
 		pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
 			 id, (u64)attr.sample_type);
 		err = intel_pt_synth_event(session, &attr, id);
@@ -1808,6 +1922,7 @@ static int intel_pt_synth_events(struct intel_pt *pt,
 		attr.sample_period = 1;
 		attr.sample_type |= PERF_SAMPLE_ADDR;
 		attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN;
+		attr.sample_type &= ~(u64)PERF_SAMPLE_BRANCH_STACK;
 		pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
 			 id, (u64)attr.sample_type);
 		err = intel_pt_synth_event(session, &attr, id);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ