lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20150511132211.GA28183@kernel.org>
Date:	Mon, 11 May 2015 10:22:11 -0300
From:	Arnaldo Carvalho de Melo <acme@...nel.org>
To:	Adrian Hunter <adrian.hunter@...el.com>
Cc:	Peter Zijlstra <peterz@...radead.org>,
	linux-kernel@...r.kernel.org, David Ahern <dsahern@...il.com>,
	Frederic Weisbecker <fweisbec@...il.com>,
	Jiri Olsa <jolsa@...hat.com>,
	Namhyung Kim <namhyung@...il.com>,
	Stephane Eranian <eranian@...gle.com>
Subject: Re: [PATCH V4 14/24] perf tools: Add Intel PT decoder

Em Thu, Apr 30, 2015 at 05:37:37PM +0300, Adrian Hunter escreveu:
> Add support for decoding an Intel Processor Trace.

Thanks for the function comments in kerneldoc style, we need more of that!

Some issues below:

- Arnaldo
 
> Signed-off-by: Adrian Hunter <adrian.hunter@...el.com>
> ---
>  tools/perf/util/intel-pt-decoder/Build             |    2 +-
>  .../perf/util/intel-pt-decoder/intel-pt-decoder.c  | 1738 ++++++++++++++++++++
>  .../perf/util/intel-pt-decoder/intel-pt-decoder.h  |   89 +
>  3 files changed, 1828 insertions(+), 1 deletion(-)
>  create mode 100644 tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
>  create mode 100644 tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
> 
> diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build
> index 587321a..fa12eac 100644
> --- a/tools/perf/util/intel-pt-decoder/Build
> +++ b/tools/perf/util/intel-pt-decoder/Build
> @@ -1,4 +1,4 @@
> -libperf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o
> +libperf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o
>  
>  inat_tables_script = ../../arch/x86/tools/gen-insn-attr-x86.awk
>  inat_tables_maps = ../../arch/x86/lib/x86-opcode-map.txt
> diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
> new file mode 100644
> index 0000000..435b61b
> --- /dev/null
> +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
> @@ -0,0 +1,1738 @@
> +/*
> + * intel_pt_decoder.c: Intel Processor Trace support
> + * Copyright (c) 2013-2014, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + */
> +
> +#ifndef _GNU_SOURCE
> +#define _GNU_SOURCE
> +#endif
> +#include <stdlib.h>
> +#include <stdbool.h>
> +#include <string.h>
> +#include <errno.h>
> +#include <stdint.h>
> +#include <inttypes.h>
> +
> +#include "intel-pt-insn-decoder.h"
> +#include "intel-pt-pkt-decoder.h"
> +#include "intel-pt-decoder.h"
> +#include "intel-pt-log.h"
> +
> +#define INTEL_PT_BLK_SIZE 1024
> +
> +#define BIT63 (((uint64_t)1 << 63))
> +
> +#define INTEL_PT_RETURN 1
> +
> +struct intel_pt_blk {
> +	struct intel_pt_blk *prev;
> +	uint64_t ip[INTEL_PT_BLK_SIZE];
> +};
> +
> +struct intel_pt_stack {
> +	struct intel_pt_blk *blk;
> +	struct intel_pt_blk *spare;
> +	int pos;
> +};
> +
> +enum intel_pt_pkt_state {
> +	INTEL_PT_STATE_NO_PSB,
> +	INTEL_PT_STATE_NO_IP,
> +	INTEL_PT_STATE_ERR_RESYNC,
> +	INTEL_PT_STATE_IN_SYNC,
> +	INTEL_PT_STATE_TNT,
> +	INTEL_PT_STATE_TIP,
> +	INTEL_PT_STATE_TIP_PGD,
> +	INTEL_PT_STATE_FUP,
> +	INTEL_PT_STATE_FUP_NO_TIP,
> +};
> +
> +#ifdef INTEL_PT_STRICT
> +#define INTEL_PT_STATE_ERR1	INTEL_PT_STATE_NO_PSB
> +#define INTEL_PT_STATE_ERR2	INTEL_PT_STATE_NO_PSB
> +#define INTEL_PT_STATE_ERR3	INTEL_PT_STATE_NO_PSB
> +#define INTEL_PT_STATE_ERR4	INTEL_PT_STATE_NO_PSB
> +#else
> +#define INTEL_PT_STATE_ERR1	(decoder->pkt_state)
> +#define INTEL_PT_STATE_ERR2	INTEL_PT_STATE_NO_IP
> +#define INTEL_PT_STATE_ERR3	INTEL_PT_STATE_ERR_RESYNC
> +#define INTEL_PT_STATE_ERR4	INTEL_PT_STATE_IN_SYNC
> +#endif
> +
> +struct intel_pt_decoder {
> +	int (*get_trace)(struct intel_pt_buffer *buffer, void *data);
> +	int (*walk_insn)(struct intel_pt_insn *intel_pt_insn,
> +			 uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
> +			 uint64_t max_insn_cnt, void *data);
> +	void *data;
> +	struct intel_pt_state state;
> +	const unsigned char *buf;
> +	size_t len;
> +	bool return_compression;
> +	bool pge;
> +	uint64_t pos;
> +	uint64_t last_ip;
> +	uint64_t ip;
> +	uint64_t cr3;
> +	uint64_t timestamp;
> +	uint64_t tsc_timestamp;
> +	uint64_t ref_timestamp;
> +	uint64_t ret_addr;
> +	struct intel_pt_stack stack;
> +	enum intel_pt_pkt_state pkt_state;
> +	struct intel_pt_pkt packet;
> +	struct intel_pt_pkt tnt;
> +	int pkt_step;
> +	int pkt_len;
> +	unsigned int cbr;
> +	int exec_mode;
> +	unsigned int insn_bytes;
> +	uint64_t sign_bit;
> +	uint64_t sign_bits;
> +	uint64_t period;
> +	enum intel_pt_period_type period_type;
> +	uint64_t period_insn_cnt;
> +	uint64_t period_mask;
> +	uint64_t period_ticks;
> +	uint64_t last_masked_timestamp;
> +	bool continuous_period;
> +	bool overflow;
> +	bool set_fup_tx_flags;
> +	unsigned int fup_tx_flags;
> +	unsigned int tx_flags;
> +	uint64_t timestamp_insn_cnt;
> +	const unsigned char *next_buf;
> +	size_t next_len;
> +	unsigned char temp_buf[INTEL_PT_PKT_MAX_SZ];
> +};
> +
> +static uint64_t intel_pt_lower_power_of_2(uint64_t x)
> +{
> +	int i;
> +
> +	for (i = 0; x != 1; i++)
> +		x >>= 1;
> +
> +	return x << i;
> +}

We have in tools/perf/ the same function used in the kernel:

/**
 * rounddown_pow_of_two - round the given value down to nearest power of
 * two
 * @n - parameter
 *
 * round the given value down to the nearest power of two
 * - the result is undefined when n == 0
 * - this can be used to initialise global variables from constant data
 */
#define rounddown_pow_of_two(n)                 \
(                                               \
        __builtin_constant_p(n) ? (             \
                (1UL << ilog2(n))) :            \
        __rounddown_pow_of_two(n)               \
 )

> +
> +static void intel_pt_setup_period(struct intel_pt_decoder *decoder)
> +{
> +	if (decoder->period_type == INTEL_PT_PERIOD_TICKS) {
> +		uint64_t period;
> +
> +		period = intel_pt_lower_power_of_2(decoder->period);
> +		decoder->period_mask = ~(period - 1);
> +		decoder->period_ticks = period;
> +	}
> +}
> +
> +struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
> +{
> +	struct intel_pt_decoder *decoder;
> +
> +	if (!params->get_trace || !params->walk_insn)
> +		return NULL;
> +
> +	decoder = malloc(sizeof(struct intel_pt_decoder));
> +	if (!decoder)
> +		return NULL;
> +
> +	memset(decoder, 0, sizeof(struct intel_pt_decoder));


We have either zalloc or calloc for the above sequence of allocating +
zeroing.

> +
> +	decoder->get_trace = params->get_trace;
> +	decoder->walk_insn = params->walk_insn;
> +	decoder->data = params->data;
> +	decoder->return_compression = params->return_compression;

I am not strict about this, but it's common in reviews to ask for
aligning the =.

> +
> +	decoder->sign_bit = (uint64_t)1 << 47;
> +	decoder->sign_bits = ~(((uint64_t)1 << 48) - 1);
> +
> +	decoder->period = params->period;
> +	decoder->period_type = params->period_type;
> +
> +	intel_pt_setup_period(decoder);
> +
> +	return decoder;
> +}
> +
> +static void intel_pt_pop_blk(struct intel_pt_stack *stack)
> +{
> +	struct intel_pt_blk *blk;
> +
> +	blk = stack->blk;


Also not strict about this, but this makes the function shorter:

+	struct intel_pt_blk *blk = stack->blk;

> +	stack->blk = blk->prev;
> +	if (!stack->spare)
> +		stack->spare = blk;
> +	else
> +		free(blk);
> +}
> +
> +static uint64_t intel_pt_pop(struct intel_pt_stack *stack)
> +{
> +	if (!stack->pos) {
> +		if (!stack->blk)
> +			return 0;
> +		intel_pt_pop_blk(stack);
> +		if (!stack->blk)
> +			return 0;
> +		stack->pos = INTEL_PT_BLK_SIZE;
> +	}
> +	return stack->blk->ip[--stack->pos];
> +}
> +
> +static int intel_pt_alloc_blk(struct intel_pt_stack *stack)
> +{
> +	struct intel_pt_blk *blk;
> +
> +	if (stack->spare) {
> +		blk = stack->spare;
> +		stack->spare = NULL;
> +	} else {
> +		blk = malloc(sizeof(struct intel_pt_blk));
> +		if (!blk)
> +			return -ENOMEM;
> +	}
> +
> +	blk->prev = stack->blk;
> +	stack->blk = blk;
> +	stack->pos = 0;
> +	return 0;
> +}
> +
> +static int intel_pt_push(struct intel_pt_stack *stack, uint64_t ip)
> +{
> +	int err;
> +
> +	if (!stack->blk || stack->pos == INTEL_PT_BLK_SIZE) {
> +		err = intel_pt_alloc_blk(stack);
> +		if (err)
> +			return err;
> +	}
> +
> +	stack->blk->ip[stack->pos++] = ip;
> +	return 0;
> +}


None of those routines are "intel_pt" specific at all, right?

> +
> +static void intel_pt_clear_stack(struct intel_pt_stack *stack)
> +{
> +	while (stack->blk)
> +		intel_pt_pop_blk(stack);
> +	stack->pos = 0;
> +}
> +
> +static void intel_pt_free_stack(struct intel_pt_stack *stack)
> +{
> +	intel_pt_clear_stack(stack);
> +	free(stack->blk);
> +	free(stack->spare);
> +}

zfree was introduced to zero out these variables, i.e.:

	zfree(&stack->blk);
	zfree(&stack->spare);

> +
> +void intel_pt_decoder_free(struct intel_pt_decoder *decoder)
> +{
> +	intel_pt_free_stack(&decoder->stack);
> +	free(decoder);
> +}
> +
> +const char *intel_pt_error_message(int code)
> +{
> +	switch (code) {
> +	case ENOMEM:
> +		return "Memory allocation failed";
> +	case ENOSYS:
> +		return "Internal error";
> +	case EBADMSG:
> +		return "Bad packet";
> +	case ENODATA:
> +		return "No more data";
> +	case EILSEQ:
> +		return "Failed to get instruction";
> +	case ENOENT:
> +		return "Trace doesn't match instruction";
> +	case EOVERFLOW:
> +		return "Overflow packet";
> +	case ESHUTDOWN:
> +		return "Trace stop packet";
> +	default:
> +		return "Unknown error!";
> +	}


Elsewhere the above idiom is written as intel_pt__strerror(int err) {}, i.e. a
way to map an errno to a string is called "strerror", see for instance:

[acme@zoo linux]$ grep __strerror tools/perf/*.c
tools/perf/builtin-kvm.c:		target__strerror(&kvm->opts.target, err, errbuf, BUFSIZ);
tools/perf/builtin-record.c:		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
tools/perf/builtin-record.c:		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
tools/perf/builtin-top.c:				dso__strerror_load(al.map->dso, serr, sizeof(serr));
tools/perf/builtin-top.c:		target__strerror(target, status, errbuf, BUFSIZ);
tools/perf/builtin-top.c:		target__strerror(target, status, errbuf, BUFSIZ);
tools/perf/builtin-trace.c:	debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
tools/perf/builtin-trace.c:	debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
tools/perf/builtin-trace.c:	perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
tools/perf/builtin-trace.c:	perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
tools/perf/builtin-trace.c:		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
tools/perf/builtin-trace.c:		target__strerror(&trace.opts.target, err, bf, sizeof(bf));
[acme@zoo linux]$ 

Also, we try to be consistent in separating the class name (intel_pt) from the
method (strerror) with a double underscore.

> +}
> +
> +static uint64_t intel_pt_calc_ip(struct intel_pt_decoder *decoder,
> +				 const struct intel_pt_pkt *packet,
> +				 uint64_t last_ip)
> +{
> +	uint64_t ip;
> +
> +	switch (packet->count) {
> +	case 2:
> +		ip = (last_ip & (uint64_t)0xffffffffffff0000ULL) |
> +		     packet->payload;
> +		break;
> +	case 4:
> +		ip = (last_ip & (uint64_t)0xffffffff00000000ULL) |
> +		     packet->payload;
> +		break;
> +	case 6:
> +		ip = packet->payload;
> +		break;
> +	default:
> +		return 0;
> +	}
> +
> +	if (ip & decoder->sign_bit)
> +		return ip | decoder->sign_bits;
> +
> +	return ip;
> +}
> +
> +static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder)
> +{
> +	decoder->last_ip = intel_pt_calc_ip(decoder, &decoder->packet,
> +					    decoder->last_ip);
> +}
> +
> +static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder)
> +{
> +	intel_pt_set_last_ip(decoder);
> +	decoder->ip = decoder->last_ip;
> +}
> +
> +static void intel_pt_decoder_log_packet(struct intel_pt_decoder *decoder)
> +{
> +	intel_pt_log_packet(&decoder->packet, decoder->pkt_len, decoder->pos,
> +			    decoder->buf);
> +}
> +
> +static int intel_pt_bug(struct intel_pt_decoder *decoder)
> +{
> +	intel_pt_log("ERROR: Internal error\n");
> +	decoder->pkt_state = INTEL_PT_STATE_NO_PSB;
> +	return -ENOSYS;
> +}
> +
> +static inline void intel_pt_clear_tx_flags(struct intel_pt_decoder *decoder)
> +{
> +	decoder->tx_flags = 0;
> +}
> +
> +static inline void intel_pt_update_in_tx(struct intel_pt_decoder *decoder)
> +{
> +	decoder->tx_flags = decoder->packet.payload & INTEL_PT_IN_TX;
> +}
> +
> +static int intel_pt_bad_packet(struct intel_pt_decoder *decoder)
> +{
> +	intel_pt_clear_tx_flags(decoder);
> +	decoder->pkt_len = 1;
> +	decoder->pkt_step = 1;
> +	intel_pt_decoder_log_packet(decoder);
> +	if (decoder->pkt_state != INTEL_PT_STATE_NO_PSB) {
> +		intel_pt_log("ERROR: Bad packet\n");


And all these intel_pt_log() calls: do we really need a separate logging facility?
What is wrong with using pr_err(), pr_warning(), pr_debug(), as tools/ tries to
do (we need to improve that more, but still), and as the kernel does as well?

> +		decoder->pkt_state = INTEL_PT_STATE_ERR1;
> +	}
> +	return -EBADMSG;
> +}
> +
> +static int intel_pt_get_data(struct intel_pt_decoder *decoder)
> +{
> +	struct intel_pt_buffer buffer = { .buf = 0, };
> +	int ret;
> +
> +	decoder->pkt_step = 0;
> +
> +	intel_pt_log("Getting more data\n");
> +	ret = decoder->get_trace(&buffer, decoder->data);
> +	if (ret)
> +		return ret;
> +	decoder->buf = buffer.buf;
> +	decoder->len = buffer.len;
> +	if (!decoder->len) {
> +		intel_pt_log("No more data\n");
> +		return -ENODATA;
> +	}
> +	if (!buffer.consecutive) {
> +		decoder->ip = 0;
> +		decoder->pkt_state = INTEL_PT_STATE_NO_PSB;
> +		decoder->ref_timestamp = buffer.ref_timestamp;
> +		decoder->timestamp = 0;
> +		decoder->state.trace_nr = buffer.trace_nr;
> +		intel_pt_log("Reference timestamp 0x%" PRIx64 "\n",
> +			     decoder->ref_timestamp);
> +		return -ENOLINK;
> +	}
> +
> +	return 0;
> +}
> +
> +static int intel_pt_get_next_data(struct intel_pt_decoder *decoder)
> +{
> +	if (!decoder->next_buf)
> +		return intel_pt_get_data(decoder);
> +
> +	decoder->buf = decoder->next_buf;
> +	decoder->len = decoder->next_len;
> +	decoder->next_buf = 0;
> +	decoder->next_len = 0;
> +	return 0;
> +}
> +
> +static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder)
> +{
> +	unsigned char *buf = decoder->temp_buf;
> +	size_t old_len, len, n;
> +	int ret;
> +
> +	old_len = decoder->len;
> +	len = decoder->len;
> +	memcpy(buf, decoder->buf, len);
> +
> +	ret = intel_pt_get_data(decoder);
> +	if (ret) {
> +		decoder->pos += old_len;
> +		return ret < 0 ? ret : -EINVAL;
> +	}
> +
> +	n = INTEL_PT_PKT_MAX_SZ - len;
> +	if (n > decoder->len)
> +		n = decoder->len;
> +	memcpy(buf + len, decoder->buf, n);
> +	len += n;
> +
> +	ret = intel_pt_get_packet(buf, len, &decoder->packet);
> +	if (ret < (int)old_len) {
> +		decoder->next_buf = decoder->buf;
> +		decoder->next_len = decoder->len;
> +		decoder->buf = buf;
> +		decoder->len = old_len;
> +		return intel_pt_bad_packet(decoder);
> +	}
> +
> +	decoder->next_buf = decoder->buf + (ret - old_len);
> +	decoder->next_len = decoder->len - (ret - old_len);
> +
> +	decoder->buf = buf;
> +	decoder->len = ret;
> +
> +	return ret;
> +}
> +
> +static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder)
> +{
> +	int ret;
> +
> +	do {
> +		decoder->pos += decoder->pkt_step;
> +		decoder->buf += decoder->pkt_step;
> +		decoder->len -= decoder->pkt_step;
> +
> +		if (!decoder->len) {
> +			ret = intel_pt_get_next_data(decoder);
> +			if (ret)
> +				return ret;
> +		}
> +
> +		ret = intel_pt_get_packet(decoder->buf, decoder->len,
> +					  &decoder->packet);
> +		if (ret == INTEL_PT_NEED_MORE_BYTES &&
> +		    decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) {
> +			ret = intel_pt_get_split_packet(decoder);
> +			if (ret < 0)
> +				return ret;
> +		}
> +		if (ret <= 0)
> +			return intel_pt_bad_packet(decoder);
> +
> +		decoder->pkt_len = ret;
> +		decoder->pkt_step = ret;
> +		intel_pt_decoder_log_packet(decoder);
> +	} while (decoder->packet.type == INTEL_PT_PAD);
> +
> +	return 0;
> +}
> +
> +static uint64_t intel_pt_next_period(struct intel_pt_decoder *decoder)
> +{
> +	uint64_t timestamp, masked_timestamp;
> +
> +	timestamp = decoder->timestamp + decoder->timestamp_insn_cnt;
> +	masked_timestamp = timestamp & decoder->period_mask;
> +	if (decoder->continuous_period) {
> +		if (masked_timestamp != decoder->last_masked_timestamp)
> +			return 1;
> +	} else {
> +		timestamp += 1;
> +		masked_timestamp = timestamp & decoder->period_mask;
> +		if (masked_timestamp != decoder->last_masked_timestamp) {
> +			decoder->last_masked_timestamp = masked_timestamp;
> +			decoder->continuous_period = true;
> +		}
> +	}
> +	return decoder->period_ticks - (timestamp - masked_timestamp);
> +}
> +
> +static uint64_t intel_pt_next_sample(struct intel_pt_decoder *decoder)
> +{
> +	switch (decoder->period_type) {
> +	case INTEL_PT_PERIOD_INSTRUCTIONS:
> +		return decoder->period - decoder->period_insn_cnt;
> +	case INTEL_PT_PERIOD_TICKS:
> +		return intel_pt_next_period(decoder);
> +	case INTEL_PT_PERIOD_NONE:
> +	default:
> +		return 0;
> +	}
> +}
> +
> +static void intel_pt_sample_insn(struct intel_pt_decoder *decoder)
> +{
> +	uint64_t timestamp, masked_timestamp;
> +
> +	switch (decoder->period_type) {
> +	case INTEL_PT_PERIOD_INSTRUCTIONS:
> +		decoder->period_insn_cnt = 0;
> +		break;
> +	case INTEL_PT_PERIOD_TICKS:
> +		timestamp = decoder->timestamp + decoder->timestamp_insn_cnt;
> +		masked_timestamp = timestamp & decoder->period_mask;
> +		decoder->last_masked_timestamp = masked_timestamp;
> +		break;
> +	case INTEL_PT_PERIOD_NONE:
> +	default:
> +		break;
> +	}
> +
> +	decoder->state.type |= INTEL_PT_INSTRUCTION;
> +}
> +
> +static int intel_pt_walk_insn(struct intel_pt_decoder *decoder,
> +			      struct intel_pt_insn *intel_pt_insn, uint64_t ip)
> +{
> +	uint64_t max_insn_cnt, insn_cnt = 0;
> +	int err;
> +
> +	max_insn_cnt = intel_pt_next_sample(decoder);
> +
> +	err = decoder->walk_insn(intel_pt_insn, &insn_cnt, &decoder->ip, ip,
> +				 max_insn_cnt, decoder->data);
> +
> +	decoder->timestamp_insn_cnt += insn_cnt;
> +	decoder->period_insn_cnt += insn_cnt;
> +
> +	if (err) {
> +		decoder->pkt_state = INTEL_PT_STATE_ERR2;
> +		intel_pt_log_at("ERROR: Failed to get instruction",
> +				decoder->ip);
> +		if (err == -ENOENT)
> +			return -ENOLINK;
> +		return -EILSEQ;
> +	}
> +
> +	if (ip && decoder->ip == ip) {
> +		err = -EAGAIN;
> +		goto out;
> +	}
> +
> +	if (max_insn_cnt && insn_cnt >= max_insn_cnt)
> +		intel_pt_sample_insn(decoder);
> +
> +	if (intel_pt_insn->branch == INTEL_PT_BR_NO_BRANCH) {
> +		decoder->state.type = INTEL_PT_INSTRUCTION;
> +		decoder->state.from_ip = decoder->ip;
> +		decoder->state.to_ip = 0;
> +		decoder->ip += intel_pt_insn->length;
> +		err = INTEL_PT_RETURN;
> +		goto out;
> +	}
> +
> +	if (intel_pt_insn->op == INTEL_PT_OP_CALL) {
> +		/* Zero-length calls are excluded */
> +		if (intel_pt_insn->branch != INTEL_PT_BR_UNCONDITIONAL ||
> +		    intel_pt_insn->rel) {
> +			err = intel_pt_push(&decoder->stack, decoder->ip +
> +					    intel_pt_insn->length);
> +			if (err)
> +				goto out;
> +		}
> +	} else if (intel_pt_insn->op == INTEL_PT_OP_RET) {
> +		decoder->ret_addr = intel_pt_pop(&decoder->stack);
> +	}
> +
> +	if (intel_pt_insn->branch == INTEL_PT_BR_UNCONDITIONAL) {
> +		decoder->state.from_ip = decoder->ip;
> +		decoder->ip += intel_pt_insn->length +
> +				intel_pt_insn->rel;
> +		decoder->state.to_ip = decoder->ip;
> +		err = INTEL_PT_RETURN;
> +	}
> +out:
> +	decoder->state.insn_op = intel_pt_insn->op;
> +	decoder->state.insn_len = intel_pt_insn->length;
> +
> +	if (decoder->tx_flags & INTEL_PT_IN_TX)
> +		decoder->state.flags |= INTEL_PT_IN_TX;
> +
> +	return err;
> +}
> +
> +static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
> +{
> +	struct intel_pt_insn intel_pt_insn;
> +	uint64_t ip;
> +	int err;
> +
> +	ip = decoder->last_ip;
> +
> +	while (1) {
> +		err = intel_pt_walk_insn(decoder, &intel_pt_insn, ip);
> +		if (err == INTEL_PT_RETURN)
> +			return 0;
> +		if (err == -EAGAIN) {
> +			if (decoder->set_fup_tx_flags) {
> +				decoder->set_fup_tx_flags = false;
> +				decoder->tx_flags = decoder->fup_tx_flags;
> +				decoder->state.type = INTEL_PT_TRANSACTION;
> +				decoder->state.from_ip = decoder->ip;
> +				decoder->state.to_ip = 0;
> +				decoder->state.flags = decoder->fup_tx_flags;
> +				return 0;
> +			}
> +			return err;
> +		}
> +		decoder->set_fup_tx_flags = false;
> +		if (err)
> +			return err;
> +
> +		if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
> +			intel_pt_log_at("ERROR: Unexpected indirect branch",
> +					decoder->ip);
> +			decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
> +			return -ENOENT;
> +		}
> +
> +		if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
> +			intel_pt_log_at("ERROR: Unexpected conditional branch",
> +					decoder->ip);
> +			decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
> +			return -ENOENT;
> +		}
> +
> +		intel_pt_bug(decoder);
> +	}
> +}
> +
> +static int intel_pt_walk_tip(struct intel_pt_decoder *decoder)
> +{
> +	struct intel_pt_insn intel_pt_insn;
> +	int err;
> +
> +	err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0);
> +	if (err == INTEL_PT_RETURN)
> +		return 0;
> +	if (err)
> +		return err;
> +
> +	if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
> +		if (decoder->pkt_state == INTEL_PT_STATE_TIP_PGD) {
> +			decoder->pge = false;
> +			decoder->continuous_period = false;
> +			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
> +			decoder->state.from_ip = decoder->ip;
> +			decoder->state.to_ip = 0;
> +			if (decoder->packet.count != 0)
> +				decoder->ip = decoder->last_ip;
> +		} else {
> +			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
> +			decoder->state.from_ip = decoder->ip;
> +			if (decoder->packet.count == 0) {
> +				decoder->state.to_ip = 0;
> +			} else {
> +				decoder->state.to_ip = decoder->last_ip;
> +				decoder->ip = decoder->last_ip;
> +			}
> +		}
> +		return 0;
> +	}
> +
> +	if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
> +		intel_pt_log_at("ERROR: Conditional branch when expecting indirect branch",
> +				decoder->ip);
> +		decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
> +		return -ENOENT;
> +	}
> +
> +	return intel_pt_bug(decoder);
> +}
> +
> +static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
> +{
> +	struct intel_pt_insn intel_pt_insn;
> +	int err;
> +
> +	while (1) {
> +		err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0);
> +		if (err == INTEL_PT_RETURN)
> +			return 0;
> +		if (err)
> +			return err;
> +
> +		if (intel_pt_insn.op == INTEL_PT_OP_RET) {
> +			if (!decoder->return_compression) {
> +				intel_pt_log_at("ERROR: RET when expecting conditional branch",
> +						decoder->ip);
> +				decoder->pkt_state = INTEL_PT_STATE_ERR3;
> +				return -ENOENT;
> +			}
> +			if (!decoder->ret_addr) {
> +				intel_pt_log_at("ERROR: Bad RET compression (stack empty)",
> +						decoder->ip);
> +				decoder->pkt_state = INTEL_PT_STATE_ERR3;
> +				return -ENOENT;
> +			}
> +			if (!(decoder->tnt.payload & BIT63)) {
> +				intel_pt_log_at("ERROR: Bad RET compression (TNT=N)",
> +						decoder->ip);
> +				decoder->pkt_state = INTEL_PT_STATE_ERR3;
> +				return -ENOENT;
> +			}
> +			decoder->tnt.count -= 1;
> +			if (!decoder->tnt.count)
> +				decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
> +			decoder->tnt.payload <<= 1;
> +			decoder->state.from_ip = decoder->ip;
> +			decoder->ip = decoder->ret_addr;
> +			decoder->state.to_ip = decoder->ip;
> +			return 0;
> +		}
> +
> +		if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
> +			/* Handle deferred TIPs */
> +			err = intel_pt_get_next_packet(decoder);
> +			if (err)
> +				return err;
> +			if (decoder->packet.type != INTEL_PT_TIP ||
> +			    decoder->packet.count == 0) {
> +				intel_pt_log_at("ERROR: Missing deferred TIP for indirect branch",
> +						decoder->ip);
> +				decoder->pkt_state = INTEL_PT_STATE_ERR3;
> +				decoder->pkt_step = 0;
> +				return -ENOENT;
> +			}
> +			intel_pt_set_last_ip(decoder);
> +			decoder->state.from_ip = decoder->ip;
> +			decoder->state.to_ip = decoder->last_ip;
> +			decoder->ip = decoder->last_ip;
> +			return 0;
> +		}
> +
> +		if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
> +			decoder->tnt.count -= 1;
> +			if (!decoder->tnt.count)
> +				decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
> +			if (decoder->tnt.payload & BIT63) {
> +				decoder->tnt.payload <<= 1;
> +				decoder->state.from_ip = decoder->ip;
> +				decoder->ip += intel_pt_insn.length +
> +					       intel_pt_insn.rel;
> +				decoder->state.to_ip = decoder->ip;
> +				return 0;
> +			}
> +			/* Instruction sample for a non-taken branch */
> +			if (decoder->state.type & INTEL_PT_INSTRUCTION) {
> +				decoder->tnt.payload <<= 1;
> +				decoder->state.type = INTEL_PT_INSTRUCTION;
> +				decoder->state.from_ip = decoder->ip;
> +				decoder->state.to_ip = 0;
> +				decoder->ip += intel_pt_insn.length;
> +				return 0;
> +			}
> +			decoder->ip += intel_pt_insn.length;
> +			if (!decoder->tnt.count)
> +				return -EAGAIN;
> +			decoder->tnt.payload <<= 1;
> +			continue;
> +		}
> +
> +		return intel_pt_bug(decoder);
> +	}
> +}
> +
> +static int intel_pt_mode_tsx(struct intel_pt_decoder *decoder, bool *no_tip)
> +{
> +	unsigned int fup_tx_flags;
> +	int err;
> +
> +	fup_tx_flags = decoder->packet.payload &
> +		       (INTEL_PT_IN_TX | INTEL_PT_ABORT_TX);
> +	err = intel_pt_get_next_packet(decoder);
> +	if (err)
> +		return err;
> +	if (decoder->packet.type == INTEL_PT_FUP) {
> +		decoder->fup_tx_flags = fup_tx_flags;
> +		decoder->set_fup_tx_flags = true;
> +		if (!(decoder->fup_tx_flags & INTEL_PT_ABORT_TX))
> +			*no_tip = true;
> +	} else {
> +		intel_pt_log_at("ERROR: Missing FUP after MODE.TSX",
> +				decoder->pos);
> +		intel_pt_update_in_tx(decoder);
> +	}
> +	return 0;
> +}
> +
> +static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder)
> +{
> +	uint64_t timestamp;
> +
> +	if (decoder->ref_timestamp) {
> +		timestamp = decoder->packet.payload |
> +			    (decoder->ref_timestamp & (0xffULL << 56));
> +		if (timestamp < decoder->ref_timestamp) {
> +			if (decoder->ref_timestamp - timestamp > (1ULL << 55))
> +				timestamp += (1ULL << 56);
> +		} else {
> +			if (timestamp - decoder->ref_timestamp > (1ULL << 55))
> +				timestamp -= (1ULL << 56);
> +		}
> +		decoder->tsc_timestamp = timestamp;
> +		decoder->timestamp = timestamp;
> +		decoder->ref_timestamp = 0;
> +		decoder->timestamp_insn_cnt = 0;
> +	} else if (decoder->timestamp) {
> +		timestamp = decoder->packet.payload |
> +			    (decoder->timestamp & (0xffULL << 56));
> +		if (timestamp < decoder->timestamp &&
> +		    decoder->timestamp - timestamp < 0x100) {
> +			intel_pt_log_to("ERROR: Suppressing backwards timestamp",
> +					timestamp);
> +			timestamp = decoder->timestamp;
> +		}
> +		while (timestamp < decoder->timestamp) {
> +			intel_pt_log_to("Wraparound timestamp", timestamp);
> +			timestamp += (1ULL << 56);
> +		}
> +		decoder->tsc_timestamp = timestamp;
> +		decoder->timestamp = timestamp;
> +		decoder->timestamp_insn_cnt = 0;
> +	}
> +
> +	intel_pt_log_to("Setting timestamp", decoder->timestamp);
> +}
> +
> +static int intel_pt_overflow(struct intel_pt_decoder *decoder)
> +{
> +	intel_pt_log("ERROR: Buffer overflow\n");
> +	intel_pt_clear_tx_flags(decoder);
> +	decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
> +	decoder->overflow = true;
> +	return -EOVERFLOW;
> +}
> +
> +/* Walk PSB+ packets when already in sync. */
> +static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
> +{
> +	int err;
> +
> +	while (1) {
> +		err = intel_pt_get_next_packet(decoder);
> +		if (err)
> +			return err;
> +
> +		switch (decoder->packet.type) {
> +		case INTEL_PT_PSBEND:
> +			return 0;
> +
> +		case INTEL_PT_TIP_PGD:
> +		case INTEL_PT_TIP_PGE:
> +		case INTEL_PT_TIP:
> +		case INTEL_PT_TNT:
> +		case INTEL_PT_BAD:
> +		case INTEL_PT_PSB:
> +			intel_pt_log("ERROR: Unexpected packet\n");
> +			return -EAGAIN;
> +
> +		case INTEL_PT_OVF:
> +			return intel_pt_overflow(decoder);
> +
> +		case INTEL_PT_TSC:
> +			intel_pt_calc_tsc_timestamp(decoder);
> +			break;
> +
> +		case INTEL_PT_CBR:
> +			decoder->cbr = decoder->packet.payload;
> +			break;
> +
> +		case INTEL_PT_MODE_EXEC:
> +			decoder->exec_mode = decoder->packet.payload;
> +			break;
> +
> +		case INTEL_PT_PIP:
> +			decoder->cr3 = decoder->packet.payload;
> +			break;
> +
> +		case INTEL_PT_FUP:
> +			decoder->pge = true;
> +			break;
> +
> +		case INTEL_PT_MODE_TSX:
> +			intel_pt_update_in_tx(decoder);
> +			break;
> +
> +		case INTEL_PT_PAD:
> +		default:
> +			break;
> +		}
> +	}
> +}
> +
> +static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
> +{
> +	int err;
> +
> +	if (decoder->tx_flags & INTEL_PT_ABORT_TX) {
> +		decoder->tx_flags = 0;
> +		decoder->state.flags &= ~INTEL_PT_IN_TX;
> +		decoder->state.flags |= INTEL_PT_ABORT_TX;
> +	} else {
> +		decoder->state.flags |= INTEL_PT_ASYNC;
> +	}
> +
> +	while (1) {
> +		err = intel_pt_get_next_packet(decoder);
> +		if (err)
> +			return err;
> +
> +		switch (decoder->packet.type) {
> +		case INTEL_PT_TNT:
> +		case INTEL_PT_FUP:
> +		case INTEL_PT_PSB:
> +		case INTEL_PT_TSC:
> +		case INTEL_PT_CBR:
> +		case INTEL_PT_MODE_TSX:
> +		case INTEL_PT_BAD:
> +		case INTEL_PT_PSBEND:
> +			intel_pt_log("ERROR: Missing TIP after FUP\n");
> +			decoder->pkt_state = INTEL_PT_STATE_ERR3;
> +			return -ENOENT;
> +
> +		case INTEL_PT_OVF:
> +			return intel_pt_overflow(decoder);
> +
> +		case INTEL_PT_TIP_PGD:
> +			decoder->state.from_ip = decoder->ip;
> +			decoder->state.to_ip = 0;
> +			if (decoder->packet.count != 0) {
> +				intel_pt_set_ip(decoder);
> +				intel_pt_log("Omitting PGD ip " x64_fmt "\n",
> +					     decoder->ip);
> +			}
> +			decoder->pge = false;
> +			decoder->continuous_period = false;
> +			return 0;
> +
> +		case INTEL_PT_TIP_PGE:
> +			decoder->pge = true;
> +			intel_pt_log("Omitting PGE ip " x64_fmt "\n",
> +				     decoder->ip);
> +			decoder->state.from_ip = 0;
> +			if (decoder->packet.count == 0) {
> +				decoder->state.to_ip = 0;
> +			} else {
> +				intel_pt_set_ip(decoder);
> +				decoder->state.to_ip = decoder->ip;
> +			}
> +			return 0;
> +
> +		case INTEL_PT_TIP:
> +			decoder->state.from_ip = decoder->ip;
> +			if (decoder->packet.count == 0) {
> +				decoder->state.to_ip = 0;
> +			} else {
> +				intel_pt_set_ip(decoder);
> +				decoder->state.to_ip = decoder->ip;
> +			}
> +			return 0;
> +
> +		case INTEL_PT_PIP:
> +			decoder->cr3 = decoder->packet.payload;
> +			break;
> +
> +		case INTEL_PT_MODE_EXEC:
> +			decoder->exec_mode = decoder->packet.payload;
> +			break;
> +
> +		case INTEL_PT_PAD:
> +			break;
> +
> +		default:
> +			return intel_pt_bug(decoder);
> +		}
> +	}
> +}
> +
> +/*
> + * Decode packets while in sync.  Each pass of the loop fetches one packet and
> + * dispatches on its type: a 'break' out of the switch fetches the next packet,
> + * whereas 'goto next' dispatches again on whatever is currently in
> + * decoder->packet without fetching.  Returns as soon as a handler returns or
> + * fetching a packet fails.
> + */
> +static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
> +{
> +	/* Passed to intel_pt_mode_tsx(); changes how a following FUP is handled */
> +	bool no_tip = false;
> +	int err;
> +
> +	while (1) {
> +		err = intel_pt_get_next_packet(decoder);
> +		if (err)
> +			return err;
> +next:
> +		switch (decoder->packet.type) {
> +		case INTEL_PT_TNT:
> +			/* Ignore a TNT packet with a zero count */
> +			if (!decoder->packet.count)
> +				break;
> +			decoder->tnt = decoder->packet;
> +			decoder->pkt_state = INTEL_PT_STATE_TNT;
> +			err = intel_pt_walk_tnt(decoder);
> +			/* On -EAGAIN carry on with the next packet */
> +			if (err == -EAGAIN)
> +				break;
> +			return err;
> +
> +		case INTEL_PT_TIP_PGD:
> +			if (decoder->packet.count != 0)
> +				intel_pt_set_last_ip(decoder);
> +			decoder->pkt_state = INTEL_PT_STATE_TIP_PGD;
> +			return intel_pt_walk_tip(decoder);
> +
> +		case INTEL_PT_TIP_PGE: {
> +			decoder->pge = true;
> +			if (decoder->packet.count == 0) {
> +				intel_pt_log_at("Skipping zero TIP.PGE",
> +						decoder->pos);
> +				break;
> +			}
> +			/* Report a branch from 0 to the newly set IP */
> +			intel_pt_set_ip(decoder);
> +			decoder->state.from_ip = 0;
> +			decoder->state.to_ip = decoder->ip;
> +			return 0;
> +		}
> +
> +		case INTEL_PT_OVF:
> +			return intel_pt_overflow(decoder);
> +
> +		case INTEL_PT_TIP:
> +			if (decoder->packet.count != 0)
> +				intel_pt_set_last_ip(decoder);
> +			decoder->pkt_state = INTEL_PT_STATE_TIP;
> +			return intel_pt_walk_tip(decoder);
> +
> +		case INTEL_PT_FUP:
> +			if (decoder->packet.count == 0) {
> +				intel_pt_log_at("Skipping zero FUP",
> +						decoder->pos);
> +				no_tip = false;
> +				break;
> +			}
> +			intel_pt_set_last_ip(decoder);
> +			err = intel_pt_walk_fup(decoder);
> +			if (err != -EAGAIN) {
> +				if (err)
> +					return err;
> +				/*
> +				 * Success: remember which FUP state to resume
> +				 * in on the next intel_pt_decode() call.
> +				 */
> +				if (no_tip)
> +					decoder->pkt_state =
> +						INTEL_PT_STATE_FUP_NO_TIP;
> +				else
> +					decoder->pkt_state = INTEL_PT_STATE_FUP;
> +				return 0;
> +			}
> +			/* -EAGAIN with no_tip set: do not go on to walk_fup_tip */
> +			if (no_tip) {
> +				no_tip = false;
> +				break;
> +			}
> +			return intel_pt_walk_fup_tip(decoder);
> +
> +		case INTEL_PT_PSB:
> +			intel_pt_clear_stack(&decoder->stack);
> +			err = intel_pt_walk_psbend(decoder);
> +			/* -EAGAIN: dispatch on the packet now in decoder->packet */
> +			if (err == -EAGAIN)
> +				goto next;
> +			if (err)
> +				return err;
> +			break;
> +
> +		case INTEL_PT_PIP:
> +			decoder->cr3 = decoder->packet.payload;
> +			break;
> +
> +		case INTEL_PT_TSC:
> +			intel_pt_calc_tsc_timestamp(decoder);
> +			break;
> +
> +		case INTEL_PT_CBR:
> +			decoder->cbr = decoder->packet.payload;
> +			break;
> +
> +		case INTEL_PT_MODE_EXEC:
> +			decoder->exec_mode = decoder->packet.payload;
> +			break;
> +
> +		case INTEL_PT_MODE_TSX:
> +			/* MODE_TSX need not be followed by FUP */
> +			if (!decoder->pge) {
> +				intel_pt_update_in_tx(decoder);
> +				break;
> +			}
> +			err = intel_pt_mode_tsx(decoder, &no_tip);
> +			if (err)
> +				return err;
> +			/*
> +			 * Dispatch on decoder->packet again without fetching;
> +			 * NOTE(review): presumably intel_pt_mode_tsx() leaves
> +			 * the packet that followed MODE_TSX there - confirm.
> +			 */
> +			goto next;
> +
> +		case INTEL_PT_BAD: /* Does not happen */
> +			return intel_pt_bug(decoder);
> +
> +		case INTEL_PT_PSBEND:
> +		case INTEL_PT_PAD:
> +			break;
> +
> +		default:
> +			return intel_pt_bug(decoder);
> +		}
> +	}
> +}
> +
> +/*
> + * Walk PSB+ packets to get in sync.
> + *
> + * Consumes packets until INTEL_PT_PSBEND, at which point 0 is returned.  On
> + * the way TSC, CBR, PIP, MODE_EXEC and MODE_TSX packets update the decoder,
> + * and a FUP may set the IP.  TIP, TIP_PGE, TIP_PGD and TNT packets are
> + * reported as errors (-ENOENT).  Packet fetch errors and the results of
> + * intel_pt_overflow() and intel_pt_bug() are passed back to the caller.
> + */
> +static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
> +{
> +	int err;
> +
> +	while (1) {
> +		err = intel_pt_get_next_packet(decoder);
> +		if (err)
> +			return err;
> +
> +		switch (decoder->packet.type) {
> +		case INTEL_PT_TIP_PGD:
> +			decoder->continuous_period = false;
> +			/* Fall through */
> +		case INTEL_PT_TIP_PGE:
> +		case INTEL_PT_TIP:
> +			intel_pt_log("ERROR: Unexpected packet\n");
> +			return -ENOENT;
> +
> +		case INTEL_PT_FUP:
> +			decoder->pge = true;
> +			/*
> +			 * NOTE(review): presumably a count of 6 or 0 means the
> +			 * packet does not depend on last_ip - confirm against
> +			 * the packet decoder.
> +			 */
> +			if (decoder->last_ip || decoder->packet.count == 6 ||
> +			    decoder->packet.count == 0) {
> +				/* IP before the update; only log if there was one */
> +				uint64_t current_ip = decoder->ip;
> +
> +				intel_pt_set_ip(decoder);
> +				if (current_ip)
> +					intel_pt_log_to("Setting IP",
> +							decoder->ip);
> +			}
> +			break;
> +
> +		case INTEL_PT_TSC:
> +			intel_pt_calc_tsc_timestamp(decoder);
> +			break;
> +
> +		case INTEL_PT_CBR:
> +			decoder->cbr = decoder->packet.payload;
> +			break;
> +
> +		case INTEL_PT_PIP:
> +			decoder->cr3 = decoder->packet.payload;
> +			break;
> +
> +		case INTEL_PT_MODE_EXEC:
> +			decoder->exec_mode = decoder->packet.payload;
> +			break;
> +
> +		case INTEL_PT_MODE_TSX:
> +			intel_pt_update_in_tx(decoder);
> +			break;
> +
> +		case INTEL_PT_TNT:
> +			intel_pt_log("ERROR: Unexpected packet\n");
> +			/* The error state depends on whether an IP is known */
> +			if (decoder->ip)
> +				decoder->pkt_state = INTEL_PT_STATE_ERR4;
> +			else
> +				decoder->pkt_state = INTEL_PT_STATE_ERR3;
> +			return -ENOENT;
> +
> +		case INTEL_PT_BAD: /* Does not happen */
> +			return intel_pt_bug(decoder);
> +
> +		case INTEL_PT_OVF:
> +			return intel_pt_overflow(decoder);
> +
> +		case INTEL_PT_PSBEND:
> +			return 0;
> +
> +		case INTEL_PT_PSB:
> +		case INTEL_PT_PAD:
> +		default:
> +			break;
> +		}
> +	}
> +}
> +
> +/*
> + * Consume packets until the decoder has a non-zero IP.
> + *
> + * Returns 0 once decoder->ip has been set from a TIP, TIP_PGE or TIP_PGD
> + * packet, from a FUP following an overflow, or by intel_pt_walk_psb().
> + * Packet fetch errors and the results of intel_pt_overflow(),
> + * intel_pt_bug() and intel_pt_walk_psb() errors are passed back to the caller.
> + */
> +static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
> +{
> +	int err;
> +
> +	while (1) {
> +		err = intel_pt_get_next_packet(decoder);
> +		if (err)
> +			return err;
> +
> +		switch (decoder->packet.type) {
> +		case INTEL_PT_TIP_PGD:
> +			decoder->continuous_period = false;
> +			/* Fall through */
> +		case INTEL_PT_TIP_PGE:
> +		case INTEL_PT_TIP:
> +			/* pge ends up false for TIP_PGD and true otherwise */
> +			decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD;
> +			/*
> +			 * NOTE(review): presumably a count of 6 or 0 means the
> +			 * packet does not depend on last_ip - confirm against
> +			 * the packet decoder.
> +			 */
> +			if (decoder->last_ip || decoder->packet.count == 6 ||
> +			    decoder->packet.count == 0)
> +				intel_pt_set_ip(decoder);
> +			if (decoder->ip)
> +				return 0;
> +			break;
> +
> +		case INTEL_PT_FUP:
> +			/* A FUP may only provide the IP after an overflow */
> +			if (decoder->overflow) {
> +				if (decoder->last_ip ||
> +				    decoder->packet.count == 6 ||
> +				    decoder->packet.count == 0)
> +					intel_pt_set_ip(decoder);
> +				if (decoder->ip)
> +					return 0;
> +			}
> +			/* Otherwise just keep last_ip up to date */
> +			if (decoder->packet.count)
> +				intel_pt_set_last_ip(decoder);
> +			break;
> +
> +		case INTEL_PT_TSC:
> +			intel_pt_calc_tsc_timestamp(decoder);
> +			break;
> +
> +		case INTEL_PT_CBR:
> +			decoder->cbr = decoder->packet.payload;
> +			break;
> +
> +		case INTEL_PT_PIP:
> +			decoder->cr3 = decoder->packet.payload;
> +			break;
> +
> +		case INTEL_PT_MODE_EXEC:
> +			decoder->exec_mode = decoder->packet.payload;
> +			break;
> +
> +		case INTEL_PT_MODE_TSX:
> +			intel_pt_update_in_tx(decoder);
> +			break;
> +
> +		case INTEL_PT_OVF:
> +			return intel_pt_overflow(decoder);
> +
> +		case INTEL_PT_BAD: /* Does not happen */
> +			return intel_pt_bug(decoder);
> +
> +		case INTEL_PT_PSB:
> +			err = intel_pt_walk_psb(decoder);
> +			if (err)
> +				return err;
> +			if (decoder->ip) {
> +				/* Do not have a sample */
> +				decoder->state.type = 0;
> +				return 0;
> +			}
> +			break;
> +
> +		case INTEL_PT_TNT:
> +		case INTEL_PT_PSBEND:
> +		case INTEL_PT_PAD:
> +		default:
> +			break;
> +		}
> +	}
> +}
> +
> +/*
> + * Scan forward until an IP is known, then put the decoder in sync and report
> + * a branch from 0 to that IP.  Returns 0 on success, otherwise the error from
> + * intel_pt_walk_to_ip() with the decoder state left untouched.
> + */
> +static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
> +{
> +	int ret;
> +
> +	intel_pt_log("Scanning for full IP\n");
> +
> +	ret = intel_pt_walk_to_ip(decoder);
> +	if (!ret) {
> +		decoder->overflow = false;
> +		decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
> +
> +		decoder->state.to_ip = decoder->ip;
> +		decoder->state.from_ip = 0;
> +		intel_pt_log_to("Setting IP", decoder->ip);
> +	}
> +
> +	return ret;
> +}
> +
> +/*
> + * Return the length of the longest proper prefix of the PSB byte string that
> + * the current buffer ends with, or 0 if the buffer does not end with one.
> + */
> +static int intel_pt_part_psb(struct intel_pt_decoder *decoder)
> +{
> +	const unsigned char *tail = decoder->buf + decoder->len;
> +	size_t n = INTEL_PT_PSB_LEN - 1;
> +
> +	/* A candidate prefix cannot be longer than the data available */
> +	if (n > decoder->len)
> +		n = decoder->len;
> +
> +	while (n) {
> +		if (memcmp(tail - n, INTEL_PT_PSB_STR, n) == 0)
> +			return n;
> +		n--;
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * Given that @part_psb bytes of PSB have been seen already, check whether the
> + * current buffer starts with the remaining bytes.  Returns the number of
> + * remaining bytes if so, otherwise 0.
> + */
> +static int intel_pt_rest_psb(struct intel_pt_decoder *decoder, int part_psb)
> +{
> +	const char *remainder = INTEL_PT_PSB_STR;
> +	size_t remaining = INTEL_PT_PSB_LEN - part_psb;
> +
> +	remainder += part_psb;
> +
> +	if (remaining > decoder->len)
> +		return 0;
> +
> +	if (memcmp(decoder->buf, remainder, remaining) != 0)
> +		return 0;
> +
> +	return remaining;
> +}
> +
> +/*
> + * Handle a PSB that may be split across two buffers.  @part_psb bytes of PSB
> + * end the current buffer.  Fetch the next buffer and, if it starts with the
> + * rest of the PSB, present a whole PSB from temp_buf and queue the data after
> + * the PSB as next_buf / next_len.  Returns 0 whether or not a split PSB was
> + * found, otherwise the error from intel_pt_get_next_data().
> + */
> +static int intel_pt_get_split_psb(struct intel_pt_decoder *decoder,
> +				  int part_psb)
> +{
> +	int err, tail_len;
> +
> +	/* Consume what is left of the current buffer, then fetch more data */
> +	decoder->pos += decoder->len;
> +	decoder->len = 0;
> +
> +	err = intel_pt_get_next_data(decoder);
> +	if (err)
> +		return err;
> +
> +	tail_len = intel_pt_rest_psb(decoder, part_psb);
> +	if (tail_len) {
> +		/* The PSB started part_psb bytes before the new buffer */
> +		decoder->pos -= part_psb;
> +		decoder->next_buf = decoder->buf + tail_len;
> +		decoder->next_len = decoder->len - tail_len;
> +		memcpy(decoder->temp_buf, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
> +		decoder->buf = decoder->temp_buf;
> +		decoder->len = INTEL_PT_PSB_LEN;
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * Search the trace data for a PSB, fetching more data as needed and allowing
> + * for a PSB split across two buffers.  When one is found, step to it and
> + * return the result of intel_pt_get_next_packet().  Errors from fetching data
> + * are returned as is.
> + */
> +static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder)
> +{
> +	unsigned char *hit;
> +	int err;
> +
> +	intel_pt_log("Scanning for PSB\n");
> +	for (;;) {
> +		int partial;
> +
> +		if (!decoder->len) {
> +			err = intel_pt_get_next_data(decoder);
> +			if (err)
> +				return err;
> +		}
> +
> +		hit = memmem(decoder->buf, decoder->len, INTEL_PT_PSB_STR,
> +			     INTEL_PT_PSB_LEN);
> +		if (hit) {
> +			decoder->pkt_step = hit - decoder->buf;
> +			return intel_pt_get_next_packet(decoder);
> +		}
> +
> +		partial = intel_pt_part_psb(decoder);
> +		if (!partial) {
> +			/* Nothing useful here, discard the whole buffer */
> +			decoder->pos += decoder->len;
> +			decoder->len = 0;
> +			continue;
> +		}
> +
> +		/* The buffer ends with the start of a PSB */
> +		err = intel_pt_get_split_psb(decoder, partial);
> +		if (err)
> +			return err;
> +	}
> +}
> +
> +/*
> + * Synchronize from scratch: reset the IP-related state, find a PSB and walk
> + * the PSB+ packets.  If that yields an IP the decoder is in sync (with no
> + * sample to report), otherwise carry on scanning with intel_pt_sync_ip().
> + */
> +static int intel_pt_sync(struct intel_pt_decoder *decoder)
> +{
> +	int ret;
> +
> +	decoder->ip = 0;
> +	decoder->last_ip = 0;
> +	decoder->pge = false;
> +	decoder->continuous_period = false;
> +	intel_pt_clear_stack(&decoder->stack);
> +
> +	ret = intel_pt_scan_for_psb(decoder);
> +	if (ret)
> +		return ret;
> +
> +	decoder->pkt_state = INTEL_PT_STATE_NO_IP;
> +
> +	ret = intel_pt_walk_psb(decoder);
> +	if (ret)
> +		return ret;
> +
> +	if (!decoder->ip)
> +		return intel_pt_sync_ip(decoder);
> +
> +	/* Do not have a sample */
> +	decoder->state.type = 0;
> +	decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
> +
> +	return 0;
> +}
> +
> +/**
> + * intel_pt_decode - run the decoder state machine and report its state.
> + * @decoder: decoder
> + *
> + * Resets the sample type to %INTEL_PT_BRANCH and the flags to 0, then runs the
> + * handler for the current packet state, repeating for as long as the handler
> + * returns %-ENOLINK.  The handler's result is stored in the state's err
> + * member, and on error from_ip is set to the decoder's current IP.
> + *
> + * Return: A pointer to the state embedded in @decoder.
> + */
> +const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
> +{
> +	int err;
> +
> +	do {
> +		decoder->state.type = INTEL_PT_BRANCH;
> +		decoder->state.flags = 0;
> +
> +		switch (decoder->pkt_state) {
> +		case INTEL_PT_STATE_NO_PSB:
> +			err = intel_pt_sync(decoder);
> +			break;
> +		case INTEL_PT_STATE_NO_IP:
> +			decoder->last_ip = 0;
> +			/* Fall through */
> +		case INTEL_PT_STATE_ERR_RESYNC:
> +			err = intel_pt_sync_ip(decoder);
> +			break;
> +		case INTEL_PT_STATE_IN_SYNC:
> +			err = intel_pt_walk_trace(decoder);
> +			break;
> +		case INTEL_PT_STATE_TNT:
> +			/* Resume the saved TNT; on -EAGAIN go back to packets */
> +			err = intel_pt_walk_tnt(decoder);
> +			if (err == -EAGAIN)
> +				err = intel_pt_walk_trace(decoder);
> +			break;
> +		case INTEL_PT_STATE_TIP:
> +		case INTEL_PT_STATE_TIP_PGD:
> +			err = intel_pt_walk_tip(decoder);
> +			break;
> +		case INTEL_PT_STATE_FUP:
> +			/* Stay in the FUP state only if walk_fup succeeds */
> +			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
> +			err = intel_pt_walk_fup(decoder);
> +			if (err == -EAGAIN)
> +				err = intel_pt_walk_fup_tip(decoder);
> +			else if (!err)
> +				decoder->pkt_state = INTEL_PT_STATE_FUP;
> +			break;
> +		case INTEL_PT_STATE_FUP_NO_TIP:
> +			/* As for FUP, but continue with packets on -EAGAIN */
> +			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
> +			err = intel_pt_walk_fup(decoder);
> +			if (err == -EAGAIN)
> +				err = intel_pt_walk_trace(decoder);
> +			break;
> +		default:
> +			err = intel_pt_bug(decoder);
> +			break;
> +		}
> +	} while (err == -ENOLINK);
> +
> +	decoder->state.err = err;
> +	decoder->state.timestamp = decoder->timestamp;
> +	/* Estimate: the timestamp plus twice timestamp_insn_cnt */
> +	decoder->state.est_timestamp = decoder->timestamp +
> +				       (decoder->timestamp_insn_cnt << 1);
> +	decoder->state.cr3 = decoder->cr3;
> +
> +	/* On error, report the IP the decoder had reached */
> +	if (err)
> +		decoder->state.from_ip = decoder->ip;
> +
> +	return &decoder->state;
> +}
> +
> +/*
> + * Return true if @buf holds at least INTEL_PT_PSB_LEN bytes and begins with
> + * the PSB byte string.
> + */
> +static bool intel_pt_at_psb(unsigned char *buf, size_t len)
> +{
> +	/* Too short to hold a whole PSB */
> +	if (len < INTEL_PT_PSB_LEN)
> +		return false;
> +
> +	/* Only the first INTEL_PT_PSB_LEN bytes are examined */
> +	return !memcmp(buf, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
> +}
> +
> +/**
> + * intel_pt_next_psb - advance a buffer to the first PSB packet it contains.
> + * @buf: pointer to buffer pointer
> + * @len: pointer to size of buffer
> + *
> + * Searches from the start of the buffer, so a PSB at @buf itself counts.  On
> + * success @buf is moved to the PSB and @len is reduced by the number of bytes
> + * skipped.  If there is no PSB, neither @buf nor @len is modified.
> + *
> + * Return: %true if a PSB packet is found, %false otherwise.
> + */
> +static bool intel_pt_next_psb(unsigned char **buf, size_t *len)
> +{
> +	unsigned char *psb = memmem(*buf, *len, INTEL_PT_PSB_STR,
> +				    INTEL_PT_PSB_LEN);
> +
> +	if (!psb)
> +		return false;
> +
> +	*len -= psb - *buf;
> +	*buf = psb;
> +
> +	return true;
> +}
> +
> +/**
> + * intel_pt_step_psb - advance a buffer to the PSB packet after the current
> + *                     position.
> + * @buf: pointer to buffer pointer
> + * @len: pointer to size of buffer
> + *
> + * The search starts one byte past @buf, so a PSB located at @buf itself is
> + * skipped.  On success @buf is moved to the PSB found and @len is reduced by
> + * the number of bytes skipped.  If the buffer is empty or holds no further
> + * PSB, neither @buf nor @len is modified.
> + *
> + * Return: %true if a PSB packet is found, %false otherwise.
> + */
> +static bool intel_pt_step_psb(unsigned char **buf, size_t *len)
> +{
> +	unsigned char *psb;
> +
> +	if (*len == 0)
> +		return false;
> +
> +	psb = memmem(*buf + 1, *len - 1, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
> +	if (!psb)
> +		return false;
> +
> +	*len -= psb - *buf;
> +	*buf = psb;
> +
> +	return true;
> +}
> +
> +/**
> + * intel_pt_last_psb - locate the final PSB packet in a buffer.
> + * @buf: buffer
> + * @len: size of buffer
> + *
> + * Searches backwards for the first byte of PSB, considering only positions
> + * that leave room for a whole PSB, and compares the rest of the packet at
> + * each candidate.
> + *
> + * Return: A pointer to the last PSB in @buf if found, %NULL otherwise.
> + */
> +static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len)
> +{
> +	const char *psb = INTEL_PT_PSB_STR;
> +	unsigned char *hit;
> +	size_t span;
> +
> +	if (len < INTEL_PT_PSB_LEN)
> +		return NULL;
> +
> +	/* Number of positions at which a whole PSB could start */
> +	span = len - INTEL_PT_PSB_LEN + 1;
> +	do {
> +		hit = memrchr(buf, psb[0], span);
> +		if (!hit)
> +			return NULL;
> +		if (memcmp(hit + 1, psb + 1, INTEL_PT_PSB_LEN - 1) == 0)
> +			return hit;
> +		/* Not a PSB, carry on searching before this candidate */
> +		span = hit - buf;
> +	} while (span);
> +
> +	return NULL;
> +}
> +
> +/**
> + * intel_pt_next_tsc - get the value of the next TSC packet.
> + * @buf: buffer
> + * @len: size of buffer
> + * @tsc: TSC value returned
> + *
> + * Decodes packets from the start of @buf looking for a TSC packet.  @buf is
> + * assumed to start at a PSB, and PSB+ is assumed to contain the TSC, so the
> + * search gives up at a PSBEND packet.  It also gives up if a packet cannot be
> + * decoded or the buffer runs out.
> + *
> + * Return: %true if TSC is found, false otherwise.
> + */
> +static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc)
> +{
> +	struct intel_pt_pkt pkt;
> +	int used;
> +
> +	for (; len; buf += used, len -= used) {
> +		used = intel_pt_get_packet(buf, len, &pkt);
> +		if (used <= 0)
> +			return false;
> +
> +		switch (pkt.type) {
> +		case INTEL_PT_TSC:
> +			*tsc = pkt.payload;
> +			return true;
> +		case INTEL_PT_PSBEND:
> +			return false;
> +		default:
> +			break;
> +		}
> +	}
> +
> +	return false;
> +}
> +
> +/**
> + * intel_pt_tsc_cmp - order two 7-byte TSC values.
> + * @tsc1: first TSC to compare
> + * @tsc2: second TSC to compare
> + *
> + * A 7-byte TSC can wrap around and in general there is no way to tell whether
> + * it has.  This function therefore assumes that the two values are less than
> + * half the 7-byte range apart: a larger gap is taken to mean that the
> + * numerically smaller value is the later one.
> + *
> + * Return: %-1 if @tsc1 is before @tsc2, %0 if @tsc1 == @tsc2, %1 if @tsc1 is
> + * after @tsc2.
> + */
> +static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2)
> +{
> +	const uint64_t halfway = (1ULL << 55);
> +	uint64_t delta;
> +	int order;
> +
> +	if (tsc1 == tsc2)
> +		return 0;
> +
> +	if (tsc1 < tsc2) {
> +		delta = tsc2 - tsc1;
> +		order = -1;
> +	} else {
> +		delta = tsc1 - tsc2;
> +		order = 1;
> +	}
> +
> +	/* A gap of half the range or more means the order is reversed */
> +	return delta < halfway ? order : -order;
> +}
> +
> +/**
> + * intel_pt_find_overlap_tsc - determine start of non-overlapped trace data
> + *                             using TSC.
> + * @buf_a: first buffer
> + * @len_a: size of first buffer
> + * @buf_b: second buffer
> + * @len_b: size of second buffer
> + *
> + * If the trace contains TSC we can look at the last TSC of @buf_a and the
> + * first TSC of @buf_b in order to determine if the buffers overlap, and then
> + * walk forward in @buf_b until a later TSC is found.  A precondition is that
> + * @buf_a and @buf_b are positioned at a PSB.
> + *
> + * Only the last two PSBs of @buf_a are tried when looking for its TSC.  If
> + * neither yields one, the buffers are assumed not to overlap.
> + *
> + * Return: A pointer into @buf_b from where non-overlapped data starts, or
> + * @buf_b + @len_b if there is no non-overlapped data.
> + */
> +static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a,
> +						size_t len_a,
> +						unsigned char *buf_b,
> +						size_t len_b)
> +{
> +	uint64_t tsc_a, tsc_b;
> +	unsigned char *p;
> +	size_t len;
> +
> +	p = intel_pt_last_psb(buf_a, len_a);
> +	if (!p)
> +		return buf_b; /* No PSB in buf_a => no overlap */
> +
> +	/* Number of bytes from the last PSB to the end of buf_a */
> +	len = len_a - (p - buf_a);
> +	if (!intel_pt_next_tsc(p, len, &tsc_a)) {
> +		/* The last PSB+ in buf_a is incomplete, so go back one more */
> +		/* Shorten buf_a so that it ends just before that PSB */
> +		len_a -= len;
> +		p = intel_pt_last_psb(buf_a, len_a);
> +		if (!p)
> +			return buf_b; /* No full PSB+ => assume no overlap */
> +		len = len_a - (p - buf_a);
> +		if (!intel_pt_next_tsc(p, len, &tsc_a))
> +			return buf_b; /* No TSC in buf_a => assume no overlap */
> +	}
> +
> +	/* Each pass looks at the PSB+ at buf_b, then steps to the next PSB */
> +	while (1) {
> +		/* Ignore PSB+ with no TSC */
> +		if (intel_pt_next_tsc(buf_b, len_b, &tsc_b) &&
> +		    intel_pt_tsc_cmp(tsc_a, tsc_b) < 0)
> +			return buf_b; /* tsc_a < tsc_b => no overlap */
> +
> +		if (!intel_pt_step_psb(&buf_b, &len_b))
> +			return buf_b + len_b; /* No PSB in buf_b => no data */
> +	}
> +}
> +
> +/**
> + * intel_pt_find_overlap - determine start of non-overlapped trace data.
> + * @buf_a: first buffer
> + * @len_a: size of first buffer
> + * @buf_b: second buffer
> + * @len_b: size of second buffer
> + * @have_tsc: can use TSC packets to detect overlap
> + *
> + * When trace samples or snapshots are recorded there is the possibility that
> + * the data overlaps.  Note that, for the purposes of decoding, data is only
> + * useful if it begins with a PSB packet.
> + *
> + * Without TSC, overlap is detected by comparing bytes: the tail of @buf_a,
> + * starting at one of its PSBs, must equal the head of @buf_b, and what is left
> + * of @buf_b after that must itself start at a PSB.
> + *
> + * Return: A pointer into @buf_b from where non-overlapped data starts, or
> + * @buf_b + @len_b if there is no non-overlapped data.
> + */
> +unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
> +				     unsigned char *buf_b, size_t len_b,
> +				     bool have_tsc)
> +{
> +	unsigned char *found;
> +
> +	/* Buffer 'b' must start at PSB so throw away everything before that */
> +	if (!intel_pt_next_psb(&buf_b, &len_b))
> +		return buf_b + len_b; /* No PSB */
> +
> +	if (!intel_pt_next_psb(&buf_a, &len_a))
> +		return buf_b; /* No overlap */
> +
> +	if (have_tsc) {
> +		/*
> +		 * NOTE(review): every return in intel_pt_find_overlap_tsc() is
> +		 * buf_b or buf_b + len_b, so this check looks always true and
> +		 * the byte comparison below is only reached when !have_tsc.
> +		 */
> +		found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b);
> +		if (found)
> +			return found;
> +	}
> +
> +	/*
> +	 * Buffer 'b' cannot end within buffer 'a' so, for comparison purposes,
> +	 * we can ignore the first part of buffer 'a'.
> +	 */
> +	while (len_b < len_a) {
> +		if (!intel_pt_step_psb(&buf_a, &len_a))
> +			return buf_b; /* No overlap */
> +	}
> +
> +	/* Now len_b >= len_a */
> +	if (len_b > len_a) {
> +		/* The leftover buffer 'b' must start at a PSB */
> +		while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) {
> +			if (!intel_pt_step_psb(&buf_a, &len_a))
> +				return buf_b; /* No overlap */
> +		}
> +	}
> +
> +	while (1) {
> +		/* Potential overlap so check the bytes */
> +		/*
> +		 * Haystack and needle are both len_a bytes long, so this only
> +		 * matches if the first len_a bytes of buffer 'b' equal what is
> +		 * left of buffer 'a'.
> +		 */
> +		found = memmem(buf_a, len_a, buf_b, len_a);
> +		if (found)
> +			return buf_b + len_a;
> +
> +		/* Try again at next PSB in buffer 'a' */
> +		if (!intel_pt_step_psb(&buf_a, &len_a))
> +			return buf_b; /* No overlap */
> +
> +		/* The leftover buffer 'b' must start at a PSB */
> +		while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) {
> +			if (!intel_pt_step_psb(&buf_a, &len_a))
> +				return buf_b; /* No overlap */
> +		}
> +	}
> +}
> diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
> new file mode 100644
> index 0000000..e55615a
> --- /dev/null
> +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
> @@ -0,0 +1,89 @@
> +/*
> + * intel-pt-decoder.h: Intel Processor Trace support
> + * Copyright (c) 2013-2014, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + */
> +
> +#ifndef INCLUDE__INTEL_PT_DECODER_H__
> +#define INCLUDE__INTEL_PT_DECODER_H__
> +
> +#include <stdint.h>
> +#include <stddef.h>
> +#include <stdbool.h>
> +
> +#include "intel-pt-insn-decoder.h"
> +
> +/*
> + * Flag bits.  The decoder ORs INTEL_PT_ASYNC into the flags member of
> + * struct intel_pt_state.
> + * NOTE(review): presumably the other two bits are reported the same way -
> + * confirm against the decoder.
> + */
> +#define INTEL_PT_IN_TX		(1 << 0)
> +#define INTEL_PT_ABORT_TX	(1 << 1)
> +#define INTEL_PT_ASYNC		(1 << 2)
> +
> +/*
> + * Values for the type member of struct intel_pt_state.  Each value is a
> + * distinct bit.  intel_pt_decode() starts each pass with INTEL_PT_BRANCH, and
> + * the decoder stores 0 when it has no sample.
> + */
> +enum intel_pt_sample_type {
> +	INTEL_PT_BRANCH		= 1 << 0,
> +	INTEL_PT_INSTRUCTION	= 1 << 1,
> +	INTEL_PT_TRANSACTION	= 1 << 2,
> +};
> +
> +/*
> + * Values for the period_type member of struct intel_pt_params.
> + * NOTE(review): presumably this selects the unit in which the period member
> + * is expressed - confirm against the decoder.
> + */
> +enum intel_pt_period_type {
> +	INTEL_PT_PERIOD_NONE,
> +	INTEL_PT_PERIOD_INSTRUCTIONS,
> +	INTEL_PT_PERIOD_TICKS,
> +};
> +
> +/*
> + * Decoder state, as returned by intel_pt_decode().  The member comments below
> + * cover only what intel_pt_decode() itself assigns.
> + */
> +struct intel_pt_state {
> +	/* Reset to INTEL_PT_BRANCH at the start of each decode pass */
> +	enum intel_pt_sample_type type;
> +	/* Result of the last intel_pt_decode() call, 0 if there was no error */
> +	int err;
> +	/* On error, set to the IP the decoder had reached */
> +	uint64_t from_ip;
> +	uint64_t to_ip;
> +	/* Copies of the decoder's cr3 and timestamp */
> +	uint64_t cr3;
> +	uint64_t timestamp;
> +	/* timestamp plus twice the decoder's timestamp_insn_cnt */
> +	uint64_t est_timestamp;
> +	uint64_t trace_nr;
> +	/* Reset to 0 at the start of each decode pass */
> +	uint32_t flags;
> +	enum intel_pt_insn_op insn_op;
> +	int insn_len;
> +};
> +
> +struct intel_pt_insn;
> +
> +/*
> + * Buffer descriptor passed to the get_trace() callback of
> + * struct intel_pt_params.
> + * NOTE(review): presumably the callback fills this in to describe the next
> + * piece of trace data - confirm against the decoder.
> + */
> +struct intel_pt_buffer {
> +	const unsigned char *buf;
> +	size_t len;
> +	bool consecutive;
> +	uint64_t ref_timestamp;
> +	uint64_t trace_nr;
> +};
> +
> +/*
> + * Parameters passed to intel_pt_decoder_new().
> + * NOTE(review): presumably @data is the opaque pointer handed back to the
> + * get_trace() and walk_insn() callbacks as their last argument - confirm
> + * against the decoder.
> + */
> +struct intel_pt_params {
> +	int (*get_trace)(struct intel_pt_buffer *buffer, void *data);
> +	int (*walk_insn)(struct intel_pt_insn *intel_pt_insn,
> +			 uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
> +			 uint64_t max_insn_cnt, void *data);
> +	void *data;
> +	bool return_compression;
> +	uint64_t period;
> +	enum intel_pt_period_type period_type;
> +};
> +
> +struct intel_pt_decoder;
> +
> +struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params);
> +void intel_pt_decoder_free(struct intel_pt_decoder *decoder);
> +
> +const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder);
> +
> +unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
> +				     unsigned char *buf_b, size_t len_b,
> +				     bool have_tsc);
> +
> +const char *intel_pt_error_message(int code);
> +
> +#endif
> -- 
> 1.9.1
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ