lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Mon, 16 Jan 2012 17:32:53 +0800
From:	Xiao Guangrong <xiaoguangrong@...ux.vnet.ibm.com>
To:	Xiao Guangrong <xiaoguangrong@...ux.vnet.ibm.com>
CC:	Avi Kivity <avi@...hat.com>, Marcelo Tosatti <mtosatti@...hat.com>,
	LKML <linux-kernel@...r.kernel.org>, KVM <kvm@...r.kernel.org>
Subject: [PATCH 3/3] KVM: perf: kvm events analysis tool

Add 'perf kvm-events' support to analyze kvm vmexit/mmio/ioport smartly

Usage:
	perf kvm-events record
	perf kvm-events report

Signed-off-by: Xiao Guangrong <xiaoguangrong@...ux.vnet.ibm.com>
---
 tools/perf/Documentation/perf-kvm-events.txt |   54 ++
 tools/perf/Makefile                          |    1 +
 tools/perf/builtin-kvm-events.c              |  860 ++++++++++++++++++++++++++
 tools/perf/builtin.h                         |    1 +
 tools/perf/perf.c                            |    1 +
 5 files changed, 917 insertions(+), 0 deletions(-)
 create mode 100644 tools/perf/Documentation/perf-kvm-events.txt
 create mode 100644 tools/perf/builtin-kvm-events.c

diff --git a/tools/perf/Documentation/perf-kvm-events.txt b/tools/perf/Documentation/perf-kvm-events.txt
new file mode 100644
index 0000000..73bcb82
--- /dev/null
+++ b/tools/perf/Documentation/perf-kvm-events.txt
@@ -0,0 +1,54 @@
+perf-kvm-events(1)
+==================
+
+NAME
+----
+perf-kvm-events - Analyze kvm events
+
+SYNOPSIS
+--------
+[verse]
+'perf kvm-events' {record|report}
+
+DESCRIPTION
+-----------
+You can analyze some crucial events and statistics with this
+'perf kvm-events' command.
+
+  'perf kvm-events record <command>' records the kvm events
+  that occur between the start and the end of <command>. It
+  produces the file "perf.data", which contains the tracing
+  results of the kvm events.
+
+  'perf kvm-events report' reports statistical data.
+
+COMMON OPTIONS
+--------------
+
+-i::
+--input=<file>::
+        Input file name. (default: perf.data unless stdin is a fifo)
+
+-v::
+--verbose::
+        Be more verbose (show symbol address, etc).
+
+-D::
+--dump-raw-trace::
+        Dump raw trace in ASCII.
+
+REPORT OPTIONS
+--------------
+--vcpu=<value>::
+	analyze events which occur on this vcpu
+
+--event=<value>::
+	events to be analyzed. Possible values: vmexit, mmio, ioport.
+-k::
+--key=<value>::
+        Sorting key. Possible values: sample(default, sort by samples number),
+time(sort by time%).
+
+SEE ALSO
+--------
+linkperf:perf[1]
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index ac86d67..ee43451 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -382,6 +382,7 @@ BUILTIN_OBJS += $(OUTPUT)builtin-probe.o
 BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o
 BUILTIN_OBJS += $(OUTPUT)builtin-lock.o
 BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o
+BUILTIN_OBJS += $(OUTPUT)builtin-kvm-events.o
 BUILTIN_OBJS += $(OUTPUT)builtin-test.o
 BUILTIN_OBJS += $(OUTPUT)builtin-inject.o

diff --git a/tools/perf/builtin-kvm-events.c b/tools/perf/builtin-kvm-events.c
new file mode 100644
index 0000000..55dc680
--- /dev/null
+++ b/tools/perf/builtin-kvm-events.c
@@ -0,0 +1,860 @@
+#include "builtin.h"
+#include "perf.h"
+#include "util/util.h"
+#include "util/cache.h"
+#include "util/symbol.h"
+#include "util/thread.h"
+#include "util/header.h"
+#include "util/parse-options.h"
+#include "util/trace-event.h"
+#include "util/debug.h"
+#include "util/session.h"
+#include "util/tool.h"
+
+#include <linux/hash.h>
+
+/*
+ * Todo: improve the print format of kvm_exit to make it easier for
+ * perf to parse, so that we can get the exit reason from the print
+ * format directly.
+ */
+#define EXIT_REASON_EXCEPTION_NMI       0
+#define EXIT_REASON_EXTERNAL_INTERRUPT  1
+#define EXIT_REASON_TRIPLE_FAULT        2
+
+#define EXIT_REASON_PENDING_INTERRUPT   7
+#define EXIT_REASON_NMI_WINDOW		8
+#define EXIT_REASON_TASK_SWITCH         9
+#define EXIT_REASON_CPUID               10
+#define EXIT_REASON_HLT                 12
+#define EXIT_REASON_INVD                13
+#define EXIT_REASON_INVLPG              14
+#define EXIT_REASON_RDPMC               15
+#define EXIT_REASON_RDTSC               16
+#define EXIT_REASON_VMCALL              18
+#define EXIT_REASON_VMCLEAR             19
+#define EXIT_REASON_VMLAUNCH            20
+#define EXIT_REASON_VMPTRLD             21
+#define EXIT_REASON_VMPTRST             22
+#define EXIT_REASON_VMREAD              23
+#define EXIT_REASON_VMRESUME            24
+#define EXIT_REASON_VMWRITE             25
+#define EXIT_REASON_VMOFF               26
+#define EXIT_REASON_VMON                27
+#define EXIT_REASON_CR_ACCESS           28
+#define EXIT_REASON_DR_ACCESS           29
+#define EXIT_REASON_IO_INSTRUCTION      30
+#define EXIT_REASON_MSR_READ            31
+#define EXIT_REASON_MSR_WRITE           32
+#define EXIT_REASON_INVALID_STATE	33
+#define EXIT_REASON_MWAIT_INSTRUCTION   36
+#define EXIT_REASON_MONITOR_INSTRUCTION 39
+#define EXIT_REASON_PAUSE_INSTRUCTION   40
+#define EXIT_REASON_MCE_DURING_VMENTRY	 41
+#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
+#define EXIT_REASON_APIC_ACCESS         44
+#define EXIT_REASON_EPT_VIOLATION       48
+#define EXIT_REASON_EPT_MISCONFIG       49
+#define EXIT_REASON_WBINVD		54
+#define EXIT_REASON_XSETBV		55
+
+#define SVM_EXIT_READ_CR0	0x000
+#define SVM_EXIT_READ_CR3	0x003
+#define SVM_EXIT_READ_CR4	0x004
+#define SVM_EXIT_READ_CR8	0x008
+#define SVM_EXIT_WRITE_CR0	0x010
+#define SVM_EXIT_WRITE_CR3	0x013
+#define SVM_EXIT_WRITE_CR4	0x014
+#define SVM_EXIT_WRITE_CR8	0x018
+#define SVM_EXIT_READ_DR0	0x020
+#define SVM_EXIT_READ_DR1	0x021
+#define SVM_EXIT_READ_DR2	0x022
+#define SVM_EXIT_READ_DR3	0x023
+#define SVM_EXIT_READ_DR4	0x024
+#define SVM_EXIT_READ_DR5	0x025
+#define SVM_EXIT_READ_DR6	0x026
+#define SVM_EXIT_READ_DR7	0x027
+#define SVM_EXIT_WRITE_DR0	0x030
+#define SVM_EXIT_WRITE_DR1	0x031
+#define SVM_EXIT_WRITE_DR2	0x032
+#define SVM_EXIT_WRITE_DR3	0x033
+#define SVM_EXIT_WRITE_DR4	0x034
+#define SVM_EXIT_WRITE_DR5	0x035
+#define SVM_EXIT_WRITE_DR6	0x036
+#define SVM_EXIT_WRITE_DR7	0x037
+#define SVM_EXIT_EXCP_BASE	0x040
+#define SVM_EXIT_INTR		0x060
+#define SVM_EXIT_NMI		0x061
+#define SVM_EXIT_SMI		0x062
+#define SVM_EXIT_INIT		0x063
+#define SVM_EXIT_VINTR		0x064
+#define SVM_EXIT_CR0_SEL_WRITE	0x065
+#define SVM_EXIT_IDTR_READ	0x066
+#define SVM_EXIT_GDTR_READ	0x067
+#define SVM_EXIT_LDTR_READ	0x068
+#define SVM_EXIT_TR_READ	0x069
+#define SVM_EXIT_IDTR_WRITE	0x06a
+#define SVM_EXIT_GDTR_WRITE	0x06b
+#define SVM_EXIT_LDTR_WRITE	0x06c
+#define SVM_EXIT_TR_WRITE	0x06d
+#define SVM_EXIT_RDTSC	0x06e
+#define SVM_EXIT_RDPMC	0x06f
+#define SVM_EXIT_PUSHF	0x070
+#define SVM_EXIT_POPF	0x071
+#define SVM_EXIT_CPUID	0x072
+#define SVM_EXIT_RSM	0x073
+#define SVM_EXIT_IRET	0x074
+#define SVM_EXIT_SWINT	0x075
+#define SVM_EXIT_INVD	0x076
+#define SVM_EXIT_PAUSE	0x077
+#define SVM_EXIT_HLT	0x078
+#define SVM_EXIT_INVLPG	0x079
+#define SVM_EXIT_INVLPGA	0x07a
+#define SVM_EXIT_IOIO	0x07b
+#define SVM_EXIT_MSR	0x07c
+#define SVM_EXIT_TASK_SWITCH	0x07d
+#define SVM_EXIT_FERR_FREEZE	0x07e
+#define SVM_EXIT_SHUTDOWN	0x07f
+#define SVM_EXIT_VMRUN	0x080
+#define SVM_EXIT_VMMCALL	0x081
+#define SVM_EXIT_VMLOAD	0x082
+#define SVM_EXIT_VMSAVE	0x083
+#define SVM_EXIT_STGI	0x084
+#define SVM_EXIT_CLGI	0x085
+#define SVM_EXIT_SKINIT	0x086
+#define SVM_EXIT_RDTSCP	0x087
+#define SVM_EXIT_ICEBP	0x088
+#define SVM_EXIT_WBINVD	0x089
+#define SVM_EXIT_MONITOR	0x08a
+#define SVM_EXIT_MWAIT	0x08b
+#define SVM_EXIT_MWAIT_COND	0x08c
+#define SVM_EXIT_XSETBV	0x08d
+#define SVM_EXIT_NPF	0x400
+
+#define DE_VECTOR 0
+#define DB_VECTOR 1
+#define BP_VECTOR 3
+#define OF_VECTOR 4
+#define BR_VECTOR 5
+#define UD_VECTOR 6
+#define NM_VECTOR 7
+#define DF_VECTOR 8
+#define TS_VECTOR 10
+#define NP_VECTOR 11
+#define SS_VECTOR 12
+#define GP_VECTOR 13
+#define PF_VECTOR 14
+#define MF_VECTOR 16
+#define MC_VECTOR 18
+
+
+#define VMX_EXIT_REASONS \
+	{ EXIT_REASON_EXCEPTION_NMI,		"EXCEPTION_NMI" }, \
+	{ EXIT_REASON_EXTERNAL_INTERRUPT,	"EXTERNAL_INTERRUPT" }, \
+	{ EXIT_REASON_TRIPLE_FAULT,		"TRIPLE_FAULT" }, \
+	{ EXIT_REASON_PENDING_INTERRUPT,	"PENDING_INTERRUPT" }, \
+	{ EXIT_REASON_NMI_WINDOW,		"NMI_WINDOW" }, \
+	{ EXIT_REASON_TASK_SWITCH,		"TASK_SWITCH" }, \
+	{ EXIT_REASON_CPUID,			"CPUID" }, \
+	{ EXIT_REASON_HLT,			"HLT" }, \
+	{ EXIT_REASON_INVLPG,			"INVLPG" }, \
+	{ EXIT_REASON_RDPMC,			"RDPMC" }, \
+	{ EXIT_REASON_RDTSC,			"RDTSC" }, \
+	{ EXIT_REASON_VMCALL,			"VMCALL" }, \
+	{ EXIT_REASON_VMCLEAR,			"VMCLEAR" }, \
+	{ EXIT_REASON_VMLAUNCH,			"VMLAUNCH" }, \
+	{ EXIT_REASON_VMPTRLD,			"VMPTRLD" }, \
+	{ EXIT_REASON_VMPTRST,			"VMPTRST" }, \
+	{ EXIT_REASON_VMREAD,			"VMREAD" }, \
+	{ EXIT_REASON_VMRESUME,			"VMRESUME" }, \
+	{ EXIT_REASON_VMWRITE,			"VMWRITE" }, \
+	{ EXIT_REASON_VMOFF,			"VMOFF" }, \
+	{ EXIT_REASON_VMON,			"VMON" }, \
+	{ EXIT_REASON_CR_ACCESS,		"CR_ACCESS" }, \
+	{ EXIT_REASON_DR_ACCESS,		"DR_ACCESS" }, \
+	{ EXIT_REASON_IO_INSTRUCTION,		"IO_INSTRUCTION" }, \
+	{ EXIT_REASON_MSR_READ,			"MSR_READ" }, \
+	{ EXIT_REASON_MSR_WRITE,		"MSR_WRITE" }, \
+	{ EXIT_REASON_MWAIT_INSTRUCTION,	"MWAIT_INSTRUCTION" }, \
+	{ EXIT_REASON_MONITOR_INSTRUCTION,	"MONITOR_INSTRUCTION" }, \
+	{ EXIT_REASON_PAUSE_INSTRUCTION,	"PAUSE_INSTRUCTION" }, \
+	{ EXIT_REASON_MCE_DURING_VMENTRY,	"MCE_DURING_VMENTRY" }, \
+	{ EXIT_REASON_TPR_BELOW_THRESHOLD,	"TPR_BELOW_THRESHOLD" },\
+	{ EXIT_REASON_APIC_ACCESS,		"APIC_ACCESS" }, \
+	{ EXIT_REASON_EPT_VIOLATION,		"EPT_VIOLATION" }, \
+	{ EXIT_REASON_EPT_MISCONFIG,		"EPT_MISCONFIG" }, \
+	{ EXIT_REASON_WBINVD,			"WBINVD" }
+
+#define SVM_EXIT_REASONS \
+	{ SVM_EXIT_READ_CR0,			"read_cr0" }, \
+	{ SVM_EXIT_READ_CR3,			"read_cr3" }, \
+	{ SVM_EXIT_READ_CR4,			"read_cr4" }, \
+	{ SVM_EXIT_READ_CR8,			"read_cr8" }, \
+	{ SVM_EXIT_WRITE_CR0,			"write_cr0" }, \
+	{ SVM_EXIT_WRITE_CR3,			"write_cr3" }, \
+	{ SVM_EXIT_WRITE_CR4,			"write_cr4" }, \
+	{ SVM_EXIT_WRITE_CR8,			"write_cr8" }, \
+	{ SVM_EXIT_READ_DR0,			"read_dr0" }, \
+	{ SVM_EXIT_READ_DR1,			"read_dr1" }, \
+	{ SVM_EXIT_READ_DR2,			"read_dr2" }, \
+	{ SVM_EXIT_READ_DR3,			"read_dr3" }, \
+	{ SVM_EXIT_WRITE_DR0,			"write_dr0" }, \
+	{ SVM_EXIT_WRITE_DR1,			"write_dr1" }, \
+	{ SVM_EXIT_WRITE_DR2,			"write_dr2" }, \
+	{ SVM_EXIT_WRITE_DR3,			"write_dr3" }, \
+	{ SVM_EXIT_WRITE_DR5,			"write_dr5" }, \
+	{ SVM_EXIT_WRITE_DR7,			"write_dr7" }, \
+	{ SVM_EXIT_EXCP_BASE + DB_VECTOR,	"DB excp" }, \
+	{ SVM_EXIT_EXCP_BASE + BP_VECTOR,	"BP excp" }, \
+	{ SVM_EXIT_EXCP_BASE + UD_VECTOR,	"UD excp" }, \
+	{ SVM_EXIT_EXCP_BASE + PF_VECTOR,	"PF excp" }, \
+	{ SVM_EXIT_EXCP_BASE + NM_VECTOR,	"NM excp" }, \
+	{ SVM_EXIT_EXCP_BASE + MC_VECTOR,	"MC excp" }, \
+	{ SVM_EXIT_INTR,			"interrupt" }, \
+	{ SVM_EXIT_NMI,				"nmi" }, \
+	{ SVM_EXIT_SMI,				"smi" }, \
+	{ SVM_EXIT_INIT,			"init" }, \
+	{ SVM_EXIT_VINTR,			"vintr" }, \
+	{ SVM_EXIT_CPUID,			"cpuid" }, \
+	{ SVM_EXIT_INVD,			"invd" }, \
+	{ SVM_EXIT_HLT,				"hlt" }, \
+	{ SVM_EXIT_INVLPG,			"invlpg" }, \
+	{ SVM_EXIT_INVLPGA,			"invlpga" }, \
+	{ SVM_EXIT_IOIO,			"io" }, \
+	{ SVM_EXIT_MSR,				"msr" }, \
+	{ SVM_EXIT_TASK_SWITCH,			"task_switch" }, \
+	{ SVM_EXIT_SHUTDOWN,			"shutdown" }, \
+	{ SVM_EXIT_VMRUN,			"vmrun" }, \
+	{ SVM_EXIT_VMMCALL,			"hypercall" }, \
+	{ SVM_EXIT_VMLOAD,			"vmload" }, \
+	{ SVM_EXIT_VMSAVE,			"vmsave" }, \
+	{ SVM_EXIT_STGI,			"stgi" }, \
+	{ SVM_EXIT_CLGI,			"clgi" }, \
+	{ SVM_EXIT_SKINIT,			"skinit" }, \
+	{ SVM_EXIT_WBINVD,			"wbinvd" }, \
+	{ SVM_EXIT_MONITOR,			"monitor" }, \
+	{ SVM_EXIT_MWAIT,			"mwait" }, \
+	{ SVM_EXIT_XSETBV,			"xsetbv" }, \
+	{ SVM_EXIT_NPF,				"npf" }
+
+struct exit_reasons_table {
+	unsigned long exit_code;
+	const char *reason;
+};
+
+struct exit_reasons_table vmx_exit_reasons[] = {
+	VMX_EXIT_REASONS
+};
+
+struct exit_reasons_table svm_exit_reasons[] = {
+	SVM_EXIT_REASONS
+};
+
+/* Name of @exit_code from the VMX table (isa == 1) or else the SVM table. */
+static const char *get_exit_reason(long isa, u64 exit_code)
+{
+	int table_size = ARRAY_SIZE(svm_exit_reasons);
+	struct exit_reasons_table *table = svm_exit_reasons;
+
+	if (isa == 1) {
+		table = vmx_exit_reasons;
+		table_size = ARRAY_SIZE(vmx_exit_reasons);
+	}
+
+	for (; table_size--; table++)
+		if (table->exit_code == exit_code)
+			return table->reason;
+
+	/* An unlisted code must not abort the whole report. */
+	pr_err("unknown kvm exit code:%llu on %s\n",
+	       (unsigned long long)exit_code, isa == 1 ? "VMX" : "SVM");
+	return "UNKNOWN";
+}
+
+struct event_key {
+	u64 key;
+	int info;
+};
+
+struct kvm_events_ops {
+	bool (*is_begain_event)(struct event *event, void *data);
+	bool (*is_end_event)(struct event *event);
+	struct event_key (*get_key)(struct event *event, void *data);
+	void (*decode_key)(struct event_key *key, char decode[20]);
+	const char *name;
+};
+
+static bool exit_event_begain(struct event *event, void *data __unused)
+{
+	return !strcmp(event->name, "kvm_exit");
+}
+
+static bool exit_event_end(struct event *event)
+{
+	return !strcmp(event->name, "kvm_entry");
+}
+
+static struct event_key exit_event_get_key(struct event *event, void *data)
+{
+	struct event_key key;
+
+	key.key = raw_field_value(event, "exit_reason", data);
+	key.info = raw_field_value(event, "isa", data);
+	return key;
+}
+
+/* Copy the exit-reason name for @key into @decode, always NUL-terminated. */
+static void exit_event_decode_key(struct event_key *key, char decode[20])
+{
+	const char *exit_reason = get_exit_reason(key->info, key->key);
+
+	snprintf(decode, 20, "%s", exit_reason);
+}
+
+static struct kvm_events_ops exit_events = {
+	.is_begain_event = exit_event_begain,
+	.is_end_event = exit_event_end,
+	.get_key = exit_event_get_key,
+	.decode_key = exit_event_decode_key,
+	.name = "VM-EXIT"
+};
+
+#define KVM_TRACE_MMIO_READ_UNSATISFIED 0
+#define KVM_TRACE_MMIO_READ 1
+#define KVM_TRACE_MMIO_WRITE 2
+static bool mmio_event_begain(struct event *event, void *data)
+{
+	if (!strcmp(event->name, "kvm_mmio")) {
+		long type = raw_field_value(event, "type", data);
+
+		if (type != KVM_TRACE_MMIO_READ_UNSATISFIED)
+			return true;
+	};
+
+	return false;
+}
+
+static bool mmio_event_end(struct event *event)
+{
+	return !strcmp(event->name, "kvm_mmio_done");
+}
+
+static struct event_key mmio_event_get_key(struct event *event, void *data)
+{
+	struct event_key key;
+
+	key.key = raw_field_value(event, "gpa", data);
+	key.info = raw_field_value(event, "type", data);
+
+	return key;
+}
+
+/* Format as "<gpa>:R|W"; a full 64-bit gpa is truncated, not overflowed. */
+static void mmio_event_decode_key(struct event_key *key, char decode[20])
+{
+	snprintf(decode, 20, "%#llx:%s", (unsigned long long)key->key,
+		 key->info == KVM_TRACE_MMIO_READ ? "R" : "W");
+}
+
+static struct kvm_events_ops mmio_events = {
+	.is_begain_event = mmio_event_begain,
+	.is_end_event = mmio_event_end,
+	.get_key = mmio_event_get_key,
+	.decode_key = mmio_event_decode_key,
+	.name = "MMIO Access"
+};
+
+static bool ioport_event_begain(struct event *event, void *data __unused)
+{
+	return !strcmp(event->name, "kvm_pio");
+}
+
+static struct event_key ioport_event_get_key(struct event *event, void *data)
+{
+	struct event_key key;
+
+	key.key = raw_field_value(event, "port", data);
+	key.info = raw_field_value(event, "rw", data);
+
+	return key;
+}
+
+static void ioport_event_decode_key(struct event_key *key, char decode[20])
+{
+	snprintf(decode, 20, "%#llx:%s", (unsigned long long)key->key,
+		 key->info ? "POUT" : "PIN"); /* bounded, NUL-terminated */
+}
+
+static struct kvm_events_ops ioport_events = {
+	.is_begain_event = ioport_event_begain,
+	.is_end_event = mmio_event_end,
+	.get_key = ioport_event_get_key,
+	.decode_key = ioport_event_decode_key,
+	.name = "IO Port Access"
+};
+
+static const char *report_event = "vmexit";
+struct kvm_events_ops *events_ops;
+
+static void register_kvm_events_ops(void)
+{
+	if (!strcmp(report_event, "vmexit"))
+		events_ops = &exit_events;
+	else if (!strcmp(report_event, "mmio"))
+		events_ops = &mmio_events;
+	else if (!strcmp(report_event, "ioport"))
+		events_ops = &ioport_events;
+	else
+		die("Unknown report event:%s\n", report_event);
+}
+
+#define KVM_MAX_VCPUS 254
+
+struct kvm_event {
+	struct list_head hash_entry;
+	struct rb_node rb;
+
+	u64 count;
+	u64 time;
+	struct {
+		u64 count;
+		u64 time;
+	} vcpu[KVM_MAX_VCPUS];
+
+	struct event_key key;
+};
+
+struct {
+	u64 start_time;
+	struct kvm_event *last_event;
+} vcpu_event_record[KVM_MAX_VCPUS] ;
+
+#define EVENTS_BITS			12
+#define EVENTS_CACHE_SIZE	(1UL << EVENTS_BITS)
+
+static u64 total_time;
+static u64 total_count;
+static struct list_head kvm_events_cache[EVENTS_CACHE_SIZE];
+
+static void init_kvm_event_record(void)
+{
+	int i;
+
+	for (i = 0; i < KVM_MAX_VCPUS; i++)
+		vcpu_event_record[i].last_event = NULL;
+
+	for (i = 0; i < (int)EVENTS_CACHE_SIZE; i++)
+		INIT_LIST_HEAD(&kvm_events_cache[i]);
+}
+
+static int kvm_events_hash_fn(u64 key)
+{
+	return hash_long((unsigned long)key, EVENTS_BITS);
+}
+
+static void kvm_event_init(struct kvm_event *event, struct event_key *key)
+{
+	event->key = *key;
+}
+
+static struct kvm_event *find_create_kvm_event(struct event_key *key)
+{
+	struct kvm_event *event;
+	struct list_head *head;
+
+	head = &kvm_events_cache[kvm_events_hash_fn(key->key)];
+	list_for_each_entry(event, head, hash_entry)
+		if (event->key.key == key->key && event->key.info == key->info)
+			return event;
+
+	event = zalloc(sizeof(*event));
+	if (!event)
+		die("Not enough memory\n");
+
+	kvm_event_init(event, key);
+	list_add(&event->hash_entry, head);
+	return event;
+}
+
+static void handle_begain_event(struct event_key *key, int vcpu_id,
+				u64 timestamp)
+{
+	struct kvm_event *event = find_create_kvm_event(key);
+
+	vcpu_event_record[vcpu_id].last_event = event;
+	vcpu_event_record[vcpu_id].start_time = timestamp;
+}
+
+static void update_kvm_event(struct kvm_event *event, int vcpu_id,
+			     u64 time_diff)
+{
+	event->count++;
+	event->time += time_diff;
+	event->vcpu[vcpu_id].count++;
+	event->vcpu[vcpu_id].time += time_diff;
+}
+
+static void handle_end_event(int vcpu_id, u64 timestamp)
+{
+	u64 time_begain, time_diff;
+	struct kvm_event *event;
+
+	event = vcpu_event_record[vcpu_id].last_event;
+	if (!event)
+		return;
+
+	time_begain = vcpu_event_record[vcpu_id].start_time;
+
+	vcpu_event_record[vcpu_id].last_event = NULL;
+	vcpu_event_record[vcpu_id].start_time = 0;
+
+	BUG_ON(!time_begain || timestamp < time_begain);
+
+	time_diff = timestamp - time_begain;
+	update_kvm_event(event, vcpu_id, time_diff);
+}
+
+static int get_vcpu_id(struct event *event, void *data)
+{
+	return raw_field_value(event, "vcpu_id", data);
+}
+
+static void handle_kvm_event(struct event *event, void *data, u64 timestamp)
+{
+	struct event_key key;
+	int vcpu_id; /* read from trace data: range-check before indexing */
+
+	if (events_ops->is_begain_event(event, data)) {
+		key = events_ops->get_key(event, data);
+		vcpu_id = get_vcpu_id(event, data);
+		if (vcpu_id >= 0 && vcpu_id < KVM_MAX_VCPUS)
+			handle_begain_event(&key, vcpu_id, timestamp);
+	} else if (events_ops->is_end_event(event)) {
+		vcpu_id = get_vcpu_id(event, data);
+		if (vcpu_id >= 0 && vcpu_id < KVM_MAX_VCPUS)
+			handle_end_event(vcpu_id, timestamp);
+	}
+}
+
+typedef int (*key_cmp_fun)(struct kvm_event*, struct kvm_event*, int);
+struct kvm_event_key {
+	const char *name;
+	key_cmp_fun key;
+};
+
+static int trace_vcpu = -1;
+#define COMPARE_EVENT_KEY(member)					\
+static u64 get_event_ ##member(struct kvm_event *event, int vcpu)	\
+{									\
+	if (vcpu == -1)							\
+		return event->member;					\
+									\
+	return event->vcpu[vcpu].member;				\
+}									\
+									\
+static int compare_kvm_event_ ## member(struct kvm_event *one,		\
+					struct kvm_event *two, int vcpu)\
+{									\
+	return get_event_ ##member(one, vcpu) > get_event_ ##member(two, vcpu);\
+}
+
+COMPARE_EVENT_KEY(count);
+COMPARE_EVENT_KEY(time);
+
+#define DEF_SORT_NAME_KEY(name, compare_key)	\
+	{ #name, compare_kvm_event_ ## compare_key }
+
+static struct kvm_event_key keys[] = {
+	DEF_SORT_NAME_KEY(sample, count),
+	DEF_SORT_NAME_KEY(time, time),
+	{ NULL, NULL }
+};
+
+static const char *sort_key = "sample";
+static key_cmp_fun compare;
+
+static void select_key(void)
+{
+	int i;
+
+	for (i = 0; keys[i].name; i++) {
+		if (!strcmp(keys[i].name, sort_key)) {
+			compare = keys[i].key;
+			return;
+		}
+	}
+
+	die("Unknown compare key:%s\n", sort_key);
+}
+
+static struct rb_root result;
+static void insert_to_result(struct kvm_event *event, key_cmp_fun bigger,
+			     int vcpu)
+{
+	struct rb_node **rb = &result.rb_node;
+	struct rb_node *parent = NULL;
+	struct kvm_event *p;
+
+	while (*rb) {
+		p = container_of(*rb, struct kvm_event, rb);
+		parent = *rb;
+
+		if (bigger(event, p, vcpu))
+			rb = &(*rb)->rb_left;
+		else
+			rb = &(*rb)->rb_right;
+	}
+
+	rb_link_node(&event->rb, parent, rb);
+	rb_insert_color(&event->rb, &result);
+}
+
+static void update_total_count(struct kvm_event *event, int vcpu)
+{
+	total_count += get_event_count(event, vcpu);
+	total_time += get_event_time(event, vcpu);
+}
+
+static bool event_is_valid(struct kvm_event *event, int vcpu)
+{
+	return get_event_count(event, vcpu);
+}
+
+static void sort_result(int vcpu)
+{
+	unsigned int i;
+	struct kvm_event *event;
+
+	for (i = 0; i < EVENTS_CACHE_SIZE; i++)
+		list_for_each_entry(event, &kvm_events_cache[i], hash_entry)
+			if (event_is_valid(event, vcpu)) {
+				update_total_count(event, vcpu);
+				insert_to_result(event, compare, vcpu);
+			}
+}
+
+/* returns left most element of result, and erase it */
+static struct kvm_event *pop_from_result(void)
+{
+	struct rb_node *node = result.rb_node;
+
+	if (!node)
+		return NULL;
+
+	while (node->rb_left)
+		node = node->rb_left;
+
+	rb_erase(node, &result);
+	return container_of(node, struct kvm_event, rb);
+}
+
+static void print_vcpu_info(int vcpu)
+{
+	pr_info("Analyze events for ");
+
+	if (vcpu == -1)
+		pr_info("all VCPUs:\n\n");
+	else
+		pr_info("VCPU %d:\n\n", vcpu);
+}
+
+/* Print the sorted per-key table for @vcpu (-1: all vcpus), then the totals. */
+static void print_result(int vcpu)
+{
+	char decode[20];
+	struct kvm_event *event;
+
+	pr_info("\n\n");
+	print_vcpu_info(vcpu);
+	pr_info("%20s ", events_ops->name);
+	pr_info("%10s ", "Samples");
+	pr_info("%13s ", "Samples%");
+	pr_info("%13s ", "Time%");
+	pr_info("%15s ", "Avg time");
+	pr_info("\n\n");
+
+	while ((event = pop_from_result())) {
+		u64 ecount, etime;
+
+		ecount = get_event_count(event, vcpu);
+		etime = get_event_time(event, vcpu);
+
+		events_ops->decode_key(&event->key, decode);
+		pr_info("%20s ", decode);
+		pr_info("%10llu ", (unsigned long long)ecount);
+		pr_info("%12.2f%% ", (double)ecount / total_count * 100);
+		pr_info("%12.2f%% ", (double)etime / total_time * 100);
+		pr_info("%13.2fus ", (double)etime / ecount / 1e3);
+		pr_info("\n");
+	}
+
+	pr_info("\nTotal Samples:%llu, Total events handled time:%.2fus.\n\n",
+		(unsigned long long)total_count, total_time / 1e3);
+}
+
+static void process_raw_event(void *data, int __unused cpu, u64 timestamp,
+			      struct thread *thread __unused)
+{
+	struct event *event;
+	int type;
+
+	type = trace_parse_common_type(data);
+	event = trace_find_event(type);
+
+	return handle_kvm_event(event, data, timestamp);
+}
+
+static int process_sample_event(struct perf_tool *tool __used,
+				union perf_event *event,
+				struct perf_sample *sample,
+				struct perf_evsel *evsel __used,
+				struct machine *machine)
+{
+	struct thread *thread = machine__findnew_thread(machine, sample->tid);
+
+	if (thread == NULL) {
+		pr_debug("problem processing %d event, skipping it.\n",
+			event->header.type);
+		return -1;
+	}
+
+	process_raw_event(sample->raw_data, sample->cpu, sample->time, thread);
+
+	return 0;
+}
+
+static struct perf_tool eops = {
+	.sample			= process_sample_event,
+	.comm			= perf_event__process_comm,
+	.ordered_samples	= true,
+};
+
+static char const *input_name = "perf.data";
+
+static int read_events(void)
+{
+	struct perf_session *session;
+
+	session = perf_session__new(input_name, O_RDONLY, 0, false, &eops);
+	if (!session)
+		die("Initializing perf session failed\n");
+
+	if (!perf_session__has_traces(session, "kvm record"))
+		return -1;
+
+	return perf_session__process_events(session, &eops);
+}
+
+/* Abort unless @vcpu is -1 (all vcpus) or a valid per-vcpu array index. */
+static void verify_vcpu(int vcpu)
+{
+	if (vcpu != -1 && (vcpu >= KVM_MAX_VCPUS || vcpu < 0))
+		die("Unknown vcpu:%d.\n", vcpu);
+}
+
+static int kvm_events_report(int vcpu)
+{
+	verify_vcpu(vcpu);
+	select_key();
+	register_kvm_events_ops();
+	init_kvm_event_record();
+	setup_pager();
+	read_events();
+	sort_result(vcpu);
+	print_result(vcpu);
+	return 0;
+}
+
+static const char * const record_args[] = {
+	"record",
+	"-a",
+	"-R",
+	"-f",
+	"-m", "1024",
+	"-c", "1",
+	"-e", "kvm:kvm_entry",
+	"-e", "kvm:kvm_exit",
+	"-e", "kvm:kvm_mmio",
+	"-e", "kvm:kvm_mmio_done",
+	"-e", "kvm:kvm_pio"
+};
+
+static int kvm_events_record(int argc, const char **argv)
+{
+	unsigned int rec_argc, i, j;
+	const char **rec_argv;
+
+	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
+	rec_argv = calloc(rec_argc + 1, sizeof(char *));
+
+	if (rec_argv == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < ARRAY_SIZE(record_args); i++)
+		rec_argv[i] = strdup(record_args[i]);
+
+	for (j = 1; j < (unsigned int)argc; j++, i++)
+		rec_argv[i] = argv[j];
+
+	BUG_ON(i != rec_argc);
+
+	return cmd_record(i, rec_argv, NULL);
+}
+
+static const char * const kvm_events_report_usage[] = {
+	"perf kvm-events report [<options>]",
+	NULL
+};
+
+static const struct option kvm_events_report_options[] = {
+	OPT_STRING(0, "event", &report_event, "report event",
+		    "event for reporting: vmexit, mmio, ioport"),
+	OPT_INTEGER(0, "vcpu", &trace_vcpu,
+		    "vcpu id to report"),
+	OPT_STRING('k', "key", &sort_key, "sort-key",
+		    "key for sorting: sample(sort by samples number)"
+		    " time (sort by time%)"),
+	OPT_END()
+};
+
+static const char * const kvm_events_usage[] = {
+	"perf kvm-events [<options>] {record|report}",
+	NULL
+};
+
+static const struct option kvm_events_options[] = {
+	OPT_STRING('i', "input", &input_name, "file", "input file name"),
+	OPT_INCR('v', "verbose", &verbose,
+		 "be more verbose (show symbol address, etc)"),
+	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
+		 "dump raw trace in ASCII"),
+	OPT_END()
+};
+
+int cmd_kvm_events(int argc, const char **argv, const char *prefix __used)
+{
+	argc = parse_options(argc, argv, kvm_events_options, kvm_events_usage,
+			     PARSE_OPT_STOP_AT_NON_OPTION);
+	if (!argc)
+		usage_with_options(kvm_events_usage, kvm_events_options);
+
+	symbol__init();
+
+	if (!strncmp(argv[0], "rec", 3))
+		return kvm_events_record(argc, argv);
+
+	if (!strncmp(argv[0], "report", 6)) {
+		if (argc) {
+			argc = parse_options(argc, argv,
+					     kvm_events_report_options,
+					     kvm_events_report_usage, 0);
+			if (argc)
+				usage_with_options(kvm_events_report_usage,
+						   kvm_events_report_options);
+		}
+		return kvm_events_report(trace_vcpu);
+	}
+
+	usage_with_options(kvm_events_usage, kvm_events_options);
+	return 0;
+}
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index b382bd5..fb19e3d 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -33,6 +33,7 @@ extern int cmd_probe(int argc, const char **argv, const char *prefix);
 extern int cmd_kmem(int argc, const char **argv, const char *prefix);
 extern int cmd_lock(int argc, const char **argv, const char *prefix);
 extern int cmd_kvm(int argc, const char **argv, const char *prefix);
+extern int cmd_kvm_events(int argc, const char **argv, const char *prefix);
 extern int cmd_test(int argc, const char **argv, const char *prefix);
 extern int cmd_inject(int argc, const char **argv, const char *prefix);

diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 2b2e225..ab85ea5 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -317,6 +317,7 @@ static void handle_internal_command(int argc, const char **argv)
 		{ "kmem",	cmd_kmem,	0 },
 		{ "lock",	cmd_lock,	0 },
 		{ "kvm",	cmd_kvm,	0 },
+		{ "kvm-events", cmd_kvm_events, 0},
 		{ "test",	cmd_test,	0 },
 		{ "inject",	cmd_inject,	0 },
 	};
-- 
1.7.7.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ