lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1441691364-5612-4-git-send-email-indou.takao@jp.fujitsu.com>
Date:	Tue, 8 Sep 2015 14:49:23 +0900
From:	Takao Indoh <indou.takao@...fujitsu.com>
To:	Thomas Gleixner <tglx@...utronix.de>,
	Ingo Molnar <mingo@...hat.com>,
	"H. Peter Anvin" <hpa@...or.com>,
	Peter Zijlstra <a.p.zijlstra@...llo.nl>,
	Arnaldo Carvalho de Melo <acme@...nel.org>,
	Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
	Vivek Goyal <vgoyal@...hat.com>,
	Steven Rostedt <rostedt@...dmis.org>
CC:	<linux-kernel@...r.kernel.org>, <x86@...nel.org>
Subject: [PATCH v2 3/4] perf/x86/intel/pt: Add Intel PT logger

This patch adds an Intel PT logging feature. When the system boots with
the "intel_pt_log" parameter, log buffers for Intel PT are allocated and
logging starts; the hardware then writes processor flow information into
the log buffer, like a flight recorder. This is very helpful when
investigating the cause of a kernel panic.

The log buffer size is specified by the parameter
"intel_pt_log_buf_len=<size>". The buffer is used as a circular buffer,
so old events are overwritten by new ones.

Signed-off-by: Takao Indoh <indou.takao@...fujitsu.com>
---
 arch/x86/Kconfig                          |   16 +++
 arch/x86/include/asm/intel_pt_log.h       |   13 ++
 arch/x86/kernel/cpu/Makefile              |    2 +
 arch/x86/kernel/cpu/intel_pt_log.c        |  178 +++++++++++++++++++++++++++++
 arch/x86/kernel/cpu/perf_event_intel_pt.c |    6 +
 5 files changed, 215 insertions(+), 0 deletions(-)
 create mode 100644 arch/x86/include/asm/intel_pt_log.h
 create mode 100644 arch/x86/kernel/cpu/intel_pt_log.c

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f37010f..2b99ba2 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1722,6 +1722,22 @@ config X86_INTEL_MPX
 
 	  If unsure, say N.
 
+config X86_INTEL_PT_LOG
+	prompt "Intel PT logger"
+	def_bool n
+	depends on PERF_EVENTS && CPU_SUP_INTEL
+	---help---
+	  Intel PT is a hardware feature that can capture information
+	  about program execution flow. Once Intel PT is enabled, the
+	  events which change program flow, like branch instructions,
+	  exceptions, interrupts, traps and so on are logged in
+	  memory.
+
+	  This option enables starting the Intel PT logging feature at
+	  boot time. When a kernel panic occurs, the Intel PT log buffer
+	  can be retrieved from the crash dump file, making it possible
+	  to reconstruct the detailed flow that led to the panic.
+
 config EFI
 	bool "EFI runtime service support"
 	depends on ACPI
diff --git a/arch/x86/include/asm/intel_pt_log.h b/arch/x86/include/asm/intel_pt_log.h
new file mode 100644
index 0000000..cef63f7
--- /dev/null
+++ b/arch/x86/include/asm/intel_pt_log.h
@@ -0,0 +1,13 @@
+#ifndef __INTEL_PT_LOG_H__
+#define __INTEL_PT_LOG_H__
+
+struct pmu;
+
+#if defined(CONFIG_X86_INTEL_PT_LOG)
+
+#include <linux/perf_event.h>
+
+/*
+ * pt_log_start() - start Intel PT logging on all online CPUs if the
+ * "intel_pt_log" boot parameter was given. @pmu is the registered
+ * intel_pt PMU.
+ */
+void pt_log_start(struct pmu *pmu);
+
+/*
+ * save_intel_pt_registers() - save the Intel PT MSRs of the calling CPU
+ * and stop tracing on it.
+ */
+void save_intel_pt_registers(void);
+
+#else /* !CONFIG_X86_INTEL_PT_LOG */
+
+/* No-op stubs so that callers do not need their own #ifdef. */
+static inline void pt_log_start(struct pmu *pmu) { }
+static inline void save_intel_pt_registers(void) { }
+
+#endif /* CONFIG_X86_INTEL_PT_LOG */
+
+#endif /* __INTEL_PT_LOG_H__ */
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 4eb065c..67c17f0 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -48,6 +48,8 @@ obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE)	+= perf_event_intel_uncore.o \
 					   perf_event_intel_uncore_nhmex.o
 obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_msr.o
 obj-$(CONFIG_CPU_SUP_AMD)		+= perf_event_msr.o
+
+obj-$(CONFIG_X86_INTEL_PT_LOG)		+= intel_pt_log.o
 endif
 
 
diff --git a/arch/x86/kernel/cpu/intel_pt_log.c b/arch/x86/kernel/cpu/intel_pt_log.c
new file mode 100644
index 0000000..eb345fd
--- /dev/null
+++ b/arch/x86/kernel/cpu/intel_pt_log.c
@@ -0,0 +1,178 @@
+/*
+ * Intel Processor Trace Logger
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/trace_events.h>
+#include <asm/intel_pt_log.h>
+
+/*
+ * perf sample_type masks for the three events the logger creates per cpu.
+ * All of them share SAMPLE_TYPE_BASE; the intel_pt and sched_switch events
+ * additionally record the CPU and raw data, and sched_switch the period.
+ */
+#define SAMPLE_TYPE_BASE \
+	(PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME | \
+	 PERF_SAMPLE_IDENTIFIER)
+#define SAMPLE_TYPE_PT \
+	(SAMPLE_TYPE_BASE | PERF_SAMPLE_CPU | PERF_SAMPLE_RAW)
+#define SAMPLE_TYPE_SCHED \
+	(SAMPLE_TYPE_BASE | PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD | \
+	 PERF_SAMPLE_RAW)
+#define SAMPLE_TYPE_DUMMY \
+	(SAMPLE_TYPE_BASE)
+
+/*
+ * intel_pt: attributes of the trace event itself. One event is created per
+ * cpu by pt_enable_kernel_counter(); .type is filled in by pt_log_start()
+ * from the registered PMU.
+ */
+static struct perf_event_attr pt_attr_pt = {
+	.size		= sizeof(pt_attr_pt),
+	.config		= 0x400,	/* bit10: TSCEn */
+	.sample_type	= SAMPLE_TYPE_PT,
+	.read_format	= PERF_FORMAT_ID,
+	.sample_id_all	= 1,
+	.inherit	= 1,
+	.pinned		= 1,
+	.exclude_guest	= 1,
+};
+
+/*
+ * sched:sched_switch: side-band tracepoint event. .config (the tracepoint
+ * id) and .sample_period are filled in by pt_log_start().
+ */
+static struct perf_event_attr pt_attr_sched = {
+	.size		= sizeof(pt_attr_sched),
+	.type		= PERF_TYPE_TRACEPOINT,
+	.sample_type	= SAMPLE_TYPE_SCHED,
+	.read_format	= PERF_FORMAT_ID,
+	.sample_id_all	= 1,
+	.inherit	= 1,
+	.exclude_guest	= 1,
+};
+
+/*
+ * dummy:u: side-band software event. It counts nothing itself (kernel and
+ * hypervisor are excluded) and is created for the comm/task records it
+ * requests.
+ */
+static struct perf_event_attr pt_attr_dummy = {
+	.size		= sizeof(pt_attr_dummy),
+	.type		= PERF_TYPE_SOFTWARE,
+	.config		= PERF_COUNT_SW_DUMMY,
+	.sample_type	= SAMPLE_TYPE_DUMMY,
+	.read_format	= PERF_FORMAT_ID,
+	.sample_id_all	= 1,
+	.inherit	= 1,
+	.exclude_kernel	= 1,
+	.exclude_hv	= 1,
+	.comm		= 1,
+	.comm_exec	= 1,
+	.task		= 1,
+};
+
+static int pt_log_enabled; /* set to 1 by the "intel_pt_log" parameter */
+static int pt_log_buf_nr_pages = 128; /* number of pages for log buffer */
+static struct cpumask pt_log_cpu_mask; /* CPUs whose intel_pt counter exists */
+
+static DEFINE_PER_CPU(struct perf_event *, pt_perf_event_pt); /* intel_pt */
+static DEFINE_PER_CPU(struct perf_event *, pt_perf_event_sched); /* sched_switch */
+static DEFINE_PER_CPU(struct perf_event *, pt_perf_event_dummy); /* dummy:u */
+
+/*
+ * Saved registers on panic: per-cpu copies of the RTIT MSRs, written by
+ * save_intel_pt_registers().
+ */
+static DEFINE_PER_CPU(u64, saved_msr_ctl);
+static DEFINE_PER_CPU(u64, saved_msr_status);
+static DEFINE_PER_CPU(u64, saved_msr_output_base);
+static DEFINE_PER_CPU(u64, saved_msr_output_mask);
+
+/*
+ * Save the Intel PT MSRs of the calling CPU into the per-cpu saved_msr_*
+ * variables and stop tracing on this CPU.
+ *
+ * Does nothing on a CPU that is not in pt_log_cpu_mask, i.e. one for which
+ * pt_log_start() did not manage to create the intel_pt counter.
+ */
+void save_intel_pt_registers(void)
+{
+	u64 rtit_ctl, rtit_status, output_base, output_mask;
+	int this_cpu = smp_processor_id();
+
+	if (!cpumask_test_cpu(this_cpu, &pt_log_cpu_mask))
+		return;
+
+	/* Keep RTIT_CTL as it was found, including the TraceEn bit */
+	rdmsrl(MSR_IA32_RTIT_CTL, rtit_ctl);
+	per_cpu(saved_msr_ctl, this_cpu) = rtit_ctl;
+
+	/* Clear TraceEn before the remaining registers are read */
+	rtit_ctl &= ~RTIT_CTL_TRACEEN;
+	wrmsrl(MSR_IA32_RTIT_CTL, rtit_ctl);
+
+	rdmsrl(MSR_IA32_RTIT_STATUS, rtit_status);
+	rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, output_base);
+	rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, output_mask);
+
+	per_cpu(saved_msr_status, this_cpu) = rtit_status;
+	per_cpu(saved_msr_output_base, this_cpu) = output_base;
+	per_cpu(saved_msr_output_mask, this_cpu) = output_mask;
+}
+
+/*
+ * Create the kernel counters the logger uses on @cpu: the intel_pt event
+ * itself and two side-band events (sched:sched_switch and dummy:u).
+ *
+ * Only a failure to create the intel_pt event is fatal. A side-band event
+ * that cannot be created is reported with a warning and skipped.
+ *
+ * Returns 0 on success, or the negative error code from the failed
+ * creation of the intel_pt event.
+ */
+static int pt_enable_kernel_counter(int cpu)
+{
+	struct perf_event *event, *prev;
+
+	/* Create counter for intel_pt */
+	event = perf_event_create_kernel_counter_with_buffer(&pt_attr_pt,
+			cpu, NULL, NULL, NULL, 0,
+			pt_log_buf_nr_pages, pt_log_buf_nr_pages, NULL);
+
+	if (IS_ERR(event)) {
+		/* PTR_ERR() gives the errno; IS_ERR() would always print 1 */
+		pr_err("failed to create counter for pt: cpu=%d, err=%ld\n",
+			cpu, PTR_ERR(event));
+		return PTR_ERR(event);
+	}
+	per_cpu(pt_perf_event_pt, cpu) = event;
+
+	/*
+	 * 'prev' tracks the most recently created valid event. It is what
+	 * is handed to the next creation call, so that the ERR_PTR left
+	 * behind by a failed side-band creation is never passed on as if it
+	 * were an event.
+	 */
+	prev = event;
+
+	/* Create counter for side-band data (sched:sched_switch) */
+	event = perf_event_create_kernel_counter_with_buffer(&pt_attr_sched,
+			cpu, NULL, NULL, NULL, 0, 0, 0, prev);
+
+	if (IS_ERR(event)) {
+		pr_warn("failed to create counter for sched: cpu=%d, err=%ld\n",
+			cpu, PTR_ERR(event));
+	} else {
+		per_cpu(pt_perf_event_sched, cpu) = event;
+		prev = event;
+	}
+
+	/* Create counter for side-band data (dummy:u) */
+	event = perf_event_create_kernel_counter_with_buffer(&pt_attr_dummy,
+			cpu, NULL, NULL, NULL, 0, 0, 0, prev);
+
+	if (IS_ERR(event))
+		pr_warn("failed to create counter for dummy: cpu=%d, err=%ld\n",
+			cpu, PTR_ERR(event));
+	else
+		per_cpu(pt_perf_event_dummy, cpu) = event;
+
+	return 0;
+}
+
+/*
+ * Handle the "intel_pt_log_buf_len=<size>" boot parameter.
+ *
+ * <size> is a byte count and may carry a K/M/G suffix; it is converted to
+ * a number of pages. A size that yields no whole page, or a page count
+ * that does not fit pt_log_buf_nr_pages, is rejected and the current
+ * value is kept.
+ *
+ * The parameter is registered with the trailing '=' so that @str points
+ * at the value itself rather than at "=<size>".
+ *
+ * Always returns 1 to mark the parameter as handled.
+ */
+static __init int pt_log_buf_setup(char *str)
+{
+	unsigned long long nr_pages;
+
+	if (!str || !*str)
+		return 1;
+
+	nr_pages = memparse(str, &str) >> PAGE_SHIFT;
+
+	if (!nr_pages || nr_pages > INT_MAX) {
+		pr_warn("intel_pt_log_buf_len ignored, keeping %d pages\n",
+			pt_log_buf_nr_pages);
+		return 1;
+	}
+
+	pt_log_buf_nr_pages = nr_pages;
+
+	return 1;
+}
+__setup("intel_pt_log_buf_len=", pt_log_buf_setup);
+
+/*
+ * Handle the "intel_pt_log" boot parameter: request that pt_log_start()
+ * starts logging. @str is not looked at.
+ *
+ * Always returns 1 to mark the parameter as handled.
+ */
+static __init int pt_log_setup(char *str)
+{
+	pt_log_enabled = 1;
+
+	return 1;
+}
+__setup("intel_pt_log", pt_log_setup);
+
+/*
+ * Start Intel PT logging on every online CPU.
+ *
+ * Does nothing unless "intel_pt_log" was given on the command line. The
+ * event attributes that are only known at run time (the PMU type of
+ * @pmu and the id of the sched:sched_switch tracepoint) are filled in
+ * first, then the counters are created cpu by cpu. CPUs on which that
+ * succeeds are recorded in pt_log_cpu_mask.
+ */
+__init void pt_log_start(struct pmu *pmu)
+{
+	int sched_type;
+	int cpu;
+
+	cpumask_clear(&pt_log_cpu_mask);
+
+	if (!pt_log_enabled)
+		return;
+
+	sched_type = perf_trace_event_get_type_by_name("sched",
+						       "sched_switch");
+	if (!sched_type) {
+		pr_err("Cannot find sched:sched_switch event\n");
+		return;
+	}
+
+	pt_attr_pt.type = pmu->type;
+	pt_attr_sched.config = sched_type;
+	pt_attr_sched.sample_period = 1;
+
+	get_online_cpus();
+	for_each_online_cpu(cpu) {
+		if (pt_enable_kernel_counter(cpu))
+			continue;
+		cpumask_set_cpu(cpu, &pt_log_cpu_mask);
+	}
+	put_online_cpus();
+}
+
diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c
index 4216928..5154670 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_pt.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c
@@ -27,6 +27,7 @@
 #include <asm/perf_event.h>
 #include <asm/insn.h>
 #include <asm/io.h>
+#include <asm/intel_pt_log.h>
 
 #include "perf_event.h"
 #include "intel_pt.h"
@@ -1173,6 +1174,11 @@ static __init int pt_init(void)
 	pt_pmu.pmu.free_aux	= pt_buffer_free_aux;
 	ret = perf_pmu_register(&pt_pmu.pmu, "intel_pt", -1);
 
+#ifdef CONFIG_X86_INTEL_PT_LOG
+	if (!ret)
+		pt_log_start(&pt_pmu.pmu);
+#endif
+
 	return ret;
 }
 arch_initcall(pt_init);
-- 
1.7.1


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ