lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1424436249-3572-2-git-send-email-adrian.hunter@intel.com>
Date:	Fri, 20 Feb 2015 14:44:08 +0200
From:	Adrian Hunter <adrian.hunter@...el.com>
To:	Peter Zijlstra <peterz@...radead.org>,
	Ingo Molnar <mingo@...hat.com>
Cc:	Arnaldo Carvalho de Melo <acme@...nel.org>,
	linux-kernel@...r.kernel.org, David Ahern <dsahern@...il.com>,
	Frederic Weisbecker <fweisbec@...il.com>,
	Jiri Olsa <jolsa@...hat.com>,
	Namhyung Kim <namhyung@...il.com>,
	Paul Mackerras <paulus@...ba.org>,
	Stephane Eranian <eranian@...gle.com>,
	John Stultz <john.stultz@...aro.org>,
	Thomas Gleixner <tglx@...utronix.de>,
	Pawel Moll <pawel.moll@....com>,
	Steven Rostedt <rostedt@...dmis.org>,
	Andi Kleen <ak@...ux.intel.com>,
	Mathieu Poirier <mathieu.poirier@...aro.org>
Subject: [PATCH V2 1/2] perf: Sample additional clock value

This is needed to allow perf event samples to be
synchronized with data from other sources, and
in particular, sources like Intel Processor Trace
(Intel PT) where the hardware produces a trace
with hardware-defined timestamps (i.e. TSC).

For example, to decode an Intel PT trace, the decoder
must walk the object code. To determine what object
code is running, the decoder must track events like
sched_switch and MMAP and match them against the trace
data using the timestamps.

Note that it is not the accuracy of the time sources
that is at issue but instead the ability to correctly
order events.

On modern machines, perf_clock is currently directly
related to TSC. However, that will change when
perf_clock becomes CLOCK_MONOTONIC.

Consequently, add PERF_SAMPLE_CLOCK to sample some
other clock. The patch allows for 16 possible clock
selections; initially the only one available is a
processor trace clock, which will be TSC on x86.

Although there are only 16 possible clock selections,
it is envisioned that POSIX clock ids would be a
single selection, with the actual clock id provided
in another perf_event_attr member.

Based-on-patch-by: Pawel Moll <pawel.moll@....com>
Signed-off-by: Adrian Hunter <adrian.hunter@...el.com>
---
 include/linux/perf_event.h      |  3 ++-
 include/uapi/linux/perf_event.h | 19 +++++++++++++++++--
 kernel/events/core.c            | 30 ++++++++++++++++++++++++++++++
 kernel/events/internal.h        |  4 ++++
 4 files changed, 53 insertions(+), 3 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index efe2d2d..9385140 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -655,7 +655,7 @@ extern void perf_pmu_migrate_context(struct pmu *pmu,
 				int src_cpu, int dst_cpu);
 extern u64 perf_event_read_value(struct perf_event *event,
 				 u64 *enabled, u64 *running);
-
+u64 perf_sample_clock_pt(void);
 
 struct perf_sample_data {
 	/*
@@ -687,6 +687,7 @@ struct perf_sample_data {
 		u32	cpu;
 		u32	reserved;
 	}				cpu_entry;
+	u64				clock;
 	struct perf_callchain_entry	*callchain;
 
 	/*
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index be9ff06..2fccfc0 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -139,8 +139,9 @@ enum perf_event_sample_format {
 	PERF_SAMPLE_IDENTIFIER			= 1U << 16,
 	PERF_SAMPLE_TRANSACTION			= 1U << 17,
 	PERF_SAMPLE_REGS_INTR			= 1U << 18,
+	PERF_SAMPLE_CLOCK			= 1U << 19,
 
-	PERF_SAMPLE_MAX = 1U << 19,		/* non-ABI */
+	PERF_SAMPLE_MAX = 1U << 20,		/* non-ABI */
 };
 
 /*
@@ -228,6 +229,16 @@ enum {
 };
 
 /*
+ * Values to determine clock to sample.
+ */
+enum perf_sample_clock_type {
+	/* Processor trace clock (TSC on x86) */
+	PERF_SAMPLE_CLOCK_PT		= 0,
+
+	PERF_SAMPLE_CLOCK_MAX		/* non-ABI */
+};
+
+/*
  * The format of the data returned by read() on a perf event fd,
  * as specified by attr.read_format:
  *
@@ -328,7 +339,9 @@ struct perf_event_attr {
 				exclude_callchain_user   : 1, /* exclude user callchains */
 				mmap2          :  1, /* include mmap with inode data     */
 				comm_exec      :  1, /* flag comm events that are due to an exec */
-				__reserved_1   : 39;
+				/* clock: see enum perf_sample_clock_type */
+				clock          :  4, /* which clock */
+				__reserved_1   : 35;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -601,6 +614,7 @@ enum perf_event_type {
 	 * 	{ u64			id;       } && PERF_SAMPLE_ID
 	 * 	{ u64			stream_id;} && PERF_SAMPLE_STREAM_ID
 	 * 	{ u32			cpu, res; } && PERF_SAMPLE_CPU
+	 *	{ u64			clock;    } && PERF_SAMPLE_CLOCK
 	 *	{ u64			id;	  } && PERF_SAMPLE_IDENTIFIER
 	 * } && perf_event_attr::sample_id_all
 	 *
@@ -746,6 +760,7 @@ enum perf_event_type {
 	 *	{ u64			transaction; } && PERF_SAMPLE_TRANSACTION
 	 *	{ u64			abi; # enum perf_sample_regs_abi
 	 *	  u64			regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
+	 *	{ u64			clock;    } && PERF_SAMPLE_CLOCK
 	 * };
 	 */
 	PERF_RECORD_SAMPLE			= 9,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 799f034..dc39915 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1323,6 +1323,9 @@ static void perf_event__id_header_size(struct perf_event *event)
 	if (sample_type & PERF_SAMPLE_CPU)
 		size += sizeof(data->cpu_entry);
 
+	if (sample_type & PERF_SAMPLE_CLOCK)
+		size += sizeof(data->clock);
+
 	event->id_header_size = size;
 }
 
@@ -4915,6 +4918,11 @@ perf_output_sample_ustack(struct perf_output_handle *handle, u64 dump_size,
 	}
 }
 
+u64 __weak perf_sample_clock_pt(void)
+{
+	return 0;
+}
+
 static void __perf_event_header__init_id(struct perf_event_header *header,
 					 struct perf_sample_data *data,
 					 struct perf_event *event)
@@ -4943,6 +4951,16 @@ static void __perf_event_header__init_id(struct perf_event_header *header,
 		data->cpu_entry.cpu	 = raw_smp_processor_id();
 		data->cpu_entry.reserved = 0;
 	}
+
+	if (sample_type & PERF_SAMPLE_CLOCK) {
+		switch (event->attr.clock) {
+		case PERF_SAMPLE_CLOCK_PT:
+			data->clock = perf_sample_clock_pt();
+			break;
+		default:
+			data->clock = 0;
+		}
+	}
 }
 
 void perf_event_header__init_id(struct perf_event_header *header,
@@ -4973,6 +4991,9 @@ static void __perf_event__output_id_sample(struct perf_output_handle *handle,
 	if (sample_type & PERF_SAMPLE_CPU)
 		perf_output_put(handle, data->cpu_entry);
 
+	if (sample_type & PERF_SAMPLE_CLOCK)
+		perf_output_put(handle, data->clock);
+
 	if (sample_type & PERF_SAMPLE_IDENTIFIER)
 		perf_output_put(handle, data->id);
 }
@@ -5218,6 +5239,9 @@ void perf_output_sample(struct perf_output_handle *handle,
 		}
 	}
 
+	if (sample_type & PERF_SAMPLE_CLOCK)
+		perf_output_put(handle, data->clock);
+
 	if (!event->attr.watermark) {
 		int wakeup_events = event->attr.wakeup_events;
 
@@ -7632,6 +7656,12 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
 
 	if (attr->sample_type & PERF_SAMPLE_REGS_INTR)
 		ret = perf_reg_validate(attr->sample_regs_intr);
+
+	if ((attr->sample_type & PERF_SAMPLE_CLOCK) &&
+	    (attr->clock >= PERF_SAMPLE_CLOCK_MAX ||
+	     (!HAVE_PERF_SAMPLE_CLOCK_PT &&
+	      attr->clock == PERF_SAMPLE_CLOCK_PT)))
+		return -EINVAL;
 out:
 	return ret;
 
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 9f6ce9b..418142f 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -228,4 +228,8 @@ static inline bool arch_perf_have_user_stack_dump(void)
 #define perf_user_stack_pointer(regs) 0
 #endif /* CONFIG_HAVE_PERF_USER_STACK_DUMP */
 
+#ifndef HAVE_PERF_SAMPLE_CLOCK_PT
+#define HAVE_PERF_SAMPLE_CLOCK_PT 0
+#endif
+
 #endif /* _KERNEL_EVENTS_INTERNAL_H */
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ