lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1480713561-6617-2-git-send-email-kan.liang@intel.com>
Date:   Fri,  2 Dec 2016 16:19:09 -0500
From:   kan.liang@...el.com
To:     peterz@...radead.org, mingo@...hat.com, acme@...nel.org,
        linux-kernel@...r.kernel.org
Cc:     alexander.shishkin@...ux.intel.com, tglx@...utronix.de,
        namhyung@...nel.org, jolsa@...nel.org, adrian.hunter@...el.com,
        wangnan0@...wei.com, mark.rutland@....com, andi@...stfloor.org,
        Kan Liang <kan.liang@...el.com>
Subject: [PATCH V2 01/13] perf/core: Introduce PERF_RECORD_OVERHEAD

From: Kan Liang <kan.liang@...el.com>

A new perf record is introduced to export perf overhead information to
userspace, so that the user can measure the overhead of sampling directly.
If the user doesn't want to use this feature, it can be switched off by
configuring the user space tool.

To output the overhead information, the patch reuses the existing event
log mechanism. Note, however, that the reported overhead is the system
overhead, not per-event overhead.

Signed-off-by: Kan Liang <kan.liang@...el.com>
---
 include/linux/perf_event.h      |  9 ++++++++
 include/uapi/linux/perf_event.h | 39 +++++++++++++++++++++++++++++++++-
 kernel/events/core.c            | 46 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 93 insertions(+), 1 deletion(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 4741ecd..5bc8156 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -998,6 +998,10 @@ perf_event__output_id_sample(struct perf_event *event,
 extern void
 perf_log_lost_samples(struct perf_event *event, u64 lost);
 
+extern void
+perf_log_overhead(struct perf_event *event, u64 type,
+		  struct perf_overhead_entry *entry);
+
 static inline bool is_sampling_event(struct perf_event *event)
 {
 	return event->attr.sample_period != 0;
@@ -1221,6 +1225,11 @@ static inline bool has_addr_filter(struct perf_event *event)
 	return event->pmu->nr_addr_filters;
 }
 
+static inline bool needs_log_overhead(struct perf_event *event)
+{
+	return !!event->attr.overhead;	/* normalise the attr flag to bool */
+}
+
 /*
  * An inherited event uses parent's filters
  */
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index c66a485..bb0ecf0 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -344,7 +344,8 @@ struct perf_event_attr {
 				use_clockid    :  1, /* use @clockid for time fields */
 				context_switch :  1, /* context switch data */
 				write_backward :  1, /* Write ring buffer from end to beginning */
-				__reserved_1   : 36;
+				overhead       :  1, /* Log overhead information */
+				__reserved_1   : 35;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -862,6 +863,17 @@ enum perf_event_type {
 	 */
 	PERF_RECORD_SWITCH_CPU_WIDE		= 15,
 
+	/*
+	 * Records perf overhead
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u64				type;
+	 *	struct perf_overhead_entry	entry;
+	 *	struct sample_id		sample_id;
+	 * };
+	 */
+	PERF_RECORD_OVERHEAD			= 16,
+
 	PERF_RECORD_MAX,			/* non-ABI */
 };
 
@@ -980,4 +992,29 @@ struct perf_branch_entry {
 		reserved:44;
 };
 
+/*
+ * Overhead types may differ between architectures.
+ * Common (core) overhead types are numbered from PERF_CORE_OVERHEAD;
+ * arch-specific (PMU) types are numbered from PERF_PMU_OVERHEAD.
+ */
+enum perf_record_overhead_type {
+	PERF_CORE_OVERHEAD	 = 0,
+
+	PERF_PMU_OVERHEAD	 = 20,
+
+	PERF_OVERHEAD_MAX,
+};
+
+/*
+ * single overhead record layout:
+ *
+ *	  nr: Number of times the overhead occurred.
+ *	      E.g. for NMI, nr == number of times the NMI handler was called.
+ *	time: Total overhead cost (ns)
+ */
+struct perf_overhead_entry {
+	__u64	nr;
+	__u64	time;
+};
+
 #endif /* _UAPI_LINUX_PERF_EVENT_H */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6ee1feb..5312744 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7037,6 +7037,52 @@ static void perf_log_itrace_start(struct perf_event *event)
 	perf_output_end(&handle);
 }
 
+
+/*
+ * Emit a PERF_RECORD_OVERHEAD record of the given @type carrying @entry.
+ *
+ * The overhead logged here is the system overhead, not per-event overhead;
+ * @event is only used as a carrier, via the existing event log mechanism.
+ * On success @entry is zeroed; if the output cannot begin, nothing is
+ * written and @entry is left untouched.
+ */
+void perf_log_overhead(struct perf_event *event, u64 type,
+		       struct perf_overhead_entry *entry)
+{
+	struct perf_output_handle handle;
+	struct perf_sample_data sample;
+	int ret;
+
+	struct {
+		struct perf_event_header	header;
+		u64				type;
+		struct perf_overhead_entry	overhead;
+	} overhead_event = {
+		.header = {
+			.type = PERF_RECORD_OVERHEAD,
+			.misc = 0,
+			.size = sizeof(overhead_event),	/* fixed-size part */
+		},
+		.type = type,
+		.overhead = {
+			.nr = entry->nr,
+			.time = entry->time,
+		},
+	};
+
+	perf_event_header__init_id(&overhead_event.header, &sample, event);
+	ret = perf_output_begin(&handle, event, overhead_event.header.size);
+
+	if (ret)
+		return;	/* nothing written; *entry kept */
+
+	perf_output_put(&handle, overhead_event);
+	perf_event__output_id_sample(event, &handle, &sample);
+
+	perf_output_end(&handle);
+	memset(entry, 0, sizeof(*entry));	/* reset caller's counters once logged */
+}
+
 /*
  * Generic event overflow handling, sampling.
  */
-- 
2.5.5

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ