lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Thu,  2 Oct 2014 14:32:08 +0400
From:	Alexander Yarygin <yarygin@...ux.vnet.ibm.com>
To:	linux-kernel@...r.kernel.org
Cc:	Alexander Yarygin <yarygin@...ux.vnet.ibm.com>,
	Arnaldo Carvalho de Melo <acme@...nel.org>,
	Christian Borntraeger <borntraeger@...ibm.com>,
	David Ahern <dsahern@...il.com>,
	Frederic Weisbecker <fweisbec@...il.com>,
	Ingo Molnar <mingo@...nel.org>, Jiri Olsa <jolsa@...hat.com>,
	Mike Galbraith <efault@....de>,
	Namhyung Kim <namhyung.kim@....com>,
	Paul Mackerras <paulus@...ba.org>,
	Peter Zijlstra <a.p.zijlstra@...llo.nl>,
	Stephane Eranian <eranian@...gle.com>
Subject: [PATCH 1/2] perf tools: Add option to copy events when queueing

From: David Ahern <dsahern@...il.com>

When processing events, the session code uses an ordered samples queue to
time-sort events coming in across multiple mmaps. At a later point in time,
samples on the queue are flushed up to some timestamp, at which point each
event is actually processed.

When analyzing events live (i.e., record/analysis path in the same command)
there is a race that leads to corrupted events and parse errors which cause
perf to terminate. The problem is that when the event is placed in the ordered
samples queue it is only a reference to the event which is really sitting in
the mmap buffer. Even though the event is queued for later processing the mmap
tail pointer is updated which indicates to the kernel that the event has been
processed. The race is whether the event gets flushed from the queue before it
is overwritten in the mmap buffer by some other event. For commands trying to
process events live (versus just writing to a file) at a high event rate, this
leads to parse failures and perf terminates.

Examples hitting this problem are 'perf kvm stat live', especially with nested
VMs which generate 100,000+ traces per second, and a command processing
scheduling events with a high rate of context switching -- e.g., running
'perf bench sched pipe'.

This patch offers live commands an option to copy the event when it is placed in
the ordered samples queue.

Signed-off-by: David Ahern <dsahern@...il.com>
Cc: Arnaldo Carvalho de Melo <acme@...nel.org>
Cc: Christian Borntraeger <borntraeger@...ibm.com>
Cc: Frederic Weisbecker <fweisbec@...il.com>
Cc: Ingo Molnar <mingo@...nel.org>
Cc: Jiri Olsa <jolsa@...hat.com>
Cc: Mike Galbraith <efault@....de>
Cc: Namhyung Kim <namhyung.kim@....com>
Cc: Peter Zijlstra <a.p.zijlstra@...llo.nl>
Cc: Stephane Eranian <eranian@...gle.com>
Signed-off-by: Alexander Yarygin <yarygin@...ux.vnet.ibm.com>
---
 tools/perf/util/ordered-events.c | 41 ++++++++++++++++++++++++++++++++++++----
 tools/perf/util/ordered-events.h | 10 +++++++++-
 tools/perf/util/session.c        |  5 ++---
 3 files changed, 48 insertions(+), 8 deletions(-)

diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c
index 706ce1a..f7383cc 100644
--- a/tools/perf/util/ordered-events.c
+++ b/tools/perf/util/ordered-events.c
@@ -1,5 +1,6 @@
 #include <linux/list.h>
 #include <linux/compiler.h>
+#include <linux/string.h>
 #include "ordered-events.h"
 #include "evlist.h"
 #include "session.h"
@@ -58,10 +59,24 @@ static void queue_event(struct ordered_events *oe, struct ordered_event *new)
 }
 
 #define MAX_SAMPLE_BUFFER	(64 * 1024 / sizeof(struct ordered_event))
-static struct ordered_event *alloc_event(struct ordered_events *oe)
+static struct ordered_event *alloc_event(struct ordered_events *oe,
+					 union perf_event *event)
 {
 	struct list_head *cache = &oe->cache;
 	struct ordered_event *new = NULL;
+	union perf_event *new_event = NULL;
+
+	if (oe->copy_on_queue) {
+		if (oe->cur_alloc_size < oe->max_alloc_size) {
+			new_event = memdup(event, event->header.size);
+			if (new_event)
+				oe->cur_alloc_size += event->header.size;
+		}
+	} else
+		new_event = event;
+
+	if (!new_event)
+		return NULL;
 
 	if (!list_empty(cache)) {
 		new = list_entry(cache->next, struct ordered_event, list);
@@ -74,8 +89,13 @@ static struct ordered_event *alloc_event(struct ordered_events *oe)
 		size_t size = MAX_SAMPLE_BUFFER * sizeof(*new);
 
 		oe->buffer = malloc(size);
-		if (!oe->buffer)
+		if (!oe->buffer) {
+			if (oe->copy_on_queue) {
+				oe->cur_alloc_size -= new_event->header.size;
+				free(new_event);
+			}
 			return NULL;
+		}
 
 		pr("alloc size %" PRIu64 "B (+%zu), max %" PRIu64 "B\n",
 		   oe->cur_alloc_size, size, oe->max_alloc_size);
@@ -90,15 +110,19 @@ static struct ordered_event *alloc_event(struct ordered_events *oe)
 		pr("allocation limit reached %" PRIu64 "B\n", oe->max_alloc_size);
 	}
 
+	new->event = new_event;
+
 	return new;
 }
 
 struct ordered_event *
-ordered_events__new(struct ordered_events *oe, u64 timestamp)
+ordered_events__new(struct ordered_events *oe, u64 timestamp,
+		    union perf_event *event)
 {
 	struct ordered_event *new;
 
-	new = alloc_event(oe);
+	new = alloc_event(oe, event);
+
 	if (new) {
 		new->timestamp = timestamp;
 		queue_event(oe, new);
@@ -111,6 +135,10 @@ void ordered_events__delete(struct ordered_events *oe, struct ordered_event *eve
 {
 	list_move(&event->list, &oe->cache);
 	oe->nr_events--;
+	if (oe->copy_on_queue) {
+		oe->cur_alloc_size -= event->event->header.size;
+		free(event->event);
+	}
 }
 
 static int __ordered_events__flush(struct perf_session *s,
@@ -240,6 +268,11 @@ void ordered_events__free(struct ordered_events *oe)
 
 		event = list_entry(oe->to_free.next, struct ordered_event, list);
 		list_del(&event->list);
+		if (oe->copy_on_queue) {
+			oe->cur_alloc_size -= event->event->header.size;
+			free(event->event);
+		}
+
 		free(event);
 	}
 }
diff --git a/tools/perf/util/ordered-events.h b/tools/perf/util/ordered-events.h
index 3b2f205..7b8f9b0 100644
--- a/tools/perf/util/ordered-events.h
+++ b/tools/perf/util/ordered-events.h
@@ -34,9 +34,11 @@ struct ordered_events {
 	int			buffer_idx;
 	unsigned int		nr_events;
 	enum oe_flush		last_flush_type;
+	bool                    copy_on_queue;
 };
 
-struct ordered_event *ordered_events__new(struct ordered_events *oe, u64 timestamp);
+struct ordered_event *ordered_events__new(struct ordered_events *oe, u64 timestamp,
+					  union perf_event *event);
 void ordered_events__delete(struct ordered_events *oe, struct ordered_event *event);
 int ordered_events__flush(struct perf_session *s, struct perf_tool *tool,
 			  enum oe_flush how);
@@ -48,4 +50,10 @@ void ordered_events__set_alloc_size(struct ordered_events *oe, u64 size)
 {
 	oe->max_alloc_size = size;
 }
+
+static inline
+void ordered_events__set_copy_on_queue(struct ordered_events *oe, bool copy)
+{
+	oe->copy_on_queue = copy;
+}
 #endif /* __ORDERED_EVENTS_H */
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 6d2d50d..976064d 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -532,17 +532,16 @@ int perf_session_queue_event(struct perf_session *s, union perf_event *event,
 			return -EINVAL;
 	}
 
-	new = ordered_events__new(oe, timestamp);
+	new = ordered_events__new(oe, timestamp, event);
 	if (!new) {
 		ordered_events__flush(s, tool, OE_FLUSH__HALF);
-		new = ordered_events__new(oe, timestamp);
+		new = ordered_events__new(oe, timestamp, event);
 	}
 
 	if (!new)
 		return -ENOMEM;
 
 	new->file_offset = file_offset;
-	new->event = event;
 	return 0;
 }
 
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ