Message-ID: <54cc11d7-3ef2-c856-052e-6e2c309ff743@linux.intel.com>
Date:   Thu, 23 Aug 2018 19:47:01 +0300
From:   Alexey Budankov <alexey.budankov@...ux.intel.com>
To:     Peter Zijlstra <peterz@...radead.org>,
        Ingo Molnar <mingo@...nel.org>,
        Arnaldo Carvalho de Melo <acme@...nel.org>
Cc:     Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
        Jiri Olsa <jolsa@...hat.com>,
        Namhyung Kim <namhyung@...nel.org>,
        Andi Kleen <ak@...ux.intel.com>,
        linux-kernel <linux-kernel@...r.kernel.org>,
        linux-perf-users@...r.kernel.org
Subject: [PATCH v2 2/2]: perf record: enable asynchronous trace writing


Trace file offsets are calculated dynamically in the trace streaming loop
and are different for every enqueued write operation, so the written
regions do not overlap and the write requests can be serviced in parallel.
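
For illustration only (not part of the change below), a minimal sketch of
queueing a POSIX AIO write at an explicit, per-request file offset; because
each request carries its own non-overlapping offset, several requests can be
in flight at once. queue_write() is a hypothetical helper name:

#include <aio.h>
#include <stdio.h>
#include <string.h>

static int queue_write(int fd, struct aiocb *cb, void *buf, size_t size, off_t off)
{
	memset(cb, 0, sizeof(*cb));
	cb->aio_fildes = fd;
	cb->aio_buf = buf;
	cb->aio_nbytes = size;
	cb->aio_offset = off;			/* per-request offset, no overlap */
	cb->aio_sigevent.sigev_notify = SIGEV_NONE;

	if (aio_write(cb) == -1) {		/* only enqueues the request */
		perror("aio_write");
		return -1;
	}
	return 0;
}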

record__mmap_read_sync() acts as a barrier: it waits for the enqueued
write requests to complete before the next portion of ready profiling
data is spilled to disk.
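
Again for illustration only, a simplified sketch of such a barrier: it loops
over the queued control blocks until every request has completed, roughly
what record__mmap_read_sync() does below, but without the partial-write
resubmission the patch performs. wait_all() is a hypothetical name:

#include <aio.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>

static void wait_all(struct aiocb **cblocks, int n)
{
	int i, err, pending;
	ssize_t ret;

	do {
		pending = 0;
		for (i = 0; i < n; i++) {
			if (!cblocks[i])
				continue;
			err = aio_error(cblocks[i]);
			if (err == EINPROGRESS) {	/* still in flight */
				pending = 1;
				continue;
			}
			ret = aio_return(cblocks[i]);	/* reap the result */
			if (ret < 0)
				fprintf(stderr, "aio write failed: %s\n", strerror(err));
			cblocks[i] = NULL;		/* request is done */
		}
		if (pending)				/* block until something completes */
			aio_suspend((const struct aiocb * const *)cblocks, n, NULL);
	} while (pending);
}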

Signed-off-by: Alexey Budankov <alexey.budankov@...ux.intel.com>
---
 tools/perf/builtin-record.c | 97 +++++++++++++++++++++++++++++++++++++++++----
 tools/perf/util/mmap.c      | 23 +++++------
 tools/perf/util/mmap.h      |  2 +-
 3 files changed, 101 insertions(+), 21 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index a35675e9f3aa..dee63229ed37 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -132,7 +132,7 @@ static int record__aio_write(int trace_fd, struct aiocb *cblock,
 	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
 
 	if (aio_write(cblock) == -1) {
-		pr_err("failed to write perf data, error: %m\n");
+		pr_err("failed to queue perf data, error: %m\n");
 		return -1;
 	}
 
@@ -148,12 +148,14 @@ static int process_synthesized_event(struct perf_tool *tool,
 	return record__write(rec, event, event->header.size);
 }
 
-static int record__pushfn(void *to, void *bf, size_t size)
+static int record__pushfn(void *to, void *bf, size_t size, off_t off)
 {
 	struct record *rec = to;
+	struct perf_mmap *map = bf;
 
 	rec->samples++;
-	return record__write(rec, bf, size);
+	return record__aio_write(rec->session->data->file.fd, &map->cblock,
+			map->data, size, off);
 }
 
 static volatile int done;
@@ -528,13 +530,85 @@ static struct perf_event_header finished_round_event = {
 	.type = PERF_RECORD_FINISHED_ROUND,
 };
 
+static int record__mmap_read_sync(int trace_fd, struct aiocb **cblocks,
+		int cblocks_size, struct record *rec)
+{
+	size_t rem;
+	ssize_t size;
+	off_t rem_off;
+	int i, aio_ret, aio_errno, do_suspend;
+	struct perf_mmap *md;
+	struct timespec timeout0 = { 0, 0 };
+	struct timespec timeoutS = { 0, 1000 * 1000  * 1 };
+
+	if (!cblocks_size)
+		return 0;
+
+	do {
+		do_suspend = 0;
+		nanosleep(&timeoutS, NULL);
+		if (aio_suspend((const struct aiocb**)cblocks, cblocks_size, &timeout0)) {
+			if (errno == EAGAIN || errno == EINTR) {
+				do_suspend = 1;
+				continue;
+			} else {
+				pr_err("failed to sync perf data, error: %m\n");
+				break;
+			}
+		}
+		for (i = 0; i < cblocks_size; i++) {
+			if (cblocks[i] == NULL) {
+				continue;
+			}
+			aio_errno = aio_error(cblocks[i]);
+			if (aio_errno == EINPROGRESS) {
+				do_suspend = 1;
+				continue;
+			}
+			size = aio_ret = aio_return((struct aiocb*)cblocks[i]);
+			if (aio_ret < 0) {
+				if (aio_errno == EINTR) {
+					size = 0;
+				} else {
+					pr_err("failed to write perf data, error: %m\n");
+					cblocks[i] = NULL;
+					continue;
+				}
+			}
+			rec->bytes_written += size;
+			md = (struct perf_mmap*)((char*)cblocks[i] -
+				offsetof(struct perf_mmap, cblock));
+			rem = cblocks[i]->aio_nbytes - (size_t)size;
+			if (rem == 0) {
+				perf_mmap__put(md);
+				cblocks[i] = NULL;
+				if (switch_output_size(rec))
+					trigger_hit(&switch_output_trigger);
+			} else {
+				rem_off = cblocks[i]->aio_offset +
+					cblocks[i]->aio_nbytes - rem;
+				if (!record__aio_write(trace_fd,
+					(struct aiocb *)cblocks[i],
+					md->data + cblocks[i]->aio_nbytes - rem,
+					rem, rem_off))
+					do_suspend = 1;
+			}
+		}
+	} while (do_suspend);
+
+	return 0;
+}
+
 static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
 				    bool overwrite)
 {
 	u64 bytes_written = rec->bytes_written;
-	int i;
-	int rc = 0;
+	int i, rc = 0;
 	struct perf_mmap *maps;
+	int trace_fd = rec->session->data->file.fd;
+	struct aiocb **mmap_aio = rec->evlist->mmap_aio;
+	int mmap_aio_size = 0;
+	off_t off;
 
 	if (!evlist)
 		return 0;
@@ -546,14 +620,17 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
 	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
 		return 0;
 
+	off = lseek(trace_fd, 0, SEEK_CUR);
+
 	for (i = 0; i < evlist->nr_mmaps; i++) {
 		struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;
 
 		if (maps[i].base) {
-			if (perf_mmap__push(&maps[i], rec, record__pushfn) != 0) {
-				rc = -1;
+			rc = perf_mmap__push(&maps[i], rec, record__pushfn, &off);
+			if (rc < 0)
 				goto out;
-			}
+			else if (rc > 0)
+				mmap_aio[mmap_aio_size++] = &maps[i].cblock;
 		}
 
 		if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
@@ -563,6 +640,10 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
 		}
 	}
 
+	record__mmap_read_sync(trace_fd, mmap_aio, mmap_aio_size, rec);
+
+	lseek(trace_fd, off, SEEK_SET);
+
 	/*
 	 * Mark the round finished in case we wrote
 	 * at least one event.
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index e71d46cb01cc..c8b921c88a5d 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -292,11 +292,11 @@ int perf_mmap__read_init(struct perf_mmap *map)
 }
 
 int perf_mmap__push(struct perf_mmap *md, void *to,
-		    int push(void *to, void *buf, size_t size))
+		    int push(void *to, void *buf, size_t size, off_t), off_t *off)
 {
 	u64 head = perf_mmap__read_head(md);
 	unsigned char *data = md->base + page_size;
-	unsigned long size;
+	unsigned long size, size0 = 0;
 	void *buf;
 	int rc = 0;
 
@@ -308,23 +308,22 @@ int perf_mmap__push(struct perf_mmap *md, void *to,
 
 	if ((md->start & md->mask) + size != (md->end & md->mask)) {
 		buf = &data[md->start & md->mask];
-		size = md->mask + 1 - (md->start & md->mask);
-		md->start += size;
-
-		if (push(to, buf, size) < 0) {
-			rc = -1;
-			goto out;
-		}
+		size0 = md->mask + 1 - (md->start & md->mask);
+		md->start += size0;
+		memcpy(md->data, buf, size0);
 	}
 
 	buf = &data[md->start & md->mask];
 	size = md->end - md->start;
 	md->start += size;
+	memcpy(md->data + size0, buf, size);
 
-	if (push(to, buf, size) < 0) {
-		rc = -1;
+	rc = push(to, md, size0 + size, *off) < 0 ? -1 : 1;
+	if (rc == -1)
 		goto out;
-	}
+
+	perf_mmap__get(md);
+	*off += size0 + size;
 
 	md->prev = head;
 	perf_mmap__consume(md);
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index 1974e621e36b..6211a3a0c4c3 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -95,7 +95,7 @@ union perf_event *perf_mmap__read_forward(struct perf_mmap *map);
 union perf_event *perf_mmap__read_event(struct perf_mmap *map);
 
 int perf_mmap__push(struct perf_mmap *md, void *to,
-		    int push(void *to, void *buf, size_t size));
+		    int push(void *to, void *buf, size_t size, off_t off), off_t *off);
 
 size_t perf_mmap__mmap_len(struct perf_mmap *map);
