lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20180913125450.21342-48-jolsa@kernel.org>
Date:   Thu, 13 Sep 2018 14:54:49 +0200
From:   Jiri Olsa <jolsa@...nel.org>
To:     Arnaldo Carvalho de Melo <acme@...nel.org>
Cc:     lkml <linux-kernel@...r.kernel.org>,
        Ingo Molnar <mingo@...nel.org>,
        Namhyung Kim <namhyung@...nel.org>,
        Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
        Peter Zijlstra <a.p.zijlstra@...llo.nl>,
        Andi Kleen <andi@...stfloor.org>,
        Alexey Budankov <alexey.budankov@...ux.intel.com>
Subject: [PATCH 47/48] perf record: Spread maps for --threads option

Currently we assign all maps to the main thread. Add
code that spreads the maps across threads for the --threads option.

For the --threads option we create as many threads as there
are memory maps in the evlist, which is the number of CPUs
in the system or the CPUs we monitor. Each thread gets a
single data mmap to read.

In addition we also have the same number of tracking mmaps
for auxiliary events, for which we don't create special
threads. Instead we assign them to the main thread, because
there's not much traffic expected there.

The assignment is visible from --thread-stats output:

          pid      write       poll       skip  maps (size 20K)
    1s   9770       144B          1          0   19K   19K   19K   18K   19K
         9772         0B          1          0   18K
         9773         0B          1          0   19K
         9774         0B          1          0   19K

There are 5 maps for thread 9770 (1 data map and 4 auxiliary
maps) and one data map for every other thread. Each thread
writes its data to a separate data file.

In addition we also pin every thread to the CPU that its
data map belongs to, in order to keep both the writer
(kernel) and the reader (perf tool thread) on the same CPU.

Link: http://lkml.kernel.org/n/tip-ghcsnp3b73innq2gkl1lkfbz@git.kernel.org
Signed-off-by: Jiri Olsa <jolsa@...nel.org>
---
 tools/perf/builtin-record.c | 133 +++++++++++++++++++++++++++++++++---
 1 file changed, 125 insertions(+), 8 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 92ba4d83b18c..4cc728174c79 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -109,6 +109,8 @@ struct record {
 	struct switch_output	switch_output;
 	unsigned long long	samples;
 	struct record_thread	*threads;
+	bool			threads_all;
+	bool			threads_one;
 	int			threads_cnt;
 	bool			threads_set;
 	int			threads_signal_cnt;
@@ -393,15 +395,11 @@ static int record__mmap_evlist(struct record *rec,
 	return 0;
 }
 
-static int record__mmap_index(struct record *rec)
+static void record__mmap_index_single(struct record *rec)
 {
 	struct perf_evlist *evlist = rec->evlist;
 	struct perf_data *data = &rec->data;
-	int i, ret, nr = evlist->nr_mmaps;
-
-	ret = perf_data__create_index(data, nr);
-	if (ret)
-		return ret;
+	int i, nr = evlist->nr_mmaps;
 
 	for (i = 0; i < nr; i++) {
 		struct perf_mmap *map = &evlist->mmap[i];
@@ -414,6 +412,50 @@ static int record__mmap_index(struct record *rec)
 
 		map->file = &data->file;
 	}
+}
+
+static void record__mmap_index_all(struct record *rec)
+{
+	struct perf_evlist *evlist = rec->evlist;
+	struct perf_data     *data = &rec->data;
+	struct record_thread *threads = rec->threads;
+	struct record_thread *thread0 = threads;
+	int i, t;
+
+	BUG_ON(data->index_nr != rec->threads_cnt);
+
+	for (i = 0; i < evlist->nr_mmaps; i++) {
+		struct perf_mmap *map = &evlist->track_mmap[i];
+
+		map->file = &data->file;
+	}
+
+	thread0->mmap[0]->file = &data->index[0];
+
+	for (t = 1; t < rec->threads_cnt; t++) {
+		struct record_thread *th = threads + t;
+
+		for (i = 0; i < th->mmap_nr; i++) {
+			struct perf_mmap *map = th->mmap[i];
+
+			map->file = &data->index[t];
+		}
+	}
+}
+
+static int record__mmap_index(struct record *rec)
+{
+	struct perf_data *data = &rec->data;
+	int ret;
+
+	ret = perf_data__create_index(data, rec->threads_cnt);
+	if (ret)
+		return ret;
+
+	if (rec->threads_all)
+		record__mmap_index_all(rec);
+	else if (rec->threads_one)
+		record__mmap_index_single(rec);
 
 	return 0;
 }
@@ -1056,7 +1098,7 @@ record_thread__mmap(struct record_thread *th, int nr, int nr_ovw)
 }
 
 static int
-record__threads_assign(struct record *rec)
+record__threads_assign_single(struct record *rec)
 {
 	struct record_thread *threads = rec->threads;
 	struct record_thread *thread0 = threads;
@@ -1089,6 +1131,55 @@ record__threads_assign(struct record *rec)
 	return ret;
 }
 
+static int
+record__threads_assign_all(struct record *rec)
+{
+	struct perf_evlist *evlist = rec->evlist;
+	struct record_thread *threads = rec->threads;
+	struct record_thread *thread0 = threads;
+	int cnt = rec->threads_cnt;
+	int i, t, nr, nr0, nr_trk;
+	int nr_cpus = cpu__max_present_cpu();
+
+	nr     = evlist->mmap       ? evlist->nr_mmaps : 0;
+	nr_trk = evlist->track_mmap ? evlist->nr_mmaps : 0;
+
+	BUG_ON(evlist->overwrite_mmap);
+	BUG_ON(nr_cpus != nr);
+
+	nr0 = 1 + nr_trk;
+
+	if (record_thread__mmap(thread0, nr0, 0))
+		return -ENOMEM;
+
+	thread0->mmap[0] = &evlist->mmap[0];
+
+	for (i = 0; i < nr_trk; i++)
+		thread0->mmap[i + 1] = &evlist->track_mmap[i];
+
+	for (t = 1; t < cnt; t++) {
+		struct record_thread *th = threads + t;
+
+		if (record_thread__mmap(th, 1, 0))
+			return -ENOMEM;
+
+		th->mmap[0] = &evlist->mmap[t];
+	}
+
+	return 0;
+}
+
+static int
+record__threads_assign(struct record *rec)
+{
+	if (rec->threads_all)
+		return record__threads_assign_all(rec);
+	else if (rec->threads_one)
+		return record__threads_assign_single(rec);
+	else
+		return -EINVAL;
+}
+
 static int
 record_thread__create_poll(struct record_thread *th,
 			   struct perf_evlist *evlist)
@@ -1146,7 +1237,8 @@ record__threads_create(struct record *rec)
 static int record__threads_cnt(struct record *rec)
 {
 	struct perf_evlist *evlist = rec->evlist;
-	int cnt;
+	bool all = false, one = false;
+	int cnt = 0;
 
 	if (rec->threads_set) {
 		if (rec->threads_cnt) {
@@ -1158,11 +1250,15 @@ static int record__threads_cnt(struct record *rec)
 			return -EINVAL;
 		}
 		cnt = evlist->nr_mmaps;
+		all = true;
 	} else {
+		one = true;
 		cnt = 1;
 	}
 
 	rec->threads_cnt = cnt;
+	rec->threads_all = all;
+	rec->threads_one = one;
 	return 0;
 }
 
@@ -1200,6 +1296,25 @@ static inline pid_t gettid(void)
 	return (pid_t) syscall(__NR_gettid);
 }
 
+static int set_affinity(int cpu)
+{
+	cpu_set_t mask;
+
+	CPU_ZERO(&mask);
+	CPU_SET(cpu, &mask);
+	return sched_setaffinity(0, sizeof(mask), &mask);
+}
+
+static void set_thread_affinity(struct record *rec)
+{
+	if (rec->threads_all) {
+		struct perf_mmap *m0 = thread->mmap[0];
+
+		if (set_affinity(m0->cpu))
+			pr_err("failed to set affinity for cpu %d\n", m0->cpu);
+	}
+}
+
 static void*
 record_thread__process(struct record *rec)
 {
@@ -1263,6 +1378,7 @@ static void *worker(void *arg)
 	thread->state = RECORD_THREAD__RUNNING;
 
 	signal_main(rec);
+	set_thread_affinity(rec);
 
 	return record_thread__process(rec);
 }
@@ -1283,6 +1399,7 @@ static int record__threads_start(struct record *rec)
 	if (rec->threads_cnt > 1)
 		wait_for_signal(rec);
 
+	set_thread_affinity(rec);
 	return err;
 }
 
-- 
2.17.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ