lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <fc333a628d66e23ac2af6ebd0f16f9b39f9563ba.1629490974.git.rickyman7@gmail.com>
Date:   Sat, 21 Aug 2021 11:19:38 +0200
From:   Riccardo Mancini <rickyman7@...il.com>
To:     Arnaldo Carvalho de Melo <acme@...nel.org>
Cc:     Ian Rogers <irogers@...gle.com>,
        Namhyung Kim <namhyung@...nel.org>,
        Peter Zijlstra <peterz@...radead.org>,
        Ingo Molnar <mingo@...hat.com>,
        Mark Rutland <mark.rutland@....com>,
        Jiri Olsa <jolsa@...hat.com>, linux-kernel@...r.kernel.org,
        linux-perf-users@...r.kernel.org,
        Riccardo Mancini <rickyman7@...il.com>
Subject: [RFC PATCH v1 32/37] perf record: add --threads option

This patch adds a new --threads option to perf-record, which sets the
number of threads to use for multithreaded operations (synthesis and, in
following patches, evlist).

The new option will override the --num-thread-synthesize option if set.
By default, no thread will be used. The option can also be passed
without any argument, setting the number of threads to the number of
online cpus.

Furthermore, two new perf configs are added to selectively disable
multithreading in either synthesis or evlist.

To keep the same behaviour for --num-thread-synthesize, setting only that
option will cause multithreading to be enabled only in synthesis (by
overriding the perf config options for multithreaded synthesis and
evlist).

Examples:
$ ./perf record --threads
uses one thread per cpu for synthesis (and evlist in following patches)

$ ./perf record --threads 2 --num-thread-synthesize 4
the two options shouldn't be mixed; in this case 2 threads are used
for everything (4 is ignored)

$ ./perf record --num-thread-synthesize 4
same behaviour as before: 4 threads, but only for synthesis

$ ./perf config record.multithreaded-synthesis=no
$ ./perf record --threads
uses multithreading for everything but synthesis (i.e. evlist in
following patches)

Signed-off-by: Riccardo Mancini <rickyman7@...il.com>
---
 tools/perf/Documentation/perf-record.txt |  9 ++++++
 tools/perf/builtin-record.c              | 35 +++++++++++++++++++-----
 tools/perf/util/record.h                 |  3 ++
 3 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index f1079ee7f2ecf4a8..f5525e3a36e0cf2a 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -629,6 +629,15 @@ appended unit character - B/K/M/G
 	The number of threads to run when synthesizing events for existing processes.
 	By default, the number of threads equals 1.
 
+--threads::
+	The number of threads to use for operations which have multithreaded
+	support (synthesize, evlist).
+	Setting this option overrides --num-thread-synthesize.
+	You can selectively disable any of the multithreaded operations through
+	perf-config record.multithreaded-{synthesis,evlist}.
+	By default, the number of threads equals 1.
+	Setting this option without any parameter sets it to the number of online cpus.
+
 ifdef::HAVE_LIBPFM[]
 --pfm-events events::
 Select a PMU event using libpfm4 syntax (see http://perfmon2.sf.net)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index dc9a814b2e7906fc..7802a0e25f631fac 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1496,7 +1496,7 @@ static int record__synthesize(struct record *rec, bool tail)
 	if (err < 0)
 		pr_warning("Couldn't synthesize cgroup events.\n");
 
-	if (rec->opts.nr_threads_synthesize > 1) {
+	if (rec->opts.multithreaded_synthesis) {
 		perf_set_multithreaded();
 		f = process_locked_synthesized_event;
 	}
@@ -1504,7 +1504,7 @@ static int record__synthesize(struct record *rec, bool tail)
 	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads,
 					    f, opts->sample_address);
 
-	if (rec->opts.nr_threads_synthesize > 1)
+	if (rec->opts.multithreaded_synthesis)
 		perf_set_singlethreaded();
 
 out:
@@ -2188,6 +2188,12 @@ static int perf_record_config(const char *var, const char *value, void *cb)
 			rec->opts.nr_cblocks = nr_cblocks_default;
 	}
 #endif
+	if (!strcmp(var, "record.multithreaded-synthesis"))
+		rec->opts.multithreaded_synthesis = perf_config_bool(var, value);
+
+	if (!strcmp(var, "record.multithreaded-evlist"))
+		rec->opts.multithreaded_evlist = perf_config_bool(var, value);
+
 
 	return 0;
 }
@@ -2434,6 +2440,9 @@ static struct record record = {
 		},
 		.mmap_flush          = MMAP_FLUSH_DEFAULT,
 		.nr_threads_synthesize = 1,
+		.nr_threads          = 1,
+		.multithreaded_evlist = true,
+		.multithreaded_synthesis = true,
 		.ctl_fd              = -1,
 		.ctl_fd_ack          = -1,
 	},
@@ -2640,6 +2649,9 @@ static struct option __record_options[] = {
 	OPT_UINTEGER(0, "num-thread-synthesize",
 		     &record.opts.nr_threads_synthesize,
 		     "number of threads to run for event synthesis"),
+	OPT_UINTEGER_OPTARG(0, "threads",
+		     &record.opts.nr_threads, UINT_MAX,
+		     "number of threads to use"),
 #ifdef HAVE_LIBPFM
 	OPT_CALLBACK(0, "pfm-events", &record.evlist, "event",
 		"libpfm4 event selector. use 'perf list' to list available events",
@@ -2915,10 +2927,19 @@ int cmd_record(int argc, const char **argv)
 		rec->opts.comp_level = comp_level_max;
 	pr_debug("comp level: %d\n", rec->opts.comp_level);
 
-	if (rec->opts.nr_threads_synthesize == UINT_MAX)
-		rec->opts.nr_threads_synthesize = sysconf(_SC_NPROCESSORS_ONLN);
-	if (rec->opts.nr_threads_synthesize > 1) {
-		err = setup_global_workqueue(rec->opts.nr_threads_synthesize);
+	if (rec->opts.nr_threads <= 1) {
+		rec->opts.multithreaded_evlist = false;
+		if (rec->opts.nr_threads_synthesize > 1) {
+			rec->opts.multithreaded_synthesis = true;
+			rec->opts.nr_threads = rec->opts.nr_threads_synthesize;
+		} else {
+			rec->opts.multithreaded_synthesis = false;
+		}
+	}
+	if (rec->opts.nr_threads == UINT_MAX)
+		rec->opts.nr_threads = sysconf(_SC_NPROCESSORS_ONLN);
+	if (rec->opts.nr_threads > 1) {
+		err = setup_global_workqueue(rec->opts.nr_threads);
 		if (err) {
 			create_workqueue_strerror(global_wq, errbuf, sizeof(errbuf));
 			pr_err("setup_global_workqueue: %s\n", errbuf);
@@ -2928,7 +2949,7 @@ int cmd_record(int argc, const char **argv)
 
 	err = __cmd_record(&record, argc, argv);
 
-	if (rec->opts.nr_threads_synthesize > 1)
+	if (rec->opts.nr_threads > 1)
 		teardown_global_workqueue();
 out:
 	bitmap_free(rec->affinity_mask.bits);
diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
index 68f471d9a88b2b36..9c47a7904a43ffc7 100644
--- a/tools/perf/util/record.h
+++ b/tools/perf/util/record.h
@@ -74,6 +74,9 @@ struct record_opts {
 	int	      mmap_flush;
 	unsigned int  comp_level;
 	unsigned int  nr_threads_synthesize;
+	unsigned int  nr_threads;
+	bool	      multithreaded_synthesis;
+	bool	      multithreaded_evlist;
 	int	      ctl_fd;
 	int	      ctl_fd_ack;
 	bool	      ctl_fd_close;
-- 
2.31.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ