lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20240605063828.195700-2-irogers@google.com>
Date: Tue,  4 Jun 2024 23:38:28 -0700
From: Ian Rogers <irogers@...gle.com>
To: Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>, 
	Arnaldo Carvalho de Melo <acme@...nel.org>, Namhyung Kim <namhyung@...nel.org>, 
	Mark Rutland <mark.rutland@....com>, 
	Alexander Shishkin <alexander.shishkin@...ux.intel.com>, Jiri Olsa <jolsa@...nel.org>, 
	Ian Rogers <irogers@...gle.com>, Adrian Hunter <adrian.hunter@...el.com>, 
	Kan Liang <kan.liang@...ux.intel.com>, linux-perf-users@...r.kernel.org, 
	linux-kernel@...r.kernel.org, Stephane Eranian <eranian@...gle.com>
Subject: [RFC PATCH v1 2/2] perf stat: Choose the most disaggregate command
 line option

When multiple aggregation options are passed to perf stat the behavior
isn't clear, because the last option on the command line wins. Consider
"perf stat -A --per-socket .." and "perf stat --per-socket -A ..": the
first will do per-socket aggregation while the second won't aggregate
at all, even though the same options were passed.

Rather than set an enum value, gather the options in a struct and
process them from most to least aggregated. This ensures the least
aggregated option always applies, so no aggregation if "-A" is passed.

Signed-off-by: Ian Rogers <irogers@...gle.com>
---
 tools/perf/builtin-stat.c | 73 ++++++++++++++++++++++++++++-----------
 1 file changed, 52 insertions(+), 21 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 6227b25d1446..603a9684153d 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -164,6 +164,35 @@ static struct perf_stat_config stat_config = {
 	.iostat_run		= false,
 };
 
+/* Aggregation options seen on the command line; more than one may be set. */
+struct opt_aggr_mode {
+	bool node, socket, die, cluster, cache, core, thread, no_aggr;
+};
+
+/* Map the command line options to the least aggregated mode requested. */
+static enum aggr_mode opt_aggr_mode_to_aggr_mode(struct opt_aggr_mode *opt_mode)
+{
+	enum aggr_mode mode = AGGR_GLOBAL;
+
+	if (opt_mode->node)
+		mode = AGGR_NODE;
+	if (opt_mode->socket)
+		mode = AGGR_SOCKET;
+	if (opt_mode->die)
+		mode = AGGR_DIE;
+	if (opt_mode->cluster)
+		mode = AGGR_CLUSTER;
+	if (opt_mode->cache)
+		mode = AGGR_CACHE;
+	if (opt_mode->core)
+		mode = AGGR_CORE;
+	if (opt_mode->thread)
+		mode = AGGR_THREAD;
+	if (opt_mode->no_aggr)
+		mode = AGGR_NONE;
+	return mode;
+}
+
 static void evlist__check_cpu_maps(struct evlist *evlist)
 {
 	struct evsel *evsel, *warned_leader = NULL;
@@ -1096,7 +1125,7 @@ static int parse_cache_level(const struct option *opt,
 			     int unset __maybe_unused)
 {
 	int level;
-	u32 *aggr_mode = (u32 *)opt->value;
+	struct opt_aggr_mode *opt_aggr_mode = (struct opt_aggr_mode *)opt->value;
 	u32 *aggr_level = (u32 *)opt->data;
 
 	/*
@@ -1135,7 +1164,7 @@ static int parse_cache_level(const struct option *opt,
 		return -EINVAL;
 	}
 out:
-	*aggr_mode = AGGR_CACHE;
+	opt_aggr_mode->cache = true;
 	*aggr_level = level;
 	return 0;
 }
@@ -2101,13 +2130,15 @@ static void init_features(struct perf_session *session)
 	perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
 }
 
-static int __cmd_record(const struct option stat_options[], int argc, const char **argv)
+static int __cmd_record(const struct option stat_options[], struct opt_aggr_mode *opt_mode,
+			int argc, const char **argv)
 {
 	struct perf_session *session;
 	struct perf_data *data = &perf_stat.data;
 
 	argc = parse_options(argc, argv, stat_options, stat_record_usage,
 			     PARSE_OPT_STOP_AT_NON_OPTION);
+	stat_config.aggr_mode = opt_aggr_mode_to_aggr_mode(opt_mode);
 
 	if (output_name)
 		data->path = output_name;
@@ -2350,6 +2381,7 @@ static void setup_system_wide(int forks)
 
 int cmd_stat(int argc, const char **argv)
 {
+	struct opt_aggr_mode opt_mode = {};
 	struct option stat_options[] = {
 		OPT_BOOLEAN('T', "transaction", &transaction_run,
 			"hardware transaction statistics"),
@@ -2393,10 +2425,10 @@ int cmd_stat(int argc, const char **argv)
 				stat__set_big_num),
 		OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
 			"list of cpus to monitor in system-wide"),
-		OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
-			"disable aggregation across CPUs or PMUs", AGGR_NONE),
-		OPT_SET_UINT(0, "no-merge", &stat_config.aggr_mode,
-			"disable aggregation the same as -A or -no-aggr", AGGR_NONE),
+		OPT_BOOLEAN('A', "no-aggr", &opt_mode.no_aggr,
+			"disable aggregation across CPUs or PMUs"),
+		OPT_BOOLEAN(0, "no-merge", &opt_mode.no_aggr,
+			"disable aggregation the same as -A or -no-aggr"),
 		OPT_BOOLEAN(0, "hybrid-merge", &stat_config.hybrid_merge,
 			"Merge identical named hybrid events"),
 		OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator",
@@ -2424,21 +2456,18 @@ int cmd_stat(int argc, const char **argv)
 			"clear screen in between new interval"),
 		OPT_UINTEGER(0, "timeout", &stat_config.timeout,
 			"stop workload and print counts after a timeout period in ms (>= 10ms)"),
-		OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
-			"aggregate counts per processor socket", AGGR_SOCKET),
-		OPT_SET_UINT(0, "per-die", &stat_config.aggr_mode,
-			"aggregate counts per processor die", AGGR_DIE),
-		OPT_SET_UINT(0, "per-cluster", &stat_config.aggr_mode,
-			"aggregate counts per processor cluster", AGGR_CLUSTER),
-		OPT_CALLBACK_OPTARG(0, "per-cache", &stat_config.aggr_mode, &stat_config.aggr_level,
+		OPT_BOOLEAN(0, "per-socket", &opt_mode.socket,
+			"aggregate counts per processor socket"),
+		OPT_BOOLEAN(0, "per-die", &opt_mode.die, "aggregate counts per processor die"),
+		OPT_BOOLEAN(0, "per-cluster", &opt_mode.cluster,
+			"aggregate counts per processor cluster"),
+		OPT_CALLBACK_OPTARG(0, "per-cache", &opt_mode, &stat_config.aggr_level,
 				"cache level", "aggregate count at this cache level (Default: LLC)",
 				parse_cache_level),
-		OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
-			"aggregate counts per physical processor core", AGGR_CORE),
-		OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
-			"aggregate counts per thread", AGGR_THREAD),
-		OPT_SET_UINT(0, "per-node", &stat_config.aggr_mode,
-			"aggregate counts per numa node", AGGR_NODE),
+		OPT_BOOLEAN(0, "per-core", &opt_mode.core,
+			"aggregate counts per physical processor core"),
+		OPT_BOOLEAN(0, "per-thread", &opt_mode.thread, "aggregate counts per thread"),
+		OPT_BOOLEAN(0, "per-node", &opt_mode.node, "aggregate counts per numa node"),
 		OPT_INTEGER('D', "delay", &target.initial_delay,
 			"ms to wait before starting measurement after program start (-1: start with events disabled)"),
 		OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL,
@@ -2521,6 +2550,8 @@ int cmd_stat(int argc, const char **argv)
 					(const char **) stat_usage,
 					PARSE_OPT_STOP_AT_NON_OPTION);
 
+	stat_config.aggr_mode = opt_aggr_mode_to_aggr_mode(&opt_mode);
+
 	if (stat_config.csv_sep) {
 		stat_config.csv_output = true;
 		if (!strcmp(stat_config.csv_sep, "\\t"))
@@ -2529,7 +2560,7 @@ int cmd_stat(int argc, const char **argv)
 		stat_config.csv_sep = DEFAULT_SEPARATOR;
 
 	if (argc && strlen(argv[0]) > 2 && strstarts("record", argv[0])) {
-		argc = __cmd_record(stat_options, argc, argv);
+		argc = __cmd_record(stat_options, &opt_mode, argc, argv);
 		if (argc < 0)
 			return -1;
 	} else if (argc && strlen(argv[0]) > 2 && strstarts("report", argv[0]))
-- 
2.45.1.288.g0e0cd299f1-goog


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ