lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-Id: <20211120030053.2649499-1-irogers@google.com>
Date:   Fri, 19 Nov 2021 19:00:53 -0800
From:   Ian Rogers <irogers@...gle.com>
To:     Andi Kleen <ak@...ux.intel.com>, Jiri Olsa <jolsa@...hat.com>,
        Namhyung Kim <namhyung@...nel.org>,
        John Garry <john.garry@...wei.com>,
        Kajol Jain <kjain@...ux.ibm.com>,
        "Paul A . Clarke" <pc@...ibm.com>,
        Arnaldo Carvalho de Melo <acme@...nel.org>,
        Kan Liang <kan.liang@...ux.intel.com>,
        Peter Zijlstra <peterz@...radead.org>,
        Ingo Molnar <mingo@...hat.com>,
        Mark Rutland <mark.rutland@....com>,
        Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
        linux-perf-users@...r.kernel.org, linux-kernel@...r.kernel.org
Cc:     eranian@...gle.com, Ian Rogers <irogers@...gle.com>
Subject: [PATCH] perf metric: Reduce multiplexing with duration_time

It is common to use the same counters with and without
duration_time. The ID sharing code treats duration_time as if it
were a hardware event placed in the same group. This causes
unnecessary multiplexing such as in the following example where
l3_cache_access isn't shared:

$ perf stat -M l3 -a sleep 1

 Performance counter stats for 'system wide':

         3,117,007      l3_cache_miss             #    199.5 MB/s  l3_rd_bw
                                                  #     43.6 %  l3_hits
                                                  #     56.4 %  l3_miss               (50.00%)
         5,526,447      l3_cache_access                                               (50.00%)
         5,392,435      l3_cache_access           # 5389191.2 access/s  l3_access_rate  (50.00%)
     1,000,601,901 ns   duration_time

       1.000601901 seconds time elapsed

Fix this by placing duration_time in all groups unless metric
sharing has been disabled on the command line:

$ perf stat -M l3 -a sleep 1

 Performance counter stats for 'system wide':

         3,597,972      l3_cache_miss             #    230.3 MB/s  l3_rd_bw
                                                  #     48.0 %  l3_hits
                                                  #     52.0 %  l3_miss
         6,914,459      l3_cache_access           # 6909935.9 access/s  l3_access_rate
     1,000,654,579 ns   duration_time

       1.000654579 seconds time elapsed

$ perf stat --metric-no-merge -M l3 -a sleep 1

 Performance counter stats for 'system wide':

         3,501,834      l3_cache_miss             #     53.5 %  l3_miss               (24.99%)
         6,548,173      l3_cache_access                                               (24.99%)
         3,417,622      l3_cache_miss             #     45.7 %  l3_hits               (25.04%)
         6,294,062      l3_cache_access                                               (25.04%)
         5,923,238      l3_cache_access           # 5919688.1 access/s  l3_access_rate  (24.99%)
     1,000,599,683 ns   duration_time
         3,607,486      l3_cache_miss             #    230.9 MB/s  l3_rd_bw           (49.97%)

       1.000599683 seconds time elapsed

Signed-off-by: Ian Rogers <irogers@...gle.com>
---
 tools/perf/util/metricgroup.c | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index fffe02aae3ed..145c59ba2ddc 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -1299,14 +1299,16 @@ static int build_combined_expr_ctx(const struct list_head *metric_list,
 /**
  * parse_ids - Build the event string for the ids and parse them creating an
  *             evlist. The encoded metric_ids are decoded.
+ * @metric_no_merge: is metric sharing explicitly disabled.
  * @fake_pmu: used when testing metrics not supported by the current CPU.
  * @ids: the event identifiers parsed from a metric.
  * @modifier: any modifiers added to the events.
  * @has_constraint: false if events should be placed in a weak group.
  * @out_evlist: the created list of events.
  */
-static int parse_ids(struct perf_pmu *fake_pmu, struct expr_parse_ctx *ids,
-		     const char *modifier, bool has_constraint, struct evlist **out_evlist)
+static int parse_ids(bool metric_no_merge, struct perf_pmu *fake_pmu,
+		     struct expr_parse_ctx *ids, const char *modifier,
+		     bool has_constraint, struct evlist **out_evlist)
 {
 	struct parse_events_error parse_error;
 	struct evlist *parsed_evlist;
@@ -1314,12 +1316,19 @@ static int parse_ids(struct perf_pmu *fake_pmu, struct expr_parse_ctx *ids,
 	int ret;
 
 	*out_evlist = NULL;
-	if (hashmap__size(ids->ids) == 0) {
+	if (!metric_no_merge || hashmap__size(ids->ids) == 0) {
 		char *tmp;
 		/*
-		 * No ids/events in the expression parsing context. Events may
-		 * have been removed because of constant evaluation, e.g.:
-		 *  event1 if #smt_on else 0
+		 * We may fail to share events between metrics because
+		 * duration_time isn't present in one metric. For example, a
+		 * ratio of cache misses doesn't need duration_time but the same
+		 * events may be used for a misses per second. Events without
+		 * sharing implies multiplexing, that is best avoided, so place
+		 * duration_time in every group.
+		 *
+		 * Also, there may be no ids/events in the expression parsing
+		 * context because of constant evaluation, e.g.:
+		 *    event1 if #smt_on else 0
 		 * Add a duration_time event to avoid a parse error on an empty
 		 * string.
 		 */
@@ -1387,7 +1396,8 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
 		ret = build_combined_expr_ctx(&metric_list, &combined);
 
 		if (!ret && combined && hashmap__size(combined->ids)) {
-			ret = parse_ids(fake_pmu, combined, /*modifier=*/NULL,
+			ret = parse_ids(metric_no_merge, fake_pmu, combined,
+					/*modifier=*/NULL,
 					/*has_constraint=*/true,
 					&combined_evlist);
 		}
@@ -1435,7 +1445,7 @@ static int parse_groups(struct evlist *perf_evlist, const char *str,
 			}
 		}
 		if (!metric_evlist) {
-			ret = parse_ids(fake_pmu, m->pctx, m->modifier,
+			ret = parse_ids(metric_no_merge, fake_pmu, m->pctx, m->modifier,
 					m->has_constraint, &m->evlist);
 			if (ret)
 				goto out;
-- 
2.34.0.rc2.393.gf8c9666880-goog

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ