lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20230320151509.1137462-5-james.clark@arm.com>
Date:   Mon, 20 Mar 2023 15:15:08 +0000
From:   James Clark <james.clark@....com>
To:     linux-perf-users@...r.kernel.org, Anshuman.Khandual@....com
Cc:     German Gomez <german.gomez@....com>,
        James Clark <james.clark@....com>,
        Peter Zijlstra <peterz@...radead.org>,
        Ingo Molnar <mingo@...hat.com>,
        Arnaldo Carvalho de Melo <acme@...nel.org>,
        Mark Rutland <mark.rutland@....com>,
        Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
        Jiri Olsa <jolsa@...nel.org>,
        Namhyung Kim <namhyung@...nel.org>,
        Ian Rogers <irogers@...gle.com>,
        Adrian Hunter <adrian.hunter@...el.com>,
        John Garry <john.g.garry@...cle.com>,
        Will Deacon <will@...nel.org>,
        Mike Leach <mike.leach@...aro.org>,
        Leo Yan <leo.yan@...aro.org>, linux-kernel@...r.kernel.org,
        linux-arm-kernel@...ts.infradead.org
Subject: [PATCH v2 4/4] perf report: Add 'simd' sort field

From: German Gomez <german.gomez@....com>

Add 'simd' sort field to visualize SIMD ops in perf-report.

Rows are labeled with the SIMD isa, and the type of predicate (if any):

  - [p] partial predicate
  - [e] empty predicate (no elements in the vector being used)

Example with Arm SPE and SVE (Scalable Vector Extension):

  #include <arm_sve.h>

  double src[1025], dst[1025];

  int main(void) {
    svfloat64_t vc = svdup_f64(1);
    for(;;)
      for(int i = 0; i < 1025; i += svcntd())
      {
        svbool_t pg = svwhilelt_b64(i, 1025);
        svfloat64_t vsrc = svld1(pg, &src[i]);
        svfloat64_t vdst = svadd_x(pg, vsrc, vc);
        svst1(pg, &dst[i], vdst);
      }
    return 0;
  }

  ... compiled using "gcc-11 -march=armv8-a+sve -O3"

Profiling on a platform that implements FEAT_SVE and FEAT_SPEv1p1:

  $ perf record -e arm_spe_0// -- ./a.out
  $ perf report --itrace=i1i -s overhead,pid,simd,sym

  Overhead      Pid:Command   Simd     Symbol
  ........  ................  .......  ......................

    53.76%    10758:program            [.] main
    46.14%    10758:program   [.] SVE  [.] main
     0.09%    10758:program   [p] SVE  [.] main

The report shows 0.09% of the sampled SVE operations use partial
predicates due to src and dst arrays not being multiples of the vector
register lengths.

Signed-off-by: German Gomez <german.gomez@....com>
Signed-off-by: James Clark <james.clark@....com>
---
 tools/perf/Documentation/perf-report.txt |  1 +
 tools/perf/util/hist.c                   |  1 +
 tools/perf/util/hist.h                   |  1 +
 tools/perf/util/sort.c                   | 47 ++++++++++++++++++++++++
 tools/perf/util/sort.h                   |  2 +
 5 files changed, 52 insertions(+)

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index c242e8da6b1a..cfd502f7e6da 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -117,6 +117,7 @@ OPTIONS
 	- addr: (Full) virtual address of the sampled instruction
 	- retire_lat: On X86, this reports pipeline stall of this instruction compared
 	  to the previous instruction in cycles. And currently supported only on X86
+	- simd: Flags describing a SIMD operation. "e" for empty Arm SVE predicate. "p" for partial Arm SVE predicate
 
 	By default, comm, dso and symbol keys are used.
 	(i.e. --sort comm,dso,symbol)
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 3670136a0074..0c11f50abfec 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -745,6 +745,7 @@ __hists__add_entry(struct hists *hists,
 		.weight = sample->weight,
 		.ins_lat = sample->ins_lat,
 		.p_stage_cyc = sample->p_stage_cyc,
+		.simd_flags = sample->simd_flags,
 	}, *he = hists__findnew_entry(hists, &entry, al, sample_self);
 
 	if (!hists->has_callchains && he && he->callchain_size != 0)
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 86a677954279..afc9f1c7f4dc 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -81,6 +81,7 @@ enum hist_column {
 	HISTC_ADDR_FROM,
 	HISTC_ADDR_TO,
 	HISTC_ADDR,
+	HISTC_SIMD,
 	HISTC_NR_COLS, /* Last entry */
 };
 
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 093a0c8b2e3d..e11e68ecf0a2 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -139,6 +139,52 @@ struct sort_entry sort_thread = {
 	.se_width_idx	= HISTC_THREAD,
 };
 
+/* --sort simd */
+
+static int64_t
+sort__simd_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	if (left->simd_flags.arch != right->simd_flags.arch)
+		return (int64_t) left->simd_flags.arch - right->simd_flags.arch;
+
+	return (int64_t) left->simd_flags.pred - right->simd_flags.pred;
+}
+
+static const char *hist_entry__get_simd_name(struct simd_flags *simd_flags)
+{
+	u64 arch = simd_flags->arch;
+
+	if (arch & SIMD_OP_FLAGS_ARCH_SVE)
+		return "SVE";
+	else
+		return "n/a";
+}
+
+static int hist_entry__simd_snprintf(struct hist_entry *he, char *bf,
+				     size_t size, unsigned int width __maybe_unused)
+{
+	const char *name;
+
+	if (!he->simd_flags.arch)
+		return repsep_snprintf(bf, size, "");
+
+	name = hist_entry__get_simd_name(&he->simd_flags);
+
+	if (he->simd_flags.pred & SIMD_OP_FLAGS_PRED_EMPTY)
+		return repsep_snprintf(bf, size, "[e] %s", name);
+	else if (he->simd_flags.pred & SIMD_OP_FLAGS_PRED_PARTIAL)
+		return repsep_snprintf(bf, size, "[p] %s", name);
+
+	return repsep_snprintf(bf, size, "[.] %s", name);
+}
+
+struct sort_entry sort_simd = {
+	.se_header	= "Simd   ",
+	.se_cmp		= sort__simd_cmp,
+	.se_snprintf	= hist_entry__simd_snprintf,
+	.se_width_idx	= HISTC_SIMD,
+};
+
 /* --sort comm */
 
 /*
@@ -2142,6 +2188,7 @@ static struct sort_dimension common_sort_dimensions[] = {
 	DIM(SORT_ADDR, "addr", sort_addr),
 	DIM(SORT_LOCAL_RETIRE_LAT, "local_retire_lat", sort_local_p_stage_cyc),
 	DIM(SORT_GLOBAL_RETIRE_LAT, "retire_lat", sort_global_p_stage_cyc),
+	DIM(SORT_SIMD, "simd", sort_simd)
 };
 
 #undef DIM
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 22f437c3476f..ecfb7f1359d5 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -111,6 +111,7 @@ struct hist_entry {
 	u64			p_stage_cyc;
 	u8			cpumode;
 	u8			depth;
+	struct simd_flags	simd_flags;
 
 	/* We are added by hists__add_dummy_entry. */
 	bool			dummy;
@@ -241,6 +242,7 @@ enum sort_type {
 	SORT_ADDR,
 	SORT_LOCAL_RETIRE_LAT,
 	SORT_GLOBAL_RETIRE_LAT,
+	SORT_SIMD,
 
 	/* branch stack specific sort keys */
 	__SORT_BRANCH_STACK,
-- 
2.34.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ