linux-kernel - [PATCH 30/30] perf stat: Introduce --per-task option

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1434269985-521-31-git-send-email-jolsa@kernel.org>
Date:	Sun, 14 Jun 2015 10:19:45 +0200
From:	Jiri Olsa <jolsa@...nel.org>
To:	Arnaldo Carvalho de Melo <acme@...nel.org>
Cc:	lkml <linux-kernel@...r.kernel.org>,
	Adrian Hunter <adrian.hunter@...el.com>,
	Andi Kleen <ak@...ux.intel.com>,
	David Ahern <dsahern@...il.com>,
	Ingo Molnar <mingo@...nel.org>,
	Namhyung Kim <namhyung@...nel.org>,
	Peter Zijlstra <a.p.zijlstra@...llo.nl>,
	Stephane Eranian <eranian@...gle.com>
Subject: [PATCH 30/30] perf stat: Introduce --per-task option

Currently all the -p option PID arguments tasks values
get aggregated and printed as single values.

Adding --per-tasks option to print values per task.

  $ perf stat  -e cycles,instructions --per-task -p 25388,25442 -a
  ^C
   Performance counter stats for process id '25388,25442':

  cat-25388               100,122      cycles
  vim-25442             4,167,876      cycles
  cat-25388                20,080      instructions
  vim-25442             3,232,735      instructions

         6.057130572 seconds time elapsed

Also works under interval mode:

  $ perf stat  -e cycles,instructions --per-task -p 25388,25442 -a -I 1000
  #           time task                      counts unit events
       1.000190002 cat-25388                54,196      cycles
       1.000190002 vim-25442                     0      cycles
       1.000190002 cat-25388                11,660      instructions
       1.000190002 vim-25442                     0      instructions
       2.000742579 cat-25388                55,247      cycles
       2.000742579 vim-25442             1,752,951      cycles
       2.000742579 cat-25388                11,872      instructions
       2.000742579 vim-25442             1,238,512      instructions
       3.001057871 cat-25388                     0      cycles
       3.001057871 vim-25442                     0      cycles
       3.001057871 cat-25388                     0      instructions
       3.001057871 vim-25442                     0      instructions
  ^C     3.242519732 cat-25388                     0      cycles
       3.242519732 vim-25442                     0      cycles
       3.242519732 cat-25388                     0      instructions
       3.242519732 vim-25442                     0      instructions

It works only with -t and -p options, otherwise following
error is printed:

  $ perf stat  -e cycles --per-task  -I 1000 ls
  The --per-task option is only available when monitoring tasks via -p -t options.
      -p, --pid <pid>       stat events on existing process id
      -t, --tid <tid>       stat events on existing thread id

Link: http://lkml.kernel.org/n/tip-0v0ixd9k7o9z1u8hqngm1coe@git.kernel.org
Signed-off-by: Jiri Olsa <jolsa@...nel.org>
---
 tools/perf/Documentation/perf-stat.txt |  3 ++
 tools/perf/builtin-stat.c              | 67 +++++++++++++++++++++++++++++++++-
 tools/perf/util/stat.h                 |  1 +
 3 files changed, 69 insertions(+), 2 deletions(-)

diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 04e150d83e7d..b83cc5bbfa9a 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -144,6 +144,9 @@ is a useful mode to detect imbalance between physical cores.  To enable this mod
 use --per-core in addition to -a. (system-wide).  The output includes the
 core number and the number of online logical processors on that physical processor.
 
+--per-task::
+Aggregate counts per monitored threads (-t option) or processes (-p option).
+
 -D msecs::
 --delay msecs::
 After starting the program, wait msecs before measuring. This is useful to
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index cefc905343f7..c202c5d629bf 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -237,6 +237,7 @@ process_counter_values(struct perf_evsel *evsel, int cpu, int thread,
 		count = &zero;
 
 	switch (aggr_mode) {
+	case AGGR_TASK:
 	case AGGR_CORE:
 	case AGGR_SOCKET:
 	case AGGR_NONE:
@@ -608,6 +609,13 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
 			csv_output ? 0 : -4,
 			perf_evsel__cpus(evsel)->map[id], csv_sep);
 		break;
+	case AGGR_TASK:
+		fprintf(output, "%s-%*d%s",
+			thread_map__comm(evsel->threads, id),
+			csv_output ? 0 : -8,
+			thread_map__pid(evsel->threads, id),
+			csv_sep);
+		break;
 	case AGGR_GLOBAL:
 	default:
 		break;
@@ -756,6 +764,40 @@ static void print_aggr(char *prefix)
 	}
 }
 
+static void print_aggr_task(struct perf_evsel *counter, char *prefix)
+{
+	int nthreads = thread_map__nr(counter->threads);
+	int ncpus = cpu_map__nr(counter->cpus);
+	int cpu, thread;
+	double uval;
+
+	for (thread = 0; thread < nthreads; thread++) {
+		u64 ena = 0, run = 0, val = 0;
+
+		for (cpu = 0; cpu < ncpus; cpu++) {
+			val += perf_counts(counter->counts, cpu, thread)->val;
+			ena += perf_counts(counter->counts, cpu, thread)->ena;
+			run += perf_counts(counter->counts, cpu, thread)->run;
+		}
+
+		if (prefix)
+			fprintf(output, "%s", prefix);
+
+		uval = val * counter->scale;
+
+		if (nsec_counter(counter))
+			nsec_printout(thread, 0, counter, uval);
+		else
+			abs_printout(thread, 0, counter, uval);
+
+		if (!csv_output)
+			print_noise(counter, 1.0);
+
+		print_running(run, ena);
+		fputc('\n', output);
+	}
+}
+
 /*
  * Print out the results of a single counter:
  * aggregated counts in system-wide mode
@@ -882,6 +924,9 @@ static void print_interval(char *prefix, struct timespec *ts)
 		case AGGR_NONE:
 			fprintf(output, "#           time CPU                counts %*s events\n", unit_width, "unit");
 			break;
+		case AGGR_TASK:
+			fprintf(output, "#           time task                      counts %*s events\n", unit_width, "unit");
+			break;
 		case AGGR_GLOBAL:
 		default:
 			fprintf(output, "#           time             counts %*s events\n", unit_width, "unit");
@@ -950,6 +995,10 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
 	case AGGR_SOCKET:
 		print_aggr(prefix);
 		break;
+	case AGGR_TASK:
+		evlist__for_each(evsel_list, counter)
+			print_aggr_task(counter, prefix);
+		break;
 	case AGGR_GLOBAL:
 		evlist__for_each(evsel_list, counter)
 			print_counter_aggr(counter, prefix);
@@ -1037,6 +1086,7 @@ static int perf_stat_init_aggr_mode(void)
 		break;
 	case AGGR_NONE:
 	case AGGR_GLOBAL:
+	case AGGR_TASK:
 	default:
 		break;
 	}
@@ -1261,6 +1311,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 		     "aggregate counts per processor socket", AGGR_SOCKET),
 	OPT_SET_UINT(0, "per-core", &aggr_mode,
 		     "aggregate counts per physical processor core", AGGR_CORE),
+	OPT_SET_UINT(0, "per-task", &aggr_mode,
+		     "aggregate counts per task", AGGR_TASK),
 	OPT_UINTEGER('D', "delay", &initial_delay,
 		     "ms to wait before starting measurement after program start"),
 	OPT_END()
@@ -1352,8 +1404,19 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 		run_count = 1;
 	}
 
-	/* no_aggr, cgroup are for system-wide only */
-	if ((aggr_mode != AGGR_GLOBAL || nr_cgroups) &&
+	if ((aggr_mode == AGGR_TASK) && !target__has_task(&target)) {
+		fprintf(stderr, "The --per-task option is only available "
+			"when monitoring tasks via -p -t options.\n");
+		parse_options_usage(NULL, options, "p", 1);
+		parse_options_usage(NULL, options, "t", 1);
+		goto out;
+	}
+
+	/*
+	 * no_aggr, cgroup are for system-wide only
+	 * --per-task is aggregated per task, we dont mix it with cpu mode
+	 */
+	if (((aggr_mode != AGGR_GLOBAL && aggr_mode != AGGR_TASK) || nr_cgroups) &&
 	    !target__has_cpu(&target)) {
 		fprintf(stderr, "both cgroup and no-aggregation "
 			"modes only available in system-wide mode\n");
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 870256735a77..deb9a0faccf7 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -30,6 +30,7 @@ enum aggr_mode {
 	AGGR_GLOBAL,
 	AGGR_SOCKET,
 	AGGR_CORE,
+	AGGR_TASK,
 };
 
 struct perf_counts_values {
-- 
1.9.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/