Message-Id: <1483739814-23000-13-git-send-email-vikas.shivappa@linux.intel.com>
Date:   Fri,  6 Jan 2017 13:56:54 -0800
From:   Vikas Shivappa <vikas.shivappa@...ux.intel.com>
To:     vikas.shivappa@...el.com, vikas.shivappa@...ux.intel.com
Cc:     linux-kernel@...r.kernel.org, x86@...nel.org, hpa@...or.com,
        tglx@...utronix.de, mingo@...nel.org, peterz@...radead.org,
        ravi.v.shankar@...el.com, tony.luck@...el.com,
        fenghua.yu@...el.com, andi.kleen@...el.com, h.peter.anvin@...el.com
Subject: [PATCH 12/12] perf/stat: revamp read error handling, snapshot and per_pkg events

From: David Carrillo-Cisneros <davidcc@...gle.com>

A package-wide event can return a valid read even if it has not run on a
specific CPU. This does not fit well with the assumption that run == 0
is equivalent to <not counted>.

To fix the problem, this patch defines special error values (~0ULL) for
val, run and ena, and uses them to signal read errors, allowing run == 0
to be a valid value for package events. A new value, (NA), is output on
read error and when the event has not been enabled (time enabled == 0).
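
The sketch below is purely illustrative and not part of the patch; the
names (struct counts, counts_make_na, counts_is_na, print_running) are
simplified stand-ins for the helpers added to tools/perf/util/counts.h
and the print_running() change in builtin-stat.c further down:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PERF_COUNTS_NA ((uint64_t)~0ULL)

struct counts { uint64_t val, ena, run; };

/* a failed read marks every field with the NA sentinel */
void counts_make_na(struct counts *c)
{
	c->val = c->ena = c->run = PERF_COUNTS_NA;
}

bool counts_is_na(const struct counts *c)
{
	return c->val == PERF_COUNTS_NA || c->ena == PERF_COUNTS_NA ||
	       c->run == PERF_COUNTS_NA;
}

/* printer: NA on read error or when the event was never enabled;
 * run == 0 on its own is no longer treated as an error */
void print_running(uint64_t run, uint64_t ena)
{
	if (run == PERF_COUNTS_NA || ena == PERF_COUNTS_NA || !ena)
		printf("  (NA)\n");
	else if (run != ena)
		printf("  (%.2f%%)\n", 100.0 * run / ena);
}

int main(void)
{
	struct counts c = { .val = 42, .ena = 1000, .run = 0 };

	print_running(c.run, c.ena);	/* run == 0, ena != 0 -> 0.00% */
	counts_make_na(&c);		/* simulate a failed read */
	print_running(c.run, c.ena);	/* -> (NA) */
	return 0;
}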

Finally, this patch revamps the calculation of deltas and scaling for
snapshot events: the count value of a snapshot event is no longer
delta-computed or extrapolated from the enabled/running times, while the
time deltas themselves are still computed, as it should be.
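
Again for illustration only (not part of the patch), a simplified
stand-alone version of the intended delta/scale behavior; the logic
mirrors perf_evsel__compute_deltas() and perf_counts_values__scale() in
the diff below, with invented names compute_deltas()/scale_counts():

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct counts { uint64_t val, ena, run; };

/* snapshot events report an absolute value, so the count itself is
 * kept as-is and only the time fields are turned into deltas */
void compute_deltas(struct counts *cur, struct counts *prev, bool snapshot)
{
	struct counts tmp = *prev;

	*prev = *cur;
	if (!snapshot)
		cur->val -= tmp.val;
	cur->ena -= tmp.ena;
	cur->run -= tmp.run;
}

/* scaling path (scale == true): per-pkg events may legitimately have
 * run == 0 on a given CPU, and snapshot counts are never extrapolated
 * by the enabled/running ratio */
void scale_counts(struct counts *c, bool per_pkg, bool snapshot)
{
	if (c->run == 0 && !per_pkg)
		c->val = 0;
	else if (c->run < c->ena && !snapshot && c->run)
		c->val = (uint64_t)((double)c->val * c->ena / c->run + 0.5);
}

int main(void)
{
	struct counts prev = { 0, 0, 0 };
	struct counts cur  = { .val = 500, .ena = 1000, .run = 250 };

	compute_deltas(&cur, &prev, false);	/* accumulating event */
	scale_counts(&cur, false, false);	/* 500 * 1000/250 -> 2000 */
	printf("val=%llu\n", (unsigned long long)cur.val);
	return 0;
}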

Tests: after this patch, a user can see the package-wide llc_occupancy
count when running 'perf stat -C <cpux> -G cgroup_y', and the system-wide
count for the cgroup when running with the -a option.

Reviewed-by: Stephane Eranian <eranian@...gle.com>
Signed-off-by: David Carrillo-Cisneros <davidcc@...gle.com>
Signed-off-by: Vikas Shivappa <vikas.shivappa@...ux.intel.com>
---
 tools/perf/builtin-stat.c | 36 +++++++++++++++++++++++-----------
 tools/perf/util/counts.h  | 19 ++++++++++++++++++
 tools/perf/util/evsel.c   | 49 ++++++++++++++++++++++++++++++++++++-----------
 tools/perf/util/evsel.h   |  8 ++++++--
 tools/perf/util/stat.c    | 35 +++++++++++----------------------
 5 files changed, 99 insertions(+), 48 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 9f109b9..69dd3a4 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -311,10 +311,8 @@ static int read_counter(struct perf_evsel *counter)
 
 			count = perf_counts(counter->counts, cpu, thread);
 			if (perf_evsel__read(counter, cpu, thread, count)) {
-				counter->counts->scaled = -1;
-				perf_counts(counter->counts, cpu, thread)->ena = 0;
-				perf_counts(counter->counts, cpu, thread)->run = 0;
-				return -1;
+				/* do not write stat for failed reads. */
+				continue;
 			}
 
 			if (STAT_RECORD) {
@@ -725,12 +723,16 @@ static int run_perf_stat(int argc, const char **argv)
 
 static void print_running(u64 run, u64 ena)
 {
+	bool is_na = run == PERF_COUNTS_NA || ena == PERF_COUNTS_NA || !ena;
+
 	if (csv_output) {
-		fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f",
-					csv_sep,
-					run,
-					csv_sep,
-					ena ? 100.0 * run / ena : 100.0);
+		if (is_na)
+			fprintf(stat_config.output, "%sNA%sNA", csv_sep, csv_sep);
+		else
+			fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f",
+				csv_sep, run, csv_sep, 100.0 * run / ena);
+	} else if (is_na) {
+		fprintf(stat_config.output, "  (NA)");
 	} else if (run != ena) {
 		fprintf(stat_config.output, "  (%.2f%%)", 100.0 * run / ena);
 	}
@@ -1103,7 +1105,7 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval,
 		if (counter->cgrp)
 			os.nfields++;
 	}
-	if (run == 0 || ena == 0 || counter->counts->scaled == -1) {
+	if (run == PERF_COUNTS_NA || ena == PERF_COUNTS_NA || counter->counts->scaled == -1) {
 		if (metric_only) {
 			pm(&os, NULL, "", "", 0);
 			return;
@@ -1209,12 +1211,17 @@ static void print_aggr(char *prefix)
 		id = aggr_map->map[s];
 		first = true;
 		evlist__for_each_entry(evsel_list, counter) {
+			bool all_nan = true;
 			val = ena = run = 0;
 			nr = 0;
 			for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
 				s2 = aggr_get_id(perf_evsel__cpus(counter), cpu);
 				if (s2 != id)
 					continue;
+				/* skip NA reads. */
+				if (perf_counts_values__is_na(perf_counts(counter->counts, cpu, 0)))
+					continue;
+				all_nan = false;
 				val += perf_counts(counter->counts, cpu, 0)->val;
 				ena += perf_counts(counter->counts, cpu, 0)->ena;
 				run += perf_counts(counter->counts, cpu, 0)->run;
@@ -1228,6 +1235,10 @@ static void print_aggr(char *prefix)
 				fprintf(output, "%s", prefix);
 
 			uval = val * counter->scale;
+			if (all_nan) {
+				run = PERF_COUNTS_NA;
+				ena = PERF_COUNTS_NA;
+			}
 			printout(id, nr, counter, uval, prefix, run, ena, 1.0);
 			if (!metric_only)
 				fputc('\n', output);
@@ -1306,7 +1317,10 @@ static void print_counter(struct perf_evsel *counter, char *prefix)
 		if (prefix)
 			fprintf(output, "%s", prefix);
 
-		uval = val * counter->scale;
+		if (val != PERF_COUNTS_NA)
+			uval = val * counter->scale;
+		else
+			uval = NAN;
 		printout(cpu, 0, counter, uval, prefix, run, ena, 1.0);
 
 		fputc('\n', output);
diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h
index 34d8baa..b65e97a 100644
--- a/tools/perf/util/counts.h
+++ b/tools/perf/util/counts.h
@@ -3,6 +3,9 @@
 
 #include "xyarray.h"
 
+/* Not Available (NA) value. Any operation with a NA equals a NA. */
+#define PERF_COUNTS_NA ((u64)~0ULL)
+
 struct perf_counts_values {
 	union {
 		struct {
@@ -14,6 +17,22 @@ struct perf_counts_values {
 	};
 };
 
+static inline void
+perf_counts_values__make_na(struct perf_counts_values *values)
+{
+	values->val = PERF_COUNTS_NA;
+	values->ena = PERF_COUNTS_NA;
+	values->run = PERF_COUNTS_NA;
+}
+
+static inline bool
+perf_counts_values__is_na(struct perf_counts_values *values)
+{
+	return values->val == PERF_COUNTS_NA ||
+	       values->ena == PERF_COUNTS_NA ||
+	       values->run == PERF_COUNTS_NA;
+}
+
 struct perf_counts {
 	s8			  scaled;
 	struct perf_counts_values aggr;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 7aa10e3..bbcdcd7 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1191,6 +1191,9 @@ void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, int thread,
 	if (!evsel->prev_raw_counts)
 		return;
 
+	if (perf_counts_values__is_na(count))
+		return;
+
 	if (cpu == -1) {
 		tmp = evsel->prev_raw_counts->aggr;
 		evsel->prev_raw_counts->aggr = *count;
@@ -1199,26 +1202,43 @@ void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, int thread,
 		*perf_counts(evsel->prev_raw_counts, cpu, thread) = *count;
 	}
 
-	count->val = count->val - tmp.val;
+	/* Snapshot events do not calculate deltas for count values. */
+	if (!evsel->snapshot)
+		count->val = count->val - tmp.val;
 	count->ena = count->ena - tmp.ena;
 	count->run = count->run - tmp.run;
 }
 
 void perf_counts_values__scale(struct perf_counts_values *count,
-			       bool scale, s8 *pscaled)
+			       bool scale, bool per_pkg, bool snapshot, s8 *pscaled)
 {
 	s8 scaled = 0;
 
+	if (perf_counts_values__is_na(count)) {
+		if (pscaled)
+			*pscaled = -1;
+		return;
+	}
+
 	if (scale) {
-		if (count->run == 0) {
+		/*
+	 * per-pkg events can have run == 0 on a CPU and still be
+		 * valid.
+		 */
+		if (count->run == 0 && !per_pkg) {
 			scaled = -1;
 			count->val = 0;
 		} else if (count->run < count->ena) {
 			scaled = 1;
-			count->val = (u64)((double) count->val * count->ena / count->run + 0.5);
+			/* Snapshot events do not scale counts values. */
+			if (!snapshot && count->run)
+				count->val = (u64)((double) count->val * count->ena /
+					     count->run + 0.5);
 		}
-	} else
-		count->ena = count->run = 0;
+
+	} else {
+		count->run = count->ena;
+	}
 
 	if (pscaled)
 		*pscaled = scaled;
@@ -1232,8 +1252,10 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
 	if (FD(evsel, cpu, thread) < 0)
 		return -EINVAL;
 
-	if (readn(FD(evsel, cpu, thread), count, sizeof(*count)) <= 0)
+	if (readn(FD(evsel, cpu, thread), count, sizeof(*count)) <= 0) {
+		perf_counts_values__make_na(count);
 		return -errno;
+	}
 
 	return 0;
 }
@@ -1241,6 +1263,7 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
 int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
 			      int cpu, int thread, bool scale)
 {
+	int ret = 0;
 	struct perf_counts_values count;
 	size_t nv = scale ? 3 : 1;
 
@@ -1250,13 +1273,17 @@ int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
 	if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0)
 		return -ENOMEM;
 
-	if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) <= 0)
-		return -errno;
+	if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) <= 0) {
+		perf_counts_values__make_na(&count);
+		ret = -errno;
+		goto exit;
+	}
 
 	perf_evsel__compute_deltas(evsel, cpu, thread, &count);
-	perf_counts_values__scale(&count, scale, NULL);
+	perf_counts_values__scale(&count, scale, evsel->per_pkg, evsel->snapshot, NULL);
+exit:
 	*perf_counts(evsel->counts, cpu, thread) = count;
-	return 0;
+	return ret;
 }
 
 static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread)
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 06ef6f2..9788ca1 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -82,6 +82,10 @@ struct perf_evsel_config_term {
  * @is_pos: the position (counting backwards) of the event id (PERF_SAMPLE_ID or
  *          PERF_SAMPLE_IDENTIFIER) in a non-sample event i.e. if sample_id_all
  *          is used there is an id sample appended to non-sample events
+ * @snapshot: an event whose raw value cannot be extrapolated based on
+ *	    the ratio of running/enabled time.
+ * @per_pkg: an event that runs package-wide. All cores in the same package will
+ *	    read the same value, even if running time == 0.
  * @priv:   And what is in its containing unnamed union are tool specific
  */
 struct perf_evsel {
@@ -153,8 +157,8 @@ static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
 	return perf_evsel__cpus(evsel)->nr;
 }
 
-void perf_counts_values__scale(struct perf_counts_values *count,
-			       bool scale, s8 *pscaled);
+void perf_counts_values__scale(struct perf_counts_values *count, bool scale,
+			       bool per_pkg, bool snapshot, s8 *pscaled);
 
 void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, int thread,
 				struct perf_counts_values *count);
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 39345c2d..514b953 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -202,7 +202,7 @@ static void zero_per_pkg(struct perf_evsel *counter)
 }
 
 static int check_per_pkg(struct perf_evsel *counter,
-			 struct perf_counts_values *vals, int cpu, bool *skip)
+			 int cpu, bool *skip)
 {
 	unsigned long *mask = counter->per_pkg_mask;
 	struct cpu_map *cpus = perf_evsel__cpus(counter);
@@ -224,17 +224,6 @@ static int check_per_pkg(struct perf_evsel *counter,
 		counter->per_pkg_mask = mask;
 	}
 
-	/*
-	 * we do not consider an event that has not run as a good
-	 * instance to mark a package as used (skip=1). Otherwise
-	 * we may run into a situation where the first CPU in a package
-	 * is not running anything, yet the second is, and this function
-	 * would mark the package as used after the first CPU and would
-	 * not read the values from the second CPU.
-	 */
-	if (!(vals->run && vals->ena))
-		return 0;
-
 	s = cpu_map__get_socket(cpus, cpu, NULL);
 	if (s < 0)
 		return -1;
@@ -249,30 +238,27 @@ static int check_per_pkg(struct perf_evsel *counter,
 		       struct perf_counts_values *count)
 {
 	struct perf_counts_values *aggr = &evsel->counts->aggr;
-	static struct perf_counts_values zero;
 	bool skip = false;
 
-	if (check_per_pkg(evsel, count, cpu, &skip)) {
+	if (check_per_pkg(evsel, cpu, &skip)) {
 		pr_err("failed to read per-pkg counter\n");
 		return -1;
 	}
 
-	if (skip)
-		count = &zero;
-
 	switch (config->aggr_mode) {
 	case AGGR_THREAD:
 	case AGGR_CORE:
 	case AGGR_SOCKET:
 	case AGGR_NONE:
-		if (!evsel->snapshot)
-			perf_evsel__compute_deltas(evsel, cpu, thread, count);
-		perf_counts_values__scale(count, config->scale, NULL);
+		perf_evsel__compute_deltas(evsel, cpu, thread, count);
+		perf_counts_values__scale(count, config->scale,
+					  evsel->per_pkg, evsel->snapshot, NULL);
 		if (config->aggr_mode == AGGR_NONE)
 			perf_stat__update_shadow_stats(evsel, count->values, cpu);
 		break;
 	case AGGR_GLOBAL:
-		aggr->val += count->val;
+		if (!skip)
+			aggr->val += count->val;
 		if (config->scale) {
 			aggr->ena += count->ena;
 			aggr->run += count->run;
@@ -337,9 +323,10 @@ int perf_stat_process_counter(struct perf_stat_config *config,
 	if (config->aggr_mode != AGGR_GLOBAL)
 		return 0;
 
-	if (!counter->snapshot)
-		perf_evsel__compute_deltas(counter, -1, -1, aggr);
-	perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled);
+	perf_evsel__compute_deltas(counter, -1, -1, aggr);
+	perf_counts_values__scale(aggr, config->scale,
+				  counter->per_pkg, counter->snapshot,
+				  &counter->counts->scaled);
 
 	for (i = 0; i < 3; i++)
 		update_stats(&ps->res_stats[i], count[i]);
-- 
1.9.1
