linux-kernel - Re: [PATCH] perf stat: Support metrics with perf stat --per-thread

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20171012113027.GB4872@krava>
Date:   Thu, 12 Oct 2017 13:30:27 +0200
From:   Jiri Olsa <jolsa@...hat.com>
To:     Jin Yao <yao.jin@...ux.intel.com>
Cc:     acme@...nel.org, jolsa@...nel.org, peterz@...radead.org,
        mingo@...hat.com, alexander.shishkin@...ux.intel.com,
        Linux-kernel@...r.kernel.org, ak@...ux.intel.com,
        kan.liang@...el.com, yao.jin@...el.com
Subject: Re: [PATCH] perf stat: Support metrics with perf stat --per-thread

On Thu, Oct 12, 2017 at 05:05:00PM +0800, Jin Yao wrote:

SNIP

> ---
>  tools/perf/util/stat.c | 23 +++++++++++++++++++++++
>  1 file changed, 23 insertions(+)
> 
> diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
> index 35e9848..1164e68 100644
> --- a/tools/perf/util/stat.c
> +++ b/tools/perf/util/stat.c
> @@ -314,6 +314,26 @@ static int process_counter_maps(struct perf_stat_config *config,
>  	return 0;
>  }
>  
> +static int process_aggr_thread_counter(struct perf_evsel *counter)
> +{
> +	int nthreads = thread_map__nr(counter->threads);
> +	int ncpus = cpu_map__nr(counter->cpus);
> +	int cpu, thread;
> +	u64 tmp;
> +
> +	for (thread = 0; thread < nthreads; thread++) {
> +		u64 val = 0;
> +
> +		for (cpu = 0; cpu < ncpus; cpu++)
> +			val += perf_counts(counter->counts, cpu, thread)->val;
> +
> +		tmp = val * counter->scale;
> +		perf_stat__update_shadow_stats(counter, &tmp, 0);


also maybe we should move scale mult. into perf_stat__update_shadow_stats,
something like below (on top of my earlier patch)

not much tested so far.. but looks ok ;-)

jirka


---
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 9f2f07c0237d..4e109e6ba341 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1267,8 +1267,7 @@ static void aggr_update_shadow(void)
 					continue;
 				val += perf_counts(counter->counts, cpu, 0)->val;
 			}
-			val = val * counter->scale;
-			perf_stat__update_shadow_stats(counter, &val,
+			perf_stat__update_shadow_stats(counter, val,
 						       first_shadow_cpu(counter, id));
 		}
 	}
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index a2c12d1ef32a..e66ac456d103 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -178,58 +178,60 @@ void perf_stat__reset_shadow_stats(void)
  * more semantic information such as miss/hit ratios,
  * instruction rates, etc:
  */
-void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
+void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count,
 				    int cpu)
 {
 	int ctx = evsel_context(counter);
 
+	count *= counter->scale;
+
 	if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK) ||
 	    perf_evsel__match(counter, SOFTWARE, SW_CPU_CLOCK))
-		update_stats(&runtime_nsecs_stats[cpu], count[0]);
+		update_stats(&runtime_nsecs_stats[cpu], count);
 	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
-		update_stats(&runtime_cycles_stats[ctx][cpu], count[0]);
+		update_stats(&runtime_cycles_stats[ctx][cpu], count);
 	else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
-		update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count[0]);
+		update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count);
 	else if (perf_stat_evsel__is(counter, TRANSACTION_START))
-		update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
+		update_stats(&runtime_transaction_stats[ctx][cpu], count);
 	else if (perf_stat_evsel__is(counter, ELISION_START))
-		update_stats(&runtime_elision_stats[ctx][cpu], count[0]);
+		update_stats(&runtime_elision_stats[ctx][cpu], count);
 	else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
-		update_stats(&runtime_topdown_total_slots[ctx][cpu], count[0]);
+		update_stats(&runtime_topdown_total_slots[ctx][cpu], count);
 	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
-		update_stats(&runtime_topdown_slots_issued[ctx][cpu], count[0]);
+		update_stats(&runtime_topdown_slots_issued[ctx][cpu], count);
 	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
-		update_stats(&runtime_topdown_slots_retired[ctx][cpu], count[0]);
+		update_stats(&runtime_topdown_slots_retired[ctx][cpu], count);
 	else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
-		update_stats(&runtime_topdown_fetch_bubbles[ctx][cpu],count[0]);
+		update_stats(&runtime_topdown_fetch_bubbles[ctx][cpu],count);
 	else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
-		update_stats(&runtime_topdown_recovery_bubbles[ctx][cpu], count[0]);
+		update_stats(&runtime_topdown_recovery_bubbles[ctx][cpu], count);
 	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
-		update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]);
+		update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count);
 	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
-		update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]);
+		update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count);
 	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
-		update_stats(&runtime_branches_stats[ctx][cpu], count[0]);
+		update_stats(&runtime_branches_stats[ctx][cpu], count);
 	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
-		update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]);
+		update_stats(&runtime_cacherefs_stats[ctx][cpu], count);
 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
-		update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]);
+		update_stats(&runtime_l1_dcache_stats[ctx][cpu], count);
 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
-		update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
+		update_stats(&runtime_ll_cache_stats[ctx][cpu], count);
 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
-		update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
+		update_stats(&runtime_ll_cache_stats[ctx][cpu], count);
 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
-		update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
+		update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count);
 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
-		update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
+		update_stats(&runtime_itlb_cache_stats[ctx][cpu], count);
 	else if (perf_stat_evsel__is(counter, SMI_NUM))
-		update_stats(&runtime_smi_num_stats[ctx][cpu], count[0]);
+		update_stats(&runtime_smi_num_stats[ctx][cpu], count);
 	else if (perf_stat_evsel__is(counter, APERF))
-		update_stats(&runtime_aperf_stats[ctx][cpu], count[0]);
+		update_stats(&runtime_aperf_stats[ctx][cpu], count);
 
 	if (counter->collect_stat) {
 		struct saved_value *v = saved_value_lookup(counter, cpu, true);
-		update_stats(&v->stats, count[0]);
+		update_stats(&v->stats, count);
 	}
 }
 
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 6f94f3a2282e..c1cf42d04826 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -259,7 +259,6 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel
 	struct perf_counts_values *aggr = &evsel->counts->aggr;
 	static struct perf_counts_values zero;
 	bool skip = false;
-	u64 val;
 
 	if (check_per_pkg(evsel, count, cpu, &skip)) {
 		pr_err("failed to read per-pkg counter\n");
@@ -278,11 +277,9 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel
 			perf_evsel__compute_deltas(evsel, cpu, thread, count);
 		perf_counts_values__scale(count, config->scale, NULL);
 		if (config->aggr_mode == AGGR_NONE)
-			perf_stat__update_shadow_stats(evsel, count->values, cpu);
-		if (config->aggr_mode == AGGR_THREAD) {
-			val = count->val * evsel->scale;
-			perf_stat__update_shadow_stats(evsel, &val, 0);
-		}
+			perf_stat__update_shadow_stats(evsel, count->val, cpu);
+		if (config->aggr_mode == AGGR_THREAD)
+			perf_stat__update_shadow_stats(evsel, count->val, 0);
 		break;
 	case AGGR_GLOBAL:
 		aggr->val += count->val;
@@ -325,7 +322,6 @@ int perf_stat_process_counter(struct perf_stat_config *config,
 	struct perf_counts_values *aggr = &counter->counts->aggr;
 	struct perf_stat_evsel *ps = counter->priv;
 	u64 *count = counter->counts->aggr.values;
-	u64 val;
 	int i, ret;
 
 	aggr->val = aggr->ena = aggr->run = 0;
@@ -365,8 +361,7 @@ int perf_stat_process_counter(struct perf_stat_config *config,
 	/*
 	 * Save the full runtime - to allow normalization during printout:
 	 */
-	val = counter->scale * *count;
-	perf_stat__update_shadow_stats(counter, &val, 0);
+	perf_stat__update_shadow_stats(counter, *count, 0);
 
 	return 0;
 }
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 47915df346fb..490b78aa7230 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -82,7 +82,7 @@ typedef void (*new_line_t )(void *ctx);
 
 void perf_stat__init_shadow_stats(void);
 void perf_stat__reset_shadow_stats(void);
-void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
+void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count,
 				    int cpu);
 struct perf_stat_output_ctx {
 	void *ctx;