Message-ID: <20200211232955.GB122432@krava>
Date: Wed, 12 Feb 2020 00:29:55 +0100
From: Jiri Olsa <jolsa@...hat.com>
To: Jin Yao <yao.jin@...ux.intel.com>
Cc: acme@...nel.org, jolsa@...nel.org, peterz@...radead.org,
mingo@...hat.com, alexander.shishkin@...ux.intel.com,
Linux-kernel@...r.kernel.org, ak@...ux.intel.com,
kan.liang@...el.com, yao.jin@...el.com
Subject: Re: [PATCH v2] perf stat: Show percore counts in per CPU output
On Tue, Feb 11, 2020 at 10:34:34AM +0800, Jin Yao wrote:
> We already support the event modifier "percore", which sums up the
> event counts for all hardware threads in a core and shows the counts
> per core.
>
> For example,
>
> # perf stat -e cpu/event=cpu-cycles,percore/ -a -A -- sleep 1
>
> Performance counter stats for 'system wide':
>
> S0-D0-C0 395,072 cpu/event=cpu-cycles,percore/
> S0-D0-C1 851,248 cpu/event=cpu-cycles,percore/
> S0-D0-C2 954,226 cpu/event=cpu-cycles,percore/
> S0-D0-C3 1,233,659 cpu/event=cpu-cycles,percore/
>
> This patch provides a new option "--percore-show-thread". It is
> used together with the event modifier "percore" to sum up the event
> counts for all hardware threads in a core, but to show the counts per
> hardware thread.
>
> This is essentially a replacement for the any bit (which is gone in
> Icelake). Per-core counts are useful for some formulas, e.g. CoreIPC.
> The original percore version was inconvenient to post-process. This
> variant matches the output of the any bit.
>
> With this patch, for example,
>
> # perf stat -e cpu/event=cpu-cycles,percore/ -a -A --percore-show-thread -- sleep 1
>
> Performance counter stats for 'system wide':
>
> CPU0 2,453,061 cpu/event=cpu-cycles,percore/
> CPU1 1,823,921 cpu/event=cpu-cycles,percore/
> CPU2 1,383,166 cpu/event=cpu-cycles,percore/
> CPU3 1,102,652 cpu/event=cpu-cycles,percore/
> CPU4 2,453,061 cpu/event=cpu-cycles,percore/
> CPU5 1,823,921 cpu/event=cpu-cycles,percore/
> CPU6 1,383,166 cpu/event=cpu-cycles,percore/
> CPU7 1,102,652 cpu/event=cpu-cycles,percore/
>
> We can see the counts are duplicated across the sibling-thread CPU
> pairs (CPU0/CPU4, CPU1/CPU5, CPU2/CPU6, CPU3/CPU7).
>
> v2:
> ---
> Add the explanation in the change log. This is essentially a replacement
> for the any bit. No code change.
the -I output is still wrong (the interval prefix ends up printed twice,
once in print_percore_thread and again in print_counter_aggrdata):
$ sudo ./perf stat -e cpu/event=cpu-cycles,percore/ -a -A --percore-show-thread -I 1000
# time CPU counts unit events
1.000251427 1.000251427 CPU0 43,474,700 cpu/event=cpu-cycles,percore/
1.000251427 1.000251427 CPU1 66,495,270 cpu/event=cpu-cycles,percore/
1.000251427 1.000251427 CPU2 42,342,367 cpu/event=cpu-cycles,percore/
1.000251427 1.000251427 CPU3 43,247,607 cpu/event=cpu-cycles,percore/
plus some comments below,
jirka
SNIP
> @@ -628,7 +628,7 @@ static void aggr_cb(struct perf_stat_config *config,
> static void print_counter_aggrdata(struct perf_stat_config *config,
> struct evsel *counter, int s,
> char *prefix, bool metric_only,
> - bool *first)
> + bool *first, int cpu)
> {
> struct aggr_data ad;
> FILE *output = config->output;
> @@ -654,8 +654,15 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
> fprintf(output, "%s", prefix);
>
> uval = val * counter->scale;
> - printout(config, id, nr, counter, uval, prefix,
> - run, ena, 1.0, &rt_stat);
> +
> + if (cpu == -1) {
> + printout(config, id, nr, counter, uval, prefix,
> + run, ena, 1.0, &rt_stat);
> + } else {
> + printout(config, cpu, nr, counter, uval, prefix,
> + run, ena, 1.0, &rt_stat);
> + }
this would be shorter instead of the above:

	printout(config, cpu != -1 ? cpu : id, nr, counter, uval, prefix,
		 run, ena, 1.0, &rt_stat);
> +
> if (!metric_only)
> fputc('\n', output);
> }
> @@ -687,7 +694,7 @@ static void print_aggr(struct perf_stat_config *config,
> evlist__for_each_entry(evlist, counter) {
> print_counter_aggrdata(config, counter, s,
> prefix, metric_only,
> - &first);
> + &first, -1);
> }
> if (metric_only)
> fputc('\n', output);
> @@ -1163,13 +1170,38 @@ static void print_percore(struct perf_stat_config *config,
>
> print_counter_aggrdata(config, counter, s,
> prefix, metric_only,
> - &first);
> + &first, -1);
> }
>
> if (metric_only)
> fputc('\n', output);
> }
>
> +static void print_percore_thread(struct perf_stat_config *config,
> + struct evsel *counter, char *prefix)
> +{
> + int cpu, s, s2, id;
> + bool first = true;
> + FILE *output = config->output;
> +
missing check for
if (!(config->aggr_map || config->aggr_get_id))
> + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
> + s2 = config->aggr_get_id(config, evsel__cpus(counter), cpu);
should you use the real cpu value in here instead of an index? i.e. the value of:

	perf_cpu_map__cpu(evsel__cpus(counter), cpu)

instead of 'cpu' above (see the sketch after this hunk, which folds this
and the aggr_map/aggr_get_id check together)
> +
> + for (s = 0; s < config->aggr_map->nr; s++) {
> + id = config->aggr_map->map[s];
> + if (s2 == id)
> + break;
> + }
> +
> + if (prefix)
> + fprintf(output, "%s", prefix);
> +
> + print_counter_aggrdata(config, counter, s,
> + prefix, false,
> + &first, cpu);
> + }
> +}
> +
> void
> perf_evlist__print_counters(struct evlist *evlist,
> struct perf_stat_config *config,
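
something along these lines maybe, folding both comments above into one
sketch (rough and untested; it assumes perf_cpu_map__cpu() returns the
real CPU number for a given index, and it drops the extra prefix fprintf
because print_counter_aggrdata() already prints the prefix, which is
what makes the -I output above show the timestamp twice):

	static void print_percore_thread(struct perf_stat_config *config,
					 struct evsel *counter, char *prefix)
	{
		int s, s2, id, cpu;
		bool first = true;

		/* same guard as in print_percore() */
		if (!(config->aggr_map || config->aggr_get_id))
			return;

		for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
			s2 = config->aggr_get_id(config, evsel__cpus(counter), cpu);

			for (s = 0; s < config->aggr_map->nr; s++) {
				id = config->aggr_map->map[s];
				if (s2 == id)
					break;
			}

			/*
			 * print_counter_aggrdata() prints the prefix itself,
			 * so no fprintf(output, "%s", prefix) here; pass the
			 * real CPU number instead of the index.
			 */
			print_counter_aggrdata(config, counter, s,
					       prefix, false, &first,
					       perf_cpu_map__cpu(evsel__cpus(counter), cpu));
		}
	}
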
> @@ -1222,9 +1254,16 @@ perf_evlist__print_counters(struct evlist *evlist,
> print_no_aggr_metric(config, evlist, prefix);
> else {
> evlist__for_each_entry(evlist, counter) {
> - if (counter->percore)
> - print_percore(config, counter, prefix);
> - else
> + if (counter->percore) {
> + if (config->percore_show_thread) {
> + print_percore_thread(config,
> + counter,
> + prefix);
> + } else {
> + print_percore(config, counter,
> + prefix);
please keep the print_percore call in here and do the percore_show_thread
check/call inside print_percore (roughly as in the sketch after this hunk)
> + }
> + } else
> print_counter(config, counter, prefix);
> }
> }
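
i.e. keep the call site as it was and put the check inside print_percore()
itself, roughly like this (a sketch of the shape only, the rest of the
function stays as it is today):

	static void print_percore(struct perf_stat_config *config,
				  struct evsel *counter, char *prefix)
	{
		bool metric_only = config->metric_only;
		FILE *output = config->output;
		bool first = true;
		int s;

		if (!(config->aggr_map || config->aggr_get_id))
			return;

		/* --percore-show-thread switches to the per-thread layout */
		if (config->percore_show_thread) {
			print_percore_thread(config, counter, prefix);
			return;
		}

		for (s = 0; s < config->aggr_map->nr; s++) {
			if (prefix && metric_only)
				fprintf(output, "%s", prefix);

			print_counter_aggrdata(config, counter, s,
					       prefix, metric_only,
					       &first, -1);
		}

		if (metric_only)
			fputc('\n', output);
	}

then the evlist__for_each_entry() loop above does not need to change at all.
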
> diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
> index fb990efa54a8..b4fdfaa7f2c0 100644
> --- a/tools/perf/util/stat.h
> +++ b/tools/perf/util/stat.h
> @@ -109,6 +109,7 @@ struct perf_stat_config {
> bool walltime_run_table;
> bool all_kernel;
> bool all_user;
> + bool percore_show_thread;
> FILE *output;
> unsigned int interval;
> unsigned int timeout;
> --
> 2.17.1
>
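
FWIW I'd expect the new bool to be wired up in builtin-stat.c with
something like the following (that hunk is not quoted above, and the
help string here is only a placeholder):

	OPT_BOOLEAN(0, "percore-show-thread", &stat_config.percore_show_thread,
		    "aggregate counts per core, show them per hardware thread"),
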