Message-ID: <20200211232955.GB122432@krava>
Date:   Wed, 12 Feb 2020 00:29:55 +0100
From:   Jiri Olsa <jolsa@...hat.com>
To:     Jin Yao <yao.jin@...ux.intel.com>
Cc:     acme@...nel.org, jolsa@...nel.org, peterz@...radead.org,
        mingo@...hat.com, alexander.shishkin@...ux.intel.com,
        linux-kernel@...r.kernel.org, ak@...ux.intel.com,
        kan.liang@...el.com, yao.jin@...el.com
Subject: Re: [PATCH v2] perf stat: Show percore counts in per CPU output

On Tue, Feb 11, 2020 at 10:34:34AM +0800, Jin Yao wrote:
> We already support the event modifier "percore", which sums up the
> event counts for all hardware threads in a core and shows the counts
> per core.
> 
> For example,
> 
>  # perf stat -e cpu/event=cpu-cycles,percore/ -a -A -- sleep 1
> 
>   Performance counter stats for 'system wide':
> 
>  S0-D0-C0                395,072      cpu/event=cpu-cycles,percore/
>  S0-D0-C1                851,248      cpu/event=cpu-cycles,percore/
>  S0-D0-C2                954,226      cpu/event=cpu-cycles,percore/
>  S0-D0-C3              1,233,659      cpu/event=cpu-cycles,percore/
> 
> This patch provides a new option "--percore-show-thread". It is
> used together with the event modifier "percore" to sum up the event
> counts for all hardware threads in a core, but to show the counts
> per hardware thread.
> 
> This is essentially a replacement for the any bit (which is gone in
> Icelake). Per-core counts are useful for some formulas, e.g. CoreIPC.
> The original percore version was inconvenient to post-process. This
> variant matches the output of the any bit.
> 
> With this patch, for example,
> 
>  # perf stat -e cpu/event=cpu-cycles,percore/ -a -A --percore-show-thread  -- sleep 1
> 
>   Performance counter stats for 'system wide':
> 
>  CPU0               2,453,061      cpu/event=cpu-cycles,percore/
>  CPU1               1,823,921      cpu/event=cpu-cycles,percore/
>  CPU2               1,383,166      cpu/event=cpu-cycles,percore/
>  CPU3               1,102,652      cpu/event=cpu-cycles,percore/
>  CPU4               2,453,061      cpu/event=cpu-cycles,percore/
>  CPU5               1,823,921      cpu/event=cpu-cycles,percore/
>  CPU6               1,383,166      cpu/event=cpu-cycles,percore/
>  CPU7               1,102,652      cpu/event=cpu-cycles,percore/
> 
> We can see counts are duplicated in CPU pairs
> (CPU0/CPU4, CPU1/CPU5, CPU2/CPU6, CPU3/CPU7).
> 
>  v2:
>  ---
>  Add the explanation to the changelog: this is essentially a
>  replacement for the any bit. No code change.

the -I (interval) output is still wrong; the interval timestamp is printed twice:

	$ sudo ./perf stat -e cpu/event=cpu-cycles,percore/ -a -A --percore-show-thread  -I 1000
	#           time CPU                    counts unit events
	     1.000251427      1.000251427 CPU0              43,474,700      cpu/event=cpu-cycles,percore/                                   
	     1.000251427      1.000251427 CPU1              66,495,270      cpu/event=cpu-cycles,percore/                                   
	     1.000251427      1.000251427 CPU2              42,342,367      cpu/event=cpu-cycles,percore/                                   
	     1.000251427      1.000251427 CPU3              43,247,607      cpu/event=cpu-cycles,percore/                                   

plus some comments below,
jirka


SNIP

> @@ -628,7 +628,7 @@ static void aggr_cb(struct perf_stat_config *config,
>  static void print_counter_aggrdata(struct perf_stat_config *config,
>  				   struct evsel *counter, int s,
>  				   char *prefix, bool metric_only,
> -				   bool *first)
> +				   bool *first, int cpu)
>  {
>  	struct aggr_data ad;
>  	FILE *output = config->output;
> @@ -654,8 +654,15 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
>  		fprintf(output, "%s", prefix);
>  
>  	uval = val * counter->scale;
> -	printout(config, id, nr, counter, uval, prefix,
> -		 run, ena, 1.0, &rt_stat);
> +
> +	if (cpu == -1) {
> +		printout(config, id, nr, counter, uval, prefix,
> +			 run, ena, 1.0, &rt_stat);
> +	} else {
> +		printout(config, cpu, nr, counter, uval, prefix,
> +			 run, ena, 1.0, &rt_stat);
> +	}

this would be shorter instead of the above (use the per-thread cpu when
we have one, otherwise fall back to the aggregated id):

	printout(config, cpu != -1 ? cpu : id, nr, counter, uval, prefix,
		 run, ena, 1.0, &rt_stat);

> +
>  	if (!metric_only)
>  		fputc('\n', output);
>  }
> @@ -687,7 +694,7 @@ static void print_aggr(struct perf_stat_config *config,
>  		evlist__for_each_entry(evlist, counter) {
>  			print_counter_aggrdata(config, counter, s,
>  					       prefix, metric_only,
> -					       &first);
> +					       &first, -1);
>  		}
>  		if (metric_only)
>  			fputc('\n', output);
> @@ -1163,13 +1170,38 @@ static void print_percore(struct perf_stat_config *config,
>  
>  		print_counter_aggrdata(config, counter, s,
>  				       prefix, metric_only,
> -				       &first);
> +				       &first, -1);
>  	}
>  
>  	if (metric_only)
>  		fputc('\n', output);
>  }
>  
> +static void print_percore_thread(struct perf_stat_config *config,
> +				 struct evsel *counter, char *prefix)
> +{
> +	int cpu, s, s2, id;
> +	bool first = true;
> +	FILE *output = config->output;
> +

there's a missing check for:
          if (!(config->aggr_map || config->aggr_get_id))
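
presumably an early return at the top of print_percore_thread, same as
print_percore already does; just a minimal sketch of what I mean:

	if (!(config->aggr_map || config->aggr_get_id))
		return;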


> +	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
> +		s2 = config->aggr_get_id(config, evsel__cpus(counter), cpu);

should you use the real cpu value here instead of an index? i.e. the
value of:

	perf_cpu_map__cpu(evsel__cpus(counter), cpu)

instead of 'cpu' above
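
i.e. something along these lines (untested, just to illustrate the idea):

	s2 = config->aggr_get_id(config, evsel__cpus(counter),
				 perf_cpu_map__cpu(evsel__cpus(counter), cpu));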

> +
> +		for (s = 0; s < config->aggr_map->nr; s++) {
> +			id = config->aggr_map->map[s];
> +			if (s2 == id)
> +				break;
> +		}
> +
> +		if (prefix)
> +			fprintf(output, "%s", prefix);
> +
> +		print_counter_aggrdata(config, counter, s,
> +				       prefix, false,
> +				       &first, cpu);
> +	}
> +}
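
btw, this might be where the -I output above goes wrong: the prefix
(which carries the interval timestamp) is printed here and then printed
again inside print_counter_aggrdata, so the time column shows up twice.
Just a guess from reading the diff, I have not traced it.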
> +
>  void
>  perf_evlist__print_counters(struct evlist *evlist,
>  			    struct perf_stat_config *config,
> @@ -1222,9 +1254,16 @@ perf_evlist__print_counters(struct evlist *evlist,
>  			print_no_aggr_metric(config, evlist, prefix);
>  		else {
>  			evlist__for_each_entry(evlist, counter) {
> -				if (counter->percore)
> -					print_percore(config, counter, prefix);
> -				else
> +				if (counter->percore) {
> +					if (config->percore_show_thread) {
> +						print_percore_thread(config,
> +								     counter,
> +								     prefix);
> +					} else {
> +						print_percore(config, counter,
> +							      prefix);

please keep the print_percore call here and do the percore_show_thread
check/call inside it
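
i.e. keep the caller as it was and dispatch inside print_percore, along
these lines (just a sketch, untested):

	static void print_percore(struct perf_stat_config *config,
				  struct evsel *counter, char *prefix)
	{
		if (config->percore_show_thread) {
			print_percore_thread(config, counter, prefix);
			return;
		}

		... /* existing per-core aggregation stays below */
	}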

> +					}
> +				} else
>  					print_counter(config, counter, prefix);
>  			}
>  		}
> diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
> index fb990efa54a8..b4fdfaa7f2c0 100644
> --- a/tools/perf/util/stat.h
> +++ b/tools/perf/util/stat.h
> @@ -109,6 +109,7 @@ struct perf_stat_config {
>  	bool			 walltime_run_table;
>  	bool			 all_kernel;
>  	bool			 all_user;
> +	bool			 percore_show_thread;
>  	FILE			*output;
>  	unsigned int		 interval;
>  	unsigned int		 timeout;
> -- 
> 2.17.1
> 
