lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Date:   Wed, 12 Feb 2020 12:41:28 +0800
From:   "Jin, Yao" <yao.jin@...ux.intel.com>
To:     Jiri Olsa <jolsa@...hat.com>
Cc:     acme@...nel.org, jolsa@...nel.org, peterz@...radead.org,
        mingo@...hat.com, alexander.shishkin@...ux.intel.com,
        Linux-kernel@...r.kernel.org, ak@...ux.intel.com,
        kan.liang@...el.com, yao.jin@...el.com
Subject: Re: [PATCH v2] perf stat: Show percore counts in per CPU output



On 2/12/2020 7:29 AM, Jiri Olsa wrote:
> On Tue, Feb 11, 2020 at 10:34:34AM +0800, Jin Yao wrote:
>> We have supported the event modifier "percore" which sums up the
>> event counts for all hardware threads in a core and show the counts
>> per core.
>>
>> For example,
>>
>>   # perf stat -e cpu/event=cpu-cycles,percore/ -a -A -- sleep 1
>>
>>    Performance counter stats for 'system wide':
>>
>>   S0-D0-C0                395,072      cpu/event=cpu-cycles,percore/
>>   S0-D0-C1                851,248      cpu/event=cpu-cycles,percore/
>>   S0-D0-C2                954,226      cpu/event=cpu-cycles,percore/
>>   S0-D0-C3              1,233,659      cpu/event=cpu-cycles,percore/
>>
>> This patch provides a new option "--percore-show-thread". It is
>> used with event modifier "percore" together to sum up the event counts
>> for all hardware threads in a core but show the counts per hardware
>> thread.
>>
>> This is essentially a replacement for the any bit (which is gone in
>> Icelake). Per core counts are useful for some formulas, e.g. CoreIPC.
>> The original percore version was inconvenient to post process. This
>> variant matches the output of the any bit.
>>
>> With this patch, for example,
>>
>>   # perf stat -e cpu/event=cpu-cycles,percore/ -a -A --percore-show-thread  -- sleep 1
>>
>>    Performance counter stats for 'system wide':
>>
>>   CPU0               2,453,061      cpu/event=cpu-cycles,percore/
>>   CPU1               1,823,921      cpu/event=cpu-cycles,percore/
>>   CPU2               1,383,166      cpu/event=cpu-cycles,percore/
>>   CPU3               1,102,652      cpu/event=cpu-cycles,percore/
>>   CPU4               2,453,061      cpu/event=cpu-cycles,percore/
>>   CPU5               1,823,921      cpu/event=cpu-cycles,percore/
>>   CPU6               1,383,166      cpu/event=cpu-cycles,percore/
>>   CPU7               1,102,652      cpu/event=cpu-cycles,percore/
>>
>> We can see counts are duplicated in CPU pairs
>> (CPU0/CPU4, CPU1/CPU5, CPU2/CPU6, CPU3/CPU7).
>>
>>   v2:
>>   ---
>>   Add the explanation in change log. This is essentially a replacement
>>   for the any bit. No code change.
> 
> -I output is still wrong:
> 
> 	$ sudo ./perf stat -e cpu/event=cpu-cycles,percore/ -a -A --percore-show-thread  -I 1000
> 	#           time CPU                    counts unit events
> 	     1.000251427      1.000251427 CPU0              43,474,700      cpu/event=cpu-cycles,percore/
> 	     1.000251427      1.000251427 CPU1              66,495,270      cpu/event=cpu-cycles,percore/
> 	     1.000251427      1.000251427 CPU2              42,342,367      cpu/event=cpu-cycles,percore/
> 	     1.000251427      1.000251427 CPU3              43,247,607      cpu/event=cpu-cycles,percore/
> 

Ah, double time output, I just realize this issue, very sorry about that :(

It doesn't need to print the prefix.

> plus some comments below,
> jirka
> 
> 
> SNIP
> 
>> @@ -628,7 +628,7 @@ static void aggr_cb(struct perf_stat_config *config,
>>   static void print_counter_aggrdata(struct perf_stat_config *config,
>>   				   struct evsel *counter, int s,
>>   				   char *prefix, bool metric_only,
>> -				   bool *first)
>> +				   bool *first, int cpu)
>>   {
>>   	struct aggr_data ad;
>>   	FILE *output = config->output;
>> @@ -654,8 +654,15 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
>>   		fprintf(output, "%s", prefix);
>>   
>>   	uval = val * counter->scale;
>> -	printout(config, id, nr, counter, uval, prefix,
>> -		 run, ena, 1.0, &rt_stat);
>> +
>> +	if (cpu == -1) {
>> +		printout(config, id, nr, counter, uval, prefix,
>> +			 run, ena, 1.0, &rt_stat);
>> +	} else {
>> +		printout(config, cpu, nr, counter, uval, prefix,
>> +			 run, ena, 1.0, &rt_stat);
>> +	}
> 
> this would be shorter instad of above:
> 
> printout(config, cpu != -1 ?: id, nr, counter, uval, prefix, run, ena, 1.0, &rt_stat);
> 

Yes, that would be shorter.

>> +
>>   	if (!metric_only)
>>   		fputc('\n', output);
>>   }
>> @@ -687,7 +694,7 @@ static void print_aggr(struct perf_stat_config *config,
>>   		evlist__for_each_entry(evlist, counter) {
>>   			print_counter_aggrdata(config, counter, s,
>>   					       prefix, metric_only,
>> -					       &first);
>> +					       &first, -1);
>>   		}
>>   		if (metric_only)
>>   			fputc('\n', output);
>> @@ -1163,13 +1170,38 @@ static void print_percore(struct perf_stat_config *config,
>>   
>>   		print_counter_aggrdata(config, counter, s,
>>   				       prefix, metric_only,
>> -				       &first);
>> +				       &first, -1);
>>   	}
>>   
>>   	if (metric_only)
>>   		fputc('\n', output);
>>   }
>>   
>> +static void print_percore_thread(struct perf_stat_config *config,
>> +				 struct evsel *counter, char *prefix)
>> +{
>> +	int cpu, s, s2, id;
>> +	bool first = true;
>> +	FILE *output = config->output;
>> +
> 
> missing check for
>            if (!(config->aggr_map || config->aggr_get_id))
> 
> 

Yes, we should add checking for config->aggr_map and 
config->aggr_get_id, like what the print_aggr does.

>> +	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
>> +		s2 = config->aggr_get_id(config, evsel__cpus(counter), cpu);
> 
> should you use real cpu valu in here instead of an index? like value of:
> 
> 	perf_cpu_map__cpu(,evsel__cpus(counter), cpu)
> 
> instead of 'cpu' above
> 

Yes, my fault. I should use the cpu value here but not the cpu idx. 
Thanks for pointing this issue out.

>> +
>> +		for (s = 0; s < config->aggr_map->nr; s++) {
>> +			id = config->aggr_map->map[s];
>> +			if (s2 == id)
>> +				break;
>> +		}
>> +
>> +		if (prefix)
>> +			fprintf(output, "%s", prefix);
>> +
>> +		print_counter_aggrdata(config, counter, s,
>> +				       prefix, false,
>> +				       &first, cpu);
>> +	}
>> +}
>> +
>>   void
>>   perf_evlist__print_counters(struct evlist *evlist,
>>   			    struct perf_stat_config *config,
>> @@ -1222,9 +1254,16 @@ perf_evlist__print_counters(struct evlist *evlist,
>>   			print_no_aggr_metric(config, evlist, prefix);
>>   		else {
>>   			evlist__for_each_entry(evlist, counter) {
>> -				if (counter->percore)
>> -					print_percore(config, counter, prefix);
>> -				else
>> +				if (counter->percore) {
>> +					if (config->percore_show_thread) {
>> +						print_percore_thread(config,
>> +								     counter,
>> +								     prefix);
>> +					} else {
>> +						print_percore(config, counter,
>> +							      prefix);
> 
> please keep the print_percore call in here and check/call
> for the percore_show_thread in it
> 

OK, Yes, that's fine. I will keep the print_percore here and call 
percore_show_thread from it.

Thanks
Jin Yao

>> +					}
>> +				} else
>>   					print_counter(config, counter, prefix);
>>   			}
>>   		}
>> diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
>> index fb990efa54a8..b4fdfaa7f2c0 100644
>> --- a/tools/perf/util/stat.h
>> +++ b/tools/perf/util/stat.h
>> @@ -109,6 +109,7 @@ struct perf_stat_config {
>>   	bool			 walltime_run_table;
>>   	bool			 all_kernel;
>>   	bool			 all_user;
>> +	bool			 percore_show_thread;
>>   	FILE			*output;
>>   	unsigned int		 interval;
>>   	unsigned int		 timeout;
>> -- 
>> 2.17.1
>>
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ