lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Sun, 23 Jul 2017 09:53:00 +0900
From:   Namhyung Kim <namhyung@...nel.org>
To:     Jiri Olsa <jolsa@...nel.org>
Cc:     Arnaldo Carvalho de Melo <acme@...nel.org>,
        lkml <linux-kernel@...r.kernel.org>,
        Ingo Molnar <mingo@...nel.org>,
        Peter Zijlstra <a.p.zijlstra@...llo.nl>,
        Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
        David Ahern <dsahern@...il.com>,
        Andi Kleen <andi@...stfloor.org>, kernel-team@....com
Subject: Re: [PATCH 4/4] perf stat: Use group read for event groups

On Fri, Jul 21, 2017 at 02:12:12PM +0200, Jiri Olsa wrote:
> Make perf stat use  group read if there  are groups
> defined. The group read will get the values for all
> member of groups within a single syscall instead of
> calling read syscall for every event.
> 
> We can see considerable less amount of kernel cycles
> spent on single group read, than reading each event
> separately, like for following perf stat command:
> 
>   # perf stat -e {cycles,instructions} -I 10 -a sleep 1
> 
> Monitored with "perf stat -r 5 -e '{cycles:u,cycles:k}'"
> 
> Before:
> 
>         24,325,676      cycles:u
>        297,040,775      cycles:k
> 
>        1.038554134 seconds time elapsed
> 
> After:
>         25,034,418      cycles:u
>        158,256,395      cycles:k
> 
>        1.036864497 seconds time elapsed
> 
> The perf_evsel__open fallback changes contributed by Andi Kleen.
> 
> Link: http://lkml.kernel.org/n/tip-b6g8qarwvptr81cqdtfst59u@git.kernel.org
> Signed-off-by: Jiri Olsa <jolsa@...nel.org>
> ---
>  tools/perf/builtin-stat.c | 30 +++++++++++++++++++++++++++---
>  tools/perf/util/counts.h  |  1 +
>  tools/perf/util/evsel.c   | 10 ++++++++++
>  3 files changed, 38 insertions(+), 3 deletions(-)
> 
> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> index 48ac53b199fc..866da7aa54bf 100644
> --- a/tools/perf/builtin-stat.c
> +++ b/tools/perf/builtin-stat.c
> @@ -213,10 +213,20 @@ static void perf_stat__reset_stats(void)
>  static int create_perf_stat_counter(struct perf_evsel *evsel)
>  {
>  	struct perf_event_attr *attr = &evsel->attr;
> +	struct perf_evsel *leader = evsel->leader;
>  
> -	if (stat_config.scale)
> +	if (stat_config.scale) {
>  		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
>  				    PERF_FORMAT_TOTAL_TIME_RUNNING;
> +	}
> +
> +	/*
> +	 * The event is part of non trivial group, let's enable
> +	 * the group read (for leader) and ID retrieval for all
> +	 * members.
> +	 */
> +	if (leader->nr_members > 1)
> +		attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP;

I just wonder ID is really necessary.  Doesn't it have same order we
can traverse with the for_each_group_member()?

Thanks,
Namhyung


>  
>  	attr->inherit = !no_inherit;
>  
> @@ -333,13 +343,21 @@ static int read_counter(struct perf_evsel *counter)
>  			struct perf_counts_values *count;
>  
>  			count = perf_counts(counter->counts, cpu, thread);
> -			if (perf_evsel__read(counter, cpu, thread, count)) {
> +
> +			/*
> +			 * The leader's group read loads data into its group members
> +			 * (via perf_evsel__read_counter) and sets threir count->loaded.
> +			 */
> +			if (!count->loaded &&
> +			    perf_evsel__read_counter(counter, cpu, thread)) {
>  				counter->counts->scaled = -1;
>  				perf_counts(counter->counts, cpu, thread)->ena = 0;
>  				perf_counts(counter->counts, cpu, thread)->run = 0;
>  				return -1;
>  			}
>  
> +			count->loaded = false;
> +
>  			if (STAT_RECORD) {
>  				if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
>  					pr_err("failed to write stat event\n");
> @@ -559,6 +577,11 @@ static int store_counter_ids(struct perf_evsel *counter)
>  	return __store_counter_ids(counter, cpus, threads);
>  }
>  
> +static bool perf_evsel__should_store_id(struct perf_evsel *counter)
> +{
> +	return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID;
> +}
> +
>  static int __run_perf_stat(int argc, const char **argv)
>  {
>  	int interval = stat_config.interval;
> @@ -631,7 +654,8 @@ static int __run_perf_stat(int argc, const char **argv)
>  		if (l > unit_width)
>  			unit_width = l;
>  
> -		if (STAT_RECORD && store_counter_ids(counter))
> +		if (perf_evsel__should_store_id(counter) &&
> +		    store_counter_ids(counter))
>  			return -1;
>  	}
>  
> diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h
> index 34d8baaf558a..cb45a6aecf9d 100644
> --- a/tools/perf/util/counts.h
> +++ b/tools/perf/util/counts.h
> @@ -12,6 +12,7 @@ struct perf_counts_values {
>  		};
>  		u64 values[3];
>  	};
> +	bool	loaded;
>  };
>  
>  struct perf_counts {
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index 89aecf3a35c7..3735c9e0080d 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -49,6 +49,7 @@ static struct {
>  	bool clockid_wrong;
>  	bool lbr_flags;
>  	bool write_backward;
> +	bool group_read;
>  } perf_missing_features;
>  
>  static clockid_t clockid;
> @@ -1321,6 +1322,7 @@ perf_evsel__set_count(struct perf_evsel *counter, int cpu, int thread,
>  	count->val    = val;
>  	count->ena    = ena;
>  	count->run    = run;
> +	count->loaded = true;
>  }
>  
>  static int
> @@ -1677,6 +1679,8 @@ int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
>  	if (perf_missing_features.lbr_flags)
>  		evsel->attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS |
>  				     PERF_SAMPLE_BRANCH_NO_CYCLES);
> +	if (perf_missing_features.group_read && evsel->attr.inherit)
> +		evsel->attr.read_format &= ~(PERF_FORMAT_GROUP|PERF_FORMAT_ID);
>  retry_sample_id:
>  	if (perf_missing_features.sample_id_all)
>  		evsel->attr.sample_id_all = 0;
> @@ -1832,6 +1836,12 @@ int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
>  		perf_missing_features.lbr_flags = true;
>  		pr_debug2("switching off branch sample type no (cycles/flags)\n");
>  		goto fallback_missing_features;
> +	} else if (!perf_missing_features.group_read &&
> +		    evsel->attr.inherit &&
> +		   (evsel->attr.read_format & PERF_FORMAT_GROUP)) {
> +		perf_missing_features.group_read = true;
> +		pr_debug2("switching off group read\n");
> +		goto fallback_missing_features;
>  	}
>  out_close:
>  	do {
> -- 
> 2.9.4
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ