lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <E4C04539-9803-4405-A1EA-5D9517682E08@gmail.com>
Date:   Sun, 17 Oct 2021 18:03:21 -0300
From:   Arnaldo Carvalho de Melo <arnaldo.melo@...il.com>
To:     Jiri Olsa <jolsa@...hat.com>, Namhyung Kim <namhyung@...nel.org>
CC:     Arnaldo Carvalho de Melo <acme@...nel.org>,
        Ingo Molnar <mingo@...nel.org>,
        Peter Zijlstra <peterz@...radead.org>,
        LKML <linux-kernel@...r.kernel.org>,
        Andi Kleen <ak@...ux.intel.com>,
        Ian Rogers <irogers@...gle.com>,
        Stephane Eranian <eranian@...gle.com>
Subject: Re: [PATCH] perf evsel: Fix missing exclude_{host,guest} setting



On October 17, 2021 4:05:46 PM GMT-03:00, Jiri Olsa <jolsa@...hat.com> wrote:
>On Fri, Oct 15, 2021 at 10:12:53PM -0700, Namhyung Kim wrote:
>> The current logic for perf missing feature detection has a bug: it can
>> wrongly clear some modifiers like G or H.  Actually some PMUs don't
>> support any filtering or exclusion while others do.  But we check it
>> as a global feature.
>> 
>> For example, the cycles event can have 'G' modifier to enable it only
>> in the guest mode on x86.  When you don't run any VMs it'll return 0.
>> 
>>   # perf stat -a -e cycles:G sleep 1
>> 
>>     Performance counter stats for 'system wide':
>> 
>>                     0      cycles:G
>> 
>>           1.000721670 seconds time elapsed
>> 
>> But when it's used with other PMU events that don't support the G modifier,
>> it'll be reset and return non-zero values.
>> 
>>   # perf stat -a -e cycles:G,msr/tsc/ sleep 1
>> 
>>     Performance counter stats for 'system wide':
>> 
>>           538,029,960      cycles:G
>>        16,924,010,738      msr/tsc/
>> 
>>           1.001815327 seconds time elapsed
>> 
>> This is because of the missing feature detection logic being global.
>> Add a hashmap to set pmu-specific exclude_host/guest features.
>> 
>> Reported-by: Stephane Eranian <eranian@...gle.com>
>> Signed-off-by: Namhyung Kim <namhyung@...nel.org>
>> ---
>>  tools/perf/util/evsel.c | 47 +++++++++++++++++++++++++++++++++++------
>>  tools/perf/util/evsel.h |  6 ++++++
>>  2 files changed, 47 insertions(+), 6 deletions(-)
>> 
>> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
>> index dbfeceb2546c..437a28e769fe 100644
>> --- a/tools/perf/util/evsel.c
>> +++ b/tools/perf/util/evsel.c
>> @@ -1434,6 +1434,10 @@ void evsel__delete(struct evsel *evsel)
>>  {
>>  	evsel__exit(evsel);
>>  	free(evsel);
>> +
>> +	/* just free it for the first evsel */
>> +	hashmap__free(perf_missing_features.pmu);
>> +	perf_missing_features.pmu = NULL;
>>  }
>>  
>>  void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
>> @@ -1791,6 +1795,23 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
>>  	return 0;
>>  }
>>  
>> +#define PMU_HASH_BITS  4
>> +
>> +static size_t pmu_hash(const void *key, void *ctx __maybe_unused)
>> +{
>> +	const struct evsel *evsel = key;
>> +
>> +	return hash_bits(evsel->core.attr.type, PMU_HASH_BITS);
>> +}
>> +
>> +static bool pmu_equal(const void *key1, const void *key2, void *ctx __maybe_unused)
>> +{
>> +	const struct evsel *a = key1;
>> +	const struct evsel *b = key2;
>> +
>> +	return a->core.attr.type == b->core.attr.type;
>> +}
>> +
>>  static void evsel__disable_missing_features(struct evsel *evsel)
>>  {
>>  	if (perf_missing_features.weight_struct) {
>> @@ -1807,8 +1828,14 @@ static void evsel__disable_missing_features(struct evsel *evsel)
>>  		evsel->open_flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC;
>>  	if (perf_missing_features.mmap2)
>>  		evsel->core.attr.mmap2 = 0;
>> -	if (perf_missing_features.exclude_guest)
>> -		evsel->core.attr.exclude_guest = evsel->core.attr.exclude_host = 0;
>> +	if (perf_missing_features.exclude_guest) {
>> +		void *pmu;
>
>could you just pass NULL in here instead of NULL?

ENOPARSE 

>
>> +
>> +		if (hashmap__find(perf_missing_features.pmu, evsel, &pmu)) {
>> +			evsel->core.attr.exclude_guest = 0;
>> +			evsel->core.attr.exclude_host = 0;
>> +		}
>> +	}
>>  	if (perf_missing_features.lbr_flags)
>>  		evsel->core.attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS |
>>  				     PERF_SAMPLE_BRANCH_NO_CYCLES);
>> @@ -1840,6 +1867,9 @@ int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
>>  
>>  bool evsel__detect_missing_features(struct evsel *evsel)
>>  {
>> +	if (perf_missing_features.pmu == NULL)
>> +		perf_missing_features.pmu = hashmap__new(pmu_hash, pmu_equal, NULL);
>> +
>>  	/*
>>  	 * Must probe features in the order they were added to the
>>  	 * perf_event_attr interface.
>> @@ -1900,10 +1930,15 @@ bool evsel__detect_missing_features(struct evsel *evsel)
>>  		perf_missing_features.mmap2 = true;
>>  		pr_debug2_peo("switching off mmap2\n");
>>  		return true;
>> -	} else if (!perf_missing_features.exclude_guest &&
>> -		   (evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host)) {
>> -		perf_missing_features.exclude_guest = true;
>> -		pr_debug2_peo("switching off exclude_guest, exclude_host\n");
>> +	} else if ((evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) &&
>> +		   !hashmap__find(perf_missing_features.pmu, evsel, NULL)) {
>> +		struct perf_missing_pmu_features pmu_features = { true };
>
>missing new line after declaration
>
>> +		hashmap__add(perf_missing_features.pmu, evsel, &pmu_features);
>> +
>> +		if (!perf_missing_features.exclude_guest) {
>> +			perf_missing_features.exclude_guest = true;
>> +			pr_debug2_peo("switching off exclude_guest, exclude_host\n");
>> +		}
>>  		return true;
>>  	} else if (!perf_missing_features.sample_id_all) {
>>  		perf_missing_features.sample_id_all = true;
>> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
>> index 1f7edfa8568a..8dd11c8e022d 100644
>> --- a/tools/perf/util/evsel.h
>> +++ b/tools/perf/util/evsel.h
>> @@ -172,6 +172,12 @@ struct perf_missing_features {
>>  	bool data_page_size;
>>  	bool code_page_size;
>>  	bool weight_struct;
>> +
>> +	struct hashmap *pmu;
>> +};
>> +
>> +struct perf_missing_pmu_features {
>> +	bool exclude_guest;
>>  };
>
>hum, is this really needed? I think you could just pass '1' as value,
>because you care only if the item is hashed, right?
>
>in any case the value is the current stack address of the
>  struct perf_missing_pmu_features pmu_features = { true };
>
>so it might as well be just '1' ... I was confused at the beginning
>and looked for the reason of this struct ;-)
>
>we do that already in util/stat.c
>
>jirka
>
>>  
>>  extern struct perf_missing_features perf_missing_features;
>> -- 
>> 2.33.0.1079.g6e70778dc9-goog
>> 
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ