lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <YoJP+JqI2M1i647y@kernel.org>
Date:   Mon, 16 May 2022 10:22:00 -0300
From:   Arnaldo Carvalho de Melo <acme@...nel.org>
To:     Ravi Bangoria <ravi.bangoria@....com>
Cc:     peterz@...radead.org, rrichter@....com, mingo@...hat.com,
        mark.rutland@....com, jolsa@...nel.org, namhyung@...nel.org,
        tglx@...utronix.de, bp@...en8.de, irogers@...gle.com,
        yao.jin@...ux.intel.com, james.clark@....com, leo.yan@...aro.org,
        kan.liang@...ux.intel.com, ak@...ux.intel.com, eranian@...gle.com,
        like.xu.linux@...il.com, x86@...nel.org,
        linux-perf-users@...r.kernel.org, linux-kernel@...r.kernel.org,
        sandipan.das@....com, ananth.narayan@....com, kim.phillips@....com,
        santosh.shukla@....com
Subject: Re: [PATCH v2 5/8] perf record ibs: Warn about sampling period skew

Em Mon, May 09, 2022 at 10:19:11AM +0530, Ravi Bangoria escreveu:
> Samples without an L3 miss are discarded and counter is reset with
> random value (between 1-15 for fetch pmu and 1-127 for op pmu) when
> IBS L3 miss filtering is enabled. This causes a sampling period skew
> but there is no way to reconstruct aggregated sampling period. So
> print a warning at perf record if user sets l3missonly=1.
> 
> Ex:
>   # perf record -c 10000 -C 0 -e ibs_op/l3missonly=1/
>   WARNING: Hw internally resets sampling period when L3 Miss Filtering is enabled
>   and tagged operation does not cause L3 Miss. This causes sampling period skew.
> 
> Signed-off-by: Ravi Bangoria <ravi.bangoria@....com>
> ---
>  tools/perf/arch/x86/util/evsel.c | 34 ++++++++++++++++++++++++++++++++
>  tools/perf/util/evsel.c          |  7 +++++++
>  tools/perf/util/evsel.h          |  1 +
>  3 files changed, 42 insertions(+)
> 
> diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c
> index ac2899a25b7a..6399faa70a88 100644
> --- a/tools/perf/arch/x86/util/evsel.c
> +++ b/tools/perf/arch/x86/util/evsel.c
> @@ -4,6 +4,8 @@
>  #include "util/evsel.h"
>  #include "util/env.h"
>  #include "linux/string.h"
> +#include "util/pmu.h"
> +#include "util/debug.h"
>  
>  void arch_evsel__set_sample_weight(struct evsel *evsel)
>  {
> @@ -29,3 +31,35 @@ void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr)
>  
>  	free(env.cpuid);
>  }
> +
> +static void ibs_l3miss_warn(void)
> +{
> +	pr_warning(
> +"WARNING: Hw internally resets sampling period when L3 Miss Filtering is enabled\n"
> +"and tagged operation does not cause L3 Miss. This causes sampling period skew.\n");
> +}
> +
> +void arch_evsel__warn_ambiguity(struct evsel *evsel, struct perf_event_attr *attr)
> +{
> +	struct perf_env *env = evsel__env(evsel);
> +	struct perf_pmu *evsel_pmu = evsel__find_pmu(evsel);
> +	struct perf_pmu *ibs_fetch_pmu = perf_pmu__find("ibs_fetch");
> +	struct perf_pmu *ibs_op_pmu = perf_pmu__find("ibs_op");
> +	static int warned_once;

Please check first whether the warning was already emitted (i.e. warned_once
is set) before calling all the find routines above, so that those lookups are
skipped once the warning has been printed.

> +	if (warned_once || !perf_env__cpuid(env) || !env->cpuid ||
> +	    !strstarts(env->cpuid, "AuthenticAMD") || !evsel_pmu)
> +		return;
> +
> +	if (ibs_fetch_pmu && ibs_fetch_pmu->type == evsel_pmu->type) {
> +		if (attr->config & (1ULL << 59)) {
> +			ibs_l3miss_warn();
> +			warned_once = 1;
> +		}
> +	} else if (ibs_op_pmu && ibs_op_pmu->type == evsel_pmu->type) {
> +		if (attr->config & (1ULL << 16)) {
> +			ibs_l3miss_warn();
> +			warned_once = 1;
> +		}
> +	}
> +}
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index 2a1729e7aee4..4f8b72d4a521 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -1064,6 +1064,11 @@ void __weak arch_evsel__fixup_new_cycles(struct perf_event_attr *attr __maybe_un
>  {
>  }
>  
> +void __weak arch_evsel__warn_ambiguity(struct evsel *evsel __maybe_unused,
> +				       struct perf_event_attr *attr __maybe_unused)
> +{
> +}
> +
>  static void evsel__set_default_freq_period(struct record_opts *opts,
>  					   struct perf_event_attr *attr)
>  {
> @@ -1339,6 +1344,8 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
>  	 */
>  	if (evsel__is_dummy_event(evsel))
>  		evsel__reset_sample_bit(evsel, BRANCH_STACK);
> +
> +	arch_evsel__warn_ambiguity(evsel, attr);

Wouldn't this be better as a single arch__post_evsel_config() function that
could do arch-specific fixups or emit such warnings _after_ (thus the
"post") the common code in evsel__config() has done its thing?

>  }
>  
>  int evsel__set_filter(struct evsel *evsel, const char *filter)
> diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
> index 041b42d33bf5..195ae30ec45b 100644
> --- a/tools/perf/util/evsel.h
> +++ b/tools/perf/util/evsel.h
> @@ -281,6 +281,7 @@ void evsel__set_sample_id(struct evsel *evsel, bool use_sample_identifier);
>  
>  void arch_evsel__set_sample_weight(struct evsel *evsel);
>  void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr);
> +void arch_evsel__warn_ambiguity(struct evsel *evsel, struct perf_event_attr *attr);
>  
>  int evsel__set_filter(struct evsel *evsel, const char *filter);
>  int evsel__append_tp_filter(struct evsel *evsel, const char *filter);
> -- 
> 2.27.0

-- 

- Arnaldo

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ