Message-ID: <CAP-5=fUkU6OBCEWeaXZ2KqeKDcXF_ZTzwwMVq=uy8CcpK90MyQ@mail.gmail.com>
Date: Tue, 7 Nov 2023 10:22:06 -0800
From: Ian Rogers <irogers@...gle.com>
To: Ravi Bangoria <ravi.bangoria@....com>
Cc: acme@...nel.org, namhyung@...nel.org, kim.phillips@....com,
peterz@...radead.org, mingo@...hat.com, mark.rutland@....com,
alexander.shishkin@...ux.intel.com, jolsa@...nel.org,
adrian.hunter@...el.com, kan.liang@...ux.intel.com,
changbin.du@...wei.com, yangjihong1@...wei.com,
zwisler@...omium.org, wangming01@...ngson.cn,
chenhuacai@...nel.org, kprateek.nayak@....com,
linux-perf-users@...r.kernel.org, linux-kernel@...r.kernel.org,
sandipan.das@....com, ananth.narayan@....com,
santosh.shukla@....com
Subject: Re: [PATCH 1/2] perf tool AMD: Use non-precise cycles as default
event on certain Zen2 processors
On Tue, Nov 7, 2023 at 12:34 AM Ravi Bangoria <ravi.bangoria@....com> wrote:
>
> By default, Perf uses the precise cycles event when no explicit event
> is specified by the user. The precise cycles event is forwarded to the
> ibs_op// pmu on AMD. However, IBS has a hw issue on certain Zen2
> processors where it might raise an NMI without the sample_valid bit
> set, which causes Unknown NMI warnings. (Erratum #1215: IBS
> (Instruction Based Sampling) Counter Valid Value May be Incorrect
> After Exit From Core C6 (CC6) State.) So, use the non-precise cycles
> event as the default on affected processors.
>
> This does not prevent the user from using an explicit precise cycles
> event or the ibs_op// pmu directly.
>
> Suggested-by: Arnaldo Carvalho de Melo <acme@...nel.org>
> Signed-off-by: Ravi Bangoria <ravi.bangoria@....com>
Thanks Ravi,
We read max_precise from sysfs:
https://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/util/pmu.c?h=perf-tools-next#n984
But it appears not to be used:
https://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/util/evsel.c?h=perf-tools-next#n1323
I think:
```
if (evsel->precise_max)
	attr->precise_ip = 3;
```
should be:
```
if (evsel->precise_max)
	attr->precise_ip = evsel->pmu->max_precise;
```
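A slightly more defensive variant of that idea, purely as a sketch - the
NULL check and the "negative means no sysfs value" fallback are my
assumptions about how evsel->pmu / max_precise behave, not something
I've verified:
```
if (evsel->precise_max) {
	/* Clamp to what the PMU advertises; keep the old behaviour if unknown. */
	int max_precise = evsel->pmu ? evsel->pmu->max_precise : -1;

	attr->precise_ip = max_precise >= 0 ? max_precise : 3;
}
```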
Presumably max_precise is being reported as some non-zero value on the
buggy systems - if it isn't, the change above should be enough and we
can declare victory here. If it is, we can modify how max_precise is
computed on perf's struct pmu for the cpu PMU on AMD so that we just
hardwire it to zero in arch code.
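If we do end up needing that arch fallback, something along these lines
could work - a rough sketch only, where the hook name, its call site and
the cpu_is_affected_zen2() helper are all made up (the helper would just
reuse the family 0x17 model-range check from your patch):
```
/* Hypothetical arch hook, e.g. called once per PMU after it is looked up. */
void arch_perf_pmu__fixup(struct perf_pmu *pmu)
{
	/*
	 * Hardwire max_precise to 0 for the core PMU on the affected Zen2
	 * parts so precise_max never promotes the default cycles event to
	 * a precise one that gets forwarded to IBS.
	 */
	if (!strcmp(pmu->name, "cpu") && cpu_is_affected_zen2())
		pmu->max_precise = 0;
}
```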
I think I prefer this approach as it fixes max_precise in a more
generic way: if someone were to specify "cycles:P" on the command line
it wouldn't trigger the bug, and similarly for perf script and other
places. Wdyt?
Thanks,
Ian
> ---
> tools/perf/arch/x86/util/evlist.c | 34 +++++++++++++++++++++++++++++++
> tools/perf/builtin-record.c | 2 +-
> tools/perf/builtin-top.c | 2 +-
> tools/perf/util/evlist.c | 12 ++++++++++-
> tools/perf/util/evlist.h | 2 ++
> 5 files changed, 49 insertions(+), 3 deletions(-)
>
> diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
> index b1ce0c52d88d..f4478179c91b 100644
> --- a/tools/perf/arch/x86/util/evlist.c
> +++ b/tools/perf/arch/x86/util/evlist.c
> @@ -5,6 +5,8 @@
> #include "util/evlist.h"
> #include "util/parse-events.h"
> #include "util/event.h"
> +#include "util/env.h"
> +#include "linux/string.h"
> #include "topdown.h"
> #include "evsel.h"
>
> @@ -92,3 +94,35 @@ int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
> /* Default ordering by insertion index. */
> return lhs->core.idx - rhs->core.idx;
> }
> +
> +/*
> + * Precise cycles event is forwarded to ibs_op// pmu on AMD. However, IBS
> + * has hw issue on certain Zen2 processors where it might raise NMI without
> + * sample_valid bit set, which causes Unknown NMI warnings. So default to
> + * non-precise cycles event on affected processors.
> + */
> +const char *arch_evlist__default_cycles_event(bool can_profile_kernel)
> +{
> + struct perf_env env = { .total_mem = 0, };
> + unsigned int family, model, stepping;
> + bool is_amd;
> + int ret;
> +
> + perf_env__cpuid(&env);
> + is_amd = env.cpuid && strstarts(env.cpuid, "AuthenticAMD");
> + if (!is_amd)
> + goto out;
> +
> + ret = sscanf(env.cpuid, "%*[^,],%u,%u,%u", &family, &model, &stepping);
> + if (ret == 3 && family == 0x17 && (
> + (model >= 0x30 && model <= 0x3f) ||
> + (model >= 0x60 && model <= 0x7f) ||
> + (model >= 0x90 && model <= 0x9f))) {
> + perf_env__exit(&env);
> + return can_profile_kernel ? "cycles" : "cycles:u";
> + }
> +
> +out:
> + perf_env__exit(&env);
> + return can_profile_kernel ? "cycles:P" : "cycles:Pu";
> +}
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index dcf288a4fb9a..e58d8ac8a77b 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -4150,7 +4150,7 @@ int cmd_record(int argc, const char **argv)
> if (rec->evlist->core.nr_entries == 0) {
> bool can_profile_kernel = perf_event_paranoid_check(1);
>
> - err = parse_event(rec->evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu");
> + err = parse_event(rec->evlist, evlist__default_cycles_event(can_profile_kernel));
> if (err)
> goto out;
> }
> diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
> index ea8c7eca5eee..21368421eddd 100644
> --- a/tools/perf/builtin-top.c
> +++ b/tools/perf/builtin-top.c
> @@ -1666,7 +1666,7 @@ int cmd_top(int argc, const char **argv)
>
> if (!top.evlist->core.nr_entries) {
> bool can_profile_kernel = perf_event_paranoid_check(1);
> - int err = parse_event(top.evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu");
> + int err = parse_event(top.evlist, evlist__default_cycles_event(can_profile_kernel));
>
> if (err)
> goto out_delete_evlist;
> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
> index e36da58522ef..406ed851cafc 100644
> --- a/tools/perf/util/evlist.c
> +++ b/tools/perf/util/evlist.c
> @@ -90,6 +90,16 @@ struct evlist *evlist__new(void)
> return evlist;
> }
>
> +const char * __weak arch_evlist__default_cycles_event(bool can_profile_kernel)
> +{
> + return can_profile_kernel ? "cycles:P" : "cycles:Pu";
> +}
> +
> +const char *evlist__default_cycles_event(bool can_profile_kernel)
> +{
> + return arch_evlist__default_cycles_event(can_profile_kernel);
> +}
> +
> struct evlist *evlist__new_default(void)
> {
> struct evlist *evlist = evlist__new();
> @@ -100,7 +110,7 @@ struct evlist *evlist__new_default(void)
> return NULL;
>
> can_profile_kernel = perf_event_paranoid_check(1);
> - err = parse_event(evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu");
> + err = parse_event(evlist, evlist__default_cycles_event(can_profile_kernel));
> if (err) {
> evlist__delete(evlist);
> evlist = NULL;
> diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
> index 98e7ddb2bd30..7267b4fb1981 100644
> --- a/tools/perf/util/evlist.h
> +++ b/tools/perf/util/evlist.h
> @@ -91,6 +91,8 @@ struct evsel_str_handler {
>
> struct evlist *evlist__new(void);
> struct evlist *evlist__new_default(void);
> +const char *arch_evlist__default_cycles_event(bool can_profile_kernel);
> +const char *evlist__default_cycles_event(bool can_profile_kernel);
> struct evlist *evlist__new_dummy(void);
> void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
> struct perf_thread_map *threads);
> --
> 2.41.0
>