[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <85cd13d9-2999-e453-bc2b-12bcf5a5c2b1@linux.intel.com>
Date: Thu, 9 Jun 2022 11:43:25 -0400
From: "Liang, Kan" <kan.liang@...ux.intel.com>
To: zhengjun.xing@...ux.intel.com, acme@...nel.org,
peterz@...radead.org, mingo@...hat.com,
alexander.shishkin@...el.com, jolsa@...hat.com
Cc: linux-kernel@...r.kernel.org, linux-perf-users@...r.kernel.org,
irogers@...gle.com, adrian.hunter@...el.com, ak@...ux.intel.com
Subject: Re: [PATCH v2 5/5] perf stat: Add topdown metrics in the default perf
stat on the hybrid machine
On 6/9/2022 10:53 AM, zhengjun.xing@...ux.intel.com wrote:
> From: Zhengjun Xing <zhengjun.xing@...ux.intel.com>
>
> Topdown metrics are missed in the default perf stat on the hybrid machine,
> add Topdown metrics in default perf stat for hybrid systems.
>
> Currently, we support the perf metrics Topdown for the p-core PMU in the
> perf stat default, the perf metrics Topdown support for e-core PMU will be
> implemented later separately. Refactor the code adds two x86 specific
> functions. Widen the size of the event name column by 7 chars, so that all
> metrics after the "#" become aligned again.
>
> The perf metrics topdown feature is supported on the cpu_core of ADL. The
> dedicated perf metrics counter and the fixed counter 3 are used for the
> topdown events. Adding the topdown metrics doesn't trigger multiplexing.
>
> Before:
>
> # ./perf stat -a true
>
> Performance counter stats for 'system wide':
>
> 53.70 msec cpu-clock # 25.736 CPUs utilized
> 80 context-switches # 1.490 K/sec
> 24 cpu-migrations # 446.951 /sec
> 52 page-faults # 968.394 /sec
> 2,788,555 cpu_core/cycles/ # 51.931 M/sec
> 851,129 cpu_atom/cycles/ # 15.851 M/sec
> 2,974,030 cpu_core/instructions/ # 55.385 M/sec
> 416,919 cpu_atom/instructions/ # 7.764 M/sec
> 586,136 cpu_core/branches/ # 10.916 M/sec
> 79,872 cpu_atom/branches/ # 1.487 M/sec
> 14,220 cpu_core/branch-misses/ # 264.819 K/sec
> 7,691 cpu_atom/branch-misses/ # 143.229 K/sec
>
> 0.002086438 seconds time elapsed
>
> After:
>
> # ./perf stat -a true
>
> Performance counter stats for 'system wide':
>
> 61.39 msec cpu-clock # 24.874 CPUs utilized
> 76 context-switches # 1.238 K/sec
> 24 cpu-migrations # 390.968 /sec
> 52 page-faults # 847.097 /sec
> 2,753,695 cpu_core/cycles/ # 44.859 M/sec
> 903,899 cpu_atom/cycles/ # 14.725 M/sec
> 2,927,529 cpu_core/instructions/ # 47.690 M/sec
> 428,498 cpu_atom/instructions/ # 6.980 M/sec
> 581,299 cpu_core/branches/ # 9.470 M/sec
> 83,409 cpu_atom/branches/ # 1.359 M/sec
> 13,641 cpu_core/branch-misses/ # 222.216 K/sec
> 8,008 cpu_atom/branch-misses/ # 130.453 K/sec
> 14,761,308 cpu_core/slots/ # 240.466 M/sec
> 3,288,625 cpu_core/topdown-retiring/ # 22.3% retiring
> 1,323,323 cpu_core/topdown-bad-spec/ # 9.0% bad speculation
> 5,477,470 cpu_core/topdown-fe-bound/ # 37.1% frontend bound
> 4,679,199 cpu_core/topdown-be-bound/ # 31.7% backend bound
> 646,194 cpu_core/topdown-heavy-ops/ # 4.4% heavy operations # 17.9% light operations
> 1,244,999 cpu_core/topdown-br-mispredict/ # 8.4% branch mispredict # 0.5% machine clears
> 3,891,800 cpu_core/topdown-fetch-lat/ # 26.4% fetch latency # 10.7% fetch bandwidth
> 1,879,034 cpu_core/topdown-mem-bound/ # 12.7% memory bound # 19.0% Core bound
>
> 0.002467839 seconds time elapsed
>
> Signed-off-by: Zhengjun Xing <zhengjun.xing@...ux.intel.com>
> Reviewed-by: Kan Liang <kan.liang@...ux.intel.com>
> ---
> Change log:
> v2:
> * Refactor arch_get_topdown_pmu_name() as Namhyung's suggestion.
>
> tools/perf/arch/x86/util/evlist.c | 13 ++------
> tools/perf/arch/x86/util/topdown.c | 52 ++++++++++++++++++++++++++++++
> tools/perf/arch/x86/util/topdown.h | 1 +
> tools/perf/builtin-stat.c | 14 ++------
> tools/perf/util/stat-display.c | 2 +-
> tools/perf/util/topdown.c | 7 ++++
> tools/perf/util/topdown.h | 3 +-
> 7 files changed, 67 insertions(+), 25 deletions(-)
>
> diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
> index fd3500fd4b69..03c955480d34 100644
> --- a/tools/perf/arch/x86/util/evlist.c
> +++ b/tools/perf/arch/x86/util/evlist.c
> @@ -3,12 +3,9 @@
> #include "util/pmu.h"
> #include "util/evlist.h"
> #include "util/parse-events.h"
> -#include "topdown.h"
> #include "util/event.h"
> #include "util/pmu-hybrid.h"
> -
> -#define TOPDOWN_L1_EVENTS "{slots,topdown-retiring,topdown-bad-spec,topdown-fe-bound,topdown-be-bound}"
> -#define TOPDOWN_L2_EVENTS "{slots,topdown-retiring,topdown-bad-spec,topdown-fe-bound,topdown-be-bound,topdown-heavy-ops,topdown-br-mispredict,topdown-fetch-lat,topdown-mem-bound}"
> +#include "topdown.h"
>
> static int ___evlist__add_default_attrs(struct evlist *evlist,
> struct perf_event_attr *attrs,
> @@ -65,13 +62,7 @@ int arch_evlist__add_default_attrs(struct evlist *evlist,
> if (nr_attrs)
> return ___evlist__add_default_attrs(evlist, attrs, nr_attrs);
>
> - if (!pmu_have_event("cpu", "slots"))
> - return 0;
> -
> - if (pmu_have_event("cpu", "topdown-heavy-ops"))
> - return parse_events(evlist, TOPDOWN_L2_EVENTS, NULL);
> - else
> - return parse_events(evlist, TOPDOWN_L1_EVENTS, NULL);
> + return topdown_parse_events(evlist);
> }
>
> struct evsel *arch_evlist__leader(struct list_head *list)
> diff --git a/tools/perf/arch/x86/util/topdown.c b/tools/perf/arch/x86/util/topdown.c
> index f81a7cfe4d63..35add91f2cd2 100644
> --- a/tools/perf/arch/x86/util/topdown.c
> +++ b/tools/perf/arch/x86/util/topdown.c
> @@ -3,9 +3,17 @@
> #include "api/fs/fs.h"
> #include "util/pmu.h"
> #include "util/topdown.h"
> +#include "util/evlist.h"
> +#include "util/debug.h"
> +#include "util/pmu-hybrid.h"
> #include "topdown.h"
> #include "evsel.h"
>
> +#define TOPDOWN_L1_EVENTS "{slots,topdown-retiring,topdown-bad-spec,topdown-fe-bound,topdown-be-bound}"
> +#define TOPDOWN_L1_EVENTS_CORE "{slots,cpu_core/topdown-retiring/,cpu_core/topdown-bad-spec/,cpu_core/topdown-fe-bound/,cpu_core/topdown-be-bound/}"
> +#define TOPDOWN_L2_EVENTS "{slots,topdown-retiring,topdown-bad-spec,topdown-fe-bound,topdown-be-bound,topdown-heavy-ops,topdown-br-mispredict,topdown-fetch-lat,topdown-mem-bound}"
> +#define TOPDOWN_L2_EVENTS_CORE "{slots,cpu_core/topdown-retiring/,cpu_core/topdown-bad-spec/,cpu_core/topdown-fe-bound/,cpu_core/topdown-be-bound/,cpu_core/topdown-heavy-ops/,cpu_core/topdown-br-mispredict/,cpu_core/topdown-fetch-lat/,cpu_core/topdown-mem-bound/}"
> +
> /* Check whether there is a PMU which supports the perf metrics. */
> bool topdown_sys_has_perf_metrics(void)
> {
> @@ -73,3 +81,47 @@ bool arch_topdown_sample_read(struct evsel *leader)
>
> return false;
> }
> +
> +const char *arch_get_topdown_pmu_name(struct evlist *evlist, bool warn)
> +{
> + const char *pmu_name;
> +
> + if (!perf_pmu__has_hybrid())
> + return "cpu";
> +
> + if (!evlist->hybrid_pmu_name) {
> + if (warn)
> + pr_warning
> + ("WARNING: default to use cpu_core topdown events\n");
I missed it in my last review.
Please make the pr_warning in one line.
Thanks,
Kan
> + evlist->hybrid_pmu_name = perf_pmu__hybrid_type_to_pmu("core");
> + }
> +
> + pmu_name = evlist->hybrid_pmu_name;
> +
> + return pmu_name;
> +}
> +
> +int topdown_parse_events(struct evlist *evlist)
> +{
> + const char *topdown_events;
> + const char *pmu_name;
> +
> + if (!topdown_sys_has_perf_metrics())
> + return 0;
> +
> + pmu_name = arch_get_topdown_pmu_name(evlist, false);
> +
> + if (pmu_have_event(pmu_name, "topdown-heavy-ops")) {
> + if (!strcmp(pmu_name, "cpu_core"))
> + topdown_events = TOPDOWN_L2_EVENTS_CORE;
> + else
> + topdown_events = TOPDOWN_L2_EVENTS;
> + } else {
> + if (!strcmp(pmu_name, "cpu_core"))
> + topdown_events = TOPDOWN_L1_EVENTS_CORE;
> + else
> + topdown_events = TOPDOWN_L1_EVENTS;
> + }
> +
> + return parse_events(evlist, topdown_events, NULL);
> +}
> diff --git a/tools/perf/arch/x86/util/topdown.h b/tools/perf/arch/x86/util/topdown.h
> index 46bf9273e572..7eb81f042838 100644
> --- a/tools/perf/arch/x86/util/topdown.h
> +++ b/tools/perf/arch/x86/util/topdown.h
> @@ -3,5 +3,6 @@
> #define _TOPDOWN_H 1
>
> bool topdown_sys_has_perf_metrics(void);
> +int topdown_parse_events(struct evlist *evlist);
>
> #endif
> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> index 837c3ca91af1..c6b68be78f8c 100644
> --- a/tools/perf/builtin-stat.c
> +++ b/tools/perf/builtin-stat.c
> @@ -71,6 +71,7 @@
> #include "util/bpf_counter.h"
> #include "util/iostat.h"
> #include "util/pmu-hybrid.h"
> + #include "util/topdown.h"
> #include "asm/bug.h"
>
> #include <linux/time64.h>
> @@ -1858,22 +1859,11 @@ static int add_default_attributes(void)
> unsigned int max_level = 1;
> char *str = NULL;
> bool warn = false;
> - const char *pmu_name = "cpu";
> + const char *pmu_name = arch_get_topdown_pmu_name(evsel_list, true);
>
> if (!force_metric_only)
> stat_config.metric_only = true;
>
> - if (perf_pmu__has_hybrid()) {
> - if (!evsel_list->hybrid_pmu_name) {
> - pr_warning("WARNING: default to use cpu_core topdown events\n");
> - evsel_list->hybrid_pmu_name = perf_pmu__hybrid_type_to_pmu("core");
> - }
> -
> - pmu_name = evsel_list->hybrid_pmu_name;
> - if (!pmu_name)
> - return -1;
> - }
> -
> if (pmu_have_event(pmu_name, topdown_metric_L2_attrs[5])) {
> metric_attrs = topdown_metric_L2_attrs;
> max_level = 2;
> diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
> index 606f09b09226..44045565c8f8 100644
> --- a/tools/perf/util/stat-display.c
> +++ b/tools/perf/util/stat-display.c
> @@ -374,7 +374,7 @@ static void abs_printout(struct perf_stat_config *config,
> config->csv_output ? 0 : config->unit_width,
> evsel->unit, config->csv_sep);
>
> - fprintf(output, "%-*s", config->csv_output ? 0 : 25, evsel__name(evsel));
> + fprintf(output, "%-*s", config->csv_output ? 0 : 32, evsel__name(evsel));
>
> print_cgroup(config, evsel);
> }
> diff --git a/tools/perf/util/topdown.c b/tools/perf/util/topdown.c
> index a369f84ceb6a..1090841550f7 100644
> --- a/tools/perf/util/topdown.c
> +++ b/tools/perf/util/topdown.c
> @@ -65,3 +65,10 @@ __weak bool arch_topdown_sample_read(struct evsel *leader __maybe_unused)
> {
> return false;
> }
> +
> +__weak const char *arch_get_topdown_pmu_name(struct evlist *evlist
> + __maybe_unused,
> + bool warn __maybe_unused)
> +{
> + return "cpu";
> +}
> diff --git a/tools/perf/util/topdown.h b/tools/perf/util/topdown.h
> index 118e75281f93..f9531528c559 100644
> --- a/tools/perf/util/topdown.h
> +++ b/tools/perf/util/topdown.h
> @@ -2,11 +2,12 @@
> #ifndef TOPDOWN_H
> #define TOPDOWN_H 1
> #include "evsel.h"
> +#include "evlist.h"
>
> bool arch_topdown_check_group(bool *warn);
> void arch_topdown_group_warn(void);
> bool arch_topdown_sample_read(struct evsel *leader);
> -
> +const char *arch_get_topdown_pmu_name(struct evlist *evlist, bool warn);
> int topdown_filter_events(const char **attr, char **str, bool use_group,
> const char *pmu_name);
>
Powered by blists - more mailing lists