[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAP-5=fWJD4gT+CxBLDdjbdrJF0xyPTobPu20LSZFf-RSJAXMpg@mail.gmail.com>
Date: Fri, 27 Jun 2025 16:12:03 -0700
From: Ian Rogers <irogers@...gle.com>
To: Chun-Tse Shao <ctshao@...gle.com>
Cc: linux-kernel@...r.kernel.org, peterz@...radead.org, mingo@...hat.com,
acme@...nel.org, namhyung@...nel.org, mark.rutland@....com,
alexander.shishkin@...ux.intel.com, jolsa@...nel.org, adrian.hunter@...el.com,
kan.liang@...ux.intel.com, weilin.wang@...el.com, james.clark@...aro.org,
linux-perf-users@...r.kernel.org
Subject: Re: [PATCH v4] perf stat: Fix uncore aggregation number
On Fri, Jun 27, 2025 at 1:18 PM Chun-Tse Shao <ctshao@...gle.com> wrote:
>
> Follow up:
> lore.kernel.org/CAP-5=fVDF4-qYL1Lm7efgiHk7X=_nw_nEFMBZFMcsnOOJgX4Kg@...l.gmail.com/
>
> The patch adds unit aggregation during the evsel merge of the aggregated
> uncore counters. It also renames the column to `ctrs` (`counters` in
> JSON mode).
>
> Tested on a 2-socket machine with SNC3, uncore_imc_[0-11] and
> cpumask="0,120"
> Before:
> perf stat -e clockticks -I 1000 --per-socket
> # time socket cpus counts unit events
> 1.001085024 S0 1 9615386315 clockticks
> 1.001085024 S1 1 9614287448 clockticks
> perf stat -e clockticks -I 1000 --per-node
> # time node cpus counts unit events
> 1.001029867 N0 1 3205726984 clockticks
> 1.001029867 N1 1 3205444421 clockticks
> 1.001029867 N2 1 3205234018 clockticks
> 1.001029867 N3 1 3205224660 clockticks
> 1.001029867 N4 1 3205207213 clockticks
> 1.001029867 N5 1 3205528246 clockticks
> After:
> perf stat -e clockticks -I 1000 --per-socket
> # time socket ctrs counts unit events
> 1.001026071 S0 12 9619677996 clockticks
> 1.001026071 S1 12 9618612614 clockticks
> perf stat -e clockticks -I 1000 --per-node
> # time node ctrs counts unit events
> 1.001027449 N0 4 3207251859 clockticks
> 1.001027449 N1 4 3207315930 clockticks
> 1.001027449 N2 4 3206981828 clockticks
> 1.001027449 N3 4 3206566126 clockticks
> 1.001027449 N4 4 3206032609 clockticks
> 1.001027449 N5 4 3205651355 clockticks
>
> Tested with JSON output linter:
> perf test "perf stat JSON output linter"
> 94: perf stat JSON output linter : Ok
>
> Suggested-by: Ian Rogers <irogers@...gle.com>
> Signed-off-by: Chun-Tse Shao <ctshao@...gle.com>
Reviewed-by: Ian Rogers <irogers@...gle.com>
Thanks,
Ian
> ---
> v4:
> Modify perf-stat.txt and json output lint test
>
> v3: https://lore.kernel.org/20250624221545.1711008-1-ctshao@google.com/
> Rename the column to `ctrs` and `counters` in json mode.
>
> v2: https://lore.kernel.org/20250612225324.3315450-1-ctshao@google.com/
> Rename the column to `aggr_nr`.
> Remove unnecessary comment.
>
> v1: https://lore.kernel.org/20250611233239.3098064-1-ctshao@google.com/
>
>
> tools/perf/Documentation/perf-stat.txt | 6 ++--
> .../tests/shell/lib/perf_json_output_lint.py | 4 +--
> tools/perf/util/stat-display.c | 34 +++++++++----------
> tools/perf/util/stat.c | 2 +-
> 4 files changed, 24 insertions(+), 22 deletions(-)
>
> diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
> index 61d091670dee..1a766d4a2233 100644
> --- a/tools/perf/Documentation/perf-stat.txt
> +++ b/tools/perf/Documentation/perf-stat.txt
> @@ -640,18 +640,20 @@ JSON FORMAT
> With -j, perf stat is able to print out a JSON format output
> that can be used for parsing.
>
> -- timestamp : optional usec time stamp in fractions of second (with -I)
> +- interval : optional timestamp in fractions of second (with -I)
> - optional aggregate options:
> - core : core identifier (with --per-core)
> - die : die identifier (with --per-die)
> - socket : socket identifier (with --per-socket)
> - node : node identifier (with --per-node)
> - thread : thread identifier (with --per-thread)
> +- counters : number of aggregated PMU counters
> - counter-value : counter value
> - unit : unit of the counter value or empty
> - event : event name
> - variance : optional variance if multiple values are collected (with -r)
> -- runtime : run time of counter
> +- event-runtime : run time of the event
> +- pcnt-running : percentage of time the event was running
> - metric-value : optional metric value
> - metric-unit : optional unit of metric
>
> diff --git a/tools/perf/tests/shell/lib/perf_json_output_lint.py b/tools/perf/tests/shell/lib/perf_json_output_lint.py
> index 9e772a89ce38..c6750ef06c0f 100644
> --- a/tools/perf/tests/shell/lib/perf_json_output_lint.py
> +++ b/tools/perf/tests/shell/lib/perf_json_output_lint.py
> @@ -45,7 +45,7 @@ def is_counter_value(num):
>
> def check_json_output(expected_items):
> checks = {
> - 'aggregate-number': lambda x: isfloat(x),
> + 'counters': lambda x: isfloat(x),
> 'core': lambda x: True,
> 'counter-value': lambda x: is_counter_value(x),
> 'cgroup': lambda x: True,
> @@ -75,7 +75,7 @@ def check_json_output(expected_items):
> if count not in expected_items and count >= 1 and count <= 7 and 'metric-value' in item:
> # Events that generate >1 metric may have isolated metric
> # values and possibly other prefixes like interval, core,
> - # aggregate-number, or event-runtime/pcnt-running from multiplexing.
> + # counters, or event-runtime/pcnt-running from multiplexing.
> pass
> elif count not in expected_items and count >= 1 and count <= 5 and 'metricgroup' in item:
> pass
> diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
> index 729ad5cd52cb..9cb5245a92aa 100644
> --- a/tools/perf/util/stat-display.c
> +++ b/tools/perf/util/stat-display.c
> @@ -50,15 +50,15 @@ static int aggr_header_lens[] = {
> };
>
> static const char *aggr_header_csv[] = {
> - [AGGR_CORE] = "core,cpus,",
> - [AGGR_CACHE] = "cache,cpus,",
> - [AGGR_CLUSTER] = "cluster,cpus,",
> - [AGGR_DIE] = "die,cpus,",
> - [AGGR_SOCKET] = "socket,cpus,",
> - [AGGR_NONE] = "cpu,",
> - [AGGR_THREAD] = "comm-pid,",
> - [AGGR_NODE] = "node,",
> - [AGGR_GLOBAL] = ""
> + [AGGR_CORE] = "core,ctrs,",
> + [AGGR_CACHE] = "cache,ctrs,",
> + [AGGR_CLUSTER] = "cluster,ctrs,",
> + [AGGR_DIE] = "die,ctrs,",
> + [AGGR_SOCKET] = "socket,ctrs,",
> + [AGGR_NONE] = "cpu,",
> + [AGGR_THREAD] = "comm-pid,",
> + [AGGR_NODE] = "node,",
> + [AGGR_GLOBAL] = ""
> };
>
> static const char *aggr_header_std[] = {
> @@ -304,7 +304,7 @@ static void print_aggr_id_std(struct perf_stat_config *config,
> return;
> }
>
> - fprintf(output, "%-*s %*d ", aggr_header_lens[idx], buf, 4, aggr_nr);
> + fprintf(output, "%-*s %*d ", aggr_header_lens[idx], buf, /*strlen("ctrs")*/ 4, aggr_nr);
> }
>
> static void print_aggr_id_csv(struct perf_stat_config *config,
> @@ -366,27 +366,27 @@ static void print_aggr_id_json(struct perf_stat_config *config, struct outstate
> {
> switch (config->aggr_mode) {
> case AGGR_CORE:
> - json_out(os, "\"core\" : \"S%d-D%d-C%d\", \"aggregate-number\" : %d",
> + json_out(os, "\"core\" : \"S%d-D%d-C%d\", \"counters\" : %d",
> id.socket, id.die, id.core, aggr_nr);
> break;
> case AGGR_CACHE:
> - json_out(os, "\"cache\" : \"S%d-D%d-L%d-ID%d\", \"aggregate-number\" : %d",
> + json_out(os, "\"cache\" : \"S%d-D%d-L%d-ID%d\", \"counters\" : %d",
> id.socket, id.die, id.cache_lvl, id.cache, aggr_nr);
> break;
> case AGGR_CLUSTER:
> - json_out(os, "\"cluster\" : \"S%d-D%d-CLS%d\", \"aggregate-number\" : %d",
> + json_out(os, "\"cluster\" : \"S%d-D%d-CLS%d\", \"counters\" : %d",
> id.socket, id.die, id.cluster, aggr_nr);
> break;
> case AGGR_DIE:
> - json_out(os, "\"die\" : \"S%d-D%d\", \"aggregate-number\" : %d",
> + json_out(os, "\"die\" : \"S%d-D%d\", \"counters\" : %d",
> id.socket, id.die, aggr_nr);
> break;
> case AGGR_SOCKET:
> - json_out(os, "\"socket\" : \"S%d\", \"aggregate-number\" : %d",
> + json_out(os, "\"socket\" : \"S%d\", \"counters\" : %d",
> id.socket, aggr_nr);
> break;
> case AGGR_NODE:
> - json_out(os, "\"node\" : \"N%d\", \"aggregate-number\" : %d",
> + json_out(os, "\"node\" : \"N%d\", \"counters\" : %d",
> id.node, aggr_nr);
> break;
> case AGGR_NONE:
> @@ -1317,7 +1317,7 @@ static void print_header_interval_std(struct perf_stat_config *config,
> case AGGR_CLUSTER:
> case AGGR_CACHE:
> case AGGR_CORE:
> - fprintf(output, "#%*s %-*s cpus",
> + fprintf(output, "#%*s %-*s ctrs",
> INTERVAL_LEN - 1, "time",
> aggr_header_lens[config->aggr_mode],
> aggr_header_std[config->aggr_mode]);
> diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
> index 355a7d5c8ab8..b0205e99a4c9 100644
> --- a/tools/perf/util/stat.c
> +++ b/tools/perf/util/stat.c
> @@ -526,7 +526,7 @@ static int evsel__merge_aggr_counters(struct evsel *evsel, struct evsel *alias)
> struct perf_counts_values *aggr_counts_a = &ps_a->aggr[i].counts;
> struct perf_counts_values *aggr_counts_b = &ps_b->aggr[i].counts;
>
> - /* NB: don't increase aggr.nr for aliases */
> + ps_a->aggr[i].nr += ps_b->aggr[i].nr;
>
> aggr_counts_a->val += aggr_counts_b->val;
> aggr_counts_a->ena += aggr_counts_b->ena;
> --
> 2.50.0.727.gbf7dc18ff4-goog
>
Powered by blists - more mailing lists