lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <D148EA88-0609-471A-B7DB-39D4539814CA@linux.vnet.ibm.com>
Date:   Tue, 6 Dec 2022 19:09:40 +0530
From:   Athira Rajeev <atrajeev@...ux.vnet.ibm.com>
To:     Namhyung Kim <namhyung@...nel.org>
Cc:     Arnaldo Carvalho de Melo <acme@...nel.org>,
        Ian Rogers <irogers@...gle.com>, Jiri Olsa <jolsa@...nel.org>,
        Ingo Molnar <mingo@...nel.org>,
        Peter Zijlstra <peterz@...radead.org>,
        LKML <linux-kernel@...r.kernel.org>,
        Adrian Hunter <adrian.hunter@...el.com>,
        linux-perf-users@...r.kernel.org,
        Kan Liang <kan.liang@...ux.intel.com>,
        Leo Yan <leo.yan@...aro.org>, Andi Kleen <ak@...ux.intel.com>,
        James Clark <james.clark@....com>,
        Xing Zhengjun <zhengjun.xing@...ux.intel.com>,
        Michael Petlan <mpetlan@...hat.com>
Subject: Re: [PATCH 18/20] perf stat: Display event stats using aggr counts



> On 06-Dec-2022, at 12:30 AM, Namhyung Kim <namhyung@...nel.org> wrote:
> 
> Hello,
> 
> On Mon, Dec 5, 2022 at 2:52 AM Athira Rajeev
> <atrajeev@...ux.vnet.ibm.com> wrote:
>> 
>> 
>> 
>>> On 18-Oct-2022, at 7:32 AM, Namhyung Kim <namhyung@...nel.org> wrote:
>>> 
>>> Now aggr counts are ready for use.  Convert the display routines to use
>>> the aggr counts and update the shadow stat with them.  It doesn't need
>>> to aggregate counts or collect aliases anymore during the display.  Get
>>> rid of now unused struct perf_aggr_thread_value.
>>> 
>>> Note that there's a difference in the display order among the aggr mode.
>>> For per-core/die/socket/node aggregation, it shows relevant events in
>>> the same unit together, whereas global/thread/no aggregation it shows
>>> the same events for different units together.  So it still uses separate
>>> codes to display them due to the ordering.
>>> 
>>> One more thing to note is that it breaks per-core event display for now.
>>> The next patch will fix it to have identical output as of now.
>>> 
>>> Acked-by: Ian Rogers <irogers@...gle.com>
>>> Signed-off-by: Namhyung Kim <namhyung@...nel.org>
>>> ---
>>> tools/perf/util/stat-display.c | 421 ++++-----------------------------
>>> tools/perf/util/stat.c         |   5 -
>>> tools/perf/util/stat.h         |   9 -
>>> 3 files changed, 49 insertions(+), 386 deletions(-)
>>> 
>>> diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
>>> index 4113aa86772f..bfae2784609c 100644
>>> --- a/tools/perf/util/stat-display.c
>>> +++ b/tools/perf/util/stat-display.c
>>> @@ -442,31 +442,6 @@ static void print_metric_header(struct perf_stat_config *config,
>>>              fprintf(os->fh, "%*s ", config->metric_only_len, unit);
>>> }
>>> 
>>> -static int first_shadow_map_idx(struct perf_stat_config *config,
>>> -                             struct evsel *evsel, const struct aggr_cpu_id *id)
>>> -{
>>> -     struct perf_cpu_map *cpus = evsel__cpus(evsel);
>>> -     struct perf_cpu cpu;
>>> -     int idx;
>>> -
>>> -     if (config->aggr_mode == AGGR_NONE)
>>> -             return perf_cpu_map__idx(cpus, id->cpu);
>>> -
>>> -     if (config->aggr_mode == AGGR_THREAD)
>>> -             return id->thread_idx;
>>> -
>>> -     if (!config->aggr_get_id)
>>> -             return 0;
>>> -
>>> -     perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
>>> -             struct aggr_cpu_id cpu_id = config->aggr_get_id(config, cpu);
>>> -
>>> -             if (aggr_cpu_id__equal(&cpu_id, id))
>>> -                     return idx;
>>> -     }
>>> -     return 0;
>>> -}
>>> -
>>> static void abs_printout(struct perf_stat_config *config,
>>>                       struct aggr_cpu_id id, int nr, struct evsel *evsel, double avg)
>>> {
>>> @@ -537,7 +512,7 @@ static bool is_mixed_hw_group(struct evsel *counter)
>>> static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int nr,
>>>                   struct evsel *counter, double uval,
>>>                   char *prefix, u64 run, u64 ena, double noise,
>>> -                  struct runtime_stat *st)
>>> +                  struct runtime_stat *st, int map_idx)
>>> {
>>>      struct perf_stat_output_ctx out;
>>>      struct outstate os = {
>>> @@ -648,8 +623,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int
>>>              print_running(config, run, ena);
>>>      }
>>> 
>>> -     perf_stat__print_shadow_stats(config, counter, uval,
>>> -                             first_shadow_map_idx(config, counter, &id),
>>> +     perf_stat__print_shadow_stats(config, counter, uval, map_idx,
>>>                              &out, &config->metric_events, st);
>>>      if (!config->csv_output && !config->metric_only && !config->json_output) {
>>>              print_noise(config, counter, noise);
>>> @@ -657,34 +631,6 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int
>>>      }
>>> }
>>> 
>>> -static void aggr_update_shadow(struct perf_stat_config *config,
>>> -                            struct evlist *evlist)
>>> -{
>>> -     int idx, s;
>>> -     struct perf_cpu cpu;
>>> -     struct aggr_cpu_id s2, id;
>>> -     u64 val;
>>> -     struct evsel *counter;
>>> -     struct perf_cpu_map *cpus;
>>> -
>>> -     for (s = 0; s < config->aggr_map->nr; s++) {
>>> -             id = config->aggr_map->map[s];
>>> -             evlist__for_each_entry(evlist, counter) {
>>> -                     cpus = evsel__cpus(counter);
>>> -                     val = 0;
>>> -                     perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
>>> -                             s2 = config->aggr_get_id(config, cpu);
>>> -                             if (!aggr_cpu_id__equal(&s2, &id))
>>> -                                     continue;
>>> -                             val += perf_counts(counter->counts, idx, 0)->val;
>>> -                     }
>>> -                     perf_stat__update_shadow_stats(counter, val,
>>> -                                     first_shadow_map_idx(config, counter, &id),
>>> -                                     &rt_stat);
>>> -             }
>>> -     }
>>> -}
>>> -
>>> static void uniquify_event_name(struct evsel *counter)
>>> {
>>>      char *new_name;
>>> @@ -721,137 +667,51 @@ static void uniquify_event_name(struct evsel *counter)
>>>      counter->uniquified_name = true;
>>> }
>>> 
>>> -static void collect_all_aliases(struct perf_stat_config *config, struct evsel *counter,
>>> -                         void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data,
>>> -                                    bool first),
>>> -                         void *data)
>>> +static bool hybrid_uniquify(struct evsel *evsel, struct perf_stat_config *config)
>>> {
>>> -     struct evlist *evlist = counter->evlist;
>>> -     struct evsel *alias;
>>> -
>>> -     alias = list_prepare_entry(counter, &(evlist->core.entries), core.node);
>>> -     list_for_each_entry_continue (alias, &evlist->core.entries, core.node) {
>>> -             /* Merge events with the same name, etc. but on different PMUs. */
>>> -             if (!strcmp(evsel__name(alias), evsel__name(counter)) &&
>>> -                     alias->scale == counter->scale &&
>>> -                     alias->cgrp == counter->cgrp &&
>>> -                     !strcmp(alias->unit, counter->unit) &&
>>> -                     evsel__is_clock(alias) == evsel__is_clock(counter) &&
>>> -                     strcmp(alias->pmu_name, counter->pmu_name)) {
>>> -                     alias->merged_stat = true;
>>> -                     cb(config, alias, data, false);
>>> -             }
>>> -     }
>>> +     return evsel__is_hybrid(evsel) && !config->hybrid_merge;
>>> }
>>> 
>>> -static bool hybrid_merge(struct evsel *counter, struct perf_stat_config *config,
>>> -                      bool check)
>>> +static void uniquify_counter(struct perf_stat_config *config, struct evsel *counter)
>>> {
>>> -     if (evsel__is_hybrid(counter)) {
>>> -             if (check)
>>> -                     return config->hybrid_merge;
>>> -             else
>>> -                     return !config->hybrid_merge;
>>> -     }
>>> -
>>> -     return false;
>>> -}
>>> -
>>> -static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
>>> -                         void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data,
>>> -                                    bool first),
>>> -                         void *data)
>>> -{
>>> -     if (counter->merged_stat)
>>> -             return false;
>>> -     cb(config, counter, data, true);
>>> -     if (config->no_merge || hybrid_merge(counter, config, false))
>>> +     if (config->no_merge || hybrid_uniquify(counter, config))
>>>              uniquify_event_name(counter);
>>> -     else if (counter->auto_merge_stats || hybrid_merge(counter, config, true))
>>> -             collect_all_aliases(config, counter, cb, data);
>>> -     return true;
>>> -}
>>> -
>>> -struct aggr_data {
>>> -     u64 ena, run, val;
>>> -     struct aggr_cpu_id id;
>>> -     int nr;
>>> -     int cpu_map_idx;
>>> -};
>>> -
>>> -static void aggr_cb(struct perf_stat_config *config,
>>> -                 struct evsel *counter, void *data, bool first)
>>> -{
>>> -     struct aggr_data *ad = data;
>>> -     int idx;
>>> -     struct perf_cpu cpu;
>>> -     struct perf_cpu_map *cpus;
>>> -     struct aggr_cpu_id s2;
>>> -
>>> -     cpus = evsel__cpus(counter);
>>> -     perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
>>> -             struct perf_counts_values *counts;
>>> -
>>> -             s2 = config->aggr_get_id(config, cpu);
>>> -             if (!aggr_cpu_id__equal(&s2, &ad->id))
>>> -                     continue;
>>> -             if (first)
>>> -                     ad->nr++;
>>> -             counts = perf_counts(counter->counts, idx, 0);
>>> -             /*
>>> -              * When any result is bad, make them all to give
>>> -              * consistent output in interval mode.
>>> -              */
>>> -             if (counts->ena == 0 || counts->run == 0 ||
>>> -                 counter->counts->scaled == -1) {
>>> -                     ad->ena = 0;
>>> -                     ad->run = 0;
>>> -                     break;
>>> -             }
>>> -             ad->val += counts->val;
>>> -             ad->ena += counts->ena;
>>> -             ad->run += counts->run;
>>> -     }
>>> }
>>> 
>>> static void print_counter_aggrdata(struct perf_stat_config *config,
>>>                                 struct evsel *counter, int s,
>>>                                 char *prefix, bool metric_only,
>>> -                                bool *first, struct perf_cpu cpu)
>>> +                                bool *first)
>>> {
>>> -     struct aggr_data ad;
>>>      FILE *output = config->output;
>>>      u64 ena, run, val;
>>> -     int nr;
>>> -     struct aggr_cpu_id id;
>>>      double uval;
>>> +     struct perf_stat_evsel *ps = counter->stats;
>>> +     struct perf_stat_aggr *aggr = &ps->aggr[s];
>>> +     struct aggr_cpu_id id = config->aggr_map->map[s];
>>> +     double avg = aggr->counts.val;
>>> 
>>> -     ad.id = id = config->aggr_map->map[s];
>>> -     ad.val = ad.ena = ad.run = 0;
>>> -     ad.nr = 0;
>>> -     if (!collect_data(config, counter, aggr_cb, &ad))
>>> +     if (counter->supported && aggr->nr == 0)
>>>              return;
>> 
>> Hi Namhyung,
>> 
>> Observed one issue with this change in tmp.perf/core branch of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git
>> Patch link: https://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git/commit/?h=tmp.perf/core&id=91f85f98da7ab8c32105f42dd03884c01ec4498f
>> 
>> If we have multiple events in a group and if all events in that group can't go in at once, the group will fail to schedule.
>> The perf stat result used to have status of all events in group.
>> 
>> Example result from powerpc:
>>        # perf stat -e '{r1001e,r401e0,r4c010}' <workload>
>>        Performance counter stats for 'workload’:
>> 
>>             <not counted>      r1001e
>>             <not counted>      r401e0
>>           <not supported>      r4c010
>> 
>> But after this change, it shows only the event which broke the constraint.
>> 
>>        # ./perf stat -e '{r1001e,r401e0,r4c010}' <workload>
>>        Performance counter stats for 'workload’:
>>        <not supported>      r4c010
>> 
>> The new condition added by the patch is:
>> 
>>        if (counter->supported && aggr->nr == 0)
>>                        return;
>> 
>> 
>> The first two events r1001e and r401e0 are good to go in this group, hence
>> counter->supported  is true where as aggr->nr is zero since this group is
>> not scheduled. Since counter->supported is false for third event which
>> broke the group constraint, the condition won't return and hence only prints
>> this event in output.
>> 
>> Namhyung, is there any scenario because of which this condition was added ?
>> If not I would go ahead and sent a fix patch to remove this check.
> 
> I remember it's for AGGR_THREAD and merged (uncore) aliases and hybrid
> events.  In system-wide per-thread mode, many of them would get 0 results
> and we'd like to skip them.  And now I see that we have evsel->merged_stat
> check already.
> 
> Maybe we could just add the aggr_mode check there.

Thanks for the response Namhyung.
I will share my response in patch "[PATCH] perf stat: Update event skip condition”

Thanks
Athira
> 
> Thanks,
> Namhyung

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ