[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAP-5=fU2sGSS+OsKmn8C+Y7Msm-5qUFpotRc2rfgprhGpsroew@mail.gmail.com>
Date: Fri, 31 Jan 2025 23:26:02 -0800
From: Ian Rogers <irogers@...gle.com>
To: "Liang, Kan" <kan.liang@...ux.intel.com>
Cc: Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>,
Arnaldo Carvalho de Melo <acme@...nel.org>, Namhyung Kim <namhyung@...nel.org>,
Mark Rutland <mark.rutland@....com>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>, Jiri Olsa <jolsa@...nel.org>,
Adrian Hunter <adrian.hunter@...el.com>, James Clark <james.clark@...aro.org>,
Ze Gao <zegao2021@...il.com>, Weilin Wang <weilin.wang@...el.com>,
Jean-Philippe Romain <jean-philippe.romain@...s.st.com>, Junhao He <hejunhao3@...wei.com>,
Yicong Yang <yangyicong@...ilicon.com>, linux-perf-users@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2 2/5] perf pmus: Restructure pmu_read_sysfs to scan
fewer PMUs
On Thu, Jan 30, 2025 at 11:24 AM Liang, Kan <kan.liang@...ux.intel.com> wrote:
>
>
>
> On 2025-01-23 2:46 a.m., Ian Rogers wrote:
> > Rather than scanning core or all PMUs, allow pmu_read_sysfs to read
> > some combination of core, other, hwmon and tool PMUs. The PMUs that
> > should be read and are already read are held as bitmaps. It is known
> > that a "hwmon_" prefix is necessary for a hwmon PMU's name, similarly
> > with "tool", so only scan those PMUs in situations the PMU name or the
> > PMU's type number make sense to.
> >
> > The number of openat system calls reduces from 276 to 98 for a hwmon
> > event. The number of openats for regular perf events isn't changed.
> >
> > Signed-off-by: Ian Rogers <irogers@...gle.com>
> > ---
> > tools/perf/util/pmu.h | 2 +
> > tools/perf/util/pmus.c | 144 ++++++++++++++++++++++++++---------------
> > 2 files changed, 95 insertions(+), 51 deletions(-)
> >
> > diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
> > index dbed6c243a5e..edd36c20aedc 100644
> > --- a/tools/perf/util/pmu.h
> > +++ b/tools/perf/util/pmu.h
> > @@ -37,6 +37,8 @@ struct perf_pmu_caps {
> > };
> >
> > enum {
> > + PERF_PMU_TYPE_PE_START = 0,
> > + PERF_PMU_TYPE_PE_END = 0xFFFEFFFF,
> > PERF_PMU_TYPE_HWMON_START = 0xFFFF0000,
> > PERF_PMU_TYPE_HWMON_END = 0xFFFFFFFD,
> > PERF_PMU_TYPE_TOOL = 0xFFFFFFFE,
> > diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c
> > index b493da0d22ef..3e3ffafcad71 100644
> > --- a/tools/perf/util/pmus.c
> > +++ b/tools/perf/util/pmus.c
> > @@ -37,10 +37,23 @@
> > */
> > static LIST_HEAD(core_pmus);
> > static LIST_HEAD(other_pmus);
> > -static bool read_sysfs_core_pmus;
> > -static bool read_sysfs_all_pmus;
> > +enum perf_tool_pmu_type {
> > + PERF_TOOL_PMU_TYPE_PE_CORE,
> > +#define PERF_TOOL_PMU_TYPE_PE_CORE_MASK (1 << PERF_TOOL_PMU_TYPE_PE_CORE)
> > + PERF_TOOL_PMU_TYPE_PE_OTHER,
>
> What is the PE short for?
Perf event, rather than type numbers being used for things like the fake PMU.
> The terminology "other" confused me at first glance.
Agreed. It corresponds to the other list immediately above this code.
> I think the PE_OTHER means the PMUs not core, hwmon or tool.
> There is a "other_pmus" nearby, which means the PMUs not core, but
> include hwmon and tool.
>
> Please add some comments to explain the scope of the TYPE_PE_OTHER.
This is already this immediately above:
https://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/util/pmus.c?h=perf-tools-next#n31
```
* other_pmus: All other PMUs which are not part of core_pmus list. It doesn't
* matter whether PMU is present per SMT-thread or outside of the
* core in the hw. For e.g., an instance of AMD ibs_fetch// and
* ibs_op// PMUs is present in each hw SMT thread, however they
* are captured under other_pmus. PMUs belonging to other_pmus
* must have pmu->is_core=0 but pmu->is_uncore could be 0 or 1.
```
> > +#define PERF_TOOL_PMU_TYPE_PE_OTHER_MASK (1 << PERF_TOOL_PMU_TYPE_PE_OTHER)
> > + PERF_TOOL_PMU_TYPE_TOOL,
> > +#define PERF_TOOL_PMU_TYPE_TOOL_MASK (1 << PERF_TOOL_PMU_TYPE_TOOL)
> > + PERF_TOOL_PMU_TYPE_HWMON,
> > +#define PERF_TOOL_PMU_TYPE_HWMON_MASK (1 << PERF_TOOL_PMU_TYPE_HWMON)
> > +#define PERF_TOOL_PMU_TYPE_ALL_MASK (PERF_TOOL_PMU_TYPE_PE_CORE_MASK | \
> > + PERF_TOOL_PMU_TYPE_PE_OTHER_MASK | \
> > + PERF_TOOL_PMU_TYPE_TOOL_MASK | \
> > + PERF_TOOL_PMU_TYPE_HWMON_MASK)
> > +};
>
> Nit: Maybe it's better to split the enum perf_tool_pmu_type and #define.
> It's a little bit hard for me to locate how many types there are at
> first glance.
Ok, will do in v3.
> > +static unsigned int read_pmu_types;
> >
> > -static void pmu_read_sysfs(bool core_only);
> > +static void pmu_read_sysfs(unsigned int to_read_pmus);
> >
> > size_t pmu_name_len_no_suffix(const char *str)
> > {
> > @@ -102,8 +115,7 @@ void perf_pmus__destroy(void)
> >
> > perf_pmu__delete(pmu);
> > }
> > - read_sysfs_core_pmus = false;
> > - read_sysfs_all_pmus = false;
> > + read_pmu_types = 0;
> > }
> >
> > static struct perf_pmu *pmu_find(const char *name)
> > @@ -129,6 +141,7 @@ struct perf_pmu *perf_pmus__find(const char *name)
> > struct perf_pmu *pmu;
> > int dirfd;
> > bool core_pmu;
> > + unsigned int to_read_pmus = 0;
> >
> > /*
> > * Once PMU is loaded it stays in the list,
> > @@ -139,11 +152,11 @@ struct perf_pmu *perf_pmus__find(const char *name)
> > if (pmu)
> > return pmu;
> >
> > - if (read_sysfs_all_pmus)
> > + if (read_pmu_types == PERF_TOOL_PMU_TYPE_ALL_MASK)
> > return NULL;
> >
> > core_pmu = is_pmu_core(name);
> > - if (core_pmu && read_sysfs_core_pmus)
> > + if (core_pmu && (read_pmu_types & PERF_TOOL_PMU_TYPE_PE_CORE_MASK))
> > return NULL;
> >
> > dirfd = perf_pmu__event_source_devices_fd();
> > @@ -151,15 +164,27 @@ struct perf_pmu *perf_pmus__find(const char *name)
> > /*eager_load=*/false);
> > close(dirfd);
> >
> > - if (!pmu) {
> > - /*
> > - * Looking up an inidividual PMU failed. This may mean name is
> > - * an alias, so read the PMUs from sysfs and try to find again.
> > - */
> > - pmu_read_sysfs(core_pmu);
> > + if (pmu)
> > + return pmu;
> > +
> > + /* Looking up an individual perf event PMU failed, check if a tool PMU should be read. */
> > + if (!strncmp(name, "hwmon_", 6))
> > + to_read_pmus |= PERF_TOOL_PMU_TYPE_HWMON_MASK;
> > + else if (!strcmp(name, "tool"))
> > + to_read_pmus |= PERF_TOOL_PMU_TYPE_TOOL_MASK;
> > +
> > + if (to_read_pmus) {
> > + pmu_read_sysfs(to_read_pmus);
> > pmu = pmu_find(name);
> > + if (pmu)
> > + return pmu;
> > }
> > - return pmu;
> > + /* Read all necessary PMUs from sysfs and see if the PMU is found. */
> > + to_read_pmus = PERF_TOOL_PMU_TYPE_PE_CORE_MASK;
> > + if (!core_pmu)
> > + to_read_pmus |= PERF_TOOL_PMU_TYPE_PE_OTHER_MASK;
> > + pmu_read_sysfs(to_read_pmus);
> > + return pmu_find(name);
> > }
> >
> > static struct perf_pmu *perf_pmu__find2(int dirfd, const char *name)
> > @@ -176,11 +201,11 @@ static struct perf_pmu *perf_pmu__find2(int dirfd, const char *name)
> > if (pmu)
> > return pmu;
> >
> > - if (read_sysfs_all_pmus)
> > + if (read_pmu_types == PERF_TOOL_PMU_TYPE_ALL_MASK)
> > return NULL;
> >
> > core_pmu = is_pmu_core(name);
> > - if (core_pmu && read_sysfs_core_pmus)
> > + if (core_pmu && (read_pmu_types & PERF_TOOL_PMU_TYPE_PE_CORE_MASK))
> > return NULL;
> >
> > return perf_pmu__lookup(core_pmu ? &core_pmus : &other_pmus, dirfd, name,
> > @@ -197,52 +222,60 @@ static int pmus_cmp(void *priv __maybe_unused,
> > }
> >
> > /* Add all pmus in sysfs to pmu list: */
> > -static void pmu_read_sysfs(bool core_only)
> > +static void pmu_read_sysfs(unsigned int to_read_types)
> > {
> > - int fd;
> > - DIR *dir;
> > - struct dirent *dent;
> > struct perf_pmu *tool_pmu;
> >
> > - if (read_sysfs_all_pmus || (core_only && read_sysfs_core_pmus))
> > + if ((read_pmu_types & to_read_types) == to_read_types) {
> > + /* All requested PMU types have been read. */
> > return;
> > + }
> >
> > - fd = perf_pmu__event_source_devices_fd();
> > - if (fd < 0)
> > - return;
> > + if (to_read_types & (PERF_TOOL_PMU_TYPE_PE_CORE_MASK | PERF_TOOL_PMU_TYPE_PE_OTHER_MASK)) {
> > + int fd = perf_pmu__event_source_devices_fd();
> > + DIR *dir;
> > + struct dirent *dent;
> > + bool core_only = (to_read_types & PERF_TOOL_PMU_TYPE_PE_OTHER_MASK) == 0;
> >
> > - dir = fdopendir(fd);
> > - if (!dir) {
> > - close(fd);
> > - return;
> > - }
> > + if (fd < 0)
> > + goto skip_pe_pmus;
> >
> > - while ((dent = readdir(dir))) {
> > - if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, ".."))
> > - continue;
> > - if (core_only && !is_pmu_core(dent->d_name))
> > - continue;
> > - /* add to static LIST_HEAD(core_pmus) or LIST_HEAD(other_pmus): */
> > - perf_pmu__find2(fd, dent->d_name);
> > - }
> > + dir = fdopendir(fd);
> > + if (!dir) {
> > + close(fd);
> > + goto skip_pe_pmus;
> > + }
> >
> > - closedir(dir);
> > - if (list_empty(&core_pmus)) {
> > + while ((dent = readdir(dir))) {
> > + if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, ".."))
> > + continue;
> > + if (core_only && !is_pmu_core(dent->d_name))
> > + continue;
> > + /* add to static LIST_HEAD(core_pmus) or LIST_HEAD(other_pmus): */
> > + perf_pmu__find2(fd, dent->d_name);
> > + }
> > +
> > + closedir(dir);
> > + }
> > +skip_pe_pmus:
> > + if ((to_read_types & PERF_TOOL_PMU_TYPE_PE_CORE_MASK) && list_empty(&core_pmus)) {
> > if (!perf_pmu__create_placeholder_core_pmu(&core_pmus))
> > pr_err("Failure to set up any core PMUs\n");
> > }
> > list_sort(NULL, &core_pmus, pmus_cmp);
> > - if (!core_only) {
> > +
> > + if ((to_read_types & PERF_TOOL_PMU_TYPE_TOOL_MASK) != 0 &&
> > + (read_pmu_types & PERF_TOOL_PMU_TYPE_TOOL_MASK) == 0) {
>
> Nit: Maybe
>
> if ((to_read_types & PERF_TOOL_PMU_TYPE_TOOL_MASK) &&
> !(read_pmu_types & PERF_TOOL_PMU_TYPE_TOOL_MASK))
Thanks. I'm not convinced on the readability, which was why I was
being a bit more verbose in what I wrote. I worrited the & reads like
an && and the ! is hard to miss.
Thanks,
Ian
> > tool_pmu = perf_pmus__tool_pmu();
> > list_add_tail(&tool_pmu->list, &other_pmus);
> > - perf_pmus__read_hwmon_pmus(&other_pmus);
> > }
> > + if ((to_read_types & PERF_TOOL_PMU_TYPE_HWMON_MASK) != 0 &&
> > + (read_pmu_types & PERF_TOOL_PMU_TYPE_HWMON_MASK) == 0)
>
> ditto
>
> Thanks,
> Kan
> > + perf_pmus__read_hwmon_pmus(&other_pmus);
> > +
> > list_sort(NULL, &other_pmus, pmus_cmp);
> > - if (!list_empty(&core_pmus)) {
> > - read_sysfs_core_pmus = true;
> > - if (!core_only)
> > - read_sysfs_all_pmus = true;
> > - }
> > +
> > + read_pmu_types |= to_read_types;
> > }
> >
> > static struct perf_pmu *__perf_pmus__find_by_type(unsigned int type)
> > @@ -263,12 +296,21 @@ static struct perf_pmu *__perf_pmus__find_by_type(unsigned int type)
> >
> > struct perf_pmu *perf_pmus__find_by_type(unsigned int type)
> > {
> > + unsigned int to_read_pmus;
> > struct perf_pmu *pmu = __perf_pmus__find_by_type(type);
> >
> > - if (pmu || read_sysfs_all_pmus)
> > + if (pmu || (read_pmu_types == PERF_TOOL_PMU_TYPE_ALL_MASK))
> > return pmu;
> >
> > - pmu_read_sysfs(/*core_only=*/false);
> > + if (type >= PERF_PMU_TYPE_PE_START && type <= PERF_PMU_TYPE_PE_END) {
> > + to_read_pmus = PERF_TOOL_PMU_TYPE_PE_CORE_MASK |
> > + PERF_TOOL_PMU_TYPE_PE_OTHER_MASK;
> > + } else if (type >= PERF_PMU_TYPE_HWMON_START && type <= PERF_PMU_TYPE_HWMON_END) {
> > + to_read_pmus = PERF_TOOL_PMU_TYPE_HWMON_MASK;
> > + } else {
> > + to_read_pmus = PERF_TOOL_PMU_TYPE_TOOL_MASK;
> > + }
> > + pmu_read_sysfs(to_read_pmus);
> > pmu = __perf_pmus__find_by_type(type);
> > return pmu;
> > }
> > @@ -282,7 +324,7 @@ struct perf_pmu *perf_pmus__scan(struct perf_pmu *pmu)
> > bool use_core_pmus = !pmu || pmu->is_core;
> >
> > if (!pmu) {
> > - pmu_read_sysfs(/*core_only=*/false);
> > + pmu_read_sysfs(PERF_TOOL_PMU_TYPE_ALL_MASK);
> > pmu = list_prepare_entry(pmu, &core_pmus, list);
> > }
> > if (use_core_pmus) {
> > @@ -300,7 +342,7 @@ struct perf_pmu *perf_pmus__scan(struct perf_pmu *pmu)
> > struct perf_pmu *perf_pmus__scan_core(struct perf_pmu *pmu)
> > {
> > if (!pmu) {
> > - pmu_read_sysfs(/*core_only=*/true);
> > + pmu_read_sysfs(PERF_TOOL_PMU_TYPE_PE_CORE_MASK);
> > return list_first_entry_or_null(&core_pmus, typeof(*pmu), list);
> > }
> > list_for_each_entry_continue(pmu, &core_pmus, list)
> > @@ -316,7 +358,7 @@ static struct perf_pmu *perf_pmus__scan_skip_duplicates(struct perf_pmu *pmu)
> > const char *last_pmu_name = (pmu && pmu->name) ? pmu->name : "";
> >
> > if (!pmu) {
> > - pmu_read_sysfs(/*core_only=*/false);
> > + pmu_read_sysfs(PERF_TOOL_PMU_TYPE_ALL_MASK);
> > pmu = list_prepare_entry(pmu, &core_pmus, list);
> > } else
> > last_pmu_name_len = pmu_name_len_no_suffix(pmu->name ?: "");
>
Powered by blists - more mailing lists