Message-ID: <961e538e-9d1a-741c-56e9-bb546eab2aec@linux.intel.com>
Date: Wed, 1 Jun 2022 09:37:28 -0400
From: "Liang, Kan" <kan.liang@...ux.intel.com>
To: Ravi Bangoria <ravi.bangoria@....com>, acme@...nel.org
Cc: jolsa@...nel.org, irogers@...gle.com, peterz@...radead.org,
rrichter@....com, mingo@...hat.com, mark.rutland@....com,
namhyung@...nel.org, tglx@...utronix.de, bp@...en8.de,
james.clark@....com, leo.yan@...aro.org, ak@...ux.intel.com,
eranian@...gle.com, like.xu.linux@...il.com, x86@...nel.org,
linux-perf-users@...r.kernel.org, linux-kernel@...r.kernel.org,
sandipan.das@....com, ananth.narayan@....com, kim.phillips@....com,
santosh.shukla@....com
Subject: Re: [PATCH v5 5/8] perf headers: Record non-cpu pmu capabilities
On 5/31/2022 11:26 PM, Ravi Bangoria wrote:
> Pmus advertise their capabilities via sysfs attribute files, but
> the perf tool currently parses only core (cpu) or hybrid core pmu
> capabilities. Add support for recording non-core pmu capabilities
> in the perf.data header.
>
> Note that the newly proposed HEADER_PMU_CAPS replaces the existing
> HEADER_HYBRID_CPU_PMU_CAPS. Special care is taken for hybrid core
> pmus by writing their capabilities first in the perf.data header,
> so that a new perf.data file read by an old perf tool does not
> break.
>
> Signed-off-by: Ravi Bangoria <ravi.bangoria@....com>
> ---
> .../Documentation/perf.data-file-format.txt | 10 +-
> tools/perf/builtin-inject.c | 2 +-
> tools/perf/util/env.c | 60 ++++++-
> tools/perf/util/env.h | 12 +-
> tools/perf/util/header.c | 160 ++++++++++--------
> tools/perf/util/header.h | 2 +-
> 6 files changed, 158 insertions(+), 88 deletions(-)
>
> diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
> index f56d0e0fbff6..2233534e508a 100644
> --- a/tools/perf/Documentation/perf.data-file-format.txt
> +++ b/tools/perf/Documentation/perf.data-file-format.txt
> @@ -419,18 +419,20 @@ Example:
> cpu_core cpu list : 0-15
> cpu_atom cpu list : 16-23
>
> - HEADER_HYBRID_CPU_PMU_CAPS = 31,
> + HEADER_PMU_CAPS = 31,
>
> - A list of hybrid CPU PMU capabilities.
> + List of pmu capabilities (except cpu pmu which is already
> + covered by HEADER_CPU_PMU_CAPS). Note that hybrid cpu pmu
> + capabilities are also stored here.
>
> struct {
> u32 nr_pmu;
> struct {
> - u32 nr_cpu_pmu_caps;
> + u32 nr_caps;
> {
> char name[];
> char value[];
> - } [nr_cpu_pmu_caps];
> + } [nr_caps];
> char pmu_name[];
> } [nr_pmu];
> };
> diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
> index a75bf11585b5..05bc0cfccf83 100644
> --- a/tools/perf/builtin-inject.c
> +++ b/tools/perf/builtin-inject.c
> @@ -809,7 +809,7 @@ static bool keep_feat(int feat)
> case HEADER_CPU_PMU_CAPS:
> case HEADER_CLOCK_DATA:
> case HEADER_HYBRID_TOPOLOGY:
> - case HEADER_HYBRID_CPU_PMU_CAPS:
> + case HEADER_PMU_CAPS:
> return true;
> /* Information that can be updated */
> case HEADER_BUILD_ID:
> diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
> index 7d3aeb2e4622..5b8cf6a421a4 100644
> --- a/tools/perf/util/env.c
> +++ b/tools/perf/util/env.c
> @@ -219,13 +219,13 @@ void perf_env__exit(struct perf_env *env)
> }
> zfree(&env->hybrid_nodes);
>
> - for (i = 0; i < env->nr_hybrid_cpc_nodes; i++) {
> - for (j = 0; j < env->hybrid_cpc_nodes[i].nr_cpu_pmu_caps; j++)
> - zfree(&env->hybrid_cpc_nodes[i].cpu_pmu_caps[j]);
> - zfree(&env->hybrid_cpc_nodes[i].cpu_pmu_caps);
> - zfree(&env->hybrid_cpc_nodes[i].pmu_name);
> + for (i = 0; i < env->nr_pmus_with_caps; i++) {
> + for (j = 0; j < env->pmu_caps[i].nr_caps; j++)
> + zfree(&env->pmu_caps[i].caps[j]);
> + zfree(&env->pmu_caps[i].caps);
> + zfree(&env->pmu_caps[i].pmu_name);
> }
> - zfree(&env->hybrid_cpc_nodes);
> + zfree(&env->pmu_caps);
> }
>
> void perf_env__init(struct perf_env *env)
> @@ -531,3 +531,51 @@ int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu)
>
> return cpu.cpu >= 0 && cpu.cpu < env->nr_numa_map ? env->numa_map[cpu.cpu] : -1;
> }
> +
> +char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name,
> + const char *cap)
> +{
> + char *cap_eq;
> + int cap_size;
> + char **ptr;
> + int i, j;
> +
> + if (!pmu_name || !cap)
> + return NULL;
> +
> + cap_size = strlen(cap);
> + cap_eq = zalloc(cap_size + 2);
> + if (!cap_eq)
> + return NULL;
> +
> + memcpy(cap_eq, cap, cap_size);
> + cap_eq[cap_size] = '=';
> +
> + if (!strcmp(pmu_name, "cpu")) {
> + for (i = 0; i < env->nr_cpu_pmu_caps; i++) {
> + if (!strncmp(env->cpu_pmu_caps[i], cap_eq, cap_size + 1)) {
> + free(cap_eq);
> + return &env->cpu_pmu_caps[i][cap_size + 1];
> + }
> + }
> + goto out;
> + }
> +
> + for (i = 0; i < env->nr_pmus_with_caps; i++) {
> + if (strcmp(env->pmu_caps[i].pmu_name, pmu_name))
> + continue;
> +
> + ptr = env->pmu_caps[i].caps;
> +
> + for (j = 0; j < env->pmu_caps[i].nr_caps; j++) {
> + if (!strncmp(ptr[j], cap_eq, cap_size + 1)) {
> + free(cap_eq);
> + return &ptr[j][cap_size + 1];
> + }
> + }
> + }
> +
> +out:
> + free(cap_eq);
> + return NULL;
> +}
> diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
> index 43aab59f7322..4566c51f2fd9 100644
> --- a/tools/perf/util/env.h
> +++ b/tools/perf/util/env.h
> @@ -43,10 +43,10 @@ struct hybrid_node {
> char *cpus;
> };
>
> -struct hybrid_cpc_node {
> - int nr_cpu_pmu_caps;
> +struct pmu_caps {
> + int nr_caps;
> unsigned int max_branches;
> - char **cpu_pmu_caps;
> + char **caps;
> char *pmu_name;
> };
>
> @@ -74,7 +74,7 @@ struct perf_env {
> int nr_groups;
> int nr_cpu_pmu_caps;
> int nr_hybrid_nodes;
> - int nr_hybrid_cpc_nodes;
> + int nr_pmus_with_caps;
> char *cmdline;
> const char **cmdline_argv;
> char *sibling_cores;
> @@ -94,7 +94,7 @@ struct perf_env {
> struct memory_node *memory_nodes;
> unsigned long long memory_bsize;
> struct hybrid_node *hybrid_nodes;
> - struct hybrid_cpc_node *hybrid_cpc_nodes;
> + struct pmu_caps *pmu_caps;
> #ifdef HAVE_LIBBPF_SUPPORT
> /*
> * bpf_info_lock protects bpf rbtrees. This is needed because the
> @@ -172,4 +172,6 @@ bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node);
> struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id);
>
> int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu);
> +char *perf_env__find_pmu_cap(struct perf_env *env, const char *pmu_name,
> + const char *cap);
> #endif /* __PERF_ENV_H */
> diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
> index a1e4ec53333d..67bf897e8f89 100644
> --- a/tools/perf/util/header.c
> +++ b/tools/perf/util/header.c
> @@ -1512,18 +1512,13 @@ static int write_compressed(struct feat_fd *ff __maybe_unused,
> return do_write(ff, &(ff->ph->env.comp_mmap_len), sizeof(ff->ph->env.comp_mmap_len));
> }
>
> -static int write_per_cpu_pmu_caps(struct feat_fd *ff, struct perf_pmu *pmu,
> - bool write_pmu)
> +static int __write_pmu_caps(struct feat_fd *ff, struct perf_pmu *pmu,
> + bool write_pmu)
> {
> struct perf_pmu_caps *caps = NULL;
> - int nr_caps;
> int ret;
>
> - nr_caps = perf_pmu__caps_parse(pmu);
> - if (nr_caps < 0)
> - return nr_caps;
> -
> - ret = do_write(ff, &nr_caps, sizeof(nr_caps));
> + ret = do_write(ff, &pmu->nr_caps, sizeof(pmu->nr_caps));
The nr_caps can be retrieved from the cached perf_pmu__caps_parse(),
so it seems we only need to pass nr_caps as a parameter to
__write_pmu_caps().
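
Something like the below untested sketch, just to illustrate the idea
(the callers would pass in the value returned by perf_pmu__caps_parse()):

static int __write_pmu_caps(struct feat_fd *ff, struct perf_pmu *pmu,
			    int nr_caps, bool write_pmu)
{
	struct perf_pmu_caps *caps = NULL;
	int ret;

	/* nr_caps comes from the caller's perf_pmu__caps_parse() */
	ret = do_write(ff, &nr_caps, sizeof(nr_caps));
	if (ret < 0)
		return ret;
	...
}

static int write_cpu_pmu_caps(struct feat_fd *ff,
			      struct evlist *evlist __maybe_unused)
{
	struct perf_pmu *cpu_pmu = perf_pmu__find("cpu");
	int nr_caps;

	if (!cpu_pmu)
		return -ENOENT;

	nr_caps = perf_pmu__caps_parse(cpu_pmu);
	if (nr_caps < 0)
		return nr_caps;

	return __write_pmu_caps(ff, cpu_pmu, nr_caps, false);
}
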
> if (ret < 0)
> return ret;
>
> @@ -1550,33 +1545,60 @@ static int write_cpu_pmu_caps(struct feat_fd *ff,
> struct evlist *evlist __maybe_unused)
> {
> struct perf_pmu *cpu_pmu = perf_pmu__find("cpu");
> + int ret;
>
> if (!cpu_pmu)
> return -ENOENT;
>
> - return write_per_cpu_pmu_caps(ff, cpu_pmu, false);
> + ret = perf_pmu__caps_parse(cpu_pmu);
> + if (ret < 0)
> + return ret;
> +
> + return __write_pmu_caps(ff, cpu_pmu, false);
> }
>
> -static int write_hybrid_cpu_pmu_caps(struct feat_fd *ff,
> - struct evlist *evlist __maybe_unused)
> +static int write_pmu_caps(struct feat_fd *ff,
> + struct evlist *evlist __maybe_unused)
> {
> - struct perf_pmu *pmu;
> - u32 nr_pmu = perf_pmu__hybrid_pmu_num();
> + struct perf_pmu *pmu = NULL;
> + int nr_pmu = 0;
> int ret;
>
> - if (nr_pmu == 0)
> - return -ENOENT;
> + while ((pmu = perf_pmu__scan(pmu))) {
> + if (!pmu->name || !strcmp(pmu->name, "cpu") ||
> + perf_pmu__caps_parse(pmu) <= 0)
> + continue;
> + nr_pmu++;
> + }
>
> ret = do_write(ff, &nr_pmu, sizeof(nr_pmu));
> if (ret < 0)
> return ret;
>
> + if (!nr_pmu)
> + return 0;
> +
> + /*
> + * Write hybrid pmu caps first to maintain compatibility with
> + * older perf tool.
> + */
> + pmu = NULL;
> perf_pmu__for_each_hybrid_pmu(pmu) {
> - ret = write_per_cpu_pmu_caps(ff, pmu, true);
> + ret = __write_pmu_caps(ff, pmu, true);
> if (ret < 0)
> return ret;
> }
>
> + pmu = NULL;
> + while ((pmu = perf_pmu__scan(pmu))) {
> + if (!pmu->name || !strcmp(pmu->name, "cpu") ||
> + !pmu->nr_caps || perf_pmu__is_hybrid(pmu->name))
The perf_pmu__caps_parse() can be used to replace pmu->nr_caps.
It also seems we can factor out a macro (or a small helper) to replace
the repeated check:
!pmu->name || !strcmp(pmu->name, "cpu") || perf_pmu__caps_parse(pmu) <= 0.
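
For example (untested; the helper name is just a placeholder, and it
relies on perf_pmu__caps_parse() caching its result as noted above):

static bool skip_pmu_caps(struct perf_pmu *pmu)
{
	/* Skip unnamed PMUs, the core "cpu" PMU, and PMUs without caps */
	return !pmu->name || !strcmp(pmu->name, "cpu") ||
	       perf_pmu__caps_parse(pmu) <= 0;
}

Then the two loops in write_pmu_caps() become something like:

	while ((pmu = perf_pmu__scan(pmu))) {
		if (skip_pmu_caps(pmu))
			continue;
		nr_pmu++;
	}
	...
	while ((pmu = perf_pmu__scan(pmu))) {
		if (skip_pmu_caps(pmu) ||
		    perf_pmu__is_hybrid(pmu->name))
			continue;
		...
	}
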
Thanks,
Kan
> + continue;
> +
> + ret = __write_pmu_caps(ff, pmu, true);
> + if (ret < 0)
> + return ret;
> + }
> return 0;
> }
>
> @@ -2051,8 +2073,7 @@ static void print_compressed(struct feat_fd *ff, FILE *fp)
> ff->ph->env.comp_level, ff->ph->env.comp_ratio);
> }
>
> -static void print_per_cpu_pmu_caps(FILE *fp, int nr_caps, char **cpu_pmu_caps,
> - char *pmu_name)
> +static void __print_pmu_caps(FILE *fp, int nr_caps, char **caps, char *pmu_name)
> {
> const char *delimiter = "";
> int i;
> @@ -2064,7 +2085,7 @@ static void print_per_cpu_pmu_caps(FILE *fp, int nr_caps, char **cpu_pmu_caps,
>
> fprintf(fp, "# %s pmu capabilities: ", pmu_name);
> for (i = 0; i < nr_caps; i++) {
> - fprintf(fp, "%s%s", delimiter, cpu_pmu_caps[i]);
> + fprintf(fp, "%s%s", delimiter, caps[i]);
> delimiter = ", ";
> }
>
> @@ -2073,19 +2094,18 @@ static void print_per_cpu_pmu_caps(FILE *fp, int nr_caps, char **cpu_pmu_caps,
>
> static void print_cpu_pmu_caps(struct feat_fd *ff, FILE *fp)
> {
> - print_per_cpu_pmu_caps(fp, ff->ph->env.nr_cpu_pmu_caps,
> - ff->ph->env.cpu_pmu_caps, (char *)"cpu");
> + __print_pmu_caps(fp, ff->ph->env.nr_cpu_pmu_caps,
> + ff->ph->env.cpu_pmu_caps, (char *)"cpu");
> }
>
> -static void print_hybrid_cpu_pmu_caps(struct feat_fd *ff, FILE *fp)
> +static void print_pmu_caps(struct feat_fd *ff, FILE *fp)
> {
> - struct hybrid_cpc_node *n;
> + struct pmu_caps *pmu_caps;
>
> - for (int i = 0; i < ff->ph->env.nr_hybrid_cpc_nodes; i++) {
> - n = &ff->ph->env.hybrid_cpc_nodes[i];
> - print_per_cpu_pmu_caps(fp, n->nr_cpu_pmu_caps,
> - n->cpu_pmu_caps,
> - n->pmu_name);
> + for (int i = 0; i < ff->ph->env.nr_pmus_with_caps; i++) {
> + pmu_caps = &ff->ph->env.pmu_caps[i];
> + __print_pmu_caps(fp, pmu_caps->nr_caps, pmu_caps->caps,
> + pmu_caps->pmu_name);
> }
> }
>
> @@ -3196,28 +3216,27 @@ static int process_compressed(struct feat_fd *ff,
> return 0;
> }
>
> -static int process_per_cpu_pmu_caps(struct feat_fd *ff, int *nr_cpu_pmu_caps,
> - char ***cpu_pmu_caps,
> - unsigned int *max_branches)
> +static int __process_pmu_caps(struct feat_fd *ff, int *nr_caps,
> + char ***caps, unsigned int *max_branches)
> {
> int name_size, value_size;
> char *name, *value, *ptr;
> - u32 nr_caps, i;
> + u32 nr_pmu_caps, i;
>
> - *nr_cpu_pmu_caps = 0;
> - *cpu_pmu_caps = NULL;
> + *nr_caps = 0;
> + *caps = NULL;
>
> - if (do_read_u32(ff, &nr_caps))
> + if (do_read_u32(ff, &nr_pmu_caps))
> return -1;
>
> - if (!nr_caps)
> + if (!nr_pmu_caps)
> return 0;
>
> - *cpu_pmu_caps = zalloc(sizeof(char *) * nr_caps);
> - if (!*cpu_pmu_caps)
> + *caps = zalloc(sizeof(char *) * nr_pmu_caps);
> + if (!*caps)
> return -1;
>
> - for (i = 0; i < nr_caps; i++) {
> + for (i = 0; i < nr_pmu_caps; i++) {
> name = do_read_string(ff);
> if (!name)
> goto error;
> @@ -3235,7 +3254,7 @@ static int process_per_cpu_pmu_caps(struct feat_fd *ff, int *nr_cpu_pmu_caps,
> memcpy(ptr, name, name_size);
> ptr[name_size] = '=';
> memcpy(ptr + name_size + 1, value, value_size);
> - (*cpu_pmu_caps)[i] = ptr;
> + (*caps)[i] = ptr;
>
> if (!strcmp(name, "branches"))
> *max_branches = atoi(value);
> @@ -3243,7 +3262,7 @@ static int process_per_cpu_pmu_caps(struct feat_fd *ff, int *nr_cpu_pmu_caps,
> free(value);
> free(name);
> }
> - *nr_cpu_pmu_caps = nr_caps;
> + *nr_caps = nr_pmu_caps;
> return 0;
>
> free_value:
> @@ -3252,29 +3271,28 @@ static int process_per_cpu_pmu_caps(struct feat_fd *ff, int *nr_cpu_pmu_caps,
> free(name);
> error:
> for (; i > 0; i--)
> - free((*cpu_pmu_caps)[i - 1]);
> - free(*cpu_pmu_caps);
> - *cpu_pmu_caps = NULL;
> - *nr_cpu_pmu_caps = 0;
> + free((*caps)[i - 1]);
> + free(*caps);
> + *caps = NULL;
> + *nr_caps = 0;
> return -1;
> }
>
> static int process_cpu_pmu_caps(struct feat_fd *ff,
> void *data __maybe_unused)
> {
> - int ret = process_per_cpu_pmu_caps(ff, &ff->ph->env.nr_cpu_pmu_caps,
> - &ff->ph->env.cpu_pmu_caps,
> - &ff->ph->env.max_branches);
> + int ret = __process_pmu_caps(ff, &ff->ph->env.nr_cpu_pmu_caps,
> + &ff->ph->env.cpu_pmu_caps,
> + &ff->ph->env.max_branches);
>
> if (!ret && !ff->ph->env.cpu_pmu_caps)
> pr_debug("cpu pmu capabilities not available\n");
> return ret;
> }
>
> -static int process_hybrid_cpu_pmu_caps(struct feat_fd *ff,
> - void *data __maybe_unused)
> +static int process_pmu_caps(struct feat_fd *ff, void *data __maybe_unused)
> {
> - struct hybrid_cpc_node *nodes;
> + struct pmu_caps *pmu_caps;
> u32 nr_pmu, i;
> int ret;
> int j;
> @@ -3283,45 +3301,45 @@ static int process_hybrid_cpu_pmu_caps(struct feat_fd *ff,
> return -1;
>
> if (!nr_pmu) {
> - pr_debug("hybrid cpu pmu capabilities not available\n");
> + pr_debug("pmu capabilities not available\n");
> return 0;
> }
>
> - nodes = zalloc(sizeof(*nodes) * nr_pmu);
> - if (!nodes)
> + pmu_caps = zalloc(sizeof(*pmu_caps) * nr_pmu);
> + if (!pmu_caps)
> return -ENOMEM;
>
> for (i = 0; i < nr_pmu; i++) {
> - struct hybrid_cpc_node *n = &nodes[i];
> -
> - ret = process_per_cpu_pmu_caps(ff, &n->nr_cpu_pmu_caps,
> - &n->cpu_pmu_caps,
> - &n->max_branches);
> + ret = __process_pmu_caps(ff, &pmu_caps[i].nr_caps,
> + &pmu_caps[i].caps,
> + &pmu_caps[i].max_branches);
> if (ret)
> goto err;
>
> - n->pmu_name = do_read_string(ff);
> - if (!n->pmu_name) {
> + pmu_caps[i].pmu_name = do_read_string(ff);
> + if (!pmu_caps[i].pmu_name) {
> ret = -1;
> goto err;
> }
> - if (!n->nr_cpu_pmu_caps)
> - pr_debug("%s pmu capabilities not available\n", n->pmu_name);
> + if (!pmu_caps[i].nr_caps) {
> + pr_debug("%s pmu capabilities not available\n",
> + pmu_caps[i].pmu_name);
> + }
> }
>
> - ff->ph->env.nr_hybrid_cpc_nodes = nr_pmu;
> - ff->ph->env.hybrid_cpc_nodes = nodes;
> + ff->ph->env.nr_pmus_with_caps = nr_pmu;
> + ff->ph->env.pmu_caps = pmu_caps;
> return 0;
>
> err:
> for (i = 0; i < nr_pmu; i++) {
> - for (j = 0; j < nodes[i].nr_cpu_pmu_caps; j++)
> - free(nodes[i].cpu_pmu_caps[j]);
> - free(nodes[i].cpu_pmu_caps);
> - free(nodes[i].pmu_name);
> + for (j = 0; j < pmu_caps[i].nr_caps; j++)
> + free(pmu_caps[i].caps[j]);
> + free(pmu_caps[i].caps);
> + free(pmu_caps[i].pmu_name);
> }
>
> - free(nodes);
> + free(pmu_caps);
> return ret;
> }
>
> @@ -3387,7 +3405,7 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = {
> FEAT_OPR(CPU_PMU_CAPS, cpu_pmu_caps, false),
> FEAT_OPR(CLOCK_DATA, clock_data, false),
> FEAT_OPN(HYBRID_TOPOLOGY, hybrid_topology, true),
> - FEAT_OPR(HYBRID_CPU_PMU_CAPS, hybrid_cpu_pmu_caps, false),
> + FEAT_OPR(PMU_CAPS, pmu_caps, false),
> };
>
> struct header_print_data {
> diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
> index 08563c1f1bff..5790bf556b90 100644
> --- a/tools/perf/util/header.h
> +++ b/tools/perf/util/header.h
> @@ -46,7 +46,7 @@ enum {
> HEADER_CPU_PMU_CAPS,
> HEADER_CLOCK_DATA,
> HEADER_HYBRID_TOPOLOGY,
> - HEADER_HYBRID_CPU_PMU_CAPS,
> + HEADER_PMU_CAPS,
> HEADER_LAST_FEATURE,
> HEADER_FEAT_BITS = 256,
> };