[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20260119175833.340369-3-swapnil.sapkal@amd.com>
Date: Mon, 19 Jan 2026 17:58:24 +0000
From: Swapnil Sapkal <swapnil.sapkal@....com>
To: <peterz@...radead.org>, <mingo@...hat.com>, <acme@...nel.org>,
<namhyung@...nel.org>, <irogers@...gle.com>, <james.clark@....com>
CC: <ravi.bangoria@....com>, <swapnil.sapkal@....com>, <yu.c.chen@...el.com>,
<mark.rutland@....com>, <alexander.shishkin@...ux.intel.com>,
<jolsa@...nel.org>, <rostedt@...dmis.org>, <vincent.guittot@...aro.org>,
<adrian.hunter@...el.com>, <kan.liang@...ux.intel.com>,
<gautham.shenoy@....com>, <kprateek.nayak@....com>, <juri.lelli@...hat.com>,
<yangjihong@...edance.com>, <void@...ifault.com>, <tj@...nel.org>,
<sshegde@...ux.ibm.com>, <ctshao@...gle.com>, <quic_zhonhan@...cinc.com>,
<thomas.falcon@...el.com>, <blakejones@...gle.com>, <ashelat@...hat.com>,
<leo.yan@....com>, <dvyukov@...gle.com>, <ak@...ux.intel.com>,
<yujie.liu@...el.com>, <graham.woodward@....com>, <ben.gainey@....com>,
<vineethr@...ux.ibm.com>, <tim.c.chen@...ux.intel.com>, <linux@...blig.org>,
<santosh.shukla@....com>, <sandipan.das@....com>,
<linux-kernel@...r.kernel.org>, <linux-perf-users@...r.kernel.org>
Subject: [PATCH v5 02/10] perf header: Support CPU DOMAIN relation info
'/proc/schedstat' provides load balancing statistics for each scheduling
domain. Each domain entry also carries a cpu_mask describing the sibling
CPUs and, from schedstat version 17 onwards, the domain name. Storing
this information in the perf header will help tools like `perf sched stats`
perform better analysis.
Signed-off-by: Swapnil Sapkal <swapnil.sapkal@....com>
---
.../Documentation/perf.data-file-format.txt | 17 ++
tools/perf/builtin-inject.c | 1 +
tools/perf/util/env.c | 29 ++
tools/perf/util/env.h | 17 ++
tools/perf/util/header.c | 286 ++++++++++++++++++
tools/perf/util/header.h | 1 +
tools/perf/util/util.c | 42 +++
tools/perf/util/util.h | 3 +
8 files changed, 396 insertions(+)
diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index c9d4dec65344..0e4d0ecc9e12 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -447,6 +447,23 @@ struct {
} [nr_pmu];
};
+ HEADER_CPU_DOMAIN_INFO = 32,
+
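+List of cpu-domain relation info. The section starts with two u32 values,
+the schedstat version and the maximum number of sched domains, followed by
+one cpu_domain_info entry per CPU. The domain name (dname) is only present
+when the schedstat version is 17 or newer. The format of the data is as below.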
+
+struct domain_info {
+ int domain;
+ char dname[];
+ char cpumask[];
+ char cpulist[];
+};
+
+struct cpu_domain_info {
+ int cpu;
+ int nr_domains;
+ struct domain_info domains[];
+};
+
other bits are reserved and should ignored for now
HEADER_FEAT_BITS = 256,
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 6080afec537d..587c180035b2 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -2047,6 +2047,7 @@ static bool keep_feat(struct perf_inject *inject, int feat)
case HEADER_CLOCK_DATA:
case HEADER_HYBRID_TOPOLOGY:
case HEADER_PMU_CAPS:
+ case HEADER_CPU_DOMAIN_INFO:
return true;
/* Information that can be updated */
case HEADER_BUILD_ID:
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index f1626d2032cd..93d475a80f14 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -216,6 +216,34 @@ static void perf_env__purge_bpf(struct perf_env *env __maybe_unused)
}
#endif // HAVE_LIBBPF_SUPPORT
+/*
+ * Release a cpu -> sched-domain map and everything hanging off it.
+ *
+ * @cd_map:            array of @nr per-cpu map pointers (entries may be NULL);
+ *                     a NULL @cd_map is a no-op.
+ * @schedstat_version: version the map was built/read with; domain names are
+ *                     only released for version >= 17.
+ * @nr:                number of slots in @cd_map.
+ *
+ * Only the local copy of @cd_map is cleared; the caller's pointer is left
+ * as it was.
+ */
+void free_cpu_domain_info(struct cpu_domain_map **cd_map, u32 schedstat_version, u32 nr)
+{
+	if (!cd_map)
+		return;
+
+	for (u32 i = 0; i < nr; i++) {
+		if (!cd_map[i])
+			continue;
+
+		/*
+		 * A partially constructed entry can have nr_domains set while
+		 * the domains array was never allocated; do not walk a NULL
+		 * array in that case.
+		 */
+		for (u32 j = 0; cd_map[i]->domains && j < cd_map[i]->nr_domains; j++) {
+			struct domain_info *d_info = cd_map[i]->domains[j];
+
+			if (!d_info)
+				continue;
+
+			if (schedstat_version >= 17)
+				zfree(&d_info->dname);
+
+			zfree(&d_info->cpumask);
+			zfree(&d_info->cpulist);
+			zfree(&cd_map[i]->domains[j]);
+		}
+		zfree(&cd_map[i]->domains);
+		zfree(&cd_map[i]);
+	}
+	zfree(&cd_map);
+}
+
void perf_env__exit(struct perf_env *env)
{
int i, j;
@@ -265,6 +293,7 @@ void perf_env__exit(struct perf_env *env)
zfree(&env->pmu_caps[i].pmu_name);
}
zfree(&env->pmu_caps);
+ free_cpu_domain_info(env->cpu_domain, env->schedstat_version, env->nr_cpus_avail);
}
void perf_env__init(struct perf_env *env)
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 9977b85523a8..76ba1a36e9ff 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -54,6 +54,19 @@ struct pmu_caps {
char *pmu_name;
};
+/*
+ * One scheduling domain of a CPU, as parsed from a "domain<N>" line of
+ * /proc/schedstat or read back from the perf.data header.
+ */
+struct domain_info {
+ /* Domain number taken from the "domain<N>" prefix. */
+ u32 domain;
+ /* Domain name; only populated when the schedstat version is >= 17. */
+ char *dname;
+ /* CPU mask string of the domain as found in /proc/schedstat. */
+ char *cpumask;
+ /* CPU list string derived from cpumask via cpumask_to_cpulist(). */
+ char *cpulist;
+};
+
+/* Per-CPU list of scheduling domains the CPU belongs to. */
+struct cpu_domain_map {
+ /* CPU number this entry describes. */
+ u32 cpu;
+ /* Number of domains recorded for this CPU. */
+ u32 nr_domains;
+ /* Array of pointers to the per-domain information. */
+ struct domain_info **domains;
+};
+
typedef const char *(arch_syscalls__strerrno_t)(int err);
struct perf_env {
@@ -70,6 +83,8 @@ struct perf_env {
unsigned int max_branches;
unsigned int br_cntr_nr;
unsigned int br_cntr_width;
+ unsigned int schedstat_version;
+ unsigned int max_sched_domains;
int kernel_is_64_bit;
int nr_cmdline;
@@ -92,6 +107,7 @@ struct perf_env {
char **cpu_pmu_caps;
struct cpu_topology_map *cpu;
struct cpu_cache_level *caches;
+ struct cpu_domain_map **cpu_domain;
int caches_cnt;
u32 comp_ratio;
u32 comp_ver;
@@ -151,6 +167,7 @@ struct bpf_prog_info_node;
struct btf_node;
int perf_env__read_core_pmu_caps(struct perf_env *env);
+void free_cpu_domain_info(struct cpu_domain_map **cd_map, u32 schedstat_version, u32 nr);
void perf_env__exit(struct perf_env *env);
int perf_env__kernel_is_64_bit(struct perf_env *env);
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index f5cad377c99e..673d53bb2a2c 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1614,6 +1614,162 @@ static int write_pmu_caps(struct feat_fd *ff,
return 0;
}
+/*
+ * Parse /proc/schedstat into a cpu -> sched-domain map.
+ *
+ * @schedstat_version: out, set from the "version" line when one is found.
+ * @max_sched_domains: in/out, raised to the highest domain number seen.
+ * @nr:                number of slots to allocate; "cpu<N>" lines with
+ *                     N >= @nr are ignored together with their domains.
+ *
+ * Returns the newly allocated map (to be released with
+ * free_cpu_domain_info()) or NULL if the file cannot be opened or memory
+ * runs out. Lines that cannot be parsed are skipped.
+ *
+ * Domain names longer than 15 characters and cpumasks longer than 255
+ * characters are truncated to fit the local buffers.
+ */
+static struct cpu_domain_map **build_cpu_domain_map(u32 *schedstat_version, u32 *max_sched_domains,
+						    u32 nr)
+{
+	struct cpu_domain_map **cd_map;
+	/* Entry of the most recent valid "cpu<N>" line, NULL if there is none. */
+	struct cpu_domain_map *cur = NULL;
+	char dname[16], cpumask[256];
+	char cpulist[1024];
+	char *line = NULL;
+	size_t len = 0;
+	FILE *fp;
+
+	fp = fopen("/proc/schedstat", "r");
+	if (!fp) {
+		pr_err("Failed to open /proc/schedstat\n");
+		return NULL;
+	}
+
+	cd_map = zalloc(sizeof(*cd_map) * nr);
+	if (!cd_map)
+		goto out;
+
+	while (getline(&line, &len, fp) > 0) {
+		if (strncmp(line, "version", 7) == 0) {
+			u32 version;
+
+			if (sscanf(line, "version %u", &version) == 1)
+				*schedstat_version = version;
+		} else if (strncmp(line, "cpu", 3) == 0) {
+			u32 cpu;
+
+			/* Domains only attach to a cpu line that was accepted. */
+			cur = NULL;
+			if (sscanf(line, "cpu%u", &cpu) != 1)
+				continue;
+
+			/* Out of range or already seen: ignore this cpu. */
+			if (cpu >= nr || cd_map[cpu])
+				continue;
+
+			cd_map[cpu] = zalloc(sizeof(*cd_map[cpu]));
+			if (!cd_map[cpu])
+				goto out_err;
+
+			cd_map[cpu]->cpu = cpu;
+			cur = cd_map[cpu];
+		} else if (strncmp(line, "domain", 6) == 0) {
+			struct domain_info **domains, *d_info;
+			u32 domain;
+
+			if (!cur)
+				continue;
+
+			/* Parse first so that a malformed line allocates nothing. */
+			if (*schedstat_version >= 17) {
+				if (sscanf(line, "domain%u %15s %255s", &domain, dname,
+					   cpumask) != 3)
+					continue;
+			} else {
+				if (sscanf(line, "domain%u %255s", &domain, cpumask) != 2)
+					continue;
+			}
+
+			domains = realloc(cur->domains,
+					  (cur->nr_domains + 1) * sizeof(*domains));
+			if (!domains)
+				goto out_err;
+			cur->domains = domains;
+
+			d_info = zalloc(sizeof(*d_info));
+			if (!d_info)
+				goto out_err;
+
+			/*
+			 * Link the entry before filling it in so that the
+			 * error path below releases partially built entries.
+			 */
+			cur->domains[cur->nr_domains++] = d_info;
+
+			d_info->domain = domain;
+			if (domain > *max_sched_domains)
+				*max_sched_domains = domain;
+
+			if (*schedstat_version >= 17) {
+				d_info->dname = strdup(dname);
+				if (!d_info->dname)
+					goto out_err;
+			}
+
+			/* Copy the mask first: cpumask_to_cpulist() modifies its input. */
+			d_info->cpumask = strdup(cpumask);
+			if (!d_info->cpumask)
+				goto out_err;
+
+			/* Start from an empty list in case the helper writes nothing. */
+			cpulist[0] = '\0';
+			cpumask_to_cpulist(cpumask, cpulist);
+			d_info->cpulist = strdup(cpulist);
+			if (!d_info->cpulist)
+				goto out_err;
+		}
+	}
+
+	goto out_free_line;
+
+out_err:
+	/* Do not hand a half-built map to the caller. */
+	free_cpu_domain_info(cd_map, *schedstat_version, nr);
+	cd_map = NULL;
+out_free_line:
+	free(line);
+out:
+	fclose(fp);
+	return cd_map;
+}
+
+/*
+ * Write the HEADER_CPU_DOMAIN_INFO feature section.
+ *
+ * Layout: u32 schedstat version, u32 number of sched domains (highest domain
+ * number + 1), then for every CPU found in /proc/schedstat: u32 cpu,
+ * u32 nr_domains and, per domain, u32 domain number, the domain name
+ * (schedstat version >= 17 only), the cpumask string and the cpulist string.
+ *
+ * Returns 0 on success, a negative value if the map cannot be built or a
+ * write fails.
+ */
+static int write_cpu_domain_info(struct feat_fd *ff,
+				 struct evlist *evlist __maybe_unused)
+{
+	u32 max_sched_domains = 0, schedstat_version = 0;
+	struct cpu_domain_map **cd_map;
+	u32 i, j, nr;
+	/* Must be signed: the write helpers report failure as a negative value. */
+	int ret;
+
+	nr = cpu__max_present_cpu().cpu;
+
+	cd_map = build_cpu_domain_map(&schedstat_version, &max_sched_domains, nr);
+	if (!cd_map)
+		return -1;
+
+	ret = do_write(ff, &schedstat_version, sizeof(u32));
+	if (ret < 0)
+		goto out;
+
+	/* Stored as a count, i.e. the highest domain number plus one. */
+	max_sched_domains += 1;
+	ret = do_write(ff, &max_sched_domains, sizeof(u32));
+	if (ret < 0)
+		goto out;
+
+	for (i = 0; i < nr; i++) {
+		if (!cd_map[i])
+			continue;
+
+		ret = do_write(ff, &cd_map[i]->cpu, sizeof(u32));
+		if (ret < 0)
+			goto out;
+
+		ret = do_write(ff, &cd_map[i]->nr_domains, sizeof(u32));
+		if (ret < 0)
+			goto out;
+
+		for (j = 0; j < cd_map[i]->nr_domains; j++) {
+			struct domain_info *d_info = cd_map[i]->domains[j];
+
+			ret = do_write(ff, &d_info->domain, sizeof(u32));
+			if (ret < 0)
+				goto out;
+
+			if (schedstat_version >= 17) {
+				ret = do_write_string(ff, d_info->dname);
+				if (ret < 0)
+					goto out;
+			}
+
+			ret = do_write_string(ff, d_info->cpumask);
+			if (ret < 0)
+				goto out;
+
+			ret = do_write_string(ff, d_info->cpulist);
+			if (ret < 0)
+				goto out;
+		}
+	}
+
+out:
+	free_cpu_domain_info(cd_map, schedstat_version, nr);
+	return ret;
+}
+
static void print_hostname(struct feat_fd *ff, FILE *fp)
{
fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname);
@@ -2247,6 +2403,39 @@ static void print_mem_topology(struct feat_fd *ff, FILE *fp)
}
}
+/*
+ * Dump the cpu -> sched-domain information stored in the header environment
+ * to @fp, one commented line per field. CPUs and domains without an entry
+ * are skipped; the domain name is only printed for schedstat version >= 17.
+ */
+static void print_cpu_domain_info(struct feat_fd *ff, FILE *fp)
+{
+	struct perf_env *env = &ff->ph->env;
+	struct cpu_domain_map **maps = env->cpu_domain;
+	const u32 nr_cpus = env->nr_cpus_avail;
+	u32 cpu_idx;
+
+	fprintf(fp, "# schedstat version : %u\n", env->schedstat_version);
+	fprintf(fp, "# Maximum sched domains : %u\n", env->max_sched_domains);
+
+	for (cpu_idx = 0; cpu_idx < nr_cpus; cpu_idx++) {
+		struct cpu_domain_map *map = maps[cpu_idx];
+		u32 dom_idx;
+
+		if (map == NULL)
+			continue;
+
+		fprintf(fp, "# cpu : %u\n", map->cpu);
+		fprintf(fp, "# nr_domains : %u\n", map->nr_domains);
+
+		for (dom_idx = 0; dom_idx < map->nr_domains; dom_idx++) {
+			struct domain_info *dom = map->domains[dom_idx];
+
+			if (dom != NULL) {
+				fprintf(fp, "# Domain : %u\n", dom->domain);
+
+				if (env->schedstat_version >= 17)
+					fprintf(fp, "# Domain name : %s\n", dom->dname);
+
+				fprintf(fp, "# Domain cpu map : %s\n", dom->cpumask);
+				fprintf(fp, "# Domain cpu list : %s\n", dom->cpulist);
+			}
+		}
+	}
+}
+
static int __event_process_build_id(struct perf_record_header_build_id *bev,
char *filename,
struct perf_session *session)
@@ -3388,6 +3577,102 @@ static int process_pmu_caps(struct feat_fd *ff, void *data __maybe_unused)
return ret;
}
+/*
+ * Read the HEADER_CPU_DOMAIN_INFO feature section into the header environment.
+ *
+ * Fills env->schedstat_version, env->max_sched_domains and env->cpu_domain
+ * (an array of env->nr_cpus_avail slots, indexed by cpu number). One entry
+ * is read for each of env->nr_cpus_online CPUs.
+ *
+ * All indices and counts come from the file and are validated before use.
+ * Returns 0 on success and non-zero on a read error, an allocation failure
+ * or inconsistent data. On failure the partially built map stays attached
+ * to env->cpu_domain, where perf_env__exit() passes it to
+ * free_cpu_domain_info().
+ */
+static int process_cpu_domain_info(struct feat_fd *ff, void *data __maybe_unused)
+{
+	struct perf_env *env = &ff->ph->env;
+	u32 schedstat_version, max_sched_domains;
+	struct cpu_domain_map **cd_map;
+	u32 nra, nr, i, j;
+	int ret;
+
+	nra = env->nr_cpus_avail;
+	nr = env->nr_cpus_online;
+
+	cd_map = zalloc(sizeof(*cd_map) * nra);
+	if (!cd_map)
+		return -1;
+
+	env->cpu_domain = cd_map;
+
+	ret = do_read_u32(ff, &schedstat_version);
+	if (ret)
+		return ret;
+
+	env->schedstat_version = schedstat_version;
+
+	ret = do_read_u32(ff, &max_sched_domains);
+	if (ret)
+		return ret;
+
+	env->max_sched_domains = max_sched_domains;
+
+	for (i = 0; i < nr; i++) {
+		struct cpu_domain_map *map;
+		u32 cpu, nr_domains;
+
+		/* Reject cpu numbers outside the map and duplicate entries. */
+		if (do_read_u32(ff, &cpu) || cpu >= nra || cd_map[cpu])
+			return -1;
+
+		map = zalloc(sizeof(*map));
+		if (!map)
+			return -1;
+
+		map->cpu = cpu;
+		cd_map[cpu] = map;
+
+		/* Users walk domains[0..nr_domains), so it must fit the array. */
+		if (do_read_u32(ff, &nr_domains) || nr_domains > max_sched_domains)
+			return -1;
+
+		map->domains = calloc(max_sched_domains, sizeof(*map->domains));
+		if (!map->domains)
+			return -1;
+
+		/* Publish the count only once the array exists. */
+		map->nr_domains = nr_domains;
+
+		for (j = 0; j < nr_domains; j++) {
+			struct domain_info *d_info;
+			u32 domain;
+
+			/* The domain number indexes the array: validate it. */
+			if (do_read_u32(ff, &domain) || domain >= max_sched_domains ||
+			    map->domains[domain])
+				return -1;
+
+			d_info = zalloc(sizeof(*d_info));
+			if (!d_info)
+				return -1;
+
+			d_info->domain = domain;
+			map->domains[domain] = d_info;
+
+			/*
+			 * do_read_string() returns a freshly allocated buffer;
+			 * keep it instead of duplicating it.
+			 */
+			if (schedstat_version >= 17) {
+				d_info->dname = do_read_string(ff);
+				if (!d_info->dname)
+					return -1;
+			}
+
+			d_info->cpumask = do_read_string(ff);
+			if (!d_info->cpumask)
+				return -1;
+
+			d_info->cpulist = do_read_string(ff);
+			if (!d_info->cpulist)
+				return -1;
+		}
+	}
+
+	return 0;
+}
+
#define FEAT_OPR(n, func, __full_only) \
[HEADER_##n] = { \
.name = __stringify(n), \
@@ -3453,6 +3738,7 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPR(CLOCK_DATA, clock_data, false),
FEAT_OPN(HYBRID_TOPOLOGY, hybrid_topology, true),
FEAT_OPR(PMU_CAPS, pmu_caps, false),
+ FEAT_OPR(CPU_DOMAIN_INFO, cpu_domain_info, true),
};
struct header_print_data {
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index c058021c3150..c62f3275a80f 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -53,6 +53,7 @@ enum {
HEADER_CLOCK_DATA,
HEADER_HYBRID_TOPOLOGY,
HEADER_PMU_CAPS,
+ HEADER_CPU_DOMAIN_INFO,
HEADER_LAST_FEATURE,
HEADER_FEAT_BITS = 256,
};
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 0f031eb80b4c..b87ff96a9f45 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -257,6 +257,48 @@ static int rm_rf_kcore_dir(const char *path)
return 0;
}
+/*
+ * Convert a hexadecimal cpumask string (e.g. "00000000,0000000f") into a
+ * cpu list string as produced by bitmap_scnprintf().
+ *
+ * @cpumask: NUL-terminated hex mask, most significant digits first. The
+ *           commas are stripped from this buffer in place.
+ * @cpulist: output buffer; the result is built in a 1024 byte scratch
+ *           buffer and copied here, so the caller has to provide at least
+ *           that much room. Set to the empty string when the mask is empty
+ *           or memory cannot be allocated.
+ */
+void cpumask_to_cpulist(char *cpumask, char *cpulist)
+{
+	/* Hex digits that fit into one bitmap word on this architecture. */
+	const int blk_digits = 2 * sizeof(unsigned long);
+	int i, len, bm_size, nbits;
+	unsigned long *bm;
+	char cpus[1024];
+
+	if (!cpumask || !cpulist)
+		return;
+
+	cpulist[0] = '\0';
+
+	/* Drop the ',' group separators, compacting the string in place. */
+	len = 0;
+	for (i = 0; cpumask[i]; i++) {
+		if (cpumask[i] != ',')
+			cpumask[len++] = cpumask[i];
+	}
+	cpumask[len] = '\0';
+
+	bm_size = (len + blk_digits - 1) / blk_digits;
+	if (bm_size <= 0)
+		return;
+
+	nbits = bm_size * blk_digits * 4;
+
+	bm = calloc(bm_size, sizeof(*bm));
+	if (!bm)
+		return;
+
+	/* Word 0 holds the least significant digits, i.e. the string's tail. */
+	for (i = 0; i < bm_size; i++) {
+		char blk[2 * sizeof(unsigned long) + 1];
+		int blklen = len > blk_digits ? blk_digits : len;
+
+		memcpy(blk, cpumask + len - blklen, blklen);
+		blk[blklen] = '\0';
+		bm[i] = strtoul(blk, NULL, 16);
+		len -= blklen;
+	}
+
+	bitmap_scnprintf(bm, nbits, cpus, sizeof(cpus));
+	strcpy(cpulist, cpus);
+
+	free(bm);
+}
+
int rm_rf_perf_data(const char *path)
{
const char *pat[] = {
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 3423778e39a5..1572c8cf04e5 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -11,6 +11,7 @@
#include <stdbool.h>
#include <stddef.h>
#include <linux/compiler.h>
+#include <linux/bitmap.h>
#include <sys/types.h>
#ifndef __cplusplus
#include <internal/cpumap.h>
@@ -48,6 +49,8 @@ bool sysctl__nmi_watchdog_enabled(void);
int perf_tip(char **strp, const char *dirpath);
+void cpumask_to_cpulist(char *cpumask, char *cpulist);
+
#ifndef HAVE_SCHED_GETCPU_SUPPORT
int sched_getcpu(void);
#endif
--
2.43.0
Powered by blists - more mailing lists