lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <aVhGJJBp0F5kygFt@google.com>
Date: Fri, 2 Jan 2026 14:26:44 -0800
From: Namhyung Kim <namhyung@...nel.org>
To: Swapnil Sapkal <swapnil.sapkal@....com>
Cc: peterz@...radead.org, mingo@...hat.com, acme@...nel.org,
	irogers@...gle.com, james.clark@....com, ravi.bangoria@....com,
	yu.c.chen@...el.com, mark.rutland@....com,
	alexander.shishkin@...ux.intel.com, jolsa@...nel.org,
	rostedt@...dmis.org, vincent.guittot@...aro.org,
	adrian.hunter@...el.com, kan.liang@...ux.intel.com,
	gautham.shenoy@....com, kprateek.nayak@....com,
	juri.lelli@...hat.com, yangjihong@...edance.com, void@...ifault.com,
	tj@...nel.org, sshegde@...ux.ibm.com, ctshao@...gle.com,
	quic_zhonhan@...cinc.com, thomas.falcon@...el.com,
	blakejones@...gle.com, ashelat@...hat.com, leo.yan@....com,
	dvyukov@...gle.com, ak@...ux.intel.com, yujie.liu@...el.com,
	graham.woodward@....com, ben.gainey@....com, vineethr@...ux.ibm.com,
	tim.c.chen@...ux.intel.com, linux@...blig.org,
	linux-kernel@...r.kernel.org, linux-perf-users@...r.kernel.org,
	santosh.shukla@....com, sandipan.das@....com
Subject: Re: [PATCH RESEND v4 03/11] perf header: Support CPU DOMAIN relation
 info

On Tue, Sep 09, 2025 at 11:42:19AM +0000, Swapnil Sapkal wrote:
> '/proc/schedstat' gives the info about load balancing statistics within
> a given domain. It also contains the cpu_mask giving information about
> the sibling cpus and domain names after schedstat version 17. Storing
> this information in perf header will help tools like `perf sched stats`
> for better analysis.
> 
> Signed-off-by: Swapnil Sapkal <swapnil.sapkal@....com>
> ---
>  .../Documentation/perf.data-file-format.txt   |  17 +
>  tools/perf/builtin-inject.c                   |   1 +
>  tools/perf/util/env.h                         |  16 +
>  tools/perf/util/header.c                      | 304 ++++++++++++++++++
>  tools/perf/util/header.h                      |   1 +
>  tools/perf/util/util.c                        |  42 +++
>  tools/perf/util/util.h                        |   3 +
>  7 files changed, 384 insertions(+)
> 
> diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
> index cd95ba09f727..92dbba1003cf 100644
> --- a/tools/perf/Documentation/perf.data-file-format.txt
> +++ b/tools/perf/Documentation/perf.data-file-format.txt
> @@ -437,6 +437,23 @@ struct {
>  	} [nr_pmu];
>  };
>  
> +	HEADER_CPU_DOMAIN_INFO = 32,
> +
> +List of cpu-domain relation info. The format of the data is as below.
> +
> +struct domain_info {
> +	int domain;
> +	char dname[];
> +	char cpumask[];
> +	char cpulist[];
> +};
> +
> +struct cpu_domain_info {
> +	int cpu;
> +	int nr_domains;
> +	struct domain_info domains[];
> +};
> +
>  	other bits are reserved and should ignored for now
>  	HEADER_FEAT_BITS	= 256,
>  
> diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
> index a114b3fa1bea..f43a7ec44b5f 100644
> --- a/tools/perf/builtin-inject.c
> +++ b/tools/perf/builtin-inject.c
> @@ -2058,6 +2058,7 @@ static bool keep_feat(int feat)
>  	case HEADER_CLOCK_DATA:
>  	case HEADER_HYBRID_TOPOLOGY:
>  	case HEADER_PMU_CAPS:
> +	case HEADER_CPU_DOMAIN_INFO:
>  		return true;
>  	/* Information that can be updated */
>  	case HEADER_BUILD_ID:
> diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
> index e00179787a34..71034c4b4488 100644
> --- a/tools/perf/util/env.h
> +++ b/tools/perf/util/env.h
> @@ -54,6 +54,19 @@ struct pmu_caps {
>  	char            *pmu_name;
>  };
>  
> +struct domain_info {
> +	u32	domain;
> +	char	*dname;
> +	char	*cpumask;
> +	char	*cpulist;
> +};
> +
> +struct cpu_domain_map {
> +	u32			cpu;
> +	u32			nr_domains;
> +	struct domain_info	**domains;
> +};
> +
>  typedef const char *(arch_syscalls__strerrno_t)(int err);
>  
>  struct perf_env {
> @@ -70,6 +83,8 @@ struct perf_env {
>  	unsigned int		max_branches;
>  	unsigned int		br_cntr_nr;
>  	unsigned int		br_cntr_width;
> +	unsigned int		schedstat_version;
> +	unsigned int		max_sched_domains;
>  	int			kernel_is_64_bit;
>  
>  	int			nr_cmdline;
> @@ -92,6 +107,7 @@ struct perf_env {
>  	char			**cpu_pmu_caps;
>  	struct cpu_topology_map	*cpu;
>  	struct cpu_cache_level	*caches;
> +	struct cpu_domain_map	**cpu_domain;
>  	int			 caches_cnt;
>  	u32			comp_ratio;
>  	u32			comp_ver;
> diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
> index 4f2a6e10ed5c..7ff7434bac2c 100644
> --- a/tools/perf/util/header.c
> +++ b/tools/perf/util/header.c
> @@ -1621,6 +1621,184 @@ static int write_pmu_caps(struct feat_fd *ff,
>  	return 0;
>  }
>  
> +static void free_cpu_domain_info(struct cpu_domain_map **cd_map, u32 schedstat_version, u32 nr)
> +{
> +	for (u32 i = 0; i < nr; i++) {
> +		if (cd_map[i]->domains) {
> +			for (u32 j = 0; j < cd_map[i]->nr_domains; j++) {
> +				struct domain_info *d_info = cd_map[i]->domains[j];
> +

I'm not sure if it needs a NULL check for d_info before access.


> +				if (schedstat_version >= 17)
> +					free(d_info->dname);
> +
> +				free(d_info->cpumask);
> +				free(d_info->cpulist);
> +			}
> +			free(cd_map[i]->domains);
> +		}
> +	}
> +
> +	free(cd_map);
> +}
> +
> +static struct cpu_domain_map  **build_cpu_domain_map(u32 *schedstat_version, u32 *max_sched_domains,
> +						     u32 nr)
> +{
> +	struct domain_info *domain_info;
> +	struct cpu_domain_map **cd_map;
> +	char dname[16], cpumask[256];
> +	char cpulist[1024];
> +	char *line = NULL;
> +	u32 cpu, domain;
> +	u32 dcount = 0;
> +	size_t len;
> +	FILE *fp;
> +
> +	fp = fopen("/proc/schedstat", "r");
> +	if (!fp) {
> +		pr_err("Failed to open /proc/schedstat\n");
> +		return NULL;
> +	}
> +
> +	cd_map = calloc(nr, sizeof(*cd_map));
> +	if (!cd_map)
> +		goto out;
> +
> +	while (getline(&line, &len, fp) > 0) {
> +		int retval;
> +
> +		if (strncmp(line, "version", 7) == 0) {
> +			retval = sscanf(line, "version %d\n", schedstat_version);
> +			if (retval != 1)
> +				continue;
> +
> +		} else if (strncmp(line, "cpu", 3) == 0) {
> +			retval = sscanf(line, "cpu%u %*s", &cpu);
> +			if (retval == 1) {
> +				cd_map[cpu] = calloc(1, sizeof(*cd_map[cpu]));
> +				if (!cd_map[cpu])
> +					goto out_free_line;
> +				cd_map[cpu]->cpu = cpu;
> +			} else
> +				continue;
> +
> +			dcount = 0;
> +		} else if (strncmp(line, "domain", 6) == 0) {
> +			dcount++;
> +
> +			cd_map[cpu]->domains = realloc(cd_map[cpu]->domains,
> +						       dcount * sizeof(domain_info));
> +			if (!cd_map[cpu]->domains)
> +				goto out_free_line;

Please use a temporary variable to save the result in order to not lose
the original pointer in case of failure.

> +
> +			domain_info = calloc(1, sizeof(*domain_info));
> +			if (!domain_info)
> +				goto out_free_line;
> +
> +			cd_map[cpu]->domains[dcount - 1] = domain_info;
> +
> +			if (*schedstat_version >= 17) {
> +				retval = sscanf(line, "domain%u %s %s %*s", &domain, dname,
> +						cpumask);
> +				if (retval != 3)
> +					continue;
> +
> +				domain_info->dname = calloc(strlen(dname) + 1, sizeof(char));
> +				if (!domain_info->dname)
> +					goto out_free_line;
> +
> +				strcpy(domain_info->dname, dname);

This can be simply:
				domain_info->dname = strdup(dname);


> +			} else {
> +				retval = sscanf(line, "domain%u %s %*s", &domain, cpumask);
> +				if (retval != 2)
> +					continue;
> +			}
> +
> +			domain_info->domain = domain;
> +			if (domain > *max_sched_domains)
> +				*max_sched_domains = domain;
> +
> +			domain_info->cpumask = calloc(strlen(cpumask) + 1, sizeof(char));
> +			if (!domain_info->cpumask)
> +				goto out_free_line;
> +
> +			strcpy(domain_info->cpumask, cpumask);
> +
> +			cpumask_to_cpulist(cpumask, cpulist);
> +			domain_info->cpulist = calloc(strlen(cpulist) + 1, sizeof(char));
> +			if (!domain_info->cpulist)
> +				goto out_free_line;

All error paths should call free_cpu_domain_info() at some point and
free the intermediate domain_info properly.

> +
> +			strcpy(domain_info->cpulist, cpulist);
> +			cd_map[cpu]->nr_domains = dcount;
> +		}
> +	}
> +
> +out_free_line:
> +	free(line);
> +out:
> +	fclose(fp);
> +	return cd_map;
> +}
> +
> +static int write_cpu_domain_info(struct feat_fd *ff,
> +				 struct evlist *evlist __maybe_unused)
> +{
> +	u32 max_sched_domains = 0, schedstat_version = 0;
> +	struct cpu_domain_map **cd_map;
> +	u32 i, j, nr, ret;
> +
> +	nr = cpu__max_present_cpu().cpu;
> +
> +	cd_map = build_cpu_domain_map(&schedstat_version, &max_sched_domains, nr);
> +	if (!cd_map)
> +		return -1;
> +
> +	ret = do_write(ff, &schedstat_version, sizeof(u32));
> +	if (ret < 0)
> +		goto out;
> +
> +	max_sched_domains += 1;
> +	ret = do_write(ff, &max_sched_domains, sizeof(u32));
> +	if (ret < 0)
> +		goto out;
> +
> +	for (i = 0; i < nr; i++) {
> +		if (cd_map[i]->domains) {

Is it supposed to have NULL domains?  Anyway, it'd be nice if you could
skip that case with a 'continue' statement to reduce the indentation
level.

> +			ret = do_write(ff, &cd_map[i]->cpu, sizeof(u32));
> +			if (ret < 0)
> +				goto out;
> +
> +			ret = do_write(ff, &cd_map[i]->nr_domains, sizeof(u32));
> +			if (ret < 0)
> +				goto out;
> +
> +			for (j = 0; j < cd_map[i]->nr_domains; j++) {
> +				ret = do_write(ff, &cd_map[i]->domains[j]->domain, sizeof(u32));
> +				if (ret < 0)
> +					goto out;
> +				if (schedstat_version >= 17) {
> +					ret = do_write_string(ff, cd_map[i]->domains[j]->dname);
> +					if (ret < 0)
> +						goto out;
> +				}
> +
> +				ret = do_write_string(ff, cd_map[i]->domains[j]->cpumask);
> +				if (ret < 0)
> +					goto out;
> +
> +				ret = do_write_string(ff, cd_map[i]->domains[j]->cpulist);
> +				if (ret < 0)
> +					goto out;
> +			}
> +		}
> +	}
> +
> +out:
> +	free_cpu_domain_info(cd_map, schedstat_version, nr);
> +	return ret;
> +}
> +
>  static void print_hostname(struct feat_fd *ff, FILE *fp)
>  {
>  	fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname);
> @@ -2254,6 +2432,35 @@ static void print_mem_topology(struct feat_fd *ff, FILE *fp)
>  	}
>  }
>  
> +static void print_cpu_domain_info(struct feat_fd *ff, FILE *fp)
> +{
> +	struct cpu_domain_map **cd_map = ff->ph->env.cpu_domain;
> +	u32 nr = ff->ph->env.nr_cpus_avail;
> +	struct domain_info *d_info;
> +	u32 i, j;
> +
> +	fprintf(fp, "# schedstat version	: %u\n", ff->ph->env.schedstat_version);
> +	fprintf(fp, "# Maximum sched domains	: %u\n", ff->ph->env.max_sched_domains);
> +
> +	for (i = 0; i < nr; i++) {
> +		if (cd_map[i]->domains) {

Ditto.

> +			fprintf(fp, "# cpu		: %u\n", cd_map[i]->cpu);
> +			fprintf(fp, "# nr_domains	: %u\n", cd_map[i]->nr_domains);
> +
> +			for (j = 0; j < cd_map[i]->nr_domains; j++) {
> +				d_info = cd_map[i]->domains[j];
> +				fprintf(fp, "# Domain		: %u\n", d_info->domain);
> +
> +				if (ff->ph->env.schedstat_version >= 17)
> +					fprintf(fp, "# Domain name      : %s\n", d_info->dname);
> +
> +				fprintf(fp, "# Domain cpu map   : %s\n", d_info->cpumask);
> +				fprintf(fp, "# Domain cpu list  : %s\n", d_info->cpulist);
> +			}
> +		}
> +	}
> +}
> +
>  static int __event_process_build_id(struct perf_record_header_build_id *bev,
>  				    char *filename,
>  				    struct perf_session *session)
> @@ -3395,6 +3602,102 @@ static int process_pmu_caps(struct feat_fd *ff, void *data __maybe_unused)
>  	return ret;
>  }
>  
> +static int process_cpu_domain_info(struct feat_fd *ff, void *data __maybe_unused)
> +{
> +	u32 schedstat_version, max_sched_domains, cpu, domain, nr_domains;
> +	struct perf_env *env = &ff->ph->env;
> +	char *dname, *cpumask, *cpulist;
> +	struct cpu_domain_map **cd_map;
> +	struct domain_info *d_info;
> +	u32 nra, nr, i, j;
> +	int ret;
> +
> +	nra = env->nr_cpus_avail;
> +	nr = env->nr_cpus_online;
> +
> +	cd_map = calloc(nra, sizeof(*cd_map));
> +	if (!cd_map)
> +		return -1;
> +
> +	env->cpu_domain = cd_map;

Where is env->cpu_domain freed?

Thanks,
Namhyung

> +
> +	ret = do_read_u32(ff, &schedstat_version);
> +	if (ret)
> +		return ret;
> +
> +	env->schedstat_version = schedstat_version;
> +
> +	ret = do_read_u32(ff, &max_sched_domains);
> +	if (ret)
> +		return ret;
> +
> +	env->max_sched_domains = max_sched_domains;
> +
> +	for (i = 0; i < nr; i++) {
> +		if (do_read_u32(ff, &cpu))
> +			return -1;
> +
> +		cd_map[cpu] = calloc(1, sizeof(*cd_map[cpu]));
> +		if (!cd_map[cpu])
> +			return -1;
> +
> +		cd_map[cpu]->cpu = cpu;
> +
> +		if (do_read_u32(ff, &nr_domains))
> +			return -1;
> +
> +		cd_map[cpu]->nr_domains = nr_domains;
> +
> +		cd_map[cpu]->domains = calloc(max_sched_domains, sizeof(*d_info));
> +		if (!cd_map[cpu]->domains)
> +			return -1;
> +
> +		for (j = 0; j < nr_domains; j++) {
> +			if (do_read_u32(ff, &domain))
> +				return -1;
> +
> +			d_info = calloc(1, sizeof(*d_info));
> +			if (!d_info)
> +				return -1;
> +
> +			cd_map[cpu]->domains[domain] = d_info;
> +			d_info->domain = domain;
> +
> +			if (schedstat_version >= 17) {
> +				dname = do_read_string(ff);
> +				if (!dname)
> +					return -1;
> +
> +				d_info->dname = calloc(strlen(dname) + 1, sizeof(char));
> +				if (!d_info->dname)
> +					return -1;
> +
> +				strcpy(d_info->dname, dname);
> +			}
> +
> +			cpumask = do_read_string(ff);
> +			if (!cpumask)
> +				return -1;
> +
> +			d_info->cpumask = calloc(strlen(cpumask) + 1, sizeof(char));
> +			if (!d_info->cpumask)
> +				return -1;
> +			strcpy(d_info->cpumask, cpumask);
> +
> +			cpulist = do_read_string(ff);
> +			if (!cpulist)
> +				return -1;
> +
> +			d_info->cpulist = calloc(strlen(cpulist) + 1, sizeof(char));
> +			if (!d_info->cpulist)
> +				return -1;
> +			strcpy(d_info->cpulist, cpulist);
> +		}
> +	}
> +
> +	return ret;
> +}
> +
>  #define FEAT_OPR(n, func, __full_only) \
>  	[HEADER_##n] = {					\
>  		.name	    = __stringify(n),			\
> @@ -3460,6 +3763,7 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = {
>  	FEAT_OPR(CLOCK_DATA,	clock_data,	false),
>  	FEAT_OPN(HYBRID_TOPOLOGY,	hybrid_topology,	true),
>  	FEAT_OPR(PMU_CAPS,	pmu_caps,	false),
> +	FEAT_OPR(CPU_DOMAIN_INFO,	cpu_domain_info,	true),
>  };
>  
>  struct header_print_data {
> diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
> index d16dfceccd74..edcb95e0dc49 100644
> --- a/tools/perf/util/header.h
> +++ b/tools/perf/util/header.h
> @@ -53,6 +53,7 @@ enum {
>  	HEADER_CLOCK_DATA,
>  	HEADER_HYBRID_TOPOLOGY,
>  	HEADER_PMU_CAPS,
> +	HEADER_CPU_DOMAIN_INFO,
>  	HEADER_LAST_FEATURE,
>  	HEADER_FEAT_BITS	= 256,
>  };
> diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
> index 1b91834e11de..47bfc0259b0e 100644
> --- a/tools/perf/util/util.c
> +++ b/tools/perf/util/util.c
> @@ -263,6 +263,48 @@ void print_separator(int pre_dash_cnt, const char *s, int post_dash_cnt)
>  	       graph_dotted_line);
>  }
>  
> +void cpumask_to_cpulist(char *cpumask, char *cpulist)
> +{
> +	int i, j, bm_size, nbits;
> +	int len = strlen(cpumask);
> +	unsigned long *bm;
> +	char cpus[1024];
> +
> +	for (i = 0; i < len; i++) {
> +		if (cpumask[i] == ',') {
> +			for (j = i; j < len; j++)
> +				cpumask[j] = cpumask[j + 1];
> +		}
> +	}
> +
> +	len = strlen(cpumask);
> +	bm_size = (len + 15) / 16;
> +	nbits = bm_size * 64;
> +	if (nbits <= 0)
> +		return;
> +
> +	bm = calloc(bm_size, sizeof(unsigned long));
> +	if (!cpumask)
> +		goto free_bm;
> +
> +	for (i = 0; i < bm_size; i++) {
> +		char blk[17];
> +		int blklen = len > 16 ? 16 : len;
> +
> +		strncpy(blk, cpumask + len - blklen, blklen);
> +		blk[len] = '\0';
> +		bm[i] = strtoul(blk, NULL, 16);
> +		cpumask[len - blklen] = '\0';
> +		len = strlen(cpumask);
> +	}
> +
> +	bitmap_scnprintf(bm, nbits, cpus, sizeof(cpus));
> +	strcpy(cpulist, cpus);
> +
> +free_bm:
> +	free(bm);
> +}
> +
>  int rm_rf_perf_data(const char *path)
>  {
>  	const char *pat[] = {
> diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
> index de69384380c2..90a8b4d2e59c 100644
> --- a/tools/perf/util/util.h
> +++ b/tools/perf/util/util.h
> @@ -11,6 +11,7 @@
>  #include <stdbool.h>
>  #include <stddef.h>
>  #include <linux/compiler.h>
> +#include <linux/bitmap.h>
>  #include <sys/types.h>
>  #ifndef __cplusplus
>  #include <internal/cpumap.h>
> @@ -50,6 +51,8 @@ int perf_tip(char **strp, const char *dirpath);
>  
>  void print_separator(int pre_dash_cnt, const char *s, int post_dash_cnt);
>  
> +void cpumask_to_cpulist(char *cpumask, char *cpulist);
> +
>  #ifndef HAVE_SCHED_GETCPU_SUPPORT
>  int sched_getcpu(void);
>  #endif
> -- 
> 2.43.0
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ