lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <bef26ca0-6600-431e-8a23-4d612a24c87c@amd.com>
Date: Fri, 9 Jan 2026 16:54:13 +0530
From: Swapnil Sapkal <swapnil.sapkal@....com>
To: Namhyung Kim <namhyung@...nel.org>
CC: <peterz@...radead.org>, <mingo@...hat.com>, <acme@...nel.org>,
	<irogers@...gle.com>, <james.clark@....com>, <ravi.bangoria@....com>,
	<yu.c.chen@...el.com>, <mark.rutland@....com>,
	<alexander.shishkin@...ux.intel.com>, <jolsa@...nel.org>,
	<rostedt@...dmis.org>, <vincent.guittot@...aro.org>,
	<adrian.hunter@...el.com>, <kan.liang@...ux.intel.com>,
	<gautham.shenoy@....com>, <kprateek.nayak@....com>, <juri.lelli@...hat.com>,
	<yangjihong@...edance.com>, <void@...ifault.com>, <tj@...nel.org>,
	<sshegde@...ux.ibm.com>, <ctshao@...gle.com>, <quic_zhonhan@...cinc.com>,
	<thomas.falcon@...el.com>, <blakejones@...gle.com>, <ashelat@...hat.com>,
	<leo.yan@....com>, <dvyukov@...gle.com>, <ak@...ux.intel.com>,
	<yujie.liu@...el.com>, <graham.woodward@....com>, <ben.gainey@....com>,
	<vineethr@...ux.ibm.com>, <tim.c.chen@...ux.intel.com>, <linux@...blig.org>,
	<linux-kernel@...r.kernel.org>, <linux-perf-users@...r.kernel.org>,
	<santosh.shukla@....com>, <sandipan.das@....com>
Subject: Re: [PATCH RESEND v4 03/11] perf header: Support CPU DOMAIN relation
 info

Hello Namhyung,

On 03-01-2026 03:56, Namhyung Kim wrote:
> On Tue, Sep 09, 2025 at 11:42:19AM +0000, Swapnil Sapkal wrote:
>> '/proc/schedstat' gives the info about load balancing statistics within
>> a given domain. It also contains the cpu_mask giving information about
>> the sibling cpus and domain names after schedstat version 17. Storing
>> this information in perf header will help tools like `perf sched stats`
>> for better analysis.
>>
>> Signed-off-by: Swapnil Sapkal <swapnil.sapkal@....com>
>> ---
>>   .../Documentation/perf.data-file-format.txt   |  17 +
>>   tools/perf/builtin-inject.c                   |   1 +
>>   tools/perf/util/env.h                         |  16 +
>>   tools/perf/util/header.c                      | 304 ++++++++++++++++++
>>   tools/perf/util/header.h                      |   1 +
>>   tools/perf/util/util.c                        |  42 +++
>>   tools/perf/util/util.h                        |   3 +
>>   7 files changed, 384 insertions(+)
>>
>> diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
>> index cd95ba09f727..92dbba1003cf 100644
>> --- a/tools/perf/Documentation/perf.data-file-format.txt
>> +++ b/tools/perf/Documentation/perf.data-file-format.txt
>> @@ -437,6 +437,23 @@ struct {
>>   	} [nr_pmu];
>>   };
>>   
>> +	HEADER_CPU_DOMAIN_INFO = 32,
>> +
>> +List of cpu-domain relation info. The format of the data is as below.
>> +
>> +struct domain_info {
>> +	int domain;
>> +	char dname[];
>> +	char cpumask[];
>> +	char cpulist[];
>> +};
>> +
>> +struct cpu_domain_info {
>> +	int cpu;
>> +	int nr_domains;
>> +	struct domain_info domains[];
>> +};
>> +
>>   	other bits are reserved and should ignored for now
>>   	HEADER_FEAT_BITS	= 256,
>>   
>> diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
>> index a114b3fa1bea..f43a7ec44b5f 100644
>> --- a/tools/perf/builtin-inject.c
>> +++ b/tools/perf/builtin-inject.c
>> @@ -2058,6 +2058,7 @@ static bool keep_feat(int feat)
>>   	case HEADER_CLOCK_DATA:
>>   	case HEADER_HYBRID_TOPOLOGY:
>>   	case HEADER_PMU_CAPS:
>> +	case HEADER_CPU_DOMAIN_INFO:
>>   		return true;
>>   	/* Information that can be updated */
>>   	case HEADER_BUILD_ID:
>> diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
>> index e00179787a34..71034c4b4488 100644
>> --- a/tools/perf/util/env.h
>> +++ b/tools/perf/util/env.h
>> @@ -54,6 +54,19 @@ struct pmu_caps {
>>   	char            *pmu_name;
>>   };
>>   
>> +struct domain_info {
>> +	u32	domain;
>> +	char	*dname;
>> +	char	*cpumask;
>> +	char	*cpulist;
>> +};
>> +
>> +struct cpu_domain_map {
>> +	u32			cpu;
>> +	u32			nr_domains;
>> +	struct domain_info	**domains;
>> +};
>> +
>>   typedef const char *(arch_syscalls__strerrno_t)(int err);
>>   
>>   struct perf_env {
>> @@ -70,6 +83,8 @@ struct perf_env {
>>   	unsigned int		max_branches;
>>   	unsigned int		br_cntr_nr;
>>   	unsigned int		br_cntr_width;
>> +	unsigned int		schedstat_version;
>> +	unsigned int		max_sched_domains;
>>   	int			kernel_is_64_bit;
>>   
>>   	int			nr_cmdline;
>> @@ -92,6 +107,7 @@ struct perf_env {
>>   	char			**cpu_pmu_caps;
>>   	struct cpu_topology_map	*cpu;
>>   	struct cpu_cache_level	*caches;
>> +	struct cpu_domain_map	**cpu_domain;
>>   	int			 caches_cnt;
>>   	u32			comp_ratio;
>>   	u32			comp_ver;
>> diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
>> index 4f2a6e10ed5c..7ff7434bac2c 100644
>> --- a/tools/perf/util/header.c
>> +++ b/tools/perf/util/header.c
>> @@ -1621,6 +1621,184 @@ static int write_pmu_caps(struct feat_fd *ff,
>>   	return 0;
>>   }
>>   
>> +static void free_cpu_domain_info(struct cpu_domain_map **cd_map, u32 schedstat_version, u32 nr)
>> +{
>> +	for (u32 i = 0; i < nr; i++) {
>> +		if (cd_map[i]->domains) {
>> +			for (u32 j = 0; j < cd_map[i]->nr_domains; j++) {
>> +				struct domain_info *d_info = cd_map[i]->domains[j];
>> +
> 
> I'm not sure if it needs a NULL check for d_info before access.
> 

Thanks for catching this. Yes we need a NULL check here.

If some of the cpus are offline then it is possible that some sched 
domains can be degenerated and for some cpus.

for e.g. cpu0, cpu128 are smt siblings. If cpu 128 is offline then SMT 
domain will be degenerated for cpu0 alone. All the other cpus will have 
a valid SMT domain. Thus SMT for the d_info can be NULL.

> 
>> +				if (schedstat_version >= 17)
>> +					free(d_info->dname);
>> +
>> +				free(d_info->cpumask);
>> +				free(d_info->cpulist);
>> +			}
>> +			free(cd_map[i]->domains);
>> +		}
>> +	}
>> +
>> +	free(cd_map);
>> +}
>> +
>> +static struct cpu_domain_map  **build_cpu_domain_map(u32 *schedstat_version, u32 *max_sched_domains,
>> +						     u32 nr)
>> +{
>> +	struct domain_info *domain_info;
>> +	struct cpu_domain_map **cd_map;
>> +	char dname[16], cpumask[256];
>> +	char cpulist[1024];
>> +	char *line = NULL;
>> +	u32 cpu, domain;
>> +	u32 dcount = 0;
>> +	size_t len;
>> +	FILE *fp;
>> +
>> +	fp = fopen("/proc/schedstat", "r");
>> +	if (!fp) {
>> +		pr_err("Failed to open /proc/schedstat\n");
>> +		return NULL;
>> +	}
>> +
>> +	cd_map = calloc(nr, sizeof(*cd_map));
>> +	if (!cd_map)
>> +		goto out;
>> +
>> +	while (getline(&line, &len, fp) > 0) {
>> +		int retval;
>> +
>> +		if (strncmp(line, "version", 7) == 0) {
>> +			retval = sscanf(line, "version %d\n", schedstat_version);
>> +			if (retval != 1)
>> +				continue;
>> +
>> +		} else if (strncmp(line, "cpu", 3) == 0) {
>> +			retval = sscanf(line, "cpu%u %*s", &cpu);
>> +			if (retval == 1) {
>> +				cd_map[cpu] = calloc(1, sizeof(*cd_map[cpu]));
>> +				if (!cd_map[cpu])
>> +					goto out_free_line;
>> +				cd_map[cpu]->cpu = cpu;
>> +			} else
>> +				continue;
>> +
>> +			dcount = 0;
>> +		} else if (strncmp(line, "domain", 6) == 0) {
>> +			dcount++;
>> +
>> +			cd_map[cpu]->domains = realloc(cd_map[cpu]->domains,
>> +						       dcount * sizeof(domain_info));
>> +			if (!cd_map[cpu]->domains)
>> +				goto out_free_line;
> 
> Please use a temporary variable to save the result in order to not lose
> the original pointer in case of failure.
> 

Sure, will do.

>> +
>> +			domain_info = calloc(1, sizeof(*domain_info));
>> +			if (!domain_info)
>> +				goto out_free_line;
>> +
>> +			cd_map[cpu]->domains[dcount - 1] = domain_info;
>> +
>> +			if (*schedstat_version >= 17) {
>> +				retval = sscanf(line, "domain%u %s %s %*s", &domain, dname,
>> +						cpumask);
>> +				if (retval != 3)
>> +					continue;
>> +
>> +				domain_info->dname = calloc(strlen(dname) + 1, sizeof(char));
>> +				if (!domain_info->dname)
>> +					goto out_free_line;
>> +
>> +				strcpy(domain_info->dname, dname);
> 
> This can be simply:
> 				domain_info->dname = strdup(dname);
> 

Sure, will do.

> 
>> +			} else {
>> +				retval = sscanf(line, "domain%u %s %*s", &domain, cpumask);
>> +				if (retval != 2)
>> +					continue;
>> +			}
>> +
>> +			domain_info->domain = domain;
>> +			if (domain > *max_sched_domains)
>> +				*max_sched_domains = domain;
>> +
>> +			domain_info->cpumask = calloc(strlen(cpumask) + 1, sizeof(char));
>> +			if (!domain_info->cpumask)
>> +				goto out_free_line;
>> +
>> +			strcpy(domain_info->cpumask, cpumask);
>> +
>> +			cpumask_to_cpulist(cpumask, cpulist);
>> +			domain_info->cpulist = calloc(strlen(cpulist) + 1, sizeof(char));
>> +			if (!domain_info->cpulist)
>> +				goto out_free_line;
> 
> All error paths should call free_cpu_domain_info() at some point and
> free the intermediate domain_info properly.
> 

Sure, will do.

>> +
>> +			strcpy(domain_info->cpulist, cpulist);
>> +			cd_map[cpu]->nr_domains = dcount;
>> +		}
>> +	}
>> +
>> +out_free_line:
>> +	free(line);
>> +out:
>> +	fclose(fp);
>> +	return cd_map;
>> +}
>> +
>> +static int write_cpu_domain_info(struct feat_fd *ff,
>> +				 struct evlist *evlist __maybe_unused)
>> +{
>> +	u32 max_sched_domains = 0, schedstat_version = 0;
>> +	struct cpu_domain_map **cd_map;
>> +	u32 i, j, nr, ret;
>> +
>> +	nr = cpu__max_present_cpu().cpu;
>> +
>> +	cd_map = build_cpu_domain_map(&schedstat_version, &max_sched_domains, nr);
>> +	if (!cd_map)
>> +		return -1;
>> +
>> +	ret = do_write(ff, &schedstat_version, sizeof(u32));
>> +	if (ret < 0)
>> +		goto out;
>> +
>> +	max_sched_domains += 1;
>> +	ret = do_write(ff, &max_sched_domains, sizeof(u32));
>> +	if (ret < 0)
>> +		goto out;
>> +
>> +	for (i = 0; i < nr; i++) {
>> +		if (cd_map[i]->domains) {
> 
> Is it supposed to have NULL domains? 

Yes, It is possible to have NULL domains if the cmdline has isolcpus. I 
will also add NULL check for cd_map[i] to handle offline cpu.

> Anyway it'd be nice if you can
> skip the case like with 'continue' statement to reduce the indentation
> level.
> 

Sure, will do this.

>> +			ret = do_write(ff, &cd_map[i]->cpu, sizeof(u32));
>> +			if (ret < 0)
>> +				goto out;
>> +
>> +			ret = do_write(ff, &cd_map[i]->nr_domains, sizeof(u32));
>> +			if (ret < 0)
>> +				goto out;
>> +
>> +			for (j = 0; j < cd_map[i]->nr_domains; j++) {
>> +				ret = do_write(ff, &cd_map[i]->domains[j]->domain, sizeof(u32));
>> +				if (ret < 0)
>> +					goto out;
>> +				if (schedstat_version >= 17) {
>> +					ret = do_write_string(ff, cd_map[i]->domains[j]->dname);
>> +					if (ret < 0)
>> +						goto out;
>> +				}
>> +
>> +				ret = do_write_string(ff, cd_map[i]->domains[j]->cpumask);
>> +				if (ret < 0)
>> +					goto out;
>> +
>> +				ret = do_write_string(ff, cd_map[i]->domains[j]->cpulist);
>> +				if (ret < 0)
>> +					goto out;
>> +			}
>> +		}
>> +	}
>> +
>> +out:
>> +	free_cpu_domain_info(cd_map, schedstat_version, nr);
>> +	return ret;
>> +}
>> +
>>   static void print_hostname(struct feat_fd *ff, FILE *fp)
>>   {
>>   	fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname);
>> @@ -2254,6 +2432,35 @@ static void print_mem_topology(struct feat_fd *ff, FILE *fp)
>>   	}
>>   }
>>   
>> +static void print_cpu_domain_info(struct feat_fd *ff, FILE *fp)
>> +{
>> +	struct cpu_domain_map **cd_map = ff->ph->env.cpu_domain;
>> +	u32 nr = ff->ph->env.nr_cpus_avail;
>> +	struct domain_info *d_info;
>> +	u32 i, j;
>> +
>> +	fprintf(fp, "# schedstat version	: %u\n", ff->ph->env.schedstat_version);
>> +	fprintf(fp, "# Maximum sched domains	: %u\n", ff->ph->env.max_sched_domains);
>> +
>> +	for (i = 0; i < nr; i++) {
>> +		if (cd_map[i]->domains) {
> 
> Ditto.

Ack.

> 
>> +			fprintf(fp, "# cpu		: %u\n", cd_map[i]->cpu);
>> +			fprintf(fp, "# nr_domains	: %u\n", cd_map[i]->nr_domains);
>> +
>> +			for (j = 0; j < cd_map[i]->nr_domains; j++) {
>> +				d_info = cd_map[i]->domains[j];
>> +				fprintf(fp, "# Domain		: %u\n", d_info->domain);
>> +
>> +				if (ff->ph->env.schedstat_version >= 17)
>> +					fprintf(fp, "# Domain name      : %s\n", d_info->dname);
>> +
>> +				fprintf(fp, "# Domain cpu map   : %s\n", d_info->cpumask);
>> +				fprintf(fp, "# Domain cpu list  : %s\n", d_info->cpulist);
>> +			}
>> +		}
>> +	}
>> +}
>> +
>>   static int __event_process_build_id(struct perf_record_header_build_id *bev,
>>   				    char *filename,
>>   				    struct perf_session *session)
>> @@ -3395,6 +3602,102 @@ static int process_pmu_caps(struct feat_fd *ff, void *data __maybe_unused)
>>   	return ret;
>>   }
>>   
>> +static int process_cpu_domain_info(struct feat_fd *ff, void *data __maybe_unused)
>> +{
>> +	u32 schedstat_version, max_sched_domains, cpu, domain, nr_domains;
>> +	struct perf_env *env = &ff->ph->env;
>> +	char *dname, *cpumask, *cpulist;
>> +	struct cpu_domain_map **cd_map;
>> +	struct domain_info *d_info;
>> +	u32 nra, nr, i, j;
>> +	int ret;
>> +
>> +	nra = env->nr_cpus_avail;
>> +	nr = env->nr_cpus_online;
>> +
>> +	cd_map = calloc(nra, sizeof(*cd_map));
>> +	if (!cd_map)
>> +		return -1;
>> +
>> +	env->cpu_domain = cd_map;
> 
> Where is it freed?
> 

I missed freeing this. This needs to be freed in perf_env__exit(). 
Please correct me if I am missing something.

--
Thanks and Regards,
Swapnil

> Thanks,
> Namhyung

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ