[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <4e9d2241-0794-43c5-b13c-49e86e5f83ed@amd.com>
Date: Fri, 9 Jan 2026 16:55:06 +0530
From: Swapnil Sapkal <swapnil.sapkal@....com>
To: Namhyung Kim <namhyung@...nel.org>
CC: <peterz@...radead.org>, <mingo@...hat.com>, <acme@...nel.org>,
<irogers@...gle.com>, <james.clark@....com>, <ravi.bangoria@....com>,
<yu.c.chen@...el.com>, <mark.rutland@....com>,
<alexander.shishkin@...ux.intel.com>, <jolsa@...nel.org>,
<rostedt@...dmis.org>, <vincent.guittot@...aro.org>,
<adrian.hunter@...el.com>, <kan.liang@...ux.intel.com>,
<gautham.shenoy@....com>, <kprateek.nayak@....com>, <juri.lelli@...hat.com>,
<yangjihong@...edance.com>, <void@...ifault.com>, <tj@...nel.org>,
<sshegde@...ux.ibm.com>, <ctshao@...gle.com>, <quic_zhonhan@...cinc.com>,
<thomas.falcon@...el.com>, <blakejones@...gle.com>, <ashelat@...hat.com>,
<leo.yan@....com>, <dvyukov@...gle.com>, <ak@...ux.intel.com>,
<yujie.liu@...el.com>, <graham.woodward@....com>, <ben.gainey@....com>,
<vineethr@...ux.ibm.com>, <tim.c.chen@...ux.intel.com>, <linux@...blig.org>,
<linux-kernel@...r.kernel.org>, <linux-perf-users@...r.kernel.org>,
<santosh.shukla@....com>, <sandipan.das@....com>, James Clark
<james.clark@...aro.org>
Subject: Re: [PATCH RESEND v4 07/11] perf sched stats: Add support for report subcommand
Hello Namhyung,
On 03-01-2026 04:05, Namhyung Kim wrote:
> On Tue, Sep 09, 2025 at 11:42:23AM +0000, Swapnil Sapkal wrote:
>> `perf sched stats record` captures two sets of samples. For a workload
>> profile, the first set is taken right before the workload starts and the
>> second set after the workload finishes. For a systemwide profile, the
>> first set is taken at the beginning of the profile and the second set
>> on receiving the SIGINT signal.
>>
>> Add a `perf sched stats report` subcommand that reads both sets of
>> samples, computes the diff and renders a final report. The final report
>> prints scheduler stats at CPU granularity as well as at sched domain
>> granularity.
>>
>> Example usage:
>>
>> # perf sched stats record
>> # perf sched stats report
>
> It'd be great if you could add an example output as well.
>
Sure, will do.
>>
>> Co-developed-by: Ravi Bangoria <ravi.bangoria@....com>
>> Signed-off-by: Ravi Bangoria <ravi.bangoria@....com>
>> Tested-by: James Clark <james.clark@...aro.org>
>> Signed-off-by: Swapnil Sapkal <swapnil.sapkal@....com>
>> ---
>> tools/perf/builtin-sched.c | 509 ++++++++++++++++++++++++++++++++++++-
>> 1 file changed, 508 insertions(+), 1 deletion(-)
>>
>> diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
>> index 2573491fa5f8..e23018798f5b 100644
>> --- a/tools/perf/builtin-sched.c
>> +++ b/tools/perf/builtin-sched.c
>> @@ -3940,6 +3940,505 @@ static int perf_sched__schedstat_record(struct perf_sched *sched,
>> return err;
>> }
>>
>> +struct schedstat_domain {
>> + struct list_head domain_list;
>> + struct perf_record_schedstat_domain *domain_data;
>> +};
>> +
>> +struct schedstat_cpu {
>> + struct list_head cpu_list;
>> + struct list_head domain_head;
>> + struct perf_record_schedstat_cpu *cpu_data;
>> +};
>> +
>> +static struct list_head cpu_head = LIST_HEAD_INIT(cpu_head);
>> +static struct schedstat_cpu *cpu_second_pass;
>> +static struct schedstat_domain *domain_second_pass;
>> +static bool after_workload_flag;
>> +static bool verbose_field;
>> +
>> +static void store_schedtstat_cpu_diff(struct schedstat_cpu *after_workload)
>> +{
>> + struct perf_record_schedstat_cpu *before = cpu_second_pass->cpu_data;
>> + struct perf_record_schedstat_cpu *after = after_workload->cpu_data;
>> + __u16 version = after_workload->cpu_data->version;
>> +
>> +#define CPU_FIELD(_type, _name, _desc, _format, _is_pct, _pct_of, _ver) \
>> + (before->_ver._name = after->_ver._name - before->_ver._name)
>> +
>> + if (version == 15) {
>> +#include <perf/schedstat-v15.h>
>> + } else if (version == 16) {
>> +#include <perf/schedstat-v16.h>
>> + } else if (version == 17) {
>> +#include <perf/schedstat-v17.h>
>> + }
>> +
>> +#undef CPU_FIELD
>> +}
>> +
>> +static void store_schedstat_domain_diff(struct schedstat_domain *after_workload)
>> +{
>> + struct perf_record_schedstat_domain *before = domain_second_pass->domain_data;
>> + struct perf_record_schedstat_domain *after = after_workload->domain_data;
>> + __u16 version = after_workload->domain_data->version;
>> +
>> +#define DOMAIN_FIELD(_type, _name, _desc, _format, _is_jiffies, _ver) \
>> + (before->_ver._name = after->_ver._name - before->_ver._name)
>> +
>> + if (version == 15) {
>> +#include <perf/schedstat-v15.h>
>> + } else if (version == 16) {
>> +#include <perf/schedstat-v16.h>
>> + } else if (version == 17) {
>> +#include <perf/schedstat-v17.h>
>> + }
>> +#undef DOMAIN_FIELD
>> +}
>> +
>> +static inline void print_cpu_stats(struct perf_record_schedstat_cpu *cs)
>> +{
>> + printf("%-65s %12s %12s\n", "DESC", "COUNT", "PCT_CHANGE");
>> + printf("%.*s\n", 100, graph_dotted_line);
>> +
>> +#define CALC_PCT(_x, _y) ((_y) ? ((double)(_x) / (_y)) * 100 : 0.0)
>> +
>> +#define CPU_FIELD(_type, _name, _desc, _format, _is_pct, _pct_of, _ver) \
>> + do { \
>> + printf("%-65s: " _format, verbose_field ? _desc : #_name, \
>> + cs->_ver._name); \
>> + if (_is_pct) { \
>> + printf(" ( %8.2lf%% )", \
>> + CALC_PCT(cs->_ver._name, cs->_ver._pct_of)); \
>> + } \
>> + printf("\n"); \
>> + } while (0)
>> +
>> + if (cs->version == 15) {
>> +#include <perf/schedstat-v15.h>
>> + } else if (cs->version == 16) {
>> +#include <perf/schedstat-v16.h>
>> + } else if (cs->version == 17) {
>> +#include <perf/schedstat-v17.h>
>> + }
>> +
>> +#undef CPU_FIELD
>> +#undef CALC_PCT
>> +}
>> +
>> +static inline void print_domain_stats(struct perf_record_schedstat_domain *ds,
>> + __u64 jiffies)
>> +{
>> + printf("%-65s %12s %14s\n", "DESC", "COUNT", "AVG_JIFFIES");
>> +
>> +#define DOMAIN_CATEGORY(_desc) \
>> + do { \
>> + size_t _len = strlen(_desc); \
>> + size_t _pre_dash_cnt = (100 - _len) / 2; \
>> + size_t _post_dash_cnt = 100 - _len - _pre_dash_cnt; \
>> + print_separator((int)_pre_dash_cnt, _desc, (int)_post_dash_cnt);\
>> + } while (0)
>> +
>> +#define CALC_AVG(_x, _y) ((_y) ? (long double)(_x) / (_y) : 0.0)
>> +
>> +#define DOMAIN_FIELD(_type, _name, _desc, _format, _is_jiffies, _ver) \
>> + do { \
>> + printf("%-65s: " _format, verbose_field ? _desc : #_name, \
>> + ds->_ver._name); \
>> + if (_is_jiffies) { \
>> + printf(" $ %11.2Lf $", \
>> + CALC_AVG(jiffies, ds->_ver._name)); \
>> + } \
>> + printf("\n"); \
>> + } while (0)
>> +
>> +#define DERIVED_CNT_FIELD(_name, _desc, _format, _x, _y, _z, _ver) \
>> + printf("*%-64s: " _format "\n", verbose_field ? _desc : #_name, \
>> + (ds->_ver._x) - (ds->_ver._y) - (ds->_ver._z))
>> +
>> +#define DERIVED_AVG_FIELD(_name, _desc, _format, _x, _y, _z, _w, _ver) \
>> + printf("*%-64s: " _format "\n", verbose_field ? _desc : #_name, \
>> + CALC_AVG(ds->_ver._w, \
>> + ((ds->_ver._x) - (ds->_ver._y) - (ds->_ver._z))))
>> +
>> + if (ds->version == 15) {
>> +#include <perf/schedstat-v15.h>
>> + } else if (ds->version == 16) {
>> +#include <perf/schedstat-v16.h>
>> + } else if (ds->version == 17) {
>> +#include <perf/schedstat-v17.h>
>> + }
>> +
>> +#undef DERIVED_AVG_FIELD
>> +#undef DERIVED_CNT_FIELD
>> +#undef DOMAIN_FIELD
>> +#undef CALC_AVG
>> +#undef DOMAIN_CATEGORY
>> +}
>> +
>> +static void summarize_schedstat_cpu(struct schedstat_cpu *summary_cpu,
>> + struct schedstat_cpu *cptr,
>> + int cnt, bool is_last)
>> +{
>> + struct perf_record_schedstat_cpu *summary_cs = summary_cpu->cpu_data,
>> + *temp_cs = cptr->cpu_data;
>> +
>> +#define CPU_FIELD(_type, _name, _desc, _format, _is_pct, _pct_of, _ver) \
>> + do { \
>> + summary_cs->_ver._name += temp_cs->_ver._name; \
>> + if (is_last) \
>> + summary_cs->_ver._name /= cnt; \
>> + } while (0)
>> +
>> + if (cptr->cpu_data->version == 15) {
>> +#include <perf/schedstat-v15.h>
>> + } else if (cptr->cpu_data->version == 16) {
>> +#include <perf/schedstat-v16.h>
>> + } else if (cptr->cpu_data->version == 17) {
>> +#include <perf/schedstat-v17.h>
>> + }
>> +#undef CPU_FIELD
>> +}
>> +
>> +static void summarize_schedstat_domain(struct schedstat_domain *summary_domain,
>> + struct schedstat_domain *dptr,
>> + int cnt, bool is_last)
>> +{
>> + struct perf_record_schedstat_domain *summary_ds = summary_domain->domain_data,
>> + *temp_ds = dptr->domain_data;
>> +
>> +#define DOMAIN_FIELD(_type, _name, _desc, _format, _is_jiffies, _ver) \
>> + do { \
>> + summary_ds->_ver._name += temp_ds->_ver._name; \
>> + if (is_last) \
>> + summary_ds->_ver._name /= cnt; \
>> + } while (0)
>> +
>> + if (dptr->domain_data->version == 15) {
>> +#include <perf/schedstat-v15.h>
>> + } else if (dptr->domain_data->version == 16) {
>> +#include <perf/schedstat-v16.h>
>> + } else if (dptr->domain_data->version == 17) {
>> +#include <perf/schedstat-v17.h>
>> + }
>> +#undef DOMAIN_FIELD
>> +}
>> +
>> +static int get_all_cpu_stats(struct list_head *head)
>> +{
>> + struct schedstat_cpu *cptr = list_first_entry(head, struct schedstat_cpu, cpu_list);
>> + struct schedstat_cpu *summary_head = NULL;
>> + struct perf_record_schedstat_domain *ds;
>> + struct perf_record_schedstat_cpu *cs;
>> + struct schedstat_domain *dptr, *tdptr;
>> + bool is_last = false;
>> + int cnt = 1;
>> + int ret = 0;
>> +
>> + if (cptr) {
>> + summary_head = zalloc(sizeof(*summary_head));
>> + if (!summary_head)
>> + return -ENOMEM;
>> +
>> + summary_head->cpu_data = zalloc(sizeof(*cs));
>> + memcpy(summary_head->cpu_data, cptr->cpu_data, sizeof(*cs));
>> +
>> + INIT_LIST_HEAD(&summary_head->domain_head);
>> +
>> + list_for_each_entry(dptr, &cptr->domain_head, domain_list) {
>> + tdptr = zalloc(sizeof(*tdptr));
>> + if (!tdptr)
>> + return -ENOMEM;
>> +
>> + tdptr->domain_data = zalloc(sizeof(*ds));
>> + if (!tdptr->domain_data)
>> + return -ENOMEM;
>
> Please free tdptr too.
>
Ack.
>> +
>> + memcpy(tdptr->domain_data, dptr->domain_data, sizeof(*ds));
>> + list_add_tail(&tdptr->domain_list, &summary_head->domain_head);
>> + }
>> + }
>> +
>> +
>> + list_for_each_entry(cptr, head, cpu_list) {
>> + if (list_is_first(&cptr->cpu_list, head))
>> + continue;
>> +
>> + if (list_is_last(&cptr->cpu_list, head))
>> + is_last = true;
>> +
>> + cnt++;
>> + summarize_schedstat_cpu(summary_head, cptr, cnt, is_last);
>> + tdptr = list_first_entry(&summary_head->domain_head, struct schedstat_domain,
>> + domain_list);
>> +
>> + list_for_each_entry(dptr, &cptr->domain_head, domain_list) {
>> + summarize_schedstat_domain(tdptr, dptr, cnt, is_last);
>> + tdptr = list_next_entry(tdptr, domain_list);
>> + }
>> + }
>> +
>> + list_add(&summary_head->cpu_list, head);
>> +
>> + return ret;
>> +}
>> +
>> +static void print_field_description(struct schedstat_cpu *cptr)
>> +{
>> +#define CPU_FIELD(_type, _name, _desc, _format, _is_pct, _pct_of, _ver) \
>> + printf("%-30s-> %s\n", #_name, _desc) \
>> +
>> +#define DOMAIN_CATEGORY(_desc) \
>> + do { \
>> + size_t _len = strlen(_desc); \
>> + size_t _pre_dash_cnt = (100 - _len) / 2; \
>> + size_t _post_dash_cnt = 100 - _len - _pre_dash_cnt; \
>> + print_separator((int)_pre_dash_cnt, _desc, (int)_post_dash_cnt);\
>> + } while (0)
>> +
>> +#define DOMAIN_FIELD(_type, _name, _desc, _format, _is_jiffies, _ver) \
>> + printf("%-30s-> %s\n", #_name, _desc) \
>> +
>> +#define DERIVED_CNT_FIELD(_name, _desc, _format, _x, _y, _z, _ver) \
>> + printf("*%-29s-> %s\n", #_name, _desc) \
>> +
>> +#define DERIVED_AVG_FIELD(_name, _desc, _format, _x, _y, _z, _w, _ver) \
>> + printf("*%-29s-> %s\n", #_name, _desc) \
>> +
>> + if (cptr->cpu_data->version == 15) {
>> +#include <perf/schedstat-v15.h>
>> + } else if (cptr->cpu_data->version == 16) {
>> +#include <perf/schedstat-v16.h>
>> + } else if (cptr->cpu_data->version == 17) {
>> +#include <perf/schedstat-v17.h>
>> + }
>> +#undef CPU_FIELD
>> +#undef DOMAIN_CATEGORY
>> +#undef DERIVED_CNT_FIELD
>> +#undef DERIVED_AVG_FIELD
>> +#undef DOMAIN_FIELD
>> +}
>> +
>> +static int show_schedstat_data(struct list_head *head, struct cpu_domain_map **cd_map)
>> +{
>> + struct schedstat_cpu *cptr = list_first_entry(head, struct schedstat_cpu, cpu_list);
>> + __u64 jiffies = cptr->cpu_data->timestamp;
>> + struct perf_record_schedstat_domain *ds;
>> + struct perf_record_schedstat_cpu *cs;
>> + struct schedstat_domain *dptr;
>> + bool is_summary = true;
>> + int ret = 0;
>> +
>> + printf("Description\n");
>> + print_separator(100, "", 0);
>> + printf("%-30s-> %s\n", "DESC", "Description of the field");
>> + printf("%-30s-> %s\n", "COUNT", "Value of the field");
>> + printf("%-30s-> %s\n", "PCT_CHANGE", "Percent change with corresponding base value");
>> + printf("%-30s-> %s\n", "AVG_JIFFIES",
>> + "Avg time in jiffies between two consecutive occurrence of event");
>> +
>> + if (!verbose_field) {
>> + print_separator(100, "", 0);
>> + print_field_description(cptr);
>> + }
>> +
>> + print_separator(100, "", 0);
>> + printf("\n");
>> +
>> + printf("%-65s: %11llu\n", "Time elapsed (in jiffies)", jiffies);
>> + print_separator(100, "", 0);
>> +
>> + ret = get_all_cpu_stats(head);
>> +
>> + list_for_each_entry(cptr, head, cpu_list) {
>> + cs = cptr->cpu_data;
>> + printf("\n");
>> + print_separator(100, "", 0);
>> +
>> + if (is_summary)
>> + printf("CPU <ALL CPUS SUMMARY>\n");
>> + else
>> + printf("CPU %d\n", cs->cpu);
>> +
>> + print_separator(100, "", 0);
>> + print_cpu_stats(cs);
>> + print_separator(100, "", 0);
>> +
>> + list_for_each_entry(dptr, &cptr->domain_head, domain_list) {
>> + struct domain_info *dinfo;
>> +
>> + ds = dptr->domain_data;
>> + dinfo = cd_map[ds->cpu]->domains[ds->domain];
>> + if (is_summary)
>
> Please add a pair of braces for multiline statements.
>
Ack.
>
>> + if (dinfo->dname)
>> + printf("CPU <ALL CPUS SUMMARY>, DOMAIN %s\n", dinfo->dname);
>> + else
>> + printf("CPU <ALL CPUS SUMMARY>, DOMAIN %d\n", dinfo->domain);
>> + else {
>> + if (dinfo->dname)
>> + printf("CPU %d, DOMAIN %s CPUS ", cs->cpu, dinfo->dname);
>> + else
>> + printf("CPU %d, DOMAIN %d CPUS ", cs->cpu, dinfo->domain);
>> +
>> + printf("%s\n", dinfo->cpulist);
>> + }
>> + print_separator(100, "", 0);
>> + print_domain_stats(ds, jiffies);
>> + print_separator(100, "", 0);
>> + }
>> + is_summary = false;
>> + }
>> + return ret;
>> +}
>> +
>> +static int perf_sched__process_schedstat(struct perf_session *session __maybe_unused,
>> + union perf_event *event)
>> +{
>> + struct perf_cpu this_cpu;
>> + static __u32 initial_cpu;
>> +
>> + switch (event->header.type) {
>> + case PERF_RECORD_SCHEDSTAT_CPU:
>> + this_cpu.cpu = event->schedstat_cpu.cpu;
>> + break;
>> + case PERF_RECORD_SCHEDSTAT_DOMAIN:
>> + this_cpu.cpu = event->schedstat_domain.cpu;
>> + break;
>> + default:
>> + return 0;
>> + }
>> +
>> + if (user_requested_cpus && !perf_cpu_map__has(user_requested_cpus, this_cpu))
>> + return 0;
>> +
>> + if (event->header.type == PERF_RECORD_SCHEDSTAT_CPU) {
>> + struct schedstat_cpu *temp = zalloc(sizeof(*temp));
>> +
>> + if (!temp)
>> + return -ENOMEM;
>> +
>> + temp->cpu_data = zalloc(sizeof(*temp->cpu_data));
>> + if (!temp->cpu_data)
>> + return -ENOMEM;
>
> Please free temp as well.
>
Ack.
>> +
>> + memcpy(temp->cpu_data, &event->schedstat_cpu, sizeof(*temp->cpu_data));
>> +
>> + if (!list_empty(&cpu_head) && temp->cpu_data->cpu == initial_cpu)
>> + after_workload_flag = true;
>> +
>> + if (!after_workload_flag) {
>> + if (list_empty(&cpu_head))
>> + initial_cpu = temp->cpu_data->cpu;
>> +
>> + list_add_tail(&temp->cpu_list, &cpu_head);
>> + INIT_LIST_HEAD(&temp->domain_head);
>> + } else {
>> + if (temp->cpu_data->cpu == initial_cpu) {
>> + cpu_second_pass = list_first_entry(&cpu_head, struct schedstat_cpu,
>> + cpu_list);
>> + cpu_second_pass->cpu_data->timestamp =
>> + temp->cpu_data->timestamp - cpu_second_pass->cpu_data->timestamp;
>> + } else {
>> + cpu_second_pass = list_next_entry(cpu_second_pass, cpu_list);
>> + }
>> + domain_second_pass = list_first_entry(&cpu_second_pass->domain_head,
>> + struct schedstat_domain, domain_list);
>> + store_schedtstat_cpu_diff(temp);
>> + free(temp);
>
> What about temp->cpu_data?
>
Sure, will free it.
>
>> + }
>> + } else if (event->header.type == PERF_RECORD_SCHEDSTAT_DOMAIN) {
>> + struct schedstat_cpu *cpu_tail;
>> + struct schedstat_domain *temp = zalloc(sizeof(*temp));
>> +
>> + if (!temp)
>> + return -ENOMEM;
>> +
>> + temp->domain_data = zalloc(sizeof(*temp->domain_data));
>> + if (!temp->domain_data)
>> + return -ENOMEM;
>
> Ditto.
>
Ack.
>> +
>> + memcpy(temp->domain_data, &event->schedstat_domain, sizeof(*temp->domain_data));
>> +
>> + if (!after_workload_flag) {
>> + cpu_tail = list_last_entry(&cpu_head, struct schedstat_cpu, cpu_list);
>> + list_add_tail(&temp->domain_list, &cpu_tail->domain_head);
>> + } else {
>> + store_schedstat_domain_diff(temp);
>> + domain_second_pass = list_next_entry(domain_second_pass, domain_list);
>> + free(temp);
>
> Ditto.
>
Ack.
--
Thanks and Regards,
Swapnil
> Thanks,
> Namhyung
>
Powered by blists - more mailing lists