[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <Z9UEdbcTwVyNuHI9@google.com>
Date: Fri, 14 Mar 2025 21:39:17 -0700
From: Namhyung Kim <namhyung@...nel.org>
To: Swapnil Sapkal <swapnil.sapkal@....com>
Cc: peterz@...radead.org, mingo@...hat.com, acme@...nel.org,
irogers@...gle.com, james.clark@....com, ravi.bangoria@....com,
yu.c.chen@...el.com, mark.rutland@....com,
alexander.shishkin@...ux.intel.com, jolsa@...nel.org,
rostedt@...dmis.org, vincent.guittot@...aro.org,
adrian.hunter@...el.com, kan.liang@...ux.intel.com,
gautham.shenoy@....com, kprateek.nayak@....com,
juri.lelli@...hat.com, yangjihong@...edance.com, void@...ifault.com,
tj@...nel.org, sshegde@...ux.ibm.com, linux-kernel@...r.kernel.org,
linux-perf-users@...r.kernel.org, santosh.shukla@....com,
ananth.narayan@....com, sandipan.das@....com,
James Clark <james.clark@...aro.org>
Subject: Re: [PATCH v3 4/8] perf sched stats: Add support for report
subcommand
On Tue, Mar 11, 2025 at 12:02:26PM +0000, Swapnil Sapkal wrote:
> `perf sched stats record` captures two sets of samples. For workload
> profile, first set right before workload starts and second set after
> workload finishes. For the systemwide profile, first set at the
> beginning of profile and second set on receiving SIGINT signal.
>
> Add `perf sched stats report` subcommand that will read both the set
> of samples, get the diff and render a final report. Final report prints
> scheduler stat at cpu granularity as well as sched domain granularity.
>
> Example usage:
>
> # perf sched stats record
> # perf sched stats report
>
> Co-developed-by: Ravi Bangoria <ravi.bangoria@....com>
> Signed-off-by: Ravi Bangoria <ravi.bangoria@....com>
> Tested-by: James Clark <james.clark@...aro.org>
> Signed-off-by: Swapnil Sapkal <swapnil.sapkal@....com>
> ---
> tools/lib/perf/include/perf/event.h | 12 +-
> tools/lib/perf/include/perf/schedstat-v15.h | 180 +++++--
> tools/lib/perf/include/perf/schedstat-v16.h | 182 +++++--
> tools/lib/perf/include/perf/schedstat-v17.h | 209 +++++---
> tools/perf/builtin-sched.c | 504 +++++++++++++++++++-
> tools/perf/util/event.c | 4 +-
> tools/perf/util/synthetic-events.c | 4 +-
> 7 files changed, 938 insertions(+), 157 deletions(-)
>
> diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h
> index 0d1983ad9a41..5e2c56c9b038 100644
> --- a/tools/lib/perf/include/perf/event.h
> +++ b/tools/lib/perf/include/perf/event.h
> @@ -458,19 +458,19 @@ struct perf_record_compressed {
> };
>
> struct perf_record_schedstat_cpu_v15 {
> -#define CPU_FIELD(_type, _name, _ver) _type _name
> +#define CPU_FIELD(_type, _name, _desc, _format, _is_pct, _pct_of, _ver) _type _name
> #include "schedstat-v15.h"
> #undef CPU_FIELD
> };
>
> struct perf_record_schedstat_cpu_v16 {
> -#define CPU_FIELD(_type, _name, _ver) _type _name
> +#define CPU_FIELD(_type, _name, _desc, _format, _is_pct, _pct_of, _ver) _type _name
> #include "schedstat-v16.h"
> #undef CPU_FIELD
> };
>
> struct perf_record_schedstat_cpu_v17 {
> -#define CPU_FIELD(_type, _name, _ver) _type _name
> +#define CPU_FIELD(_type, _name, _desc, _format, _is_pct, _pct_of, _ver) _type _name
> #include "schedstat-v17.h"
> #undef CPU_FIELD
> };
> @@ -488,19 +488,19 @@ struct perf_record_schedstat_cpu {
> };
>
> struct perf_record_schedstat_domain_v15 {
> -#define DOMAIN_FIELD(_type, _name, _ver) _type _name
> +#define DOMAIN_FIELD(_type, _name, _desc, _format, _is_jiffies, _ver) _type _name
> #include "schedstat-v15.h"
> #undef DOMAIN_FIELD
> };
>
> struct perf_record_schedstat_domain_v16 {
> -#define DOMAIN_FIELD(_type, _name, _ver) _type _name
> +#define DOMAIN_FIELD(_type, _name, _desc, _format, _is_jiffies, _ver) _type _name
> #include "schedstat-v16.h"
> #undef DOMAIN_FIELD
> };
>
> struct perf_record_schedstat_domain_v17 {
> -#define DOMAIN_FIELD(_type, _name, _ver) _type _name
> +#define DOMAIN_FIELD(_type, _name, _desc, _format, _is_jiffies, _ver) _type _name
> #include "schedstat-v17.h"
> #undef DOMAIN_FIELD
> };
> diff --git a/tools/lib/perf/include/perf/schedstat-v15.h b/tools/lib/perf/include/perf/schedstat-v15.h
> index 43f8060c5337..011411ac0f7e 100644
> --- a/tools/lib/perf/include/perf/schedstat-v15.h
> +++ b/tools/lib/perf/include/perf/schedstat-v15.h
> @@ -1,52 +1,142 @@
> /* SPDX-License-Identifier: GPL-2.0 */
>
> #ifdef CPU_FIELD
> -CPU_FIELD(__u32, yld_count, v15);
> -CPU_FIELD(__u32, array_exp, v15);
> -CPU_FIELD(__u32, sched_count, v15);
> -CPU_FIELD(__u32, sched_goidle, v15);
> -CPU_FIELD(__u32, ttwu_count, v15);
> -CPU_FIELD(__u32, ttwu_local, v15);
> -CPU_FIELD(__u64, rq_cpu_time, v15);
> -CPU_FIELD(__u64, run_delay, v15);
> -CPU_FIELD(__u64, pcount, v15);
> +CPU_FIELD(__u32, yld_count, "sched_yield() count",
> + "%11u", false, yld_count, v15);
> +CPU_FIELD(__u32, array_exp, "Legacy counter can be ignored",
> + "%11u", false, array_exp, v15);
> +CPU_FIELD(__u32, sched_count, "schedule() called",
> + "%11u", false, sched_count, v15);
> +CPU_FIELD(__u32, sched_goidle, "schedule() left the processor idle",
> + "%11u", true, sched_count, v15);
> +CPU_FIELD(__u32, ttwu_count, "try_to_wake_up() was called",
> + "%11u", false, ttwu_count, v15);
> +CPU_FIELD(__u32, ttwu_local, "try_to_wake_up() was called to wake up the local cpu",
> + "%11u", true, ttwu_count, v15);
> +CPU_FIELD(__u64, rq_cpu_time, "total runtime by tasks on this processor (in jiffies)",
> + "%11llu", false, rq_cpu_time, v15);
> +CPU_FIELD(__u64, run_delay, "total waittime by tasks on this processor (in jiffies)",
> + "%11llu", true, rq_cpu_time, v15);
> +CPU_FIELD(__u64, pcount, "total timeslices run on this cpu",
> + "%11llu", false, pcount, v15);
> #endif
>
> #ifdef DOMAIN_FIELD
> -DOMAIN_FIELD(__u32, idle_lb_count, v15);
> -DOMAIN_FIELD(__u32, idle_lb_balanced, v15);
> -DOMAIN_FIELD(__u32, idle_lb_failed, v15);
> -DOMAIN_FIELD(__u32, idle_lb_imbalance, v15);
> -DOMAIN_FIELD(__u32, idle_lb_gained, v15);
> -DOMAIN_FIELD(__u32, idle_lb_hot_gained, v15);
> -DOMAIN_FIELD(__u32, idle_lb_nobusyq, v15);
> -DOMAIN_FIELD(__u32, idle_lb_nobusyg, v15);
> -DOMAIN_FIELD(__u32, busy_lb_count, v15);
> -DOMAIN_FIELD(__u32, busy_lb_balanced, v15);
> -DOMAIN_FIELD(__u32, busy_lb_failed, v15);
> -DOMAIN_FIELD(__u32, busy_lb_imbalance, v15);
> -DOMAIN_FIELD(__u32, busy_lb_gained, v15);
> -DOMAIN_FIELD(__u32, busy_lb_hot_gained, v15);
> -DOMAIN_FIELD(__u32, busy_lb_nobusyq, v15);
> -DOMAIN_FIELD(__u32, busy_lb_nobusyg, v15);
> -DOMAIN_FIELD(__u32, newidle_lb_count, v15);
> -DOMAIN_FIELD(__u32, newidle_lb_balanced, v15);
> -DOMAIN_FIELD(__u32, newidle_lb_failed, v15);
> -DOMAIN_FIELD(__u32, newidle_lb_imbalance, v15);
> -DOMAIN_FIELD(__u32, newidle_lb_gained, v15);
> -DOMAIN_FIELD(__u32, newidle_lb_hot_gained, v15);
> -DOMAIN_FIELD(__u32, newidle_lb_nobusyq, v15);
> -DOMAIN_FIELD(__u32, newidle_lb_nobusyg, v15);
> -DOMAIN_FIELD(__u32, alb_count, v15);
> -DOMAIN_FIELD(__u32, alb_failed, v15);
> -DOMAIN_FIELD(__u32, alb_pushed, v15);
> -DOMAIN_FIELD(__u32, sbe_count, v15);
> -DOMAIN_FIELD(__u32, sbe_balanced, v15);
> -DOMAIN_FIELD(__u32, sbe_pushed, v15);
> -DOMAIN_FIELD(__u32, sbf_count, v15);
> -DOMAIN_FIELD(__u32, sbf_balanced, v15);
> -DOMAIN_FIELD(__u32, sbf_pushed, v15);
> -DOMAIN_FIELD(__u32, ttwu_wake_remote, v15);
> -DOMAIN_FIELD(__u32, ttwu_move_affine, v15);
> -DOMAIN_FIELD(__u32, ttwu_move_balance, v15);
> +#ifdef DOMAIN_CATEGORY
> +DOMAIN_CATEGORY(" <Category idle> ");
> #endif
> +DOMAIN_FIELD(__u32, idle_lb_count,
> + "load_balance() count on cpu idle", "%11u", true, v15);
> +DOMAIN_FIELD(__u32, idle_lb_balanced,
> + "load_balance() found balanced on cpu idle", "%11u", true, v15);
> +DOMAIN_FIELD(__u32, idle_lb_failed,
> + "load_balance() move task failed on cpu idle", "%11u", true, v15);
> +DOMAIN_FIELD(__u32, idle_lb_imbalance,
> + "imbalance sum on cpu idle", "%11u", false, v15);
> +DOMAIN_FIELD(__u32, idle_lb_gained,
> + "pull_task() count on cpu idle", "%11u", false, v15);
> +DOMAIN_FIELD(__u32, idle_lb_hot_gained,
> + "pull_task() when target task was cache-hot on cpu idle", "%11u", false, v15);
> +DOMAIN_FIELD(__u32, idle_lb_nobusyq,
> + "load_balance() failed to find busier queue on cpu idle", "%11u", true, v15);
> +DOMAIN_FIELD(__u32, idle_lb_nobusyg,
> + "load_balance() failed to find busier group on cpu idle", "%11u", true, v15);
> +#ifdef DERIVED_CNT_FIELD
> +DERIVED_CNT_FIELD("load_balance() success count on cpu idle", "%11u",
> + idle_lb_count, idle_lb_balanced, idle_lb_failed, v15);
> +#endif
> +#ifdef DERIVED_AVG_FIELD
> +DERIVED_AVG_FIELD("avg task pulled per successful lb attempt (cpu idle)", "%11.2Lf",
> + idle_lb_count, idle_lb_balanced, idle_lb_failed, idle_lb_gained, v15);
> +#endif
> +#ifdef DOMAIN_CATEGORY
> +DOMAIN_CATEGORY(" <Category busy> ");
> +#endif
> +DOMAIN_FIELD(__u32, busy_lb_count,
> + "load_balance() count on cpu busy", "%11u", true, v15);
> +DOMAIN_FIELD(__u32, busy_lb_balanced,
> + "load_balance() found balanced on cpu busy", "%11u", true, v15);
> +DOMAIN_FIELD(__u32, busy_lb_failed,
> + "load_balance() move task failed on cpu busy", "%11u", true, v15);
> +DOMAIN_FIELD(__u32, busy_lb_imbalance,
> + "imbalance sum on cpu busy", "%11u", false, v15);
> +DOMAIN_FIELD(__u32, busy_lb_gained,
> + "pull_task() count on cpu busy", "%11u", false, v15);
> +DOMAIN_FIELD(__u32, busy_lb_hot_gained,
> + "pull_task() when target task was cache-hot on cpu busy", "%11u", false, v15);
> +DOMAIN_FIELD(__u32, busy_lb_nobusyq,
> + "load_balance() failed to find busier queue on cpu busy", "%11u", true, v15);
> +DOMAIN_FIELD(__u32, busy_lb_nobusyg,
> + "load_balance() failed to find busier group on cpu busy", "%11u", true, v15);
> +#ifdef DERIVED_CNT_FIELD
> +DERIVED_CNT_FIELD("load_balance() success count on cpu busy", "%11u",
> + busy_lb_count, busy_lb_balanced, busy_lb_failed, v15);
> +#endif
> +#ifdef DERIVED_AVG_FIELD
> +DERIVED_AVG_FIELD("avg task pulled per successful lb attempt (cpu busy)", "%11.2Lf",
> + busy_lb_count, busy_lb_balanced, busy_lb_failed, busy_lb_gained, v15);
> +#endif
> +#ifdef DOMAIN_CATEGORY
> +DOMAIN_CATEGORY(" <Category newidle> ");
> +#endif
> +DOMAIN_FIELD(__u32, newidle_lb_count,
> + "load_balance() count on cpu newly idle", "%11u", true, v15);
> +DOMAIN_FIELD(__u32, newidle_lb_balanced,
> + "load_balance() found balanced on cpu newly idle", "%11u", true, v15);
> +DOMAIN_FIELD(__u32, newidle_lb_failed,
> + "load_balance() move task failed on cpu newly idle", "%11u", true, v15);
> +DOMAIN_FIELD(__u32, newidle_lb_imbalance,
> + "imbalance sum on cpu newly idle", "%11u", false, v15);
> +DOMAIN_FIELD(__u32, newidle_lb_gained,
> + "pull_task() count on cpu newly idle", "%11u", false, v15);
> +DOMAIN_FIELD(__u32, newidle_lb_hot_gained,
> + "pull_task() when target task was cache-hot on cpu newly idle", "%11u", false, v15);
> +DOMAIN_FIELD(__u32, newidle_lb_nobusyq,
> + "load_balance() failed to find busier queue on cpu newly idle", "%11u", true, v15);
> +DOMAIN_FIELD(__u32, newidle_lb_nobusyg,
> + "load_balance() failed to find busier group on cpu newly idle", "%11u", true, v15);
> +#ifdef DERIVED_CNT_FIELD
> +DERIVED_CNT_FIELD("load_balance() success count on cpu newly idle", "%11u",
> + newidle_lb_count, newidle_lb_balanced, newidle_lb_failed, v15);
> +#endif
> +#ifdef DERIVED_AVG_FIELD
> +DERIVED_AVG_FIELD("avg task pulled per successful lb attempt (cpu newly idle)", "%11.2Lf",
> + newidle_lb_count, newidle_lb_balanced, newidle_lb_failed, newidle_lb_gained, v15);
> +#endif
> +#ifdef DOMAIN_CATEGORY
> +DOMAIN_CATEGORY(" <Category active_load_balance()> ");
> +#endif
> +DOMAIN_FIELD(__u32, alb_count,
> + "active_load_balance() count", "%11u", false, v15);
> +DOMAIN_FIELD(__u32, alb_failed,
> + "active_load_balance() move task failed", "%11u", false, v15);
> +DOMAIN_FIELD(__u32, alb_pushed,
> + "active_load_balance() successfully moved a task", "%11u", false, v15);
> +#ifdef DOMAIN_CATEGORY
> +DOMAIN_CATEGORY(" <Category sched_balance_exec()> ");
> +#endif
> +DOMAIN_FIELD(__u32, sbe_count,
> + "sbe_count is not used", "%11u", false, v15);
> +DOMAIN_FIELD(__u32, sbe_balanced,
> + "sbe_balanced is not used", "%11u", false, v15);
> +DOMAIN_FIELD(__u32, sbe_pushed,
> + "sbe_pushed is not used", "%11u", false, v15);
> +#ifdef DOMAIN_CATEGORY
> +DOMAIN_CATEGORY(" <Category sched_balance_fork()> ");
> +#endif
> +DOMAIN_FIELD(__u32, sbf_count,
> + "sbf_count is not used", "%11u", false, v15);
> +DOMAIN_FIELD(__u32, sbf_balanced,
> + "sbf_balanced is not used", "%11u", false, v15);
> +DOMAIN_FIELD(__u32, sbf_pushed,
> + "sbf_pushed is not used", "%11u", false, v15);
> +#ifdef DOMAIN_CATEGORY
> +DOMAIN_CATEGORY(" <Wakeup Info> ");
> +#endif
> +DOMAIN_FIELD(__u32, ttwu_wake_remote,
> + "try_to_wake_up() awoke a task that last ran on a diff cpu", "%11u", false, v15);
> +DOMAIN_FIELD(__u32, ttwu_move_affine,
> + "try_to_wake_up() moved task because cache-cold on own cpu", "%11u", false, v15);
> +DOMAIN_FIELD(__u32, ttwu_move_balance,
> + "try_to_wake_up() started passive balancing", "%11u", false, v15);
> +#endif /* DOMAIN_FIELD */
> diff --git a/tools/lib/perf/include/perf/schedstat-v16.h b/tools/lib/perf/include/perf/schedstat-v16.h
> index d6a4691b2fd5..5ba53bd7d61a 100644
> --- a/tools/lib/perf/include/perf/schedstat-v16.h
> +++ b/tools/lib/perf/include/perf/schedstat-v16.h
> @@ -1,52 +1,142 @@
> /* SPDX-License-Identifier: GPL-2.0 */
>
> #ifdef CPU_FIELD
> -CPU_FIELD(__u32, yld_count, v16);
> -CPU_FIELD(__u32, array_exp, v16);
> -CPU_FIELD(__u32, sched_count, v16);
> -CPU_FIELD(__u32, sched_goidle, v16);
> -CPU_FIELD(__u32, ttwu_count, v16);
> -CPU_FIELD(__u32, ttwu_local, v16);
> -CPU_FIELD(__u64, rq_cpu_time, v16);
> -CPU_FIELD(__u64, run_delay, v16);
> -CPU_FIELD(__u64, pcount, v16);
> -#endif
> +CPU_FIELD(__u32, yld_count, "sched_yield() count",
> + "%11u", false, yld_count, v16);
> +CPU_FIELD(__u32, array_exp, "Legacy counter can be ignored",
> + "%11u", false, array_exp, v16);
> +CPU_FIELD(__u32, sched_count, "schedule() called",
> + "%11u", false, sched_count, v16);
> +CPU_FIELD(__u32, sched_goidle, "schedule() left the processor idle",
> + "%11u", true, sched_count, v16);
> +CPU_FIELD(__u32, ttwu_count, "try_to_wake_up() was called",
> + "%11u", false, ttwu_count, v16);
> +CPU_FIELD(__u32, ttwu_local, "try_to_wake_up() was called to wake up the local cpu",
> + "%11u", true, ttwu_count, v16);
> +CPU_FIELD(__u64, rq_cpu_time, "total runtime by tasks on this processor (in jiffies)",
> + "%11llu", false, rq_cpu_time, v16);
> +CPU_FIELD(__u64, run_delay, "total waittime by tasks on this processor (in jiffies)",
> + "%11llu", true, rq_cpu_time, v16);
> +CPU_FIELD(__u64, pcount, "total timeslices run on this cpu",
> + "%11llu", false, pcount, v16);
> +#endif /* CPU_FIELD */
>
> #ifdef DOMAIN_FIELD
> -DOMAIN_FIELD(__u32, busy_lb_count, v16);
> -DOMAIN_FIELD(__u32, busy_lb_balanced, v16);
> -DOMAIN_FIELD(__u32, busy_lb_failed, v16);
> -DOMAIN_FIELD(__u32, busy_lb_imbalance, v16);
> -DOMAIN_FIELD(__u32, busy_lb_gained, v16);
> -DOMAIN_FIELD(__u32, busy_lb_hot_gained, v16);
> -DOMAIN_FIELD(__u32, busy_lb_nobusyq, v16);
> -DOMAIN_FIELD(__u32, busy_lb_nobusyg, v16);
> -DOMAIN_FIELD(__u32, idle_lb_count, v16);
> -DOMAIN_FIELD(__u32, idle_lb_balanced, v16);
> -DOMAIN_FIELD(__u32, idle_lb_failed, v16);
> -DOMAIN_FIELD(__u32, idle_lb_imbalance, v16);
> -DOMAIN_FIELD(__u32, idle_lb_gained, v16);
> -DOMAIN_FIELD(__u32, idle_lb_hot_gained, v16);
> -DOMAIN_FIELD(__u32, idle_lb_nobusyq, v16);
> -DOMAIN_FIELD(__u32, idle_lb_nobusyg, v16);
> -DOMAIN_FIELD(__u32, newidle_lb_count, v16);
> -DOMAIN_FIELD(__u32, newidle_lb_balanced, v16);
> -DOMAIN_FIELD(__u32, newidle_lb_failed, v16);
> -DOMAIN_FIELD(__u32, newidle_lb_imbalance, v16);
> -DOMAIN_FIELD(__u32, newidle_lb_gained, v16);
> -DOMAIN_FIELD(__u32, newidle_lb_hot_gained, v16);
> -DOMAIN_FIELD(__u32, newidle_lb_nobusyq, v16);
> -DOMAIN_FIELD(__u32, newidle_lb_nobusyg, v16);
> -DOMAIN_FIELD(__u32, alb_count, v16);
> -DOMAIN_FIELD(__u32, alb_failed, v16);
> -DOMAIN_FIELD(__u32, alb_pushed, v16);
> -DOMAIN_FIELD(__u32, sbe_count, v16);
> -DOMAIN_FIELD(__u32, sbe_balanced, v16);
> -DOMAIN_FIELD(__u32, sbe_pushed, v16);
> -DOMAIN_FIELD(__u32, sbf_count, v16);
> -DOMAIN_FIELD(__u32, sbf_balanced, v16);
> -DOMAIN_FIELD(__u32, sbf_pushed, v16);
> -DOMAIN_FIELD(__u32, ttwu_wake_remote, v16);
> -DOMAIN_FIELD(__u32, ttwu_move_affine, v16);
> -DOMAIN_FIELD(__u32, ttwu_move_balance, v16);
> +#ifdef DOMAIN_CATEGORY
> +DOMAIN_CATEGORY(" <Category busy> ");
> +#endif
> +DOMAIN_FIELD(__u32, busy_lb_count,
> + "load_balance() count on cpu busy", "%11u", true, v16);
> +DOMAIN_FIELD(__u32, busy_lb_balanced,
> + "load_balance() found balanced on cpu busy", "%11u", true, v16);
> +DOMAIN_FIELD(__u32, busy_lb_failed,
> + "load_balance() move task failed on cpu busy", "%11u", true, v16);
> +DOMAIN_FIELD(__u32, busy_lb_imbalance,
> + "imbalance sum on cpu busy", "%11u", false, v16);
> +DOMAIN_FIELD(__u32, busy_lb_gained,
> + "pull_task() count on cpu busy", "%11u", false, v16);
> +DOMAIN_FIELD(__u32, busy_lb_hot_gained,
> + "pull_task() when target task was cache-hot on cpu busy", "%11u", false, v16);
> +DOMAIN_FIELD(__u32, busy_lb_nobusyq,
> + "load_balance() failed to find busier queue on cpu busy", "%11u", true, v16);
> +DOMAIN_FIELD(__u32, busy_lb_nobusyg,
> + "load_balance() failed to find busier group on cpu busy", "%11u", true, v16);
> +#ifdef DERIVED_CNT_FIELD
> +DERIVED_CNT_FIELD("load_balance() success count on cpu busy", "%11u",
> + busy_lb_count, busy_lb_balanced, busy_lb_failed, v16);
> +#endif
> +#ifdef DERIVED_AVG_FIELD
> +DERIVED_AVG_FIELD("avg task pulled per successful lb attempt (cpu busy)", "%11.2Lf",
> + busy_lb_count, busy_lb_balanced, busy_lb_failed, busy_lb_gained, v16);
> +#endif
> +#ifdef DOMAIN_CATEGORY
> +DOMAIN_CATEGORY(" <Category idle> ");
> +#endif
> +DOMAIN_FIELD(__u32, idle_lb_count,
> + "load_balance() count on cpu idle", "%11u", true, v16);
> +DOMAIN_FIELD(__u32, idle_lb_balanced,
> + "load_balance() found balanced on cpu idle", "%11u", true, v16);
> +DOMAIN_FIELD(__u32, idle_lb_failed,
> + "load_balance() move task failed on cpu idle", "%11u", true, v16);
> +DOMAIN_FIELD(__u32, idle_lb_imbalance,
> + "imbalance sum on cpu idle", "%11u", false, v16);
> +DOMAIN_FIELD(__u32, idle_lb_gained,
> + "pull_task() count on cpu idle", "%11u", false, v16);
> +DOMAIN_FIELD(__u32, idle_lb_hot_gained,
> + "pull_task() when target task was cache-hot on cpu idle", "%11u", false, v16);
> +DOMAIN_FIELD(__u32, idle_lb_nobusyq,
> + "load_balance() failed to find busier queue on cpu idle", "%11u", true, v16);
> +DOMAIN_FIELD(__u32, idle_lb_nobusyg,
> + "load_balance() failed to find busier group on cpu idle", "%11u", true, v16);
> +#ifdef DERIVED_CNT_FIELD
> +DERIVED_CNT_FIELD("load_balance() success count on cpu idle", "%11u",
> + idle_lb_count, idle_lb_balanced, idle_lb_failed, v16);
> +#endif
> +#ifdef DERIVED_AVG_FIELD
> +DERIVED_AVG_FIELD("avg task pulled per successful lb attempt (cpu idle)", "%11.2Lf",
> + idle_lb_count, idle_lb_balanced, idle_lb_failed, idle_lb_gained, v16);
> +#endif
> +#ifdef DOMAIN_CATEGORY
> +DOMAIN_CATEGORY(" <Category newidle> ");
> +#endif
> +DOMAIN_FIELD(__u32, newidle_lb_count,
> + "load_balance() count on cpu newly idle", "%11u", true, v16);
> +DOMAIN_FIELD(__u32, newidle_lb_balanced,
> + "load_balance() found balanced on cpu newly idle", "%11u", true, v16);
> +DOMAIN_FIELD(__u32, newidle_lb_failed,
> + "load_balance() move task failed on cpu newly idle", "%11u", true, v16);
> +DOMAIN_FIELD(__u32, newidle_lb_imbalance,
> + "imbalance sum on cpu newly idle", "%11u", false, v16);
> +DOMAIN_FIELD(__u32, newidle_lb_gained,
> + "pull_task() count on cpu newly idle", "%11u", false, v16);
> +DOMAIN_FIELD(__u32, newidle_lb_hot_gained,
> + "pull_task() when target task was cache-hot on cpu newly idle", "%11u", false, v16);
> +DOMAIN_FIELD(__u32, newidle_lb_nobusyq,
> + "load_balance() failed to find busier queue on cpu newly idle", "%11u", true, v16);
> +DOMAIN_FIELD(__u32, newidle_lb_nobusyg,
> + "load_balance() failed to find busier group on cpu newly idle", "%11u", true, v16);
> +#ifdef DERIVED_CNT_FIELD
> +DERIVED_CNT_FIELD("load_balance() success count on cpu newly idle", "%11u",
> + newidle_lb_count, newidle_lb_balanced, newidle_lb_failed, v16);
> +#endif
> +#ifdef DERIVED_AVG_FIELD
> +DERIVED_AVG_FIELD("avg task pulled per successful lb attempt (cpu newly idle)", "%11.2Lf",
> + newidle_lb_count, newidle_lb_balanced, newidle_lb_failed, newidle_lb_gained, v16);
> +#endif
> +#ifdef DOMAIN_CATEGORY
> +DOMAIN_CATEGORY(" <Category active_load_balance()> ");
> +#endif
> +DOMAIN_FIELD(__u32, alb_count,
> + "active_load_balance() count", "%11u", false, v16);
> +DOMAIN_FIELD(__u32, alb_failed,
> + "active_load_balance() move task failed", "%11u", false, v16);
> +DOMAIN_FIELD(__u32, alb_pushed,
> + "active_load_balance() successfully moved a task", "%11u", false, v16);
> +#ifdef DOMAIN_CATEGORY
> +DOMAIN_CATEGORY(" <Category sched_balance_exec()> ");
> +#endif
> +DOMAIN_FIELD(__u32, sbe_count,
> + "sbe_count is not used", "%11u", false, v16);
> +DOMAIN_FIELD(__u32, sbe_balanced,
> + "sbe_balanced is not used", "%11u", false, v16);
> +DOMAIN_FIELD(__u32, sbe_pushed,
> + "sbe_pushed is not used", "%11u", false, v16);
> +#ifdef DOMAIN_CATEGORY
> +DOMAIN_CATEGORY(" <Category sched_balance_fork()> ");
> +#endif
> +DOMAIN_FIELD(__u32, sbf_count,
> + "sbf_count is not used", "%11u", false, v16);
> +DOMAIN_FIELD(__u32, sbf_balanced,
> + "sbf_balanced is not used", "%11u", false, v16);
> +DOMAIN_FIELD(__u32, sbf_pushed,
> + "sbf_pushed is not used", "%11u", false, v16);
> +#ifdef DOMAIN_CATEGORY
> +DOMAIN_CATEGORY(" <Wakeup Info> ");
> #endif
> +DOMAIN_FIELD(__u32, ttwu_wake_remote,
> + "try_to_wake_up() awoke a task that last ran on a diff cpu", "%11u", false, v16);
> +DOMAIN_FIELD(__u32, ttwu_move_affine,
> + "try_to_wake_up() moved task because cache-cold on own cpu", "%11u", false, v16);
> +DOMAIN_FIELD(__u32, ttwu_move_balance,
> + "try_to_wake_up() started passive balancing", "%11u", false, v16);
> +#endif /* DOMAIN_FIELD */
> diff --git a/tools/lib/perf/include/perf/schedstat-v17.h b/tools/lib/perf/include/perf/schedstat-v17.h
> index 851d4f1f4ecb..00009bd5f006 100644
> --- a/tools/lib/perf/include/perf/schedstat-v17.h
> +++ b/tools/lib/perf/include/perf/schedstat-v17.h
> @@ -1,61 +1,160 @@
> /* SPDX-License-Identifier: GPL-2.0 */
>
> #ifdef CPU_FIELD
> -CPU_FIELD(__u32, yld_count, v17);
> -CPU_FIELD(__u32, array_exp, v17);
> -CPU_FIELD(__u32, sched_count, v17);
> -CPU_FIELD(__u32, sched_goidle, v17);
> -CPU_FIELD(__u32, ttwu_count, v17);
> -CPU_FIELD(__u32, ttwu_local, v17);
> -CPU_FIELD(__u64, rq_cpu_time, v17);
> -CPU_FIELD(__u64, run_delay, v17);
> -CPU_FIELD(__u64, pcount, v17);
> -#endif
> +CPU_FIELD(__u32, yld_count, "sched_yield() count",
> + "%11u", false, yld_count, v17);
> +CPU_FIELD(__u32, array_exp, "Legacy counter can be ignored",
> + "%11u", false, array_exp, v17);
> +CPU_FIELD(__u32, sched_count, "schedule() called",
> + "%11u", false, sched_count, v17);
> +CPU_FIELD(__u32, sched_goidle, "schedule() left the processor idle",
> + "%11u", true, sched_count, v17);
> +CPU_FIELD(__u32, ttwu_count, "try_to_wake_up() was called",
> + "%11u", false, ttwu_count, v17);
> +CPU_FIELD(__u32, ttwu_local, "try_to_wake_up() was called to wake up the local cpu",
> + "%11u", true, ttwu_count, v17);
> +CPU_FIELD(__u64, rq_cpu_time, "total runtime by tasks on this processor (in jiffies)",
> + "%11llu", false, rq_cpu_time, v17);
> +CPU_FIELD(__u64, run_delay, "total waittime by tasks on this processor (in jiffies)",
> + "%11llu", true, rq_cpu_time, v17);
> +CPU_FIELD(__u64, pcount, "total timeslices run on this cpu",
> + "%11llu", false, pcount, v17);
> +#endif /* CPU_FIELD */
>
> #ifdef DOMAIN_FIELD
> -DOMAIN_FIELD(__u32, busy_lb_count, v17);
> -DOMAIN_FIELD(__u32, busy_lb_balanced, v17);
> -DOMAIN_FIELD(__u32, busy_lb_failed, v17);
> -DOMAIN_FIELD(__u32, busy_lb_imbalance_load, v17);
> -DOMAIN_FIELD(__u32, busy_lb_imbalance_util, v17);
> -DOMAIN_FIELD(__u32, busy_lb_imbalance_task, v17);
> -DOMAIN_FIELD(__u32, busy_lb_imbalance_misfit, v17);
> -DOMAIN_FIELD(__u32, busy_lb_gained, v17);
> -DOMAIN_FIELD(__u32, busy_lb_hot_gained, v17);
> -DOMAIN_FIELD(__u32, busy_lb_nobusyq, v17);
> -DOMAIN_FIELD(__u32, busy_lb_nobusyg, v17);
> -DOMAIN_FIELD(__u32, idle_lb_count, v17);
> -DOMAIN_FIELD(__u32, idle_lb_balanced, v17);
> -DOMAIN_FIELD(__u32, idle_lb_failed, v17);
> -DOMAIN_FIELD(__u32, idle_lb_imbalance_load, v17);
> -DOMAIN_FIELD(__u32, idle_lb_imbalance_util, v17);
> -DOMAIN_FIELD(__u32, idle_lb_imbalance_task, v17);
> -DOMAIN_FIELD(__u32, idle_lb_imbalance_misfit, v17);
> -DOMAIN_FIELD(__u32, idle_lb_gained, v17);
> -DOMAIN_FIELD(__u32, idle_lb_hot_gained, v17);
> -DOMAIN_FIELD(__u32, idle_lb_nobusyq, v17);
> -DOMAIN_FIELD(__u32, idle_lb_nobusyg, v17);
> -DOMAIN_FIELD(__u32, newidle_lb_count, v17);
> -DOMAIN_FIELD(__u32, newidle_lb_balanced, v17);
> -DOMAIN_FIELD(__u32, newidle_lb_failed, v17);
> -DOMAIN_FIELD(__u32, newidle_lb_imbalance_load, v17);
> -DOMAIN_FIELD(__u32, newidle_lb_imbalance_util, v17);
> -DOMAIN_FIELD(__u32, newidle_lb_imbalance_task, v17);
> -DOMAIN_FIELD(__u32, newidle_lb_imbalance_misfit, v17);
> -DOMAIN_FIELD(__u32, newidle_lb_gained, v17);
> -DOMAIN_FIELD(__u32, newidle_lb_hot_gained, v17);
> -DOMAIN_FIELD(__u32, newidle_lb_nobusyq, v17);
> -DOMAIN_FIELD(__u32, newidle_lb_nobusyg, v17);
> -DOMAIN_FIELD(__u32, alb_count, v17);
> -DOMAIN_FIELD(__u32, alb_failed, v17);
> -DOMAIN_FIELD(__u32, alb_pushed, v17);
> -DOMAIN_FIELD(__u32, sbe_count, v17);
> -DOMAIN_FIELD(__u32, sbe_balanced, v17);
> -DOMAIN_FIELD(__u32, sbe_pushed, v17);
> -DOMAIN_FIELD(__u32, sbf_count, v17);
> -DOMAIN_FIELD(__u32, sbf_balanced, v17);
> -DOMAIN_FIELD(__u32, sbf_pushed, v17);
> -DOMAIN_FIELD(__u32, ttwu_wake_remote, v17);
> -DOMAIN_FIELD(__u32, ttwu_move_affine, v17);
> -DOMAIN_FIELD(__u32, ttwu_move_balance, v17);
> +#ifdef DOMAIN_CATEGORY
> +DOMAIN_CATEGORY(" <Category busy> ");
> +#endif
> +DOMAIN_FIELD(__u32, busy_lb_count,
> + "load_balance() count on cpu busy", "%11u", true, v17);
> +DOMAIN_FIELD(__u32, busy_lb_balanced,
> + "load_balance() found balanced on cpu busy", "%11u", true, v17);
> +DOMAIN_FIELD(__u32, busy_lb_failed,
> + "load_balance() move task failed on cpu busy", "%11u", true, v17);
> +DOMAIN_FIELD(__u32, busy_lb_imbalance_load,
> + "imbalance in load on cpu busy", "%11u", false, v17);
> +DOMAIN_FIELD(__u32, busy_lb_imbalance_util,
> + "imbalance in utilization on cpu busy", "%11u", false, v17);
> +DOMAIN_FIELD(__u32, busy_lb_imbalance_task,
> + "imbalance in number of tasks on cpu busy", "%11u", false, v17);
> +DOMAIN_FIELD(__u32, busy_lb_imbalance_misfit,
> + "imbalance in misfit tasks on cpu busy", "%11u", false, v17);
> +DOMAIN_FIELD(__u32, busy_lb_gained,
> + "pull_task() count on cpu busy", "%11u", false, v17);
> +DOMAIN_FIELD(__u32, busy_lb_hot_gained,
> + "pull_task() when target task was cache-hot on cpu busy", "%11u", false, v17);
> +DOMAIN_FIELD(__u32, busy_lb_nobusyq,
> + "load_balance() failed to find busier queue on cpu busy", "%11u", true, v17);
> +DOMAIN_FIELD(__u32, busy_lb_nobusyg,
> + "load_balance() failed to find busier group on cpu busy", "%11u", true, v17);
> +#ifdef DERIVED_CNT_FIELD
> +DERIVED_CNT_FIELD("load_balance() success count on cpu busy", "%11u",
> + busy_lb_count, busy_lb_balanced, busy_lb_failed, v17);
> +#endif
> +#ifdef DERIVED_AVG_FIELD
> +DERIVED_AVG_FIELD("avg task pulled per successful lb attempt (cpu busy)", "%11.2Lf",
> + busy_lb_count, busy_lb_balanced, busy_lb_failed, busy_lb_gained, v17);
> +#endif
> +#ifdef DOMAIN_CATEGORY
> +DOMAIN_CATEGORY(" <Category idle> ");
> +#endif
> +DOMAIN_FIELD(__u32, idle_lb_count,
> + "load_balance() count on cpu idle", "%11u", true, v17);
> +DOMAIN_FIELD(__u32, idle_lb_balanced,
> + "load_balance() found balanced on cpu idle", "%11u", true, v17);
> +DOMAIN_FIELD(__u32, idle_lb_failed,
> + "load_balance() move task failed on cpu idle", "%11u", true, v17);
> +DOMAIN_FIELD(__u32, idle_lb_imbalance_load,
> + "imbalance in load on cpu idle", "%11u", false, v17);
> +DOMAIN_FIELD(__u32, idle_lb_imbalance_util,
> + "imbalance in utilization on cpu idle", "%11u", false, v17);
> +DOMAIN_FIELD(__u32, idle_lb_imbalance_task,
> + "imbalance in number of tasks on cpu idle", "%11u", false, v17);
> +DOMAIN_FIELD(__u32, idle_lb_imbalance_misfit,
> + "imbalance in misfit tasks on cpu idle", "%11u", false, v17);
> +DOMAIN_FIELD(__u32, idle_lb_gained,
> + "pull_task() count on cpu idle", "%11u", false, v17);
> +DOMAIN_FIELD(__u32, idle_lb_hot_gained,
> + "pull_task() when target task was cache-hot on cpu idle", "%11u", false, v17);
> +DOMAIN_FIELD(__u32, idle_lb_nobusyq,
> + "load_balance() failed to find busier queue on cpu idle", "%11u", true, v17);
> +DOMAIN_FIELD(__u32, idle_lb_nobusyg,
> + "load_balance() failed to find busier group on cpu idle", "%11u", true, v17);
> +#ifdef DERIVED_CNT_FIELD
> +DERIVED_CNT_FIELD("load_balance() success count on cpu idle", "%11u",
> + idle_lb_count, idle_lb_balanced, idle_lb_failed, v17);
> +#endif
> +#ifdef DERIVED_AVG_FIELD
> +DERIVED_AVG_FIELD("avg task pulled per successful lb attempt (cpu idle)", "%11.2Lf",
> + idle_lb_count, idle_lb_balanced, idle_lb_failed, idle_lb_gained, v17);
> +#endif
> +#ifdef DOMAIN_CATEGORY
> +DOMAIN_CATEGORY(" <Category newidle> ");
> +#endif
> +DOMAIN_FIELD(__u32, newidle_lb_count,
> + "load_balance() count on cpu newly idle", "%11u", true, v17);
> +DOMAIN_FIELD(__u32, newidle_lb_balanced,
> + "load_balance() found balanced on cpu newly idle", "%11u", true, v17);
> +DOMAIN_FIELD(__u32, newidle_lb_failed,
> + "load_balance() move task failed on cpu newly idle", "%11u", true, v17);
> +DOMAIN_FIELD(__u32, newidle_lb_imbalance_load,
> + "imbalance in load on cpu newly idle", "%11u", false, v17);
> +DOMAIN_FIELD(__u32, newidle_lb_imbalance_util,
> + "imbalance in utilization on cpu newly idle", "%11u", false, v17);
> +DOMAIN_FIELD(__u32, newidle_lb_imbalance_task,
> + "imbalance in number of tasks on cpu newly idle", "%11u", false, v17);
> +DOMAIN_FIELD(__u32, newidle_lb_imbalance_misfit,
> + "imbalance in misfit tasks on cpu newly idle", "%11u", false, v17);
> +DOMAIN_FIELD(__u32, newidle_lb_gained,
> + "pull_task() count on cpu newly idle", "%11u", false, v17);
> +DOMAIN_FIELD(__u32, newidle_lb_hot_gained,
> + "pull_task() when target task was cache-hot on cpu newly idle", "%11u", false, v17);
> +DOMAIN_FIELD(__u32, newidle_lb_nobusyq,
> + "load_balance() failed to find busier queue on cpu newly idle", "%11u", true, v17);
> +DOMAIN_FIELD(__u32, newidle_lb_nobusyg,
> + "load_balance() failed to find busier group on cpu newly idle", "%11u", true, v17);
> +#ifdef DERIVED_CNT_FIELD
> +DERIVED_CNT_FIELD("load_balance() success count on cpu newly idle", "%11u",
> + newidle_lb_count, newidle_lb_balanced, newidle_lb_failed, v17);
> +#endif
> +#ifdef DERIVED_AVG_FIELD
> +DERIVED_AVG_FIELD("avg task pulled per successful lb attempt (cpu newly idle)", "%11.2Lf",
> + newidle_lb_count, newidle_lb_balanced, newidle_lb_failed, newidle_lb_gained, v17);
> +#endif
> +#ifdef DOMAIN_CATEGORY
> +DOMAIN_CATEGORY(" <Category active_load_balance()> ");
> +#endif
> +DOMAIN_FIELD(__u32, alb_count,
> + "active_load_balance() count", "%11u", false, v17);
> +DOMAIN_FIELD(__u32, alb_failed,
> + "active_load_balance() move task failed", "%11u", false, v17);
> +DOMAIN_FIELD(__u32, alb_pushed,
> + "active_load_balance() successfully moved a task", "%11u", false, v17);
> +#ifdef DOMAIN_CATEGORY
> +DOMAIN_CATEGORY(" <Category sched_balance_exec()> ");
> +#endif
> +DOMAIN_FIELD(__u32, sbe_count,
> + "sbe_count is not used", "%11u", false, v17);
> +DOMAIN_FIELD(__u32, sbe_balanced,
> + "sbe_balanced is not used", "%11u", false, v17);
> +DOMAIN_FIELD(__u32, sbe_pushed,
> + "sbe_pushed is not used", "%11u", false, v17);
> +#ifdef DOMAIN_CATEGORY
> +DOMAIN_CATEGORY(" <Category sched_balance_fork()> ");
> +#endif
> +DOMAIN_FIELD(__u32, sbf_count,
> + "sbf_count is not used", "%11u", false, v17);
> +DOMAIN_FIELD(__u32, sbf_balanced,
> + "sbf_balanced is not used", "%11u", false, v17);
> +DOMAIN_FIELD(__u32, sbf_pushed,
> + "sbf_pushed is not used", "%11u", false, v17);
> +#ifdef DOMAIN_CATEGORY
> +DOMAIN_CATEGORY(" <Wakeup Info> ");
> #endif
> +DOMAIN_FIELD(__u32, ttwu_wake_remote,
> + "try_to_wake_up() awoke a task that last ran on a diff cpu", "%11u", false, v17);
> +DOMAIN_FIELD(__u32, ttwu_move_affine,
> + "try_to_wake_up() moved task because cache-cold on own cpu", "%11u", false, v17);
> +DOMAIN_FIELD(__u32, ttwu_move_balance,
> + "try_to_wake_up() started passive balancing", "%11u", false, v17);
> +#endif /* DOMAIN_FIELD */
Probably better to put this in the previous commits.
> diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
> index 1c3b56013164..e2e7dbc4f0aa 100644
> --- a/tools/perf/builtin-sched.c
> +++ b/tools/perf/builtin-sched.c
> @@ -3869,6 +3869,501 @@ static int perf_sched__schedstat_record(struct perf_sched *sched,
> return err;
> }
>
> +struct schedstat_domain {
> + struct perf_record_schedstat_domain *domain_data;
> + struct schedstat_domain *next;
> +};
> +
> +struct schedstat_cpu {
> + struct perf_record_schedstat_cpu *cpu_data;
> + struct schedstat_domain *domain_head;
> + struct schedstat_cpu *next;
> +};
> +
> +struct schedstat_cpu *cpu_head = NULL, *cpu_tail = NULL, *cpu_second_pass = NULL;
> +struct schedstat_domain *domain_tail = NULL, *domain_second_pass = NULL;
No need to initialize these to NULL (globals are zero-initialized). Also
please add some comments on how those structs and lists are used.
> +bool after_workload_flag;
> +
> +static void store_schedtstat_cpu_diff(struct schedstat_cpu *after_workload)
> +{
> + struct perf_record_schedstat_cpu *before = cpu_second_pass->cpu_data;
> + struct perf_record_schedstat_cpu *after = after_workload->cpu_data;
> + __u16 version = after_workload->cpu_data->version;
> +
> +#define CPU_FIELD(_type, _name, _desc, _format, _is_pct, _pct_of, _ver) \
> + (before->_ver._name = after->_ver._name - before->_ver._name)
> +
> + if (version == 15) {
> +#include <perf/schedstat-v15.h>
> + } else if (version == 16) {
> +#include <perf/schedstat-v16.h>
> + } else if (version == 17) {
> +#include <perf/schedstat-v17.h>
> + }
> +
> +#undef CPU_FIELD
> +}
> +
> +static void store_schedstat_domain_diff(struct schedstat_domain *after_workload)
> +{
> + struct perf_record_schedstat_domain *before = domain_second_pass->domain_data;
> + struct perf_record_schedstat_domain *after = after_workload->domain_data;
> + __u16 version = after_workload->domain_data->version;
> +
> +#define DOMAIN_FIELD(_type, _name, _desc, _format, _is_jiffies, _ver) \
> + (before->_ver._name = after->_ver._name - before->_ver._name)
> +
> + if (version == 15) {
> +#include <perf/schedstat-v15.h>
> + } else if (version == 16) {
> +#include <perf/schedstat-v16.h>
> + } else if (version == 17) {
> +#include <perf/schedstat-v17.h>
> + }
> +#undef DOMAIN_FIELD
> +}
> +
> +static void print_separator(size_t pre_dash_cnt, const char *s, size_t post_dash_cnt)
> +{
> + size_t i;
> +
> + for (i = 0; i < pre_dash_cnt; ++i)
> + printf("-");
> +
> + printf("%s", s);
> +
> + for (i = 0; i < post_dash_cnt; ++i)
> + printf("-");
> +
> + printf("\n");
This can be simplified (note that `%.*s` takes an int precision, so the
size_t counts need a cast):
printf("%.*s%s%.*s\n", (int)pre_dash_cnt, graph_dotted_line, s,
(int)post_dash_cnt, graph_dotted_line);
> +}
> +
> +static inline void print_cpu_stats(struct perf_record_schedstat_cpu *cs)
> +{
> + printf("%-65s %12s %12s\n", "DESC", "COUNT", "PCT_CHANGE");
> + print_separator(100, "", 0);
printf("%.*s\n", 100, graph_dotted_line);
You can define a macro for the length (100) as it's used in other places
too.
> +
> +#define CALC_PCT(_x, _y) ((_y) ? ((double)(_x) / (_y)) * 100 : 0.0)
> +
> +#define CPU_FIELD(_type, _name, _desc, _format, _is_pct, _pct_of, _ver) \
> + do { \
> + printf("%-65s: " _format, _desc, cs->_ver._name); \
> + if (_is_pct) { \
> + printf(" ( %8.2lf%% )", \
> + CALC_PCT(cs->_ver._name, cs->_ver._pct_of)); \
> + } \
> + printf("\n"); \
> + } while (0)
> +
> + if (cs->version == 15) {
> +#include <perf/schedstat-v15.h>
> + } else if (cs->version == 16) {
> +#include <perf/schedstat-v16.h>
> + } else if (cs->version == 17) {
> +#include <perf/schedstat-v17.h>
> + }
> +
> +#undef CPU_FIELD
> +#undef CALC_PCT
> +}
> +
> +static inline void print_domain_stats(struct perf_record_schedstat_domain *ds,
> + __u64 jiffies)
> +{
> + printf("%-65s %12s %14s\n", "DESC", "COUNT", "AVG_JIFFIES");
> +
> +#define DOMAIN_CATEGORY(_desc) \
> + do { \
> + size_t _len = strlen(_desc); \
> + size_t _pre_dash_cnt = (100 - _len) / 2; \
> + size_t _post_dash_cnt = 100 - _len - _pre_dash_cnt; \
> + print_separator(_pre_dash_cnt, _desc, _post_dash_cnt); \
This can be useful in other places, can you please factor it out as a
function somewhere in util.c?
> + } while (0)
> +
> +#define CALC_AVG(_x, _y) ((_y) ? (long double)(_x) / (_y) : 0.0)
> +
> +#define DOMAIN_FIELD(_type, _name, _desc, _format, _is_jiffies, _ver) \
> + do { \
> + printf("%-65s: " _format, _desc, ds->_ver._name); \
> + if (_is_jiffies) { \
> + printf(" $ %11.2Lf $", \
> + CALC_AVG(jiffies, ds->_ver._name)); \
> + } \
> + printf("\n"); \
> + } while (0)
> +
> +#define DERIVED_CNT_FIELD(_desc, _format, _x, _y, _z, _ver) \
> + printf("*%-64s: " _format "\n", _desc, \
> + (ds->_ver._x) - (ds->_ver._y) - (ds->_ver._z))
> +
> +#define DERIVED_AVG_FIELD(_desc, _format, _x, _y, _z, _w, _ver) \
> + printf("*%-64s: " _format "\n", _desc, CALC_AVG(ds->_ver._w, \
> + ((ds->_ver._x) - (ds->_ver._y) - (ds->_ver._z))))
> +
> + if (ds->version == 15) {
> +#include <perf/schedstat-v15.h>
> + } else if (ds->version == 16) {
> +#include <perf/schedstat-v16.h>
> + } else if (ds->version == 17) {
> +#include <perf/schedstat-v17.h>
> + }
> +
> +#undef DERIVED_AVG_FIELD
> +#undef DERIVED_CNT_FIELD
> +#undef DOMAIN_FIELD
> +#undef CALC_AVG
> +#undef DOMAIN_CATEGORY
> +}
> +
> +static void print_domain_cpu_list(struct perf_record_schedstat_domain *ds)
> +{
> + char bin[16][5] = {"0000", "0001", "0010", "0011",
> + "0100", "0101", "0110", "0111",
> + "1000", "1001", "1010", "1011",
> + "1100", "1101", "1110", "1111"};
> + bool print_flag = false, low = true;
> + int cpu = 0, start, end, idx;
> +
> + idx = ((ds->nr_cpus + 7) >> 3) - 1;
> +
> + printf("<");
> + while (idx >= 0) {
> + __u8 index;
> +
> + if (low)
> + index = ds->cpu_mask[idx] & 0xf;
> + else
> + index = (ds->cpu_mask[idx--] & 0xf0) >> 4;
Isn't ds->cpu_mask a bitmap? Can we use bitmap_scnprintf() or
something?
> +
> + for (int i = 3; i >= 0; i--) {
> + if (!print_flag && bin[index][i] == '1') {
> + start = cpu;
> + print_flag = true;
> + } else if (print_flag && bin[index][i] == '0') {
> + end = cpu - 1;
> + print_flag = false;
> + if (start == end)
> + printf("%d, ", start);
> + else
> + printf("%d-%d, ", start, end);
> + }
> + cpu++;
> + }
> +
> + low = !low;
> + }
> +
> + if (print_flag) {
> + if (start == cpu-1)
> + printf("%d, ", start);
> + else
> + printf("%d-%d, ", start, cpu-1);
> + }
> + printf("\b\b>\n");
> +}
> +
> +static void summarize_schedstat_cpu(struct schedstat_cpu *summary_cpu,
> + struct schedstat_cpu *cptr,
> + int cnt, bool is_last)
> +{
> + struct perf_record_schedstat_cpu *summary_cs = summary_cpu->cpu_data,
> + *temp_cs = cptr->cpu_data;
> +
> +#define CPU_FIELD(_type, _name, _desc, _format, _is_pct, _pct_of, _ver) \
> + do { \
> + summary_cs->_ver._name += temp_cs->_ver._name; \
> + if (is_last) \
> + summary_cs->_ver._name /= cnt; \
> + } while (0)
> +
> + if (cptr->cpu_data->version == 15) {
> +#include <perf/schedstat-v15.h>
> + } else if (cptr->cpu_data->version == 16) {
> +#include <perf/schedstat-v16.h>
> + } else if (cptr->cpu_data->version == 17) {
> +#include <perf/schedstat-v17.h>
> + }
> +#undef CPU_FIELD
> +}
> +
> +static void summarize_schedstat_domain(struct schedstat_domain *summary_domain,
> + struct schedstat_domain *dptr,
> + int cnt, bool is_last)
> +{
> + struct perf_record_schedstat_domain *summary_ds = summary_domain->domain_data,
> + *temp_ds = dptr->domain_data;
> +
> +#define DOMAIN_FIELD(_type, _name, _desc, _format, _is_jiffies, _ver) \
> + do { \
> + summary_ds->_ver._name += temp_ds->_ver._name; \
> + if (is_last) \
> + summary_ds->_ver._name /= cnt; \
> + } while (0)
> +
> + if (dptr->domain_data->version == 15) {
> +#include <perf/schedstat-v15.h>
> + } else if (dptr->domain_data->version == 16) {
> +#include <perf/schedstat-v16.h>
> + } else if (dptr->domain_data->version == 17) {
> +#include <perf/schedstat-v17.h>
> + }
> +#undef DOMAIN_FIELD
> +}
> +
> +static void get_all_cpu_stats(struct schedstat_cpu **cptr)
> +{
> + struct schedstat_domain *dptr = NULL, *tdptr = NULL, *dtail = NULL;
> + struct schedstat_cpu *tcptr = *cptr, *summary_head = NULL;
> + struct perf_record_schedstat_domain *ds = NULL;
> + struct perf_record_schedstat_cpu *cs = NULL;
> + bool is_last = false;
> + int cnt = 0;
> +
> + if (tcptr) {
> + summary_head = zalloc(sizeof(*summary_head));
> + summary_head->cpu_data = zalloc(sizeof(*cs));
No error handling.
> + memcpy(summary_head->cpu_data, tcptr->cpu_data, sizeof(*cs));
> + summary_head->next = NULL;
> + summary_head->domain_head = NULL;
> + dptr = tcptr->domain_head;
> +
> + while (dptr) {
> + size_t cpu_mask_size = (dptr->domain_data->nr_cpus + 7) >> 3;
> +
> + tdptr = zalloc(sizeof(*tdptr));
> + tdptr->domain_data = zalloc(sizeof(*ds) + cpu_mask_size);
Ditto.
> + memcpy(tdptr->domain_data, dptr->domain_data, sizeof(*ds) + cpu_mask_size);
> +
> + tdptr->next = NULL;
> + if (!dtail) {
> + summary_head->domain_head = tdptr;
> + dtail = tdptr;
> + } else {
> + dtail->next = tdptr;
> + dtail = dtail->next;
> + }
> + dptr = dptr->next;
Hmm.. can we just use list_head?
> + }
> + }
> +
> + tcptr = (*cptr)->next;
> + while (tcptr) {
> + if (!tcptr->next)
> + is_last = true;
> +
> + cnt++;
> + summarize_schedstat_cpu(summary_head, tcptr, cnt, is_last);
> + tdptr = summary_head->domain_head;
> + dptr = tcptr->domain_head;
> +
> + while (tdptr) {
> + summarize_schedstat_domain(tdptr, dptr, cnt, is_last);
> + tdptr = tdptr->next;
> + dptr = dptr->next;
> + }
> + tcptr = tcptr->next;
> + }
> +
> + tcptr = *cptr;
> + summary_head->next = tcptr;
> + *cptr = summary_head;
> +}
> +
> +/* FIXME: The code fails (segfaults) when one or more cpus are offline. */
Sounds scary.. Do you have any clue?
> +static void show_schedstat_data(struct schedstat_cpu *cptr)
> +{
> + struct perf_record_schedstat_domain *ds = NULL;
> + struct perf_record_schedstat_cpu *cs = NULL;
> + __u64 jiffies = cptr->cpu_data->timestamp;
> + struct schedstat_domain *dptr = NULL;
> + bool is_summary = true;
> +
> + printf("Columns description\n");
> + print_separator(100, "", 0);
> + printf("DESC\t\t\t-> Description of the field\n");
> + printf("COUNT\t\t\t-> Value of the field\n");
> + printf("PCT_CHANGE\t\t-> Percent change with corresponding base value\n");
> + printf("AVG_JIFFIES\t\t-> Avg time in jiffies between two consecutive occurrence of event\n");
> +
> + print_separator(100, "", 0);
> + printf("Time elapsed (in jiffies) : %11llu\n",
Probably better to use printf("%-*s: %11llu\n", ...).
> + jiffies);
> + print_separator(100, "", 0);
> +
> + get_all_cpu_stats(&cptr);
> +
> + while (cptr) {
> + cs = cptr->cpu_data;
> + printf("\n");
> + print_separator(100, "", 0);
> + if (is_summary)
> + printf("CPU <ALL CPUS SUMMARY>\n");
> + else
> + printf("CPU %d\n", cs->cpu);
> +
> + print_separator(100, "", 0);
> + print_cpu_stats(cs);
> + print_separator(100, "", 0);
> +
> + dptr = cptr->domain_head;
> +
> + while (dptr) {
> + ds = dptr->domain_data;
> + if (is_summary)
> + if (ds->name[0])
> + printf("CPU <ALL CPUS SUMMARY>, DOMAIN %s\n", ds->name);
> + else
> + printf("CPU <ALL CPUS SUMMARY>, DOMAIN %d\n", ds->domain);
> + else {
> + if (ds->name[0])
> + printf("CPU %d, DOMAIN %s CPUS ", cs->cpu, ds->name);
> + else
> + printf("CPU %d, DOMAIN %d CPUS ", cs->cpu, ds->domain);
> +
> + print_domain_cpu_list(ds);
> + }
> + print_separator(100, "", 0);
> + print_domain_stats(ds, jiffies);
> + print_separator(100, "", 0);
> +
> + dptr = dptr->next;
> + }
> + is_summary = false;
> + cptr = cptr->next;
> + }
> +}
> +
> +static int perf_sched__process_schedstat(struct perf_session *session __maybe_unused,
> + union perf_event *event)
> +{
> + struct perf_cpu this_cpu;
> + static __u32 initial_cpu;
> +
> + switch (event->header.type) {
> + case PERF_RECORD_SCHEDSTAT_CPU:
> + this_cpu.cpu = event->schedstat_cpu.cpu;
> + break;
> + case PERF_RECORD_SCHEDSTAT_DOMAIN:
> + this_cpu.cpu = event->schedstat_domain.cpu;
> + break;
> + default:
> + return 0;
> + }
> +
> + if (user_requested_cpus && !perf_cpu_map__has(user_requested_cpus, this_cpu))
> + return 0;
> +
> + if (event->header.type == PERF_RECORD_SCHEDSTAT_CPU) {
> + struct schedstat_cpu *temp = zalloc(sizeof(struct schedstat_cpu));
> +
> + temp->cpu_data = zalloc(sizeof(struct perf_record_schedstat_cpu));
No error checks.
> + memcpy(temp->cpu_data, &event->schedstat_cpu,
> + sizeof(struct perf_record_schedstat_cpu));
> + temp->next = NULL;
> + temp->domain_head = NULL;
> +
> + if (cpu_head && temp->cpu_data->cpu == initial_cpu)
> + after_workload_flag = true;
> +
> + if (!after_workload_flag) {
> + if (!cpu_head) {
> + initial_cpu = temp->cpu_data->cpu;
> + cpu_head = temp;
> + } else
> + cpu_tail->next = temp;
> +
> + cpu_tail = temp;
> + } else {
> + if (temp->cpu_data->cpu == initial_cpu) {
> + cpu_second_pass = cpu_head;
> + cpu_head->cpu_data->timestamp =
> + temp->cpu_data->timestamp - cpu_second_pass->cpu_data->timestamp;
> + } else {
> + cpu_second_pass = cpu_second_pass->next;
> + }
> + domain_second_pass = cpu_second_pass->domain_head;
> + store_schedtstat_cpu_diff(temp);
Is 'temp' used after this?
> + }
> + } else if (event->header.type == PERF_RECORD_SCHEDSTAT_DOMAIN) {
> + size_t cpu_mask_size = (event->schedstat_domain.nr_cpus + 7) >> 3;
> + struct schedstat_domain *temp = zalloc(sizeof(struct schedstat_domain));
> +
> + temp->domain_data = zalloc(sizeof(struct perf_record_schedstat_domain) + cpu_mask_size);
No error checks.
> + memcpy(temp->domain_data, &event->schedstat_domain,
> + sizeof(struct perf_record_schedstat_domain) + cpu_mask_size);
> + temp->next = NULL;
> +
> + if (!after_workload_flag) {
> + if (cpu_tail->domain_head == NULL) {
> + cpu_tail->domain_head = temp;
> + domain_tail = temp;
> + } else {
> + domain_tail->next = temp;
> + domain_tail = temp;
> + }
> + } else {
> + store_schedstat_domain_diff(temp);
> + domain_second_pass = domain_second_pass->next;
Is 'temp' leaking?
> + }
> + }
> +
> + return 0;
> +}
> +
> +static void free_schedstat(struct schedstat_cpu *cptr)
> +{
> + struct schedstat_domain *dptr = NULL, *tmp_dptr;
> + struct schedstat_cpu *tmp_cptr;
> +
> + while (cptr) {
> + tmp_cptr = cptr;
> + dptr = cptr->domain_head;
> +
> + while (dptr) {
> + tmp_dptr = dptr;
> + dptr = dptr->next;
> + free(tmp_dptr);
> + }
> + cptr = cptr->next;
> + free(tmp_cptr);
> + }
> +}
> +
> +static int perf_sched__schedstat_report(struct perf_sched *sched)
> +{
> + struct perf_session *session;
> + struct perf_data data = {
> + .path = input_name,
> + .mode = PERF_DATA_MODE_READ,
> + };
> + int err;
> +
> + if (cpu_list) {
> + user_requested_cpus = perf_cpu_map__new(cpu_list);
> + if (!user_requested_cpus)
> + return -EINVAL;
> + }
> +
> + sched->tool.schedstat_cpu = perf_sched__process_schedstat;
> + sched->tool.schedstat_domain = perf_sched__process_schedstat;
> +
> + session = perf_session__new(&data, &sched->tool);
> + if (IS_ERR(session)) {
> + pr_err("Perf session creation failed.\n");
> + return PTR_ERR(session);
> + }
> +
> + err = perf_session__process_events(session);
> +
> + perf_session__delete(session);
Quite unusual location to do this. :) Probably better to call it after
finishing the actual logic as you might need some session data later.
> + if (!err) {
> + setup_pager();
> + show_schedstat_data(cpu_head);
> + free_schedstat(cpu_head);
> + }
You need to release the cpu map here:
perf_cpu_map__put(user_requested_cpus);
> + return err;
> +}
> +
> static bool schedstat_events_exposed(void)
> {
> /*
> @@ -4046,6 +4541,8 @@ int cmd_sched(int argc, const char **argv)
> OPT_PARENT(sched_options)
> };
> const struct option stats_options[] = {
> + OPT_STRING('i', "input", &input_name, "file",
> + "`stats report` with input filename"),
> OPT_STRING('o', "output", &output_name, "file",
> "`stats record` with output filename"),
> OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
> @@ -4171,7 +4668,7 @@ int cmd_sched(int argc, const char **argv)
>
> return perf_sched__timehist(&sched);
> } else if (!strcmp(argv[0], "stats")) {
> - const char *const stats_subcommands[] = {"record", NULL};
> + const char *const stats_subcommands[] = {"record", "report", NULL};
>
> argc = parse_options_subcommand(argc, argv, stats_options,
> stats_subcommands,
> @@ -4183,6 +4680,11 @@ int cmd_sched(int argc, const char **argv)
> argc = parse_options(argc, argv, stats_options,
> stats_usage, 0);
> return perf_sched__schedstat_record(&sched, argc, argv);
> + } else if (argv[0] && !strcmp(argv[0], "report")) {
> + if (argc)
> + argc = parse_options(argc, argv, stats_options,
> + stats_usage, 0);
> + return perf_sched__schedstat_report(&sched);
> }
> usage_with_options(stats_usage, stats_options);
> } else {
> diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
> index d09c3c99ab48..4071bd95192d 100644
> --- a/tools/perf/util/event.c
> +++ b/tools/perf/util/event.c
> @@ -560,7 +560,7 @@ size_t perf_event__fprintf_schedstat_cpu(union perf_event *event, FILE *fp)
>
> size = fprintf(fp, "\ncpu%u ", cs->cpu);
>
> -#define CPU_FIELD(_type, _name, _ver) \
> +#define CPU_FIELD(_type, _name, _desc, _format, _is_pct, _pct_of, _ver) \
> size += fprintf(fp, "%" PRIu64 " ", (unsigned long)cs->_ver._name)
>
> if (version == 15) {
> @@ -641,7 +641,7 @@ size_t perf_event__fprintf_schedstat_domain(union perf_event *event, FILE *fp)
> size += fprintf(fp, "%s ", cpu_mask);
> free(cpu_mask);
>
> -#define DOMAIN_FIELD(_type, _name, _ver) \
> +#define DOMAIN_FIELD(_type, _name, _desc, _format, _is_jiffies, _ver) \
> size += fprintf(fp, "%" PRIu64 " ", (unsigned long)ds->_ver._name)
>
> if (version == 15) {
> diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
> index fad0c472f297..495ed8433c0c 100644
> --- a/tools/perf/util/synthetic-events.c
> +++ b/tools/perf/util/synthetic-events.c
> @@ -2538,7 +2538,7 @@ static union perf_event *__synthesize_schedstat_cpu(struct io *io, __u16 version
> if (io__get_dec(io, (__u64 *)cpu) != ' ')
> goto out_cpu;
>
> -#define CPU_FIELD(_type, _name, _ver) \
> +#define CPU_FIELD(_type, _name, _desc, _format, _is_pct, _pct_of, _ver) \
> do { \
> __u64 _tmp; \
> ch = io__get_dec(io, &_tmp); \
> @@ -2662,7 +2662,7 @@ static union perf_event *__synthesize_schedstat_domain(struct io *io, __u16 vers
> free(d_name);
> free(cpu_mask);
>
> -#define DOMAIN_FIELD(_type, _name, _ver) \
> +#define DOMAIN_FIELD(_type, _name, _desc, _format, _is_jiffies, _ver) \
> do { \
> __u64 _tmp; \
> ch = io__get_dec(io, &_tmp); \
> --
> 2.43.0
>
Powered by blists - more mailing lists