[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ZvT7VkSUpNFKfqge@google.com>
Date: Wed, 25 Sep 2024 23:12:38 -0700
From: Namhyung Kim <namhyung@...nel.org>
To: Ravi Bangoria <ravi.bangoria@....com>
Cc: peterz@...radead.org, mingo@...hat.com, acme@...nel.org,
irogers@...gle.com, swapnil.sapkal@....com, yu.c.chen@...el.com,
mark.rutland@....com, alexander.shishkin@...ux.intel.com,
jolsa@...nel.org, rostedt@...dmis.org, vincent.guittot@...aro.org,
bristot@...hat.com, adrian.hunter@...el.com, james.clark@....com,
kan.liang@...ux.intel.com, gautham.shenoy@....com,
kprateek.nayak@....com, juri.lelli@...hat.com,
yangjihong@...edance.com, void@...ifault.com, tj@...nel.org,
linux-kernel@...r.kernel.org, linux-perf-users@...r.kernel.org,
santosh.shukla@....com, ananth.narayan@....com,
sandipan.das@....com
Subject: Re: [PATCH 2/5] perf sched stats: Add record and rawdump support
On Mon, Sep 16, 2024 at 04:47:19PM +0000, Ravi Bangoria wrote:
> From: Swapnil Sapkal <swapnil.sapkal@....com>
>
> Define new, perf tool only, sample types and their layouts. Add logic
> to parse /proc/schedstat, convert it to perf sample format and save
> samples to perf.data file with `perf sched stats record` command. Also
> add logic to read perf.data file, interpret schedstat samples and
> print rawdump of samples with `perf script -D`.
>
> Note that, /proc/schedstat file output is standardized with version
> number. The patch supports v15 but older or newer version can be added
> easily.
>
> Signed-off-by: Swapnil Sapkal <swapnil.sapkal@....com>
> Co-developed-by: Ravi Bangoria <ravi.bangoria@....com>
> Signed-off-by: Ravi Bangoria <ravi.bangoria@....com>
> ---
> tools/lib/perf/Documentation/libperf.txt | 2 +
> tools/lib/perf/Makefile | 2 +-
> tools/lib/perf/include/perf/event.h | 42 +++
> .../lib/perf/include/perf/schedstat-cpu-v15.h | 13 +
> .../perf/include/perf/schedstat-domain-v15.h | 40 +++
> tools/perf/builtin-inject.c | 2 +
> tools/perf/builtin-sched.c | 222 +++++++++++++++-
> tools/perf/util/event.c | 98 +++++++
> tools/perf/util/event.h | 2 +
> tools/perf/util/session.c | 20 ++
> tools/perf/util/synthetic-events.c | 249 ++++++++++++++++++
> tools/perf/util/synthetic-events.h | 3 +
> tools/perf/util/tool.c | 20 ++
> tools/perf/util/tool.h | 4 +-
> 14 files changed, 716 insertions(+), 3 deletions(-)
> create mode 100644 tools/lib/perf/include/perf/schedstat-cpu-v15.h
> create mode 100644 tools/lib/perf/include/perf/schedstat-domain-v15.h
>
> diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt
> index fcfb9499ef9c..39c78682ad2e 100644
> --- a/tools/lib/perf/Documentation/libperf.txt
> +++ b/tools/lib/perf/Documentation/libperf.txt
> @@ -211,6 +211,8 @@ SYNOPSIS
> struct perf_record_time_conv;
> struct perf_record_header_feature;
> struct perf_record_compressed;
> + struct perf_record_schedstat_cpu;
> + struct perf_record_schedstat_domain;
> --
>
> DESCRIPTION
> diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile
> index 3a9b2140aa04..ebbfea891a6a 100644
> --- a/tools/lib/perf/Makefile
> +++ b/tools/lib/perf/Makefile
> @@ -187,7 +187,7 @@ install_lib: libs
> $(call do_install_mkdir,$(libdir_SQ)); \
> cp -fpR $(LIBPERF_ALL) $(DESTDIR)$(libdir_SQ)
>
> -HDRS := bpf_perf.h core.h cpumap.h threadmap.h evlist.h evsel.h event.h mmap.h
> +HDRS := bpf_perf.h core.h cpumap.h threadmap.h evlist.h evsel.h event.h mmap.h schedstat-cpu-v15.h schedstat-domain-v15.h
> INTERNAL_HDRS := cpumap.h evlist.h evsel.h lib.h mmap.h rc_check.h threadmap.h xyarray.h
>
> INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/perf
> diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h
> index 37bb7771d914..35be296d68d5 100644
> --- a/tools/lib/perf/include/perf/event.h
> +++ b/tools/lib/perf/include/perf/event.h
> @@ -457,6 +457,44 @@ struct perf_record_compressed {
> char data[];
> };
>
> +struct perf_record_schedstat_cpu_v15 {
> +#define CPU_FIELD(_type, _name, _ver) _type _name;
> +#include "schedstat-cpu-v15.h"
> +#undef CPU_FIELD
> +};
> +
> +struct perf_record_schedstat_cpu {
> + struct perf_event_header header;
> + __u16 version;
> + __u64 timestamp;
> + __u32 cpu;
Can you change the layout to minimize the padding? It would probably be
better to add an explicit field for the unused bits.
> + union {
> + struct perf_record_schedstat_cpu_v15 v15;
> + };
> +};
> +
> +struct perf_record_schedstat_domain_v15 {
> +#define DOMAIN_FIELD(_type, _name, _ver) _type _name;
> +#include "schedstat-domain-v15.h"
> +#undef DOMAIN_FIELD
> +};
> +
> +#define DOMAIN_NAME_LEN 16
> +
> +struct perf_record_schedstat_domain {
> + struct perf_event_header header;
> + __u16 version;
> + __u64 timestamp;
> + __u32 cpu;
> + __u16 domain;
Ditto.
> + char name[DOMAIN_NAME_LEN];
> + union {
> + struct perf_record_schedstat_domain_v15 v15;
> + };
> + __u16 nr_cpus;
> + __u8 cpu_mask[];
> +};
> +
> enum perf_user_event_type { /* above any possible kernel type */
> PERF_RECORD_USER_TYPE_START = 64,
> PERF_RECORD_HEADER_ATTR = 64,
> @@ -478,6 +516,8 @@ enum perf_user_event_type { /* above any possible kernel type */
> PERF_RECORD_HEADER_FEATURE = 80,
> PERF_RECORD_COMPRESSED = 81,
> PERF_RECORD_FINISHED_INIT = 82,
> + PERF_RECORD_SCHEDSTAT_CPU = 83,
> + PERF_RECORD_SCHEDSTAT_DOMAIN = 84,
> PERF_RECORD_HEADER_MAX
> };
>
> @@ -518,6 +558,8 @@ union perf_event {
> struct perf_record_time_conv time_conv;
> struct perf_record_header_feature feat;
> struct perf_record_compressed pack;
> + struct perf_record_schedstat_cpu schedstat_cpu;
> + struct perf_record_schedstat_domain schedstat_domain;
> };
>
> #endif /* __LIBPERF_EVENT_H */
> diff --git a/tools/lib/perf/include/perf/schedstat-cpu-v15.h b/tools/lib/perf/include/perf/schedstat-cpu-v15.h
> new file mode 100644
> index 000000000000..8e4355ee3705
> --- /dev/null
> +++ b/tools/lib/perf/include/perf/schedstat-cpu-v15.h
> @@ -0,0 +1,13 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifdef CPU_FIELD
> +CPU_FIELD(__u32, yld_count, v15)
> +CPU_FIELD(__u32, array_exp, v15)
> +CPU_FIELD(__u32, sched_count, v15)
> +CPU_FIELD(__u32, sched_goidle, v15)
> +CPU_FIELD(__u32, ttwu_count, v15)
> +CPU_FIELD(__u32, ttwu_local, v15)
> +CPU_FIELD(__u64, rq_cpu_time, v15)
> +CPU_FIELD(__u64, run_delay, v15)
> +CPU_FIELD(__u64, pcount, v15)
> +#endif
Can we have a single schedstat.h containing both the CPU fields and the
domain fields? You could require users to always define the macros and get
rid of the ifdef condition here.
Also, is there any macro magic to handle the version number? I think you
can use just the number (15, without the 'v') and compare it with the input
if needed.
Thanks,
Namhyung
> diff --git a/tools/lib/perf/include/perf/schedstat-domain-v15.h b/tools/lib/perf/include/perf/schedstat-domain-v15.h
> new file mode 100644
> index 000000000000..422e713d617a
> --- /dev/null
> +++ b/tools/lib/perf/include/perf/schedstat-domain-v15.h
> @@ -0,0 +1,40 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifdef DOMAIN_FIELD
> +DOMAIN_FIELD(__u32, idle_lb_count, v15)
> +DOMAIN_FIELD(__u32, idle_lb_balanced, v15)
> +DOMAIN_FIELD(__u32, idle_lb_failed, v15)
> +DOMAIN_FIELD(__u32, idle_lb_imbalance, v15)
> +DOMAIN_FIELD(__u32, idle_lb_gained, v15)
> +DOMAIN_FIELD(__u32, idle_lb_hot_gained, v15)
> +DOMAIN_FIELD(__u32, idle_lb_nobusyq, v15)
> +DOMAIN_FIELD(__u32, idle_lb_nobusyg, v15)
> +DOMAIN_FIELD(__u32, busy_lb_count, v15)
> +DOMAIN_FIELD(__u32, busy_lb_balanced, v15)
> +DOMAIN_FIELD(__u32, busy_lb_failed, v15)
> +DOMAIN_FIELD(__u32, busy_lb_imbalance, v15)
> +DOMAIN_FIELD(__u32, busy_lb_gained, v15)
> +DOMAIN_FIELD(__u32, busy_lb_hot_gained, v15)
> +DOMAIN_FIELD(__u32, busy_lb_nobusyq, v15)
> +DOMAIN_FIELD(__u32, busy_lb_nobusyg, v15)
> +DOMAIN_FIELD(__u32, newidle_lb_count, v15)
> +DOMAIN_FIELD(__u32, newidle_lb_balanced, v15)
> +DOMAIN_FIELD(__u32, newidle_lb_failed, v15)
> +DOMAIN_FIELD(__u32, newidle_lb_imbalance, v15)
> +DOMAIN_FIELD(__u32, newidle_lb_gained, v15)
> +DOMAIN_FIELD(__u32, newidle_lb_hot_gained, v15)
> +DOMAIN_FIELD(__u32, newidle_lb_nobusyq, v15)
> +DOMAIN_FIELD(__u32, newidle_lb_nobusyg, v15)
> +DOMAIN_FIELD(__u32, alb_count, v15)
> +DOMAIN_FIELD(__u32, alb_failed, v15)
> +DOMAIN_FIELD(__u32, alb_pushed, v15)
> +DOMAIN_FIELD(__u32, sbe_count, v15)
> +DOMAIN_FIELD(__u32, sbe_balanced, v15)
> +DOMAIN_FIELD(__u32, sbe_pushed, v15)
> +DOMAIN_FIELD(__u32, sbf_count, v15)
> +DOMAIN_FIELD(__u32, sbf_balanced, v15)
> +DOMAIN_FIELD(__u32, sbf_pushed, v15)
> +DOMAIN_FIELD(__u32, ttwu_wake_remote, v15)
> +DOMAIN_FIELD(__u32, ttwu_move_affine, v15)
> +DOMAIN_FIELD(__u32, ttwu_move_balance, v15)
> +#endif /* DOMAIN_FIELD */
Powered by blists - more mailing lists