lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ZvT7VkSUpNFKfqge@google.com>
Date: Wed, 25 Sep 2024 23:12:38 -0700
From: Namhyung Kim <namhyung@...nel.org>
To: Ravi Bangoria <ravi.bangoria@....com>
Cc: peterz@...radead.org, mingo@...hat.com, acme@...nel.org,
	irogers@...gle.com, swapnil.sapkal@....com, yu.c.chen@...el.com,
	mark.rutland@....com, alexander.shishkin@...ux.intel.com,
	jolsa@...nel.org, rostedt@...dmis.org, vincent.guittot@...aro.org,
	bristot@...hat.com, adrian.hunter@...el.com, james.clark@....com,
	kan.liang@...ux.intel.com, gautham.shenoy@....com,
	kprateek.nayak@....com, juri.lelli@...hat.com,
	yangjihong@...edance.com, void@...ifault.com, tj@...nel.org,
	linux-kernel@...r.kernel.org, linux-perf-users@...r.kernel.org,
	santosh.shukla@....com, ananth.narayan@....com,
	sandipan.das@....com
Subject: Re: [PATCH 2/5] perf sched stats: Add record and rawdump support

On Mon, Sep 16, 2024 at 04:47:19PM +0000, Ravi Bangoria wrote:
> From: Swapnil Sapkal <swapnil.sapkal@....com>
> 
> Define new, perf tool only, sample types and their layouts. Add logic
> to parse /proc/schedstat, convert it to perf sample format and save
> samples to perf.data file with `perf sched stats record` command. Also
> add logic to read perf.data file, interpret schedstat samples and
> print rawdump of samples with `perf script -D`.
> 
> Note that, /proc/schedstat file output is standardized with version
> number. The patch supports v15 but older or newer version can be added
> easily.
> 
> Signed-off-by: Swapnil Sapkal <swapnil.sapkal@....com>
> Co-developed-by: Ravi Bangoria <ravi.bangoria@....com>
> Signed-off-by: Ravi Bangoria <ravi.bangoria@....com>
> ---
>  tools/lib/perf/Documentation/libperf.txt      |   2 +
>  tools/lib/perf/Makefile                       |   2 +-
>  tools/lib/perf/include/perf/event.h           |  42 +++
>  .../lib/perf/include/perf/schedstat-cpu-v15.h |  13 +
>  .../perf/include/perf/schedstat-domain-v15.h  |  40 +++
>  tools/perf/builtin-inject.c                   |   2 +
>  tools/perf/builtin-sched.c                    | 222 +++++++++++++++-
>  tools/perf/util/event.c                       |  98 +++++++
>  tools/perf/util/event.h                       |   2 +
>  tools/perf/util/session.c                     |  20 ++
>  tools/perf/util/synthetic-events.c            | 249 ++++++++++++++++++
>  tools/perf/util/synthetic-events.h            |   3 +
>  tools/perf/util/tool.c                        |  20 ++
>  tools/perf/util/tool.h                        |   4 +-
>  14 files changed, 716 insertions(+), 3 deletions(-)
>  create mode 100644 tools/lib/perf/include/perf/schedstat-cpu-v15.h
>  create mode 100644 tools/lib/perf/include/perf/schedstat-domain-v15.h
> 
> diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt
> index fcfb9499ef9c..39c78682ad2e 100644
> --- a/tools/lib/perf/Documentation/libperf.txt
> +++ b/tools/lib/perf/Documentation/libperf.txt
> @@ -211,6 +211,8 @@ SYNOPSIS
>    struct perf_record_time_conv;
>    struct perf_record_header_feature;
>    struct perf_record_compressed;
> +  struct perf_record_schedstat_cpu;
> +  struct perf_record_schedstat_domain;
>  --
>  
>  DESCRIPTION
> diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile
> index 3a9b2140aa04..ebbfea891a6a 100644
> --- a/tools/lib/perf/Makefile
> +++ b/tools/lib/perf/Makefile
> @@ -187,7 +187,7 @@ install_lib: libs
>  		$(call do_install_mkdir,$(libdir_SQ)); \
>  		cp -fpR $(LIBPERF_ALL) $(DESTDIR)$(libdir_SQ)
>  
> -HDRS := bpf_perf.h core.h cpumap.h threadmap.h evlist.h evsel.h event.h mmap.h
> +HDRS := bpf_perf.h core.h cpumap.h threadmap.h evlist.h evsel.h event.h mmap.h schedstat-cpu-v15.h schedstat-domain-v15.h
>  INTERNAL_HDRS := cpumap.h evlist.h evsel.h lib.h mmap.h rc_check.h threadmap.h xyarray.h
>  
>  INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/perf
> diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h
> index 37bb7771d914..35be296d68d5 100644
> --- a/tools/lib/perf/include/perf/event.h
> +++ b/tools/lib/perf/include/perf/event.h
> @@ -457,6 +457,44 @@ struct perf_record_compressed {
>  	char			 data[];
>  };
>  
> +struct perf_record_schedstat_cpu_v15 {
> +#define CPU_FIELD(_type, _name, _ver)		_type _name;
> +#include "schedstat-cpu-v15.h"
> +#undef CPU_FIELD
> +};
> +
> +struct perf_record_schedstat_cpu {
> +	struct perf_event_header header;
> +	__u16			 version;
> +	__u64			 timestamp;
> +	__u32			 cpu;

Can you change the layout to minimize the padding?  It is probably better to
add an explicit field for the unused bits.


> +	union {
> +		struct perf_record_schedstat_cpu_v15 v15;
> +	};
> +};
> +
> +struct perf_record_schedstat_domain_v15 {
> +#define DOMAIN_FIELD(_type, _name, _ver)	_type _name;
> +#include "schedstat-domain-v15.h"
> +#undef DOMAIN_FIELD
> +};
> +
> +#define DOMAIN_NAME_LEN		16
> +
> +struct perf_record_schedstat_domain {
> +	struct perf_event_header header;
> +	__u16			 version;
> +	__u64			 timestamp;
> +	__u32			 cpu;
> +	__u16			 domain;

Ditto.

> +	char			 name[DOMAIN_NAME_LEN];
> +	union {
> +		struct perf_record_schedstat_domain_v15 v15;
> +	};
> +	__u16			 nr_cpus;
> +	__u8			 cpu_mask[];
> +};
> +
>  enum perf_user_event_type { /* above any possible kernel type */
>  	PERF_RECORD_USER_TYPE_START		= 64,
>  	PERF_RECORD_HEADER_ATTR			= 64,
> @@ -478,6 +516,8 @@ enum perf_user_event_type { /* above any possible kernel type */
>  	PERF_RECORD_HEADER_FEATURE		= 80,
>  	PERF_RECORD_COMPRESSED			= 81,
>  	PERF_RECORD_FINISHED_INIT		= 82,
> +	PERF_RECORD_SCHEDSTAT_CPU		= 83,
> +	PERF_RECORD_SCHEDSTAT_DOMAIN		= 84,
>  	PERF_RECORD_HEADER_MAX
>  };
>  
> @@ -518,6 +558,8 @@ union perf_event {
>  	struct perf_record_time_conv		time_conv;
>  	struct perf_record_header_feature	feat;
>  	struct perf_record_compressed		pack;
> +	struct perf_record_schedstat_cpu	schedstat_cpu;
> +	struct perf_record_schedstat_domain	schedstat_domain;
>  };
>  
>  #endif /* __LIBPERF_EVENT_H */
> diff --git a/tools/lib/perf/include/perf/schedstat-cpu-v15.h b/tools/lib/perf/include/perf/schedstat-cpu-v15.h
> new file mode 100644
> index 000000000000..8e4355ee3705
> --- /dev/null
> +++ b/tools/lib/perf/include/perf/schedstat-cpu-v15.h
> @@ -0,0 +1,13 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifdef CPU_FIELD
> +CPU_FIELD(__u32, yld_count, v15)
> +CPU_FIELD(__u32, array_exp, v15)
> +CPU_FIELD(__u32, sched_count, v15)
> +CPU_FIELD(__u32, sched_goidle, v15)
> +CPU_FIELD(__u32, ttwu_count, v15)
> +CPU_FIELD(__u32, ttwu_local, v15)
> +CPU_FIELD(__u64, rq_cpu_time, v15)
> +CPU_FIELD(__u64, run_delay, v15)
> +CPU_FIELD(__u64, pcount, v15)
> +#endif

Can we have a single schedstat.h containing both the CPU fields and the domain
fields?  You could require users to always define the macro and get rid
of the ifdef condition here.

Also, is there any macro magic to handle the version number?  I think you
can pass just the number (15, without the 'v') and compare it with the input
if needed.

Thanks,
Namhyung


> diff --git a/tools/lib/perf/include/perf/schedstat-domain-v15.h b/tools/lib/perf/include/perf/schedstat-domain-v15.h
> new file mode 100644
> index 000000000000..422e713d617a
> --- /dev/null
> +++ b/tools/lib/perf/include/perf/schedstat-domain-v15.h
> @@ -0,0 +1,40 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifdef DOMAIN_FIELD
> +DOMAIN_FIELD(__u32, idle_lb_count, v15)
> +DOMAIN_FIELD(__u32, idle_lb_balanced, v15)
> +DOMAIN_FIELD(__u32, idle_lb_failed, v15)
> +DOMAIN_FIELD(__u32, idle_lb_imbalance, v15)
> +DOMAIN_FIELD(__u32, idle_lb_gained, v15)
> +DOMAIN_FIELD(__u32, idle_lb_hot_gained, v15)
> +DOMAIN_FIELD(__u32, idle_lb_nobusyq, v15)
> +DOMAIN_FIELD(__u32, idle_lb_nobusyg, v15)
> +DOMAIN_FIELD(__u32, busy_lb_count, v15)
> +DOMAIN_FIELD(__u32, busy_lb_balanced, v15)
> +DOMAIN_FIELD(__u32, busy_lb_failed, v15)
> +DOMAIN_FIELD(__u32, busy_lb_imbalance, v15)
> +DOMAIN_FIELD(__u32, busy_lb_gained, v15)
> +DOMAIN_FIELD(__u32, busy_lb_hot_gained, v15)
> +DOMAIN_FIELD(__u32, busy_lb_nobusyq, v15)
> +DOMAIN_FIELD(__u32, busy_lb_nobusyg, v15)
> +DOMAIN_FIELD(__u32, newidle_lb_count, v15)
> +DOMAIN_FIELD(__u32, newidle_lb_balanced, v15)
> +DOMAIN_FIELD(__u32, newidle_lb_failed, v15)
> +DOMAIN_FIELD(__u32, newidle_lb_imbalance, v15)
> +DOMAIN_FIELD(__u32, newidle_lb_gained, v15)
> +DOMAIN_FIELD(__u32, newidle_lb_hot_gained, v15)
> +DOMAIN_FIELD(__u32, newidle_lb_nobusyq, v15)
> +DOMAIN_FIELD(__u32, newidle_lb_nobusyg, v15)
> +DOMAIN_FIELD(__u32, alb_count, v15)
> +DOMAIN_FIELD(__u32, alb_failed, v15)
> +DOMAIN_FIELD(__u32, alb_pushed, v15)
> +DOMAIN_FIELD(__u32, sbe_count, v15)
> +DOMAIN_FIELD(__u32, sbe_balanced, v15)
> +DOMAIN_FIELD(__u32, sbe_pushed, v15)
> +DOMAIN_FIELD(__u32, sbf_count, v15)
> +DOMAIN_FIELD(__u32, sbf_balanced, v15)
> +DOMAIN_FIELD(__u32, sbf_pushed, v15)
> +DOMAIN_FIELD(__u32, ttwu_wake_remote, v15)
> +DOMAIN_FIELD(__u32, ttwu_move_affine, v15)
> +DOMAIN_FIELD(__u32, ttwu_move_balance, v15)
> +#endif /* DOMAIN_FIELD */

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ