lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ZfyYWcu0E-Pcn6Rf@x1>
Date: Thu, 21 Mar 2024 17:28:09 -0300
From: Arnaldo Carvalho de Melo <acme@...nel.org>
To: Ian Rogers <irogers@...gle.com>
Cc: Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>,
	Namhyung Kim <namhyung@...nel.org>,
	Mark Rutland <mark.rutland@....com>,
	Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
	Jiri Olsa <jolsa@...nel.org>,
	Adrian Hunter <adrian.hunter@...el.com>,
	James Clark <james.clark@....com>,
	Athira Rajeev <atrajeev@...ux.vnet.ibm.com>,
	Colin Ian King <colin.i.king@...il.com>,
	Ahelenia ZiemiaƄska <nabijaczleweli@...ijaczleweli.xyz>,
	Leo Yan <leo.yan@...ux.dev>, Song Liu <song@...nel.org>,
	Miguel Ojeda <ojeda@...nel.org>,
	Liam Howlett <liam.howlett@...cle.com>,
	Ilkka Koskinen <ilkka@...amperecomputing.com>,
	Ben Gainey <ben.gainey@....com>,
	K Prateek Nayak <kprateek.nayak@....com>,
	Kan Liang <kan.liang@...ux.intel.com>,
	Yanteng Si <siyanteng@...ngson.cn>,
	Ravi Bangoria <ravi.bangoria@....com>,
	Sun Haiyong <sunhaiyong@...ngson.cn>,
	Changbin Du <changbin.du@...wei.com>,
	Masami Hiramatsu <mhiramat@...nel.org>,
	zhaimingbing <zhaimingbing@...s.chinamobile.com>,
	Paran Lee <p4ranlee@...il.com>, Li Dong <lidong@...o.com>,
	elfring@...rs.sourceforge.net, Andi Kleen <ak@...ux.intel.com>,
	Markus Elfring <Markus.Elfring@....de>,
	Chengen Du <chengen.du@...onical.com>,
	linux-perf-users@...r.kernel.org, linux-kernel@...r.kernel.org,
	bpf@...r.kernel.org
Subject: Re: [PATCH v2 01/13] perf dso: Reorder variables to save space in
 struct dso

On Thu, Mar 21, 2024 at 09:02:48AM -0700, Ian Rogers wrote:
> Save 40 bytes and move from 8 to 7 cache lines. Make variable dwfl
> dependent on being a powerpc build. Squeeze bits of int/enum types
> when appropriate. Remove holes/padding by reordering variables.

Thanks, applied.

- Arnaldo
 
> Before:
> ```
> struct dso {
>         struct mutex               lock;                 /*     0    40 */
>         struct list_head           node;                 /*    40    16 */
>         struct rb_node             rb_node __attribute__((__aligned__(8))); /*    56    24 */
>         /* --- cacheline 1 boundary (64 bytes) was 16 bytes ago --- */
>         struct rb_root *           root;                 /*    80     8 */
>         struct rb_root_cached      symbols;              /*    88    16 */
>         struct symbol * *          symbol_names;         /*   104     8 */
>         size_t                     symbol_names_len;     /*   112     8 */
>         struct rb_root_cached      inlined_nodes;        /*   120    16 */
>         /* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */
>         struct rb_root_cached      srclines;             /*   136    16 */
>         struct {
>                 u64                addr;                 /*   152     8 */
>                 struct symbol *    symbol;               /*   160     8 */
>         } last_find_result;                              /*   152    16 */
>         void *                     a2l;                  /*   168     8 */
>         char *                     symsrc_filename;      /*   176     8 */
>         unsigned int               a2l_fails;            /*   184     4 */
>         enum dso_space_type        kernel;               /*   188     4 */
>         /* --- cacheline 3 boundary (192 bytes) --- */
>         _Bool                      is_kmod;              /*   192     1 */
> 
>         /* XXX 3 bytes hole, try to pack */
> 
>         enum dso_swap_type         needs_swap;           /*   196     4 */
>         enum dso_binary_type       symtab_type;          /*   200     4 */
>         enum dso_binary_type       binary_type;          /*   204     4 */
>         enum dso_load_errno        load_errno;           /*   208     4 */
>         u8                         adjust_symbols:1;     /*   212: 0  1 */
>         u8                         has_build_id:1;       /*   212: 1  1 */
>         u8                         header_build_id:1;    /*   212: 2  1 */
>         u8                         has_srcline:1;        /*   212: 3  1 */
>         u8                         hit:1;                /*   212: 4  1 */
>         u8                         annotate_warned:1;    /*   212: 5  1 */
>         u8                         auxtrace_warned:1;    /*   212: 6  1 */
>         u8                         short_name_allocated:1; /*   212: 7  1 */
>         u8                         long_name_allocated:1; /*   213: 0  1 */
>         u8                         is_64_bit:1;          /*   213: 1  1 */
> 
>         /* XXX 6 bits hole, try to pack */
> 
>         _Bool                      sorted_by_name;       /*   214     1 */
>         _Bool                      loaded;               /*   215     1 */
>         u8                         rel;                  /*   216     1 */
> 
>         /* XXX 7 bytes hole, try to pack */
> 
>         struct build_id            bid;                  /*   224    32 */
>         /* --- cacheline 4 boundary (256 bytes) --- */
>         u64                        text_offset;          /*   256     8 */
>         u64                        text_end;             /*   264     8 */
>         const char  *              short_name;           /*   272     8 */
>         const char  *              long_name;            /*   280     8 */
>         u16                        long_name_len;        /*   288     2 */
>         u16                        short_name_len;       /*   290     2 */
> 
>         /* XXX 4 bytes hole, try to pack */
> 
>         void *                     dwfl;                 /*   296     8 */
>         struct auxtrace_cache *    auxtrace_cache;       /*   304     8 */
>         int                        comp;                 /*   312     4 */
> 
>         /* XXX 4 bytes hole, try to pack */
> 
>         /* --- cacheline 5 boundary (320 bytes) --- */
>         struct {
>                 struct rb_root     cache;                /*   320     8 */
>                 int                fd;                   /*   328     4 */
>                 int                status;               /*   332     4 */
>                 u32                status_seen;          /*   336     4 */
> 
>                 /* XXX 4 bytes hole, try to pack */
> 
>                 u64                file_size;            /*   344     8 */
>                 struct list_head   open_entry;           /*   352    16 */
>                 u64                elf_base_addr;        /*   368     8 */
>                 u64                debug_frame_offset;   /*   376     8 */
>                 /* --- cacheline 6 boundary (384 bytes) --- */
>                 u64                eh_frame_hdr_addr;    /*   384     8 */
>                 u64                eh_frame_hdr_offset;  /*   392     8 */
>         } data;                                          /*   320    80 */
>         struct {
>                 u32                id;                   /*   400     4 */
>                 u32                sub_id;               /*   404     4 */
>                 struct perf_env *  env;                  /*   408     8 */
>         } bpf_prog;                                      /*   400    16 */
>         union {
>                 void *             priv;                 /*   416     8 */
>                 u64                db_id;                /*   416     8 */
>         };                                               /*   416     8 */
>         struct nsinfo *            nsinfo;               /*   424     8 */
>         struct dso_id              id;                   /*   432    24 */
>         /* --- cacheline 7 boundary (448 bytes) was 8 bytes ago --- */
>         refcount_t                 refcnt;               /*   456     4 */
>         char                       name[];               /*   460     0 */
> 
>         /* size: 464, cachelines: 8, members: 49 */
>         /* sum members: 440, holes: 4, sum holes: 18 */
>         /* sum bitfield members: 10 bits, bit holes: 1, sum bit holes: 6 bits */
>         /* padding: 4 */
>         /* forced alignments: 1 */
>         /* last cacheline: 16 bytes */
> } __attribute__((__aligned__(8)));
> ```
> 
> After:
> ```
> struct dso {
>         struct mutex               lock;                 /*     0    40 */
>         struct list_head           node;                 /*    40    16 */
>         struct rb_node             rb_node __attribute__((__aligned__(8))); /*    56    24 */
>         /* --- cacheline 1 boundary (64 bytes) was 16 bytes ago --- */
>         struct rb_root *           root;                 /*    80     8 */
>         struct rb_root_cached      symbols;              /*    88    16 */
>         struct symbol * *          symbol_names;         /*   104     8 */
>         size_t                     symbol_names_len;     /*   112     8 */
>         struct rb_root_cached      inlined_nodes;        /*   120    16 */
>         /* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */
>         struct rb_root_cached      srclines;             /*   136    16 */
>         struct {
>                 u64                addr;                 /*   152     8 */
>                 struct symbol *    symbol;               /*   160     8 */
>         } last_find_result;                              /*   152    16 */
>         struct build_id            bid;                  /*   168    32 */
>         /* --- cacheline 3 boundary (192 bytes) was 8 bytes ago --- */
>         u64                        text_offset;          /*   200     8 */
>         u64                        text_end;             /*   208     8 */
>         const char  *              short_name;           /*   216     8 */
>         const char  *              long_name;            /*   224     8 */
>         void *                     a2l;                  /*   232     8 */
>         char *                     symsrc_filename;      /*   240     8 */
>         struct nsinfo *            nsinfo;               /*   248     8 */
>         /* --- cacheline 4 boundary (256 bytes) --- */
>         struct auxtrace_cache *    auxtrace_cache;       /*   256     8 */
>         union {
>                 void *             priv;                 /*   264     8 */
>                 u64                db_id;                /*   264     8 */
>         };                                               /*   264     8 */
>         struct {
>                 struct perf_env *  env;                  /*   272     8 */
>                 u32                id;                   /*   280     4 */
>                 u32                sub_id;               /*   284     4 */
>         } bpf_prog;                                      /*   272    16 */
>         struct {
>                 struct rb_root     cache;                /*   288     8 */
>                 struct list_head   open_entry;           /*   296    16 */
>                 u64                file_size;            /*   312     8 */
>                 /* --- cacheline 5 boundary (320 bytes) --- */
>                 u64                elf_base_addr;        /*   320     8 */
>                 u64                debug_frame_offset;   /*   328     8 */
>                 u64                eh_frame_hdr_addr;    /*   336     8 */
>                 u64                eh_frame_hdr_offset;  /*   344     8 */
>                 int                fd;                   /*   352     4 */
>                 int                status;               /*   356     4 */
>                 u32                status_seen;          /*   360     4 */
>         } data;                                          /*   288    80 */
> 
>         /* XXX last struct has 4 bytes of padding */
> 
>         struct dso_id              id;                   /*   368    24 */
>         /* --- cacheline 6 boundary (384 bytes) was 8 bytes ago --- */
>         unsigned int               a2l_fails;            /*   392     4 */
>         int                        comp;                 /*   396     4 */
>         refcount_t                 refcnt;               /*   400     4 */
>         enum dso_load_errno        load_errno;           /*   404     4 */
>         u16                        long_name_len;        /*   408     2 */
>         u16                        short_name_len;       /*   410     2 */
>         enum dso_binary_type       symtab_type:8;        /*   412: 0  4 */
>         enum dso_binary_type       binary_type:8;        /*   412: 8  4 */
>         enum dso_space_type        kernel:2;             /*   412:16  4 */
>         enum dso_swap_type         needs_swap:2;         /*   412:18  4 */
> 
>         /* Bitfield combined with next fields */
> 
>         _Bool                      is_kmod:1;            /*   414: 4  1 */
>         u8                         adjust_symbols:1;     /*   414: 5  1 */
>         u8                         has_build_id:1;       /*   414: 6  1 */
>         u8                         header_build_id:1;    /*   414: 7  1 */
>         u8                         has_srcline:1;        /*   415: 0  1 */
>         u8                         hit:1;                /*   415: 1  1 */
>         u8                         annotate_warned:1;    /*   415: 2  1 */
>         u8                         auxtrace_warned:1;    /*   415: 3  1 */
>         u8                         short_name_allocated:1; /*   415: 4  1 */
>         u8                         long_name_allocated:1; /*   415: 5  1 */
>         u8                         is_64_bit:1;          /*   415: 6  1 */
> 
>         /* XXX 1 bit hole, try to pack */
> 
>         _Bool                      sorted_by_name;       /*   416     1 */
>         _Bool                      loaded;               /*   417     1 */
>         u8                         rel;                  /*   418     1 */
>         char                       name[];               /*   419     0 */
> 
>         /* size: 424, cachelines: 7, members: 48 */
>         /* sum members: 415 */
>         /* sum bitfield members: 31 bits, bit holes: 1, sum bit holes: 1 bits */
>         /* padding: 5 */
>         /* paddings: 1, sum paddings: 4 */
>         /* forced alignments: 1 */
>         /* last cacheline: 40 bytes */
> } __attribute__((__aligned__(8)));
> ```
> 
> Signed-off-by: Ian Rogers <irogers@...gle.com>
> ---
>  tools/perf/util/dso.h | 84 +++++++++++++++++++++----------------------
>  1 file changed, 42 insertions(+), 42 deletions(-)
> 
> diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
> index 2cdcd1e2ef8b..17dab230a2ca 100644
> --- a/tools/perf/util/dso.h
> +++ b/tools/perf/util/dso.h
> @@ -161,66 +161,66 @@ struct dso {
>  		u64		addr;
>  		struct symbol	*symbol;
>  	} last_find_result;
> -	void		 *a2l;
> -	char		 *symsrc_filename;
> -	unsigned int	 a2l_fails;
> -	enum dso_space_type	kernel;
> -	bool			is_kmod;
> -	enum dso_swap_type	needs_swap;
> -	enum dso_binary_type	symtab_type;
> -	enum dso_binary_type	binary_type;
> -	enum dso_load_errno	load_errno;
> -	u8		 adjust_symbols:1;
> -	u8		 has_build_id:1;
> -	u8		 header_build_id:1;
> -	u8		 has_srcline:1;
> -	u8		 hit:1;
> -	u8		 annotate_warned:1;
> -	u8		 auxtrace_warned:1;
> -	u8		 short_name_allocated:1;
> -	u8		 long_name_allocated:1;
> -	u8		 is_64_bit:1;
> -	bool		 sorted_by_name;
> -	bool		 loaded;
> -	u8		 rel;
>  	struct build_id	 bid;
>  	u64		 text_offset;
>  	u64		 text_end;
>  	const char	 *short_name;
>  	const char	 *long_name;
> -	u16		 long_name_len;
> -	u16		 short_name_len;
> +	void		 *a2l;
> +	char		 *symsrc_filename;
> +#if defined(__powerpc__)
>  	void		*dwfl;			/* DWARF debug info */
> +#endif
> +	struct nsinfo	*nsinfo;
>  	struct auxtrace_cache *auxtrace_cache;
> -	int		 comp;
> -
> +	union { /* Tool specific area */
> +		void	 *priv;
> +		u64	 db_id;
> +	};
> +	/* bpf prog information */
> +	struct {
> +		struct perf_env	*env;
> +		u32		id;
> +		u32		sub_id;
> +	} bpf_prog;
>  	/* dso data file */
>  	struct {
>  		struct rb_root	 cache;
> -		int		 fd;
> -		int		 status;
> -		u32		 status_seen;
> -		u64		 file_size;
>  		struct list_head open_entry;
> +		u64		 file_size;
>  		u64		 elf_base_addr;
>  		u64		 debug_frame_offset;
>  		u64		 eh_frame_hdr_addr;
>  		u64		 eh_frame_hdr_offset;
> +		int		 fd;
> +		int		 status;
> +		u32		 status_seen;
>  	} data;
> -	/* bpf prog information */
> -	struct {
> -		u32		id;
> -		u32		sub_id;
> -		struct perf_env	*env;
> -	} bpf_prog;
> -
> -	union { /* Tool specific area */
> -		void	 *priv;
> -		u64	 db_id;
> -	};
> -	struct nsinfo	*nsinfo;
>  	struct dso_id	 id;
> +	unsigned int	 a2l_fails;
> +	int		 comp;
>  	refcount_t	 refcnt;
> +	enum dso_load_errno	load_errno;
> +	u16		 long_name_len;
> +	u16		 short_name_len;
> +	enum dso_binary_type	symtab_type:8;
> +	enum dso_binary_type	binary_type:8;
> +	enum dso_space_type	kernel:2;
> +	enum dso_swap_type	needs_swap:2;
> +	bool			is_kmod:1;
> +	u8		 adjust_symbols:1;
> +	u8		 has_build_id:1;
> +	u8		 header_build_id:1;
> +	u8		 has_srcline:1;
> +	u8		 hit:1;
> +	u8		 annotate_warned:1;
> +	u8		 auxtrace_warned:1;
> +	u8		 short_name_allocated:1;
> +	u8		 long_name_allocated:1;
> +	u8		 is_64_bit:1;
> +	bool		 sorted_by_name;
> +	bool		 loaded;
> +	u8		 rel;
>  	char		 name[];
>  };
>  
> -- 
> 2.44.0.396.g6e790dbe36-goog
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ