lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <75dd56a2d467515dc354d8fc8d5acd841d63b565.1738592865.git.dvyukov@google.com>
Date: Mon,  3 Feb 2025 15:30:43 +0100
From: Dmitry Vyukov <dvyukov@...gle.com>
To: namhyung@...nel.org, irogers@...gle.com, acme@...nel.org
Cc: linux-perf-users@...r.kernel.org, linux-kernel@...r.kernel.org, 
	Dmitry Vyukov <dvyukov@...gle.com>
Subject: [PATCH v3 8/8] perf hist: Shrink struct hist_entry size

Reorder the struct hist_entry fields by size to reduce padding, and
shrink struct simd_flags from 8 bytes to 1 byte by changing the base
type of its bitfields from u64 to u8.

This reduces the size of struct hist_entry by 8 bytes (592->584) and
replaces three small holes (4, 2 and 1 bytes) with a single, more
usable 6-byte hole.

Signed-off-by: Dmitry Vyukov <dvyukov@...gle.com>
Cc: Namhyung Kim <namhyung@...nel.org>
Cc: Arnaldo Carvalho de Melo <acme@...nel.org>
Cc: Ian Rogers <irogers@...gle.com>
Cc: linux-perf-users@...r.kernel.org
Cc: linux-kernel@...r.kernel.org

---
Pahole output before:

struct hist_entry {
	struct rb_node             rb_node_in __attribute__((__aligned__(8))); /*     0    24 */
	struct rb_node             rb_node __attribute__((__aligned__(8))); /*    24    24 */
	union {
		struct list_head   node;                 /*    48    16 */
		struct list_head   head;                 /*    48    16 */
	} pairs;                                         /*    48    16 */
	/* --- cacheline 1 boundary (64 bytes) --- */
	struct he_stat             stat;                 /*    64    80 */

	/* XXX last struct has 4 bytes of padding */

	/* --- cacheline 2 boundary (128 bytes) was 16 bytes ago --- */
	struct he_stat *           stat_acc;             /*   144     8 */
	struct map_symbol          ms;                   /*   152    24 */
	struct thread *            thread;               /*   176     8 */
	struct comm *              comm;                 /*   184     8 */
	/* --- cacheline 3 boundary (192 bytes) --- */
	struct namespace_id        cgroup_id;            /*   192    16 */
	u64                        cgroup;               /*   208     8 */
	u64                        ip;                   /*   216     8 */
	u64                        transaction;          /*   224     8 */
	s32                        socket;               /*   232     4 */
	s32                        cpu;                  /*   236     4 */
	int                        parallelism;          /*   240     4 */

	/* XXX 4 bytes hole, try to pack */

	u64                        code_page_size;       /*   248     8 */
	/* --- cacheline 4 boundary (256 bytes) --- */
	u64                        weight;               /*   256     8 */
	u64                        ins_lat;              /*   264     8 */
	u64                        p_stage_cyc;          /*   272     8 */
	u8                         cpumode;              /*   280     1 */
	u8                         depth;                /*   281     1 */

	/* XXX 2 bytes hole, try to pack */

	int                        mem_type_off;         /*   284     4 */
	struct simd_flags          simd_flags;           /*   288     8 */
	_Bool                      dummy;                /*   296     1 */
	_Bool                      leaf;                 /*   297     1 */
	char                       level;                /*   298     1 */

	/* XXX 1 byte hole, try to pack */

	filter_mask_t              filtered;             /*   300     2 */
	u16                        callchain_size;       /*   302     2 */
	union {
		struct hist_entry_diff diff;             /*   304   120 */
		struct {
			u16        row_offset;           /*   304     2 */
			u16        nr_rows;              /*   306     2 */
			_Bool      init_have_children;   /*   308     1 */
			_Bool      unfolded;             /*   309     1 */
			_Bool      has_children;         /*   310     1 */
			_Bool      has_no_entry;         /*   311     1 */
		};                                       /*   304     8 */
	};                                               /*   304   120 */
	/* --- cacheline 6 boundary (384 bytes) was 40 bytes ago --- */
	char *                     srcline;              /*   424     8 */
	char *                     srcfile;              /*   432     8 */
	struct symbol *            parent;               /*   440     8 */
	/* --- cacheline 7 boundary (448 bytes) --- */
	struct branch_info *       branch_info;          /*   448     8 */
	long int                   time;                 /*   456     8 */
	struct hists *             hists;                /*   464     8 */
	struct mem_info *          mem_info;             /*   472     8 */
	struct block_info *        block_info;           /*   480     8 */
	struct kvm_info *          kvm_info;             /*   488     8 */
	void *                     raw_data;             /*   496     8 */
	u32                        raw_size;             /*   504     4 */
	int                        num_res;              /*   508     4 */
	/* --- cacheline 8 boundary (512 bytes) --- */
	struct res_sample *        res_samples;          /*   512     8 */
	void *                     trace_output;         /*   520     8 */
	struct perf_hpp_list *     hpp_list;             /*   528     8 */
	struct hist_entry *        parent_he;            /*   536     8 */
	struct hist_entry_ops *    ops;                  /*   544     8 */
	struct annotated_data_type * mem_type;           /*   552     8 */
	union {
		struct {
			struct rb_root_cached hroot_in;  /*   560    16 */
			/* --- cacheline 9 boundary (576 bytes) --- */
			struct rb_root_cached hroot_out; /*   576    16 */
		};                                       /*   560    32 */
		struct rb_root     sorted_chain;         /*   560     8 */
	};                                               /*   560    32 */
	/* --- cacheline 9 boundary (576 bytes) was 16 bytes ago --- */
	struct callchain_root      callchain[] __attribute__((__aligned__(8))); /*   592     0 */

	/* size: 592, cachelines: 10, members: 49 */
	/* sum members: 585, holes: 3, sum holes: 7 */
	/* paddings: 1, sum paddings: 4 */
	/* forced alignments: 3 */
	/* last cacheline: 16 bytes */
} __attribute__((__aligned__(8)));

After:

struct hist_entry {
	struct rb_node             rb_node_in __attribute__((__aligned__(8))); /*     0    24 */
	struct rb_node             rb_node __attribute__((__aligned__(8))); /*    24    24 */
	union {
		struct list_head   node;                 /*    48    16 */
		struct list_head   head;                 /*    48    16 */
	} pairs;                                         /*    48    16 */
	/* --- cacheline 1 boundary (64 bytes) --- */
	struct he_stat             stat;                 /*    64    80 */

	/* XXX last struct has 4 bytes of padding */

	/* --- cacheline 2 boundary (128 bytes) was 16 bytes ago --- */
	struct he_stat *           stat_acc;             /*   144     8 */
	struct map_symbol          ms;                   /*   152    24 */
	struct thread *            thread;               /*   176     8 */
	struct comm *              comm;                 /*   184     8 */
	/* --- cacheline 3 boundary (192 bytes) --- */
	struct namespace_id        cgroup_id;            /*   192    16 */
	u64                        cgroup;               /*   208     8 */
	u64                        ip;                   /*   216     8 */
	u64                        transaction;          /*   224     8 */
	u64                        code_page_size;       /*   232     8 */
	u64                        weight;               /*   240     8 */
	u64                        ins_lat;              /*   248     8 */
	/* --- cacheline 4 boundary (256 bytes) --- */
	u64                        p_stage_cyc;          /*   256     8 */
	s32                        socket;               /*   264     4 */
	s32                        cpu;                  /*   268     4 */
	int                        parallelism;          /*   272     4 */
	int                        mem_type_off;         /*   276     4 */
	u8                         cpumode;              /*   280     1 */
	u8                         depth;                /*   281     1 */
	struct simd_flags          simd_flags;           /*   282     1 */
	_Bool                      dummy;                /*   283     1 */
	_Bool                      leaf;                 /*   284     1 */
	char                       level;                /*   285     1 */
	filter_mask_t              filtered;             /*   286     2 */
	u16                        callchain_size;       /*   288     2 */

	/* XXX 6 bytes hole, try to pack */

	union {
		struct hist_entry_diff diff;             /*   296   120 */
		struct {
			u16        row_offset;           /*   296     2 */
			u16        nr_rows;              /*   298     2 */
			_Bool      init_have_children;   /*   300     1 */
			_Bool      unfolded;             /*   301     1 */
			_Bool      has_children;         /*   302     1 */
			_Bool      has_no_entry;         /*   303     1 */
		};                                       /*   296     8 */
	};                                               /*   296   120 */
	/* --- cacheline 6 boundary (384 bytes) was 32 bytes ago --- */
	char *                     srcline;              /*   416     8 */
	char *                     srcfile;              /*   424     8 */
	struct symbol *            parent;               /*   432     8 */
	struct branch_info *       branch_info;          /*   440     8 */
	/* --- cacheline 7 boundary (448 bytes) --- */
	long int                   time;                 /*   448     8 */
	struct hists *             hists;                /*   456     8 */
	struct mem_info *          mem_info;             /*   464     8 */
	struct block_info *        block_info;           /*   472     8 */
	struct kvm_info *          kvm_info;             /*   480     8 */
	void *                     raw_data;             /*   488     8 */
	u32                        raw_size;             /*   496     4 */
	int                        num_res;              /*   500     4 */
	struct res_sample *        res_samples;          /*   504     8 */
	/* --- cacheline 8 boundary (512 bytes) --- */
	void *                     trace_output;         /*   512     8 */
	struct perf_hpp_list *     hpp_list;             /*   520     8 */
	struct hist_entry *        parent_he;            /*   528     8 */
	struct hist_entry_ops *    ops;                  /*   536     8 */
	struct annotated_data_type * mem_type;           /*   544     8 */
	union {
		struct {
			struct rb_root_cached hroot_in;  /*   552    16 */
			struct rb_root_cached hroot_out; /*   568    16 */
		};                                       /*   552    32 */
		struct rb_root     sorted_chain;         /*   552     8 */
	};                                               /*   552    32 */
	/* --- cacheline 9 boundary (576 bytes) was 8 bytes ago --- */
	struct callchain_root      callchain[] __attribute__((__aligned__(8))); /*   584     0 */

	/* size: 584, cachelines: 10, members: 49 */
	/* sum members: 578, holes: 1, sum holes: 6 */
	/* paddings: 1, sum paddings: 4 */
	/* forced alignments: 3 */
	/* last cacheline: 8 bytes */
} __attribute__((__aligned__(8)));
---
 tools/perf/util/hist.h   | 8 ++++----
 tools/perf/util/sample.h | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 29d4c7a3d1747..317d06cca8b88 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -239,16 +239,16 @@ struct hist_entry {
 	u64			cgroup;
 	u64			ip;
 	u64			transaction;
-	s32			socket;
-	s32			cpu;
-	int			parallelism;
 	u64			code_page_size;
 	u64			weight;
 	u64			ins_lat;
 	u64			p_stage_cyc;
+	s32			socket;
+	s32			cpu;
+	int			parallelism;
+	int			mem_type_off;
 	u8			cpumode;
 	u8			depth;
-	int			mem_type_off;
 	struct simd_flags	simd_flags;
 
 	/* We are added by hists__add_dummy_entry. */
diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
index 70b2c3135555e..ab756d61cbcd6 100644
--- a/tools/perf/util/sample.h
+++ b/tools/perf/util/sample.h
@@ -67,7 +67,7 @@ struct aux_sample {
 };
 
 struct simd_flags {
-	u64	arch:1,	/* architecture (isa) */
+	u8	arch:1,	/* architecture (isa) */
 		pred:2;	/* predication */
 };
 
-- 
2.48.1.362.g079036d154-goog


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ