Message-Id: <20250123140721.2496639-20-dapeng1.mi@linux.intel.com>
Date: Thu, 23 Jan 2025 14:07:20 +0000
From: Dapeng Mi <dapeng1.mi@...ux.intel.com>
To: Peter Zijlstra <peterz@...radead.org>,
	Ingo Molnar <mingo@...hat.com>,
	Arnaldo Carvalho de Melo <acme@...nel.org>,
	Namhyung Kim <namhyung@...nel.org>,
	Ian Rogers <irogers@...gle.com>,
	Adrian Hunter <adrian.hunter@...el.com>,
	Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
	Kan Liang <kan.liang@...ux.intel.com>,
	Andi Kleen <ak@...ux.intel.com>,
	Eranian Stephane <eranian@...gle.com>
Cc: linux-kernel@...r.kernel.org,
	linux-perf-users@...r.kernel.org,
	Dapeng Mi <dapeng1.mi@...el.com>,
	Dapeng Mi <dapeng1.mi@...ux.intel.com>
Subject: [PATCH 19/20] perf tools: Support capturing more vector registers (x86/Intel part)

Intel architectural PEBS supports capturing more vector registers, such
as the OPMASK/YMM/ZMM registers, in addition to the already supported
XMM registers.

This patch adds Intel-specific support in perf tools for capturing
these new vector registers.

In addition, add SSP to the perf regs. SSP is stored in the general
register group and is selected by sample_regs_intr.
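
For illustration only (this assumes the kernel side of this series is
applied and the CPU supports architectural PEBS; register names follow
the sample_reg_masks table extended by this patch, and ./workload is a
placeholder), the new registers should be selectable by name through
the existing -I/--intr-regs option:

  # list the register names this perf build knows about
  perf record -I?

  # sample SSP plus a few of the new vector registers on a precise event
  perf record -I ssp,ymmh0,zmm16,opmask0 -e cycles:p -- ./workload
  perf script -F ip,iregs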

Co-developed-by: Kan Liang <kan.liang@...ux.intel.com>
Signed-off-by: Kan Liang <kan.liang@...ux.intel.com>
Signed-off-by: Dapeng Mi <dapeng1.mi@...ux.intel.com>
---
 tools/arch/x86/include/uapi/asm/perf_regs.h   | 83 +++++++++++++++-
 tools/perf/arch/x86/util/perf_regs.c          | 99 +++++++++++++++++++
 .../perf/util/perf-regs-arch/perf_regs_x86.c  | 88 +++++++++++++++++
 3 files changed, 269 insertions(+), 1 deletion(-)

diff --git a/tools/arch/x86/include/uapi/asm/perf_regs.h b/tools/arch/x86/include/uapi/asm/perf_regs.h
index 158e353070c3..f723e8bf9963 100644
--- a/tools/arch/x86/include/uapi/asm/perf_regs.h
+++ b/tools/arch/x86/include/uapi/asm/perf_regs.h
@@ -33,7 +33,7 @@ enum perf_event_x86_regs {
 	PERF_REG_X86_64_MAX = PERF_REG_X86_SSP + 1,
 	PERF_REG_INTEL_PT_MAX = PERF_REG_X86_R15 + 1,
 
-	/* These all need two bits set because they are 128bit */
+	/* These all need two bits set because they are 128 bits */
 	PERF_REG_X86_XMM0  = 32,
 	PERF_REG_X86_XMM1  = 34,
 	PERF_REG_X86_XMM2  = 36,
@@ -53,6 +53,87 @@ enum perf_event_x86_regs {
 
 	/* These include both GPRs and XMMX registers */
 	PERF_REG_X86_XMM_MAX = PERF_REG_X86_XMM15 + 2,
+
+	/*
+	 * YMM upper bits need two bits set because they are 128 bits.
+	 * PERF_REG_X86_YMMH0 = 64
+	 */
+	PERF_REG_X86_YMMH0	= PERF_REG_X86_XMM_MAX,
+	PERF_REG_X86_YMMH1	= PERF_REG_X86_YMMH0 + 2,
+	PERF_REG_X86_YMMH2	= PERF_REG_X86_YMMH1 + 2,
+	PERF_REG_X86_YMMH3	= PERF_REG_X86_YMMH2 + 2,
+	PERF_REG_X86_YMMH4	= PERF_REG_X86_YMMH3 + 2,
+	PERF_REG_X86_YMMH5	= PERF_REG_X86_YMMH4 + 2,
+	PERF_REG_X86_YMMH6	= PERF_REG_X86_YMMH5 + 2,
+	PERF_REG_X86_YMMH7	= PERF_REG_X86_YMMH6 + 2,
+	PERF_REG_X86_YMMH8	= PERF_REG_X86_YMMH7 + 2,
+	PERF_REG_X86_YMMH9	= PERF_REG_X86_YMMH8 + 2,
+	PERF_REG_X86_YMMH10	= PERF_REG_X86_YMMH9 + 2,
+	PERF_REG_X86_YMMH11	= PERF_REG_X86_YMMH10 + 2,
+	PERF_REG_X86_YMMH12	= PERF_REG_X86_YMMH11 + 2,
+	PERF_REG_X86_YMMH13	= PERF_REG_X86_YMMH12 + 2,
+	PERF_REG_X86_YMMH14	= PERF_REG_X86_YMMH13 + 2,
+	PERF_REG_X86_YMMH15	= PERF_REG_X86_YMMH14 + 2,
+	PERF_REG_X86_YMMH_MAX	= PERF_REG_X86_YMMH15 + 2,
+
+	/*
+	 * ZMM0-15 upper bits need four bits set because they are 256 bits
+	 * PERF_REG_X86_ZMMH0 = 96
+	 */
+	PERF_REG_X86_ZMMH0	= PERF_REG_X86_YMMH_MAX,
+	PERF_REG_X86_ZMMH1	= PERF_REG_X86_ZMMH0 + 4,
+	PERF_REG_X86_ZMMH2	= PERF_REG_X86_ZMMH1 + 4,
+	PERF_REG_X86_ZMMH3	= PERF_REG_X86_ZMMH2 + 4,
+	PERF_REG_X86_ZMMH4	= PERF_REG_X86_ZMMH3 + 4,
+	PERF_REG_X86_ZMMH5	= PERF_REG_X86_ZMMH4 + 4,
+	PERF_REG_X86_ZMMH6	= PERF_REG_X86_ZMMH5 + 4,
+	PERF_REG_X86_ZMMH7	= PERF_REG_X86_ZMMH6 + 4,
+	PERF_REG_X86_ZMMH8	= PERF_REG_X86_ZMMH7 + 4,
+	PERF_REG_X86_ZMMH9	= PERF_REG_X86_ZMMH8 + 4,
+	PERF_REG_X86_ZMMH10	= PERF_REG_X86_ZMMH9 + 4,
+	PERF_REG_X86_ZMMH11	= PERF_REG_X86_ZMMH10 + 4,
+	PERF_REG_X86_ZMMH12	= PERF_REG_X86_ZMMH11 + 4,
+	PERF_REG_X86_ZMMH13	= PERF_REG_X86_ZMMH12 + 4,
+	PERF_REG_X86_ZMMH14	= PERF_REG_X86_ZMMH13 + 4,
+	PERF_REG_X86_ZMMH15	= PERF_REG_X86_ZMMH14 + 4,
+	PERF_REG_X86_ZMMH_MAX	= PERF_REG_X86_ZMMH15 + 4,
+
+	/*
+	 * ZMM16-31 need eight bits set because they are 512 bits
+	 * PERF_REG_X86_ZMM16 = 160
+	 */
+	PERF_REG_X86_ZMM16	= PERF_REG_X86_ZMMH_MAX,
+	PERF_REG_X86_ZMM17	= PERF_REG_X86_ZMM16 + 8,
+	PERF_REG_X86_ZMM18	= PERF_REG_X86_ZMM17 + 8,
+	PERF_REG_X86_ZMM19	= PERF_REG_X86_ZMM18 + 8,
+	PERF_REG_X86_ZMM20	= PERF_REG_X86_ZMM19 + 8,
+	PERF_REG_X86_ZMM21	= PERF_REG_X86_ZMM20 + 8,
+	PERF_REG_X86_ZMM22	= PERF_REG_X86_ZMM21 + 8,
+	PERF_REG_X86_ZMM23	= PERF_REG_X86_ZMM22 + 8,
+	PERF_REG_X86_ZMM24	= PERF_REG_X86_ZMM23 + 8,
+	PERF_REG_X86_ZMM25	= PERF_REG_X86_ZMM24 + 8,
+	PERF_REG_X86_ZMM26	= PERF_REG_X86_ZMM25 + 8,
+	PERF_REG_X86_ZMM27	= PERF_REG_X86_ZMM26 + 8,
+	PERF_REG_X86_ZMM28	= PERF_REG_X86_ZMM27 + 8,
+	PERF_REG_X86_ZMM29	= PERF_REG_X86_ZMM28 + 8,
+	PERF_REG_X86_ZMM30	= PERF_REG_X86_ZMM29 + 8,
+	PERF_REG_X86_ZMM31	= PERF_REG_X86_ZMM30 + 8,
+	PERF_REG_X86_ZMM_MAX	= PERF_REG_X86_ZMM31 + 8,
+
+	/*
+	 * OPMASK Registers
+	 * PERF_REG_X86_OPMASK0 = 288
+	 */
+	PERF_REG_X86_OPMASK0	= PERF_REG_X86_ZMM_MAX,
+	PERF_REG_X86_OPMASK1	= PERF_REG_X86_OPMASK0 + 1,
+	PERF_REG_X86_OPMASK2	= PERF_REG_X86_OPMASK1 + 1,
+	PERF_REG_X86_OPMASK3	= PERF_REG_X86_OPMASK2 + 1,
+	PERF_REG_X86_OPMASK4	= PERF_REG_X86_OPMASK3 + 1,
+	PERF_REG_X86_OPMASK5	= PERF_REG_X86_OPMASK4 + 1,
+	PERF_REG_X86_OPMASK6	= PERF_REG_X86_OPMASK5 + 1,
+	PERF_REG_X86_OPMASK7	= PERF_REG_X86_OPMASK6 + 1,
+
+	PERF_REG_X86_VEC_MAX	= PERF_REG_X86_OPMASK7 + 1,
 };
 
 #define PERF_REG_EXTENDED_MASK	(~((1ULL << PERF_REG_X86_XMM0) - 1))
diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c
index 52f08498d005..e233e6fe2c72 100644
--- a/tools/perf/arch/x86/util/perf_regs.c
+++ b/tools/perf/arch/x86/util/perf_regs.c
@@ -54,6 +54,67 @@ static const struct sample_reg sample_reg_masks[] = {
 	SMPL_REG2(XMM13, PERF_REG_X86_XMM13),
 	SMPL_REG2(XMM14, PERF_REG_X86_XMM14),
 	SMPL_REG2(XMM15, PERF_REG_X86_XMM15),
+
+	SMPL_REG2_EXT(YMMH0, PERF_REG_X86_YMMH0),
+	SMPL_REG2_EXT(YMMH1, PERF_REG_X86_YMMH1),
+	SMPL_REG2_EXT(YMMH2, PERF_REG_X86_YMMH2),
+	SMPL_REG2_EXT(YMMH3, PERF_REG_X86_YMMH3),
+	SMPL_REG2_EXT(YMMH4, PERF_REG_X86_YMMH4),
+	SMPL_REG2_EXT(YMMH5, PERF_REG_X86_YMMH5),
+	SMPL_REG2_EXT(YMMH6, PERF_REG_X86_YMMH6),
+	SMPL_REG2_EXT(YMMH7, PERF_REG_X86_YMMH7),
+	SMPL_REG2_EXT(YMMH8, PERF_REG_X86_YMMH8),
+	SMPL_REG2_EXT(YMMH9, PERF_REG_X86_YMMH9),
+	SMPL_REG2_EXT(YMMH10, PERF_REG_X86_YMMH10),
+	SMPL_REG2_EXT(YMMH11, PERF_REG_X86_YMMH11),
+	SMPL_REG2_EXT(YMMH12, PERF_REG_X86_YMMH12),
+	SMPL_REG2_EXT(YMMH13, PERF_REG_X86_YMMH13),
+	SMPL_REG2_EXT(YMMH14, PERF_REG_X86_YMMH14),
+	SMPL_REG2_EXT(YMMH15, PERF_REG_X86_YMMH15),
+
+	SMPL_REG4_EXT(ZMMH0, PERF_REG_X86_ZMMH0),
+	SMPL_REG4_EXT(ZMMH1, PERF_REG_X86_ZMMH1),
+	SMPL_REG4_EXT(ZMMH2, PERF_REG_X86_ZMMH2),
+	SMPL_REG4_EXT(ZMMH3, PERF_REG_X86_ZMMH3),
+	SMPL_REG4_EXT(ZMMH4, PERF_REG_X86_ZMMH4),
+	SMPL_REG4_EXT(ZMMH5, PERF_REG_X86_ZMMH5),
+	SMPL_REG4_EXT(ZMMH6, PERF_REG_X86_ZMMH6),
+	SMPL_REG4_EXT(ZMMH7, PERF_REG_X86_ZMMH7),
+	SMPL_REG4_EXT(ZMMH8, PERF_REG_X86_ZMMH8),
+	SMPL_REG4_EXT(ZMMH9, PERF_REG_X86_ZMMH9),
+	SMPL_REG4_EXT(ZMMH10, PERF_REG_X86_ZMMH10),
+	SMPL_REG4_EXT(ZMMH11, PERF_REG_X86_ZMMH11),
+	SMPL_REG4_EXT(ZMMH12, PERF_REG_X86_ZMMH12),
+	SMPL_REG4_EXT(ZMMH13, PERF_REG_X86_ZMMH13),
+	SMPL_REG4_EXT(ZMMH14, PERF_REG_X86_ZMMH14),
+	SMPL_REG4_EXT(ZMMH15, PERF_REG_X86_ZMMH15),
+
+	SMPL_REG8_EXT(ZMM16, PERF_REG_X86_ZMM16),
+	SMPL_REG8_EXT(ZMM17, PERF_REG_X86_ZMM17),
+	SMPL_REG8_EXT(ZMM18, PERF_REG_X86_ZMM18),
+	SMPL_REG8_EXT(ZMM19, PERF_REG_X86_ZMM19),
+	SMPL_REG8_EXT(ZMM20, PERF_REG_X86_ZMM20),
+	SMPL_REG8_EXT(ZMM21, PERF_REG_X86_ZMM21),
+	SMPL_REG8_EXT(ZMM22, PERF_REG_X86_ZMM22),
+	SMPL_REG8_EXT(ZMM23, PERF_REG_X86_ZMM23),
+	SMPL_REG8_EXT(ZMM24, PERF_REG_X86_ZMM24),
+	SMPL_REG8_EXT(ZMM25, PERF_REG_X86_ZMM25),
+	SMPL_REG8_EXT(ZMM26, PERF_REG_X86_ZMM26),
+	SMPL_REG8_EXT(ZMM27, PERF_REG_X86_ZMM27),
+	SMPL_REG8_EXT(ZMM28, PERF_REG_X86_ZMM28),
+	SMPL_REG8_EXT(ZMM29, PERF_REG_X86_ZMM29),
+	SMPL_REG8_EXT(ZMM30, PERF_REG_X86_ZMM30),
+	SMPL_REG8_EXT(ZMM31, PERF_REG_X86_ZMM31),
+
+	SMPL_REG_EXT(OPMASK0, PERF_REG_X86_OPMASK0),
+	SMPL_REG_EXT(OPMASK1, PERF_REG_X86_OPMASK1),
+	SMPL_REG_EXT(OPMASK2, PERF_REG_X86_OPMASK2),
+	SMPL_REG_EXT(OPMASK3, PERF_REG_X86_OPMASK3),
+	SMPL_REG_EXT(OPMASK4, PERF_REG_X86_OPMASK4),
+	SMPL_REG_EXT(OPMASK5, PERF_REG_X86_OPMASK5),
+	SMPL_REG_EXT(OPMASK6, PERF_REG_X86_OPMASK6),
+	SMPL_REG_EXT(OPMASK7, PERF_REG_X86_OPMASK7),
+
 	SMPL_REG_END
 };
 
@@ -283,6 +344,32 @@ const struct sample_reg *arch__sample_reg_masks(void)
 	return sample_reg_masks;
 }
 
+static void check_intr_reg_ext_mask(struct perf_event_attr *attr, int idx,
+				    u64 fmask, unsigned long *mask)
+{
+	u64 src_mask[PERF_NUM_INTR_REGS] = { 0 };
+	int fd;
+
+	attr->sample_regs_intr = 0;
+	attr->sample_regs_intr_ext[idx] = fmask;
+	src_mask[idx + 1] = fmask;
+
+	fd = sys_perf_event_open(attr, 0, -1, -1, 0);
+	if (fd != -1) {
+		close(fd);
+		bitmap_or(mask, mask, (unsigned long *)src_mask,
+			  PERF_NUM_INTR_REGS * 64);
+	}
+}
+
+#define PERF_REG_EXTENDED_YMMH_MASK	GENMASK_ULL(31, 0)
+#define PERF_REG_EXTENDED_ZMMH_1ST_MASK	GENMASK_ULL(63, 32)
+#define PERF_REG_EXTENDED_ZMMH_2ND_MASK	GENMASK_ULL(31, 0)
+#define PERF_REG_EXTENDED_ZMM_1ST_MASK	GENMASK_ULL(63, 32)
+#define PERF_REG_EXTENDED_ZMM_2ND_MASK	GENMASK_ULL(63, 0)
+#define PERF_REG_EXTENDED_ZMM_3RD_MASK	GENMASK_ULL(31, 0)
+#define PERF_REG_EXTENDED_OPMASK_MASK	GENMASK_ULL(39, 32)
+
 void arch__intr_reg_mask(unsigned long *mask)
 {
 	struct perf_event_attr attr = {
@@ -325,6 +412,18 @@ void arch__intr_reg_mask(unsigned long *mask)
 		close(fd);
 		*(u64 *)mask |= PERF_REG_EXTENDED_MASK;
 	}
+
+	/* Check YMMH regs */
+	check_intr_reg_ext_mask(&attr, 0, PERF_REG_EXTENDED_YMMH_MASK, mask);
+	/* Check ZMMH0-15 regs */
+	check_intr_reg_ext_mask(&attr, 0, PERF_REG_EXTENDED_ZMMH_1ST_MASK, mask);
+	check_intr_reg_ext_mask(&attr, 1, PERF_REG_EXTENDED_ZMMH_2ND_MASK, mask);
+	/* Check ZMM16-31 regs */
+	check_intr_reg_ext_mask(&attr, 1, PERF_REG_EXTENDED_ZMM_1ST_MASK, mask);
+	check_intr_reg_ext_mask(&attr, 2, PERF_REG_EXTENDED_ZMM_2ND_MASK, mask);
+	check_intr_reg_ext_mask(&attr, 3, PERF_REG_EXTENDED_ZMM_3RD_MASK, mask);
+	/* Check OPMASK regs */
+	check_intr_reg_ext_mask(&attr, 3, PERF_REG_EXTENDED_OPMASK_MASK, mask);
 }
 
 uint64_t arch__user_reg_mask(void)
diff --git a/tools/perf/util/perf-regs-arch/perf_regs_x86.c b/tools/perf/util/perf-regs-arch/perf_regs_x86.c
index 9a909f02bc04..c926046ebddc 100644
--- a/tools/perf/util/perf-regs-arch/perf_regs_x86.c
+++ b/tools/perf/util/perf-regs-arch/perf_regs_x86.c
@@ -78,6 +78,94 @@ const char *__perf_reg_name_x86(int id)
 	XMM(14)
 	XMM(15)
 #undef XMM
+
+#define YMMH(x)					\
+	case PERF_REG_X86_YMMH ## x:		\
+	case PERF_REG_X86_YMMH ## x + 1:	\
+		return "YMMH" #x;
+	YMMH(0)
+	YMMH(1)
+	YMMH(2)
+	YMMH(3)
+	YMMH(4)
+	YMMH(5)
+	YMMH(6)
+	YMMH(7)
+	YMMH(8)
+	YMMH(9)
+	YMMH(10)
+	YMMH(11)
+	YMMH(12)
+	YMMH(13)
+	YMMH(14)
+	YMMH(15)
+#undef YMMH
+
+#define ZMMH(x)					\
+	case PERF_REG_X86_ZMMH ## x:		\
+	case PERF_REG_X86_ZMMH ## x + 1:	\
+	case PERF_REG_X86_ZMMH ## x + 2:	\
+	case PERF_REG_X86_ZMMH ## x + 3:	\
+		return "ZMMLH" #x;
+	ZMMH(0)
+	ZMMH(1)
+	ZMMH(2)
+	ZMMH(3)
+	ZMMH(4)
+	ZMMH(5)
+	ZMMH(6)
+	ZMMH(7)
+	ZMMH(8)
+	ZMMH(9)
+	ZMMH(10)
+	ZMMH(11)
+	ZMMH(12)
+	ZMMH(13)
+	ZMMH(14)
+	ZMMH(15)
+#undef ZMMH
+
+#define ZMM(x)				\
+	case PERF_REG_X86_ZMM ## x:		\
+	case PERF_REG_X86_ZMM ## x + 1:	\
+	case PERF_REG_X86_ZMM ## x + 2:	\
+	case PERF_REG_X86_ZMM ## x + 3:	\
+	case PERF_REG_X86_ZMM ## x + 4:	\
+	case PERF_REG_X86_ZMM ## x + 5:	\
+	case PERF_REG_X86_ZMM ## x + 6:	\
+	case PERF_REG_X86_ZMM ## x + 7:	\
+		return "ZMM" #x;
+	ZMM(16)
+	ZMM(17)
+	ZMM(18)
+	ZMM(19)
+	ZMM(20)
+	ZMM(21)
+	ZMM(22)
+	ZMM(23)
+	ZMM(24)
+	ZMM(25)
+	ZMM(26)
+	ZMM(27)
+	ZMM(28)
+	ZMM(29)
+	ZMM(30)
+	ZMM(31)
+#undef ZMM
+
+#define OPMASK(x)				\
+	case PERF_REG_X86_OPMASK ## x:		\
+		return "opmask" #x;
+
+	OPMASK(0)
+	OPMASK(1)
+	OPMASK(2)
+	OPMASK(3)
+	OPMASK(4)
+	OPMASK(5)
+	OPMASK(6)
+	OPMASK(7)
+#undef OPMASK
 	default:
 		return NULL;
 	}
-- 
2.40.1
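
A minimal standalone C sketch of the register-numbering scheme used
above (the enum values are copied from this patch; nothing here is part
of the series): every perf_event_x86_regs value is a bit index into one
logical bitmap spanning sample_regs_intr (bits 0-63) followed by
sample_regs_intr_ext[] at 64 bits per slot, which is why
check_intr_reg_ext_mask() stores fmask at src_mask[idx + 1].

#include <stdio.h>

/*
 * Each register value is a bit index into the combined bitmap.
 * Wide registers occupy one bit per 64-bit chunk of state
 * (YMMH: 2 bits, ZMMH: 4 bits, ZMM16-31: 8 bits, OPMASK: 1 bit).
 */
static void reg_bit_location(unsigned int reg, unsigned int *slot,
			     unsigned int *bit)
{
	*slot = reg / 64;	/* 0: sample_regs_intr, n: ..._ext[n-1] */
	*bit  = reg % 64;
}

int main(void)
{
	/* Enum values taken from the patch above. */
	const struct { const char *name; unsigned int reg; } regs[] = {
		{ "YMMH0",   64  },	/* PERF_REG_X86_YMMH0   */
		{ "ZMMH0",   96  },	/* PERF_REG_X86_ZMMH0   */
		{ "ZMM16",   160 },	/* PERF_REG_X86_ZMM16   */
		{ "OPMASK0", 288 },	/* PERF_REG_X86_OPMASK0 */
	};
	unsigned int slot, bit;

	for (unsigned int i = 0; i < sizeof(regs) / sizeof(regs[0]); i++) {
		reg_bit_location(regs[i].reg, &slot, &bit);
		printf("%-7s -> slot %u, bit %u\n", regs[i].name, slot, bit);
	}
	/*
	 * Prints:
	 *   YMMH0   -> slot 1, bit 0    (ext[0] bit 0)
	 *   ZMMH0   -> slot 1, bit 32   (ext[0] bit 32)
	 *   ZMM16   -> slot 2, bit 32   (ext[1] bit 32)
	 *   OPMASK0 -> slot 4, bit 32   (ext[3] bit 32)
	 */
	return 0;
}

This also matches the per-slot GENMASK_ULL() defines in the patch: the
YMMH registers occupy ext[0] bits 0-31, ZMMH0-15 span ext[0] bits 32-63
plus ext[1] bits 0-31, ZMM16-31 span ext[1] bits 32-63 through ext[3]
bits 0-31, and the eight OPMASK registers land in ext[3] bits 32-39.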

