lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250613134943.3186517-8-kan.liang@linux.intel.com>
Date: Fri, 13 Jun 2025 06:49:38 -0700
From: kan.liang@...ux.intel.com
To: peterz@...radead.org,
	mingo@...hat.com,
	acme@...nel.org,
	namhyung@...nel.org,
	tglx@...utronix.de,
	dave.hansen@...ux.intel.com,
	irogers@...gle.com,
	adrian.hunter@...el.com,
	jolsa@...nel.org,
	alexander.shishkin@...ux.intel.com,
	linux-kernel@...r.kernel.org
Cc: dapeng1.mi@...ux.intel.com,
	ak@...ux.intel.com,
	zide.chen@...el.com,
	Kan Liang <kan.liang@...ux.intel.com>
Subject: [RFC PATCH 07/12] perf/x86: Add YMMH in extended regs

From: Kan Liang <kan.liang@...ux.intel.com>

Support the YMMH registers as extended registers. They can be configured
via sample_ext_regs_intr/user.

Only a PMU with the PERF_PMU_CAP_EXTENDED_REGS2 capability supports the
feature. The register values are retrieved via the XSAVES instruction.

Add a sanity check in perf_reg_validate().

Signed-off-by: Kan Liang <kan.liang@...ux.intel.com>
---
 arch/x86/events/core.c                | 26 ++++++++++++++
 arch/x86/events/perf_event.h          | 22 ++++++++++++
 arch/x86/include/asm/perf_event.h     |  1 +
 arch/x86/include/uapi/asm/perf_regs.h | 28 +++++++++++++++
 arch/x86/kernel/perf_regs.c           | 49 +++++++++++++++++++++++++--
 5 files changed, 124 insertions(+), 2 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 6b1c347cc17a..91039c0256b3 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -422,6 +422,14 @@ static void x86_pmu_get_ext_regs(struct x86_perf_regs *perf_regs, u64 mask)
 	xcomp_bv = xregs_xsave->header.xcomp_bv;
 	if (mask & XFEATURE_MASK_SSE && xcomp_bv & XFEATURE_SSE)
 		perf_regs->xmm_regs = (u64 *)xregs_xsave->i387.xmm_space;
+
+	xsave += FXSAVE_SIZE + XSAVE_HDR_SIZE;
+
+	/* The XSAVES instruction always uses the compacted format */
+	if (mask & XFEATURE_MASK_YMM && xcomp_bv & XFEATURE_MASK_YMM) {
+		perf_regs->ymmh_regs = xsave;
+		xsave += XSAVE_YMM_SIZE;
+	}
 }
 
 static void release_ext_regs_buffers(void)
@@ -447,6 +455,9 @@ static void reserve_ext_regs_buffers(void)
 
 	size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
 
+	if (x86_pmu.ext_regs_mask & BIT_ULL(X86_EXT_REGS_YMM))
+		size += XSAVE_YMM_SIZE;
+
 	/* XSAVE feature requires 64-byte alignment. */
 	size += 64;
 
@@ -712,6 +723,13 @@ int x86_pmu_hw_config(struct perf_event *event)
 			if (!(x86_pmu.ext_regs_mask & BIT_ULL(X86_EXT_REGS_XMM)))
 				return -EINVAL;
 		}
+		if (event_has_extended_regs2(event)) {
+			if (!(event->pmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS2))
+				return -EINVAL;
+			if (x86_pmu_get_event_num_ext_regs(event, X86_EXT_REGS_YMM) &&
+			    !(x86_pmu.ext_regs_mask & BIT_ULL(X86_EXT_REGS_YMM)))
+				return -EINVAL;
+		}
 	}
 	return x86_setup_perfctr(event);
 }
@@ -1765,6 +1783,7 @@ void x86_pmu_setup_regs_data(struct perf_event *event,
 	struct x86_perf_regs *perf_regs = container_of(regs, struct x86_perf_regs, regs);
 	u64 sample_type = event->attr.sample_type;
 	u64 mask = 0;
+	int num;
 
 	if (!(event->attr.sample_type & (PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)))
 		return;
@@ -1799,6 +1818,13 @@ void x86_pmu_setup_regs_data(struct perf_event *event,
 		mask |= XFEATURE_MASK_SSE;
 	}
 
+	num = x86_pmu_get_event_num_ext_regs(event, X86_EXT_REGS_YMM);
+	if (num) {
+		perf_regs->ymmh_regs = NULL;
+		mask |= XFEATURE_MASK_YMM;
+		data->dyn_size += num * PERF_X86_EXT_REG_YMMH_SIZE * sizeof(u64);
+	}
+
 	mask &= ~ignore_mask;
 	if (mask)
 		x86_pmu_get_ext_regs(perf_regs, mask);
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index b48f4215f37c..911916bc8e36 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -689,6 +689,7 @@ enum {
 
 enum {
 	X86_EXT_REGS_XMM = 0,
+	X86_EXT_REGS_YMM,
 };
 
 #define PERF_PEBS_DATA_SOURCE_MAX	0x100
@@ -1319,6 +1320,27 @@ static inline u64 x86_pmu_get_event_config(struct perf_event *event)
 	return event->attr.config & hybrid(event->pmu, config_mask);
 }
 
+static inline int get_num_ext_regs(u64 *ext_regs, unsigned int type)
+{
+	u64 mask;
+
+	switch (type) {
+	case X86_EXT_REGS_YMM:
+		mask = GENMASK_ULL(PERF_REG_X86_YMMH15, PERF_REG_X86_YMMH0);
+		return hweight64(ext_regs[0] & mask);
+	default:
+		return 0;
+	}
+	return 0;
+}
+
+static inline int x86_pmu_get_event_num_ext_regs(struct perf_event *event,
+						 unsigned int type)
+{
+	return get_num_ext_regs(event->attr.sample_ext_regs_intr, type) +
+	       get_num_ext_regs(event->attr.sample_ext_regs_user, type);
+}
+
 extern struct event_constraint emptyconstraint;
 
 extern struct event_constraint unconstrained;
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 70d1d94aca7e..c30571f4de26 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -593,6 +593,7 @@ struct pt_regs;
 struct x86_perf_regs {
 	struct pt_regs	regs;
 	u64		*xmm_regs;
+	u64		*ymmh_regs;
 };
 
 extern unsigned long perf_arch_instruction_pointer(struct pt_regs *regs);
diff --git a/arch/x86/include/uapi/asm/perf_regs.h b/arch/x86/include/uapi/asm/perf_regs.h
index 7c9d2bb3833b..f37644513e33 100644
--- a/arch/x86/include/uapi/asm/perf_regs.h
+++ b/arch/x86/include/uapi/asm/perf_regs.h
@@ -55,4 +55,32 @@ enum perf_event_x86_regs {
 
 #define PERF_REG_EXTENDED_MASK	(~((1ULL << PERF_REG_X86_XMM0) - 1))
 
+enum perf_event_x86_ext_regs {
+	/* YMMH Registers */
+	PERF_REG_X86_YMMH0	= 0,
+	PERF_REG_X86_YMMH1,
+	PERF_REG_X86_YMMH2,
+	PERF_REG_X86_YMMH3,
+	PERF_REG_X86_YMMH4,
+	PERF_REG_X86_YMMH5,
+	PERF_REG_X86_YMMH6,
+	PERF_REG_X86_YMMH7,
+	PERF_REG_X86_YMMH8,
+	PERF_REG_X86_YMMH9,
+	PERF_REG_X86_YMMH10,
+	PERF_REG_X86_YMMH11,
+	PERF_REG_X86_YMMH12,
+	PERF_REG_X86_YMMH13,
+	PERF_REG_X86_YMMH14,
+	PERF_REG_X86_YMMH15,
+
+	PERF_REG_X86_EXT_REGS_MAX = PERF_REG_X86_YMMH15,
+};
+
+enum perf_event_x86_ext_reg_size {
+	PERF_X86_EXT_REG_YMMH_SIZE	= 2,
+
+	/* max of PERF_REG_X86_XXX_SIZE */
+	PERF_X86_EXT_REG_SIZE_MAX	= PERF_X86_EXT_REG_YMMH_SIZE,
+};
 #endif /* _ASM_X86_PERF_REGS_H */
diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
index b9d5106afc26..f12ef60a1a8a 100644
--- a/arch/x86/kernel/perf_regs.c
+++ b/arch/x86/kernel/perf_regs.c
@@ -57,10 +57,46 @@ static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = {
 #endif
 };
 
+static_assert(PERF_REG_X86_EXT_REGS_MAX < PERF_ATTR_EXT_REGS_SIZE * 64);
+static_assert(PERF_X86_EXT_REG_SIZE_MAX <= PERF_EXT_REGS_SIZE_MAX);
+
+static inline u64 __perf_ext_reg_value(u64 *ext, int *ext_size,
+				       int idx, u64 *regs, int size)
+{
+	if (!regs)
+		return 0;
+	memcpy(ext, &regs[idx * size], sizeof(u64) * size);
+	*ext_size = size;
+	return ext[0];
+}
+
+static u64 perf_ext_reg_value(struct pt_regs *regs, int idx,
+			      u64 *ext, int *ext_size)
+{
+	struct x86_perf_regs *perf_regs;
+
+	perf_regs = container_of(regs, struct x86_perf_regs, regs);
+	switch (idx) {
+		case PERF_REG_X86_YMMH0 ... PERF_REG_X86_YMMH15:
+			return __perf_ext_reg_value(ext, ext_size,
+						    idx - PERF_REG_X86_YMMH0,
+						    perf_regs->ymmh_regs,
+						    PERF_X86_EXT_REG_YMMH_SIZE);
+		default:
+			WARN_ON_ONCE(1);
+			*ext_size = 0;
+			break;
+	}
+	return 0;
+}
+
 u64 perf_reg_value(struct pt_regs *regs, int idx, u64 *ext, int *ext_size)
 {
 	struct x86_perf_regs *perf_regs;
 
+	if (ext && ext_size)
+		return perf_ext_reg_value(regs, idx, ext, ext_size);
+
 	if (WARN_ON_ONCE(ext || ext_size))
 		return 0;
 
@@ -117,13 +153,22 @@ void perf_get_regs_user(struct perf_regs *regs_user,
 		       (1ULL << PERF_REG_X86_FS) | \
 		       (1ULL << PERF_REG_X86_GS))
 
+static_assert (PERF_ATTR_EXT_REGS_SIZE == 2);
+
 int perf_reg_validate(u64 mask, u64 *mask_ext)
 {
-	if (mask_ext)
+	if (!mask && !mask_ext)
 		return -EINVAL;
-	if (!mask || (mask & (REG_NOSUPPORT | PERF_REG_X86_RESERVED)))
+	if (mask && (mask & (REG_NOSUPPORT | PERF_REG_X86_RESERVED)))
 		return -EINVAL;
 
+	if (mask_ext) {
+		int h = mask_ext[1] ? fls64(mask_ext[1]) + 64 : fls64(mask_ext[0]);
+
+		if (h > PERF_REG_X86_EXT_REGS_MAX + 1)
+			return -EINVAL;
+	}
+
 	return 0;
 }
 
-- 
2.38.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ