Message-Id: <20250613134943.3186517-8-kan.liang@linux.intel.com>
Date: Fri, 13 Jun 2025 06:49:38 -0700
From: kan.liang@...ux.intel.com
To: peterz@...radead.org,
mingo@...hat.com,
acme@...nel.org,
namhyung@...nel.org,
tglx@...utronix.de,
dave.hansen@...ux.intel.com,
irogers@...gle.com,
adrian.hunter@...el.com,
jolsa@...nel.org,
alexander.shishkin@...ux.intel.com,
linux-kernel@...r.kernel.org
Cc: dapeng1.mi@...ux.intel.com,
ak@...ux.intel.com,
zide.chen@...el.com,
Kan Liang <kan.liang@...ux.intel.com>
Subject: [RFC PATCH 07/12] perf/x86: Add YMMH in extended regs
From: Kan Liang <kan.liang@...ux.intel.com>
Support the YMMH registers as extended registers. They can be requested
via sample_ext_regs_intr/user.
Only a PMU with the PERF_PMU_CAP_EXTENDED_REGS2 capability supports the
feature.
The register values are retrieved via XSAVES.
Add a sanity check in perf_reg_validate().
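For example, a minimal sketch of requesting all YMMH registers on
interrupt samples (assuming the sample_ext_regs_intr layout introduced
earlier in this series):
  struct perf_event_attr attr = {};
  attr.sample_type = PERF_SAMPLE_REGS_INTR;
  /* bit N of word 0 selects PERF_REG_X86_YMMH0 + N */
  attr.sample_ext_regs_intr[0] = (1ULL << (PERF_REG_X86_YMMH15 + 1)) - 1;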
Signed-off-by: Kan Liang <kan.liang@...ux.intel.com>
---
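Note: each YMMH register occupies PERF_X86_EXT_REG_YMMH_SIZE (2) u64s in
the sample record, so requesting e.g. YMMH0-YMMH3 grows each record by
4 * 2 * sizeof(u64) = 64 bytes, matching the dyn_size accounting in
x86_pmu_setup_regs_data().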
arch/x86/events/core.c | 26 ++++++++++++++
arch/x86/events/perf_event.h | 22 ++++++++++++
arch/x86/include/asm/perf_event.h | 1 +
arch/x86/include/uapi/asm/perf_regs.h | 28 +++++++++++++++
arch/x86/kernel/perf_regs.c | 49 +++++++++++++++++++++++++--
5 files changed, 124 insertions(+), 2 deletions(-)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 6b1c347cc17a..91039c0256b3 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -422,6 +422,14 @@ static void x86_pmu_get_ext_regs(struct x86_perf_regs *perf_regs, u64 mask)
xcomp_bv = xregs_xsave->header.xcomp_bv;
if (mask & XFEATURE_MASK_SSE && xcomp_bv & XFEATURE_MASK_SSE)
perf_regs->xmm_regs = (u64 *)xregs_xsave->i387.xmm_space;
+
+ xsave += FXSAVE_SIZE + XSAVE_HDR_SIZE;
+
+ /* The XSAVES instruction always uses the compacted format */
+ if (mask & XFEATURE_MASK_YMM && xcomp_bv & XFEATURE_MASK_YMM) {
+ perf_regs->ymmh_regs = xsave;
+ xsave += XSAVE_YMM_SIZE;
+ }
}
static void release_ext_regs_buffers(void)
@@ -447,6 +455,9 @@ static void reserve_ext_regs_buffers(void)
size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
+ if (x86_pmu.ext_regs_mask & BIT_ULL(X86_EXT_REGS_YMM))
+ size += XSAVE_YMM_SIZE;
+
/* XSAVE feature requires 64-byte alignment. */
size += 64;
@@ -712,6 +723,13 @@ int x86_pmu_hw_config(struct perf_event *event)
if (!(x86_pmu.ext_regs_mask & BIT_ULL(X86_EXT_REGS_XMM)))
return -EINVAL;
}
+ if (event_has_extended_regs2(event)) {
+ if (!(event->pmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS2))
+ return -EINVAL;
+ if (x86_pmu_get_event_num_ext_regs(event, X86_EXT_REGS_YMM) &&
+ !(x86_pmu.ext_regs_mask & BIT_ULL(X86_EXT_REGS_YMM)))
+ return -EINVAL;
+ }
}
return x86_setup_perfctr(event);
}
@@ -1765,6 +1783,7 @@ void x86_pmu_setup_regs_data(struct perf_event *event,
struct x86_perf_regs *perf_regs = container_of(regs, struct x86_perf_regs, regs);
u64 sample_type = event->attr.sample_type;
u64 mask = 0;
+ int num;
if (!(event->attr.sample_type & (PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)))
return;
@@ -1799,6 +1818,13 @@ void x86_pmu_setup_regs_data(struct perf_event *event,
mask |= XFEATURE_MASK_SSE;
}
+ num = x86_pmu_get_event_num_ext_regs(event, X86_EXT_REGS_YMM);
+ if (num) {
+ perf_regs->ymmh_regs = NULL;
+ mask |= XFEATURE_MASK_YMM;
+ data->dyn_size += num * PERF_X86_EXT_REG_YMMH_SIZE * sizeof(u64);
+ }
+
mask &= ~ignore_mask;
if (mask)
x86_pmu_get_ext_regs(perf_regs, mask);
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index b48f4215f37c..911916bc8e36 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -689,6 +689,7 @@ enum {
enum {
X86_EXT_REGS_XMM = 0,
+ X86_EXT_REGS_YMM,
};
#define PERF_PEBS_DATA_SOURCE_MAX 0x100
@@ -1319,6 +1320,27 @@ static inline u64 x86_pmu_get_event_config(struct perf_event *event)
return event->attr.config & hybrid(event->pmu, config_mask);
}
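+/* Count how many registers of @type are requested in the @ext_regs mask. */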
+static inline int get_num_ext_regs(u64 *ext_regs, unsigned int type)
+{
+ u64 mask;
+
+ switch (type) {
+ case X86_EXT_REGS_YMM:
+ mask = GENMASK_ULL(PERF_REG_X86_YMMH15, PERF_REG_X86_YMMH0);
+ return hweight64(ext_regs[0] & mask);
+ default:
+ return 0;
+ }
+}
+
+static inline int x86_pmu_get_event_num_ext_regs(struct perf_event *event,
+ unsigned int type)
+{
+ return get_num_ext_regs(event->attr.sample_ext_regs_intr, type) +
+ get_num_ext_regs(event->attr.sample_ext_regs_user, type);
+}
+
extern struct event_constraint emptyconstraint;
extern struct event_constraint unconstrained;
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 70d1d94aca7e..c30571f4de26 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -593,6 +593,7 @@ struct pt_regs;
struct x86_perf_regs {
struct pt_regs regs;
u64 *xmm_regs;
+ u64 *ymmh_regs;
};
extern unsigned long perf_arch_instruction_pointer(struct pt_regs *regs);
diff --git a/arch/x86/include/uapi/asm/perf_regs.h b/arch/x86/include/uapi/asm/perf_regs.h
index 7c9d2bb3833b..f37644513e33 100644
--- a/arch/x86/include/uapi/asm/perf_regs.h
+++ b/arch/x86/include/uapi/asm/perf_regs.h
@@ -55,4 +55,32 @@ enum perf_event_x86_regs {
#define PERF_REG_EXTENDED_MASK (~((1ULL << PERF_REG_X86_XMM0) - 1))
+enum perf_event_x86_ext_regs {
+ /* YMMH Registers */
+ PERF_REG_X86_YMMH0 = 0,
+ PERF_REG_X86_YMMH1,
+ PERF_REG_X86_YMMH2,
+ PERF_REG_X86_YMMH3,
+ PERF_REG_X86_YMMH4,
+ PERF_REG_X86_YMMH5,
+ PERF_REG_X86_YMMH6,
+ PERF_REG_X86_YMMH7,
+ PERF_REG_X86_YMMH8,
+ PERF_REG_X86_YMMH9,
+ PERF_REG_X86_YMMH10,
+ PERF_REG_X86_YMMH11,
+ PERF_REG_X86_YMMH12,
+ PERF_REG_X86_YMMH13,
+ PERF_REG_X86_YMMH14,
+ PERF_REG_X86_YMMH15,
+
+ PERF_REG_X86_EXT_REGS_MAX = PERF_REG_X86_YMMH15,
+};
+
+enum perf_event_x86_ext_reg_size {
+ PERF_X86_EXT_REG_YMMH_SIZE = 2,
+
+ /* max of PERF_REG_X86_XXX_SIZE */
+ PERF_X86_EXT_REG_SIZE_MAX = PERF_X86_EXT_REG_YMMH_SIZE,
+};
#endif /* _ASM_X86_PERF_REGS_H */
diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
index b9d5106afc26..f12ef60a1a8a 100644
--- a/arch/x86/kernel/perf_regs.c
+++ b/arch/x86/kernel/perf_regs.c
@@ -57,10 +57,46 @@ static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = {
#endif
};
+static_assert(PERF_REG_X86_EXT_REGS_MAX < PERF_ATTR_EXT_REGS_SIZE * 64);
+static_assert(PERF_X86_EXT_REG_SIZE_MAX <= PERF_EXT_REGS_SIZE_MAX);
+
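+/* Copy one multi-u64 register into @ext and return its first u64. */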
+static inline u64 __perf_ext_reg_value(u64 *ext, int *ext_size,
+ int idx, u64 *regs, int size)
+{
+ if (!regs)
+ return 0;
+ memcpy(ext, &regs[idx * size], sizeof(u64) * size);
+ *ext_size = size;
+ return ext[0];
+}
+
+static u64 perf_ext_reg_value(struct pt_regs *regs, int idx,
+ u64 *ext, int *ext_size)
+{
+ struct x86_perf_regs *perf_regs;
+
+ perf_regs = container_of(regs, struct x86_perf_regs, regs);
+ switch (idx) {
+ case PERF_REG_X86_YMMH0 ... PERF_REG_X86_YMMH15:
+ return __perf_ext_reg_value(ext, ext_size,
+ idx - PERF_REG_X86_YMMH0,
+ perf_regs->ymmh_regs,
+ PERF_X86_EXT_REG_YMMH_SIZE);
+ default:
+ WARN_ON_ONCE(1);
+ *ext_size = 0;
+ break;
+ }
+ return 0;
+}
+
u64 perf_reg_value(struct pt_regs *regs, int idx, u64 *ext, int *ext_size)
{
struct x86_perf_regs *perf_regs;
+ if (ext && ext_size)
+ return perf_ext_reg_value(regs, idx, ext, ext_size);
+
if (WARN_ON_ONCE(ext || ext_size))
return 0;
@@ -117,13 +153,22 @@ void perf_get_regs_user(struct perf_regs *regs_user,
(1ULL << PERF_REG_X86_FS) | \
(1ULL << PERF_REG_X86_GS))
+static_assert(PERF_ATTR_EXT_REGS_SIZE == 2);
+
int perf_reg_validate(u64 mask, u64 *mask_ext)
{
- if (mask_ext)
+ if (!mask && !mask_ext)
return -EINVAL;
- if (!mask || (mask & (REG_NOSUPPORT | PERF_REG_X86_RESERVED)))
+ if (mask && (mask & (REG_NOSUPPORT | PERF_REG_X86_RESERVED)))
return -EINVAL;
+ if (mask_ext) {
+ int h = mask_ext[1] ? fls64(mask_ext[1]) + 64 : fls64(mask_ext[0]);
+
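+ /* Reject bits above the highest supported extended register. */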
+ if (h > PERF_REG_X86_EXT_REGS_MAX + 1)
+ return -EINVAL;
+ }
+
return 0;
}
--
2.38.1