Message-Id: <20250613134943.3186517-9-kan.liang@linux.intel.com>
Date: Fri, 13 Jun 2025 06:49:39 -0700
From: kan.liang@...ux.intel.com
To: peterz@...radead.org,
mingo@...hat.com,
acme@...nel.org,
namhyung@...nel.org,
tglx@...utronix.de,
dave.hansen@...ux.intel.com,
irogers@...gle.com,
adrian.hunter@...el.com,
jolsa@...nel.org,
alexander.shishkin@...ux.intel.com,
linux-kernel@...r.kernel.org
Cc: dapeng1.mi@...ux.intel.com,
ak@...ux.intel.com,
zide.chen@...el.com,
Kan Liang <kan.liang@...ux.intel.com>
Subject: [RFC PATCH 08/12] perf/x86: Add APX in extended regs
From: Kan Liang <kan.liang@...ux.intel.com>
Support the APX registers as extended registers. They can be requested
via sample_ext_regs_intr/user.

Only a PMU with the PERF_PMU_CAP_EXTENDED_REGS2 capability supports the
feature. The register values are retrieved via XSAVES.

Define several macros to simplify the code.
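
The sample payload grows by one u64 per selected APX register
(PERF_X86_EXT_REG_APX_SIZE). Below is a minimal user-space sketch of that
sizing logic, mirroring get_num_ext_regs()/init_ext_regs_data(); it is not
part of this patch, and the REG_R16_BIT/REG_R31_BIT values are hypothetical
stand-ins for the PERF_REG_X86_R16..R31 positions from the patched uapi
header.

/*
 * Sketch only: count the APX registers selected in an extended-regs
 * bitmap and compute the extra sample bytes they contribute.
 */
#include <stdint.h>
#include <stdio.h>

/* Hypothetical bit positions; the real ones come from uapi perf_regs.h. */
#define REG_R16_BIT	32
#define REG_R31_BIT	47

#define GENMASK_ULL(h, l) \
	(((~0ULL) << (l)) & (~0ULL >> (63 - (h))))

#define PERF_X86_EXT_REG_APX_SIZE	1	/* one u64 per APX GPR, as in the patch */

static unsigned int hweight64(uint64_t w)
{
	return (unsigned int)__builtin_popcountll(w);
}

int main(void)
{
	/* Example request: R16-R19 and R31, e.g. via sample_ext_regs_intr. */
	uint64_t ext_regs = (0xfULL << REG_R16_BIT) | (1ULL << REG_R31_BIT);
	uint64_t apx_mask = GENMASK_ULL(REG_R31_BIT, REG_R16_BIT);
	unsigned int num = hweight64(ext_regs & apx_mask);

	/* Mirrors init_ext_regs_data(): dyn_size += num * size * sizeof(u64). */
	printf("APX regs selected: %u, extra sample bytes: %zu\n",
	       num, num * PERF_X86_EXT_REG_APX_SIZE * sizeof(uint64_t));
	return 0;
}

For this example bitmap the program reports 5 selected registers and 40
extra sample bytes.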
Signed-off-by: Kan Liang <kan.liang@...ux.intel.com>
---
arch/x86/events/core.c | 48 +++++++++++++++++++--------
arch/x86/events/perf_event.h | 4 +++
arch/x86/include/asm/perf_event.h | 1 +
arch/x86/include/uapi/asm/perf_regs.h | 21 +++++++++++-
arch/x86/kernel/perf_regs.c | 5 +++
5 files changed, 65 insertions(+), 14 deletions(-)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 91039c0256b3..67f62268f063 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -408,6 +408,14 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
static DEFINE_PER_CPU(void *, ext_regs_buf);
+#define __x86_pmu_get_regs(_mask, _regs, _size) \
+do { \
+ if (mask & _mask && xcomp_bv & _mask) { \
+ _regs = xsave; \
+ xsave += _size; \
+ } \
+} while (0)
+
static void x86_pmu_get_ext_regs(struct x86_perf_regs *perf_regs, u64 mask)
{
void *xsave = (void *)ALIGN((unsigned long)per_cpu(ext_regs_buf, smp_processor_id()), 64);
@@ -426,10 +434,8 @@ static void x86_pmu_get_ext_regs(struct x86_perf_regs *perf_regs, u64 mask)
xsave += FXSAVE_SIZE + XSAVE_HDR_SIZE;
/* The XSAVES instruction always uses the compacted format */
- if (mask & XFEATURE_MASK_YMM && xcomp_bv & XFEATURE_MASK_YMM) {
- perf_regs->ymmh_regs = xsave;
- xsave += XSAVE_YMM_SIZE;
- }
+ __x86_pmu_get_regs(XFEATURE_MASK_YMM, perf_regs->ymmh_regs, XSAVE_YMM_SIZE);
+ __x86_pmu_get_regs(XFEATURE_MASK_APX, perf_regs->apx_regs, sizeof(struct apx_state));
}
static void release_ext_regs_buffers(void)
@@ -457,6 +463,8 @@ static void reserve_ext_regs_buffers(void)
if (x86_pmu.ext_regs_mask & BIT_ULL(X86_EXT_REGS_YMM))
size += XSAVE_YMM_SIZE;
+ if (x86_pmu.ext_regs_mask & BIT_ULL(X86_EXT_REGS_APX))
+ size += sizeof(struct apx_state);
/* XSAVE feature requires 64-byte alignment. */
size += 64;
@@ -642,6 +650,13 @@ int x86_pmu_max_precise(void)
return precise;
}
+#define check_ext_regs(_type) \
+do { \
+ if (x86_pmu_get_event_num_ext_regs(event, _type) && \
+ !(x86_pmu.ext_regs_mask & BIT_ULL(_type))) \
+ return -EINVAL; \
+} while (0)
+
int x86_pmu_hw_config(struct perf_event *event)
{
if (event->attr.precise_ip) {
@@ -726,9 +741,8 @@ int x86_pmu_hw_config(struct perf_event *event)
if (event_has_extended_regs2(event)) {
if (!(event->pmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS2))
return -EINVAL;
- if (x86_pmu_get_event_num_ext_regs(event, X86_EXT_REGS_YMM) &&
- !(x86_pmu.ext_regs_mask & BIT_ULL(X86_EXT_REGS_YMM)))
- return -EINVAL;
+ check_ext_regs(X86_EXT_REGS_YMM);
+ check_ext_regs(X86_EXT_REGS_APX);
}
}
return x86_setup_perfctr(event);
@@ -1775,6 +1789,16 @@ x86_pmu_perf_get_regs_user(struct perf_sample_data *data,
return x86_regs_user;
}
+#define init_ext_regs_data(_type, _regs, _mask, _size) \
+do { \
+ num = x86_pmu_get_event_num_ext_regs(event, _type); \
+ if (num) { \
+ _regs = NULL; \
+ mask |= _mask; \
+ data->dyn_size += num * _size * sizeof(u64); \
+ } \
+} while (0)
+
void x86_pmu_setup_regs_data(struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs,
@@ -1818,12 +1842,10 @@ void x86_pmu_setup_regs_data(struct perf_event *event,
mask |= XFEATURE_MASK_SSE;
}
- num = x86_pmu_get_event_num_ext_regs(event, X86_EXT_REGS_YMM);
- if (num) {
- perf_regs->ymmh_regs = NULL;
- mask |= XFEATURE_MASK_YMM;
- data->dyn_size += num * PERF_X86_EXT_REG_YMMH_SIZE * sizeof(u64);
- }
+ init_ext_regs_data(X86_EXT_REGS_YMM, perf_regs->ymmh_regs,
+ XFEATURE_MASK_YMM, PERF_X86_EXT_REG_YMMH_SIZE);
+ init_ext_regs_data(X86_EXT_REGS_APX, perf_regs->apx_regs,
+ XFEATURE_MASK_APX, PERF_X86_EXT_REG_APX_SIZE);
mask &= ~ignore_mask;
if (mask)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 911916bc8e36..1c40b5d9c025 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -690,6 +690,7 @@ enum {
enum {
X86_EXT_REGS_XMM = 0,
X86_EXT_REGS_YMM,
+ X86_EXT_REGS_APX,
};
#define PERF_PEBS_DATA_SOURCE_MAX 0x100
@@ -1328,6 +1329,9 @@ static inline int get_num_ext_regs(u64 *ext_regs, unsigned int type)
case X86_EXT_REGS_YMM:
mask = GENMASK_ULL(PERF_REG_X86_YMMH15, PERF_REG_X86_YMMH0);
return hweight64(ext_regs[0] & mask);
+ case X86_EXT_REGS_APX:
+ mask = GENMASK_ULL(PERF_REG_X86_R31, PERF_REG_X86_R16);
+ return hweight64(ext_regs[0] & mask);
default:
return 0;
}
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index c30571f4de26..9e4d60f3a9a2 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -594,6 +594,7 @@ struct x86_perf_regs {
struct pt_regs regs;
u64 *xmm_regs;
u64 *ymmh_regs;
+ u64 *apx_regs;
};
extern unsigned long perf_arch_instruction_pointer(struct pt_regs *regs);
diff --git a/arch/x86/include/uapi/asm/perf_regs.h b/arch/x86/include/uapi/asm/perf_regs.h
index f37644513e33..e23fb112faac 100644
--- a/arch/x86/include/uapi/asm/perf_regs.h
+++ b/arch/x86/include/uapi/asm/perf_regs.h
@@ -74,11 +74,30 @@ enum perf_event_x86_ext_regs {
PERF_REG_X86_YMMH14,
PERF_REG_X86_YMMH15,
- PERF_REG_X86_EXT_REGS_MAX = PERF_REG_X86_YMMH15,
+ /* APX Registers */
+ PERF_REG_X86_R16,
+ PERF_REG_X86_R17,
+ PERF_REG_X86_R18,
+ PERF_REG_X86_R19,
+ PERF_REG_X86_R20,
+ PERF_REG_X86_R21,
+ PERF_REG_X86_R22,
+ PERF_REG_X86_R23,
+ PERF_REG_X86_R24,
+ PERF_REG_X86_R25,
+ PERF_REG_X86_R26,
+ PERF_REG_X86_R27,
+ PERF_REG_X86_R28,
+ PERF_REG_X86_R29,
+ PERF_REG_X86_R30,
+ PERF_REG_X86_R31,
+
+ PERF_REG_X86_EXT_REGS_MAX = PERF_REG_X86_R31,
};
enum perf_event_x86_ext_reg_size {
PERF_X86_EXT_REG_YMMH_SIZE = 2,
+ PERF_X86_EXT_REG_APX_SIZE = 1,
/* max of PERF_REG_X86_XXX_SIZE */
PERF_X86_EXT_REG_SIZE_MAX = PERF_X86_EXT_REG_YMMH_SIZE,
diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
index f12ef60a1a8a..518497bafdf0 100644
--- a/arch/x86/kernel/perf_regs.c
+++ b/arch/x86/kernel/perf_regs.c
@@ -82,6 +82,11 @@ static u64 perf_ext_reg_value(struct pt_regs *regs, int idx,
idx - PERF_REG_X86_YMMH0,
perf_regs->ymmh_regs,
PERF_X86_EXT_REG_YMMH_SIZE);
+ case PERF_REG_X86_R16 ... PERF_REG_X86_R31:
+ return __perf_ext_reg_value(ext, ext_size,
+ idx - PERF_REG_X86_R16,
+ perf_regs->apx_regs,
+ PERF_X86_EXT_REG_APX_SIZE);
default:
WARN_ON_ONCE(1);
*ext_size = 0;
--
2.38.1