[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-Id: <20250925061213.178796-17-dapeng1.mi@linux.intel.com>
Date: Thu, 25 Sep 2025 14:12:12 +0800
From: Dapeng Mi <dapeng1.mi@...ux.intel.com>
To: Peter Zijlstra <peterz@...radead.org>,
Ingo Molnar <mingo@...hat.com>,
Arnaldo Carvalho de Melo <acme@...nel.org>,
Namhyung Kim <namhyung@...nel.org>,
Thomas Gleixner <tglx@...utronix.de>,
Dave Hansen <dave.hansen@...ux.intel.com>,
Ian Rogers <irogers@...gle.com>,
Adrian Hunter <adrian.hunter@...el.com>,
Jiri Olsa <jolsa@...nel.org>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
Kan Liang <kan.liang@...ux.intel.com>,
Andi Kleen <ak@...ux.intel.com>,
Eranian Stephane <eranian@...gle.com>
Cc: Mark Rutland <mark.rutland@....com>,
broonie@...nel.org,
Ravi Bangoria <ravi.bangoria@....com>,
linux-kernel@...r.kernel.org,
linux-perf-users@...r.kernel.org,
Dapeng Mi <dapeng1.mi@...el.com>,
Dapeng Mi <dapeng1.mi@...ux.intel.com>
Subject: [Patch v4 16/17] perf tools: parse-regs: Support the new SIMD format
From: Kan Liang <kan.liang@...ux.intel.com>
Add has_cap_simd_regs() to check if the new SIMD format is available.
If yes, get the possible mask and qwords.
Add several __weak functions to return qwords and mask for vector and
pred registers.
Only support collecting the vector and pred as a whole, and only the
superset. For example, -I XMM,YMM. Only collect all 16 YMMs.
Examples:
$perf record -I?
available registers: AX BX CX DX SI DI BP SP IP FLAGS CS SS R8 R9 R10
R11 R12 R13 R14 R15 SSP XMM0-31 YMM0-31 ZMM0-31 OPMASK0-7
$perf record --user-regs=?
available registers: AX BX CX DX SI DI BP SP IP FLAGS CS SS R8 R9 R10
R11 R12 R13 R14 R15 SSP XMM0-31 YMM0-31 ZMM0-31 OPMASK0-7
Signed-off-by: Kan Liang <kan.liang@...ux.intel.com>
Co-developed-by: Dapeng Mi <dapeng1.mi@...ux.intel.com>
Signed-off-by: Dapeng Mi <dapeng1.mi@...ux.intel.com>
---
tools/perf/arch/x86/util/perf_regs.c | 443 +++++++++++++++++++++-
tools/perf/util/evsel.c | 25 ++
tools/perf/util/parse-regs-options.c | 133 ++++++-
tools/perf/util/perf_event_attr_fprintf.c | 6 +
tools/perf/util/perf_regs.c | 54 +++
tools/perf/util/perf_regs.h | 10 +
tools/perf/util/record.h | 6 +
7 files changed, 663 insertions(+), 14 deletions(-)
diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c
index 12fd93f04802..2e7a93d34cd1 100644
--- a/tools/perf/arch/x86/util/perf_regs.c
+++ b/tools/perf/arch/x86/util/perf_regs.c
@@ -13,6 +13,49 @@
#include "../../../util/pmu.h"
#include "../../../util/pmus.h"
+static const struct sample_reg sample_reg_masks_ext[] = {
+ SMPL_REG(AX, PERF_REG_X86_AX),
+ SMPL_REG(BX, PERF_REG_X86_BX),
+ SMPL_REG(CX, PERF_REG_X86_CX),
+ SMPL_REG(DX, PERF_REG_X86_DX),
+ SMPL_REG(SI, PERF_REG_X86_SI),
+ SMPL_REG(DI, PERF_REG_X86_DI),
+ SMPL_REG(BP, PERF_REG_X86_BP),
+ SMPL_REG(SP, PERF_REG_X86_SP),
+ SMPL_REG(IP, PERF_REG_X86_IP),
+ SMPL_REG(FLAGS, PERF_REG_X86_FLAGS),
+ SMPL_REG(CS, PERF_REG_X86_CS),
+ SMPL_REG(SS, PERF_REG_X86_SS),
+#ifdef HAVE_ARCH_X86_64_SUPPORT
+ SMPL_REG(R8, PERF_REG_X86_R8),
+ SMPL_REG(R9, PERF_REG_X86_R9),
+ SMPL_REG(R10, PERF_REG_X86_R10),
+ SMPL_REG(R11, PERF_REG_X86_R11),
+ SMPL_REG(R12, PERF_REG_X86_R12),
+ SMPL_REG(R13, PERF_REG_X86_R13),
+ SMPL_REG(R14, PERF_REG_X86_R14),
+ SMPL_REG(R15, PERF_REG_X86_R15),
+ SMPL_REG(R16, PERF_REG_X86_R16),
+ SMPL_REG(R17, PERF_REG_X86_R17),
+ SMPL_REG(R18, PERF_REG_X86_R18),
+ SMPL_REG(R19, PERF_REG_X86_R19),
+ SMPL_REG(R20, PERF_REG_X86_R20),
+ SMPL_REG(R21, PERF_REG_X86_R21),
+ SMPL_REG(R22, PERF_REG_X86_R22),
+ SMPL_REG(R23, PERF_REG_X86_R23),
+ SMPL_REG(R24, PERF_REG_X86_R24),
+ SMPL_REG(R25, PERF_REG_X86_R25),
+ SMPL_REG(R26, PERF_REG_X86_R26),
+ SMPL_REG(R27, PERF_REG_X86_R27),
+ SMPL_REG(R28, PERF_REG_X86_R28),
+ SMPL_REG(R29, PERF_REG_X86_R29),
+ SMPL_REG(R30, PERF_REG_X86_R30),
+ SMPL_REG(R31, PERF_REG_X86_R31),
+ SMPL_REG(SSP, PERF_REG_X86_SSP),
+#endif
+ SMPL_REG_END
+};
+
static const struct sample_reg sample_reg_masks[] = {
SMPL_REG(AX, PERF_REG_X86_AX),
SMPL_REG(BX, PERF_REG_X86_BX),
@@ -276,27 +319,377 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op)
return SDT_ARG_VALID;
}
+static bool support_simd_reg(u64 sample_type, u16 qwords, u64 mask, bool pred)
+{
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+ .sample_type = sample_type,
+ .disabled = 1,
+ .exclude_kernel = 1,
+ .sample_simd_regs_enabled = 1,
+ };
+ int fd;
+
+ attr.sample_period = 1;
+
+ if (!pred) {
+ attr.sample_simd_vec_reg_qwords = qwords;
+ if (sample_type == PERF_SAMPLE_REGS_INTR)
+ attr.sample_simd_vec_reg_intr = mask;
+ else
+ attr.sample_simd_vec_reg_user = mask;
+ } else {
+ attr.sample_simd_pred_reg_qwords = PERF_X86_OPMASK_QWORDS;
+ if (sample_type == PERF_SAMPLE_REGS_INTR)
+ attr.sample_simd_pred_reg_intr = PERF_X86_SIMD_PRED_MASK;
+ else
+ attr.sample_simd_pred_reg_user = PERF_X86_SIMD_PRED_MASK;
+ }
+
+ if (perf_pmus__num_core_pmus() > 1) {
+ struct perf_pmu *pmu = NULL;
+ __u64 type = PERF_TYPE_RAW;
+
+ /*
+ * The same register set is supported among different hybrid PMUs.
+ * Only check the first available one.
+ */
+ while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
+ type = pmu->type;
+ break;
+ }
+ attr.config |= type << PERF_PMU_TYPE_SHIFT;
+ }
+
+ event_attr_init(&attr);
+
+ fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+ if (fd != -1) {
+ close(fd);
+ return true;
+ }
+
+ return false;
+}
+
+static bool __arch_simd_reg_mask(u64 sample_type, int reg, uint64_t *mask, u16 *qwords)
+{
+ bool supported = false;
+ u64 bits;
+
+ *mask = 0;
+ *qwords = 0;
+
+ switch (reg) {
+ case PERF_REG_X86_XMM:
+ bits = BIT_ULL(PERF_X86_SIMD_XMM_REGS) - 1;
+ supported = support_simd_reg(sample_type, PERF_X86_XMM_QWORDS, bits, false);
+ if (supported) {
+ *mask = bits;
+ *qwords = PERF_X86_XMM_QWORDS;
+ }
+ break;
+ case PERF_REG_X86_YMM:
+ bits = BIT_ULL(PERF_X86_SIMD_YMM_REGS) - 1;
+ supported = support_simd_reg(sample_type, PERF_X86_YMM_QWORDS, bits, false);
+ if (supported) {
+ *mask = bits;
+ *qwords = PERF_X86_YMM_QWORDS;
+ }
+ break;
+ case PERF_REG_X86_ZMM:
+ bits = BIT_ULL(PERF_X86_SIMD_ZMM_REGS) - 1;
+ supported = support_simd_reg(sample_type, PERF_X86_ZMM_QWORDS, bits, false);
+ if (supported) {
+ *mask = bits;
+ *qwords = PERF_X86_ZMM_QWORDS;
+ break;
+ }
+
+ bits = BIT_ULL(PERF_X86_SIMD_ZMMH_REGS) - 1;
+ supported = support_simd_reg(sample_type, PERF_X86_ZMM_QWORDS, bits, false);
+ if (supported) {
+ *mask = bits;
+ *qwords = PERF_X86_ZMMH_QWORDS;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return supported;
+}
+
+static bool __arch_pred_reg_mask(u64 sample_type, int reg, uint64_t *mask, u16 *qwords)
+{
+ bool supported = false;
+ u64 bits;
+
+ *mask = 0;
+ *qwords = 0;
+
+ switch (reg) {
+ case PERF_REG_X86_OPMASK:
+ bits = BIT_ULL(PERF_X86_SIMD_OPMASK_REGS) - 1;
+ supported = support_simd_reg(sample_type, PERF_X86_OPMASK_QWORDS, bits, true);
+ if (supported) {
+ *mask = bits;
+ *qwords = PERF_X86_OPMASK_QWORDS;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return supported;
+}
+
+static bool has_cap_simd_regs(void)
+{
+ uint64_t mask = BIT_ULL(PERF_X86_SIMD_XMM_REGS) - 1;
+ u16 qwords = PERF_X86_XMM_QWORDS;
+ static bool has_cap_simd_regs;
+ static bool cached;
+
+ if (cached)
+ return has_cap_simd_regs;
+
+ has_cap_simd_regs = __arch_simd_reg_mask(PERF_SAMPLE_REGS_INTR,
+ PERF_REG_X86_XMM, &mask, &qwords);
+ has_cap_simd_regs |= __arch_simd_reg_mask(PERF_SAMPLE_REGS_USER,
+ PERF_REG_X86_XMM, &mask, &qwords);
+ cached = true;
+
+ return has_cap_simd_regs;
+}
+
+
+static const struct sample_reg sample_simd_reg_masks[] = {
+ SMPL_REG(XMM, PERF_REG_X86_XMM),
+ SMPL_REG(YMM, PERF_REG_X86_YMM),
+ SMPL_REG(ZMM, PERF_REG_X86_ZMM),
+ SMPL_REG_END
+};
+
+static const struct sample_reg sample_pred_reg_masks[] = {
+ SMPL_REG(OPMASK, PERF_REG_X86_OPMASK),
+ SMPL_REG_END
+};
+
+const struct sample_reg *arch__sample_simd_reg_masks(void)
+{
+ return sample_simd_reg_masks;
+}
+
+const struct sample_reg *arch__sample_pred_reg_masks(void)
+{
+ return sample_pred_reg_masks;
+}
+
+static bool x86_intr_simd_updated;
+static u64 x86_intr_simd_mask[PERF_REG_X86_MAX_SIMD_REGS];
+static u16 x86_intr_simd_qwords[PERF_REG_X86_MAX_SIMD_REGS];
+static bool x86_user_simd_updated;
+static u64 x86_user_simd_mask[PERF_REG_X86_MAX_SIMD_REGS];
+static u16 x86_user_simd_qwords[PERF_REG_X86_MAX_SIMD_REGS];
+
+static bool x86_intr_pred_updated;
+static u64 x86_intr_pred_mask[PERF_REG_X86_MAX_PRED_REGS];
+static u16 x86_intr_pred_qwords[PERF_REG_X86_MAX_PRED_REGS];
+static bool x86_user_pred_updated;
+static u64 x86_user_pred_mask[PERF_REG_X86_MAX_PRED_REGS];
+static u16 x86_user_pred_qwords[PERF_REG_X86_MAX_PRED_REGS];
+
+static uint64_t __arch__simd_reg_mask(u64 sample_type)
+{
+ const struct sample_reg *r = NULL;
+ bool supported;
+ u64 mask = 0;
+ int reg;
+
+ if (!has_cap_simd_regs())
+ return 0;
+
+ for (r = arch__sample_simd_reg_masks(); r->name; r++) {
+ supported = false;
+
+ if (!r->mask)
+ continue;
+ reg = fls64(r->mask) - 1;
+
+ if (reg >= PERF_REG_X86_MAX_SIMD_REGS)
+ break;
+ if (sample_type == PERF_SAMPLE_REGS_INTR)
+ supported = __arch_simd_reg_mask(sample_type, reg,
+ &x86_intr_simd_mask[reg],
+ &x86_intr_simd_qwords[reg]);
+ else if (sample_type == PERF_SAMPLE_REGS_USER)
+ supported = __arch_simd_reg_mask(sample_type, reg,
+ &x86_user_simd_mask[reg],
+ &x86_user_simd_qwords[reg]);
+ if (supported)
+ mask |= BIT_ULL(reg);
+ }
+
+ if (sample_type == PERF_SAMPLE_REGS_INTR)
+ x86_intr_simd_updated = true;
+ else
+ x86_user_simd_updated = true;
+
+ return mask;
+}
+
+static uint64_t __arch__pred_reg_mask(u64 sample_type)
+{
+ const struct sample_reg *r = NULL;
+ bool supported;
+ u64 mask = 0;
+ int reg;
+
+ if (!has_cap_simd_regs())
+ return 0;
+
+ for (r = arch__sample_pred_reg_masks(); r->name; r++) {
+ supported = false;
+
+ if (!r->mask)
+ continue;
+ reg = fls64(r->mask) - 1;
+
+ if (reg >= PERF_REG_X86_MAX_PRED_REGS)
+ break;
+ if (sample_type == PERF_SAMPLE_REGS_INTR)
+ supported = __arch_pred_reg_mask(sample_type, reg,
+ &x86_intr_pred_mask[reg],
+ &x86_intr_pred_qwords[reg]);
+ else if (sample_type == PERF_SAMPLE_REGS_USER)
+ supported = __arch_pred_reg_mask(sample_type, reg,
+ &x86_user_pred_mask[reg],
+ &x86_user_pred_qwords[reg]);
+ if (supported)
+ mask |= BIT_ULL(reg);
+ }
+
+ if (sample_type == PERF_SAMPLE_REGS_INTR)
+ x86_intr_pred_updated = true;
+ else
+ x86_user_pred_updated = true;
+
+ return mask;
+}
+
+uint64_t arch__intr_simd_reg_mask(void)
+{
+ return __arch__simd_reg_mask(PERF_SAMPLE_REGS_INTR);
+}
+
+uint64_t arch__user_simd_reg_mask(void)
+{
+ return __arch__simd_reg_mask(PERF_SAMPLE_REGS_USER);
+}
+
+uint64_t arch__intr_pred_reg_mask(void)
+{
+ return __arch__pred_reg_mask(PERF_SAMPLE_REGS_INTR);
+}
+
+uint64_t arch__user_pred_reg_mask(void)
+{
+ return __arch__pred_reg_mask(PERF_SAMPLE_REGS_USER);
+}
+
+static uint64_t arch__simd_reg_bitmap_qwords(int reg, u16 *qwords, bool intr)
+{
+ uint64_t mask = 0;
+
+ *qwords = 0;
+ if (reg < PERF_REG_X86_MAX_SIMD_REGS) {
+ if (intr) {
+ *qwords = x86_intr_simd_qwords[reg];
+ mask = x86_intr_simd_mask[reg];
+ } else {
+ *qwords = x86_user_simd_qwords[reg];
+ mask = x86_user_simd_mask[reg];
+ }
+ }
+
+ return mask;
+}
+
+static uint64_t arch__pred_reg_bitmap_qwords(int reg, u16 *qwords, bool intr)
+{
+ uint64_t mask = 0;
+
+ *qwords = 0;
+ if (reg < PERF_REG_X86_MAX_PRED_REGS) {
+ if (intr) {
+ *qwords = x86_intr_pred_qwords[reg];
+ mask = x86_intr_pred_mask[reg];
+ } else {
+ *qwords = x86_user_pred_qwords[reg];
+ mask = x86_user_pred_mask[reg];
+ }
+ }
+
+ return mask;
+}
+
+uint64_t arch__intr_simd_reg_bitmap_qwords(int reg, u16 *qwords)
+{
+ if (!x86_intr_simd_updated)
+ arch__intr_simd_reg_mask();
+ return arch__simd_reg_bitmap_qwords(reg, qwords, true);
+}
+
+uint64_t arch__user_simd_reg_bitmap_qwords(int reg, u16 *qwords)
+{
+ if (!x86_user_simd_updated)
+ arch__user_simd_reg_mask();
+ return arch__simd_reg_bitmap_qwords(reg, qwords, false);
+}
+
+uint64_t arch__intr_pred_reg_bitmap_qwords(int reg, u16 *qwords)
+{
+ if (!x86_intr_pred_updated)
+ arch__intr_pred_reg_mask();
+ return arch__pred_reg_bitmap_qwords(reg, qwords, true);
+}
+
+uint64_t arch__user_pred_reg_bitmap_qwords(int reg, u16 *qwords)
+{
+ if (!x86_user_pred_updated)
+ arch__user_pred_reg_mask();
+ return arch__pred_reg_bitmap_qwords(reg, qwords, false);
+}
+
const struct sample_reg *arch__sample_reg_masks(void)
{
+ if (has_cap_simd_regs())
+ return sample_reg_masks_ext;
return sample_reg_masks;
}
-uint64_t arch__intr_reg_mask(void)
+static uint64_t __arch__reg_mask(u64 sample_type, u64 mask, bool has_simd_regs)
{
struct perf_event_attr attr = {
- .type = PERF_TYPE_HARDWARE,
- .config = PERF_COUNT_HW_CPU_CYCLES,
- .sample_type = PERF_SAMPLE_REGS_INTR,
- .sample_regs_intr = PERF_REG_EXTENDED_MASK,
- .precise_ip = 1,
- .disabled = 1,
- .exclude_kernel = 1,
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+ .sample_type = sample_type,
+ .disabled = 1,
+ .precise_ip = 1,
+ .exclude_kernel = 1,
+ .sample_simd_regs_enabled = has_simd_regs,
};
int fd;
/*
* In an unnamed union, init it here to build on older gcc versions
*/
attr.sample_period = 1;
+ if (sample_type == PERF_SAMPLE_REGS_INTR)
+ attr.sample_regs_intr = mask;
+ else
+ attr.sample_regs_user = mask;
if (perf_pmus__num_core_pmus() > 1) {
struct perf_pmu *pmu = NULL;
@@ -318,13 +711,41 @@ uint64_t arch__intr_reg_mask(void)
fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
if (fd != -1) {
close(fd);
- return (PERF_REG_EXTENDED_MASK | PERF_REGS_MASK);
+ return mask;
}
- return PERF_REGS_MASK;
+ return 0;
+}
+
+uint64_t arch__intr_reg_mask(void)
+{
+ uint64_t mask = PERF_REGS_MASK;
+
+ if (has_cap_simd_regs()) {
+ mask |= __arch__reg_mask(PERF_SAMPLE_REGS_INTR,
+ GENMASK_ULL(PERF_REG_X86_R31, PERF_REG_X86_R16),
+ true);
+ mask |= __arch__reg_mask(PERF_SAMPLE_REGS_INTR,
+ BIT_ULL(PERF_REG_X86_SSP),
+ true);
+ } else
+ mask |= __arch__reg_mask(PERF_SAMPLE_REGS_INTR, PERF_REG_EXTENDED_MASK, false);
+
+ return mask;
}
uint64_t arch__user_reg_mask(void)
{
- return PERF_REGS_MASK;
+ uint64_t mask = PERF_REGS_MASK;
+
+ if (has_cap_simd_regs()) {
+ mask |= __arch__reg_mask(PERF_SAMPLE_REGS_USER,
+ GENMASK_ULL(PERF_REG_X86_R31, PERF_REG_X86_R16),
+ true);
+ mask |= __arch__reg_mask(PERF_SAMPLE_REGS_USER,
+ BIT_ULL(PERF_REG_X86_SSP),
+ true);
+ }
+
+ return mask;
}
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index d264c143b592..98996e672794 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1387,12 +1387,37 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
evsel__set_sample_bit(evsel, REGS_INTR);
}
+ if ((opts->sample_intr_vec_regs || opts->sample_intr_pred_regs) &&
+ !evsel->no_aux_samples && !evsel__is_dummy_event(evsel)) {
+ /* The pred qwords is to implies the set of SIMD registers is used */
+ if (opts->sample_pred_regs_qwords)
+ attr->sample_simd_pred_reg_qwords = opts->sample_pred_regs_qwords;
+ else
+ attr->sample_simd_pred_reg_qwords = 1;
+ attr->sample_simd_vec_reg_intr = opts->sample_intr_vec_regs;
+ attr->sample_simd_vec_reg_qwords = opts->sample_vec_regs_qwords;
+ attr->sample_simd_pred_reg_intr = opts->sample_intr_pred_regs;
+ evsel__set_sample_bit(evsel, REGS_INTR);
+ }
+
if (opts->sample_user_regs && !evsel->no_aux_samples &&
!evsel__is_dummy_event(evsel)) {
attr->sample_regs_user |= opts->sample_user_regs;
evsel__set_sample_bit(evsel, REGS_USER);
}
+ if ((opts->sample_user_vec_regs || opts->sample_user_pred_regs) &&
+ !evsel->no_aux_samples && !evsel__is_dummy_event(evsel)) {
+ if (opts->sample_pred_regs_qwords)
+ attr->sample_simd_pred_reg_qwords = opts->sample_pred_regs_qwords;
+ else
+ attr->sample_simd_pred_reg_qwords = 1;
+ attr->sample_simd_vec_reg_user = opts->sample_user_vec_regs;
+ attr->sample_simd_vec_reg_qwords = opts->sample_vec_regs_qwords;
+ attr->sample_simd_pred_reg_user = opts->sample_user_pred_regs;
+ evsel__set_sample_bit(evsel, REGS_USER);
+ }
+
if (target__has_cpu(&opts->target) || opts->sample_cpu)
evsel__set_sample_bit(evsel, CPU);
diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c
index cda1c620968e..c66d0769096b 100644
--- a/tools/perf/util/parse-regs-options.c
+++ b/tools/perf/util/parse-regs-options.c
@@ -4,19 +4,104 @@
#include <stdint.h>
#include <string.h>
#include <stdio.h>
+#include <linux/bitops.h>
#include "util/debug.h"
#include <subcmd/parse-options.h>
#include "util/perf_regs.h"
#include "util/parse-regs-options.h"
+#include "record.h"
+
+static void __print_simd_regs(bool intr, uint64_t simd_mask, uint64_t pred_mask)
+{
+ const struct sample_reg *r = NULL;
+ uint64_t bitmap = 0;
+ u16 qwords = 0;
+ int idx;
+
+ for (r = arch__sample_simd_reg_masks(); r->name; r++) {
+ if (r->mask & simd_mask) {
+ idx = fls64(r->mask) - 1;
+ if (intr)
+ bitmap = arch__intr_simd_reg_bitmap_qwords(idx, &qwords);
+ else
+ bitmap = arch__user_simd_reg_bitmap_qwords(idx, &qwords);
+ if (bitmap)
+ fprintf(stderr, "%s0-%d ", r->name, fls64(bitmap) - 1);
+ }
+ }
+
+ for (r = arch__sample_pred_reg_masks(); r->name; r++) {
+ if (r->mask & pred_mask) {
+ idx = fls64(r->mask) - 1;
+ if (intr)
+ bitmap = arch__intr_pred_reg_bitmap_qwords(idx, &qwords);
+ else
+ bitmap = arch__user_pred_reg_bitmap_qwords(idx, &qwords);
+ if (bitmap)
+ fprintf(stderr, "%s0-%d ", r->name, fls64(bitmap) - 1);
+ }
+ }
+}
+
+static uint64_t __get_simd_reg_bitmask_qwords(bool intr, char *reg_name, u16 *qwords)
+{
+ const struct sample_reg *r = NULL;
+ uint64_t bitmap = 0;
+ int idx;
+
+ *qwords = 0;
+ for (r = arch__sample_simd_reg_masks(); r->name; r++) {
+ if (!strcasecmp(reg_name, r->name)) {
+ if (!fls64(r->mask))
+ continue;
+ idx = fls64(r->mask) - 1;
+ if (intr)
+ bitmap = arch__intr_simd_reg_bitmap_qwords(idx, qwords);
+ else
+ bitmap = arch__user_simd_reg_bitmap_qwords(idx, qwords);
+ break;
+ }
+ }
+
+ return bitmap;
+}
+
+static uint64_t __get_pred_reg_bitmask_qwords(bool intr, char *reg_name, u16 *qwords)
+{
+ const struct sample_reg *r = NULL;
+ uint64_t bitmap = 0;
+ int idx;
+
+ *qwords = 0;
+ for (r = arch__sample_pred_reg_masks(); r->name; r++) {
+ if (!strcasecmp(reg_name, r->name)) {
+ if (!fls64(r->mask))
+ continue;
+ idx = fls64(r->mask) - 1;
+ if (intr)
+ bitmap = arch__intr_pred_reg_bitmap_qwords(idx, qwords);
+ else
+ bitmap = arch__user_pred_reg_bitmap_qwords(idx, qwords);
+ break;
+ }
+ }
+
+ return bitmap;
+}
static int
__parse_regs(const struct option *opt, const char *str, int unset, bool intr)
{
uint64_t *mode = (uint64_t *)opt->value;
const struct sample_reg *r = NULL;
+ struct record_opts *opts;
char *s, *os = NULL, *p;
int ret = -1;
uint64_t mask;
+ uint64_t simd_mask;
+ uint64_t pred_mask;
+ uint64_t bitmap = 0;
+ u16 qwords = 0;
if (unset)
return 0;
@@ -27,10 +112,17 @@ __parse_regs(const struct option *opt, const char *str, int unset, bool intr)
if (*mode)
return -1;
- if (intr)
+ if (intr) {
+ opts = container_of(opt->value, struct record_opts, sample_intr_regs);
mask = arch__intr_reg_mask();
- else
+ simd_mask = arch__intr_simd_reg_mask();
+ pred_mask = arch__intr_pred_reg_mask();
+ } else {
+ opts = container_of(opt->value, struct record_opts, sample_user_regs);
mask = arch__user_reg_mask();
+ simd_mask = arch__user_simd_reg_mask();
+ pred_mask = arch__user_pred_reg_mask();
+ }
/* str may be NULL in case no arg is passed to -I */
if (str) {
@@ -50,10 +142,45 @@ __parse_regs(const struct option *opt, const char *str, int unset, bool intr)
if (r->mask & mask)
fprintf(stderr, "%s ", r->name);
}
+ if (simd_mask || pred_mask)
+ __print_simd_regs(intr, simd_mask, pred_mask);
+
fputc('\n', stderr);
/* just printing available regs */
goto error;
}
+
+ if (simd_mask) {
+ bitmap = __get_simd_reg_bitmask_qwords(intr, s, &qwords);
+
+ /* Just need the highest qwords */
+ if (qwords > opts->sample_vec_regs_qwords) {
+ opts->sample_vec_regs_qwords = qwords;
+ if (intr)
+ opts->sample_intr_vec_regs = bitmap;
+ else
+ opts->sample_user_vec_regs = bitmap;
+ }
+
+ if (bitmap)
+ goto next;
+ }
+ if (pred_mask) {
+ bitmap = __get_pred_reg_bitmask_qwords(intr, s, &qwords);
+
+ /* Just need the highest qwords */
+ if (qwords > opts->sample_pred_regs_qwords) {
+ opts->sample_pred_regs_qwords = qwords;
+ if (intr)
+ opts->sample_intr_pred_regs = bitmap;
+ else
+ opts->sample_user_pred_regs = bitmap;
+ }
+
+ if (bitmap)
+ goto next;
+ }
+
for (r = arch__sample_reg_masks(); r->name; r++) {
if ((r->mask & mask) && !strcasecmp(s, r->name))
break;
@@ -65,7 +192,7 @@ __parse_regs(const struct option *opt, const char *str, int unset, bool intr)
}
*mode |= r->mask;
-
+next:
if (!p)
break;
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
index 66b666d9ce64..fb0366d050cf 100644
--- a/tools/perf/util/perf_event_attr_fprintf.c
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -360,6 +360,12 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
PRINT_ATTRf(aux_start_paused, p_unsigned);
PRINT_ATTRf(aux_pause, p_unsigned);
PRINT_ATTRf(aux_resume, p_unsigned);
+ PRINT_ATTRf(sample_simd_pred_reg_qwords, p_unsigned);
+ PRINT_ATTRf(sample_simd_pred_reg_intr, p_hex);
+ PRINT_ATTRf(sample_simd_pred_reg_user, p_hex);
+ PRINT_ATTRf(sample_simd_vec_reg_qwords, p_unsigned);
+ PRINT_ATTRf(sample_simd_vec_reg_intr, p_hex);
+ PRINT_ATTRf(sample_simd_vec_reg_user, p_hex);
return ret;
}
diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c
index 44b90bbf2d07..107bbf7dbcfe 100644
--- a/tools/perf/util/perf_regs.c
+++ b/tools/perf/util/perf_regs.c
@@ -21,6 +21,50 @@ uint64_t __weak arch__user_reg_mask(void)
return 0;
}
+uint64_t __weak arch__intr_simd_reg_mask(void)
+{
+ return 0;
+}
+
+uint64_t __weak arch__user_simd_reg_mask(void)
+{
+ return 0;
+}
+
+uint64_t __weak arch__intr_pred_reg_mask(void)
+{
+ return 0;
+}
+
+uint64_t __weak arch__user_pred_reg_mask(void)
+{
+ return 0;
+}
+
+uint64_t __weak arch__intr_simd_reg_bitmap_qwords(int reg __maybe_unused, u16 *qwords)
+{
+ *qwords = 0;
+ return 0;
+}
+
+uint64_t __weak arch__user_simd_reg_bitmap_qwords(int reg __maybe_unused, u16 *qwords)
+{
+ *qwords = 0;
+ return 0;
+}
+
+uint64_t __weak arch__intr_pred_reg_bitmap_qwords(int reg __maybe_unused, u16 *qwords)
+{
+ *qwords = 0;
+ return 0;
+}
+
+uint64_t __weak arch__user_pred_reg_bitmap_qwords(int reg __maybe_unused, u16 *qwords)
+{
+ *qwords = 0;
+ return 0;
+}
+
static const struct sample_reg sample_reg_masks[] = {
SMPL_REG_END
};
@@ -30,6 +74,16 @@ const struct sample_reg * __weak arch__sample_reg_masks(void)
return sample_reg_masks;
}
+const struct sample_reg * __weak arch__sample_simd_reg_masks(void)
+{
+ return sample_reg_masks;
+}
+
+const struct sample_reg * __weak arch__sample_pred_reg_masks(void)
+{
+ return sample_reg_masks;
+}
+
const char *perf_reg_name(int id, const char *arch)
{
const char *reg_name = NULL;
diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h
index f2d0736d65cc..cd98f9b9f964 100644
--- a/tools/perf/util/perf_regs.h
+++ b/tools/perf/util/perf_regs.h
@@ -27,6 +27,16 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op);
uint64_t arch__intr_reg_mask(void);
uint64_t arch__user_reg_mask(void);
const struct sample_reg *arch__sample_reg_masks(void);
+const struct sample_reg *arch__sample_simd_reg_masks(void);
+const struct sample_reg *arch__sample_pred_reg_masks(void);
+uint64_t arch__intr_simd_reg_mask(void);
+uint64_t arch__user_simd_reg_mask(void);
+uint64_t arch__intr_pred_reg_mask(void);
+uint64_t arch__user_pred_reg_mask(void);
+uint64_t arch__intr_simd_reg_bitmap_qwords(int reg, u16 *qwords);
+uint64_t arch__user_simd_reg_bitmap_qwords(int reg, u16 *qwords);
+uint64_t arch__intr_pred_reg_bitmap_qwords(int reg, u16 *qwords);
+uint64_t arch__user_pred_reg_bitmap_qwords(int reg, u16 *qwords);
const char *perf_reg_name(int id, const char *arch);
int perf_reg_value(u64 *valp, struct regs_dump *regs, int id);
diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
index ea3a6c4657ee..825ffb4cc53f 100644
--- a/tools/perf/util/record.h
+++ b/tools/perf/util/record.h
@@ -59,7 +59,13 @@ struct record_opts {
unsigned int user_freq;
u64 branch_stack;
u64 sample_intr_regs;
+ u64 sample_intr_vec_regs;
u64 sample_user_regs;
+ u64 sample_user_vec_regs;
+ u16 sample_pred_regs_qwords;
+ u16 sample_vec_regs_qwords;
+ u16 sample_intr_pred_regs;
+ u16 sample_user_pred_regs;
u64 default_interval;
u64 user_interval;
size_t auxtrace_snapshot_size;
--
2.34.1
Powered by blists - more mailing lists