[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAP-5=fWjBZFsFqQX_wCLweNGv_NdHwp=zV6G7BRTVdBiwRmRkQ@mail.gmail.com>
Date: Mon, 9 Feb 2026 14:39:16 -0800
From: Ian Rogers <irogers@...gle.com>
To: Dapeng Mi <dapeng1.mi@...ux.intel.com>
Cc: Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>,
Arnaldo Carvalho de Melo <acme@...nel.org>, Namhyung Kim <namhyung@...nel.org>,
Adrian Hunter <adrian.hunter@...el.com>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>, linux-perf-users@...r.kernel.org,
linux-kernel@...r.kernel.org, Zide Chen <zide.chen@...el.com>,
Falcon Thomas <thomas.falcon@...el.com>, Dapeng Mi <dapeng1.mi@...el.com>,
Xudong Hao <xudong.hao@...el.com>
Subject: Re: [Patch v6 3/4] perf regs: Support x86 SIMD registers sampling
On Mon, Feb 9, 2026 at 12:39 AM Dapeng Mi <dapeng1.mi@...ux.intel.com> wrote:
>
> This patch adds support for the newly introduced SIMD register sampling
> format by adding the following 5 functions:
>
> uint64_t perf_intr_simd_reg_class_mask(uint16_t e_machine, bool pred);
> uint64_t perf_user_simd_reg_class_mask(uint16_t e_machine, bool pred);
> uint64_t perf_intr_simd_reg_class_bitmap_qwords(uint16_t e_machine, int reg_c,
> uint16_t *qwords, bool pred);
> uint64_t perf_user_simd_reg_class_bitmap_qwords(uint16_t e_machine, int reg_c,
> uint16_t *qwords, bool pred);
> const char *perf_simd_reg_class_name(uint16_t e_machine, int id, bool pred);
>
> The perf_{intr|user}_simd_reg_class_mask() functions retrieve the bitmap
> of kernel-supported SIMD/PRED register classes on the current platform
> for intr-regs and user-regs sampling, such as OPMASK/XMM/YMM/ZMM on
> x86 platforms.
>
> The perf_{intr|user}_simd_reg_class_bitmap_qwords() functions retrieve
> the bitmap and qwords length of a certain class of SIMD/PRED register
> on current platform for intr-regs and user-regs sampling. For example,
> for the XMM registers on x86 platforms, the returned bitmap is 0xffff
> (XMM0 ~ XMM15) and the qwords length is 2 (128 bits for each XMM
> register).
>
> The perf_simd_reg_class_name() function gets the register class name for
> a certain register class index.
>
> Additionally, the function __parse_regs() is enhanced to support parsing
> these newly introduced SIMD/PRED registers. Currently, each class of
> register can only be sampled collectively; sampling a specific SIMD
> register is not supported. For example, all XMM registers are sampled
> together rather than sampling only XMM0.
>
> When multiple overlapping register types, such as XMM and YMM, are
> sampled simultaneously, only the superset (YMM registers) is sampled.
>
> With this patch, all supported sampling registers on x86 platforms are
> displayed as follows.
>
> $perf record --intr-regs=?
> available registers: AX BX CX DX SI DI BP SP IP FLAGS CS SS R8 R9 R10
> R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28
> R29 R30 R31 SSP XMM0-15 YMM0-15 ZMM0-31 OPMASK0-7
>
> $perf record --user-regs=?
> available registers: AX BX CX DX SI DI BP SP IP FLAGS CS SS R8 R9 R10
> R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28
> R29 R30 R31 SSP XMM0-15 YMM0-15 ZMM0-31 OPMASK0-7
>
> Signed-off-by: Dapeng Mi <dapeng1.mi@...ux.intel.com>
Reviewed-by: Ian Rogers <irogers@...gle.com>
Thanks,
Ian
> ---
> tools/perf/util/evsel.c | 27 ++
> tools/perf/util/parse-regs-options.c | 161 +++++++++-
> .../perf/util/perf-regs-arch/perf_regs_x86.c | 292 ++++++++++++++++++
> tools/perf/util/perf_event_attr_fprintf.c | 6 +
> tools/perf/util/perf_regs.c | 72 +++++
> tools/perf/util/perf_regs.h | 11 +
> tools/perf/util/record.h | 6 +
> 7 files changed, 565 insertions(+), 10 deletions(-)
>
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index b7fb3f936ae3..a86d2434a4ad 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -1583,12 +1583,39 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
> if (opts->sample_intr_regs && !evsel->no_aux_samples &&
> !evsel__is_dummy_event(evsel)) {
> attr->sample_regs_intr = opts->sample_intr_regs;
> + attr->sample_simd_regs_enabled = !!opts->sample_pred_reg_qwords;
> + evsel__set_sample_bit(evsel, REGS_INTR);
> + }
> +
> + if ((opts->sample_intr_vec_regs || opts->sample_intr_pred_regs) &&
> + !evsel->no_aux_samples && !evsel__is_dummy_event(evsel)) {
> + /* A non-zero pred qwords implies that the SIMD register set is used */
> + if (opts->sample_pred_reg_qwords)
> + attr->sample_simd_pred_reg_qwords = opts->sample_pred_reg_qwords;
> + else
> + attr->sample_simd_pred_reg_qwords = 1;
> + attr->sample_simd_vec_reg_intr = opts->sample_intr_vec_regs;
> + attr->sample_simd_vec_reg_qwords = opts->sample_vec_reg_qwords;
> + attr->sample_simd_pred_reg_intr = opts->sample_intr_pred_regs;
> evsel__set_sample_bit(evsel, REGS_INTR);
> }
>
> if (opts->sample_user_regs && !evsel->no_aux_samples &&
> !evsel__is_dummy_event(evsel)) {
> attr->sample_regs_user |= opts->sample_user_regs;
> + attr->sample_simd_regs_enabled = !!opts->sample_pred_reg_qwords;
> + evsel__set_sample_bit(evsel, REGS_USER);
> + }
> +
> + if ((opts->sample_user_vec_regs || opts->sample_user_pred_regs) &&
> + !evsel->no_aux_samples && !evsel__is_dummy_event(evsel)) {
> + if (opts->sample_pred_reg_qwords)
> + attr->sample_simd_pred_reg_qwords = opts->sample_pred_reg_qwords;
> + else
> + attr->sample_simd_pred_reg_qwords = 1;
> + attr->sample_simd_vec_reg_user = opts->sample_user_vec_regs;
> + attr->sample_simd_vec_reg_qwords = opts->sample_vec_reg_qwords;
> + attr->sample_simd_pred_reg_user = opts->sample_user_pred_regs;
> evsel__set_sample_bit(evsel, REGS_USER);
> }
>
> diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c
> index 518327883b18..f27960846edc 100644
> --- a/tools/perf/util/parse-regs-options.c
> +++ b/tools/perf/util/parse-regs-options.c
> @@ -9,13 +9,13 @@
> #include <subcmd/parse-options.h>
> #include "util/perf_regs.h"
> #include "util/parse-regs-options.h"
> +#include "record.h"
>
> static void
> -list_perf_regs(FILE *fp, uint64_t mask, int abi)
> +__list_gp_regs(FILE *fp, uint64_t mask, int abi)
> {
> const char *last_name = NULL;
>
> - fprintf(fp, "available registers: ");
> for (int reg = 0; reg < 64; reg++) {
> const char *name;
>
> @@ -27,14 +27,68 @@ list_perf_regs(FILE *fp, uint64_t mask, int abi)
> fprintf(fp, "%s%s", reg > 0 ? " " : "", name);
> last_name = name;
> }
> +}
> +
> +static void
> +__list_simd_regs(FILE *fp, uint64_t mask, bool intr, bool pred)
> +{
> + uint64_t bitmap = 0;
> + uint16_t qwords = 0;
> + const char *name;
> + int i = 0;
> +
> + for (int reg_c = 0; reg_c < 64; reg_c++) {
> + if (((1ULL << reg_c) & mask) == 0)
> + continue;
> +
> + name = perf_simd_reg_class_name(EM_HOST, reg_c, pred);
> + bitmap = intr ?
> + perf_intr_simd_reg_class_bitmap_qwords(EM_HOST, reg_c, &qwords, pred) :
> + perf_user_simd_reg_class_bitmap_qwords(EM_HOST, reg_c, &qwords, pred);
> + if (name && bitmap)
> + fprintf(fp, "%s%s0-%d", i++ > 0 ? " " : "",
> + name, fls64(bitmap) - 1);
> + }
> +}
> +
> +static void
> +list_perf_regs(FILE *fp, uint64_t mask, uint64_t simd_mask,
> + uint64_t pred_mask, int abi, bool intr)
> +{
> + bool printed = false;
> +
> + fprintf(fp, "available registers: ");
> +
> + if (mask) {
> + __list_gp_regs(fp, mask, abi);
> + printed = true;
> + }
> +
> + if (simd_mask) {
> + if (printed)
> + fprintf(fp, " ");
> + __list_simd_regs(fp, simd_mask, intr, /*pred=*/false);
> + printed = true;
> + }
> +
> + if (pred_mask) {
> + if (printed)
> + fprintf(fp, " ");
> + __list_simd_regs(fp, pred_mask, intr, /*pred=*/true);
> + printed = true;
> + }
> +
> fputc('\n', fp);
> }
>
> static uint64_t
> -name_to_perf_reg_mask(const char *to_match, uint64_t mask, int abi)
> +name_to_gp_reg_mask(const char *to_match, uint64_t mask, int abi)
> {
> uint64_t reg_mask = 0;
>
> + if (!mask)
> + return reg_mask;
> +
> for (int reg = 0; reg < 64; reg++) {
> const char *name;
>
> @@ -51,13 +105,78 @@ name_to_perf_reg_mask(const char *to_match, uint64_t mask, int abi)
> return reg_mask;
> }
>
> +static bool
> +name_to_simd_reg_mask(struct record_opts *opts, const char *to_match,
> + uint64_t mask, bool intr, bool pred)
> +{
> + bool matched = false;
> + uint64_t bitmap;
> + uint16_t qwords;
> + int reg_c;
> +
> + if (!mask)
> + return false;
> +
> + for (reg_c = 0; reg_c < 64; reg_c++) {
> + const char *name;
> +
> + if (((1ULL << reg_c) & mask) == 0)
> + continue;
> +
> + name = perf_simd_reg_class_name(EM_HOST, reg_c, pred);
> + if (!name)
> + continue;
> +
> + if (!strcasecmp(to_match, name)) {
> + matched = true;
> + break;
> + }
> + }
> +
> + if (!matched)
> + return false;
> +
> + if (intr) {
> + bitmap = perf_intr_simd_reg_class_bitmap_qwords(EM_HOST,
> + reg_c, &qwords, pred);
> + } else {
> + bitmap = perf_user_simd_reg_class_bitmap_qwords(EM_HOST,
> + reg_c, &qwords, pred);
> + }
> +
> + /* Just need the highest qwords */
> + if (pred) {
> + if (qwords >= opts->sample_pred_reg_qwords) {
> + opts->sample_pred_reg_qwords = qwords;
> + if (intr)
> + opts->sample_intr_pred_regs = bitmap;
> + else
> + opts->sample_user_pred_regs = bitmap;
> + }
> + } else {
> + if (qwords >= opts->sample_vec_reg_qwords) {
> + opts->sample_vec_reg_qwords = qwords;
> + if (intr)
> + opts->sample_intr_vec_regs = bitmap;
> + else
> + opts->sample_user_vec_regs = bitmap;
> + }
> + }
> +
> + return true;
> +}
> +
> static int
> __parse_regs(const struct option *opt, const char *str, int unset, bool intr)
> {
> uint64_t *mode = (uint64_t *)opt->value;
> + struct record_opts *opts;
> char *s, *os = NULL, *p;
> - int ret = -1;
> + uint64_t simd_mask;
> + uint64_t pred_mask;
> uint64_t mask;
> + bool matched;
> + int ret = -1;
> int abi;
>
> if (unset)
> @@ -69,11 +188,16 @@ __parse_regs(const struct option *opt, const char *str, int unset, bool intr)
> if (*mode)
> return -1;
>
> - mask = intr ? perf_intr_reg_mask(EM_HOST, &abi) : perf_user_reg_mask(EM_HOST, &abi);
> + mask = intr ? perf_intr_reg_mask(EM_HOST, &abi) :
> + perf_user_reg_mask(EM_HOST, &abi);
> + opts = intr ? container_of(opt->value, struct record_opts, sample_intr_regs) :
> + container_of(opt->value, struct record_opts, sample_user_regs);
>
> /* str may be NULL in case no arg is passed to -I */
> if (!str) {
> *mode = mask;
> + if (abi & PERF_SAMPLE_REGS_ABI_SIMD)
> + opts->sample_pred_reg_qwords = 1;
> return 0;
> }
>
> @@ -82,6 +206,14 @@ __parse_regs(const struct option *opt, const char *str, int unset, bool intr)
> if (!s)
> return -1;
>
> + if (intr) {
> + simd_mask = perf_intr_simd_reg_class_mask(EM_HOST, /*pred=*/false);
> + pred_mask = perf_intr_simd_reg_class_mask(EM_HOST, /*pred=*/true);
> + } else {
> + simd_mask = perf_user_simd_reg_class_mask(EM_HOST, /*pred=*/false);
> + pred_mask = perf_user_simd_reg_class_mask(EM_HOST, /*pred=*/true);
> + }
> +
> for (;;) {
> uint64_t reg_mask;
>
> @@ -90,15 +222,24 @@ __parse_regs(const struct option *opt, const char *str, int unset, bool intr)
> *p = '\0';
>
> if (!strcmp(s, "?")) {
> - list_perf_regs(stderr, mask, abi);
> + list_perf_regs(stderr, mask, simd_mask, pred_mask, abi, intr);
> goto error;
> }
>
> - reg_mask = name_to_perf_reg_mask(s, mask, abi);
> - if (reg_mask == 0) {
> - ui__warning("Unknown register \"%s\", check man page or run \"perf record %s?\"\n",
> + reg_mask = name_to_gp_reg_mask(s, mask, abi);
> + if (reg_mask) {
> + if (abi & PERF_SAMPLE_REGS_ABI_SIMD)
> + opts->sample_pred_reg_qwords = 1;
> + } else {
> + matched = name_to_simd_reg_mask(opts, s, simd_mask,
> + intr, /*pred=*/false) ||
> + name_to_simd_reg_mask(opts, s, pred_mask,
> + intr, /*pred=*/true);
> + if (!matched) {
> + ui__warning("Unknown register \"%s\", check man page or run \"perf record %s?\"\n",
> s, intr ? "-I" : "--user-regs=");
> - goto error;
> + goto error;
> + }
> }
> *mode |= reg_mask;
>
> diff --git a/tools/perf/util/perf-regs-arch/perf_regs_x86.c b/tools/perf/util/perf-regs-arch/perf_regs_x86.c
> index 3e9241a11a95..867059fc3cb0 100644
> --- a/tools/perf/util/perf-regs-arch/perf_regs_x86.c
> +++ b/tools/perf/util/perf-regs-arch/perf_regs_x86.c
> @@ -461,3 +461,295 @@ uint64_t __perf_reg_sp_x86(void)
> {
> return PERF_REG_X86_SP;
> }
> +
> +enum {
> + PERF_REG_CLASS_X86_OPMASK = 0,
> + PERF_REG_CLASS_X86_XMM,
> + PERF_REG_CLASS_X86_YMM,
> + PERF_REG_CLASS_X86_ZMM,
> + PERF_REG_X86_MAX_SIMD_CLASSES,
> +};
> +
> +#define PERF_REG_CLASS_X86_PRED_MASK (BIT(PERF_REG_CLASS_X86_OPMASK))
> +#define PERF_REG_CLASS_X86_SIMD_MASK (BIT(PERF_REG_CLASS_X86_XMM) | \
> + BIT(PERF_REG_CLASS_X86_YMM) | \
> + BIT(PERF_REG_CLASS_X86_ZMM))
> +
> +/*
> + * This function is used to determine whether the kernel perf subsystem
> + * supports sampling a given kind of SIMD register (OPMASK/XMM/YMM/ZMM).
> + *
> + * @sample_type: PERF_SAMPLE_REGS_INTR or PERF_SAMPLE_REGS_USER
> + * @qwords: the length of the SIMD register, like 1/2/4/8 qwords for
> + * OPMASK/XMM/YMM/ZMM registers.
> + * @mask: the bitmask of SIMD registers, like 0xffff for XMM0 ~ XMM15
> + * @pred: whether it's a predicate SIMD register, like the OPMASK register.
> + *
> + * Return value: true indicates support, otherwise no support.
> + */
> +static bool
> +__support_simd_reg_class(uint64_t sample_type, uint16_t qwords,
> + uint64_t mask, bool pred)
> +{
> + struct perf_event_attr attr = {
> + .type = PERF_TYPE_HARDWARE,
> + .config = PERF_COUNT_HW_CPU_CYCLES,
> + .sample_type = sample_type,
> + .disabled = 1,
> + .exclude_kernel = 1,
> + .sample_simd_regs_enabled = 1,
> + };
> + int fd;
> +
> + attr.sample_period = 1;
> +
> + if (!pred) {
> + attr.sample_simd_vec_reg_qwords = qwords;
> + if (sample_type == PERF_SAMPLE_REGS_INTR)
> + attr.sample_simd_vec_reg_intr = mask;
> + else
> + attr.sample_simd_vec_reg_user = mask;
> + } else {
> + attr.sample_simd_pred_reg_qwords = PERF_X86_OPMASK_QWORDS;
> + if (sample_type == PERF_SAMPLE_REGS_INTR)
> + attr.sample_simd_pred_reg_intr = PERF_X86_SIMD_PRED_MASK;
> + else
> + attr.sample_simd_pred_reg_user = PERF_X86_SIMD_PRED_MASK;
> + }
> +
> + if (perf_pmus__num_core_pmus() > 1) {
> + __u64 type = perf_pmus__find_core_pmu()->type;
> +
> + attr.config |= type << PERF_PMU_TYPE_SHIFT;
> + }
> +
> + event_attr_init(&attr);
> +
> + fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
> + if (fd != -1) {
> + close(fd);
> + return true;
> + }
> +
> + return false;
> +}
> +
> +#define PERF_X86_SIMD_ZMMH_REGS (PERF_X86_SIMD_ZMM_REGS / 2)
> +
> +static bool __arch_has_simd_reg_class(uint64_t sample_type, int reg_class,
> + uint64_t *mask, uint16_t *qwords)
> +{
> + bool supported = false;
> + uint64_t bits;
> +
> + *mask = 0;
> + *qwords = 0;
> +
> + switch (reg_class) {
> + case PERF_REG_CLASS_X86_OPMASK:
> + bits = BIT_ULL(PERF_X86_SIMD_OPMASK_REGS) - 1;
> + supported = __support_simd_reg_class(sample_type,
> + PERF_X86_OPMASK_QWORDS,
> + bits, true);
> + if (supported) {
> + *mask = bits;
> + *qwords = PERF_X86_OPMASK_QWORDS;
> + }
> + break;
> + case PERF_REG_CLASS_X86_XMM:
> + bits = BIT_ULL(PERF_X86_SIMD_XMM_REGS) - 1;
> + supported = __support_simd_reg_class(sample_type,
> + PERF_X86_XMM_QWORDS,
> + bits, false);
> + if (supported) {
> + *mask = bits;
> + *qwords = PERF_X86_XMM_QWORDS;
> + }
> + break;
> + case PERF_REG_CLASS_X86_YMM:
> + bits = BIT_ULL(PERF_X86_SIMD_YMM_REGS) - 1;
> + supported = __support_simd_reg_class(sample_type,
> + PERF_X86_YMM_QWORDS,
> + bits, false);
> + if (supported) {
> + *mask = bits;
> + *qwords = PERF_X86_YMM_QWORDS;
> + }
> + break;
> + case PERF_REG_CLASS_X86_ZMM:
> + bits = BIT_ULL(PERF_X86_SIMD_ZMM_REGS) - 1;
> + supported = __support_simd_reg_class(sample_type,
> + PERF_X86_ZMM_QWORDS,
> + bits, false);
> + if (supported) {
> + *mask = bits;
> + *qwords = PERF_X86_ZMM_QWORDS;
> + break;
> + }
> +
> + bits = BIT_ULL(PERF_X86_SIMD_ZMMH_REGS) - 1;
> + supported = __support_simd_reg_class(sample_type,
> + PERF_X86_ZMM_QWORDS,
> + bits, false);
> + if (supported) {
> + *mask = bits;
> + *qwords = PERF_X86_ZMM_QWORDS;
> + }
> + break;
> + default:
> + break;
> + }
> +
> + return supported;
> +}
> +
> +static bool __support_simd_sampling(void)
> +{
> + uint64_t mask = BIT_ULL(PERF_X86_SIMD_XMM_REGS) - 1;
> + uint16_t qwords = PERF_X86_XMM_QWORDS;
> + static bool simd_sampling_supported;
> + static bool cached;
> +
> + if (cached)
> + return simd_sampling_supported;
> +
> + simd_sampling_supported =
> + __arch_has_simd_reg_class(PERF_SAMPLE_REGS_INTR,
> + PERF_REG_CLASS_X86_XMM,
> + &mask, &qwords);
> + simd_sampling_supported |=
> + __arch_has_simd_reg_class(PERF_SAMPLE_REGS_USER,
> + PERF_REG_CLASS_X86_XMM,
> + &mask, &qwords);
> + cached = true;
> +
> + return simd_sampling_supported;
> +}
> +
> +/*
> + * @x86_intr_simd_cached: indicates the data of below 3
> + * x86_intr_simd_* items has been retrieved from kernel and cached.
> + * @x86_intr_simd_reg_class_mask: indicates which kinds of PRED/SIMD
> + * registers are supported for intr-regs option. Assume kernel perf
> + * subsystem supports XMM/YMM sampling, then the mask is
> + * BIT(PERF_REG_CLASS_X86_XMM) | BIT(PERF_REG_CLASS_X86_YMM).
> + * @x86_intr_simd_mask: indicates register bitmask for each kind of
> + * supported PRED/SIMD register, like
> + * x86_intr_simd_mask[PERF_REG_CLASS_X86_XMM] = 0xffff.
> + * @x86_intr_simd_qwords: indicates the register length (in qwords)
> + * for each kind of supported PRED/SIMD register, like
> + * x86_intr_simd_qwords[PERF_REG_CLASS_X86_XMM] = 2.
> + */
> +static bool x86_intr_simd_cached;
> +static uint64_t x86_intr_simd_reg_class_mask;
> +static uint64_t x86_intr_simd_mask[PERF_REG_X86_MAX_SIMD_CLASSES];
> +static uint16_t x86_intr_simd_qwords[PERF_REG_X86_MAX_SIMD_CLASSES];
> +
> +/*
> + * Similar to the x86_intr_simd_* items above; the difference is that
> + * these items are used for the user-regs option.
> + */
> +static bool x86_user_simd_cached;
> +static uint64_t x86_user_simd_reg_class_mask;
> +static uint64_t x86_user_simd_mask[PERF_REG_X86_MAX_SIMD_CLASSES];
> +static uint16_t x86_user_simd_qwords[PERF_REG_X86_MAX_SIMD_CLASSES];
> +
> +static uint64_t __arch__simd_reg_class_mask(bool intr)
> +{
> + uint64_t mask = 0;
> + bool supported;
> + int reg_c;
> +
> + if (!__support_simd_sampling())
> + return 0;
> +
> + if (intr && x86_intr_simd_cached)
> + return x86_intr_simd_reg_class_mask;
> +
> + if (!intr && x86_user_simd_cached)
> + return x86_user_simd_reg_class_mask;
> +
> + for (reg_c = 0; reg_c < PERF_REG_X86_MAX_SIMD_CLASSES; reg_c++) {
> + supported = false;
> +
> + if (intr) {
> + supported = __arch_has_simd_reg_class(
> + PERF_SAMPLE_REGS_INTR,
> + reg_c,
> + &x86_intr_simd_mask[reg_c],
> + &x86_intr_simd_qwords[reg_c]);
> + } else {
> + supported = __arch_has_simd_reg_class(
> + PERF_SAMPLE_REGS_USER,
> + reg_c,
> + &x86_user_simd_mask[reg_c],
> + &x86_user_simd_qwords[reg_c]);
> + }
> + if (supported)
> + mask |= BIT_ULL(reg_c);
> + }
> +
> + if (intr) {
> + x86_intr_simd_reg_class_mask = mask;
> + x86_intr_simd_cached = true;
> + } else {
> + x86_user_simd_reg_class_mask = mask;
> + x86_user_simd_cached = true;
> + }
> +
> + return mask;
> +}
> +
> +static uint64_t
> +__arch__simd_reg_class_bitmap_qwords(bool intr, int reg_c, uint16_t *qwords)
> +{
> + uint64_t mask = 0;
> +
> + *qwords = 0;
> + if (reg_c >= PERF_REG_X86_MAX_SIMD_CLASSES)
> + return mask;
> +
> + if (intr) {
> + mask = x86_intr_simd_mask[reg_c];
> + *qwords = x86_intr_simd_qwords[reg_c];
> + } else {
> + mask = x86_user_simd_mask[reg_c];
> + *qwords = x86_user_simd_qwords[reg_c];
> + }
> +
> + return mask;
> +}
> +
> +uint64_t __perf_simd_reg_class_mask_x86(bool intr, bool pred)
> +{
> + uint64_t mask = __arch__simd_reg_class_mask(intr);
> +
> + return pred ? mask & PERF_REG_CLASS_X86_PRED_MASK :
> + mask & PERF_REG_CLASS_X86_SIMD_MASK;
> +}
> +
> +uint64_t __perf_simd_reg_class_bitmap_qwords_x86(int reg_c, uint16_t *qwords,
> + bool intr, bool pred)
> +{
> + if (!x86_intr_simd_cached)
> + __perf_simd_reg_class_mask_x86(intr, pred);
> + return __arch__simd_reg_class_bitmap_qwords(intr, reg_c, qwords);
> +}
> +
> +const char *__perf_simd_reg_class_name_x86(int id, bool pred __maybe_unused)
> +{
> + switch (id) {
> + case PERF_REG_CLASS_X86_OPMASK:
> + return "OPMASK";
> + case PERF_REG_CLASS_X86_XMM:
> + return "XMM";
> + case PERF_REG_CLASS_X86_YMM:
> + return "YMM";
> + case PERF_REG_CLASS_X86_ZMM:
> + return "ZMM";
> + default:
> + return NULL;
> + }
> +
> + return NULL;
> +}
> diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
> index 741c3d657a8b..c6b8e53e06fd 100644
> --- a/tools/perf/util/perf_event_attr_fprintf.c
> +++ b/tools/perf/util/perf_event_attr_fprintf.c
> @@ -362,6 +362,12 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
> PRINT_ATTRf(aux_start_paused, p_unsigned);
> PRINT_ATTRf(aux_pause, p_unsigned);
> PRINT_ATTRf(aux_resume, p_unsigned);
> + PRINT_ATTRf(sample_simd_pred_reg_qwords, p_unsigned);
> + PRINT_ATTRf(sample_simd_pred_reg_intr, p_hex);
> + PRINT_ATTRf(sample_simd_pred_reg_user, p_hex);
> + PRINT_ATTRf(sample_simd_vec_reg_qwords, p_unsigned);
> + PRINT_ATTRf(sample_simd_vec_reg_intr, p_hex);
> + PRINT_ATTRf(sample_simd_vec_reg_user, p_hex);
>
> return ret;
> }
> diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c
> index bdd2eef13bc3..0ad40421f34e 100644
> --- a/tools/perf/util/perf_regs.c
> +++ b/tools/perf/util/perf_regs.c
> @@ -248,3 +248,75 @@ uint64_t perf_arch_reg_sp(uint16_t e_machine)
> return 0;
> }
> }
> +
> +uint64_t perf_intr_simd_reg_class_mask(uint16_t e_machine, bool pred)
> +{
> + switch (e_machine) {
> + case EM_386:
> + case EM_X86_64:
> + return __perf_simd_reg_class_mask_x86(/*intr=*/true, pred);
> + default:
> + return 0;
> + }
> +}
> +
> +uint64_t perf_user_simd_reg_class_mask(uint16_t e_machine, bool pred)
> +{
> + switch (e_machine) {
> + case EM_386:
> + case EM_X86_64:
> + return __perf_simd_reg_class_mask_x86(/*intr=*/false, pred);
> + default:
> + return 0;
> + }
> +}
> +
> +uint64_t perf_intr_simd_reg_class_bitmap_qwords(uint16_t e_machine, int reg_c,
> + uint16_t *qwords, bool pred)
> +{
> + switch (e_machine) {
> + case EM_386:
> + case EM_X86_64:
> + return __perf_simd_reg_class_bitmap_qwords_x86(reg_c, qwords,
> + /*intr=*/true,
> + pred);
> + default:
> + *qwords = 0;
> + return 0;
> + }
> +}
> +
> +uint64_t perf_user_simd_reg_class_bitmap_qwords(uint16_t e_machine, int reg_c,
> + uint16_t *qwords, bool pred)
> +{
> + switch (e_machine) {
> + case EM_386:
> + case EM_X86_64:
> + return __perf_simd_reg_class_bitmap_qwords_x86(reg_c, qwords,
> + /*intr=*/false,
> + pred);
> + default:
> + *qwords = 0;
> + return 0;
> + }
> +}
> +
> +const char *perf_simd_reg_class_name(uint16_t e_machine, int id, bool pred)
> +{
> + const char *name = NULL;
> +
> + switch (e_machine) {
> + case EM_386:
> + case EM_X86_64:
> + name = __perf_simd_reg_class_name_x86(id, pred);
> + break;
> + default:
> + break;
> + }
> + if (name)
> + return name;
> +
> + pr_debug("Failed to find %s register %d for ELF machine type %u\n",
> + pred ? "PRED" : "SIMD", id, e_machine);
> + return "unknown";
> +}
> diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h
> index c9501ca8045d..80d1d7316188 100644
> --- a/tools/perf/util/perf_regs.h
> +++ b/tools/perf/util/perf_regs.h
> @@ -20,6 +20,13 @@ const char *perf_reg_name(int id, uint16_t e_machine, uint32_t e_flags, int abi)
> int perf_reg_value(u64 *valp, struct regs_dump *regs, int id);
> uint64_t perf_arch_reg_ip(uint16_t e_machine);
> uint64_t perf_arch_reg_sp(uint16_t e_machine);
> +uint64_t perf_intr_simd_reg_class_mask(uint16_t e_machine, bool pred);
> +uint64_t perf_user_simd_reg_class_mask(uint16_t e_machine, bool pred);
> +uint64_t perf_intr_simd_reg_class_bitmap_qwords(uint16_t e_machine, int reg_c,
> + uint16_t *qwords, bool pred);
> +uint64_t perf_user_simd_reg_class_bitmap_qwords(uint16_t e_machine, int reg_c,
> + uint16_t *qwords, bool pred);
> +const char *perf_simd_reg_class_name(uint16_t e_machine, int id, bool pred);
>
> int __perf_sdt_arg_parse_op_arm64(char *old_op, char **new_op);
> uint64_t __perf_reg_mask_arm64(bool intr);
> @@ -68,6 +75,10 @@ uint64_t __perf_reg_mask_x86(bool intr, int *abi);
> const char *__perf_reg_name_x86(int id, int abi);
> uint64_t __perf_reg_ip_x86(void);
> uint64_t __perf_reg_sp_x86(void);
> +uint64_t __perf_simd_reg_class_mask_x86(bool intr, bool pred);
> +uint64_t __perf_simd_reg_class_bitmap_qwords_x86(int reg_c, uint16_t *qwords,
> + bool intr, bool pred);
> +const char *__perf_simd_reg_class_name_x86(int id, bool pred);
>
> static inline uint64_t DWARF_MINIMAL_REGS(uint16_t e_machine)
> {
> diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
> index 93627c9a7338..37ed44b5f15b 100644
> --- a/tools/perf/util/record.h
> +++ b/tools/perf/util/record.h
> @@ -62,6 +62,12 @@ struct record_opts {
> u64 branch_stack;
> u64 sample_intr_regs;
> u64 sample_user_regs;
> + u64 sample_intr_vec_regs;
> + u64 sample_user_vec_regs;
> + u32 sample_intr_pred_regs;
> + u32 sample_user_pred_regs;
> + u16 sample_vec_reg_qwords;
> + u16 sample_pred_reg_qwords;
> u64 default_interval;
> u64 user_interval;
> size_t auxtrace_snapshot_size;
> --
> 2.34.1
>
Powered by blists - more mailing lists