lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAP-5=fV+NzS0-dd3a5aaKWCbPQfu2h0isvCWc_R65QgV=9mKOw@mail.gmail.com>
Date: Mon, 9 Feb 2026 14:36:10 -0800
From: Ian Rogers <irogers@...gle.com>
To: Dapeng Mi <dapeng1.mi@...ux.intel.com>
Cc: Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>, 
	Arnaldo Carvalho de Melo <acme@...nel.org>, Namhyung Kim <namhyung@...nel.org>, 
	Adrian Hunter <adrian.hunter@...el.com>, 
	Alexander Shishkin <alexander.shishkin@...ux.intel.com>, linux-perf-users@...r.kernel.org, 
	linux-kernel@...r.kernel.org, Zide Chen <zide.chen@...el.com>, 
	Falcon Thomas <thomas.falcon@...el.com>, Dapeng Mi <dapeng1.mi@...el.com>, 
	Xudong Hao <xudong.hao@...el.com>
Subject: Re: [Patch v6 2/4] perf regs: Support x86 eGPRs/SSP sampling

On Mon, Feb 9, 2026 at 12:39 AM Dapeng Mi <dapeng1.mi@...ux.intel.com> wrote:
>
> This patch adds support for sampling x86 extended GP registers (R16-R31)
> and the shadow stack pointer (SSP) register.
>
> The original XMM registers space in sample_regs_user/sample_regs_intr is
> reclaimed to represent the eGPRs and SSP when SIMD registers sampling is
> supported with the new SIMD sampling fields in the perf_event_attr
> structure. This necessitates a way to distinguish which register layout
> is used for the sample_regs_user/sample_regs_intr bitmap.
>
> To address this, a new "abi" argument is added to the helpers
> perf_intr_reg_mask(), perf_user_reg_mask(), and perf_reg_name(). When
> "abi & PERF_SAMPLE_REGS_ABI_SIMD" is true, it indicates the eGPRs and SSP
> layout is represented; otherwise, the legacy XMM registers are
> represented.
>
> Signed-off-by: Dapeng Mi <dapeng1.mi@...ux.intel.com>
> ---
>  tools/perf/builtin-script.c                   |   2 +-
>  tools/perf/util/evsel.c                       |   6 +-
>  tools/perf/util/parse-regs-options.c          |  17 ++-
>  .../perf/util/perf-regs-arch/perf_regs_x86.c  | 120 +++++++++++++++---
>  tools/perf/util/perf_regs.c                   |  14 +-
>  tools/perf/util/perf_regs.h                   |  10 +-
>  .../scripting-engines/trace-event-python.c    |   2 +-
>  tools/perf/util/session.c                     |   9 +-
>  8 files changed, 139 insertions(+), 41 deletions(-)
>
> diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
> index 14c6f6c3c4f2..ffe51f895666 100644
> --- a/tools/perf/builtin-script.c
> +++ b/tools/perf/builtin-script.c
> @@ -730,7 +730,7 @@ static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask,
>         for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) {
>                 u64 val = regs->regs[i++];
>                 printed += fprintf(fp, "%5s:0x%"PRIx64" ",
> -                                  perf_reg_name(r, e_machine, e_flags),
> +                                  perf_reg_name(r, e_machine, e_flags, regs->abi),

For clarity, it is tempting to add the abi argument to perf_reg_name() as a
separate first patch.

>                                    val);
>         }
>
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index f59228c1a39e..b7fb3f936ae3 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -1049,19 +1049,21 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o
>         }
>
>         if (param->record_mode == CALLCHAIN_DWARF) {
> +               int abi;
> +
>                 if (!function) {
>                         uint16_t e_machine = evsel__e_machine(evsel, /*e_flags=*/NULL);
>
>                         evsel__set_sample_bit(evsel, REGS_USER);
>                         evsel__set_sample_bit(evsel, STACK_USER);
>                         if (opts->sample_user_regs &&
> -                           DWARF_MINIMAL_REGS(e_machine) != perf_user_reg_mask(EM_HOST)) {
> +                           DWARF_MINIMAL_REGS(e_machine) != perf_user_reg_mask(EM_HOST, &abi)) {
>                                 attr->sample_regs_user |= DWARF_MINIMAL_REGS(e_machine);
>                                 pr_warning("WARNING: The use of --call-graph=dwarf may require all the user registers, "
>                                            "specifying a subset with --user-regs may render DWARF unwinding unreliable, "
>                                            "so the minimal registers set (IP, SP) is explicitly forced.\n");
>                         } else {
> -                               attr->sample_regs_user |= perf_user_reg_mask(EM_HOST);
> +                               attr->sample_regs_user |= perf_user_reg_mask(EM_HOST, &abi);
>                         }
>                         attr->sample_stack_user = param->dump_size;
>                         attr->exclude_callchain_user = 1;
> diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c
> index c93c2f0c8105..518327883b18 100644
> --- a/tools/perf/util/parse-regs-options.c
> +++ b/tools/perf/util/parse-regs-options.c
> @@ -10,7 +10,8 @@
>  #include "util/perf_regs.h"
>  #include "util/parse-regs-options.h"
>
> -static void list_perf_regs(FILE *fp, uint64_t mask)
> +static void
> +list_perf_regs(FILE *fp, uint64_t mask, int abi)
>  {
>         const char *last_name = NULL;
>
> @@ -21,7 +22,7 @@ static void list_perf_regs(FILE *fp, uint64_t mask)
>                 if (((1ULL << reg) & mask) == 0)
>                         continue;
>
> -               name = perf_reg_name(reg, EM_HOST, EF_HOST);
> +               name = perf_reg_name(reg, EM_HOST, EF_HOST, abi);
>                 if (name && (!last_name || strcmp(last_name, name)))
>                         fprintf(fp, "%s%s", reg > 0 ? " " : "", name);
>                 last_name = name;
> @@ -29,7 +30,8 @@ static void list_perf_regs(FILE *fp, uint64_t mask)
>         fputc('\n', fp);
>  }
>
> -static uint64_t name_to_perf_reg_mask(const char *to_match, uint64_t mask)
> +static uint64_t
> +name_to_perf_reg_mask(const char *to_match, uint64_t mask, int abi)
>  {
>         uint64_t reg_mask = 0;
>
> @@ -39,7 +41,7 @@ static uint64_t name_to_perf_reg_mask(const char *to_match, uint64_t mask)
>                 if (((1ULL << reg) & mask) == 0)
>                         continue;
>
> -               name = perf_reg_name(reg, EM_HOST, EF_HOST);
> +               name = perf_reg_name(reg, EM_HOST, EF_HOST, abi);
>                 if (!name)
>                         continue;
>
> @@ -56,6 +58,7 @@ __parse_regs(const struct option *opt, const char *str, int unset, bool intr)
>         char *s, *os = NULL, *p;
>         int ret = -1;
>         uint64_t mask;
> +       int abi;
>
>         if (unset)
>                 return 0;
> @@ -66,7 +69,7 @@ __parse_regs(const struct option *opt, const char *str, int unset, bool intr)
>         if (*mode)
>                 return -1;
>
> -       mask = intr ? perf_intr_reg_mask(EM_HOST) : perf_user_reg_mask(EM_HOST);
> +       mask = intr ? perf_intr_reg_mask(EM_HOST, &abi) : perf_user_reg_mask(EM_HOST, &abi);
>
>         /* str may be NULL in case no arg is passed to -I */
>         if (!str) {
> @@ -87,11 +90,11 @@ __parse_regs(const struct option *opt, const char *str, int unset, bool intr)
>                         *p = '\0';
>
>                 if (!strcmp(s, "?")) {
> -                       list_perf_regs(stderr, mask);
> +                       list_perf_regs(stderr, mask, abi);
>                         goto error;
>                 }
>
> -               reg_mask = name_to_perf_reg_mask(s, mask);
> +               reg_mask = name_to_perf_reg_mask(s, mask, abi);
>                 if (reg_mask == 0) {
>                         ui__warning("Unknown register \"%s\", check man page or run \"perf record %s?\"\n",
>                                 s, intr ? "-I" : "--user-regs=");
> diff --git a/tools/perf/util/perf-regs-arch/perf_regs_x86.c b/tools/perf/util/perf-regs-arch/perf_regs_x86.c
> index b6d20522b4e8..3e9241a11a95 100644
> --- a/tools/perf/util/perf-regs-arch/perf_regs_x86.c
> +++ b/tools/perf/util/perf-regs-arch/perf_regs_x86.c
> @@ -235,26 +235,26 @@ int __perf_sdt_arg_parse_op_x86(char *old_op, char **new_op)
>         return SDT_ARG_VALID;
>  }
>
> -uint64_t __perf_reg_mask_x86(bool intr)
> +static uint64_t __arch__reg_mask(u64 sample_type, u64 mask, bool has_simd_regs)
>  {
>         struct perf_event_attr attr = {
> -               .type                   = PERF_TYPE_HARDWARE,
> -               .config                 = PERF_COUNT_HW_CPU_CYCLES,
> -               .sample_type            = PERF_SAMPLE_REGS_INTR,
> -               .sample_regs_intr       = PERF_REG_EXTENDED_MASK,
> -               .precise_ip             = 1,
> -               .disabled               = 1,
> -               .exclude_kernel         = 1,
> +               .type                           = PERF_TYPE_HARDWARE,
> +               .config                         = PERF_COUNT_HW_CPU_CYCLES,
> +               .sample_type                    = sample_type,
> +               .precise_ip                     = 1,
> +               .disabled                       = 1,
> +               .exclude_kernel                 = 1,
> +               .sample_simd_regs_enabled       = has_simd_regs,
>         };
>         int fd;
> -
> -       if (!intr)
> -               return PERF_REGS_MASK;
> -
>         /*
>          * In an unnamed union, init it here to build on older gcc versions
>          */
>         attr.sample_period = 1;
> +       if (sample_type == PERF_SAMPLE_REGS_INTR)
> +               attr.sample_regs_intr = mask;
> +       else
> +               attr.sample_regs_user = mask;
>
>         if (perf_pmus__num_core_pmus() > 1) {
>                 struct perf_pmu *pmu = NULL;
> @@ -276,13 +276,34 @@ uint64_t __perf_reg_mask_x86(bool intr)
>                                  /*group_fd=*/-1, /*flags=*/0);
>         if (fd != -1) {
>                 close(fd);
> -               return (PERF_REG_EXTENDED_MASK | PERF_REGS_MASK);
> +               return mask;
> +       }
> +
> +       return 0;
> +}
> +
> +uint64_t __perf_reg_mask_x86(bool intr, int *abi)
> +{
> +       u64 sample_type = intr ? PERF_SAMPLE_REGS_INTR : PERF_SAMPLE_REGS_USER;
> +       uint64_t mask = PERF_REGS_MASK;
> +
> +       *abi = 0;
> +       mask |= __arch__reg_mask(sample_type,
> +                                GENMASK_ULL(PERF_REG_X86_R31, PERF_REG_X86_R16),
> +                                true);
> +       mask |= __arch__reg_mask(sample_type, BIT_ULL(PERF_REG_X86_SSP), true);
> +
> +       if (mask != PERF_REGS_MASK) {
> +               *abi |= PERF_SAMPLE_REGS_ABI_SIMD;
> +       } else {
> +               mask |= __arch__reg_mask(sample_type, PERF_REG_EXTENDED_MASK,
> +                                        false);
>         }
>
> -       return PERF_REGS_MASK;
> +       return mask;
>  }
>
> -const char *__perf_reg_name_x86(int id)
> +static const char *__arch_reg_gpr_name(int id)
>  {
>         switch (id) {
>         case PERF_REG_X86_AX:
> @@ -333,7 +354,60 @@ const char *__perf_reg_name_x86(int id)
>                 return "R14";
>         case PERF_REG_X86_R15:
>                 return "R15";
> +       default:
> +               return NULL;
> +       }
> +
> +       return NULL;
> +}
>
> +static const char *__arch_reg_egpr_name(int id)
> +{
> +       switch (id) {
> +       case PERF_REG_X86_R16:
> +               return "R16";
> +       case PERF_REG_X86_R17:
> +               return "R17";
> +       case PERF_REG_X86_R18:
> +               return "R18";
> +       case PERF_REG_X86_R19:
> +               return "R19";
> +       case PERF_REG_X86_R20:
> +               return "R20";
> +       case PERF_REG_X86_R21:
> +               return "R21";
> +       case PERF_REG_X86_R22:
> +               return "R22";
> +       case PERF_REG_X86_R23:
> +               return "R23";
> +       case PERF_REG_X86_R24:
> +               return "R24";
> +       case PERF_REG_X86_R25:
> +               return "R25";
> +       case PERF_REG_X86_R26:
> +               return "R26";
> +       case PERF_REG_X86_R27:
> +               return "R27";
> +       case PERF_REG_X86_R28:
> +               return "R28";
> +       case PERF_REG_X86_R29:
> +               return "R29";
> +       case PERF_REG_X86_R30:
> +               return "R30";
> +       case PERF_REG_X86_R31:
> +               return "R31";
> +       case PERF_REG_X86_SSP:
> +               return "SSP";
> +       default:
> +               return NULL;
> +       }
> +
> +       return NULL;
> +}
> +
> +static const char *__arch_reg_xmm_name(int id)
> +{
> +       switch (id) {
>  #define XMM(x) \
>         case PERF_REG_X86_XMM ## x:     \
>         case PERF_REG_X86_XMM ## x + 1: \
> @@ -362,6 +436,22 @@ const char *__perf_reg_name_x86(int id)
>         return NULL;
>  }
>
> +const char *__perf_reg_name_x86(int id, int abi)
> +{
> +       const char *name;
> +
> +       name = __arch_reg_gpr_name(id);
> +       if (name)
> +               return name;
> +
> +       if (abi & PERF_SAMPLE_REGS_ABI_SIMD)
> +               name = __arch_reg_egpr_name(id);
> +       else
> +               name = __arch_reg_xmm_name(id);
> +
> +       return name;
> +}
> +
>  uint64_t __perf_reg_ip_x86(void)
>  {
>         return PERF_REG_X86_IP;
> diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c
> index 5b8f34beb24e..bdd2eef13bc3 100644
> --- a/tools/perf/util/perf_regs.c
> +++ b/tools/perf/util/perf_regs.c
> @@ -32,10 +32,11 @@ int perf_sdt_arg_parse_op(uint16_t e_machine, char *old_op, char **new_op)
>         return ret;
>  }
>
> -uint64_t perf_intr_reg_mask(uint16_t e_machine)
> +uint64_t perf_intr_reg_mask(uint16_t e_machine, int *abi)

I wonder if "abi" is the right name for the out argument here. Before the
SIMD change, the ABI meant either 32-bit or 64-bit, so one could imagine
that with a 32-bit ABI the registers R8 to R15 wouldn't be in the mask for
x86. Perhaps just use a "bool *" for sample_simd_regs_enabled instead.

Everything else looks good. Thanks for the weak function cleanup; this
code is much more generic and better than before. I know it wasn't
trivial to do, but I appreciate it!

Thanks,
Ian

>  {
>         uint64_t mask = 0;
>
> +       *abi = 0;
>         switch (e_machine) {
>         case EM_ARM:
>                 mask = __perf_reg_mask_arm(/*intr=*/true);
> @@ -64,7 +65,7 @@ uint64_t perf_intr_reg_mask(uint16_t e_machine)
>                 break;
>         case EM_386:
>         case EM_X86_64:
> -               mask = __perf_reg_mask_x86(/*intr=*/true);
> +               mask = __perf_reg_mask_x86(/*intr=*/true, abi);
>                 break;
>         default:
>                 pr_debug("Unknown ELF machine %d, interrupt sampling register mask will be empty.\n",
> @@ -75,10 +76,11 @@ uint64_t perf_intr_reg_mask(uint16_t e_machine)
>         return mask;
>  }
>
> -uint64_t perf_user_reg_mask(uint16_t e_machine)
> +uint64_t perf_user_reg_mask(uint16_t e_machine, int *abi)
>  {
>         uint64_t mask = 0;
>
> +       *abi = 0;
>         switch (e_machine) {
>         case EM_ARM:
>                 mask = __perf_reg_mask_arm(/*intr=*/false);
> @@ -107,7 +109,7 @@ uint64_t perf_user_reg_mask(uint16_t e_machine)
>                 break;
>         case EM_386:
>         case EM_X86_64:
> -               mask = __perf_reg_mask_x86(/*intr=*/false);
> +               mask = __perf_reg_mask_x86(/*intr=*/false, abi);
>                 break;
>         default:
>                 pr_debug("Unknown ELF machine %d, user sampling register mask will be empty.\n",
> @@ -118,7 +120,7 @@ uint64_t perf_user_reg_mask(uint16_t e_machine)
>         return mask;
>  }
>
> -const char *perf_reg_name(int id, uint16_t e_machine, uint32_t e_flags)
> +const char *perf_reg_name(int id, uint16_t e_machine, uint32_t e_flags, int abi)
>  {
>         const char *reg_name = NULL;
>
> @@ -150,7 +152,7 @@ const char *perf_reg_name(int id, uint16_t e_machine, uint32_t e_flags)
>                 break;
>         case EM_386:
>         case EM_X86_64:
> -               reg_name = __perf_reg_name_x86(id);
> +               reg_name = __perf_reg_name_x86(id, abi);
>                 break;
>         default:
>                 break;
> diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h
> index 7c04700bf837..c9501ca8045d 100644
> --- a/tools/perf/util/perf_regs.h
> +++ b/tools/perf/util/perf_regs.h
> @@ -13,10 +13,10 @@ enum {
>  };
>
>  int perf_sdt_arg_parse_op(uint16_t e_machine, char *old_op, char **new_op);
> -uint64_t perf_intr_reg_mask(uint16_t e_machine);
> -uint64_t perf_user_reg_mask(uint16_t e_machine);
> +uint64_t perf_intr_reg_mask(uint16_t e_machine, int *abi);
> +uint64_t perf_user_reg_mask(uint16_t e_machine, int *abi);
>
> -const char *perf_reg_name(int id, uint16_t e_machine, uint32_t e_flags);
> +const char *perf_reg_name(int id, uint16_t e_machine, uint32_t e_flags, int abi);
>  int perf_reg_value(u64 *valp, struct regs_dump *regs, int id);
>  uint64_t perf_arch_reg_ip(uint16_t e_machine);
>  uint64_t perf_arch_reg_sp(uint16_t e_machine);
> @@ -64,8 +64,8 @@ uint64_t __perf_reg_ip_s390(void);
>  uint64_t __perf_reg_sp_s390(void);
>
>  int __perf_sdt_arg_parse_op_x86(char *old_op, char **new_op);
> -uint64_t __perf_reg_mask_x86(bool intr);
> -const char *__perf_reg_name_x86(int id);
> +uint64_t __perf_reg_mask_x86(bool intr, int *abi);
> +const char *__perf_reg_name_x86(int id, int abi);
>  uint64_t __perf_reg_ip_x86(void);
>  uint64_t __perf_reg_sp_x86(void);
>
> diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
> index 2b0df7bd9a46..4cc5b96898e6 100644
> --- a/tools/perf/util/scripting-engines/trace-event-python.c
> +++ b/tools/perf/util/scripting-engines/trace-event-python.c
> @@ -733,7 +733,7 @@ static void regs_map(struct regs_dump *regs, uint64_t mask, uint16_t e_machine,
>
>                 printed += scnprintf(bf + printed, size - printed,
>                                      "%5s:0x%" PRIx64 " ",
> -                                    perf_reg_name(r, e_machine, e_flags), val);
> +                                    perf_reg_name(r, e_machine, e_flags, regs->abi), val);
>         }
>  }
>
> diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
> index 4b465abfa36c..7cf7bf86205d 100644
> --- a/tools/perf/util/session.c
> +++ b/tools/perf/util/session.c
> @@ -959,15 +959,16 @@ static void branch_stack__printf(struct perf_sample *sample,
>         }
>  }
>
> -static void regs_dump__printf(u64 mask, u64 *regs, uint16_t e_machine, uint32_t e_flags)
> +static void regs_dump__printf(u64 mask, struct regs_dump *regs,
> +                             uint16_t e_machine, uint32_t e_flags)
>  {
>         unsigned rid, i = 0;
>
>         for_each_set_bit(rid, (unsigned long *) &mask, sizeof(mask) * 8) {
> -               u64 val = regs[i++];
> +               u64 val = regs->regs[i++];
>
>                 printf(".... %-5s 0x%016" PRIx64 "\n",
> -                      perf_reg_name(rid, e_machine, e_flags), val);
> +                      perf_reg_name(rid, e_machine, e_flags, regs->abi), val);
>         }
>  }
>
> @@ -995,7 +996,7 @@ static void regs__printf(const char *type, struct regs_dump *regs,
>                mask,
>                regs_dump_abi(regs));
>
> -       regs_dump__printf(mask, regs->regs, e_machine, e_flags);
> +       regs_dump__printf(mask, regs, e_machine, e_flags);
>  }
>
>  static void regs_user__printf(struct perf_sample *sample, uint16_t e_machine, uint32_t e_flags)
> --
> 2.34.1
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ