[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20260209083514.2225115-3-dapeng1.mi@linux.intel.com>
Date: Mon, 9 Feb 2026 16:35:12 +0800
From: Dapeng Mi <dapeng1.mi@...ux.intel.com>
To: Peter Zijlstra <peterz@...radead.org>,
Ingo Molnar <mingo@...hat.com>,
Arnaldo Carvalho de Melo <acme@...nel.org>,
Namhyung Kim <namhyung@...nel.org>,
Ian Rogers <irogers@...gle.com>,
Adrian Hunter <adrian.hunter@...el.com>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>
Cc: linux-perf-users@...r.kernel.org,
linux-kernel@...r.kernel.org,
Zide Chen <zide.chen@...el.com>,
Falcon Thomas <thomas.falcon@...el.com>,
Dapeng Mi <dapeng1.mi@...el.com>,
Xudong Hao <xudong.hao@...el.com>,
Dapeng Mi <dapeng1.mi@...ux.intel.com>
Subject: [Patch v6 2/4] perf regs: Support x86 eGPRs/SSP sampling
This patch adds support for sampling x86 extended GP registers (R16-R31)
and the shadow stack pointer (SSP) register.
The original XMM register space in sample_regs_user/sample_regs_intr is
reclaimed to represent the eGPRs and SSP when SIMD register sampling is
supported via the new SIMD sampling fields in the perf_event_attr
structure. This necessitates a way to distinguish which register layout
is used for the sample_regs_user/sample_regs_intr bitmap.
To address this, a new "abi" argument is added to the helpers
perf_intr_reg_mask(), perf_user_reg_mask(), and perf_reg_name(). The two
mask helpers report the detected ABI through an "int *abi" output
parameter, while perf_reg_name() takes the ABI as an input. When
"abi & PERF_SAMPLE_REGS_ABI_SIMD" is non-zero, the bitmap uses the eGPRs
and SSP layout; otherwise, it uses the legacy XMM register layout.
Signed-off-by: Dapeng Mi <dapeng1.mi@...ux.intel.com>
---
tools/perf/builtin-script.c | 2 +-
tools/perf/util/evsel.c | 6 +-
tools/perf/util/parse-regs-options.c | 17 ++-
.../perf/util/perf-regs-arch/perf_regs_x86.c | 120 +++++++++++++++---
tools/perf/util/perf_regs.c | 14 +-
tools/perf/util/perf_regs.h | 10 +-
.../scripting-engines/trace-event-python.c | 2 +-
tools/perf/util/session.c | 9 +-
8 files changed, 139 insertions(+), 41 deletions(-)
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 14c6f6c3c4f2..ffe51f895666 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -730,7 +730,7 @@ static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask,
for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) {
u64 val = regs->regs[i++];
printed += fprintf(fp, "%5s:0x%"PRIx64" ",
- perf_reg_name(r, e_machine, e_flags),
+ perf_reg_name(r, e_machine, e_flags, regs->abi),
val);
}
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index f59228c1a39e..b7fb3f936ae3 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1049,19 +1049,21 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o
}
if (param->record_mode == CALLCHAIN_DWARF) {
+ int abi;
+
if (!function) {
uint16_t e_machine = evsel__e_machine(evsel, /*e_flags=*/NULL);
evsel__set_sample_bit(evsel, REGS_USER);
evsel__set_sample_bit(evsel, STACK_USER);
if (opts->sample_user_regs &&
- DWARF_MINIMAL_REGS(e_machine) != perf_user_reg_mask(EM_HOST)) {
+ DWARF_MINIMAL_REGS(e_machine) != perf_user_reg_mask(EM_HOST, &abi)) {
attr->sample_regs_user |= DWARF_MINIMAL_REGS(e_machine);
pr_warning("WARNING: The use of --call-graph=dwarf may require all the user registers, "
"specifying a subset with --user-regs may render DWARF unwinding unreliable, "
"so the minimal registers set (IP, SP) is explicitly forced.\n");
} else {
- attr->sample_regs_user |= perf_user_reg_mask(EM_HOST);
+ attr->sample_regs_user |= perf_user_reg_mask(EM_HOST, &abi);
}
attr->sample_stack_user = param->dump_size;
attr->exclude_callchain_user = 1;
diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c
index c93c2f0c8105..518327883b18 100644
--- a/tools/perf/util/parse-regs-options.c
+++ b/tools/perf/util/parse-regs-options.c
@@ -10,7 +10,8 @@
#include "util/perf_regs.h"
#include "util/parse-regs-options.h"
-static void list_perf_regs(FILE *fp, uint64_t mask)
+static void
+list_perf_regs(FILE *fp, uint64_t mask, int abi)
{
const char *last_name = NULL;
@@ -21,7 +22,7 @@ static void list_perf_regs(FILE *fp, uint64_t mask)
if (((1ULL << reg) & mask) == 0)
continue;
- name = perf_reg_name(reg, EM_HOST, EF_HOST);
+ name = perf_reg_name(reg, EM_HOST, EF_HOST, abi);
if (name && (!last_name || strcmp(last_name, name)))
fprintf(fp, "%s%s", reg > 0 ? " " : "", name);
last_name = name;
@@ -29,7 +30,8 @@ static void list_perf_regs(FILE *fp, uint64_t mask)
fputc('\n', fp);
}
-static uint64_t name_to_perf_reg_mask(const char *to_match, uint64_t mask)
+static uint64_t
+name_to_perf_reg_mask(const char *to_match, uint64_t mask, int abi)
{
uint64_t reg_mask = 0;
@@ -39,7 +41,7 @@ static uint64_t name_to_perf_reg_mask(const char *to_match, uint64_t mask)
if (((1ULL << reg) & mask) == 0)
continue;
- name = perf_reg_name(reg, EM_HOST, EF_HOST);
+ name = perf_reg_name(reg, EM_HOST, EF_HOST, abi);
if (!name)
continue;
@@ -56,6 +58,7 @@ __parse_regs(const struct option *opt, const char *str, int unset, bool intr)
char *s, *os = NULL, *p;
int ret = -1;
uint64_t mask;
+ int abi;
if (unset)
return 0;
@@ -66,7 +69,7 @@ __parse_regs(const struct option *opt, const char *str, int unset, bool intr)
if (*mode)
return -1;
- mask = intr ? perf_intr_reg_mask(EM_HOST) : perf_user_reg_mask(EM_HOST);
+ mask = intr ? perf_intr_reg_mask(EM_HOST, &abi) : perf_user_reg_mask(EM_HOST, &abi);
/* str may be NULL in case no arg is passed to -I */
if (!str) {
@@ -87,11 +90,11 @@ __parse_regs(const struct option *opt, const char *str, int unset, bool intr)
*p = '\0';
if (!strcmp(s, "?")) {
- list_perf_regs(stderr, mask);
+ list_perf_regs(stderr, mask, abi);
goto error;
}
- reg_mask = name_to_perf_reg_mask(s, mask);
+ reg_mask = name_to_perf_reg_mask(s, mask, abi);
if (reg_mask == 0) {
ui__warning("Unknown register \"%s\", check man page or run \"perf record %s?\"\n",
s, intr ? "-I" : "--user-regs=");
diff --git a/tools/perf/util/perf-regs-arch/perf_regs_x86.c b/tools/perf/util/perf-regs-arch/perf_regs_x86.c
index b6d20522b4e8..3e9241a11a95 100644
--- a/tools/perf/util/perf-regs-arch/perf_regs_x86.c
+++ b/tools/perf/util/perf-regs-arch/perf_regs_x86.c
@@ -235,26 +235,26 @@ int __perf_sdt_arg_parse_op_x86(char *old_op, char **new_op)
return SDT_ARG_VALID;
}
-uint64_t __perf_reg_mask_x86(bool intr)
+static uint64_t __arch__reg_mask(u64 sample_type, u64 mask, bool has_simd_regs)
{
struct perf_event_attr attr = {
- .type = PERF_TYPE_HARDWARE,
- .config = PERF_COUNT_HW_CPU_CYCLES,
- .sample_type = PERF_SAMPLE_REGS_INTR,
- .sample_regs_intr = PERF_REG_EXTENDED_MASK,
- .precise_ip = 1,
- .disabled = 1,
- .exclude_kernel = 1,
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+ .sample_type = sample_type,
+ .precise_ip = 1,
+ .disabled = 1,
+ .exclude_kernel = 1,
+ .sample_simd_regs_enabled = has_simd_regs,
};
int fd;
-
- if (!intr)
- return PERF_REGS_MASK;
-
/*
* In an unnamed union, init it here to build on older gcc versions
*/
attr.sample_period = 1;
+ if (sample_type == PERF_SAMPLE_REGS_INTR)
+ attr.sample_regs_intr = mask;
+ else
+ attr.sample_regs_user = mask;
if (perf_pmus__num_core_pmus() > 1) {
struct perf_pmu *pmu = NULL;
@@ -276,13 +276,34 @@ uint64_t __perf_reg_mask_x86(bool intr)
/*group_fd=*/-1, /*flags=*/0);
if (fd != -1) {
close(fd);
- return (PERF_REG_EXTENDED_MASK | PERF_REGS_MASK);
+ return mask;
+ }
+
+ return 0;
+}
+
+uint64_t __perf_reg_mask_x86(bool intr, int *abi)
+{
+ u64 sample_type = intr ? PERF_SAMPLE_REGS_INTR : PERF_SAMPLE_REGS_USER;
+ uint64_t mask = PERF_REGS_MASK;
+
+ *abi = 0;
+ mask |= __arch__reg_mask(sample_type,
+ GENMASK_ULL(PERF_REG_X86_R31, PERF_REG_X86_R16),
+ true);
+ mask |= __arch__reg_mask(sample_type, BIT_ULL(PERF_REG_X86_SSP), true);
+
+ if (mask != PERF_REGS_MASK) {
+ *abi |= PERF_SAMPLE_REGS_ABI_SIMD;
+ } else {
+ mask |= __arch__reg_mask(sample_type, PERF_REG_EXTENDED_MASK,
+ false);
}
- return PERF_REGS_MASK;
+ return mask;
}
-const char *__perf_reg_name_x86(int id)
+static const char *__arch_reg_gpr_name(int id)
{
switch (id) {
case PERF_REG_X86_AX:
@@ -333,7 +354,60 @@ const char *__perf_reg_name_x86(int id)
return "R14";
case PERF_REG_X86_R15:
return "R15";
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+static const char *__arch_reg_egpr_name(int id)
+{
+ switch (id) {
+ case PERF_REG_X86_R16:
+ return "R16";
+ case PERF_REG_X86_R17:
+ return "R17";
+ case PERF_REG_X86_R18:
+ return "R18";
+ case PERF_REG_X86_R19:
+ return "R19";
+ case PERF_REG_X86_R20:
+ return "R20";
+ case PERF_REG_X86_R21:
+ return "R21";
+ case PERF_REG_X86_R22:
+ return "R22";
+ case PERF_REG_X86_R23:
+ return "R23";
+ case PERF_REG_X86_R24:
+ return "R24";
+ case PERF_REG_X86_R25:
+ return "R25";
+ case PERF_REG_X86_R26:
+ return "R26";
+ case PERF_REG_X86_R27:
+ return "R27";
+ case PERF_REG_X86_R28:
+ return "R28";
+ case PERF_REG_X86_R29:
+ return "R29";
+ case PERF_REG_X86_R30:
+ return "R30";
+ case PERF_REG_X86_R31:
+ return "R31";
+ case PERF_REG_X86_SSP:
+ return "SSP";
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+static const char *__arch_reg_xmm_name(int id)
+{
+ switch (id) {
#define XMM(x) \
case PERF_REG_X86_XMM ## x: \
case PERF_REG_X86_XMM ## x + 1: \
@@ -362,6 +436,22 @@ const char *__perf_reg_name_x86(int id)
return NULL;
}
+const char *__perf_reg_name_x86(int id, int abi)
+{
+ const char *name;
+
+ name = __arch_reg_gpr_name(id);
+ if (name)
+ return name;
+
+ if (abi & PERF_SAMPLE_REGS_ABI_SIMD)
+ name = __arch_reg_egpr_name(id);
+ else
+ name = __arch_reg_xmm_name(id);
+
+ return name;
+}
+
uint64_t __perf_reg_ip_x86(void)
{
return PERF_REG_X86_IP;
diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c
index 5b8f34beb24e..bdd2eef13bc3 100644
--- a/tools/perf/util/perf_regs.c
+++ b/tools/perf/util/perf_regs.c
@@ -32,10 +32,11 @@ int perf_sdt_arg_parse_op(uint16_t e_machine, char *old_op, char **new_op)
return ret;
}
-uint64_t perf_intr_reg_mask(uint16_t e_machine)
+uint64_t perf_intr_reg_mask(uint16_t e_machine, int *abi)
{
uint64_t mask = 0;
+ *abi = 0;
switch (e_machine) {
case EM_ARM:
mask = __perf_reg_mask_arm(/*intr=*/true);
@@ -64,7 +65,7 @@ uint64_t perf_intr_reg_mask(uint16_t e_machine)
break;
case EM_386:
case EM_X86_64:
- mask = __perf_reg_mask_x86(/*intr=*/true);
+ mask = __perf_reg_mask_x86(/*intr=*/true, abi);
break;
default:
pr_debug("Unknown ELF machine %d, interrupt sampling register mask will be empty.\n",
@@ -75,10 +76,11 @@ uint64_t perf_intr_reg_mask(uint16_t e_machine)
return mask;
}
-uint64_t perf_user_reg_mask(uint16_t e_machine)
+uint64_t perf_user_reg_mask(uint16_t e_machine, int *abi)
{
uint64_t mask = 0;
+ *abi = 0;
switch (e_machine) {
case EM_ARM:
mask = __perf_reg_mask_arm(/*intr=*/false);
@@ -107,7 +109,7 @@ uint64_t perf_user_reg_mask(uint16_t e_machine)
break;
case EM_386:
case EM_X86_64:
- mask = __perf_reg_mask_x86(/*intr=*/false);
+ mask = __perf_reg_mask_x86(/*intr=*/false, abi);
break;
default:
pr_debug("Unknown ELF machine %d, user sampling register mask will be empty.\n",
@@ -118,7 +120,7 @@ uint64_t perf_user_reg_mask(uint16_t e_machine)
return mask;
}
-const char *perf_reg_name(int id, uint16_t e_machine, uint32_t e_flags)
+const char *perf_reg_name(int id, uint16_t e_machine, uint32_t e_flags, int abi)
{
const char *reg_name = NULL;
@@ -150,7 +152,7 @@ const char *perf_reg_name(int id, uint16_t e_machine, uint32_t e_flags)
break;
case EM_386:
case EM_X86_64:
- reg_name = __perf_reg_name_x86(id);
+ reg_name = __perf_reg_name_x86(id, abi);
break;
default:
break;
diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h
index 7c04700bf837..c9501ca8045d 100644
--- a/tools/perf/util/perf_regs.h
+++ b/tools/perf/util/perf_regs.h
@@ -13,10 +13,10 @@ enum {
};
int perf_sdt_arg_parse_op(uint16_t e_machine, char *old_op, char **new_op);
-uint64_t perf_intr_reg_mask(uint16_t e_machine);
-uint64_t perf_user_reg_mask(uint16_t e_machine);
+uint64_t perf_intr_reg_mask(uint16_t e_machine, int *abi);
+uint64_t perf_user_reg_mask(uint16_t e_machine, int *abi);
-const char *perf_reg_name(int id, uint16_t e_machine, uint32_t e_flags);
+const char *perf_reg_name(int id, uint16_t e_machine, uint32_t e_flags, int abi);
int perf_reg_value(u64 *valp, struct regs_dump *regs, int id);
uint64_t perf_arch_reg_ip(uint16_t e_machine);
uint64_t perf_arch_reg_sp(uint16_t e_machine);
@@ -64,8 +64,8 @@ uint64_t __perf_reg_ip_s390(void);
uint64_t __perf_reg_sp_s390(void);
int __perf_sdt_arg_parse_op_x86(char *old_op, char **new_op);
-uint64_t __perf_reg_mask_x86(bool intr);
-const char *__perf_reg_name_x86(int id);
+uint64_t __perf_reg_mask_x86(bool intr, int *abi);
+const char *__perf_reg_name_x86(int id, int abi);
uint64_t __perf_reg_ip_x86(void);
uint64_t __perf_reg_sp_x86(void);
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 2b0df7bd9a46..4cc5b96898e6 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -733,7 +733,7 @@ static void regs_map(struct regs_dump *regs, uint64_t mask, uint16_t e_machine,
printed += scnprintf(bf + printed, size - printed,
"%5s:0x%" PRIx64 " ",
- perf_reg_name(r, e_machine, e_flags), val);
+ perf_reg_name(r, e_machine, e_flags, regs->abi), val);
}
}
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 4b465abfa36c..7cf7bf86205d 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -959,15 +959,16 @@ static void branch_stack__printf(struct perf_sample *sample,
}
}
-static void regs_dump__printf(u64 mask, u64 *regs, uint16_t e_machine, uint32_t e_flags)
+static void regs_dump__printf(u64 mask, struct regs_dump *regs,
+ uint16_t e_machine, uint32_t e_flags)
{
unsigned rid, i = 0;
for_each_set_bit(rid, (unsigned long *) &mask, sizeof(mask) * 8) {
- u64 val = regs[i++];
+ u64 val = regs->regs[i++];
printf(".... %-5s 0x%016" PRIx64 "\n",
- perf_reg_name(rid, e_machine, e_flags), val);
+ perf_reg_name(rid, e_machine, e_flags, regs->abi), val);
}
}
@@ -995,7 +996,7 @@ static void regs__printf(const char *type, struct regs_dump *regs,
mask,
regs_dump_abi(regs));
- regs_dump__printf(mask, regs->regs, e_machine, e_flags);
+ regs_dump__printf(mask, regs, e_machine, e_flags);
}
static void regs_user__printf(struct perf_sample *sample, uint16_t e_machine, uint32_t e_flags)
--
2.34.1
Powered by blists - more mailing lists