lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAP-5=fXQOTXhcuUjHSnMDQzi+u+kja2pNJSwziY7WZo7iLpqng@mail.gmail.com>
Date: Tue, 20 Jan 2026 00:00:07 -0800
From: Ian Rogers <irogers@...gle.com>
To: "Mi, Dapeng" <dapeng1.mi@...ux.intel.com>
Cc: Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>, 
	Arnaldo Carvalho de Melo <acme@...nel.org>, Namhyung Kim <namhyung@...nel.org>, 
	Thomas Gleixner <tglx@...utronix.de>, Dave Hansen <dave.hansen@...ux.intel.com>, 
	Adrian Hunter <adrian.hunter@...el.com>, Jiri Olsa <jolsa@...nel.org>, 
	Alexander Shishkin <alexander.shishkin@...ux.intel.com>, Andi Kleen <ak@...ux.intel.com>, 
	Eranian Stephane <eranian@...gle.com>, Mark Rutland <mark.rutland@....com>, broonie@...nel.org, 
	Ravi Bangoria <ravi.bangoria@....com>, linux-kernel@...r.kernel.org, 
	linux-perf-users@...r.kernel.org, Zide Chen <zide.chen@...el.com>, 
	Falcon Thomas <thomas.falcon@...el.com>, Dapeng Mi <dapeng1.mi@...el.com>, 
	Xudong Hao <xudong.hao@...el.com>, Kan Liang <kan.liang@...ux.intel.com>
Subject: Re: [Patch v5 17/19] perf headers: Sync with the kernel headers

On Mon, Jan 19, 2026 at 11:43 PM Mi, Dapeng <dapeng1.mi@...ux.intel.com> wrote:
>
>
> On 1/20/2026 3:16 PM, Ian Rogers wrote:
> > On Tue, Dec 2, 2025 at 10:59 PM Dapeng Mi <dapeng1.mi@...ux.intel.com> wrote:
> >> From: Kan Liang <kan.liang@...ux.intel.com>
> >>
> >> Update include/uapi/linux/perf_event.h and
> >> arch/x86/include/uapi/asm/perf_regs.h to support extended regs.
> >>
> >> Signed-off-by: Kan Liang <kan.liang@...ux.intel.com>
> >> Co-developed-by: Dapeng Mi <dapeng1.mi@...ux.intel.com>
> >> Signed-off-by: Dapeng Mi <dapeng1.mi@...ux.intel.com>
> >> ---
> >>  tools/arch/x86/include/uapi/asm/perf_regs.h | 62 +++++++++++++++++++++
> >>  tools/include/uapi/linux/perf_event.h       | 45 +++++++++++++--
> >>  2 files changed, 103 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/tools/arch/x86/include/uapi/asm/perf_regs.h b/tools/arch/x86/include/uapi/asm/perf_regs.h
> >> index 7c9d2bb3833b..f3561ed10041 100644
> >> --- a/tools/arch/x86/include/uapi/asm/perf_regs.h
> >> +++ b/tools/arch/x86/include/uapi/asm/perf_regs.h
> >> @@ -27,9 +27,34 @@ enum perf_event_x86_regs {
> >>         PERF_REG_X86_R13,
> >>         PERF_REG_X86_R14,
> >>         PERF_REG_X86_R15,
> >> +       /*
> >> +        * The EGPRs/SSP and XMM have overlaps. Only one can be used
> >> +        * at a time. For the ABI type PERF_SAMPLE_REGS_ABI_SIMD,
> >> +        * utilize EGPRs/SSP. For the other ABI type, XMM is used.
> >> +        *
> >> +        * Extended GPRs (EGPRs)
> >> +        */
> >> +       PERF_REG_X86_R16,
> >> +       PERF_REG_X86_R17,
> >> +       PERF_REG_X86_R18,
> >> +       PERF_REG_X86_R19,
> >> +       PERF_REG_X86_R20,
> >> +       PERF_REG_X86_R21,
> >> +       PERF_REG_X86_R22,
> >> +       PERF_REG_X86_R23,
> >> +       PERF_REG_X86_R24,
> >> +       PERF_REG_X86_R25,
> >> +       PERF_REG_X86_R26,
> >> +       PERF_REG_X86_R27,
> >> +       PERF_REG_X86_R28,
> >> +       PERF_REG_X86_R29,
> >> +       PERF_REG_X86_R30,
> >> +       PERF_REG_X86_R31,
> >> +       PERF_REG_X86_SSP,
> >>         /* These are the limits for the GPRs. */
> >>         PERF_REG_X86_32_MAX = PERF_REG_X86_GS + 1,
> >>         PERF_REG_X86_64_MAX = PERF_REG_X86_R15 + 1,
> >> +       PERF_REG_MISC_MAX = PERF_REG_X86_SSP + 1,
> >>
> >>         /* These all need two bits set because they are 128bit */
> >>         PERF_REG_X86_XMM0  = 32,
> >> @@ -54,5 +79,42 @@ enum perf_event_x86_regs {
> >>  };
> >>
> >>  #define PERF_REG_EXTENDED_MASK (~((1ULL << PERF_REG_X86_XMM0) - 1))
> >> +#define PERF_X86_EGPRS_MASK    GENMASK_ULL(PERF_REG_X86_R31, PERF_REG_X86_R16)
> >> +
> >> +enum {
> >> +       PERF_REG_X86_XMM,
> >> +       PERF_REG_X86_YMM,
> >> +       PERF_REG_X86_ZMM,
> >> +       PERF_REG_X86_MAX_SIMD_REGS,
> >> +
> >> +       PERF_REG_X86_OPMASK = 0,
> >> +       PERF_REG_X86_MAX_PRED_REGS = 1,
> >> +};
> >> +
> >> +enum {
> >> +       PERF_X86_SIMD_XMM_REGS      = 16,
> >> +       PERF_X86_SIMD_YMM_REGS      = 16,
> >> +       PERF_X86_SIMD_ZMMH_REGS     = 16,
> >> +       PERF_X86_SIMD_ZMM_REGS      = 32,
> >> +       PERF_X86_SIMD_VEC_REGS_MAX  = PERF_X86_SIMD_ZMM_REGS,
> >> +
> >> +       PERF_X86_SIMD_OPMASK_REGS   = 8,
> >> +       PERF_X86_SIMD_PRED_REGS_MAX = PERF_X86_SIMD_OPMASK_REGS,
> >> +};
> >> +
> >> +#define PERF_X86_SIMD_PRED_MASK                GENMASK(PERF_X86_SIMD_PRED_REGS_MAX - 1, 0)
> >> +#define PERF_X86_SIMD_VEC_MASK         GENMASK_ULL(PERF_X86_SIMD_VEC_REGS_MAX - 1, 0)
> >> +
> >> +#define PERF_X86_H16ZMM_BASE           PERF_X86_SIMD_ZMMH_REGS
> >> +
> >> +enum {
> >> +       PERF_X86_OPMASK_QWORDS   = 1,
> >> +       PERF_X86_XMM_QWORDS      = 2,
> >> +       PERF_X86_YMMH_QWORDS     = 2,
> >> +       PERF_X86_YMM_QWORDS      = 4,
> >> +       PERF_X86_ZMMH_QWORDS     = 4,
> >> +       PERF_X86_ZMM_QWORDS      = 8,
> >> +       PERF_X86_SIMD_QWORDS_MAX = PERF_X86_ZMM_QWORDS,
> >> +};
> >>
> >>  #endif /* _ASM_X86_PERF_REGS_H */
> >> diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
> >> index d292f96bc06f..f1474da32622 100644
> >> --- a/tools/include/uapi/linux/perf_event.h
> >> +++ b/tools/include/uapi/linux/perf_event.h
> >> @@ -314,8 +314,9 @@ enum {
> >>   */
> >>  enum perf_sample_regs_abi {
> >>         PERF_SAMPLE_REGS_ABI_NONE               = 0,
> >> -       PERF_SAMPLE_REGS_ABI_32                 = 1,
> >> -       PERF_SAMPLE_REGS_ABI_64                 = 2,
> >> +       PERF_SAMPLE_REGS_ABI_32                 = (1 << 0),
> >> +       PERF_SAMPLE_REGS_ABI_64                 = (1 << 1),
> >> +       PERF_SAMPLE_REGS_ABI_SIMD               = (1 << 2),
> >>  };
> >>
> >>  /*
> >> @@ -382,6 +383,7 @@ enum perf_event_read_format {
> >>  #define PERF_ATTR_SIZE_VER6                    120     /* Add: aux_sample_size */
> >>  #define PERF_ATTR_SIZE_VER7                    128     /* Add: sig_data */
> >>  #define PERF_ATTR_SIZE_VER8                    136     /* Add: config3 */
> >> +#define PERF_ATTR_SIZE_VER9                    168     /* Add: sample_simd_{pred,vec}_reg_* */
> >>
> >>  /*
> >>   * 'struct perf_event_attr' contains various attributes that define
> >> @@ -545,6 +547,25 @@ struct perf_event_attr {
> >>         __u64   sig_data;
> >>
> >>         __u64   config3; /* extension of config2 */
> >> +
> >> +
> >> +       /*
> >> +        * Defines set of SIMD registers to dump on samples.
> >> +        * The sample_simd_regs_enabled !=0 implies the
> >> +        * set of SIMD registers is used to config all SIMD registers.
> >> +        * If !sample_simd_regs_enabled, sample_regs_XXX may be used to
> >> +        * config some SIMD registers on X86.
> >> +        */
> >> +       union {
> >> +               __u16 sample_simd_regs_enabled;
> >> +               __u16 sample_simd_pred_reg_qwords;
> >> +       };
> >> +       __u32 sample_simd_pred_reg_intr;
> >> +       __u32 sample_simd_pred_reg_user;
> >> +       __u16 sample_simd_vec_reg_qwords;
> >> +       __u64 sample_simd_vec_reg_intr;
> >> +       __u64 sample_simd_vec_reg_user;
> >> +       __u32 __reserved_4;
> >>  };
> >>
> >>  /*
> >> @@ -1018,7 +1039,15 @@ enum perf_event_type {
> >>          *      } && PERF_SAMPLE_BRANCH_STACK
> >>          *
> >>          *      { u64                   abi; # enum perf_sample_regs_abi
> >> -        *        u64                   regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
> >> +        *        u64                   regs[weight(mask)];
> >> +        *        struct {
> >> +        *              u16 nr_vectors;
> >> +        *              u16 vector_qwords;
> >> +        *              u16 nr_pred;
> >> +        *              u16 pred_qwords;
> >> +        *              u64 data[nr_vectors * vector_qwords + nr_pred * pred_qwords];
> >> +        *        } && (abi & PERF_SAMPLE_REGS_ABI_SIMD)
> > Why can't these values be taken from the perf_event_attr? The abi is
> > needed as there could be both 32-bit and 64-bit samples for the same
> > event - presumably x32 appears as 64-bit. If the ABI has SIMD within
> > it (implied by the "} && (abi & PERF_SAMPLE_REGS_ABI_SIMD)" below)
> > then why can't we just use the perf_event_attr values? For example,
> > data could be "data[weight(sample_simd_vec_reg_user) *
> > sample_simd_vec_reg_qwords + weight(sample_simd_pred_reg_user) *
> > sample_simd_pred_reg_qwords]".
>
> The main reason is that the sampled SIMD regs may be only a subset of the
> requested SIMD regs in perf_event_attr, so we need to show the bitmask and
> qwords length explicitly in the sample record.

But this doesn't happen with any other register sampling, so why in this case?

Perhaps add comments along the lines of:
u16 nr_vectors;  // weight(sample_simd_vec_reg_user) except when ...

My random guess as to why the value differs from the weight would be
some kind of optimization around register values of 0. And even if the
number of registers is reduced, why is the number of qwords being
altered?

Thanks,
Ian

> >
> >> +        *      } && PERF_SAMPLE_REGS_USER
> >>          *
> >>          *      { u64                   size;
> >>          *        char                  data[size];
> >> @@ -1045,7 +1074,15 @@ enum perf_event_type {
> >>          *      { u64                   data_src; } && PERF_SAMPLE_DATA_SRC
> >>          *      { u64                   transaction; } && PERF_SAMPLE_TRANSACTION
> >>          *      { u64                   abi; # enum perf_sample_regs_abi
> >> -        *        u64                   regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
> >> +        *        u64                   regs[weight(mask)];
> >> +        *        struct {
> >> +        *              u16 nr_vectors;
> >> +        *              u16 vector_qwords;
> >> +        *              u16 nr_pred;
> >> +        *              u16 pred_qwords;
> >> +        *              u64 data[nr_vectors * vector_qwords + nr_pred * pred_qwords];
> >> +        *        } && (abi & PERF_SAMPLE_REGS_ABI_SIMD)
> > Same comment.
> >
> > Thanks,
> > Ian
> >
> >> +        *      } && PERF_SAMPLE_REGS_INTR
> >>          *      { u64                   phys_addr;} && PERF_SAMPLE_PHYS_ADDR
> >>          *      { u64                   cgroup;} && PERF_SAMPLE_CGROUP
> >>          *      { u64                   data_page_size;} && PERF_SAMPLE_DATA_PAGE_SIZE
> >> --
> >> 2.34.1
> >>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ