lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <8666d188-489d-4be4-8700-f214dc2197e1@linux.intel.com>
Date: Thu, 21 Aug 2025 11:35:52 +0800
From: "Mi, Dapeng" <dapeng1.mi@...ux.intel.com>
To: kan.liang@...ux.intel.com, peterz@...radead.org, mingo@...hat.com,
 acme@...nel.org, namhyung@...nel.org, tglx@...utronix.de,
 dave.hansen@...ux.intel.com, irogers@...gle.com, adrian.hunter@...el.com,
 jolsa@...nel.org, alexander.shishkin@...ux.intel.com,
 linux-kernel@...r.kernel.org
Cc: ak@...ux.intel.com, zide.chen@...el.com, mark.rutland@....com,
 broonie@...nel.org, ravi.bangoria@....com, eranian@...gle.com
Subject: Re: [POC PATCH 16/17] perf parse-regs: Support the new SIMD format


On 8/16/2025 5:34 AM, kan.liang@...ux.intel.com wrote:
> From: Kan Liang <kan.liang@...ux.intel.com>
>
> Add has_cap_simd_regs() to check if the new SIMD format is available.
> If yes, get the possible mask and qwords.
>
> Add several __weak functions to return qwords and mask for vector and
> pred registers.
>
> Only support collecting the vector and pred as a whole, and only the
> superset. For example, -I XMM,YMM. Only collect all 16 YMMs.
>
> Examples:
>  $perf record -I?
>  available registers: AX BX CX DX SI DI BP SP IP FLAGS CS SS R8 R9 R10
>  R11 R12 R13 R14 R15 SSP XMM0-31 YMM0-31 ZMM0-31 OPMASK0-7
>
>  $perf record --user-regs=?
>  available registers: AX BX CX DX SI DI BP SP IP FLAGS CS SS R8 R9 R10
>  R11 R12 R13 R14 R15 SSP XMM0-31 YMM0-31 ZMM0-31 OPMASK0-7
>
> Signed-off-by: Kan Liang <kan.liang@...ux.intel.com>
> ---
>  tools/perf/arch/x86/util/perf_regs.c      | 257 +++++++++++++++++++++-
>  tools/perf/util/evsel.c                   |  25 +++
>  tools/perf/util/parse-regs-options.c      |  60 ++++-
>  tools/perf/util/perf_event_attr_fprintf.c |   6 +
>  tools/perf/util/perf_regs.c               |  29 +++
>  tools/perf/util/perf_regs.h               |  13 +-
>  tools/perf/util/record.h                  |   6 +
>  7 files changed, 381 insertions(+), 15 deletions(-)
>
> diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c
> index 12fd93f04802..78027df1af9a 100644
> --- a/tools/perf/arch/x86/util/perf_regs.c
> +++ b/tools/perf/arch/x86/util/perf_regs.c
> @@ -13,6 +13,49 @@
>  #include "../../../util/pmu.h"
>  #include "../../../util/pmus.h"
>  
> +static const struct sample_reg sample_reg_masks_ext[] = {
> +	SMPL_REG(AX, PERF_REG_X86_AX),
> +	SMPL_REG(BX, PERF_REG_X86_BX),
> +	SMPL_REG(CX, PERF_REG_X86_CX),
> +	SMPL_REG(DX, PERF_REG_X86_DX),
> +	SMPL_REG(SI, PERF_REG_X86_SI),
> +	SMPL_REG(DI, PERF_REG_X86_DI),
> +	SMPL_REG(BP, PERF_REG_X86_BP),
> +	SMPL_REG(SP, PERF_REG_X86_SP),
> +	SMPL_REG(IP, PERF_REG_X86_IP),
> +	SMPL_REG(FLAGS, PERF_REG_X86_FLAGS),
> +	SMPL_REG(CS, PERF_REG_X86_CS),
> +	SMPL_REG(SS, PERF_REG_X86_SS),
> +#ifdef HAVE_ARCH_X86_64_SUPPORT
> +	SMPL_REG(R8, PERF_REG_X86_R8),
> +	SMPL_REG(R9, PERF_REG_X86_R9),
> +	SMPL_REG(R10, PERF_REG_X86_R10),
> +	SMPL_REG(R11, PERF_REG_X86_R11),
> +	SMPL_REG(R12, PERF_REG_X86_R12),
> +	SMPL_REG(R13, PERF_REG_X86_R13),
> +	SMPL_REG(R14, PERF_REG_X86_R14),
> +	SMPL_REG(R15, PERF_REG_X86_R15),
> +	SMPL_REG(R16, PERF_REG_X86_R16),
> +	SMPL_REG(R17, PERF_REG_X86_R17),
> +	SMPL_REG(R18, PERF_REG_X86_R18),
> +	SMPL_REG(R19, PERF_REG_X86_R19),
> +	SMPL_REG(R20, PERF_REG_X86_R20),
> +	SMPL_REG(R21, PERF_REG_X86_R21),
> +	SMPL_REG(R22, PERF_REG_X86_R22),
> +	SMPL_REG(R23, PERF_REG_X86_R23),
> +	SMPL_REG(R24, PERF_REG_X86_R24),
> +	SMPL_REG(R25, PERF_REG_X86_R25),
> +	SMPL_REG(R26, PERF_REG_X86_R26),
> +	SMPL_REG(R27, PERF_REG_X86_R27),
> +	SMPL_REG(R28, PERF_REG_X86_R28),
> +	SMPL_REG(R29, PERF_REG_X86_R29),
> +	SMPL_REG(R30, PERF_REG_X86_R30),
> +	SMPL_REG(R31, PERF_REG_X86_R31),
> +	SMPL_REG(SSP, PERF_REG_X86_SSP),
> +#endif
> +	SMPL_REG_END
> +};
> +
>  static const struct sample_reg sample_reg_masks[] = {
>  	SMPL_REG(AX, PERF_REG_X86_AX),
>  	SMPL_REG(BX, PERF_REG_X86_BX),
> @@ -276,27 +319,159 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op)
>  	return SDT_ARG_VALID;
>  }
>  
> +static bool support_simd_reg(u64 sample_type, u16 qwords, u64 mask, bool pred)
> +{
> +	struct perf_event_attr attr = {
> +		.type				= PERF_TYPE_HARDWARE,
> +		.config				= PERF_COUNT_HW_CPU_CYCLES,
> +		.sample_type			= sample_type,
> +		.disabled 			= 1,
> +		.exclude_kernel			= 1,
> +		.sample_simd_regs_enabled	= 1,
> +	};
> +	int fd;
> +
> +	attr.sample_period = 1;
> +
> +	if (!pred) {
> +		attr.sample_simd_vec_reg_qwords = qwords;
> +		if (sample_type == PERF_SAMPLE_REGS_INTR)
> +			attr.sample_simd_vec_reg_intr = mask;
> +		else
> +			attr.sample_simd_vec_reg_user = mask;
> +	} else {
> +		attr.sample_simd_pred_reg_qwords = PERF_X86_OPMASK_QWORDS;
> +		if (sample_type == PERF_SAMPLE_REGS_INTR)
> +			attr.sample_simd_pred_reg_intr = PERF_X86_SIMD_PRED_MASK;
> +		else
> +			attr.sample_simd_pred_reg_user = PERF_X86_SIMD_PRED_MASK;
> +	}
> +
> +	if (perf_pmus__num_core_pmus() > 1) {
> +		struct perf_pmu *pmu = NULL;
> +		__u64 type = PERF_TYPE_RAW;
> +
> +		/*
> +		 * The same register set is supported among different hybrid PMUs.
> +		 * Only check the first available one.
> +		 */
> +		while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
> +			type = pmu->type;
> +			break;
> +		}
> +		attr.config |= type << PERF_PMU_TYPE_SHIFT;
> +	}
> +
> +	event_attr_init(&attr);
> +
> +	fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
> +	if (fd != -1) {
> +		close(fd);
> +		return true;
> +	}
> +
> +	return false;
> +}
> +
> +static uint64_t intr_simd_mask, user_simd_mask, pred_mask;
> +static u16	intr_simd_qwords, user_simd_qwords, pred_qwords;
> +
> +static bool get_simd_reg_mask(u64 sample_type)
> +{
> +	u64 mask = GENMASK_ULL(PERF_X86_H16ZMM_BASE - 1, 0);
> +	u16 qwords = PERF_X86_ZMM_QWORDS;
> +
> +	if (support_simd_reg(sample_type, qwords, mask, false)) {
> +		if (support_simd_reg(sample_type, qwords, PERF_X86_SIMD_VEC_MASK, false))
> +			mask = PERF_X86_SIMD_VEC_MASK;
> +	} else {
> +		qwords = PERF_X86_YMM_QWORDS;
> +		if (!support_simd_reg(sample_type, qwords, mask, false)) {
> +			qwords = PERF_X86_XMM_QWORDS;
> +			if (!support_simd_reg(sample_type, qwords, mask, false)) {
> +				qwords = 0;
> +				mask = 0;
> +			}
> +		}
> +	}
> +
> +	if (sample_type == PERF_SAMPLE_REGS_INTR) {
> +		intr_simd_mask = mask;
> +		intr_simd_qwords = qwords;
> +	} else {
> +		user_simd_mask = mask;
> +		user_simd_qwords = qwords;
> +	}

It looks like we only use a global variable to save the SIMD regs mask, but
different SIMD regs have different masks, e.g., ZMM has 32 regs but XMM/YMM
only have 16 regs. So if HW supports ZMM16 ~ ZMM31, the SIMD regs mask
would always be set to 0xffffffff. Is that correct for YMM/XMM regs?


> +
> +	if (support_simd_reg(sample_type, qwords, mask, true)) {
> +		pred_mask = PERF_X86_SIMD_PRED_MASK;
> +		pred_qwords = PERF_X86_OPMASK_QWORDS;
> +	}
> +
> +	return true;

It seems this function always returns true. That feels incorrect.


> +}
> +
> +static bool has_cap_simd_regs(void)
> +{
> +	static bool has_cap_simd_regs;
> +	static bool cached;
> +
> +	if (cached)
> +		return has_cap_simd_regs;
> +
> +	cached = true;
> +	has_cap_simd_regs = get_simd_reg_mask(PERF_SAMPLE_REGS_INTR);
> +	has_cap_simd_regs |= get_simd_reg_mask(PERF_SAMPLE_REGS_USER);
> +
> +	return has_cap_simd_regs;
> +}
> +
>  const struct sample_reg *arch__sample_reg_masks(void)
>  {
> +	if (has_cap_simd_regs())
> +		return sample_reg_masks_ext;
>  	return sample_reg_masks;
>  }
>  
> -uint64_t arch__intr_reg_mask(void)
> +static const struct sample_reg sample_simd_reg_masks_empty[] = {
> +	SMPL_REG_END
> +};
> +
> +static const struct sample_reg sample_simd_reg_masks[] = {
> +	SMPL_REG(XMM, 1),
> +	SMPL_REG(YMM, 2),
> +	SMPL_REG(ZMM, 3),
> +	SMPL_REG(OPMASK, 32),
> +	SMPL_REG_END
> +};

We extend the ".mask" field to represent the SIMD mask and qword size
simultaneously. It works, but it's really hard to understand and increases
the complexity. Could we just define some global variables or helpers to
get the mask and qword size for each kind of SIMD regs, like XMM/YMM/ZMM regs?


> +
> +const struct sample_reg *arch__sample_simd_reg_masks(void)
> +{
> +	if (has_cap_simd_regs())
> +		return sample_simd_reg_masks;
> +	return sample_simd_reg_masks_empty;
> +}
> +
> +static uint64_t __arch__reg_mask(u64 sample_type, u64 mask, bool has_simd_regs)
>  {
>  	struct perf_event_attr attr = {
> -		.type			= PERF_TYPE_HARDWARE,
> -		.config			= PERF_COUNT_HW_CPU_CYCLES,
> -		.sample_type		= PERF_SAMPLE_REGS_INTR,
> -		.sample_regs_intr	= PERF_REG_EXTENDED_MASK,
> -		.precise_ip		= 1,
> -		.disabled 		= 1,
> -		.exclude_kernel		= 1,
> +		.type				= PERF_TYPE_HARDWARE,
> +		.config				= PERF_COUNT_HW_CPU_CYCLES,
> +		.sample_type			= sample_type,
> +		.precise_ip			= 1,
> +		.disabled 			= 1,
> +		.exclude_kernel			= 1,
> +		.sample_simd_regs_enabled	= has_simd_regs,
>  	};
>  	int fd;
>  	/*
>  	 * In an unnamed union, init it here to build on older gcc versions
>  	 */
>  	attr.sample_period = 1;
> +	if (sample_type == PERF_SAMPLE_REGS_INTR)
> +		attr.sample_regs_intr = mask;
> +	else
> +		attr.sample_regs_user = mask;
>  
>  	if (perf_pmus__num_core_pmus() > 1) {
>  		struct perf_pmu *pmu = NULL;
> @@ -318,13 +493,73 @@ uint64_t arch__intr_reg_mask(void)
>  	fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
>  	if (fd != -1) {
>  		close(fd);
> -		return (PERF_REG_EXTENDED_MASK | PERF_REGS_MASK);
> +		return mask;
>  	}
>  
> -	return PERF_REGS_MASK;
> +	return 0;
> +}
> +
> +uint64_t arch__intr_reg_mask(void)
> +{
> +	uint64_t mask = PERF_REGS_MASK;
> +
> +	if (has_cap_simd_regs()) {
> +		mask |= __arch__reg_mask(PERF_SAMPLE_REGS_INTR,
> +					 GENMASK_ULL(PERF_REG_X86_R31, PERF_REG_X86_R16),
> +					 true);
> +		mask |= __arch__reg_mask(PERF_SAMPLE_REGS_INTR,
> +					 BIT_ULL(PERF_REG_X86_SSP),
> +					 true);
> +	} else
> +		mask |= __arch__reg_mask(PERF_SAMPLE_REGS_INTR, PERF_REG_EXTENDED_MASK, false);
> +
> +	return mask;
>  }
>  
>  uint64_t arch__user_reg_mask(void)
>  {
> -	return PERF_REGS_MASK;
> +	uint64_t mask = PERF_REGS_MASK;
> +
> +	if (has_cap_simd_regs()) {
> +		mask |= __arch__reg_mask(PERF_SAMPLE_REGS_USER,
> +					 GENMASK_ULL(PERF_REG_X86_R31, PERF_REG_X86_R16),
> +					 true);
> +		mask |= __arch__reg_mask(PERF_SAMPLE_REGS_USER,
> +					 BIT_ULL(PERF_REG_X86_SSP),
> +					 true);
> +	}
> +
> +	return mask;
> +}
> +
> +uint64_t arch__intr_simd_reg_mask(u16 *qwords)
> +{
> +	if (!has_cap_simd_regs())
> +		return 0;
> +	*qwords = intr_simd_qwords;
> +	return intr_simd_mask;
> +}
> +
> +uint64_t arch__user_simd_reg_mask(u16 *qwords)
> +{
> +	if (!has_cap_simd_regs())
> +		return 0;
> +	*qwords = user_simd_qwords;
> +	return user_simd_mask;
> +}
> +
> +uint64_t arch__intr_pred_reg_mask(u16 *qwords)
> +{
> +	if (!has_cap_simd_regs())
> +		return 0;
> +	*qwords = pred_qwords;
> +	return pred_mask;
> +}
> +
> +uint64_t arch__user_pred_reg_mask(u16 *qwords)
> +{
> +	if (!has_cap_simd_regs())
> +		return 0;
> +	*qwords = pred_qwords;
> +	return pred_mask;
>  }
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index d55482f094bf..af6e1c843fc5 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -1402,12 +1402,37 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
>  		evsel__set_sample_bit(evsel, REGS_INTR);
>  	}
>  
> +	if ((opts->sample_intr_vec_regs || opts->sample_intr_pred_regs) &&
> +	    !evsel->no_aux_samples && !evsel__is_dummy_event(evsel)) {
> +		/* The pred qwords is to implies the set of SIMD registers is used */
> +		if (opts->sample_pred_regs_qwords)
> +			attr->sample_simd_pred_reg_qwords = opts->sample_pred_regs_qwords;
> +		else
> +			attr->sample_simd_pred_reg_qwords = 1;
> +		attr->sample_simd_vec_reg_intr = opts->sample_intr_vec_regs;
> +		attr->sample_simd_vec_reg_qwords = opts->sample_vec_regs_qwords;
> +		attr->sample_simd_pred_reg_intr = opts->sample_intr_pred_regs;
> +		evsel__set_sample_bit(evsel, REGS_INTR);
> +	}
> +
>  	if (opts->sample_user_regs && !evsel->no_aux_samples &&
>  	    !evsel__is_dummy_event(evsel)) {
>  		attr->sample_regs_user |= opts->sample_user_regs;
>  		evsel__set_sample_bit(evsel, REGS_USER);
>  	}
>  
> +	if ((opts->sample_user_vec_regs || opts->sample_user_pred_regs) &&
> +	    !evsel->no_aux_samples && !evsel__is_dummy_event(evsel)) {
> +		if (opts->sample_pred_regs_qwords)
> +			attr->sample_simd_pred_reg_qwords = opts->sample_pred_regs_qwords;
> +		else
> +			attr->sample_simd_pred_reg_qwords = 1;
> +		attr->sample_simd_vec_reg_user = opts->sample_user_vec_regs;
> +		attr->sample_simd_vec_reg_qwords = opts->sample_vec_regs_qwords;
> +		attr->sample_simd_pred_reg_user = opts->sample_user_pred_regs;
> +		evsel__set_sample_bit(evsel, REGS_USER);
> +	}
> +
>  	if (target__has_cpu(&opts->target) || opts->sample_cpu)
>  		evsel__set_sample_bit(evsel, CPU);
>  
> diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c
> index cda1c620968e..27266038352f 100644
> --- a/tools/perf/util/parse-regs-options.c
> +++ b/tools/perf/util/parse-regs-options.c
> @@ -4,20 +4,26 @@
>  #include <stdint.h>
>  #include <string.h>
>  #include <stdio.h>
> +#include <linux/bitops.h>
>  #include "util/debug.h"
>  #include <subcmd/parse-options.h>
>  #include "util/perf_regs.h"
>  #include "util/parse-regs-options.h"
> +#include "record.h"
>  
>  static int
>  __parse_regs(const struct option *opt, const char *str, int unset, bool intr)
>  {
>  	uint64_t *mode = (uint64_t *)opt->value;
>  	const struct sample_reg *r = NULL;
> +	u16 simd_qwords, pred_qwords;
> +	u64 simd_mask, pred_mask;
> +	struct record_opts *opts;
>  	char *s, *os = NULL, *p;
>  	int ret = -1;
>  	uint64_t mask;
>  
> +
>  	if (unset)
>  		return 0;
>  
> @@ -27,10 +33,17 @@ __parse_regs(const struct option *opt, const char *str, int unset, bool intr)
>  	if (*mode)
>  		return -1;
>  
> -	if (intr)
> +	if (intr) {
> +		opts = container_of(opt->value, struct record_opts, sample_intr_regs);
>  		mask = arch__intr_reg_mask();
> -	else
> +		simd_mask = arch__intr_simd_reg_mask(&simd_qwords);
> +		pred_mask = arch__intr_pred_reg_mask(&pred_qwords);
> +	} else {
> +		opts = container_of(opt->value, struct record_opts, sample_user_regs);
>  		mask = arch__user_reg_mask();
> +		simd_mask = arch__user_simd_reg_mask(&simd_qwords);
> +		pred_mask = arch__user_pred_reg_mask(&pred_qwords);
> +	}
>  
>  	/* str may be NULL in case no arg is passed to -I */
>  	if (str) {
> @@ -50,10 +63,51 @@ __parse_regs(const struct option *opt, const char *str, int unset, bool intr)
>  					if (r->mask & mask)
>  						fprintf(stderr, "%s ", r->name);
>  				}
> +				for (r = arch__sample_simd_reg_masks(); r->name; r++) {
> +					if (pred_qwords == r->qwords.pred) {
> +						fprintf(stderr, "%s0-%d ", r->name, fls64(pred_mask) - 1);
> +						continue;
> +					}
> +					if (simd_qwords >= r->mask)
> +						fprintf(stderr, "%s0-%d ", r->name, fls64(simd_mask) - 1);
> +				}
> +
>  				fputc('\n', stderr);
>  				/* just printing available regs */
>  				goto error;
>  			}
> +
> +			if (simd_mask || pred_mask) {
> +				u16 vec_regs_qwords = 0, pred_regs_qwords = 0;
> +
> +				for (r = arch__sample_simd_reg_masks(); r->name; r++) {
> +					if (!strcasecmp(s, r->name)) {
> +						vec_regs_qwords = r->qwords.vec;
> +						pred_regs_qwords = r->qwords.pred;
> +						break;
> +					}
> +				}
> +
> +				/* Just need the highest qwords */
> +				if (vec_regs_qwords > opts->sample_vec_regs_qwords) {
> +					opts->sample_vec_regs_qwords = vec_regs_qwords;
> +					if (intr)
> +						opts->sample_intr_vec_regs = simd_mask;
> +					else
> +						opts->sample_user_vec_regs = simd_mask;
> +				}
> +				if (pred_regs_qwords > opts->sample_pred_regs_qwords) {
> +					opts->sample_pred_regs_qwords = pred_regs_qwords;
> +					if (intr)
> +						opts->sample_intr_pred_regs = pred_mask;
> +					else
> +						opts->sample_user_pred_regs = pred_mask;
> +				}
> +
> +				if (r->name)
> +					goto next;
> +			}
> +
>  			for (r = arch__sample_reg_masks(); r->name; r++) {
>  				if ((r->mask & mask) && !strcasecmp(s, r->name))
>  					break;
> @@ -65,7 +119,7 @@ __parse_regs(const struct option *opt, const char *str, int unset, bool intr)
>  			}
>  
>  			*mode |= r->mask;
> -
> +next:
>  			if (!p)
>  				break;
>  
> diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
> index 66b666d9ce64..fb0366d050cf 100644
> --- a/tools/perf/util/perf_event_attr_fprintf.c
> +++ b/tools/perf/util/perf_event_attr_fprintf.c
> @@ -360,6 +360,12 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
>  	PRINT_ATTRf(aux_start_paused, p_unsigned);
>  	PRINT_ATTRf(aux_pause, p_unsigned);
>  	PRINT_ATTRf(aux_resume, p_unsigned);
> +	PRINT_ATTRf(sample_simd_pred_reg_qwords, p_unsigned);
> +	PRINT_ATTRf(sample_simd_pred_reg_intr, p_hex);
> +	PRINT_ATTRf(sample_simd_pred_reg_user, p_hex);
> +	PRINT_ATTRf(sample_simd_vec_reg_qwords, p_unsigned);
> +	PRINT_ATTRf(sample_simd_vec_reg_intr, p_hex);
> +	PRINT_ATTRf(sample_simd_vec_reg_user, p_hex);
>  
>  	return ret;
>  }
> diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c
> index 44b90bbf2d07..0744c77b4ac8 100644
> --- a/tools/perf/util/perf_regs.c
> +++ b/tools/perf/util/perf_regs.c
> @@ -21,6 +21,30 @@ uint64_t __weak arch__user_reg_mask(void)
>  	return 0;
>  }
>  
> +uint64_t __weak arch__intr_simd_reg_mask(u16 *qwords)
> +{
> +	*qwords = 0;
> +	return 0;
> +}
> +
> +uint64_t __weak arch__user_simd_reg_mask(u16 *qwords)
> +{
> +	*qwords = 0;
> +	return 0;
> +}
> +
> +uint64_t __weak arch__intr_pred_reg_mask(u16 *qwords)
> +{
> +	*qwords = 0;
> +	return 0;
> +}
> +
> +uint64_t __weak arch__user_pred_reg_mask(u16 *qwords)
> +{
> +	*qwords = 0;
> +	return 0;
> +}
> +
>  static const struct sample_reg sample_reg_masks[] = {
>  	SMPL_REG_END
>  };
> @@ -30,6 +54,11 @@ const struct sample_reg * __weak arch__sample_reg_masks(void)
>  	return sample_reg_masks;
>  }
>  
> +const struct sample_reg * __weak arch__sample_simd_reg_masks(void)
> +{
> +	return sample_reg_masks;
> +}
> +
>  const char *perf_reg_name(int id, const char *arch)
>  {
>  	const char *reg_name = NULL;
> diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h
> index f2d0736d65cc..b932caa73a8a 100644
> --- a/tools/perf/util/perf_regs.h
> +++ b/tools/perf/util/perf_regs.h
> @@ -9,7 +9,13 @@ struct regs_dump;
>  
>  struct sample_reg {
>  	const char *name;
> -	uint64_t mask;
> +	union {
> +		struct {
> +			uint32_t vec;
> +			uint32_t pred;
> +		} qwords;
> +		uint64_t mask;
> +	};
>  };
>  
>  #define SMPL_REG_MASK(b) (1ULL << (b))
> @@ -27,6 +33,11 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op);
>  uint64_t arch__intr_reg_mask(void);
>  uint64_t arch__user_reg_mask(void);
>  const struct sample_reg *arch__sample_reg_masks(void);
> +const struct sample_reg *arch__sample_simd_reg_masks(void);
> +uint64_t arch__intr_simd_reg_mask(u16 *qwords);
> +uint64_t arch__user_simd_reg_mask(u16 *qwords);
> +uint64_t arch__intr_pred_reg_mask(u16 *qwords);
> +uint64_t arch__user_pred_reg_mask(u16 *qwords);
>  
>  const char *perf_reg_name(int id, const char *arch);
>  int perf_reg_value(u64 *valp, struct regs_dump *regs, int id);
> diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
> index ea3a6c4657ee..825ffb4cc53f 100644
> --- a/tools/perf/util/record.h
> +++ b/tools/perf/util/record.h
> @@ -59,7 +59,13 @@ struct record_opts {
>  	unsigned int  user_freq;
>  	u64	      branch_stack;
>  	u64	      sample_intr_regs;
> +	u64	      sample_intr_vec_regs;
>  	u64	      sample_user_regs;
> +	u64	      sample_user_vec_regs;
> +	u16	      sample_pred_regs_qwords;
> +	u16	      sample_vec_regs_qwords;
> +	u16	      sample_intr_pred_regs;
> +	u16	      sample_user_pred_regs;
>  	u64	      default_interval;
>  	u64	      user_interval;
>  	size_t	      auxtrace_snapshot_size;

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ