lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <0ed1d942-5c7a-4cb3-b28b-2177e172f2e8@linux.intel.com>
Date: Wed, 20 Aug 2025 17:55:15 +0800
From: "Mi, Dapeng" <dapeng1.mi@...ux.intel.com>
To: kan.liang@...ux.intel.com, peterz@...radead.org, mingo@...hat.com,
 acme@...nel.org, namhyung@...nel.org, tglx@...utronix.de,
 dave.hansen@...ux.intel.com, irogers@...gle.com, adrian.hunter@...el.com,
 jolsa@...nel.org, alexander.shishkin@...ux.intel.com,
 linux-kernel@...r.kernel.org
Cc: ak@...ux.intel.com, zide.chen@...el.com, mark.rutland@....com,
 broonie@...nel.org, ravi.bangoria@....com, eranian@...gle.com
Subject: Re: [PATCH V3 06/17] perf: Support SIMD registers


On 8/16/2025 5:34 AM, kan.liang@...ux.intel.com wrote:
> From: Kan Liang <kan.liang@...ux.intel.com>
>
> The users may be interested in the SIMD registers in a sample while
> profiling. The current sample_regs_XXX doesn't have enough space for all
> SIMD registers.
>
> Add sets of the sample_simd_{pred,vec}_reg_* in the
> struct perf_event_attr to define a set of SIMD registers to dump on
> samples.
> The current X86 supports the XMM registers in sample_regs_XXX. To
> utilize the new SIMD registers configuration method, the
> sample_simd_regs_enabled should always be set. If so, the XMM space in
> the sample_regs_XXX is reserved for other usage.
>
> The SIMD registers are wider than 64 bits. A new output format is introduced.
> The number and width of SIMD registers will be dumped first, followed by
> the register values. The number and width are the same as the user's
> configuration now. If, for some reason (e.g., ARM) they are different,
> an ARCH-specific perf_output_sample_simd_regs can be implemented later
> separately.
> Add a new ABI, PERF_SAMPLE_REGS_ABI_SIMD, to indicate the new format.
> The enum perf_sample_regs_abi becomes a bitmap now. There should be no
> impact on the existing tool, since the version and bitmap are the same
> for 1 and 2.
>
> Add three new __weak functions to retrieve the number of available
> registers, validate the configuration of the SIMD registers, and
> retrieve the SIMD registers. The ARCH-specific functions will be
> implemented in the following patches.
>
> Add a new flag PERF_PMU_CAP_SIMD_REGS to indicate that the PMU has the
> capability to support SIMD registers dumping. Error out if the
> sample_simd_{pred,vec}_reg_* are mistakenly set for a PMU that doesn't have
> the capability.
>
> Suggested-by: Peter Zijlstra (Intel) <peterz@...radead.org>
> Signed-off-by: Kan Liang <kan.liang@...ux.intel.com>
> ---
>  include/linux/perf_event.h      |  13 ++++
>  include/linux/perf_regs.h       |   9 +++
>  include/uapi/linux/perf_event.h |  47 +++++++++++++--
>  kernel/events/core.c            | 101 +++++++++++++++++++++++++++++++-
>  4 files changed, 162 insertions(+), 8 deletions(-)
>
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 444b162f3f92..205361b7de2e 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -305,6 +305,7 @@ struct perf_event_pmu_context;
>  #define PERF_PMU_CAP_EXTENDED_HW_TYPE	0x0100
>  #define PERF_PMU_CAP_AUX_PAUSE		0x0200
>  #define PERF_PMU_CAP_AUX_PREFER_LARGE	0x0400
> +#define PERF_PMU_CAP_SIMD_REGS		0x0800
>  
>  /**
>   * pmu::scope
> @@ -1526,6 +1527,18 @@ perf_event__output_id_sample(struct perf_event *event,
>  extern void
>  perf_log_lost_samples(struct perf_event *event, u64 lost);
>  
> +static inline bool event_has_simd_regs(struct perf_event *event)
> +{
> +	struct perf_event_attr *attr = &event->attr;
> +
> +	return attr->sample_simd_regs_enabled != 0 ||
> +	       attr->sample_simd_pred_reg_intr != 0 ||
> +	       attr->sample_simd_pred_reg_user != 0 ||
> +	       attr->sample_simd_vec_reg_qwords != 0 ||
> +	       attr->sample_simd_vec_reg_intr != 0 ||
> +	       attr->sample_simd_vec_reg_user != 0;
> +}
> +
>  static inline bool event_has_extended_regs(struct perf_event *event)
>  {
>  	struct perf_event_attr *attr = &event->attr;
> diff --git a/include/linux/perf_regs.h b/include/linux/perf_regs.h
> index f632c5725f16..0172682b18fd 100644
> --- a/include/linux/perf_regs.h
> +++ b/include/linux/perf_regs.h
> @@ -9,6 +9,15 @@ struct perf_regs {
>  	struct pt_regs	*regs;
>  };
>  
> +int perf_simd_reg_validate(u16 vec_qwords, u64 vec_mask,
> +			   u16 pred_qwords, u32 pred_mask);
> +u64 perf_simd_reg_value(struct pt_regs *regs, int idx,
> +			u16 qwords_idx, bool pred);
> +void perf_simd_reg_check(struct pt_regs *regs,
> +			 u64 mask, u16 *nr_vectors, u16 *vec_qwords,
> +			 u16 pred_mask, u16 *nr_pred, u16 *pred_qwords);
> +
> +
>  #ifdef CONFIG_HAVE_PERF_REGS
>  #include <asm/perf_regs.h>
>  
> diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
> index 78a362b80027..2e9b16acbed6 100644
> --- a/include/uapi/linux/perf_event.h
> +++ b/include/uapi/linux/perf_event.h
> @@ -313,9 +313,10 @@ enum {
>   * Values to determine ABI of the registers dump.
>   */
>  enum perf_sample_regs_abi {
> -	PERF_SAMPLE_REGS_ABI_NONE		= 0,
> -	PERF_SAMPLE_REGS_ABI_32			= 1,
> -	PERF_SAMPLE_REGS_ABI_64			= 2,
> +	PERF_SAMPLE_REGS_ABI_NONE		= 0x00,
> +	PERF_SAMPLE_REGS_ABI_32			= 0x01,
> +	PERF_SAMPLE_REGS_ABI_64			= 0x02,
> +	PERF_SAMPLE_REGS_ABI_SIMD		= 0x04,

It would be better to write the values as bit shifts, so that the definition
itself clearly indicates that the ABI is a bitmap.

enum perf_sample_regs_abi {
    PERF_SAMPLE_REGS_ABI_NONE        = 0,
    PERF_SAMPLE_REGS_ABI_32            = 1 << 0,
    PERF_SAMPLE_REGS_ABI_64            = 1 << 1,
    PERF_SAMPLE_REGS_ABI_SIMD        = 1 << 2,
};



>  };
>  
>  /*
> @@ -382,6 +383,7 @@ enum perf_event_read_format {
>  #define PERF_ATTR_SIZE_VER6			120	/* Add: aux_sample_size */
>  #define PERF_ATTR_SIZE_VER7			128	/* Add: sig_data */
>  #define PERF_ATTR_SIZE_VER8			136	/* Add: config3 */
> +#define PERF_ATTR_SIZE_VER9			168	/* Add: sample_simd_{pred,vec}_reg_* */
>  
>  /*
>   * 'struct perf_event_attr' contains various attributes that define
> @@ -543,6 +545,25 @@ struct perf_event_attr {
>  	__u64	sig_data;
>  
>  	__u64	config3; /* extension of config2 */
> +
> +
> +	/*
> +	 * Defines set of SIMD registers to dump on samples.
> +	 * The sample_simd_regs_enabled !=0 implies the
> +	 * set of SIMD registers is used to config all SIMD registers.
> +	 * If !sample_simd_regs_enabled, sample_regs_XXX may be used to
> +	 * config some SIMD registers on X86.
> +	 */
> +	union {
> +		__u16 sample_simd_regs_enabled;
> +		__u16 sample_simd_pred_reg_qwords;
> +	};
> +	__u32 sample_simd_pred_reg_intr;
> +	__u32 sample_simd_pred_reg_user;
> +	__u16 sample_simd_vec_reg_qwords;
> +	__u64 sample_simd_vec_reg_intr;
> +	__u64 sample_simd_vec_reg_user;
> +	__u32 __reserved_4;
>  };
>  
>  /*
> @@ -1016,7 +1037,15 @@ enum perf_event_type {
>  	 *      } && PERF_SAMPLE_BRANCH_STACK
>  	 *
>  	 *	{ u64			abi; # enum perf_sample_regs_abi
> -	 *	  u64			regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
> +	 *	  u64			regs[weight(mask)];
> +	 *	  struct {
> +	 *		u16 nr_vectors;
> +	 *		u16 vector_qwords;
> +	 *		u16 nr_pred;
> +	 *		u16 pred_qwords;
> +	 *		u64 data[nr_vectors * vector_qwords + nr_pred * pred_qwords];
> +	 *	  } && (abi & PERF_SAMPLE_REGS_ABI_SIMD)
> +	 *	} && PERF_SAMPLE_REGS_USER
>  	 *
>  	 *	{ u64			size;
>  	 *	  char			data[size];
> @@ -1043,7 +1072,15 @@ enum perf_event_type {
>  	 *	{ u64			data_src; } && PERF_SAMPLE_DATA_SRC
>  	 *	{ u64			transaction; } && PERF_SAMPLE_TRANSACTION
>  	 *	{ u64			abi; # enum perf_sample_regs_abi
> -	 *	  u64			regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
> +	 *	  u64			regs[weight(mask)];
> +	 *	  struct {
> +	 *		u16 nr_vectors;
> +	 *		u16 vector_qwords;
> +	 *		u16 nr_pred;
> +	 *		u16 pred_qwords;
> +	 *		u64 data[nr_vectors * vector_qwords + nr_pred * pred_qwords];
> +	 *	  } && (abi & PERF_SAMPLE_REGS_ABI_SIMD)
> +	 *	} && PERF_SAMPLE_REGS_INTR
>  	 *	{ u64			phys_addr;} && PERF_SAMPLE_PHYS_ADDR
>  	 *	{ u64			cgroup;} && PERF_SAMPLE_CGROUP
>  	 *	{ u64			data_page_size;} && PERF_SAMPLE_DATA_PAGE_SIZE
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 95a7b6f5af09..dd8cf3c7fb7a 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -7408,6 +7408,47 @@ perf_output_sample_regs(struct perf_output_handle *handle,
>  	}
>  }
>  
> +static void
> +perf_output_sample_simd_regs(struct perf_output_handle *handle,
> +			     struct perf_event *event,
> +			     struct pt_regs *regs,
> +			     u64 mask, u16 pred_mask)
> +{
> +	u16 pred_qwords = event->attr.sample_simd_pred_reg_qwords;
> +	u16 vec_qwords = event->attr.sample_simd_vec_reg_qwords;
> +	u16 nr_pred = hweight16(pred_mask);
> +	u16 nr_vectors = hweight64(mask);
> +	int bit;
> +	u64 val;
> +	u16 i;
> +
> +	/* Get the number of available regs */
> +	perf_simd_reg_check(regs, mask, &nr_vectors, &vec_qwords,
> +			    pred_mask, &nr_pred, &pred_qwords);
> +
> +	perf_output_put(handle, nr_vectors);
> +	perf_output_put(handle, vec_qwords);
> +	perf_output_put(handle, nr_pred);
> +	perf_output_put(handle, pred_qwords);
> +
> +	if (nr_vectors) {
> +		for_each_set_bit(bit, (unsigned long *)&mask, sizeof(mask) * BITS_PER_BYTE) {
> +			for (i = 0; i < vec_qwords; i++) {
> +				val = perf_simd_reg_value(regs, bit, i, false);
> +				perf_output_put(handle, val);
> +			}
> +		}
> +	}
> +	if (nr_pred) {
> +		for_each_set_bit(bit, (unsigned long *)&pred_mask, sizeof(pred_mask) * BITS_PER_BYTE) {
> +			for (i = 0; i < pred_qwords; i++) {
> +				val = perf_simd_reg_value(regs, bit, i, true);
> +				perf_output_put(handle, val);
> +			}
> +		}
> +	}
> +}
> +
>  static void perf_sample_regs_user(struct perf_regs *regs_user,
>  				  struct pt_regs *regs)
>  {
> @@ -7429,6 +7470,25 @@ static void perf_sample_regs_intr(struct perf_regs *regs_intr,
>  	regs_intr->abi  = perf_reg_abi(current);
>  }
>  
> +int __weak perf_simd_reg_validate(u16 vec_qwords, u64 vec_mask,
> +				  u16 pred_qwords, u32 pred_mask)
> +{
> +	return vec_qwords || vec_mask || pred_qwords || pred_mask ? -ENOSYS : 0;
> +}
> +
> +u64 __weak perf_simd_reg_value(struct pt_regs *regs, int idx,
> +			       u16 qwords_idx, bool pred)
> +{
> +	return 0;
> +}
> +
> +void __weak perf_simd_reg_check(struct pt_regs *regs,
> +				u64 mask, u16 *nr_vectors, u16 *vec_qwords,
> +				u16 pred_mask, u16 *nr_pred, u16 *pred_qwords)
> +{
> +	*nr_vectors = 0;
> +	*nr_pred = 0;
> +}
>  
>  /*
>   * Get remaining task size from user stack pointer.
> @@ -7961,10 +8021,17 @@ void perf_output_sample(struct perf_output_handle *handle,
>  		perf_output_put(handle, abi);
>  
>  		if (abi) {
> -			u64 mask = event->attr.sample_regs_user;
> +			struct perf_event_attr *attr = &event->attr;
> +			u64 mask = attr->sample_regs_user;
>  			perf_output_sample_regs(handle,
>  						data->regs_user.regs,
>  						mask);
> +			if (abi & PERF_SAMPLE_REGS_ABI_SIMD) {
> +				perf_output_sample_simd_regs(handle, event,
> +							     data->regs_user.regs,
> +							     attr->sample_simd_vec_reg_user,
> +							     attr->sample_simd_pred_reg_user);
> +			}
>  		}
>  	}
>  
> @@ -7992,11 +8059,18 @@ void perf_output_sample(struct perf_output_handle *handle,
>  		perf_output_put(handle, abi);
>  
>  		if (abi) {
> -			u64 mask = event->attr.sample_regs_intr;
> +			struct perf_event_attr *attr = &event->attr;
> +			u64 mask = attr->sample_regs_intr;
>  
>  			perf_output_sample_regs(handle,
>  						data->regs_intr.regs,
>  						mask);
> +			if (abi & PERF_SAMPLE_REGS_ABI_SIMD) {
> +				perf_output_sample_simd_regs(handle, event,
> +							     data->regs_intr.regs,
> +							     attr->sample_simd_vec_reg_intr,
> +							     attr->sample_simd_pred_reg_intr);
> +			}
>  		}
>  	}
>  
> @@ -12560,6 +12634,12 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
>  	if (ret)
>  		goto err_pmu;
>  
> +	if (!(pmu->capabilities & PERF_PMU_CAP_SIMD_REGS) &&
> +	    event_has_simd_regs(event)) {
> +		ret = -EOPNOTSUPP;
> +		goto err_destroy;
> +	}
> +
>  	if (!(pmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS) &&
>  	    event_has_extended_regs(event)) {
>  		ret = -EOPNOTSUPP;
> @@ -13101,6 +13181,12 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
>  		ret = perf_reg_validate(attr->sample_regs_user);
>  		if (ret)
>  			return ret;
> +		ret = perf_simd_reg_validate(attr->sample_simd_vec_reg_qwords,
> +					     attr->sample_simd_vec_reg_user,
> +					     attr->sample_simd_pred_reg_qwords,
> +					     attr->sample_simd_pred_reg_user);
> +		if (ret)
> +			return ret;
>  	}
>  
>  	if (attr->sample_type & PERF_SAMPLE_STACK_USER) {
> @@ -13121,8 +13207,17 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
>  	if (!attr->sample_max_stack)
>  		attr->sample_max_stack = sysctl_perf_event_max_stack;
>  
> -	if (attr->sample_type & PERF_SAMPLE_REGS_INTR)
> +	if (attr->sample_type & PERF_SAMPLE_REGS_INTR) {
>  		ret = perf_reg_validate(attr->sample_regs_intr);
> +		if (ret)
> +			return ret;
> +		ret = perf_simd_reg_validate(attr->sample_simd_vec_reg_qwords,
> +					     attr->sample_simd_vec_reg_intr,
> +					     attr->sample_simd_pred_reg_qwords,
> +					     attr->sample_simd_pred_reg_intr);
> +		if (ret)
> +			return ret;
> +	}
>  
>  #ifndef CONFIG_CGROUP_PERF
>  	if (attr->sample_type & PERF_SAMPLE_CGROUP)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ