lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <af1cd3ed-e847-43f6-9459-d2d74ee94c88@intel.com>
Date: Tue, 9 Dec 2025 11:43:02 -0800
From: Reinette Chatre <reinette.chatre@...el.com>
To: Tony Luck <tony.luck@...el.com>, Fenghua Yu <fenghuay@...dia.com>, "Maciej
 Wieczor-Retman" <maciej.wieczor-retman@...el.com>, Peter Newman
	<peternewman@...gle.com>, James Morse <james.morse@....com>, Babu Moger
	<babu.moger@....com>, Drew Fustini <dfustini@...libre.com>, Dave Martin
	<Dave.Martin@....com>, Chen Yu <yu.c.chen@...el.com>
CC: <x86@...nel.org>, <linux-kernel@...r.kernel.org>,
	<patches@...ts.linux.dev>
Subject: Re: [PATCH v15 25/32] x86/resctrl: Handle number of RMIDs supported
 by RDT_RESOURCE_PERF_PKG

Hi Tony,

On 12/4/25 12:53 PM, Tony Luck wrote:
> There are now three meanings for "number of RMIDs":
> 
> 1) The number for legacy features enumerated by CPUID leaf 0xF. This is the
> maximum number of distinct values that can be loaded into MSR_IA32_PQR_ASSOC.
> Note that systems with Sub-NUMA Cluster mode enabled will force scaling down
> the CPUID enumerated value by the number of SNC nodes per L3-cache.
> 
> 2) The number of registers in MMIO space for each event. This is enumerated
> in the XML files and is the value initialized into event_group::num_rmid.
> 
> 3) The number of "hardware counters" (this isn't a strictly accurate
> description of how things work, but serves as a useful analogy that does
> describe the limitations) feeding to those MMIO registers. This is enumerated
> in telemetry_region::num_rmids returned by intel_pmt_get_regions_by_feature()
> 
> Event groups with insufficient "hardware counters" to track all RMIDs are
> difficult for users to use, since the system may reassign "hardware counters"
> at any time. This means that users cannot reliably collect two consecutive
> event counts to compute the rate at which events are occurring. By default
> such event groups are disabled. The user may override this with a command line

"Disable such event groups by default."
(imperative)

> "rdt=" option. In this case limit an under-resourced event group's number of
> possible monitor resource groups to the lowest number of "hardware counters".
> 
> Scan all enabled event groups and assign the RDT_RESOURCE_PERF_PKG resource
> "num_rmid" value to the smallest of these values as this value will be used
> later to compare against the number of RMIDs supported by other resources
> to determine how many monitoring resource groups are supported.
> 
> N.B. Change type of resctrl_mon::num_rmid to u32 to match its usage and
> the type of event_group::num_rmid so that min(r->num_rmid, e->num_rmid)
> won't complain about mixing signed and unsigned types.
> 
> Signed-off-by: Tony Luck <tony.luck@...el.com>
> ---
>  include/linux/resctrl.h                 |  2 +-
>  arch/x86/kernel/cpu/resctrl/intel_aet.c | 57 ++++++++++++++++++++++++-
>  fs/resctrl/rdtgroup.c                   |  2 +-
>  3 files changed, 57 insertions(+), 4 deletions(-)
> 
> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
> index 14126d228e61..8623e450619a 100644
> --- a/include/linux/resctrl.h
> +++ b/include/linux/resctrl.h
> @@ -295,7 +295,7 @@ enum resctrl_schema_fmt {
>   *			events of monitor groups created via mkdir.
>   */
>  struct resctrl_mon {
> -	int			num_rmid;
> +	u32			num_rmid;
>  	unsigned int		mbm_cfg_mask;
>  	int			num_mbm_cntrs;
>  	bool			mbm_cntr_assignable;
> diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
> index fec4bb781f82..38fcddc72ed8 100644
> --- a/arch/x86/kernel/cpu/resctrl/intel_aet.c
> +++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
> @@ -22,6 +22,7 @@
>  #include <linux/intel_pmt_features.h>
>  #include <linux/intel_vsec.h>
>  #include <linux/io.h>
> +#include <linux/minmax.h>
>  #include <linux/printk.h>
>  #include <linux/rculist.h>
>  #include <linux/rcupdate.h>
> @@ -60,10 +61,15 @@ struct pmt_event {
>   *			Valid if the system supports the event group,
>   *			NULL otherwise.
>   * @force_off:		True when "rdt" command line disables this @guid
> - *			or architecture code disables this @guid.
> + *			or architecture code disables this @guid due to
> + *			insufficient RMIDs.
>   * @force_on:		True when "rdt" command line overrides disable of
>   *			this @guid.
>   * @guid:		Unique number per XML description file.
> + * @num_rmid:		Number of RMIDs supported by this group. May be
> + *			adjusted downwards if enumeration from
> + *			intel_pmt_get_regions_by_feature() indicates fewer
> + *			RMIDs can be tracked simultaneously.
>   * @mmio_size:		Number of bytes of MMIO registers for this group.
>   * @num_events:		Number of events in this group.
>   * @evts:		Array of event descriptors.
> @@ -76,6 +82,7 @@ struct event_group {
>  
>  	/* Remaining fields initialized from XML file. */
>  	u32				guid;
> +	u32				num_rmid;
>  	size_t				mmio_size;
>  	unsigned int			num_events;
>  	struct pmt_event		evts[] __counted_by(num_events);
> @@ -90,6 +97,7 @@ struct event_group {
>  static struct event_group energy_0x26696143 = {
>  	.pfname		= "energy",
>  	.guid		= 0x26696143,
> +	.num_rmid	= 576,
>  	.mmio_size	= XML_MMIO_SIZE(576, 2, 3),
>  	.num_events	= 2,
>  	.evts		= {
> @@ -104,6 +112,7 @@ static struct event_group energy_0x26696143 = {
>  static struct event_group perf_0x26557651 = {
>  	.pfname		= "perf",
>  	.guid		= 0x26557651,
> +	.num_rmid	= 576,
>  	.mmio_size	= XML_MMIO_SIZE(576, 7, 3),
>  	.num_events	= 7,
>  	.evts		= {
> @@ -199,6 +208,24 @@ static bool group_has_usable_regions(struct event_group *e, struct pmt_feature_g
>  	return usable_regions;
>  }
>  
> +static bool all_regions_have_sufficient_rmid(struct event_group *e, struct pmt_feature_group *p)
> +{
> +	struct telemetry_region *tr;
> +	bool ret = true;
> +
> +	for (int i = 0; i < p->count; i++) {
> +		if (!p->regions[i].addr)
> +			continue;
> +		tr = &p->regions[i];
> +		if (tr->num_rmids < e->num_rmid) {
> +			e->force_off = true;
> +			ret = false;

Can this function just return here? 

> +		}
> +	}
> +
> +	return ret;
> +}
> +
>  static bool enable_events(struct event_group *e, struct pmt_feature_group *p)
>  {
>  	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_PERF_PKG].r_resctrl;
> @@ -210,6 +237,27 @@ static bool enable_events(struct event_group *e, struct pmt_feature_group *p)
>  	if (!group_has_usable_regions(e, p))
>  		return false;
>  
> +	/*
> +	 * Only enable feature with insufficient RMIDs if the user requested

"feature" -> "event group"?

> +	 * it from the kernel command line.
> +	 */
> +	if (!all_regions_have_sufficient_rmid(e, p) && !e->force_on) {
> +		pr_info("%s %s:0x%x monitoring not enabled due to insufficient RMIDs\n",
> +			r->name, e->pfname, e->guid);
> +		return false;
> +	}
> +
> +	for (int i = 0; i < p->count; i++) {
> +		if (!p->regions[i].addr)
> +			continue;
> +		/*
> +		 * e->num_rmid only adjusted lower if user (via rdt= kernel
> +		 * parameter) forces an event group with insufficient RMID
> +		 * to be enabled.
> +		 */
> +		e->num_rmid = min(e->num_rmid, p->regions[i].num_rmids);
> +	}
> +
>  	for (int j = 0; j < e->num_events; j++) {
>  		if (!resctrl_enable_mon_event(e->evts[j].id, true,
>  					      e->evts[j].bin_bits, &e->evts[j]))
> @@ -220,7 +268,12 @@ static bool enable_events(struct event_group *e, struct pmt_feature_group *p)
>  		return false;
>  	}
>  
> -	return skipped_events < e->num_events;

As mentioned in earlier patch, this can just be "return true" from the beginning?

> +	if (r->mon.num_rmid)
> +		r->mon.num_rmid = min(r->mon.num_rmid, e->num_rmid);
> +	else
> +		r->mon.num_rmid = e->num_rmid;
> +
> +	return true;
>  }
>  
>  static enum pmt_feature_id lookup_pfid(const char *pfname)
> diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
> index ac3c6e44b7c5..60ce2390723e 100644
> --- a/fs/resctrl/rdtgroup.c
> +++ b/fs/resctrl/rdtgroup.c
> @@ -1157,7 +1157,7 @@ static int rdt_num_rmids_show(struct kernfs_open_file *of,
>  {
>  	struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
>  
> -	seq_printf(seq, "%d\n", r->mon.num_rmid);
> +	seq_printf(seq, "%u\n", r->mon.num_rmid);
>  
>  	return 0;
>  }

Reinette

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ