lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <7315c42d-07c3-456a-a625-ed0042e03186@intel.com>
Date: Tue, 2 Dec 2025 08:21:12 -0800
From: Reinette Chatre <reinette.chatre@...el.com>
To: Tony Luck <tony.luck@...el.com>, Fenghua Yu <fenghuay@...dia.com>, "Maciej
 Wieczor-Retman" <maciej.wieczor-retman@...el.com>, Peter Newman
	<peternewman@...gle.com>, James Morse <james.morse@....com>, Babu Moger
	<babu.moger@....com>, Drew Fustini <dfustini@...libre.com>, Dave Martin
	<Dave.Martin@....com>, Chen Yu <yu.c.chen@...el.com>
CC: <x86@...nel.org>, <linux-kernel@...r.kernel.org>,
	<patches@...ts.linux.dev>
Subject: Re: [PATCH v14 19/32] x86/resctrl: Find and enable usable telemetry
 events

Hi Tony,

On 11/24/25 10:53 AM, Tony Luck wrote:
> Every event group has a private copy of the data of all telemetry event
> aggregators (aka "telemetry regions") tracking its feature type. This copy
> may include regions that have the same feature type but track a different
> guid than the event group's.
> 
> Traverse the event group's telemetry region data and mark all regions that
> are not usable by the event group as unusable by clearing those regions'
> MMIO addresses. A region is considered unusable if:
> 1) guid does not match the guid of the event group.
> 2) Package ID is invalid.
> 3) The enumerated size of the MMIO region does not match the expected
>    value from the XML description file.
> 
> Hereafter any telemetry region with an MMIO address is considered valid for
> the event group it is associated with.
> 
> Enable all the event group's events as long as there is at least one usable
> region from where data for its events can be read.

The changelog does not mention the "skipped_events" handling added in this version.

> 
> Note that it is architecturally possible that some telemetry events are
> only supported by a subset of the packages in the system. It is not expected
> that systems will ever do this. If they do, the user will see event files in
> resctrl that always return "Unavailable".
> 
> Signed-off-by: Tony Luck <tony.luck@...el.com>
> ---
>  include/linux/resctrl.h                 |  2 +-
>  arch/x86/kernel/cpu/resctrl/intel_aet.c | 62 ++++++++++++++++++++++++-
>  fs/resctrl/monitor.c                    | 10 ++--
>  3 files changed, 67 insertions(+), 7 deletions(-)
> 
> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
> index b30f99335bbe..14126d228e61 100644
> --- a/include/linux/resctrl.h
> +++ b/include/linux/resctrl.h
> @@ -414,7 +414,7 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *r);
>  u32 resctrl_arch_system_num_rmid_idx(void);
>  int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid);
>  
> -void resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu,
> +bool resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu,
>  			      unsigned int binary_bits, void *arch_priv);
>  
>  bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid);
> diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
> index bb080bdde190..b6b50650e634 100644
> --- a/arch/x86/kernel/cpu/resctrl/intel_aet.c
> +++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
> @@ -20,9 +20,11 @@
>  #include <linux/intel_pmt_features.h>
>  #include <linux/intel_vsec.h>
>  #include <linux/overflow.h>
> +#include <linux/printk.h>
>  #include <linux/resctrl.h>
>  #include <linux/resctrl_types.h>
>  #include <linux/stddef.h>
> +#include <linux/topology.h>
>  #include <linux/types.h>
>  
>  #include "internal.h"
> @@ -117,12 +119,68 @@ static struct event_group *known_event_groups[] = {
>  	     _peg < &known_event_groups[ARRAY_SIZE(known_event_groups)];	\
>  	     _peg++)
>  
> -/* Stub for now */
> -static bool enable_events(struct event_group *e, struct pmt_feature_group *p)
> +/*
> + * Clear the address field of regions that did not pass the checks in
> + * skip_telem_region() so they will not be used by intel_aet_read_event().
> + * This is safe to do because intel_pmt_get_regions_by_feature() allocates
> + * a new pmt_feature_group structure to return to each caller and only makes
> + * use of the pmt_feature_group::kref field when intel_pmt_put_feature_group()
> + * returns the structure.
> + */
> +static void mark_telem_region_unusable(struct telemetry_region *tr)
>  {
> +	tr->addr = NULL;
> +}
> +
> +static bool skip_telem_region(struct telemetry_region *tr, struct event_group *e)
> +{
> +	if (tr->guid != e->guid)
> +		return true;
> +	if (tr->plat_info.package_id >= topology_max_packages()) {
> +		pr_warn("Bad package %u in guid 0x%x\n", tr->plat_info.package_id,
> +			tr->guid);
> +		return true;
> +	}
> +	if (tr->size != e->mmio_size) {
> +		pr_warn("MMIO space wrong size (%zu bytes) for guid 0x%x. Expected %zu bytes.\n",
> +			tr->size, e->guid, e->mmio_size);
> +		return true;
> +	}
> +
>  	return false;
>  }
>  
> +static bool group_has_usable_regions(struct event_group *e, struct pmt_feature_group *p)
> +{
> +	bool usable_regions = false;
> +
> +	for (int i = 0; i < p->count; i++) {
> +		if (skip_telem_region(&p->regions[i], e)) {
> +			mark_telem_region_unusable(&p->regions[i]);
> +			continue;
> +		}
> +		usable_regions = true;
> +	}
> +
> +	return usable_regions;
> +}
> +
> +static bool enable_events(struct event_group *e, struct pmt_feature_group *p)
> +{
> +	int skipped_events = 0;
> +
> +	if (!group_has_usable_regions(e, p))
> +		return false;
> +
> +	for (int j = 0; j < e->num_events; j++) {
> +		if (!resctrl_enable_mon_event(e->evts[j].id, true,
> +					      e->evts[j].bin_bits, &e->evts[j]))
> +			skipped_events++;
> +	}

The snippet below from patch #25 seems to belong here:
	if (e->num_events == skipped_events) { 
		pr_info("No events enabled in %s %s:0x%x\n", r->name, e->name, e->guid);
		return false;
	}

> +
> +	return skipped_events < e->num_events;
> +}
> +
>  /*
>   * Make a request to the INTEL_PMT_TELEMETRY driver for a copy of the
>   * pmt_feature_group for each known feature. If there is one, the returned
> diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c
> index 4ef91fc09070..338a122dfcff 100644
> --- a/fs/resctrl/monitor.c
> +++ b/fs/resctrl/monitor.c
> @@ -990,25 +990,27 @@ struct mon_evt mon_event_all[QOS_NUM_EVENTS] = {
>  	MON_EVENT(PMT_EVENT_UOPS_RETIRED,		"uops_retired",		RDT_RESOURCE_PERF_PKG,	false),
>  };
>  
> -void resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu,
> +bool resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu,
>  			      unsigned int binary_bits, void *arch_priv)
>  {
>  	if (WARN_ON_ONCE(eventid < QOS_FIRST_EVENT || eventid >= QOS_NUM_EVENTS ||
>  			 binary_bits > MAX_BINARY_BITS))
> -		return;
> +		return false;
>  	if (mon_event_all[eventid].enabled) {
>  		pr_warn("Duplicate enable for event %d\n", eventid);
> -		return;
> +		return false;
>  	}
>  	if (binary_bits && !mon_event_all[eventid].is_floating_point) {
>  		pr_warn("Event %d may not be floating point\n", eventid);
> -		return;
> +		return false;
>  	}
>  
>  	mon_event_all[eventid].any_cpu = any_cpu;
>  	mon_event_all[eventid].binary_bits = binary_bits;
>  	mon_event_all[eventid].arch_priv = arch_priv;
>  	mon_event_all[eventid].enabled = true;
> +
> +	return true;
>  }
>  
>  bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid)

Reinette

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ