lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <2de48392-9517-4859-a932-d54aa6580e1a@intel.com>
Date: Tue, 3 Jun 2025 20:53:12 -0700
From: Reinette Chatre <reinette.chatre@...el.com>
To: Tony Luck <tony.luck@...el.com>, Fenghua Yu <fenghuay@...dia.com>, "Maciej
 Wieczor-Retman" <maciej.wieczor-retman@...el.com>, Peter Newman
	<peternewman@...gle.com>, James Morse <james.morse@....com>, Babu Moger
	<babu.moger@....com>, Drew Fustini <dfustini@...libre.com>, Dave Martin
	<Dave.Martin@....com>, Anil Keshavamurthy <anil.s.keshavamurthy@...el.com>,
	Chen Yu <yu.c.chen@...el.com>
CC: <x86@...nel.org>, <linux-kernel@...r.kernel.org>,
	<patches@...ts.linux.dev>
Subject: Re: [PATCH v5 17/29] x86/resctrl: Discover hardware telemetry events

Hi Tony,

On 5/21/25 3:50 PM, Tony Luck wrote:
> Hardware has one or more telemetry event aggregators per package
> for each group of telemetry events. Each aggregator provides access
> to event counts in an array of 64-bit values in MMIO space. There
> is a "guid" (in this case a unique 32-bit integer) which refers to
> an XML file published in the https://github.com/intel/Intel-PMT
> that provides all the details about each aggregator.
> 
> The XML files provide the following information:
> 1) Which telemetry events are included in the group for this aggregator.
> 2) The order in which the event counters appear for each RMID.
> 3) The value type of each event counter (integer or fixed-point).
> 4) The number of RMIDs supported.
> 5) Which additional aggregator status registers are included.
> 6) The total size of the MMIO region for this aggregator.
> 
> There is an INTEL_PMT_DISCOVERY driver that enumerate all aggregators
> on the system with intel_pmt_get_regions_by_feature(). Call this for
> each pmt_feature_id that indicates per-RMID telemetry.
> 
> Save the returned pmt_feature_group pointers with guids that are known
> to resctrl for use at run time.
> 
> Those pointers are returned to the INTEL_PMT_DISCOVERY driver at
> resctrl_arch_exit() time.
> 
> Signed-off-by: Tony Luck <tony.luck@...el.com>
> ---
>  arch/x86/kernel/cpu/resctrl/internal.h  |   3 +
>  arch/x86/kernel/cpu/resctrl/core.c      |   5 +
>  arch/x86/kernel/cpu/resctrl/intel_aet.c | 129 ++++++++++++++++++++++++
>  arch/x86/kernel/cpu/resctrl/Makefile    |   1 +
>  4 files changed, 138 insertions(+)
>  create mode 100644 arch/x86/kernel/cpu/resctrl/intel_aet.c
> 
> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> index 02c9e7d163dc..2b2d4b5a4643 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -167,4 +167,7 @@ void __init intel_rdt_mbm_apply_quirk(void);
>  
>  void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
>  
> +bool intel_aet_get_events(void);
> +void __exit intel_aet_exit(void);
> +
>  #endif /* _ASM_X86_RESCTRL_INTERNAL_H */
> diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
> index ce4885c751e4..64ce561e77a0 100644
> --- a/arch/x86/kernel/cpu/resctrl/core.c
> +++ b/arch/x86/kernel/cpu/resctrl/core.c
> @@ -727,6 +727,9 @@ void resctrl_arch_pre_mount(void)
>  
>  	if (!atomic_try_cmpxchg(&only_once, &old, 1))
>  		return;
> +
> +	if (!intel_aet_get_events())
> +		return;
>  }
>  
>  enum {
> @@ -1079,6 +1082,8 @@ late_initcall(resctrl_arch_late_init);
>  
>  static void __exit resctrl_arch_exit(void)
>  {
> +	intel_aet_exit();
> +
>  	cpuhp_remove_state(rdt_online);
>  
>  	resctrl_exit();
> diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
> new file mode 100644
> index 000000000000..df73b9476c4d
> --- /dev/null
> +++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
> @@ -0,0 +1,129 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Resource Director Technology(RDT)
> + * - Intel Application Energy Telemetry
> + *
> + * Copyright (C) 2025 Intel Corporation
> + *
> + * Author:
> + *    Tony Luck <tony.luck@...el.com>
> + */
> +
> +#define pr_fmt(fmt)   "resctrl: " fmt
> +
> +#include <linux/cleanup.h>
> +#include <linux/cpu.h>
> +#include <linux/resctrl.h>
> +
> +/* Temporary - delete from final version */
> +#include "fake_intel_aet_features.h"
> +
> +#include "internal.h"
> +
> +/**
> + * struct event_group - All information about a group of telemetry events.
> + * @pfg:		Points to the aggregated telemetry space information
> + *			within the OOBMSM driver that contains data for all
> + *			telemetry regions.
> + * @guid:		Unique number per XML description file.
> + */
> +struct event_group {
> +	/* Data fields used by this code. */

As opposed to data fields _not_ used by this code?

> +	struct pmt_feature_group	*pfg;
> +
> +	/* Remaining fields initialized from XML file. */
> +	u32				guid;
> +};
> +
> +/*
> + * Link: https://github.com/intel/Intel-PMT
> + * File: xml/CWF/OOBMSM/RMID-ENERGY/cwf_aggregator.xml
> + */
> +static struct event_group energy_0x26696143 = {
> +	.guid		= 0x26696143,
> +};
> +
> +/*
> + * Link: https://github.com/intel/Intel-PMT
> + * File: xml/CWF/OOBMSM/RMID-PERF/cwf_aggregator.xml
> + */
> +static struct event_group perf_0x26557651 = {
> +	.guid		= 0x26557651,
> +};
> +
> +static struct event_group *known_event_groups[] = {
> +	&energy_0x26696143,
> +	&perf_0x26557651,
> +};
> +
> +#define NUM_KNOWN_GROUPS ARRAY_SIZE(known_event_groups)
> +
> +/* Stub for now */
> +static int configure_events(struct event_group *e, struct pmt_feature_group *p)
> +{
> +	return -EINVAL;
> +}
> +
> +DEFINE_FREE(intel_pmt_put_feature_group, struct pmt_feature_group *,	\
> +	if (!IS_ERR_OR_NULL(_T))					\
> +		intel_pmt_put_feature_group(_T))
> +

Line continuations are not necessary (checkpatch.pl)

> +/*
> + * Make a request to the INTEL_PMT_DISCOVERY driver for the
> + * pmt_feature_group for a specific feature. If there is
> + * one the returned structure has an array of telemetry_region
> + * structures. Each describes one telemetry aggregator.
> + * Try to configure any with a known matching guid.
> + */
> +static bool get_pmt_feature(enum pmt_feature_id feature)
> +{
> +	struct pmt_feature_group *p __free(intel_pmt_put_feature_group) = NULL;
> +	struct event_group **peg;
> +	bool ret;
> +
> +	p = intel_pmt_get_regions_by_feature(feature);
> +
> +	if (IS_ERR_OR_NULL(p))
> +		return false;
> +

What is the chance of p being valid here but there are no valid aggregators
for this system? (more below)

> +	for (peg = &known_event_groups[0]; peg < &known_event_groups[NUM_KNOWN_GROUPS]; peg++) {
> +		for (int i = 0; i < p->count; i++) {
> +			if ((*peg)->guid == p->regions[i].guid) {

At first this loop looks wrong since it seems to skip some aggregators by only running
configure_events() on the first aggregator found. After digging to understand what this does
it looks like unnecessary duplication to loop through aggregators here to determine if
configure_events() should be called and then to loop through aggregators again
within configure_events(). This is not obvious in this patch but comparing this
to what is coming in patch #18 I wonder if this cannot just be:

	for (peg = &known_event_groups[0]; peg < &known_event_groups[NUM_KNOWN_GROUPS]; peg++) {
		ret = configure_events(*peg, p);
		if (!ret) {
			(*peg)->pfg = no_free_ptr(p);
			return true;
		}
	}

In turn, configure_events() can contain:

	for (int i = 0; i < p->count; i++) {                                    
		tr = &p->regions[i];                                            
		if (skip_this_region(tr, e))                                    
			continue;                                               
		if (!pkgcounts) {
			pkgcounts = kcalloc(num_pkgs, sizeof(*pkgcounts), GFP_KERNEL);
			if (!pkgcounts)
				return -ENOMEM;
		}
		pkgcounts[tr->plat_info.package_id]++;                          
	}                                      

	if (!pkgcounts)
		return -ENODEV; /* TBD error code */
                                                               



> +				ret = configure_events(*peg, p);
> +				if (!ret) {
> +					(*peg)->pfg = no_free_ptr(p);
> +					return true;
> +				}
> +				break;
> +			}
> +		}
> +	}
> +
> +	return false;
> +}
> +
> +/*
> + * Ask OOBMSM discovery driver for all the RMID based telemetry groups
> + * that it supports.
> + */
> +bool intel_aet_get_events(void)
> +{
> +	bool ret1, ret2;
> +
> +	ret1 = get_pmt_feature(FEATURE_PER_RMID_ENERGY_TELEM);
> +	ret2 = get_pmt_feature(FEATURE_PER_RMID_PERF_TELEM);
> +
> +	return ret1 || ret2;
> +}
> +
> +void __exit intel_aet_exit(void)
> +{
> +	struct event_group **peg;
> +
> +	for (peg = &known_event_groups[0]; peg < &known_event_groups[NUM_KNOWN_GROUPS]; peg++) {
> +		if ((*peg)->pfg) {
> +			intel_pmt_put_feature_group((*peg)->pfg);
> +			(*peg)->pfg = NULL;
> +		}
> +	}
> +}
> diff --git a/arch/x86/kernel/cpu/resctrl/Makefile b/arch/x86/kernel/cpu/resctrl/Makefile
> index cf4fac58d068..cca23f06d15d 100644
> --- a/arch/x86/kernel/cpu/resctrl/Makefile
> +++ b/arch/x86/kernel/cpu/resctrl/Makefile
> @@ -1,6 +1,7 @@
>  # SPDX-License-Identifier: GPL-2.0
>  obj-$(CONFIG_X86_CPU_RESCTRL)		+= core.o rdtgroup.o monitor.o
>  obj-$(CONFIG_X86_CPU_RESCTRL)		+= ctrlmondata.o
> +obj-$(CONFIG_X86_CPU_RESCTRL)		+= intel_aet.o
>  obj-$(CONFIG_X86_CPU_RESCTRL)		+= fake_intel_aet_features.o
>  obj-$(CONFIG_RESCTRL_FS_PSEUDO_LOCK)	+= pseudo_lock.o
>  

Reinette

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ