lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <e8314281-2778-4cbd-be01-0ac00b8775df@intel.com>
Date: Fri, 18 Apr 2025 18:53:40 -0700
From: Reinette Chatre <reinette.chatre@...el.com>
To: Tony Luck <tony.luck@...el.com>, Fenghua Yu <fenghuay@...dia.com>, "Maciej
 Wieczor-Retman" <maciej.wieczor-retman@...el.com>, Peter Newman
	<peternewman@...gle.com>, James Morse <james.morse@....com>, Babu Moger
	<babu.moger@....com>, Drew Fustini <dfustini@...libre.com>, Dave Martin
	<Dave.Martin@....com>, Anil Keshavamurthy <anil.s.keshavamurthy@...el.com>
CC: <linux-kernel@...r.kernel.org>, <patches@...ts.linux.dev>
Subject: Re: [PATCH v3 18/26] x86/resctrl: Add code to read core telemetry
 events

Hi Tony,

(deja vu ... "Add code to" can be dropped)

On 4/7/25 4:40 PM, Tony Luck wrote:
> The new telemetry events will be part of a new resctrl resource.
> Add the RDT_RESOURCE_PERF_PKG to enum resctrl_res_level.

Please follow the tip tree's customary changelog structure throughout this series.

> 
> Add hook resctrl_arch_rmid_read() to pass reads on this
> resource to the telemetry code.
> 
> There may be multiple devices tracking each package, so scan all of them
> and add up counters.
> 
> Signed-off-by: Tony Luck <tony.luck@...el.com>
> ---
>  include/linux/resctrl_types.h           |  1 +
>  arch/x86/kernel/cpu/resctrl/internal.h  |  5 +++
>  arch/x86/kernel/cpu/resctrl/intel_aet.c | 58 +++++++++++++++++++++++++
>  arch/x86/kernel/cpu/resctrl/monitor.c   |  6 +++
>  4 files changed, 70 insertions(+)
> 
> diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h
> index fbd4b55c41aa..3354f21e82ad 100644
> --- a/include/linux/resctrl_types.h
> +++ b/include/linux/resctrl_types.h
> @@ -39,6 +39,7 @@ enum resctrl_res_level {
>  	RDT_RESOURCE_L2,
>  	RDT_RESOURCE_MBA,
>  	RDT_RESOURCE_SMBA,
> +	RDT_RESOURCE_PERF_PKG,
>  
>  	/* Must be the last */
>  	RDT_NUM_RESOURCES,
> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> index 70b63bbc429d..1b1cbb948a9a 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -175,9 +175,14 @@ void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
>  #ifdef CONFIG_INTEL_AET_RESCTRL
>  bool intel_aet_get_events(void);
>  void __exit intel_aet_exit(void);
> +int intel_aet_read_event(int domid, int rmid, int evtid, u64 *val);

This can use enum resctrl_event_id for evtid?

>  #else
>  static inline bool intel_aet_get_events(void) { return false; }
>  static inline void intel_aet_exit(void) { };
> +static inline int intel_aet_read_event(int domid, int rmid, int evtid, u64 *val)
> +{
> +	return -EINVAL;
> +}
>  #endif
>  
>  #endif /* _ASM_X86_RESCTRL_INTERNAL_H */
> diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
> index 44d2fe747ed8..67a1245858dc 100644
> --- a/arch/x86/kernel/cpu/resctrl/intel_aet.c
> +++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
> @@ -73,6 +73,12 @@ static struct evtinfo {
>  	struct pmt_event	*pmt_event;
>  } evtinfo[QOS_NUM_EVENTS];
>  
> +#define EVT_NUM_RMIDS(evtid)	(evtinfo[evtid].telem_entry->num_rmids)
> +#define EVT_NUM_EVENTS(evtid)	(evtinfo[evtid].telem_entry->num_events)
> +#define EVT_GUID(evtid)		(evtinfo[evtid].telem_entry->guid)
> +
> +#define EVT_OFFSET(evtid)	(evtinfo[evtid].pmt_event->evt_offset)

Please open code these or use functions if you need to.

> +
>  /* All known telemetry event groups */
>  static struct telem_entry *telem_entry[] = {
>  	NULL
> @@ -224,3 +230,55 @@ void __exit intel_aet_exit(void)
>  	}
>  	kfree(pkg_info);
>  }
> +
> +#define VALID_BIT	BIT_ULL(63)
> +#define DATA_BITS	GENMASK_ULL(62, 0)
> +
> +/*
> + * Walk the array of telemetry groups on a specific package.
> + * Read and sum values for a specific counter (described by
> + * guid and offset).
> + * Return failure (~0x0ull) if any counter isn't valid.
> + */
> +static u64 scan_pmt_devs(int package, int guid, int offset)
> +{
> +	u64 rval, val;
> +	int ndev = 0;
> +
> +	rval = 0;

This can be done as part of the definition.

> +
> +	for (int i = 0; i < pkg_info[package].count; i++) {
> +		if (pkg_info[package].regions[i].guid != guid)
> +			continue;
> +		ndev++;
> +		val = readq(pkg_info[package].regions[i].addr + offset);
> +
> +		if (!(val & VALID_BIT))
> +			return ~0ull;
> +		rval += val & DATA_BITS;
> +	}
> +
> +	return ndev ? rval : ~0ull;
> +}
> +
> +/*
> + * Read counter for an event on a domain (summing all aggregators
> + * on the domain).
> + */
> +int intel_aet_read_event(int domid, int rmid, int evtid, u64 *val)
> +{
> +	u64 evtcount;
> +	int offset;
> +
> +	if (rmid >= EVT_NUM_RMIDS(evtid))
> +		return -ENOENT;
> +
> +	offset = rmid * EVT_NUM_EVENTS(evtid) * sizeof(u64);
> +	offset += EVT_OFFSET(evtid);
> +	evtcount = scan_pmt_devs(domid, EVT_GUID(evtid), offset);
> +
> +	if (evtcount != ~0ull || *val == 0)
> +		*val += evtcount;
> +
> +	return evtcount != ~0ull ? 0 : -EINVAL;
> +}
> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
> index 06623d51d006..4fa297d463ba 100644
> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
> @@ -236,6 +236,12 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
>  	u32 prmid;
>  	int ret;
>  
> +	if (r->rid == RDT_RESOURCE_PERF_PKG) {
> +		ret = intel_aet_read_event(d->hdr.id, rmid, eventid, val);
> +
> +		return ret ? ret : 0;
> +	}

Not sure if I am missing something at this stage, but it looks like there
may be an issue now that resctrl_arch_rmid_read() can return ENOENT and
rmid_read::err can thus obtain the value ENOENT: rdtgroup_mondata_show()'s
"checkresult" does not have handling for ENOENT and will attempt to print
data to user space when this error is returned.

> +
>  	resctrl_arch_rmid_read_context_check();

Please keep this context check at top of function.

>  
>  	prmid = logical_rmid_to_physical_rmid(cpu, rmid);


Reinette

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ