lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <3f152477-89fc-484a-bcfa-f625b6b630e3@intel.com>
Date: Fri, 18 Apr 2025 17:30:50 -0700
From: Reinette Chatre <reinette.chatre@...el.com>
To: Tony Luck <tony.luck@...el.com>, Fenghua Yu <fenghuay@...dia.com>, "Maciej
 Wieczor-Retman" <maciej.wieczor-retman@...el.com>, Peter Newman
	<peternewman@...gle.com>, James Morse <james.morse@....com>, Babu Moger
	<babu.moger@....com>, Drew Fustini <dfustini@...libre.com>, Dave Martin
	<Dave.Martin@....com>, Anil Keshavamurthy <anil.s.keshavamurthy@...el.com>
CC: <linux-kernel@...r.kernel.org>, <patches@...ts.linux.dev>
Subject: Re: [PATCH v3 15/26] x86/resctrl: Second stage of telemetry event
 enumeration

Hi Tony,

On 4/7/25 4:40 PM, Tony Luck wrote:
> Scan the telemetry_region structures looking for recognised guid

Please add context before the description of what the patch does.

Also, please pick British or American English and stick with it.

> values. Count how many are found in each package.
> 
> Note that telemetry support depends on at least one of the
> original RDT monitoring features being enabled (so that the
> CPU hotplug notifiers for resctrl are running).
> 
> Signed-off-by: Tony Luck <tony.luck@...el.com>
> ---
>  arch/x86/kernel/cpu/resctrl/intel_aet.c | 112 +++++++++++++++++++++++-
>  1 file changed, 110 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
> index 8e531ad279b5..9d414dd40f8b 100644
> --- a/arch/x86/kernel/cpu/resctrl/intel_aet.c
> +++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
> @@ -23,10 +23,100 @@
>  static struct pmt_feature_group *feat_energy;
>  static struct pmt_feature_group *feat_perf;
>  
> +/* Per-package event groups active on this machine */
> +static struct pkg_info {
> +	int			count;
> +	struct telemetry_region	*regions;
> +} *pkg_info;
> +
> +/**
> + * struct pmt_event - Telemetry event.

Why does it need "pmt" prefix? Can it be "telem_event" to
match telem_entry?

> + * @evtid:	Resctrl event id
> + * @evt_offset:	MMIO offset of counter
> + * @type:	Type for format user display of event value

I cannot make sense of "Type for format user display of event value".

> + */
> +struct pmt_event {
> +	enum resctrl_event_id	evtid;
> +	int			evt_offset;
> +	enum resctrl_event_type	type;
> +};
> +
> +/**
> + * struct telem_entry - Summarized form from XML telemetry description

Copying from v2 review:
"It is not clear to me how useful it is to document that this is
"Summarized form from XML telemetry description". Either more detail should
be added to help reader understand what XML is being talked about or
the description should be a summary of what this data structure represents."


> + * @name:			Name for this group of events
> + * @guid:			Unique ID for this group
> + * @size:			Size of MMIO mapped counter registers
> + * @num_rmids:			Number of RMIDS supported
> + * @overflow_counter_off:	Offset of overflow count

The description just restates the member name and changes "counter" to "count".
Could the description give more detail about what this member represents?
What overflowed?

> + * @last_overflow_tstamp_off:	Offset of overflow timestamp

What overflowed at this timestamp?

> + * @last_update_tstamp_off:	Offset of last update timestamp
What was updated at this timestamp?

> + * @active:			Marks this group as active on this system

What does it mean when a group is "active"?

> + * @num_events:			Size of @evts array

Would __counted_by() be useful?

> + * @evts:			Telemetry events in this group
> + */
> +struct telem_entry {
> +	char	*name;
> +	int	guid;
> +	int	size;
> +	int	num_rmids;
> +	int	overflow_counter_off;
> +	int	last_overflow_tstamp_off;
> +	int	last_update_tstamp_off;

Most of the types are "int" ... I do not expect many of these members to be
negative, so I would like to check whether int is the most appropriate type
for all of them. Usually size_t is used for sizes, and off_t/loff_t is
available for offsets.

> +	bool	active;
> +	int	num_events;
> +	struct pmt_event evts[];

(missing tab)

> +};
> +
> +/* All known telemetry event groups */

This comment is more useful precisely because it does not read "All known Summarized form from XML telemetry description".

> +static struct telem_entry *telem_entry[] = {
> +	NULL
> +};
> +
> +/*
> + * Scan a feature group looking for guids recognized

This switches from British to American English within the same patch.

> + * and update the per-package counts of known groups.
> + */
> +static bool count_events(struct pkg_info *pkg, int max_pkgs, struct pmt_feature_group *p)
> +{
> +	struct telem_entry **tentry;
> +	bool found = false;
> +
> +	if (IS_ERR_OR_NULL(p))
> +		return false;
> +
> +	for (int i = 0; i < p->count; i++) {
> +		struct telemetry_region *tr = &p->regions[i];
> +
> +		for (tentry = telem_entry; *tentry; tentry++) {
> +			if (tr->guid == (*tentry)->guid) {
> +				if (tr->plat_info.package_id > max_pkgs) {

Should this be >=?

> +					pr_warn_once("Bad package %d\n", tr->plat_info.package_id);
> +					continue;
> +				}
> +				if (tr->size > (*tentry)->size) {
> +					pr_warn_once("MMIO region for guid 0x%x too small\n", tr->guid);
> +					continue;
> +				}
> +				found = true;
> +				(*tentry)->active = true;
> +				pkg[tr->plat_info.package_id].count++;
> +				break;
> +			}
> +		}
> +	}
> +
> +	return found;
> +}
> +
>  DEFINE_FREE(intel_pmt_put_feature_group, struct pmt_feature_group *,	\
>  	if (!IS_ERR_OR_NULL(_T))					\
>  		intel_pmt_put_feature_group(_T))
>  
> +DEFINE_FREE(free_pkg_info, struct pkg_info *,				\
> +	if (_T)								\
> +		for (int i = 0; i < topology_max_packages(); i++)	\
> +			kfree(_T[i].regions);				\
> +	kfree(_T))
>  /*
>   * Ask OOBMSM discovery driver for all the RMID based telemetry groups
>   * that it supports.
> @@ -35,20 +125,32 @@ bool intel_aet_get_events(void)
>  {
>  	struct pmt_feature_group *p1 __free(intel_pmt_put_feature_group) = NULL;
>  	struct pmt_feature_group *p2 __free(intel_pmt_put_feature_group) = NULL;
> +	struct pkg_info *pkg __free(free_pkg_info) = NULL;
> +	int num_pkgs = topology_max_packages();
>  	bool use_p1, use_p2;
>  
> +	pkg = kcalloc(num_pkgs, sizeof(*pkg_info), GFP_KERNEL);

sizeof(*pkg)?

Reinette

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ