[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <3f152477-89fc-484a-bcfa-f625b6b630e3@intel.com>
Date: Fri, 18 Apr 2025 17:30:50 -0700
From: Reinette Chatre <reinette.chatre@...el.com>
To: Tony Luck <tony.luck@...el.com>, Fenghua Yu <fenghuay@...dia.com>, "Maciej
Wieczor-Retman" <maciej.wieczor-retman@...el.com>, Peter Newman
<peternewman@...gle.com>, James Morse <james.morse@....com>, Babu Moger
<babu.moger@....com>, Drew Fustini <dfustini@...libre.com>, Dave Martin
<Dave.Martin@....com>, Anil Keshavamurthy <anil.s.keshavamurthy@...el.com>
CC: <linux-kernel@...r.kernel.org>, <patches@...ts.linux.dev>
Subject: Re: [PATCH v3 15/26] x86/resctrl: Second stage of telemetry event
enumeration
Hi Tony,
On 4/7/25 4:40 PM, Tony Luck wrote:
> Scan the telemetry_region structures looking for recognised guid
Please add context before the description of what the patch does.
Also, please pick British or American English and stick with it.
> values. Count how many are found in each package.
>
> Note that telemetry support depends on at least one of the
> original RDT monitoring features being enabled (so that the
> CPU hotplug notifiers for resctrl are running).
>
> Signed-off-by: Tony Luck <tony.luck@...el.com>
> ---
> arch/x86/kernel/cpu/resctrl/intel_aet.c | 112 +++++++++++++++++++++++-
> 1 file changed, 110 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
> index 8e531ad279b5..9d414dd40f8b 100644
> --- a/arch/x86/kernel/cpu/resctrl/intel_aet.c
> +++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
> @@ -23,10 +23,100 @@
> static struct pmt_feature_group *feat_energy;
> static struct pmt_feature_group *feat_perf;
>
> +/* Per-package event groups active on this machine */
> +static struct pkg_info {
> + int count;
> + struct telemetry_region *regions;
> +} *pkg_info;
> +
> +/**
> + * struct pmt_event - Telemetry event.
Why does it need "pmt" prefix? Can it be "telem_event" to
match telem_entry?
> + * @evtid: Resctrl event id
> + * @evt_offset: MMIO offset of counter
> + * @type: Type for format user display of event value
I cannot make sense of "Type for format user display of event value"
> + */
> +struct pmt_event {
> + enum resctrl_event_id evtid;
> + int evt_offset;
> + enum resctrl_event_type type;
> +};
> +
> +/**
> + * struct telem_entry - Summarized form from XML telemetry description
Copying from v2 review:
"It is not clear to me how useful it is to document that this is
"Summarized form from XML telemetry description". Either more detail should
be added to help reader understand what XML is being talked about or
the description should be a summary of what this data structure represents."
> + * @name: Name for this group of events
> + * @guid: Unique ID for this group
> + * @size: Size of MMIO mapped counter registers
> + * @num_rmids: Number of RMIDS supported
> + * @overflow_counter_off: Offset of overflow count
The description just restates the member name and changes "counter" to "count".
Could the description have more details about what is represented by this?
What overflowed?
> + * @last_overflow_tstamp_off: Offset of overflow timestamp
What overflowed at this timestamp?
> + * @last_update_tstamp_off: Offset of last update timestamp
What was updated at this timestamp?
> + * @active: Marks this group as active on this system
What does it mean when a group is "active"?
> + * @num_events: Size of @evts array
Would __counted_by() be useful?
> + * @evts: Telemetry events in this group
> + */
> +struct telem_entry {
> + char *name;
> + int guid;
> + int size;
> + int num_rmids;
> + int overflow_counter_off;
> + int last_overflow_tstamp_off;
> + int last_update_tstamp_off;
Most of the types are "int" ... I do not expect many of these members to be
negative, so I would like to check whether int is the most appropriate type for
all of them. Usually size_t is used for sizes and off_t/loff_t is available for
offsets.
> + bool active;
> + int num_events;
> + struct pmt_event evts[];
(missing tab)
> +};
> +
> +/* All known telemetry event groups */
This is more useful by not being "All known Summarized form from XML telemetry description".
> +static struct telem_entry *telem_entry[] = {
> + NULL
> +};
> +
> +/*
> + * Scan a feature group looking for guids recognized
Switch from British to American English in same patch.
> + * and update the per-package counts of known groups.
> + */
> +static bool count_events(struct pkg_info *pkg, int max_pkgs, struct pmt_feature_group *p)
> +{
> + struct telem_entry **tentry;
> + bool found = false;
> +
> + if (IS_ERR_OR_NULL(p))
> + return false;
> +
> + for (int i = 0; i < p->count; i++) {
> + struct telemetry_region *tr = &p->regions[i];
> +
> + for (tentry = telem_entry; *tentry; tentry++) {
> + if (tr->guid == (*tentry)->guid) {
> + if (tr->plat_info.package_id > max_pkgs) {
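Should this be >=? pkg[] is indexed by package_id below, so if it holds max_pkgs entries then package_id == max_pkgs is already out of bounds.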
> + pr_warn_once("Bad package %d\n", tr->plat_info.package_id);
> + continue;
> + }
> + if (tr->size > (*tentry)->size) {
> + pr_warn_once("MMIO region for guid 0x%x too small\n", tr->guid);
> + continue;
> + }
> + found = true;
> + (*tentry)->active = true;
> + pkg[tr->plat_info.package_id].count++;
> + break;
> + }
> + }
> + }
> +
> + return found;
> +}
> +
> DEFINE_FREE(intel_pmt_put_feature_group, struct pmt_feature_group *, \
> if (!IS_ERR_OR_NULL(_T)) \
> intel_pmt_put_feature_group(_T))
>
> +DEFINE_FREE(free_pkg_info, struct pkg_info *, \
> + if (_T) \
> + for (int i = 0; i < topology_max_packages(); i++) \
> + kfree(_T[i].regions); \
> + kfree(_T))
> /*
> * Ask OOBMSM discovery driver for all the RMID based telemetry groups
> * that it supports.
> @@ -35,20 +125,32 @@ bool intel_aet_get_events(void)
> {
> struct pmt_feature_group *p1 __free(intel_pmt_put_feature_group) = NULL;
> struct pmt_feature_group *p2 __free(intel_pmt_put_feature_group) = NULL;
> + struct pkg_info *pkg __free(free_pkg_info) = NULL;
> + int num_pkgs = topology_max_packages();
> bool use_p1, use_p2;
>
> + pkg = kcalloc(num_pkgs, sizeof(*pkg_info), GFP_KERNEL);
sizeof(*pkg)?
Reinette
Powered by blists - more mailing lists