[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <e8314281-2778-4cbd-be01-0ac00b8775df@intel.com>
Date: Fri, 18 Apr 2025 18:53:40 -0700
From: Reinette Chatre <reinette.chatre@...el.com>
To: Tony Luck <tony.luck@...el.com>, Fenghua Yu <fenghuay@...dia.com>, "Maciej
Wieczor-Retman" <maciej.wieczor-retman@...el.com>, Peter Newman
<peternewman@...gle.com>, James Morse <james.morse@....com>, Babu Moger
<babu.moger@....com>, Drew Fustini <dfustini@...libre.com>, Dave Martin
<Dave.Martin@....com>, Anil Keshavamurthy <anil.s.keshavamurthy@...el.com>
CC: <linux-kernel@...r.kernel.org>, <patches@...ts.linux.dev>
Subject: Re: [PATCH v3 18/26] x86/resctrl: Add code to read core telemetry
events
Hi Tony,
(deja vu ... "Add code to" can be dropped)
On 4/7/25 4:40 PM, Tony Luck wrote:
> The new telemetry events will be part of a new resctrl resource.
> Add the RDT_RESOURCE_PERF_PKG to enum resctrl_res_level.
Please follow the tip changelog structure conventions throughout this series.
>
> Add hook resctrl_arch_rmid_read() to pass reads on this
> resource to the telemetry code.
>
> There may be multiple devices tracking each package, so scan all of them
> and add up counters.
>
> Signed-off-by: Tony Luck <tony.luck@...el.com>
> ---
> include/linux/resctrl_types.h | 1 +
> arch/x86/kernel/cpu/resctrl/internal.h | 5 +++
> arch/x86/kernel/cpu/resctrl/intel_aet.c | 58 +++++++++++++++++++++++++
> arch/x86/kernel/cpu/resctrl/monitor.c | 6 +++
> 4 files changed, 70 insertions(+)
>
> diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h
> index fbd4b55c41aa..3354f21e82ad 100644
> --- a/include/linux/resctrl_types.h
> +++ b/include/linux/resctrl_types.h
> @@ -39,6 +39,7 @@ enum resctrl_res_level {
> RDT_RESOURCE_L2,
> RDT_RESOURCE_MBA,
> RDT_RESOURCE_SMBA,
> + RDT_RESOURCE_PERF_PKG,
>
> /* Must be the last */
> RDT_NUM_RESOURCES,
> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> index 70b63bbc429d..1b1cbb948a9a 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -175,9 +175,14 @@ void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
> #ifdef CONFIG_INTEL_AET_RESCTRL
> bool intel_aet_get_events(void);
> void __exit intel_aet_exit(void);
> +int intel_aet_read_event(int domid, int rmid, int evtid, u64 *val);
This can use enum resctrl_event_id for evtid?
> #else
> static inline bool intel_aet_get_events(void) { return false; }
> static inline void intel_aet_exit(void) { };
> +static inline int intel_aet_read_event(int domid, int rmid, int evtid, u64 *val)
> +{
> + return -EINVAL;
> +}
> #endif
>
> #endif /* _ASM_X86_RESCTRL_INTERNAL_H */
> diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
> index 44d2fe747ed8..67a1245858dc 100644
> --- a/arch/x86/kernel/cpu/resctrl/intel_aet.c
> +++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
> @@ -73,6 +73,12 @@ static struct evtinfo {
> struct pmt_event *pmt_event;
> } evtinfo[QOS_NUM_EVENTS];
>
> +#define EVT_NUM_RMIDS(evtid) (evtinfo[evtid].telem_entry->num_rmids)
> +#define EVT_NUM_EVENTS(evtid) (evtinfo[evtid].telem_entry->num_events)
> +#define EVT_GUID(evtid) (evtinfo[evtid].telem_entry->guid)
> +
> +#define EVT_OFFSET(evtid) (evtinfo[evtid].pmt_event->evt_offset)
Please open code these or use functions if you need to.
> +
> /* All known telemetry event groups */
> static struct telem_entry *telem_entry[] = {
> NULL
> @@ -224,3 +230,55 @@ void __exit intel_aet_exit(void)
> }
> kfree(pkg_info);
> }
> +
> +#define VALID_BIT BIT_ULL(63)
> +#define DATA_BITS GENMASK_ULL(62, 0)
> +
> +/*
> + * Walk the array of telemetry groups on a specific package.
> + * Read and sum values for a specific counter (described by
> + * guid and offset).
> + * Return failure (~0x0ull) if any counter isn't valid.
> + */
> +static u64 scan_pmt_devs(int package, int guid, int offset)
> +{
> + u64 rval, val;
> + int ndev = 0;
> +
> + rval = 0;
This can be done as part of definition.
> +
> + for (int i = 0; i < pkg_info[package].count; i++) {
> + if (pkg_info[package].regions[i].guid != guid)
> + continue;
> + ndev++;
> + val = readq(pkg_info[package].regions[i].addr + offset);
> +
> + if (!(val & VALID_BIT))
> + return ~0ull;
> + rval += val & DATA_BITS;
> + }
> +
> + return ndev ? rval : ~0ull;
> +}
> +
> +/*
> + * Read counter for an event on a domain (summing all aggregators
> + * on the domain).
> + */
> +int intel_aet_read_event(int domid, int rmid, int evtid, u64 *val)
> +{
> + u64 evtcount;
> + int offset;
> +
> + if (rmid >= EVT_NUM_RMIDS(evtid))
> + return -ENOENT;
> +
> + offset = rmid * EVT_NUM_EVENTS(evtid) * sizeof(u64);
> + offset += EVT_OFFSET(evtid);
> + evtcount = scan_pmt_devs(domid, EVT_GUID(evtid), offset);
> +
> + if (evtcount != ~0ull || *val == 0)
> + *val += evtcount;
> +
> + return evtcount != ~0ull ? 0 : -EINVAL;
> +}
> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
> index 06623d51d006..4fa297d463ba 100644
> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
> @@ -236,6 +236,12 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
> u32 prmid;
> int ret;
>
> + if (r->rid == RDT_RESOURCE_PERF_PKG) {
> + ret = intel_aet_read_event(d->hdr.id, rmid, eventid, val);
> +
> + return ret ? ret : 0;
> + }
Not sure if I am missing something at this stage, but since
resctrl_arch_rmid_read() can now return -ENOENT, and rmid_read::err can
thus obtain the value -ENOENT, it looks like there may be an issue when
this error is returned: rdtgroup_mondata_show()'s "checkresult" does not
have handling for -ENOENT and will attempt to print data to user space.
> +
> resctrl_arch_rmid_read_context_check();
Please keep this context check at top of function.
>
> prmid = logical_rmid_to_physical_rmid(cpu, rmid);
Reinette
Powered by blists - more mailing lists