Message-ID: <7bfbc2b0-d4a2-4c2c-908f-6d3eb98ae1f5@intel.com>
Date: Fri, 30 May 2025 11:11:47 -0700
From: "Keshavamurthy, Anil S" <anil.s.keshavamurthy@...el.com>
To: Qinyun Tan <qinyuntan@...ux.alibaba.com>, Tony Luck <tony.luck@...el.com>
CC: "H . Peter Anvin" <hpa@...or.com>, <linux-kernel@...r.kernel.org>,
<x86@...nel.org>, Reinette Chatre <reinette.chatre@...el.com>
Subject: Re: [PATCH V4 1/1] x86/resctrl: Remove inappropriate references to
cacheinfo in the resctrl subsystem.

Hi Qinyun,

Ever since the resctrl subsystem was refactored to support other
architectures by moving the common filesystem-handling code into
fs/resctrl/*, folks have started tagging the subject line with
"x86,fs/resctrl:" when a patch touches files under fs/resctrl. Since
your patch touches files under both arch/x86 and fs/resctrl/*, could
you please use the "x86,fs/resctrl:" subject tag so we stay
consistent?
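
For example:

  x86,fs/resctrl: Remove inappropriate references to cacheinfo in the
  resctrl subsystem
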
/Anil
On 5/30/2025 10:50 AM, Qinyun Tan wrote:
> In the resctrl subsystem's Sub-NUMA Cluster (SNC) mode, the rdt_mon_domain
> structure representing a NUMA node relies on the cacheinfo interface
> (rdt_mon_domain::ci) to store L3 cache information (e.g., shared_cpu_map)
> for monitoring. The L3 cache information of an SNC NUMA node determines
> which domains are summed for the "top-level" L3-scoped events.
>
> rdt_mon_domain::ci is initialized using the first online CPU of a NUMA
> node. When this CPU goes offline, its shared_cpu_map is cleared to contain
> only the offline CPU itself. Subsequently, attempting to read counters
> via smp_call_on_cpu(offline_cpu) fails (and the error is ignored),
> returning zero values for "top-level events" without any error
> indication. Replace
> the cacheinfo references in struct rdt_mon_domain and struct rmid_read
> with the cacheinfo ID (a unique identifier for the L3 cache).
>
> rdt_domain_hdr::cpu_mask contains the online CPUs associated with that
> domain. When reading "top-level events", select a CPU from
> rdt_domain_hdr::cpu_mask and use its L3 shared_cpu_map to determine
> valid CPUs for reading the RMID counters via the MSR interface.
>
> Considering all CPUs associated with the L3 cache improves the chances
> of picking a housekeeping CPU on which the counter reading work can be
> queued, avoiding an unnecessary IPI.
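>
> For example, a minimal sketch of the new per-domain lookup (all
> identifiers as in the fs/resctrl/ctrlmondata.c hunk below; the real
> code skips the domain when no L3 cacheinfo is found):
>
>         /* Pick any online CPU in the domain to find its L3 cacheinfo. */
>         cpu = cpumask_any(&d->hdr.cpu_mask);
>         ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
>         if (ci)
>                 /* Read the event across the CPUs sharing that L3 cache. */
>                 mon_event_read(&rr, r, NULL, rdtgrp,
>                                &ci->shared_cpu_map, evtid, false);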
>
> Fixes: 328ea68874642 ("x86/resctrl: Prepare for new Sub-NUMA Cluster (SNC) monitor files")
> Signed-off-by: Qinyun Tan <qinyuntan@...ux.alibaba.com>
> Tested-by: Tony Luck <tony.luck@...el.com>
> Reviewed-by: Reinette Chatre <reinette.chatre@...el.com>
> ---
> arch/x86/kernel/cpu/resctrl/core.c | 6 ++++--
> fs/resctrl/ctrlmondata.c | 13 +++++++++----
> fs/resctrl/internal.h | 4 ++--
> fs/resctrl/monitor.c | 6 ++++--
> fs/resctrl/rdtgroup.c | 6 +++---
> include/linux/resctrl.h | 4 ++--
> 6 files changed, 24 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
> index 7109cbfcad4fd..187d527ef73b6 100644
> --- a/arch/x86/kernel/cpu/resctrl/core.c
> +++ b/arch/x86/kernel/cpu/resctrl/core.c
> @@ -498,6 +498,7 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
> struct rdt_hw_mon_domain *hw_dom;
> struct rdt_domain_hdr *hdr;
> struct rdt_mon_domain *d;
> + struct cacheinfo *ci;
> int err;
>
> lockdep_assert_held(&domain_list_lock);
> @@ -525,12 +526,13 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
> d = &hw_dom->d_resctrl;
> d->hdr.id = id;
> d->hdr.type = RESCTRL_MON_DOMAIN;
> - d->ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
> - if (!d->ci) {
> + ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
> + if (!ci) {
> pr_warn_once("Can't find L3 cache for CPU:%d resource %s\n", cpu, r->name);
> mon_domain_free(hw_dom);
> return;
> }
> + d->ci_id = ci->id;
> cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
>
> arch_mon_domain_online(r, d);
> diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c
> index 6ed2dfd4dbbd9..d98e0d2de09fd 100644
> --- a/fs/resctrl/ctrlmondata.c
> +++ b/fs/resctrl/ctrlmondata.c
> @@ -594,9 +594,10 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
> struct rmid_read rr = {0};
> struct rdt_mon_domain *d;
> struct rdtgroup *rdtgrp;
> + int domid, cpu, ret = 0;
> struct rdt_resource *r;
> + struct cacheinfo *ci;
> struct mon_data *md;
> - int domid, ret = 0;
>
> rdtgrp = rdtgroup_kn_lock_live(of->kn);
> if (!rdtgrp) {
> @@ -623,10 +624,14 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
> * one that matches this cache id.
> */
> list_for_each_entry(d, &r->mon_domains, hdr.list) {
> - if (d->ci->id == domid) {
> - rr.ci = d->ci;
> + if (d->ci_id == domid) {
> + rr.ci_id = d->ci_id;
> + cpu = cpumask_any(&d->hdr.cpu_mask);
> + ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
> + if (!ci)
> + continue;
> mon_event_read(&rr, r, NULL, rdtgrp,
> - &d->ci->shared_cpu_map, evtid, false);
> + &ci->shared_cpu_map, evtid, false);
> goto checkresult;
> }
> }
> diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h
> index 9a8cf6f11151d..0a1eedba2b03a 100644
> --- a/fs/resctrl/internal.h
> +++ b/fs/resctrl/internal.h
> @@ -98,7 +98,7 @@ struct mon_data {
> * domains in @r sharing L3 @ci.id
> * @evtid: Which monitor event to read.
> * @first: Initialize MBM counter when true.
> - * @ci: Cacheinfo for L3. Only set when @d is NULL. Used when summing domains.
> + * @ci_id: Cacheinfo id for L3. Only set when @d is NULL. Used when summing domains.
> * @err: Error encountered when reading counter.
> * @val: Returned value of event counter. If @rgrp is a parent resource group,
> * @val includes the sum of event counts from its child resource groups.
> @@ -112,7 +112,7 @@ struct rmid_read {
> struct rdt_mon_domain *d;
> enum resctrl_event_id evtid;
> bool first;
> - struct cacheinfo *ci;
> + unsigned int ci_id;
> int err;
> u64 val;
> void *arch_mon_ctx;
> diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c
> index bde2801289d35..f5637855c3aca 100644
> --- a/fs/resctrl/monitor.c
> +++ b/fs/resctrl/monitor.c
> @@ -361,6 +361,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
> {
> int cpu = smp_processor_id();
> struct rdt_mon_domain *d;
> + struct cacheinfo *ci;
> struct mbm_state *m;
> int err, ret;
> u64 tval = 0;
> @@ -388,7 +389,8 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
> }
>
> /* Summing domains that share a cache, must be on a CPU for that cache. */
> - if (!cpumask_test_cpu(cpu, &rr->ci->shared_cpu_map))
> + ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
> + if (!ci || ci->id != rr->ci_id)
> return -EINVAL;
>
> /*
> @@ -400,7 +402,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
> */
> ret = -EINVAL;
> list_for_each_entry(d, &rr->r->mon_domains, hdr.list) {
> - if (d->ci->id != rr->ci->id)
> + if (d->ci_id != rr->ci_id)
> continue;
> err = resctrl_arch_rmid_read(rr->r, d, closid, rmid,
> rr->evtid, &tval, rr->arch_mon_ctx);
> diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
> index cc37f58b47dd7..74b25bbb9872c 100644
> --- a/fs/resctrl/rdtgroup.c
> +++ b/fs/resctrl/rdtgroup.c
> @@ -3034,7 +3034,7 @@ static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
> char name[32];
>
> snc_mode = r->mon_scope == RESCTRL_L3_NODE;
> - sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
> + sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci_id : d->hdr.id);
> if (snc_mode)
> sprintf(subname, "mon_sub_%s_%02d", r->name, d->hdr.id);
>
> @@ -3059,7 +3059,7 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
> return -EPERM;
>
> list_for_each_entry(mevt, &r->evt_list, list) {
> - domid = do_sum ? d->ci->id : d->hdr.id;
> + domid = do_sum ? d->ci_id : d->hdr.id;
> priv = mon_get_kn_priv(r->rid, domid, mevt, do_sum);
> if (WARN_ON_ONCE(!priv))
> return -EINVAL;
> @@ -3087,7 +3087,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
> lockdep_assert_held(&rdtgroup_mutex);
>
> snc_mode = r->mon_scope == RESCTRL_L3_NODE;
> - sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
> + sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci_id : d->hdr.id);
> kn = kernfs_find_and_get(parent_kn, name);
> if (kn) {
> /*
> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
> index 9ba771f2ddead..6fb4894b8cfd1 100644
> --- a/include/linux/resctrl.h
> +++ b/include/linux/resctrl.h
> @@ -159,7 +159,7 @@ struct rdt_ctrl_domain {
> /**
> * struct rdt_mon_domain - group of CPUs sharing a resctrl monitor resource
> * @hdr: common header for different domain types
> - * @ci: cache info for this domain
> + * @ci_id: cache info id for this domain
> * @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold
> * @mbm_total: saved state for MBM total bandwidth
> * @mbm_local: saved state for MBM local bandwidth
> @@ -170,7 +170,7 @@ struct rdt_ctrl_domain {
> */
> struct rdt_mon_domain {
> struct rdt_domain_hdr hdr;
> - struct cacheinfo *ci;
> + unsigned int ci_id;
> unsigned long *rmid_busy_llc;
> struct mbm_state *mbm_total;
> struct mbm_state *mbm_local;