[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <e37d46f1-9b60-4809-b929-79244ff98189@linux.alibaba.com>
Date: Sat, 31 May 2025 02:16:43 +0800
From: qinyuntan <qinyuntan@...ux.alibaba.com>
To: "Keshavamurthy, Anil S" <anil.s.keshavamurthy@...el.com>,
Tony Luck <tony.luck@...el.com>
Cc: "H . Peter Anvin" <hpa@...or.com>, linux-kernel@...r.kernel.org,
x86@...nel.org, Reinette Chatre <reinette.chatre@...el.com>
Subject: Re: [PATCH V4 1/1] x86/resctrl: Remove inappropriate references to
 cacheinfo in the resctrl subsystem.
Hi, Keshavamurthy, Anil S:
OK, this suggestion is very reasonable. A few seconds before receiving
your email, I had already sent version 5 of the patch, so let me send
version 6 with the updated subject tag :).
On 5/31/25 2:11 AM, Keshavamurthy, Anil S wrote:
> Hi Qinyun,
>
> Ever since this resctrl subsystem has been refactored to support other
> architectures by moving common files related to filesystem handling into
> fs/resctrl/*, I have seen folks started to tag the subject line with
> "x86,fs/resctrl:" when patch touches files under fs/resctrl. Since your
> patches touches files under both arch/x86 and fs/resctrl/*, can you
> please accommodate this subject tag -> "x86,fs/resctrl:" so we are
> consistent.
>
> /Anil
>
> On 5/30/2025 10:50 AM, Qinyun Tan wrote:
>> In the resctrl subsystem's Sub-NUMA Cluster (SNC) mode, the
>> rdt_mon_domain
>> structure representing a NUMA node relies on the cacheinfo interface
>> (rdt_mon_domain::ci) to store L3 cache information (e.g., shared_cpu_map)
>> for monitoring. The L3 cache information of a SNC NUMA node determines
>> which domains are summed for the "top level" L3-scoped events.
>>
>> rdt_mon_domain::ci is initialized using the first online CPU of a NUMA
>> node. When this CPU goes offline, its shared_cpu_map is cleared to
>> contain
>> only the offline CPU itself. Subsequently, attempting to read counters
>> via smp_call_on_cpu(offline_cpu) fails (and error ignored), returning
>> zero values for "top-level events" without any error indication. Replace
>> the cacheinfo references in struct rdt_mon_domain and struct rmid_read
>> with the cacheinfo ID (a unique identifier for the L3 cache).
>>
>> rdt_domain_hdr::cpu_mask contains the online CPUs associated with that
>> domain. When reading "top-level events", select a CPU from
>> rdt_domain_hdr::cpu_mask and utilize its L3 shared_cpu_map to determine
>> valid CPUs for reading RMID counter via the MSR interface.
>>
>> Considering all CPUs associated with the L3 cache improves the chances
>> of picking a housekeeping CPU on which the counter reading work can be
>> queued, avoiding an unnecessary IPI.
>>
>> Fixes: 328ea68874642 ("x86/resctrl: Prepare for new Sub-NUMA Cluster
>> (SNC) monitor files")
>> Signed-off-by: Qinyun Tan <qinyuntan@...ux.alibaba.com>
>> Tested-by: Tony Luck <tony.luck@...el.com>
>> Reviewed-by: Reinette Chatre <reinette.chatre@...el.com>
>> ---
>> arch/x86/kernel/cpu/resctrl/core.c | 6 ++++--
>> fs/resctrl/ctrlmondata.c | 13 +++++++++----
>> fs/resctrl/internal.h | 4 ++--
>> fs/resctrl/monitor.c | 6 ++++--
>> fs/resctrl/rdtgroup.c | 6 +++---
>> include/linux/resctrl.h | 4 ++--
>> 6 files changed, 24 insertions(+), 15 deletions(-)
>>
>> diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/
>> resctrl/core.c
>> index 7109cbfcad4fd..187d527ef73b6 100644
>> --- a/arch/x86/kernel/cpu/resctrl/core.c
>> +++ b/arch/x86/kernel/cpu/resctrl/core.c
>> @@ -498,6 +498,7 @@ static void domain_add_cpu_mon(int cpu, struct
>> rdt_resource *r)
>> struct rdt_hw_mon_domain *hw_dom;
>> struct rdt_domain_hdr *hdr;
>> struct rdt_mon_domain *d;
>> + struct cacheinfo *ci;
>> int err;
>> lockdep_assert_held(&domain_list_lock);
>> @@ -525,12 +526,13 @@ static void domain_add_cpu_mon(int cpu, struct
>> rdt_resource *r)
>> d = &hw_dom->d_resctrl;
>> d->hdr.id = id;
>> d->hdr.type = RESCTRL_MON_DOMAIN;
>> - d->ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
>> - if (!d->ci) {
>> + ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
>> + if (!ci) {
>> pr_warn_once("Can't find L3 cache for CPU:%d resource %s\n",
>> cpu, r->name);
>> mon_domain_free(hw_dom);
>> return;
>> }
>> + d->ci_id = ci->id;
>> cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
>> arch_mon_domain_online(r, d);
>> diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c
>> index 6ed2dfd4dbbd9..d98e0d2de09fd 100644
>> --- a/fs/resctrl/ctrlmondata.c
>> +++ b/fs/resctrl/ctrlmondata.c
>> @@ -594,9 +594,10 @@ int rdtgroup_mondata_show(struct seq_file *m,
>> void *arg)
>> struct rmid_read rr = {0};
>> struct rdt_mon_domain *d;
>> struct rdtgroup *rdtgrp;
>> + int domid, cpu, ret = 0;
>> struct rdt_resource *r;
>> + struct cacheinfo *ci;
>> struct mon_data *md;
>> - int domid, ret = 0;
>> rdtgrp = rdtgroup_kn_lock_live(of->kn);
>> if (!rdtgrp) {
>> @@ -623,10 +624,14 @@ int rdtgroup_mondata_show(struct seq_file *m,
>> void *arg)
>> * one that matches this cache id.
>> */
>> list_for_each_entry(d, &r->mon_domains, hdr.list) {
>> - if (d->ci->id == domid) {
>> - rr.ci = d->ci;
>> + if (d->ci_id == domid) {
>> + rr.ci_id = d->ci_id;
>> + cpu = cpumask_any(&d->hdr.cpu_mask);
>> + ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
>> + if (!ci)
>> + continue;
>> mon_event_read(&rr, r, NULL, rdtgrp,
>> - &d->ci->shared_cpu_map, evtid, false);
>> + &ci->shared_cpu_map, evtid, false);
>> goto checkresult;
>> }
>> }
>> diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h
>> index 9a8cf6f11151d..0a1eedba2b03a 100644
>> --- a/fs/resctrl/internal.h
>> +++ b/fs/resctrl/internal.h
>> @@ -98,7 +98,7 @@ struct mon_data {
>> * domains in @r sharing L3 @ci.id
>> * @evtid: Which monitor event to read.
>> * @first: Initialize MBM counter when true.
>> - * @ci: Cacheinfo for L3. Only set when @d is NULL. Used when
>> summing domains.
>> + * @ci_id: Cacheinfo id for L3. Only set when @d is NULL. Used when
>> summing domains.
>> * @err: Error encountered when reading counter.
>> * @val: Returned value of event counter. If @rgrp is a parent
>> resource group,
>> * @val includes the sum of event counts from its child
>> resource groups.
>> @@ -112,7 +112,7 @@ struct rmid_read {
>> struct rdt_mon_domain *d;
>> enum resctrl_event_id evtid;
>> bool first;
>> - struct cacheinfo *ci;
>> + unsigned int ci_id;
>> int err;
>> u64 val;
>> void *arch_mon_ctx;
>> diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c
>> index bde2801289d35..f5637855c3aca 100644
>> --- a/fs/resctrl/monitor.c
>> +++ b/fs/resctrl/monitor.c
>> @@ -361,6 +361,7 @@ static int __mon_event_count(u32 closid, u32 rmid,
>> struct rmid_read *rr)
>> {
>> int cpu = smp_processor_id();
>> struct rdt_mon_domain *d;
>> + struct cacheinfo *ci;
>> struct mbm_state *m;
>> int err, ret;
>> u64 tval = 0;
>> @@ -388,7 +389,8 @@ static int __mon_event_count(u32 closid, u32 rmid,
>> struct rmid_read *rr)
>> }
>> /* Summing domains that share a cache, must be on a CPU for that
>> cache. */
>> - if (!cpumask_test_cpu(cpu, &rr->ci->shared_cpu_map))
>> + ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
>> + if (!ci || ci->id != rr->ci_id)
>> return -EINVAL;
>> /*
>> @@ -400,7 +402,7 @@ static int __mon_event_count(u32 closid, u32 rmid,
>> struct rmid_read *rr)
>> */
>> ret = -EINVAL;
>> list_for_each_entry(d, &rr->r->mon_domains, hdr.list) {
>> - if (d->ci->id != rr->ci->id)
>> + if (d->ci_id != rr->ci_id)
>> continue;
>> err = resctrl_arch_rmid_read(rr->r, d, closid, rmid,
>> rr->evtid, &tval, rr->arch_mon_ctx);
>> diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
>> index cc37f58b47dd7..74b25bbb9872c 100644
>> --- a/fs/resctrl/rdtgroup.c
>> +++ b/fs/resctrl/rdtgroup.c
>> @@ -3034,7 +3034,7 @@ static void
>> rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
>> char name[32];
>> snc_mode = r->mon_scope == RESCTRL_L3_NODE;
>> - sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d-
>> >hdr.id);
>> + sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci_id : d-
>> >hdr.id);
>> if (snc_mode)
>> sprintf(subname, "mon_sub_%s_%02d", r->name, d->hdr.id);
>> @@ -3059,7 +3059,7 @@ static int mon_add_all_files(struct kernfs_node
>> *kn, struct rdt_mon_domain *d,
>> return -EPERM;
>> list_for_each_entry(mevt, &r->evt_list, list) {
>> - domid = do_sum ? d->ci->id : d->hdr.id;
>> + domid = do_sum ? d->ci_id : d->hdr.id;
>> priv = mon_get_kn_priv(r->rid, domid, mevt, do_sum);
>> if (WARN_ON_ONCE(!priv))
>> return -EINVAL;
>> @@ -3087,7 +3087,7 @@ static int mkdir_mondata_subdir(struct
>> kernfs_node *parent_kn,
>> lockdep_assert_held(&rdtgroup_mutex);
>> snc_mode = r->mon_scope == RESCTRL_L3_NODE;
>> - sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d-
>> >hdr.id);
>> + sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci_id : d-
>> >hdr.id);
>> kn = kernfs_find_and_get(parent_kn, name);
>> if (kn) {
>> /*
>> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
>> index 9ba771f2ddead..6fb4894b8cfd1 100644
>> --- a/include/linux/resctrl.h
>> +++ b/include/linux/resctrl.h
>> @@ -159,7 +159,7 @@ struct rdt_ctrl_domain {
>> /**
>> * struct rdt_mon_domain - group of CPUs sharing a resctrl monitor
>> resource
>> * @hdr: common header for different domain types
>> - * @ci: cache info for this domain
>> + * @ci_id: cache info id for this domain
>> * @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold
>> * @mbm_total: saved state for MBM total bandwidth
>> * @mbm_local: saved state for MBM local bandwidth
>> @@ -170,7 +170,7 @@ struct rdt_ctrl_domain {
>> */
>> struct rdt_mon_domain {
>> struct rdt_domain_hdr hdr;
>> - struct cacheinfo *ci;
>> + unsigned int ci_id;
>> unsigned long *rmid_busy_llc;
>> struct mbm_state *mbm_total;
>> struct mbm_state *mbm_local;
Powered by blists - more mailing lists