[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20250530182053.37502-2-qinyuntan@linux.alibaba.com>
Date: Sat, 31 May 2025 02:20:53 +0800
From: Qinyun Tan <qinyuntan@...ux.alibaba.com>
To: Tony Luck <tony.luck@...el.com>
Cc: "H . Peter Anvin" <hpa@...or.com>,
linux-kernel@...r.kernel.org,
x86@...nel.org,
Reinette Chatre <reinette.chatre@...el.com>,
Qinyun Tan <qinyuntan@...ux.alibaba.com>
Subject: [PATCH V6 1/1] x86,fs/resctrl: Remove unappropriate references to cacheinfo in the resctrl subsystem.
In the resctrl subsystem's Sub-NUMA Cluster (SNC) mode, the rdt_mon_domain
structure representing a NUMA node relies on the cacheinfo interface
(rdt_mon_domain::ci) to store L3 cache information (e.g., shared_cpu_map)
for monitoring. The L3 cache information of a SNC NUMA node determines
which domains are summed for the "top level" L3-scoped events.
rdt_mon_domain::ci is initialized using the first online CPU of a NUMA
node. When this CPU goes offline, its shared_cpu_map is cleared to contain
only the offline CPU itself. Subsequently, attempting to read counters
via smp_call_on_cpu(offline_cpu) fails (and error ignored), returning
zero values for "top-level events" without any error indication.
Replace the cacheinfo references in struct rdt_mon_domain and struct
rmid_read with the cacheinfo ID (a unique identifier for the L3 cache).
rdt_domain_hdr::cpu_mask contains the online CPUs associated with that
domain. When reading "top-level events", select a CPU from
rdt_domain_hdr::cpu_mask and utilize its L3 shared_cpu_map to determine
valid CPUs for reading RMID counter via the MSR interface.
Considering all CPUs associated with the L3 cache improves the chances
of picking a housekeeping CPU on which the counter reading work can be
queued, avoiding an unnecessary IPI.
Fixes: 328ea68874642 ("x86/resctrl: Prepare for new Sub-NUMA Cluster (SNC) monitor files")
Signed-off-by: Qinyun Tan <qinyuntan@...ux.alibaba.com>
Tested-by: Tony Luck <tony.luck@...el.com>
Reviewed-by: Reinette Chatre <reinette.chatre@...el.com>
---
arch/x86/kernel/cpu/resctrl/core.c | 6 ++++--
fs/resctrl/ctrlmondata.c | 13 +++++++++----
fs/resctrl/internal.h | 4 ++--
fs/resctrl/monitor.c | 6 ++++--
fs/resctrl/rdtgroup.c | 6 +++---
include/linux/resctrl.h | 4 ++--
6 files changed, 24 insertions(+), 15 deletions(-)
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 7109cbfcad4fd..187d527ef73b6 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -498,6 +498,7 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
struct rdt_hw_mon_domain *hw_dom;
struct rdt_domain_hdr *hdr;
struct rdt_mon_domain *d;
+ struct cacheinfo *ci;
int err;
lockdep_assert_held(&domain_list_lock);
@@ -525,12 +526,13 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
d = &hw_dom->d_resctrl;
d->hdr.id = id;
d->hdr.type = RESCTRL_MON_DOMAIN;
- d->ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
- if (!d->ci) {
+ ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
+ if (!ci) {
pr_warn_once("Can't find L3 cache for CPU:%d resource %s\n", cpu, r->name);
mon_domain_free(hw_dom);
return;
}
+ d->ci_id = ci->id;
cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
arch_mon_domain_online(r, d);
diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c
index 6ed2dfd4dbbd9..d98e0d2de09fd 100644
--- a/fs/resctrl/ctrlmondata.c
+++ b/fs/resctrl/ctrlmondata.c
@@ -594,9 +594,10 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
struct rmid_read rr = {0};
struct rdt_mon_domain *d;
struct rdtgroup *rdtgrp;
+ int domid, cpu, ret = 0;
struct rdt_resource *r;
+ struct cacheinfo *ci;
struct mon_data *md;
- int domid, ret = 0;
rdtgrp = rdtgroup_kn_lock_live(of->kn);
if (!rdtgrp) {
@@ -623,10 +624,14 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
* one that matches this cache id.
*/
list_for_each_entry(d, &r->mon_domains, hdr.list) {
- if (d->ci->id == domid) {
- rr.ci = d->ci;
+ if (d->ci_id == domid) {
+ rr.ci_id = d->ci_id;
+ cpu = cpumask_any(&d->hdr.cpu_mask);
+ ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
+ if (!ci)
+ continue;
mon_event_read(&rr, r, NULL, rdtgrp,
- &d->ci->shared_cpu_map, evtid, false);
+ &ci->shared_cpu_map, evtid, false);
goto checkresult;
}
}
diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h
index 9a8cf6f11151d..0a1eedba2b03a 100644
--- a/fs/resctrl/internal.h
+++ b/fs/resctrl/internal.h
@@ -98,7 +98,7 @@ struct mon_data {
* domains in @r sharing L3 @ci.id
* @evtid: Which monitor event to read.
* @first: Initialize MBM counter when true.
- * @ci: Cacheinfo for L3. Only set when @d is NULL. Used when summing domains.
+ * @ci_id: Cacheinfo id for L3. Only set when @d is NULL. Used when summing domains.
* @err: Error encountered when reading counter.
* @val: Returned value of event counter. If @rgrp is a parent resource group,
* @val includes the sum of event counts from its child resource groups.
@@ -112,7 +112,7 @@ struct rmid_read {
struct rdt_mon_domain *d;
enum resctrl_event_id evtid;
bool first;
- struct cacheinfo *ci;
+ unsigned int ci_id;
int err;
u64 val;
void *arch_mon_ctx;
diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c
index bde2801289d35..f5637855c3aca 100644
--- a/fs/resctrl/monitor.c
+++ b/fs/resctrl/monitor.c
@@ -361,6 +361,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
{
int cpu = smp_processor_id();
struct rdt_mon_domain *d;
+ struct cacheinfo *ci;
struct mbm_state *m;
int err, ret;
u64 tval = 0;
@@ -388,7 +389,8 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
}
/* Summing domains that share a cache, must be on a CPU for that cache. */
- if (!cpumask_test_cpu(cpu, &rr->ci->shared_cpu_map))
+ ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
+ if (!ci || ci->id != rr->ci_id)
return -EINVAL;
/*
@@ -400,7 +402,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
*/
ret = -EINVAL;
list_for_each_entry(d, &rr->r->mon_domains, hdr.list) {
- if (d->ci->id != rr->ci->id)
+ if (d->ci_id != rr->ci_id)
continue;
err = resctrl_arch_rmid_read(rr->r, d, closid, rmid,
rr->evtid, &tval, rr->arch_mon_ctx);
diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
index cc37f58b47dd7..74b25bbb9872c 100644
--- a/fs/resctrl/rdtgroup.c
+++ b/fs/resctrl/rdtgroup.c
@@ -3034,7 +3034,7 @@ static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
char name[32];
snc_mode = r->mon_scope == RESCTRL_L3_NODE;
- sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
+ sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci_id : d->hdr.id);
if (snc_mode)
sprintf(subname, "mon_sub_%s_%02d", r->name, d->hdr.id);
@@ -3059,7 +3059,7 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
return -EPERM;
list_for_each_entry(mevt, &r->evt_list, list) {
- domid = do_sum ? d->ci->id : d->hdr.id;
+ domid = do_sum ? d->ci_id : d->hdr.id;
priv = mon_get_kn_priv(r->rid, domid, mevt, do_sum);
if (WARN_ON_ONCE(!priv))
return -EINVAL;
@@ -3087,7 +3087,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
lockdep_assert_held(&rdtgroup_mutex);
snc_mode = r->mon_scope == RESCTRL_L3_NODE;
- sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
+ sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci_id : d->hdr.id);
kn = kernfs_find_and_get(parent_kn, name);
if (kn) {
/*
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 9ba771f2ddead..6fb4894b8cfd1 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -159,7 +159,7 @@ struct rdt_ctrl_domain {
/**
* struct rdt_mon_domain - group of CPUs sharing a resctrl monitor resource
* @hdr: common header for different domain types
- * @ci: cache info for this domain
+ * @ci_id: cache info id for this domain
* @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold
* @mbm_total: saved state for MBM total bandwidth
* @mbm_local: saved state for MBM local bandwidth
@@ -170,7 +170,7 @@ struct rdt_ctrl_domain {
*/
struct rdt_mon_domain {
struct rdt_domain_hdr hdr;
- struct cacheinfo *ci;
+ unsigned int ci_id;
unsigned long *rmid_busy_llc;
struct mbm_state *mbm_total;
struct mbm_state *mbm_local;
--
2.43.5
Powered by blists - more mailing lists