[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250526073744.62520-1-qinyuntan@linux.alibaba.com>
Date: Mon, 26 May 2025 15:37:44 +0800
From: Qinyun Tan <qinyuntan@...ux.alibaba.com>
To: Tony Luck <tony.luck@...el.com>
Cc: "H . Peter Anvin" <hpa@...or.com>,
linux-kernel@...r.kernel.org,
x86@...nel.org,
Reinette Chatre <reinette.chatre@...el.com>,
Qinyun Tan <qinyuntan@...ux.alibaba.com>
Subject: [PATCH] x86/resctrl: Remove unnecessary references to cacheinfo in the resctrl subsystem.
In the resctrl subsystem's Sub-NUMA Cluster (SNC) mode, the rdt_mon_domain
structure previously relied on the cacheinfo interface to store L3 cache
information (e.g., shared_cpu_map) for monitoring. However, this approach
introduces risks when CPUs go offline:
1. Inconsistency: The ci field in rdt_mon_domain was initialized using the
first online CPU of a NUMA node. If this CPU later goes offline, the
shared_cpu_map (managed by the cache subsystem) is cleared, leading to
incorrect or undefined behavior when accessed via rdt_mon_domain.
2. Lifecycle dependency: The cacheinfo structure's lifecycle is managed
by the cache subsystem, making it unsafe for resctrl to hold
long-term references.
To resolve these issues and align with design principles:
1. Replace direct cacheinfo references in struct rdt_mon_domain and struct
rmid_read with the cacheinfo ID (a unique identifier for the L3 cache).
2. Use hdr.cpu_mask (already maintained by resctrl) to replace
shared_cpu_map logic for determining valid CPUs for RMID counter reads
via the MSR interface.
Signed-off-by: Qinyun Tan <qinyuntan@...ux.alibaba.com>
---
arch/x86/kernel/cpu/resctrl/core.c | 6 ++++--
arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 6 +++---
arch/x86/kernel/cpu/resctrl/internal.h | 4 ++--
arch/x86/kernel/cpu/resctrl/monitor.c | 6 +-----
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 6 +++---
include/linux/resctrl.h | 4 ++--
6 files changed, 15 insertions(+), 17 deletions(-)
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index cf29681d01e04..a0dff742e9e93 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -516,6 +516,7 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
struct rdt_hw_mon_domain *hw_dom;
struct rdt_domain_hdr *hdr;
struct rdt_mon_domain *d;
+ struct cacheinfo *ci;
int err;
lockdep_assert_held(&domain_list_lock);
@@ -543,12 +544,13 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
d = &hw_dom->d_resctrl;
d->hdr.id = id;
d->hdr.type = RESCTRL_MON_DOMAIN;
- d->ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
- if (!d->ci) {
+ ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE);
+ if (!ci) {
pr_warn_once("Can't find L3 cache for CPU:%d resource %s\n", cpu, r->name);
mon_domain_free(hw_dom);
return;
}
+ d->ci_id = ci->id;
cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
arch_mon_domain_online(r, d);
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index 0a0ac5f6112ec..f9768669ce806 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -690,10 +690,10 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
* one that matches this cache id.
*/
list_for_each_entry(d, &r->mon_domains, hdr.list) {
- if (d->ci->id == domid) {
- rr.ci = d->ci;
+ if (d->ci_id == domid) {
+ rr.ci_id = d->ci_id;
mon_event_read(&rr, r, NULL, rdtgrp,
- &d->ci->shared_cpu_map, evtid, false);
+ &d->hdr.cpu_mask, evtid, false);
goto checkresult;
}
}
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index eaae99602b617..91e71db554a9c 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -136,7 +136,7 @@ union mon_data_bits {
* domains in @r sharing L3 @ci.id
* @evtid: Which monitor event to read.
* @first: Initialize MBM counter when true.
- * @ci: Cacheinfo for L3. Only set when @d is NULL. Used when summing domains.
+ * @ci_id: Cacheinfo id for L3. Only set when @d is NULL. Used when summing domains.
* @err: Error encountered when reading counter.
* @val: Returned value of event counter. If @rgrp is a parent resource group,
* @val includes the sum of event counts from its child resource groups.
@@ -150,7 +150,7 @@ struct rmid_read {
struct rdt_mon_domain *d;
enum resctrl_event_id evtid;
bool first;
- struct cacheinfo *ci;
+ unsigned int ci_id;
int err;
u64 val;
void *arch_mon_ctx;
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index a93ed7d2a1602..bedccd62158c3 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -620,10 +620,6 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
return 0;
}
- /* Summing domains that share a cache, must be on a CPU for that cache. */
- if (!cpumask_test_cpu(cpu, &rr->ci->shared_cpu_map))
- return -EINVAL;
-
/*
* Legacy files must report the sum of an event across all
* domains that share the same L3 cache instance.
@@ -633,7 +629,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
*/
ret = -EINVAL;
list_for_each_entry(d, &rr->r->mon_domains, hdr.list) {
- if (d->ci->id != rr->ci->id)
+ if (d->ci_id != rr->ci_id)
continue;
err = resctrl_arch_rmid_read(rr->r, d, closid, rmid,
rr->evtid, &tval, rr->arch_mon_ctx);
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index cc4a54145c83d..075fdca2080d8 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -3146,7 +3146,7 @@ static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
char name[32];
snc_mode = r->mon_scope == RESCTRL_L3_NODE;
- sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
+ sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci_id : d->hdr.id);
if (snc_mode)
sprintf(subname, "mon_sub_%s_%02d", r->name, d->hdr.id);
@@ -3171,7 +3171,7 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
return -EPERM;
priv.u.rid = r->rid;
- priv.u.domid = do_sum ? d->ci->id : d->hdr.id;
+ priv.u.domid = do_sum ? d->ci_id : d->hdr.id;
priv.u.sum = do_sum;
list_for_each_entry(mevt, &r->evt_list, list) {
priv.u.evtid = mevt->evtid;
@@ -3198,7 +3198,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
lockdep_assert_held(&rdtgroup_mutex);
snc_mode = r->mon_scope == RESCTRL_L3_NODE;
- sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
+ sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci_id : d->hdr.id);
kn = kernfs_find_and_get(parent_kn, name);
if (kn) {
/*
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 880351ca3dfcb..c990670d18c02 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -145,7 +145,7 @@ struct rdt_ctrl_domain {
/**
* struct rdt_mon_domain - group of CPUs sharing a resctrl monitor resource
* @hdr: common header for different domain types
- * @ci: cache info for this domain
+ * @ci_id: cache info id for this domain
* @rmid_busy_llc: bitmap of which limbo RMIDs are above threshold
* @mbm_total: saved state for MBM total bandwidth
* @mbm_local: saved state for MBM local bandwidth
@@ -156,7 +156,7 @@ struct rdt_ctrl_domain {
*/
struct rdt_mon_domain {
struct rdt_domain_hdr hdr;
- struct cacheinfo *ci;
+ unsigned int ci_id;
unsigned long *rmid_busy_llc;
struct mbm_state *mbm_total;
struct mbm_state *mbm_local;
--
2.43.5
Powered by blists - more mailing lists