[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <aOBUW81rwl3P7zQE@agluck-desk3>
Date: Fri, 3 Oct 2025 15:55:23 -0700
From: "Luck, Tony" <tony.luck@...el.com>
To: Reinette Chatre <reinette.chatre@...el.com>
CC: Fenghua Yu <fenghuay@...dia.com>, Maciej Wieczor-Retman
<maciej.wieczor-retman@...el.com>, Peter Newman <peternewman@...gle.com>,
James Morse <james.morse@....com>, Babu Moger <babu.moger@....com>, "Drew
Fustini" <dfustini@...libre.com>, Dave Martin <Dave.Martin@....com>, Chen Yu
<yu.c.chen@...el.com>, <x86@...nel.org>, <linux-kernel@...r.kernel.org>,
<patches@...ts.linux.dev>
Subject: Re: [PATCH v11 05/31] x86,fs/resctrl: Refactor domain create/remove
using struct rdt_domain_hdr
On Fri, Oct 03, 2025 at 08:33:00AM -0700, Reinette Chatre wrote:
> Hi Tony,
>
> On 9/25/25 1:02 PM, Tony Luck wrote:
> > Up until now, all monitoring events were associated with the L3 resource
> > and it made sense to use the L3 specific "struct rdt_mon_domain *"
> > arguments to functions manipulating domains.
> >
> > To simplify enabling of enumeration of domains for events in other
>
> What does "enabling of enumeration of domains" mean?
Is this better?
To prepare for events in resources other than L3, change the calling convention
to pass the generic struct rdt_domain_hdr and use that to find the domain specific
structure where needed.
>
> > resources change the calling convention to pass the generic struct
> > rdt_domain_hdr and use that to find the domain specific structure
> > where needed.
>
> I think it will be helpful to highlight that this is a stepping stone
> that highlights what domain management code is L3 specific and thus in
> need of further refactoring to support new domain types vs. what is generic.
The re-wording above covers this.
>
> >
> > Signed-off-by: Tony Luck <tony.luck@...el.com>
> > ---
>
> ...
>
> > diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c
> > index f248eaf50d3c..3ceef35208be 100644
> > --- a/fs/resctrl/ctrlmondata.c
> > +++ b/fs/resctrl/ctrlmondata.c
> > @@ -547,11 +547,16 @@ struct rdt_domain_hdr *resctrl_find_domain(struct list_head *h, int id,
> > }
> >
> > void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
> > - struct rdt_mon_domain *d, struct rdtgroup *rdtgrp,
> > + struct rdt_domain_hdr *hdr, struct rdtgroup *rdtgrp,
> > cpumask_t *cpumask, int evtid, int first)
> > {
> > + struct rdt_mon_domain *d;
> > int cpu;
> >
> > + if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3))
>
> hdr can be NULL here so this is not safe. I understand this is removed in the next
> patch but it is difficult to reason about the code if the steps are not solid.
Will fix.
>
> > + return;
> > + d = container_of(hdr, struct rdt_mon_domain, hdr);
> > +
> > /* When picking a CPU from cpu_mask, ensure it can't race with cpuhp */
> > lockdep_assert_cpus_held();
> >
> > @@ -598,7 +603,6 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
> > enum resctrl_event_id evtid;
> > struct rdt_domain_hdr *hdr;
> > struct rmid_read rr = {0};
> > - struct rdt_mon_domain *d;
> > struct rdtgroup *rdtgrp;
> > int domid, cpu, ret = 0;
> > struct rdt_resource *r;
> > @@ -623,6 +627,8 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
> > r = resctrl_arch_get_resource(resid);
> >
> > if (md->sum) {
> > + struct rdt_mon_domain *d;
> > +
> > /*
> > * This file requires summing across all domains that share
> > * the L3 cache id that was provided in the "domid" field of the
> > @@ -649,12 +655,11 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
> > * the resource to find the domain with "domid".
> > */
> > hdr = resctrl_find_domain(&r->mon_domains, domid, NULL);
> > - if (!hdr || !domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, resid)) {
> > + if (!hdr) {
> > ret = -ENOENT;
> > goto out;
> > }
> > - d = container_of(hdr, struct rdt_mon_domain, hdr);
> > - mon_event_read(&rr, r, d, rdtgrp, &d->hdr.cpu_mask, evtid, false);
> > + mon_event_read(&rr, r, hdr, rdtgrp, &hdr->cpu_mask, evtid, false);
> > }
> >
> > checkresult:
> > diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
> > index 0320360cd7a6..e3b83e48f2d9 100644
> > --- a/fs/resctrl/rdtgroup.c
> > +++ b/fs/resctrl/rdtgroup.c
> > @@ -3164,13 +3164,18 @@ static void mon_rmdir_one_subdir(struct kernfs_node *pkn, char *name, char *subn
> > * when last domain being summed is removed.
> > */
> > static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
> > - struct rdt_mon_domain *d)
> > + struct rdt_domain_hdr *hdr)
> > {
> > struct rdtgroup *prgrp, *crgrp;
> > + struct rdt_mon_domain *d;
> > char subname[32];
> > bool snc_mode;
> > char name[32];
> >
> > + if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3))
> > + return;
> > +
> > + d = container_of(hdr, struct rdt_mon_domain, hdr);
> > snc_mode = r->mon_scope == RESCTRL_L3_NODE;
> > sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci_id : d->hdr.id);
> > if (snc_mode)
> > @@ -3184,19 +3189,18 @@ static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
> > }
> > }
> >
> > -static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
> > +static int mon_add_all_files(struct kernfs_node *kn, struct rdt_domain_hdr *hdr,
> > struct rdt_resource *r, struct rdtgroup *prgrp,
> > - bool do_sum)
> > + int domid, bool do_sum)
> > {
> > struct rmid_read rr = {0};
> > struct mon_data *priv;
> > struct mon_evt *mevt;
> > - int ret, domid;
> > + int ret;
> >
> > for_each_mon_event(mevt) {
> > if (mevt->rid != r->rid || !mevt->enabled)
> > continue;
> > - domid = do_sum ? d->ci_id : d->hdr.id;
>
> Looks like an unrelated change. Would this not be more appropriate for "fs/resctrl: Refactor Sub-NUMA
> Cluster (SNC) in mkdir/rmdir code flow"?
Agreed. I'll cut this out and move it to the later patch.
>
> > priv = mon_get_kn_priv(r->rid, domid, mevt, do_sum);
> > if (WARN_ON_ONCE(!priv))
> > return -EINVAL;
> > @@ -3206,23 +3210,28 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
> > return ret;
> >
> > if (!do_sum && resctrl_is_mbm_event(mevt->evtid))
> > - mon_event_read(&rr, r, d, prgrp, &d->hdr.cpu_mask, mevt->evtid, true);
> > + mon_event_read(&rr, r, hdr, prgrp, &hdr->cpu_mask, mevt->evtid, true);
> > }
> >
> > return 0;
> > }
> >
> > static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
> > - struct rdt_mon_domain *d,
> > + struct rdt_domain_hdr *hdr,
> > struct rdt_resource *r, struct rdtgroup *prgrp)
> > {
> > struct kernfs_node *kn, *ckn;
> > + struct rdt_mon_domain *d;
> > char name[32];
> > bool snc_mode;
> > int ret = 0;
> >
> > lockdep_assert_held(&rdtgroup_mutex);
> >
> > + if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3))
> > + return -EINVAL;
> > +
> > + d = container_of(hdr, struct rdt_mon_domain, hdr);
> > snc_mode = r->mon_scope == RESCTRL_L3_NODE;
> > sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci_id : d->hdr.id);
> > kn = kernfs_find_and_get(parent_kn, name);
> > @@ -3240,13 +3249,13 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
> > ret = rdtgroup_kn_set_ugid(kn);
> > if (ret)
> > goto out_destroy;
> > - ret = mon_add_all_files(kn, d, r, prgrp, snc_mode);
> > + ret = mon_add_all_files(kn, hdr, r, prgrp, hdr->id, snc_mode);
>
> This does not seem right ... looks like this aims to do some of the SNC enabling but
> the domain id is always set to the domain of the node and does not distinguish between
> the L3 id and node id?
Will also move this to the later patch (and get it right).
>
> > if (ret)
> > goto out_destroy;
> > }
> >
> > if (snc_mode) {
> > - sprintf(name, "mon_sub_%s_%02d", r->name, d->hdr.id);
> > + sprintf(name, "mon_sub_%s_%02d", r->name, hdr->id);
> > ckn = kernfs_create_dir(kn, name, parent_kn->mode, prgrp);
> > if (IS_ERR(ckn)) {
> > ret = -EINVAL;
> > @@ -3257,7 +3266,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
> > if (ret)
> > goto out_destroy;
> >
> > - ret = mon_add_all_files(ckn, d, r, prgrp, false);
> > + ret = mon_add_all_files(ckn, hdr, r, prgrp, hdr->id, false);
> > if (ret)
> > goto out_destroy;
> > }
> > @@ -3275,7 +3284,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
> > * and "monitor" groups with given domain id.
> > */
> > static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
> > - struct rdt_mon_domain *d)
> > + struct rdt_domain_hdr *hdr)
> > {
> > struct kernfs_node *parent_kn;
> > struct rdtgroup *prgrp, *crgrp;
> > @@ -3283,12 +3292,12 @@ static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
> >
> > list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
> > parent_kn = prgrp->mon.mon_data_kn;
> > - mkdir_mondata_subdir(parent_kn, d, r, prgrp);
> > + mkdir_mondata_subdir(parent_kn, hdr, r, prgrp);
> >
> > head = &prgrp->mon.crdtgrp_list;
> > list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
> > parent_kn = crgrp->mon.mon_data_kn;
> > - mkdir_mondata_subdir(parent_kn, d, r, crgrp);
> > + mkdir_mondata_subdir(parent_kn, hdr, r, crgrp);
> > }
> > }
> > }
> > @@ -3297,14 +3306,14 @@ static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
> > struct rdt_resource *r,
> > struct rdtgroup *prgrp)
> > {
> > - struct rdt_mon_domain *dom;
> > + struct rdt_domain_hdr *hdr;
> > int ret;
> >
> > /* Walking r->domains, ensure it can't race with cpuhp */
> > lockdep_assert_cpus_held();
> >
> > - list_for_each_entry(dom, &r->mon_domains, hdr.list) {
> > - ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp);
> > + list_for_each_entry(hdr, &r->mon_domains, list) {
> > + ret = mkdir_mondata_subdir(parent_kn, hdr, r, prgrp);
> > if (ret)
> > return ret;
> > }
> > @@ -4187,8 +4196,10 @@ void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain
> > mutex_unlock(&rdtgroup_mutex);
> > }
> >
> > -void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d)
> > +void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_domain_hdr *hdr)
> > {
> > + struct rdt_mon_domain *d;
> > +
> > mutex_lock(&rdtgroup_mutex);
> >
> > /*
> > @@ -4196,8 +4207,12 @@ void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d
> > * per domain monitor data directories.
> > */
> > if (resctrl_mounted && resctrl_arch_mon_capable())
> > - rmdir_mondata_subdir_allrdtgrp(r, d);
> > + rmdir_mondata_subdir_allrdtgrp(r, hdr);
> >
> > + if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3))
> > + goto out_unlock;
> > +
>
> One logical change per patch please.
>
> While all other L3 specific functions modified to receive hdr as parameter are changed to use
> container_of() at beginning of function to highlight that the functions are L3 specific ...
> resctrl_offline_mon_domain() is changed differently. Looks like this changes the flow to
> sneak in some PERF_PKG enabling for convenience and thus makes this patch harder to understand.
> Splitting resctrl_offline_mon_domain() to handle different domain types seems more appropriate
> for "x86/resctrl: Handle domain creation/deletion for RDT_RESOURCE_PERF_PKG" where it should be
> clear what changes are made to support PERF_PKG. In this patch, in this stage of series, the
> entire function can be L3 specific.
Will move this to the later patch and make the offline/online patches have the
same style.
>
> > + d = container_of(hdr, struct rdt_mon_domain, hdr);
> > if (resctrl_is_mbm_enabled())
> > cancel_delayed_work(&d->mbm_over);
> > if (resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID) && has_busy_rmid(d)) {
> > @@ -4214,7 +4229,7 @@ void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d
> > }
> >
> > domain_destroy_mon_state(d);
> > -
> > +out_unlock:
> > mutex_unlock(&rdtgroup_mutex);
> > }
> >
> > @@ -4287,12 +4302,17 @@ int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d
> > return err;
> > }
> >
> > -int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d)
> > +int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_domain_hdr *hdr)
> > {
> > - int err;
> > + struct rdt_mon_domain *d;
> > + int err = -EINVAL;
> >
> > mutex_lock(&rdtgroup_mutex);
> >
> > + if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3))
> > + goto out_unlock;
> > +
> > + d = container_of(hdr, struct rdt_mon_domain, hdr);
> > err = domain_setup_mon_state(r, d);
> > if (err)
> > goto out_unlock;
> > @@ -4306,6 +4326,7 @@ int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d)
> > if (resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID))
> > INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo);
> >
> > + err = 0;
>
> Considering the earlier exit on "if (err)", err can be expected to be 0 here?
Yes. Dropped this superfluous assignment.
>
> > /*
> > * If the filesystem is not mounted then only the default resource group
> > * exists. Creation of its directories is deferred until mount time
> > @@ -4313,7 +4334,7 @@ int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d)
> > * If resctrl is mounted, add per domain monitor data directories.
> > */
> > if (resctrl_mounted && resctrl_arch_mon_capable())
> > - mkdir_mondata_subdir_allrdtgrp(r, d);
> > + mkdir_mondata_subdir_allrdtgrp(r, hdr);
> >
> > out_unlock:
> > mutex_unlock(&rdtgroup_mutex);
>
> Reinette
-Tony
Powered by blists - more mailing lists