lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <02c7b00c-1f48-444e-b8da-6787c5a12348@intel.com>
Date: Tue, 3 Jun 2025 20:37:31 -0700
From: Reinette Chatre <reinette.chatre@...el.com>
To: Tony Luck <tony.luck@...el.com>, Fenghua Yu <fenghuay@...dia.com>, "Maciej
 Wieczor-Retman" <maciej.wieczor-retman@...el.com>, Peter Newman
	<peternewman@...gle.com>, James Morse <james.morse@....com>, Babu Moger
	<babu.moger@....com>, Drew Fustini <dfustini@...libre.com>, Dave Martin
	<Dave.Martin@....com>, Anil Keshavamurthy <anil.s.keshavamurthy@...el.com>,
	Chen Yu <yu.c.chen@...el.com>
CC: <x86@...nel.org>, <linux-kernel@...r.kernel.org>,
	<patches@...ts.linux.dev>
Subject: Re: [PATCH v5 10/29] x86/resctrl: Change generic domain functions to
 use struct rdt_domain_hdr

Hi Tony,

On 5/21/25 3:50 PM, Tony Luck wrote:
> Historically all monitoring events have been associated with the L3
> resource and it made sense to use "struct rdt_mon_domain *" arguments
> to functions manipulating domains. But the addition of monitor events
> tied to other resources changes this assumption.
> 
> Some functionality like:
> *) adding a CPU to an existing domain
> *) removing a CPU that is not the last one from a domain
> can be achieved with just access to the rdt_domain_hdr structure.
> 
> Change arguments from "rdt_*_domain" to rdt_domain_hdr so functions
> can be used on domains from any resource.
> 
> Add sanity checks where container_of() is used to find the surrounding
> domain structure that hdr has the expected type.
> 
> Simplify code that uses "d->hdr." to "hdr->" where possible.
> 
> Signed-off-by: Tony Luck <tony.luck@...el.com>
> ---
>  include/linux/resctrl.h            |  4 +-
>  arch/x86/kernel/cpu/resctrl/core.c | 39 +++++++-------
>  fs/resctrl/rdtgroup.c              | 83 +++++++++++++++++++++---------
>  3 files changed, 79 insertions(+), 47 deletions(-)
> 
> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
> index d6b09952ef92..c02a4d59f3eb 100644
> --- a/include/linux/resctrl.h
> +++ b/include/linux/resctrl.h
> @@ -444,9 +444,9 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d,
>  u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_ctrl_domain *d,
>  			    u32 closid, enum resctrl_conf_type type);
>  int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d);
> -int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d);
> +int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_domain_hdr *hdr);
>  void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d);
> -void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d);
> +void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_domain_hdr *hdr);
>  void resctrl_online_cpu(unsigned int cpu);
>  void resctrl_offline_cpu(unsigned int cpu);
>  
> diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
> index e4125161ffbd..71b884f25475 100644
> --- a/arch/x86/kernel/cpu/resctrl/core.c
> +++ b/arch/x86/kernel/cpu/resctrl/core.c
> @@ -458,9 +458,7 @@ static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r)
>  	if (hdr) {
>  		if (!domain_header_is_valid(hdr, RESCTRL_CTRL_DOMAIN, r->rid))
>  			return;
> -		d = container_of(hdr, struct rdt_ctrl_domain, hdr);
> -
> -		cpumask_set_cpu(cpu, &d->hdr.cpu_mask);
> +		cpumask_set_cpu(cpu, &hdr->cpu_mask);
>  		if (r->cache.arch_has_per_cpu_cfg)
>  			rdt_domain_reconfigure_cdp(r);
>  		return;
> @@ -524,7 +522,7 @@ static void l3_mon_domain_setup(int cpu, int id, struct rdt_resource *r, struct
>  
>  	list_add_tail_rcu(&d->hdr.list, add_pos);
>  
> -	err = resctrl_online_mon_domain(r, d);
> +	err = resctrl_online_mon_domain(r, &d->hdr);
>  	if (err) {
>  		list_del_rcu(&d->hdr.list);
>  		synchronize_rcu();
> @@ -597,25 +595,24 @@ static void domain_remove_cpu_ctrl(int cpu, struct rdt_resource *r)
>  	if (!domain_header_is_valid(hdr, RESCTRL_CTRL_DOMAIN, r->rid))
>  		return;
>  
> +	cpumask_clear_cpu(cpu, &d->hdr.cpu_mask);
> +	if (!cpumask_empty(&hdr->cpu_mask))
> +		return;
> +
>  	d = container_of(hdr, struct rdt_ctrl_domain, hdr);
>  	hw_dom = resctrl_to_arch_ctrl_dom(d);
>  
> -	cpumask_clear_cpu(cpu, &d->hdr.cpu_mask);
> -	if (cpumask_empty(&d->hdr.cpu_mask)) {
> -		resctrl_offline_ctrl_domain(r, d);
> -		list_del_rcu(&d->hdr.list);
> -		synchronize_rcu();
> -
> -		/*
> -		 * rdt_ctrl_domain "d" is going to be freed below, so clear
> -		 * its pointer from pseudo_lock_region struct.
> -		 */
> -		if (d->plr)
> -			d->plr->d = NULL;
> -		ctrl_domain_free(hw_dom);
> +	resctrl_offline_ctrl_domain(r, d);
> +	list_del_rcu(&hdr->list);
> +	synchronize_rcu();
>  
> -		return;
> -	}
> +	/*
> +	 * rdt_ctrl_domain "d" is going to be freed below, so clear
> +	 * its pointer from pseudo_lock_region struct.
> +	 */
> +	if (d->plr)
> +		d->plr->d = NULL;
> +	ctrl_domain_free(hw_dom);
>  }
>  


How does this hunk relate to the changelog? It seems like unrelated refactoring
to have have domain_remove_cpu_ctrl() have similar flow as domain_remove_cpu_mon()
after changes in previous patch.


>  static void domain_remove_cpu_mon(int cpu, struct rdt_resource *r)
> @@ -651,8 +648,8 @@ static void domain_remove_cpu_mon(int cpu, struct rdt_resource *r)
>  	case RDT_RESOURCE_L3:
>  		d = container_of(hdr, struct rdt_mon_domain, hdr);
>  		hw_dom = resctrl_to_arch_mon_dom(d);
> -		resctrl_offline_mon_domain(r, d);
> -		list_del_rcu(&d->hdr.list);
> +		resctrl_offline_mon_domain(r, hdr);
> +		list_del_rcu(&hdr->list);
>  		synchronize_rcu();
>  		l3_mon_domain_free(hw_dom);
>  		break;
> diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
> index 828c743ec470..0213fb3a1113 100644
> --- a/fs/resctrl/rdtgroup.c
> +++ b/fs/resctrl/rdtgroup.c
> @@ -3022,7 +3022,7 @@ static void mon_rmdir_one_subdir(struct kernfs_node *pkn, char *name, char *subn
>   * when last domain being summed is removed.
>   */
>  static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
> -					   struct rdt_mon_domain *d)
> +					   struct rdt_domain_hdr *hdr)
>  {
>  	struct rdtgroup *prgrp, *crgrp;
>  	char subname[32];
> @@ -3030,9 +3030,17 @@ static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
>  	char name[32];
>  
>  	snc_mode = r->mon_scope == RESCTRL_L3_NODE;
> -	sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
> -	if (snc_mode)
> -		sprintf(subname, "mon_sub_%s_%02d", r->name, d->hdr.id);
> +	if (snc_mode) {
> +		struct rdt_mon_domain *d;
> +
> +		if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, r->rid))
> +			return;

Here is another example where I believe RDT_RESOURCE_L3 is more appropriate
than r->rid because SNC mode only applies to L3.

> +		d = container_of(hdr, struct rdt_mon_domain, hdr);
> +		sprintf(name, "mon_%s_%02d", r->name, d->ci->id);
> +		sprintf(subname, "mon_sub_%s_%02d", r->name, hdr->id);
> +	} else {
> +		sprintf(name, "mon_%s_%02d", r->name, hdr->id);
> +	}
>  
>  	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
>  		mon_rmdir_one_subdir(prgrp->mon.mon_data_kn, name, subname);
> @@ -3042,11 +3050,12 @@ static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
>  	}
>  }
>  
> -static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
> +static int mon_add_all_files(struct kernfs_node *kn, struct rdt_domain_hdr *hdr,
>  			     struct rdt_resource *r, struct rdtgroup *prgrp,
>  			     bool do_sum)
>  {
>  	struct rmid_read rr = {0};
> +	struct rdt_mon_domain *d;

This may need an initialization here to eliminate the "may be used uninitialized" warning
during build.

>  	struct mon_data *priv;
>  	struct mon_evt *mevt;
>  	int ret, domid;
> @@ -3054,7 +3063,14 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
>  	for (mevt = &mon_event_all[0]; mevt < &mon_event_all[QOS_NUM_EVENTS]; mevt++) {
>  		if (mevt->rid != r->rid || !mevt->enabled)
>  			continue;
> -		domid = do_sum ? d->ci->id : d->hdr.id;
> +		if (r->rid == RDT_RESOURCE_L3) {
> +			if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, r->rid))

Considering the preceding "if()" ... r->rid is essentially RDT_RESOURCE_L3 and
can be made explicit.

> +				return -EINVAL;
> +			d = container_of(hdr, struct rdt_mon_domain, hdr);
> +			domid = do_sum ? d->ci->id : d->hdr.id;
> +		} else {
> +			domid = hdr->id;
> +		}
>  		priv = mon_get_kn_priv(r->rid, domid, mevt, do_sum);

Another subtle change is that mon_get_kn_priv() was created for L3 resource
and now being subtly switched to be a generic utility.
Consider its last parameter documented as "Whether SNC summing monitors are
being created.". Surely that can never be set for any resource except L3.
Silently wedging things in like this makes this work difficult to consume.
At least the function's kernel-doc should change, it could benefit from
a warning if do_sum is ever true for a resource that is not L3.

Simlarly this work just silently also takes ownership of struct mon_data.
How does its @sum member apply here? That kernel-doc could also do with an
update stating when @sum can be expected to be valid. Increasingly subtle things
are left to the reader to decipher and it is looking more like this work aims
to wedge itself into resctrl instead of aiming to achieve clean integration.

>  		if (WARN_ON_ONCE(!priv))
>  			return -EINVAL;
> @@ -3063,18 +3079,19 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
>  		if (ret)
>  			return ret;
>  
> -		if (!do_sum && resctrl_is_mbm_event(mevt->evtid))
> -			mon_event_read(&rr, r, d, prgrp, &d->hdr.cpu_mask, mevt->evtid, true);
> +		if (r->rid == RDT_RESOURCE_L3 && !do_sum && resctrl_is_mbm_event(mevt->evtid))
> +			mon_event_read(&rr, r, d, prgrp, &hdr->cpu_mask, mevt->evtid, true);
>  	}
>  
>  	return 0;
>  }
>  
>  static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
> -				struct rdt_mon_domain *d,
> +				struct rdt_domain_hdr *hdr,
>  				struct rdt_resource *r, struct rdtgroup *prgrp)
>  {
>  	struct kernfs_node *kn, *ckn;
> +	struct rdt_mon_domain *d;
>  	char name[32];
>  	bool snc_mode;
>  	int ret = 0;
> @@ -3082,7 +3099,14 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
>  	lockdep_assert_held(&rdtgroup_mutex);
>  
>  	snc_mode = r->mon_scope == RESCTRL_L3_NODE;
> -	sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
> +	if (snc_mode) {
> +		if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, r->rid))
> +			return -EINVAL;

Same wrt explicit check using L3 resource.

> +		d = container_of(hdr, struct rdt_mon_domain, hdr);
> +		sprintf(name, "mon_%s_%02d", r->name, d->ci->id);
> +	} else {
> +		sprintf(name, "mon_%s_%02d", r->name, hdr->id);
> +	}
>  	kn = kernfs_find_and_get(parent_kn, name);
>  	if (kn) {
>  		/*
> @@ -3098,13 +3122,13 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
>  		ret = rdtgroup_kn_set_ugid(kn);
>  		if (ret)
>  			goto out_destroy;
> -		ret = mon_add_all_files(kn, d, r, prgrp, snc_mode);
> +		ret = mon_add_all_files(kn, hdr, r, prgrp, snc_mode);
>  		if (ret)
>  			goto out_destroy;
>  	}
>  
>  	if (snc_mode) {
> -		sprintf(name, "mon_sub_%s_%02d", r->name, d->hdr.id);
> +		sprintf(name, "mon_sub_%s_%02d", r->name, hdr->id);
>  		ckn = kernfs_create_dir(kn, name, parent_kn->mode, prgrp);
>  		if (IS_ERR(ckn)) {
>  			ret = -EINVAL;
> @@ -3115,7 +3139,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
>  		if (ret)
>  			goto out_destroy;
>  
> -		ret = mon_add_all_files(ckn, d, r, prgrp, false);
> +		ret = mon_add_all_files(ckn, hdr, r, prgrp, false);
>  		if (ret)
>  			goto out_destroy;
>  	}
> @@ -3133,7 +3157,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
>   * and "monitor" groups with given domain id.
>   */
>  static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
> -					   struct rdt_mon_domain *d)
> +					   struct rdt_domain_hdr *hdr)
>  {
>  	struct kernfs_node *parent_kn;
>  	struct rdtgroup *prgrp, *crgrp;
> @@ -3141,12 +3165,12 @@ static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
>  
>  	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
>  		parent_kn = prgrp->mon.mon_data_kn;
> -		mkdir_mondata_subdir(parent_kn, d, r, prgrp);
> +		mkdir_mondata_subdir(parent_kn, hdr, r, prgrp);
>  
>  		head = &prgrp->mon.crdtgrp_list;
>  		list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
>  			parent_kn = crgrp->mon.mon_data_kn;
> -			mkdir_mondata_subdir(parent_kn, d, r, crgrp);
> +			mkdir_mondata_subdir(parent_kn, hdr, r, crgrp);
>  		}
>  	}
>  }
> @@ -3155,14 +3179,14 @@ static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
>  				       struct rdt_resource *r,
>  				       struct rdtgroup *prgrp)
>  {
> -	struct rdt_mon_domain *dom;
> +	struct rdt_domain_hdr *hdr;
>  	int ret;
>  
>  	/* Walking r->domains, ensure it can't race with cpuhp */
>  	lockdep_assert_cpus_held();
>  
> -	list_for_each_entry(dom, &r->mon_domains, hdr.list) {
> -		ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp);
> +	list_for_each_entry(hdr, &r->mon_domains, list) {
> +		ret = mkdir_mondata_subdir(parent_kn, hdr, r, prgrp);
>  		if (ret)
>  			return ret;
>  	}
> @@ -4030,8 +4054,10 @@ void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain
>  	mutex_unlock(&rdtgroup_mutex);
>  }
>  
> -void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d)
> +void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_domain_hdr *hdr)
>  {
> +	struct rdt_mon_domain *d;
> +
>  	mutex_lock(&rdtgroup_mutex);
>  
>  	/*
> @@ -4039,11 +4065,15 @@ void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d
>  	 * per domain monitor data directories.
>  	 */
>  	if (resctrl_mounted && resctrl_arch_mon_capable())
> -		rmdir_mondata_subdir_allrdtgrp(r, d);
> +		rmdir_mondata_subdir_allrdtgrp(r, hdr);
>  
>  	if (r->rid != RDT_RESOURCE_L3)
>  		goto done;
>  
> +	if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, r->rid))

Again, no need to obfuscate things, considering earlier if(), this
can be explicit in check for RDT_RESOURCE_L3, no?

> +		return;
> +
> +	d = container_of(hdr, struct rdt_mon_domain, hdr);
>  	if (resctrl_is_mbm_enabled())
>  		cancel_delayed_work(&d->mbm_over);
>  	if (resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID) && has_busy_rmid(d)) {
> @@ -4126,12 +4156,17 @@ int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d
>  	return err;
>  }
>  
> -int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d)
> +int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_domain_hdr *hdr)
>  {
> -	int err;
> +	struct rdt_mon_domain *d;
> +	int err = -EINVAL;
>  
>  	mutex_lock(&rdtgroup_mutex);
>  
> +	if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, r->rid))
> +		goto out_unlock;
> +
> +	d = container_of(hdr, struct rdt_mon_domain, hdr);

Similar here ... expecting the container_of() to require L3 resource
so the domain_header_is_valid() should be explicit for it. Making these
flows explicit makes the code much easier to understand.

>  	err = domain_setup_l3_mon_state(r, d);
>  	if (err)
>  		goto out_unlock;
> @@ -4152,7 +4187,7 @@ int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d)
>  	 * If resctrl is mounted, add per domain monitor data directories.
>  	 */
>  	if (resctrl_mounted && resctrl_arch_mon_capable())
> -		mkdir_mondata_subdir_allrdtgrp(r, d);
> +		mkdir_mondata_subdir_allrdtgrp(r, hdr);
>  
>  out_unlock:
>  	mutex_unlock(&rdtgroup_mutex);

Reinette

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ