lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Thu, 28 Sep 2023 10:21:50 -0700
From:   Tony Luck <tony.luck@...el.com>
To:     Reinette Chatre <reinette.chatre@...el.com>
Cc:     Fenghua Yu <fenghua.yu@...el.com>,
        Peter Newman <peternewman@...gle.com>,
        Jonathan Corbet <corbet@....net>,
        Shuah Khan <skhan@...uxfoundation.org>, x86@...nel.org,
        Shaopeng Tan <tan.shaopeng@...itsu.com>,
        James Morse <james.morse@....com>,
        Jamie Iles <quic_jiles@...cinc.com>,
        Babu Moger <babu.moger@....com>,
        Randy Dunlap <rdunlap@...radead.org>,
        linux-kernel@...r.kernel.org, linux-doc@...r.kernel.org,
        patches@...ts.linux.dev
Subject: Re: [PATCH v5 2/8] x86/resctrl: Prepare for different scope for
 control/monitor operations

On Mon, Sep 25, 2023 at 04:24:10PM -0700, Reinette Chatre wrote:
> Hi Tony,
> 
> On 8/29/2023 4:44 PM, Tony Luck wrote:
> > Existing resctrl assumes that control and monitor operations on a
> 
> Please remove "Existing", it can just be "resctrl assumes ..."

Will do.

> 
> > resource are performed at the same scope.
> > 
> > Prepare for systems that use different scope (specifically L3 scope
> > for cache control and NODE scope for cache occupancy and memory
> > bandwidth monitoring).
> > 
> > Create separate domain lists for control and monitor operations.
> > 
> > No important functional change. But note that errors during
> > initialization of either control or monitor functions on a domain would
> > previously result in that domain being excluded from both control and
> > monitor operations. Now the domains are allocated independently it is
> > no longer required to disable both control and monitor operations if
> > either fail.
> > 
> > Signed-off-by: Tony Luck <tony.luck@...el.com>
> > ---
> >  include/linux/resctrl.h                   |  16 +-
> >  arch/x86/kernel/cpu/resctrl/internal.h    |   6 +-
> >  arch/x86/kernel/cpu/resctrl/core.c        | 227 +++++++++++++++-------
> >  arch/x86/kernel/cpu/resctrl/ctrlmondata.c |   2 +-
> >  arch/x86/kernel/cpu/resctrl/monitor.c     |   2 +-
> >  arch/x86/kernel/cpu/resctrl/pseudo_lock.c |   2 +-
> >  arch/x86/kernel/cpu/resctrl/rdtgroup.c    |  32 +--
> >  7 files changed, 199 insertions(+), 88 deletions(-)
> > 
> > diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
> > index 2db1244ae642..33856943a787 100644
> > --- a/include/linux/resctrl.h
> > +++ b/include/linux/resctrl.h
> > @@ -155,10 +155,12 @@ enum resctrl_scope {
> >   * @alloc_capable:	Is allocation available on this machine
> >   * @mon_capable:	Is monitor feature available on this machine
> >   * @num_rmid:		Number of RMIDs available
> > - * @scope:		Scope of this resource
> > + * @ctrl_scope:		Scope of this resource for control functions
> > + * @mon_scope:		Scope of this resource for monitor functions
> >   * @cache:		Cache allocation related data
> >   * @membw:		If the component has bandwidth controls, their properties.
> > - * @domains:		All domains for this resource
> > + * @domains:		Control domains for this resource
> > + * @mon_domains:	Monitor domains for this resource
> >   * @name:		Name to use in "schemata" file.
> >   * @data_width:		Character width of data when displaying
> >   * @default_ctrl:	Specifies default cache cbm or memory B/W percent.
> > @@ -173,10 +175,12 @@ struct rdt_resource {
> >  	bool			alloc_capable;
> >  	bool			mon_capable;
> >  	int			num_rmid;
> > -	enum resctrl_scope	scope;
> > +	enum resctrl_scope	ctrl_scope;
> > +	enum resctrl_scope	mon_scope;
> >  	struct resctrl_cache	cache;
> >  	struct resctrl_membw	membw;
> >  	struct list_head	domains;
> > +	struct list_head	mondomains;
> 
> kerneldoc is "mon_domains" while member is "mondomains".
> I do think to be consistent with other members that "mon_domains"
> would be appropriate. I also think it will be very helpful it
> domains is renamed to "ctrl_domains" to more accurately match
> "ctrl_scope" and what it represents.

Next version will use "mon_domains" (and "mon_" in all the associated
functions and macros). I'm also changing to "ctrl_" as suggested.

> >  	char			*name;
> >  	int			data_width;
> >  	u32			default_ctrl;
> > @@ -222,8 +226,10 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d,
> >  
> >  u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d,
> >  			    u32 closid, enum resctrl_conf_type type);
> > -int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d);
> > -void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d);
> > +int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_domain *d);
> > +int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_domain *d);
> > +void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_domain *d);
> > +void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_domain *d);
> >  
> >  /**
> >   * resctrl_arch_rmid_read() - Read the eventid counter corresponding to rmid
> > diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> > index 85ceaf9a31ac..31a5fc3b717f 100644
> > --- a/arch/x86/kernel/cpu/resctrl/internal.h
> > +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> > @@ -511,8 +511,10 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn);
> >  int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name);
> >  int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
> >  			     umode_t mask);
> > -struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
> > -				   struct list_head **pos);
> > +struct rdt_domain *rdt_find_ctrldomain(struct list_head *h, int id,
> > +				       struct list_head **pos);
> > +struct rdt_domain *rdt_find_mondomain(struct list_head *h, int id,
> > +				      struct list_head **pos);
> 
> This is not what I expected after our previous discussion. It
> should not be necessary for caller to be aware of the different
> lists. It looks to me like the original parameters of (struct rdt_resource *r,
> int id, struct list_head **pos) can be maintained.

Discussed in other e-mail. There is now a "struct rdt_domain_hdr"
embedded in the control and monitor versions of the domain structure.
rdt_find_domain() operates on that, and callers user container_of() to
get to the enclosing structure.

> 
> >  ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
> >  				char *buf, size_t nbytes, loff_t off);
> >  int rdtgroup_schemata_show(struct kernfs_open_file *of,
> > diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
> > index 0d3bae523ecb..97f6f9715fdb 100644
> > --- a/arch/x86/kernel/cpu/resctrl/core.c
> > +++ b/arch/x86/kernel/cpu/resctrl/core.c
> > @@ -57,7 +57,7 @@ static void
> >  mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m,
> >  	      struct rdt_resource *r);
> >  
> > -#define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.domains)
> > +#define domain_init(id, field) LIST_HEAD_INIT(rdt_resources_all[id].r_resctrl.field)
> >  
> 
> It may make the code easier to read if this is split into
> a "mon_domain_init()" and "ctrl_domain_init()"

Done

> 
> >  struct rdt_hw_resource rdt_resources_all[] = {
> >  	[RDT_RESOURCE_L3] =
> > @@ -65,8 +65,10 @@ struct rdt_hw_resource rdt_resources_all[] = {
> >  		.r_resctrl = {
> >  			.rid			= RDT_RESOURCE_L3,
> >  			.name			= "L3",
> > -			.scope			= RESCTRL_L3_CACHE,
> > -			.domains		= domain_init(RDT_RESOURCE_L3),
> > +			.ctrl_scope		= RESCTRL_L3_CACHE,
> > +			.mon_scope		= RESCTRL_L3_CACHE,
> > +			.domains		= domain_init(RDT_RESOURCE_L3, domains),
> > +			.mondomains		= domain_init(RDT_RESOURCE_L3, mondomains),
> >  			.parse_ctrlval		= parse_cbm,
> >  			.format_str		= "%d=%0*x",
> >  			.fflags			= RFTYPE_RES_CACHE,
> > @@ -79,8 +81,8 @@ struct rdt_hw_resource rdt_resources_all[] = {
> >  		.r_resctrl = {
> >  			.rid			= RDT_RESOURCE_L2,
> >  			.name			= "L2",
> > -			.scope			= RESCTRL_L2_CACHE,
> > -			.domains		= domain_init(RDT_RESOURCE_L2),
> > +			.ctrl_scope		= RESCTRL_L2_CACHE,
> > +			.domains		= domain_init(RDT_RESOURCE_L2, domains),
> >  			.parse_ctrlval		= parse_cbm,
> >  			.format_str		= "%d=%0*x",
> >  			.fflags			= RFTYPE_RES_CACHE,
> > @@ -93,8 +95,8 @@ struct rdt_hw_resource rdt_resources_all[] = {
> >  		.r_resctrl = {
> >  			.rid			= RDT_RESOURCE_MBA,
> >  			.name			= "MB",
> > -			.scope			= RESCTRL_L3_CACHE,
> > -			.domains		= domain_init(RDT_RESOURCE_MBA),
> > +			.ctrl_scope		= RESCTRL_L3_CACHE,
> > +			.domains		= domain_init(RDT_RESOURCE_MBA, domains),
> >  			.parse_ctrlval		= parse_bw,
> >  			.format_str		= "%d=%*u",
> >  			.fflags			= RFTYPE_RES_MB,
> > @@ -105,8 +107,8 @@ struct rdt_hw_resource rdt_resources_all[] = {
> >  		.r_resctrl = {
> >  			.rid			= RDT_RESOURCE_SMBA,
> >  			.name			= "SMBA",
> > -			.scope			= RESCTRL_L3_CACHE,
> > -			.domains		= domain_init(RDT_RESOURCE_SMBA),
> > +			.ctrl_scope		= RESCTRL_L3_CACHE,
> > +			.domains		= domain_init(RDT_RESOURCE_SMBA, domains),
> >  			.parse_ctrlval		= parse_bw,
> >  			.format_str		= "%d=%*u",
> >  			.fflags			= RFTYPE_RES_MB,
> > @@ -384,15 +386,16 @@ void rdt_ctrl_update(void *arg)
> >  }
> >  
> >  /*
> > - * rdt_find_domain - Find a domain in a resource that matches input resource id
> > + * __rdt_find_domain - Find a domain in either the list of control or
> > + * monitor domains that matches input resource id
> >   *
> >   * Search resource r's domain list to find the resource id. If the resource
> >   * id is found in a domain, return the domain. Otherwise, if requested by
> >   * caller, return the first domain whose id is bigger than the input id.
> >   * The domain list is sorted by id in ascending order.
> >   */
> > -struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
> > -				   struct list_head **pos)
> > +static void *__rdt_find_domain(struct list_head *h, int id,
> > +			       struct list_head **pos)
> >  {
> >  	struct rdt_domain *d;
> >  	struct list_head *l;
> > @@ -400,7 +403,7 @@ struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
> >  	if (id < 0)
> >  		return ERR_PTR(-ENODEV);
> >  
> > -	list_for_each(l, &r->domains) {
> > +	list_for_each(l, h) {
> >  		d = list_entry(l, struct rdt_domain, list);
> >  		/* When id is found, return its domain. */
> >  		if (id == d->id)
> > @@ -416,6 +419,18 @@ struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
> >  	return NULL;
> >  }
> >  
> > +struct rdt_domain *rdt_find_ctrldomain(struct list_head *h, int id,
> > +				       struct list_head **pos)
> > +{
> > +	return __rdt_find_domain(h, id, pos);
> > +}
> > +
> > +struct rdt_domain *rdt_find_mondomain(struct list_head *h, int id,
> > +				      struct list_head **pos)
> > +{
> > +	return __rdt_find_domain(h, id, pos);
> > +}
> > +
> >  static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc)
> >  {
> >  	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
> > @@ -431,10 +446,15 @@ static void setup_default_ctrlval(struct rdt_resource *r, u32 *dc)
> >  }
> >  
> >  static void domain_free(struct rdt_hw_domain *hw_dom)
> > +{
> > +	kfree(hw_dom->ctrl_val);
> > +	kfree(hw_dom);
> > +}
> > +
> > +static void mondomain_free(struct rdt_hw_domain *hw_dom)
> >  {
> >  	kfree(hw_dom->arch_mbm_total);
> >  	kfree(hw_dom->arch_mbm_local);
> > -	kfree(hw_dom->ctrl_val);
> >  	kfree(hw_dom);
> >  }
> >  
> > @@ -502,6 +522,93 @@ static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope)
> >  	return -1;
> >  }
> >  
> > +static void domain_add_cpu_ctrl(int cpu, struct rdt_resource *r)
> > +{
> > +	int id = get_domain_id_from_scope(cpu, r->ctrl_scope);
> > +	struct list_head *add_pos = NULL;
> > +	struct rdt_hw_domain *hw_dom;
> > +	struct rdt_domain *d;
> > +	int err;
> > +
> > +	d = rdt_find_ctrldomain(&r->domains, id, &add_pos);
> > +	if (IS_ERR(d)) {
> > +		pr_warn("Couldn't find scope id=%d for CPU %d\n", id, cpu);
> 
> I am not sure here ... this generates identical error messages
> for control and monitor domain. How will the user know what failed?
> Also, how does printing id help? If I understand correctly it can
> only be -1 when the above prints.

Improved error messages in next version. Separate ones for failure in
get_domain_id_from_scope() and rdt_find_domain(). Each have "control"
or "monitor" in the message to help the user pinpoint exactly what
went wrong.

> > +		return;
> > +	}
> > +
> > +	if (d) {
> > +		cpumask_set_cpu(cpu, &d->cpu_mask);
> > +		if (r->cache.arch_has_per_cpu_cfg)
> > +			rdt_domain_reconfigure_cdp(r);
> > +		return;
> > +	}
> > +
> > +	hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu));
> > +	if (!hw_dom)
> > +		return;
> > +
> > +	d = &hw_dom->d_resctrl;
> > +	d->id = id;
> > +	cpumask_set_cpu(cpu, &d->cpu_mask);
> > +
> > +	rdt_domain_reconfigure_cdp(r);
> > +
> > +	if (domain_setup_ctrlval(r, d)) {
> > +		domain_free(hw_dom);
> > +		return;
> > +	}
> > +
> > +	list_add_tail(&d->list, add_pos);
> > +
> > +	err = resctrl_online_ctrl_domain(r, d);
> > +	if (err) {
> > +		list_del(&d->list);
> > +		domain_free(hw_dom);
> > +	}
> > +}
> > +
> > +static void domain_add_cpu_mon(int cpu, struct rdt_resource *r)
> > +{
> > +	int id = get_domain_id_from_scope(cpu, r->mon_scope);
> > +	struct rdt_hw_domain *hw_mondom;
> > +	struct list_head *add_pos = NULL;
> 
> Please maintain reverse fir ordering. Please check all code.

I believe I got them all (for functions that are touched by this
series - there are a handful of places in resctrl code that don't
have reverse fir ordering - I didn't touch those).

> > +	struct rdt_domain *d;
> > +	int err;
> > +
> > +	d = rdt_find_mondomain(&r->mondomains, id, &add_pos);
> > +	if (IS_ERR(d)) {
> > +		pr_warn("Couldn't find scope id=%d for CPU %d\n", id, cpu);
> > +		return;
> > +	}
> > +
> > +	if (d) {
> > +		cpumask_set_cpu(cpu, &d->cpu_mask);
> > +
> 
> This is an unnecessary empty line that distracts from how the rest
> of the function looks.

Deleted it.

> 
> > +		return;
> > +	}
> > +
> > +	hw_mondom = kzalloc_node(sizeof(*hw_mondom), GFP_KERNEL, cpu_to_node(cpu));
> > +	if (!hw_mondom)
> > +		return;
> > +
> > +	d = &hw_mondom->d_resctrl;
> > +	d->id = id;
> > +	cpumask_set_cpu(cpu, &d->cpu_mask);
> > +
> > +	if (arch_domain_mbm_alloc(r->num_rmid, hw_mondom)) {
> > +		mondomain_free(hw_mondom);
> > +		return;
> > +	}
> > +
> > +	list_add_tail(&d->list, add_pos);
> > +
> > +	err = resctrl_online_mon_domain(r, d);
> > +	if (err) {
> > +		list_del(&d->list);
> > +		mondomain_free(hw_mondom);
> > +	}
> > +}
> > +
> >  /*
> >   * domain_add_cpu - Add a cpu to a resource's domain list.
> >   *
> 
> Note that this leaves the comments about list management here while all
> the list management code is moved away.

Moved the comment in front of rdt_find_domain() which does most of the
work. Small edit in the comment to say that callers do the rest.

> 
> > @@ -517,70 +624,28 @@ static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope)
> >   */
> >  static void domain_add_cpu(int cpu, struct rdt_resource *r)
> >  {
> > -	int id = get_domain_id_from_scope(cpu, r->scope);
> > -	struct list_head *add_pos = NULL;
> > -	struct rdt_hw_domain *hw_dom;
> > -	struct rdt_domain *d;
> > -	int err;
> > -
> > -	d = rdt_find_domain(r, id, &add_pos);
> > -	if (IS_ERR(d)) {
> > -		pr_warn("Couldn't find cache id for CPU %d\n", cpu);
> > -		return;
> > -	}
> > -
> > -	if (d) {
> > -		cpumask_set_cpu(cpu, &d->cpu_mask);
> > -		if (r->cache.arch_has_per_cpu_cfg)
> > -			rdt_domain_reconfigure_cdp(r);
> > -		return;
> > -	}
> > -
> > -	hw_dom = kzalloc_node(sizeof(*hw_dom), GFP_KERNEL, cpu_to_node(cpu));
> > -	if (!hw_dom)
> > -		return;
> > -
> > -	d = &hw_dom->d_resctrl;
> > -	d->id = id;
> > -	cpumask_set_cpu(cpu, &d->cpu_mask);
> > -
> > -	rdt_domain_reconfigure_cdp(r);
> > -
> > -	if (r->alloc_capable && domain_setup_ctrlval(r, d)) {
> > -		domain_free(hw_dom);
> > -		return;
> > -	}
> > -
> > -	if (r->mon_capable && arch_domain_mbm_alloc(r->num_rmid, hw_dom)) {
> > -		domain_free(hw_dom);
> > -		return;
> > -	}
> > -
> > -	list_add_tail(&d->list, add_pos);
> > -
> > -	err = resctrl_online_domain(r, d);
> > -	if (err) {
> > -		list_del(&d->list);
> > -		domain_free(hw_dom);
> > -	}
> > +	if (r->alloc_capable)
> > +		domain_add_cpu_ctrl(cpu, r);
> > +	if (r->mon_capable)
> > +		domain_add_cpu_mon(cpu, r);
> >  }
> >  
> > -static void domain_remove_cpu(int cpu, struct rdt_resource *r)
> > +static void domain_remove_cpu_ctrl(int cpu, struct rdt_resource *r)
> >  {
> > -	int id = get_domain_id_from_scope(cpu, r->scope);
> > +	int id = get_domain_id_from_scope(cpu, r->ctrl_scope);
> >  	struct rdt_hw_domain *hw_dom;
> >  	struct rdt_domain *d;
> >  
> > -	d = rdt_find_domain(r, id, NULL);
> > +	d = rdt_find_ctrldomain(&r->domains, id, NULL);
> >  	if (IS_ERR_OR_NULL(d)) {
> > -		pr_warn("Couldn't find cache id for CPU %d\n", cpu);
> > +		pr_warn("Couldn't find scope id=%d for CPU %d\n", id, cpu);
> >  		return;
> >  	}
> >  	hw_dom = resctrl_to_arch_dom(d);
> >  
> >  	cpumask_clear_cpu(cpu, &d->cpu_mask);
> >  	if (cpumask_empty(&d->cpu_mask)) {
> > -		resctrl_offline_domain(r, d);
> > +		resctrl_offline_ctrl_domain(r, d);
> >  		list_del(&d->list);
> >  
> >  		/*
> > @@ -593,6 +658,30 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
> >  
> >  		return;
> >  	}
> > +}
> > +
> > +static void domain_remove_cpu_mon(int cpu, struct rdt_resource *r)
> > +{
> > +	int id = get_domain_id_from_scope(cpu, r->mon_scope);
> > +	struct rdt_hw_domain *hw_mondom;
> > +	struct rdt_domain *d;
> > +
> > +	d = rdt_find_mondomain(&r->mondomains, id, NULL);
> > +	if (IS_ERR_OR_NULL(d)) {
> > +		pr_warn("Couldn't find scope id=%d for CPU %d\n", id, cpu);
> > +		return;
> > +	}
> > +	hw_mondom = resctrl_to_arch_dom(d);
> > +
> > +	cpumask_clear_cpu(cpu, &d->cpu_mask);
> > +	if (cpumask_empty(&d->cpu_mask)) {
> > +		resctrl_offline_mon_domain(r, d);
> > +		list_del(&d->list);
> > +
> 
> This is an awkward empty line.

Deleted it.

> 
> > +		mondomain_free(hw_mondom);
> > +
> > +		return;
> > +	}
> >  
> >  	if (r == &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl) {
> >  		if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
> 
> ...
> 
> Reinette

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ