lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <471ad597-c803-4f13-908e-17572aabe212@nvidia.com>
Date: Wed, 14 May 2025 17:10:16 -0700
From: Fenghua Yu <fenghuay@...dia.com>
To: James Morse <james.morse@....com>, x86@...nel.org,
 linux-kernel@...r.kernel.org
Cc: Reinette Chatre <reinette.chatre@...el.com>,
 Thomas Gleixner <tglx@...utronix.de>, Ingo Molnar <mingo@...hat.com>,
 Borislav Petkov <bp@...en8.de>, H Peter Anvin <hpa@...or.com>,
 Babu Moger <Babu.Moger@....com>, shameerali.kolothum.thodi@...wei.com,
 D Scott Phillips OS <scott@...amperecomputing.com>,
 carl@...amperecomputing.com, lcherian@...vell.com,
 bobo.shaobowang@...wei.com, tan.shaopeng@...itsu.com,
 baolin.wang@...ux.alibaba.com, Jamie Iles <quic_jiles@...cinc.com>,
 Xin Hao <xhao@...ux.alibaba.com>, peternewman@...gle.com,
 dfustini@...libre.com, amitsinght@...vell.com,
 David Hildenbrand <david@...hat.com>, Rex Nie <rex.nie@...uarmicro.com>,
 Dave Martin <dave.martin@....com>, Koba Ko <kobak@...dia.com>,
 Shanker Donthineni <sdonthineni@...dia.com>, Tony Luck
 <tony.luck@...el.com>, Shaopeng Tan <tan.shaopeng@...fujitsu.com>
Subject: Re: [PATCH v11 12/30] x86/resctrl: Expand the width of domid by
 replacing mon_data_bits


On 5/13/25 10:15, James Morse wrote:
> MPAM platforms retrieve the cache-id property from the ACPI PPTT table.
> The cache-id field is 32 bits wide. Under resctrl, the cache-id becomes
> the domain-id, and is packed into the mon_data_bits union bitfield.
> The width of cache-id in this field is 14 bits.
>
> Expanding the union would break 32bit x86 platforms as this union is
> stored as the kernfs kn->priv pointer. This saved allocating memory
> for the priv data storage.
>
> The firmware on MPAM platforms has used the PPTT cache-id field to
> expose the interconnect's id for the cache, which is sparse and uses
> more than 14 bits. This id is used to enable PCIe direct cache
> injection hints. Using this feature with VFIO means the value provided
> by the ACPI table should be exposed to user-space.
>
> To support cache-id values greater than 14 bits, convert the
> mon_data_bits union to a structure. These are shared between control
> and monitor groups, and are allocated on first use. The list of
> allocated struct mon_data is freed when the filesystem is umount()ed.
>
> Co-developed-by: Tony Luck <tony.luck@...el.com>
> Signed-off-by: Tony Luck <tony.luck@...el.com>
> Signed-off-by: James Morse <james.morse@....com>
> Tested-by: Babu Moger <babu.moger@....com>
> Tested-by: Shaopeng Tan <tan.shaopeng@...fujitsu.com>
> Reviewed-by: Reinette Chatre <reinette.chatre@...el.com>

Reviewed-by: Fenghua Yu <fenghuay@...dia.com>

Thanks.

-Fenghua

> ---
> Previously the MPAM tree repainted the cache-id to compact them,
> arguing there was no other user. With VFIO use of this PCIe feature,
> this is no longer an option.
>
> Changes since v9:
>   * Used more specific enum types for resid/rid and evtid.
>
> Changes since v8:
>   * Removed underscore in dom_id in the patch subject.
>   * Change types in struct mon_data to match the data.
>   * Renamed the global list to be more descriptive.
>   * Numerous comments were clarified.
>
> Changes since v7:
>   * Replaced with Tony Luck's list based version.
>
> Changes since v6:
>   * Added the get/put helpers.
>   * Special case the creation of the mondata files for the default control
>     group.
>   * Removed wording about files living longer than expected; the corresponding
>     error handling is wrapped in WARN_ON_ONCE() as this indicates a bug.
> ---
>   arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 24 ++++---
>   arch/x86/kernel/cpu/resctrl/internal.h    | 39 ++++++-----
>   arch/x86/kernel/cpu/resctrl/rdtgroup.c    | 79 +++++++++++++++++++++--
>   3 files changed, 106 insertions(+), 36 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
> index 0a0ac5f6112e..110b534d400c 100644
> --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
> +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
> @@ -661,14 +661,15 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
>   int rdtgroup_mondata_show(struct seq_file *m, void *arg)
>   {
>   	struct kernfs_open_file *of = m->private;
> +	enum resctrl_res_level resid;
> +	enum resctrl_event_id evtid;
>   	struct rdt_domain_hdr *hdr;
>   	struct rmid_read rr = {0};
>   	struct rdt_mon_domain *d;
> -	u32 resid, evtid, domid;
>   	struct rdtgroup *rdtgrp;
>   	struct rdt_resource *r;
> -	union mon_data_bits md;
> -	int ret = 0;
> +	struct mon_data *md;
> +	int domid, ret = 0;
>   
>   	rdtgrp = rdtgroup_kn_lock_live(of->kn);
>   	if (!rdtgrp) {
> @@ -676,17 +677,22 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
>   		goto out;
>   	}
>   
> -	md.priv = of->kn->priv;
> -	resid = md.u.rid;
> -	domid = md.u.domid;
> -	evtid = md.u.evtid;
> +	md = of->kn->priv;
> +	if (WARN_ON_ONCE(!md)) {
> +		ret = -EIO;
> +		goto out;
> +	}
> +
> +	resid = md->rid;
> +	domid = md->domid;
> +	evtid = md->evtid;
>   	r = resctrl_arch_get_resource(resid);
>   
> -	if (md.u.sum) {
> +	if (md->sum) {
>   		/*
>   		 * This file requires summing across all domains that share
>   		 * the L3 cache id that was provided in the "domid" field of the
> -		 * mon_data_bits union. Search all domains in the resource for
> +		 * struct mon_data. Search all domains in the resource for
>   		 * one that matches this cache id.
>   		 */
>   		list_for_each_entry(d, &r->mon_domains, hdr.list) {
> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> index 576383a808a2..01cb0ff89c85 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -89,27 +89,26 @@ struct mon_evt {
>   };
>   
>   /**
> - * union mon_data_bits - Monitoring details for each event file.
> - * @priv:              Used to store monitoring event data in @u
> - *                     as kernfs private data.
> - * @u.rid:             Resource id associated with the event file.
> - * @u.evtid:           Event id associated with the event file.
> - * @u.sum:             Set when event must be summed across multiple
> - *                     domains.
> - * @u.domid:           When @u.sum is zero this is the domain to which
> - *                     the event file belongs. When @sum is one this
> - *                     is the id of the L3 cache that all domains to be
> - *                     summed share.
> - * @u:                 Name of the bit fields struct.
> + * struct mon_data - Monitoring details for each event file.
> + * @list:            Member of the global @mon_data_kn_priv_list list.
> + * @rid:             Resource id associated with the event file.
> + * @evtid:           Event id associated with the event file.
> + * @sum:             Set when event must be summed across multiple
> + *                   domains.
> + * @domid:           When @sum is zero this is the domain to which
> + *                   the event file belongs. When @sum is one this
> + *                   is the id of the L3 cache that all domains to be
> + *                   summed share.
> + *
> + * Pointed to by the kernfs kn->priv field of monitoring event files.
> + * Readers and writers must hold rdtgroup_mutex.
>    */
> -union mon_data_bits {
> -	void *priv;
> -	struct {
> -		unsigned int rid		: 10;
> -		enum resctrl_event_id evtid	: 7;
> -		unsigned int sum		: 1;
> -		unsigned int domid		: 14;
> -	} u;
> +struct mon_data {
> +	struct list_head	list;
> +	enum resctrl_res_level	rid;
> +	enum resctrl_event_id	evtid;
> +	int			domid;
> +	bool			sum;
>   };
>   
>   /**
> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> index e2999f668593..d48078410d77 100644
> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> @@ -45,6 +45,12 @@ LIST_HEAD(rdt_all_groups);
>   /* list of entries for the schemata file */
>   LIST_HEAD(resctrl_schema_all);
>   
> +/*
> + * List of struct mon_data containing private data of event files for use by
> + * rdtgroup_mondata_show(). Protected by rdtgroup_mutex.
> + */
> +static LIST_HEAD(mon_data_kn_priv_list);
> +
>   /* The filesystem can only be mounted once. */
>   bool resctrl_mounted;
>   
> @@ -3093,6 +3099,63 @@ static void rmdir_all_sub(void)
>   	kernfs_remove(kn_mondata);
>   }
>   
> +/**
> + * mon_get_kn_priv() - Get the mon_data priv data for this event.
> + *
> + * The same values are used across the mon_data directories of all control and
> + * monitor groups for the same event in the same domain. Keep a list of
> + * allocated structures and re-use an existing one with the same values for
> + * @rid, @domid, etc.
> + *
> + * @rid:    The resource id for the event file being created.
> + * @domid:  The domain id for the event file being created.
> + * @mevt:   The type of event file being created.
> + * @do_sum: Whether SNC summing monitors are being created.
> + */
> +static struct mon_data *mon_get_kn_priv(enum resctrl_res_level rid, int domid,
> +					struct mon_evt *mevt,
> +					bool do_sum)
> +{
> +	struct mon_data *priv;
> +
> +	lockdep_assert_held(&rdtgroup_mutex);
> +
> +	list_for_each_entry(priv, &mon_data_kn_priv_list, list) {
> +		if (priv->rid == rid && priv->domid == domid &&
> +		    priv->sum == do_sum && priv->evtid == mevt->evtid)
> +			return priv;
> +	}
> +
> +	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
> +	if (!priv)
> +		return NULL;
> +
> +	priv->rid = rid;
> +	priv->domid = domid;
> +	priv->sum = do_sum;
> +	priv->evtid = mevt->evtid;
> +	list_add_tail(&priv->list, &mon_data_kn_priv_list);
> +
> +	return priv;
> +}
> +
> +/**
> + * mon_put_kn_priv() - Free all allocated mon_data structures.
> + *
> + * Called when resctrl file system is unmounted.
> + */
> +static void mon_put_kn_priv(void)
> +{
> +	struct mon_data *priv, *tmp;
> +
> +	lockdep_assert_held(&rdtgroup_mutex);
> +
> +	list_for_each_entry_safe(priv, tmp, &mon_data_kn_priv_list, list) {
> +		list_del(&priv->list);
> +		kfree(priv);
> +	}
> +}
> +
>   static void resctrl_fs_teardown(void)
>   {
>   	lockdep_assert_held(&rdtgroup_mutex);
> @@ -3102,6 +3165,7 @@ static void resctrl_fs_teardown(void)
>   		return;
>   
>   	rmdir_all_sub();
> +	mon_put_kn_priv();
>   	rdt_pseudo_lock_release();
>   	rdtgroup_default.mode = RDT_MODE_SHAREABLE;
>   	closid_exit();
> @@ -3208,19 +3272,20 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
>   			     bool do_sum)
>   {
>   	struct rmid_read rr = {0};
> -	union mon_data_bits priv;
> +	struct mon_data *priv;
>   	struct mon_evt *mevt;
> -	int ret;
> +	int ret, domid;
>   
>   	if (WARN_ON(list_empty(&r->evt_list)))
>   		return -EPERM;
>   
> -	priv.u.rid = r->rid;
> -	priv.u.domid = do_sum ? d->ci->id : d->hdr.id;
> -	priv.u.sum = do_sum;
>   	list_for_each_entry(mevt, &r->evt_list, list) {
> -		priv.u.evtid = mevt->evtid;
> -		ret = mon_addfile(kn, mevt->name, priv.priv);
> +		domid = do_sum ? d->ci->id : d->hdr.id;
> +		priv = mon_get_kn_priv(r->rid, domid, mevt, do_sum);
> +		if (WARN_ON_ONCE(!priv))
> +			return -EINVAL;
> +
> +		ret = mon_addfile(kn, mevt->name, priv);
>   		if (ret)
>   			return ret;
>   

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ