[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <471ad597-c803-4f13-908e-17572aabe212@nvidia.com>
Date: Wed, 14 May 2025 17:10:16 -0700
From: Fenghua Yu <fenghuay@...dia.com>
To: James Morse <james.morse@....com>, x86@...nel.org,
linux-kernel@...r.kernel.org
Cc: Reinette Chatre <reinette.chatre@...el.com>,
Thomas Gleixner <tglx@...utronix.de>, Ingo Molnar <mingo@...hat.com>,
Borislav Petkov <bp@...en8.de>, H Peter Anvin <hpa@...or.com>,
Babu Moger <Babu.Moger@....com>, shameerali.kolothum.thodi@...wei.com,
D Scott Phillips OS <scott@...amperecomputing.com>,
carl@...amperecomputing.com, lcherian@...vell.com,
bobo.shaobowang@...wei.com, tan.shaopeng@...itsu.com,
baolin.wang@...ux.alibaba.com, Jamie Iles <quic_jiles@...cinc.com>,
Xin Hao <xhao@...ux.alibaba.com>, peternewman@...gle.com,
dfustini@...libre.com, amitsinght@...vell.com,
David Hildenbrand <david@...hat.com>, Rex Nie <rex.nie@...uarmicro.com>,
Dave Martin <dave.martin@....com>, Koba Ko <kobak@...dia.com>,
Shanker Donthineni <sdonthineni@...dia.com>, Tony Luck
<tony.luck@...el.com>, Shaopeng Tan <tan.shaopeng@...fujitsu.com>
Subject: Re: [PATCH v11 12/30] x86/resctrl: Expand the width of domid by
replacing mon_data_bits
On 5/13/25 10:15, James Morse wrote:
> MPAM platforms retrieve the cache-id property from the ACPI PPTT table.
> The cache-id field is 32 bits wide. Under resctrl, the cache-id becomes
> the domain-id, and is packed into the mon_data_bits union bitfield.
> The width of cache-id in this field is 14 bits.
>
> Expanding the union would break 32bit x86 platforms as this union is
> stored as the kernfs kn->priv pointer. This saved allocating memory
> for the priv data storage.
>
> The firmware on MPAM platforms has used the PPTT cache-id field to
> expose the interconnect's id for the cache, which is sparse and uses
> more than 14 bits. Use of this id is to enable PCIe direct cache
> injection hints. Using this feature with VFIO means the value provided
> by the ACPI table should be exposed to user-space.
>
> To support cache-id values greater than 14 bits, convert the
> mon_data_bits union to a structure. These are shared between control
> and monitor groups, and are allocated on first use. The list of
> allocated struct mon_data is freed when the filesystem is umount()ed.
>
> Co-developed-by: Tony Luck <tony.luck@...el.com>
> Signed-off-by: Tony Luck <tony.luck@...el.com>
> Signed-off-by: James Morse <james.morse@....com>
> Tested-by: Babu Moger <babu.moger@....com>
> Tested-by: Shaopeng Tan <tan.shaopeng@...fujitsu.com>
> Reviewed-by: Reinette Chatre <reinette.chatre@...el.com>
Reviewed-by: Fenghua Yu <fenghuay@...dia.com>
Thanks.
-Fenghua
> ---
> Previously the MPAM tree repainted the cache-id to compact them,
> arguing there was no other user. With VFIO use of this PCIe feature,
> this is no longer an option.
>
> Changes since v9:
> * Used more specific enum types for resid/rid and evtid.
>
> Changes since v8:
> * Removed underscore in dom_id in the patch subject.
> * Change types in struct mon_data to match the data.
> * Renamed the global list to be more descriptive.
> * Numerous comments were clarified.
>
> Changes since v7:
> * Replaced with Tony Luck's list based version.
>
> Changes since v6:
> * Added the get/put helpers.
> * Special case the creation of the mondata files for the default control
> group.
> * Removed wording about files living longer than expected, the corresponding
> error handling is wrapped in WARN_ON_ONCE() as this indicates a bug.
> ---
> arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 24 ++++---
> arch/x86/kernel/cpu/resctrl/internal.h | 39 ++++++-----
> arch/x86/kernel/cpu/resctrl/rdtgroup.c | 79 +++++++++++++++++++++--
> 3 files changed, 106 insertions(+), 36 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
> index 0a0ac5f6112e..110b534d400c 100644
> --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
> +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
> @@ -661,14 +661,15 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
> int rdtgroup_mondata_show(struct seq_file *m, void *arg)
> {
> struct kernfs_open_file *of = m->private;
> + enum resctrl_res_level resid;
> + enum resctrl_event_id evtid;
> struct rdt_domain_hdr *hdr;
> struct rmid_read rr = {0};
> struct rdt_mon_domain *d;
> - u32 resid, evtid, domid;
> struct rdtgroup *rdtgrp;
> struct rdt_resource *r;
> - union mon_data_bits md;
> - int ret = 0;
> + struct mon_data *md;
> + int domid, ret = 0;
>
> rdtgrp = rdtgroup_kn_lock_live(of->kn);
> if (!rdtgrp) {
> @@ -676,17 +677,22 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
> goto out;
> }
>
> - md.priv = of->kn->priv;
> - resid = md.u.rid;
> - domid = md.u.domid;
> - evtid = md.u.evtid;
> + md = of->kn->priv;
> + if (WARN_ON_ONCE(!md)) {
> + ret = -EIO;
> + goto out;
> + }
> +
> + resid = md->rid;
> + domid = md->domid;
> + evtid = md->evtid;
> r = resctrl_arch_get_resource(resid);
>
> - if (md.u.sum) {
> + if (md->sum) {
> /*
> * This file requires summing across all domains that share
> * the L3 cache id that was provided in the "domid" field of the
> - * mon_data_bits union. Search all domains in the resource for
> + * struct mon_data. Search all domains in the resource for
> * one that matches this cache id.
> */
> list_for_each_entry(d, &r->mon_domains, hdr.list) {
> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> index 576383a808a2..01cb0ff89c85 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -89,27 +89,26 @@ struct mon_evt {
> };
>
> /**
> - * union mon_data_bits - Monitoring details for each event file.
> - * @priv: Used to store monitoring event data in @u
> - * as kernfs private data.
> - * @u.rid: Resource id associated with the event file.
> - * @u.evtid: Event id associated with the event file.
> - * @u.sum: Set when event must be summed across multiple
> - * domains.
> - * @u.domid: When @u.sum is zero this is the domain to which
> - * the event file belongs. When @sum is one this
> - * is the id of the L3 cache that all domains to be
> - * summed share.
> - * @u: Name of the bit fields struct.
> + * struct mon_data - Monitoring details for each event file.
> + * @list: Member of the global @mon_data_kn_priv_list list.
> + * @rid: Resource id associated with the event file.
> + * @evtid: Event id associated with the event file.
> + * @sum: Set when event must be summed across multiple
> + * domains.
> + * @domid: When @sum is zero this is the domain to which
> + * the event file belongs. When @sum is one this
> + * is the id of the L3 cache that all domains to be
> + * summed share.
> + *
> + * Pointed to by the kernfs kn->priv field of monitoring event files.
> + * Readers and writers must hold rdtgroup_mutex.
> */
> -union mon_data_bits {
> - void *priv;
> - struct {
> - unsigned int rid : 10;
> - enum resctrl_event_id evtid : 7;
> - unsigned int sum : 1;
> - unsigned int domid : 14;
> - } u;
> +struct mon_data {
> + struct list_head list;
> + enum resctrl_res_level rid;
> + enum resctrl_event_id evtid;
> + int domid;
> + bool sum;
> };
>
> /**
> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> index e2999f668593..d48078410d77 100644
> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> @@ -45,6 +45,12 @@ LIST_HEAD(rdt_all_groups);
> /* list of entries for the schemata file */
> LIST_HEAD(resctrl_schema_all);
>
> +/*
> + * List of struct mon_data containing private data of event files for use by
> + * rdtgroup_mondata_show(). Protected by rdtgroup_mutex.
> + */
> +static LIST_HEAD(mon_data_kn_priv_list);
> +
> /* The filesystem can only be mounted once. */
> bool resctrl_mounted;
>
> @@ -3093,6 +3099,63 @@ static void rmdir_all_sub(void)
> kernfs_remove(kn_mondata);
> }
>
> +/**
> + * mon_get_kn_priv() - Get the mon_data priv data for this event.
> + *
> + * The same values are used across the mon_data directories of all control and
> + * monitor groups for the same event in the same domain. Keep a list of
> + * allocated structures and re-use an existing one with the same values for
> + * @rid, @domid, etc.
> + *
> + * @rid: The resource id for the event file being created.
> + * @domid: The domain id for the event file being created.
> + * @mevt: The type of event file being created.
> + * @do_sum: Whether SNC summing monitors are being created.
> + */
> +static struct mon_data *mon_get_kn_priv(enum resctrl_res_level rid, int domid,
> + struct mon_evt *mevt,
> + bool do_sum)
> +{
> + struct mon_data *priv;
> +
> + lockdep_assert_held(&rdtgroup_mutex);
> +
> + list_for_each_entry(priv, &mon_data_kn_priv_list, list) {
> + if (priv->rid == rid && priv->domid == domid &&
> + priv->sum == do_sum && priv->evtid == mevt->evtid)
> + return priv;
> + }
> +
> + priv = kzalloc(sizeof(*priv), GFP_KERNEL);
> + if (!priv)
> + return NULL;
> +
> + priv->rid = rid;
> + priv->domid = domid;
> + priv->sum = do_sum;
> + priv->evtid = mevt->evtid;
> + list_add_tail(&priv->list, &mon_data_kn_priv_list);
> +
> + return priv;
> +}
> +
> +/**
> + * mon_put_kn_priv() - Free all allocated mon_data structures.
> + *
> + * Called when resctrl file system is unmounted.
> + */
> +static void mon_put_kn_priv(void)
> +{
> + struct mon_data *priv, *tmp;
> +
> + lockdep_assert_held(&rdtgroup_mutex);
> +
> + list_for_each_entry_safe(priv, tmp, &mon_data_kn_priv_list, list) {
> + list_del(&priv->list);
> + kfree(priv);
> + }
> +}
> +
> static void resctrl_fs_teardown(void)
> {
> lockdep_assert_held(&rdtgroup_mutex);
> @@ -3102,6 +3165,7 @@ static void resctrl_fs_teardown(void)
> return;
>
> rmdir_all_sub();
> + mon_put_kn_priv();
> rdt_pseudo_lock_release();
> rdtgroup_default.mode = RDT_MODE_SHAREABLE;
> closid_exit();
> @@ -3208,19 +3272,20 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
> bool do_sum)
> {
> struct rmid_read rr = {0};
> - union mon_data_bits priv;
> + struct mon_data *priv;
> struct mon_evt *mevt;
> - int ret;
> + int ret, domid;
>
> if (WARN_ON(list_empty(&r->evt_list)))
> return -EPERM;
>
> - priv.u.rid = r->rid;
> - priv.u.domid = do_sum ? d->ci->id : d->hdr.id;
> - priv.u.sum = do_sum;
> list_for_each_entry(mevt, &r->evt_list, list) {
> - priv.u.evtid = mevt->evtid;
> - ret = mon_addfile(kn, mevt->name, priv.priv);
> + domid = do_sum ? d->ci->id : d->hdr.id;
> + priv = mon_get_kn_priv(r->rid, domid, mevt, do_sum);
> + if (WARN_ON_ONCE(!priv))
> + return -EINVAL;
> +
> + ret = mon_addfile(kn, mevt->name, priv);
> if (ret)
> return ret;
>
Powered by blists - more mailing lists