[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <a0afaae6-0b7c-4a94-9b2a-b09c8016922a@nvidia.com>
Date: Fri, 3 Oct 2025 09:54:53 -0700
From: Fenghua Yu <fenghuay@...dia.com>
To: James Morse <james.morse@....com>, linux-kernel@...r.kernel.org,
linux-arm-kernel@...ts.infradead.org, linux-acpi@...r.kernel.org
Cc: D Scott Phillips OS <scott@...amperecomputing.com>,
carl@...amperecomputing.com, lcherian@...vell.com,
bobo.shaobowang@...wei.com, tan.shaopeng@...itsu.com,
baolin.wang@...ux.alibaba.com, Jamie Iles <quic_jiles@...cinc.com>,
Xin Hao <xhao@...ux.alibaba.com>, peternewman@...gle.com,
dfustini@...libre.com, amitsinght@...vell.com,
David Hildenbrand <david@...hat.com>, Dave Martin <dave.martin@....com>,
Koba Ko <kobak@...dia.com>, Shanker Donthineni <sdonthineni@...dia.com>,
baisheng.gao@...soc.com, Jonathan Cameron <jonathan.cameron@...wei.com>,
Rob Herring <robh@...nel.org>, Rohit Mathew <rohit.mathew@....com>,
Rafael Wysocki <rafael@...nel.org>, Len Brown <lenb@...nel.org>,
Lorenzo Pieralisi <lpieralisi@...nel.org>, Hanjun Guo
<guohanjun@...wei.com>, Sudeep Holla <sudeep.holla@....com>,
Catalin Marinas <catalin.marinas@....com>, Will Deacon <will@...nel.org>,
Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
Danilo Krummrich <dakr@...nel.org>, Ben Horgan <ben.horgan@....com>
Subject: Re: [PATCH v2 08/29] arm_mpam: Add the class and component structures
for firmware described ris
Hi, James,
On 9/10/25 13:42, James Morse wrote:
> An MSC is a container of resources, each identified by their RIS index.
> Some RIS are described by firmware to provide their position in the system.
> Others are discovered when the driver probes the hardware.
>
> To configure a resource it needs to be found by its class, e.g. 'L2'.
> There are two kinds of grouping, a class is a set of components, which
> are visible to user-space as there are likely to be multiple instances
> of the L2 cache. (e.g. one per cluster or package)
>
> Add support for creating and destroying structures to allow a hierarchy
> of resources to be created.
>
> CC: Ben Horgan <ben.horgan@....com>
> Signed-off-by: James Morse <james.morse@....com>
> ---
> Changes since v1:
> * Fixed a comp/vmsc typo.
> * Removed duplicate description from the commit message.
> * Moved parenthesis in the add_to_garbage() macro.
> * Check for out of range ris_idx when creating ris.
> * Removed GFP as probe_lock is no longer a spin lock.
> * Removed alloc flag as ended up searching the lists itself.
> * Added a comment about affinity masks not overlapping.
>
> Changes since RFC:
> * removed a pr_err() debug message that crept in.
> ---
> drivers/resctrl/mpam_devices.c | 406 +++++++++++++++++++++++++++++++-
> drivers/resctrl/mpam_internal.h | 90 +++++++
> include/linux/arm_mpam.h | 8 +-
> 3 files changed, 493 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c
> index efc4738e3b4d..c7f4981b3545 100644
> --- a/drivers/resctrl/mpam_devices.c
> +++ b/drivers/resctrl/mpam_devices.c
> @@ -18,7 +18,6 @@
> #include <linux/printk.h>
> #include <linux/slab.h>
> #include <linux/spinlock.h>
> -#include <linux/srcu.h>
> #include <linux/types.h>
>
> #include "mpam_internal.h"
> @@ -31,7 +30,7 @@
> static DEFINE_MUTEX(mpam_list_lock);
> static LIST_HEAD(mpam_all_msc);
>
> -static struct srcu_struct mpam_srcu;
> +struct srcu_struct mpam_srcu;
>
> /*
> * Number of MSCs that have been probed. Once all MSC have been probed MPAM
> @@ -39,6 +38,402 @@ static struct srcu_struct mpam_srcu;
> */
> static atomic_t mpam_num_msc;
>
> +/*
> + * An MSC is a physical container for controls and monitors, each identified by
> + * their RIS index. These share a base-address, interrupts and some MMIO
> + * registers. A vMSC is a virtual container for RIS in an MSC that control or
> + * monitor the same thing. Members of a vMSC are all RIS in the same MSC, but
> + * not all RIS in an MSC share a vMSC.
> + * Components are a group of vMSC that control or monitor the same thing but
> + * are from different MSC, so have different base-address, interrupts etc.
> + * Classes are the set components of the same type.
> + *
> + * The features of a vMSC is the union of the RIS it contains.
> + * The features of a Class and Component are the common subset of the vMSC
> + * they contain.
> + *
> + * e.g. The system cache may have bandwidth controls on multiple interfaces,
> + * for regulating traffic from devices independently of traffic from CPUs.
> + * If these are two RIS in one MSC, they will be treated as controlling
> + * different things, and will not share a vMSC/component/class.
> + *
> + * e.g. The L2 may have one MSC and two RIS, one for cache-controls another
> + * for bandwidth. These two RIS are members of the same vMSC.
> + *
> + * e.g. The set of RIS that make up the L2 are grouped as a component. These
> + * are sometimes termed slices. They should be configured the same, as if there
> + * were only one.
> + *
> + * e.g. The SoC probably has more than one L2, each attached to a distinct set
> + * of CPUs. All the L2 components are grouped as a class.
> + *
> + * When creating an MSC, struct mpam_msc is added to the all mpam_all_msc list,
> + * then linked via struct mpam_ris to a vmsc, component and class.
> + * The same MSC may exist under different class->component->vmsc paths, but the
> + * RIS index will be unique.
> + */
> +LIST_HEAD(mpam_classes);
> +
> +/* List of all objects that can be free()d after synchronise_srcu() */
> +static LLIST_HEAD(mpam_garbage);
> +
> +#define init_garbage(x) init_llist_node(&(x)->garbage.llist)
> +
> +static struct mpam_vmsc *
> +mpam_vmsc_alloc(struct mpam_component *comp, struct mpam_msc *msc)
> +{
> + struct mpam_vmsc *vmsc;
> +
> + lockdep_assert_held(&mpam_list_lock);
> +
> + vmsc = kzalloc(sizeof(*vmsc), GFP_KERNEL);
> + if (!vmsc)
> + return ERR_PTR(-ENOMEM);
> + init_garbage(vmsc);
> +
> + INIT_LIST_HEAD_RCU(&vmsc->ris);
> + INIT_LIST_HEAD_RCU(&vmsc->comp_list);
> + vmsc->comp = comp;
> + vmsc->msc = msc;
> +
> + list_add_rcu(&vmsc->comp_list, &comp->vmsc);
> +
> + return vmsc;
> +}
> +
> +static struct mpam_vmsc *mpam_vmsc_get(struct mpam_component *comp,
> + struct mpam_msc *msc)
> +{
> + struct mpam_vmsc *vmsc;
> +
> + lockdep_assert_held(&mpam_list_lock);
> +
> + list_for_each_entry(vmsc, &comp->vmsc, comp_list) {
> + if (vmsc->msc->id == msc->id)
> + return vmsc;
> + }
> +
> + return mpam_vmsc_alloc(comp, msc);
> +}
> +
> +static struct mpam_component *
> +mpam_component_alloc(struct mpam_class *class, int id)
> +{
> + struct mpam_component *comp;
> +
> + lockdep_assert_held(&mpam_list_lock);
> +
> + comp = kzalloc(sizeof(*comp), GFP_KERNEL);
> + if (!comp)
> + return ERR_PTR(-ENOMEM);
> + init_garbage(comp);
> +
> + comp->comp_id = id;
> + INIT_LIST_HEAD_RCU(&comp->vmsc);
> + /* affinity is updated when ris are added */
> + INIT_LIST_HEAD_RCU(&comp->class_list);
> + comp->class = class;
> +
> + list_add_rcu(&comp->class_list, &class->components);
> +
> + return comp;
> +}
> +
> +static struct mpam_component *
> +mpam_component_get(struct mpam_class *class, int id)
> +{
> + struct mpam_component *comp;
> +
> + lockdep_assert_held(&mpam_list_lock);
> +
> + list_for_each_entry(comp, &class->components, class_list) {
> + if (comp->comp_id == id)
> + return comp;
> + }
> +
> + return mpam_component_alloc(class, id);
> +}
> +
> +static struct mpam_class *
> +mpam_class_alloc(u8 level_idx, enum mpam_class_types type)
> +{
> + struct mpam_class *class;
> +
> + lockdep_assert_held(&mpam_list_lock);
> +
> + class = kzalloc(sizeof(*class), GFP_KERNEL);
> + if (!class)
> + return ERR_PTR(-ENOMEM);
> + init_garbage(class);
> +
> + INIT_LIST_HEAD_RCU(&class->components);
> + /* affinity is updated when ris are added */
> + class->level = level_idx;
> + class->type = type;
> + INIT_LIST_HEAD_RCU(&class->classes_list);
> +
> + list_add_rcu(&class->classes_list, &mpam_classes);
> +
> + return class;
> +}
> +
> +static struct mpam_class *
> +mpam_class_get(u8 level_idx, enum mpam_class_types type)
> +{
> + bool found = false;
> + struct mpam_class *class;
> +
> + lockdep_assert_held(&mpam_list_lock);
> +
> + list_for_each_entry(class, &mpam_classes, classes_list) {
> + if (class->type == type && class->level == level_idx) {
> + found = true;
> + break;
> + }
> + }
> +
> + if (found)
> + return class;
> +
> + return mpam_class_alloc(level_idx, type);
> +}
> +
> +#define add_to_garbage(x) \
> +do { \
> + __typeof__(x) _x = (x); \
> + _x->garbage.to_free = _x; \
> + llist_add(&_x->garbage.llist, &mpam_garbage); \
> +} while (0)
> +
> +static void mpam_class_destroy(struct mpam_class *class)
> +{
> + lockdep_assert_held(&mpam_list_lock);
> +
> + list_del_rcu(&class->classes_list);
> + add_to_garbage(class);
> +}
> +
> +static void mpam_comp_destroy(struct mpam_component *comp)
> +{
> + struct mpam_class *class = comp->class;
> +
> + lockdep_assert_held(&mpam_list_lock);
> +
> + list_del_rcu(&comp->class_list);
> + add_to_garbage(comp);
> +
> + if (list_empty(&class->components))
> + mpam_class_destroy(class);
> +}
> +
> +static void mpam_vmsc_destroy(struct mpam_vmsc *vmsc)
> +{
> + struct mpam_component *comp = vmsc->comp;
> +
> + lockdep_assert_held(&mpam_list_lock);
> +
> + list_del_rcu(&vmsc->comp_list);
> + add_to_garbage(vmsc);
> +
> + if (list_empty(&comp->vmsc))
> + mpam_comp_destroy(comp);
> +}
> +
> +static void mpam_ris_destroy(struct mpam_msc_ris *ris)
> +{
> + struct mpam_vmsc *vmsc = ris->vmsc;
> + struct mpam_msc *msc = vmsc->msc;
> + struct platform_device *pdev = msc->pdev;
> + struct mpam_component *comp = vmsc->comp;
> + struct mpam_class *class = comp->class;
> +
> + lockdep_assert_held(&mpam_list_lock);
> +
> + /*
> + * It is assumed affinities don't overlap. If they do the class becomes
> + * unusable immediately.
> + */
> + cpumask_andnot(&comp->affinity, &comp->affinity, &ris->affinity);
> + cpumask_andnot(&class->affinity, &class->affinity, &ris->affinity);
> + clear_bit(ris->ris_idx, &msc->ris_idxs);
> + list_del_rcu(&ris->vmsc_list);
> + list_del_rcu(&ris->msc_list);
> + add_to_garbage(ris);
> + ris->garbage.pdev = pdev;
> +
> + if (list_empty(&vmsc->ris))
> + mpam_vmsc_destroy(vmsc);
> +}
> +
> +/*
> + * There are two ways of reaching a struct mpam_msc_ris. Via the
> + * class->component->vmsc->ris, or via the msc.
> + * When destroying the msc, the other side needs unlinking and cleaning up too.
> + */
> +static void mpam_msc_destroy(struct mpam_msc *msc)
> +{
> + struct platform_device *pdev = msc->pdev;
> + struct mpam_msc_ris *ris, *tmp;
> +
> + lockdep_assert_held(&mpam_list_lock);
> +
> + list_for_each_entry_safe(ris, tmp, &msc->ris, msc_list)
> + mpam_ris_destroy(ris);
> +
> + list_del_rcu(&msc->all_msc_list);
> + platform_set_drvdata(pdev, NULL);
> +
> + add_to_garbage(msc);
> + msc->garbage.pdev = pdev;
> +}
> +
> +static void mpam_free_garbage(void)
> +{
> + struct mpam_garbage *iter, *tmp;
> + struct llist_node *to_free = llist_del_all(&mpam_garbage);
> +
Should this be protected by mpam_list_lock and check if the lock is held?
+ lockdep_assert_held(&mpam_list_lock);
Multiple threads may add and free garbage in parallel. Please see later free_garbage() is not protected by any lock.
> + if (!to_free)
> + return;
> +
> + synchronize_srcu(&mpam_srcu);
> +
> + llist_for_each_entry_safe(iter, tmp, to_free, llist) {
> + if (iter->pdev)
> + devm_kfree(&iter->pdev->dev, iter->to_free);
> + else
> + kfree(iter->to_free);
> + }
> +}
> +
> +/*
> + * The cacheinfo structures are only populated when CPUs are online.
> + */
> +int mpam_get_cpumask_from_cache_id(unsigned long cache_id, u32 cache_level,
> + cpumask_t *affinity)
> +{
> + return acpi_pptt_get_cpumask_from_cache_id(cache_id, affinity);
> +}
> +
> +/*
> + * cpumask_of_node() only knows about online CPUs. This can't tell us whether
> + * a class is represented on all possible CPUs.
> + */
> +static void get_cpumask_from_node_id(u32 node_id, cpumask_t *affinity)
> +{
> + int cpu;
> +
> + for_each_possible_cpu(cpu) {
> + if (node_id == cpu_to_node(cpu))
> + cpumask_set_cpu(cpu, affinity);
> + }
> +}
> +
> +static int mpam_ris_get_affinity(struct mpam_msc *msc, cpumask_t *affinity,
> + enum mpam_class_types type,
> + struct mpam_class *class,
> + struct mpam_component *comp)
> +{
> + int err;
> +
> + switch (type) {
> + case MPAM_CLASS_CACHE:
> + err = mpam_get_cpumask_from_cache_id(comp->comp_id, class->level,
> + affinity);
> + if (err)
> + return err;
> +
> + if (cpumask_empty(affinity))
> + pr_warn_once("%s no CPUs associated with cache node",
> + dev_name(&msc->pdev->dev));
> +
> + break;
> + case MPAM_CLASS_MEMORY:
> + get_cpumask_from_node_id(comp->comp_id, affinity);
> + /* affinity may be empty for CPU-less memory nodes */
> + break;
> + case MPAM_CLASS_UNKNOWN:
> + return 0;
> + }
> +
> + cpumask_and(affinity, affinity, &msc->accessibility);
> +
> + return 0;
> +}
> +
> +static int mpam_ris_create_locked(struct mpam_msc *msc, u8 ris_idx,
> + enum mpam_class_types type, u8 class_id,
> + int component_id)
> +{
> + int err;
> + struct mpam_vmsc *vmsc;
> + struct mpam_msc_ris *ris;
> + struct mpam_class *class;
> + struct mpam_component *comp;
> +
> + lockdep_assert_held(&mpam_list_lock);
> +
> + if (ris_idx > MPAM_MSC_MAX_NUM_RIS)
> + return -EINVAL;
> +
> + if (test_and_set_bit(ris_idx, &msc->ris_idxs))
> + return -EBUSY;
> +
Should setting msc->ris_idxs bit be moved to the end of this function
after all error handling paths? The reason is this bit is better to be 0
(or recovered) if any error happens. It's hard to recover it to 0 for
each error handling. The easiest way is to set it at the end of the
function.
> + ris = devm_kzalloc(&msc->pdev->dev, sizeof(*ris), GFP_KERNEL);
> + if (!ris)
> + return -ENOMEM;
> + init_garbage(ris);
> +
> + class = mpam_class_get(class_id, type);
> + if (IS_ERR(class))
> + return PTR_ERR(class);
> +
> + comp = mpam_component_get(class, component_id);
> + if (IS_ERR(comp)) {
> + if (list_empty(&class->components))
> + mpam_class_destroy(class);
> + return PTR_ERR(comp);
> + }
> +
> + vmsc = mpam_vmsc_get(comp, msc);
> + if (IS_ERR(vmsc)) {
> + if (list_empty(&comp->vmsc))
> + mpam_comp_destroy(comp);
> + return PTR_ERR(vmsc);
> + }
> +
> + err = mpam_ris_get_affinity(msc, &ris->affinity, type, class, comp);
> + if (err) {
> + if (list_empty(&vmsc->ris))
> + mpam_vmsc_destroy(vmsc);
> + return err;
> + }
> +
> + ris->ris_idx = ris_idx;
> + INIT_LIST_HEAD_RCU(&ris->vmsc_list);
vmsc_list will be used but not initialized. Missing
INIT_LIST_HEAD_RCU(&ris->msc_list) here?
> + ris->vmsc = vmsc;
> +
> + cpumask_or(&comp->affinity, &comp->affinity, &ris->affinity);
> + cpumask_or(&class->affinity, &class->affinity, &ris->affinity);
> + list_add_rcu(&ris->vmsc_list, &vmsc->ris);
> +
Setting the msc->ris_idxs here is better to avoid to clear it in each
error handling path.
> + return 0;
> +}
> +
> +int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx,
> + enum mpam_class_types type, u8 class_id, int component_id)
> +{
> + int err;
> +
> + mutex_lock(&mpam_list_lock);
> + err = mpam_ris_create_locked(msc, ris_idx, type, class_id,
> + component_id);
> + mutex_unlock(&mpam_list_lock);
> + if (err)
> + mpam_free_garbage();
> +
> + return err;
> +}
> +
> /*
> * An MSC can control traffic from a set of CPUs, but may only be accessible
> * from a (hopefully wider) set of CPUs. The common reason for this is power
> @@ -74,10 +469,10 @@ static void mpam_msc_drv_remove(struct platform_device *pdev)
> return;
>
> mutex_lock(&mpam_list_lock);
> - platform_set_drvdata(pdev, NULL);
> - list_del_rcu(&msc->all_msc_list);
> - synchronize_srcu(&mpam_srcu);
> + mpam_msc_destroy(msc);
> mutex_unlock(&mpam_list_lock);
> +
> + mpam_free_garbage();
Should mpam_free_garbage() be protected by mpam_list_lock? It may race
with adding garbage. I can see other adding and freeing garbage are
protected by mpam_list_lock but not this one.
> }
>
> static int mpam_msc_drv_probe(struct platform_device *pdev)
> @@ -95,6 +490,7 @@ static int mpam_msc_drv_probe(struct platform_device *pdev)
> err = -ENOMEM;
> break;
> }
> + init_garbage(msc);
>
> mutex_init(&msc->probe_lock);
> mutex_init(&msc->part_sel_lock);
> diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h
> index 7c63d590fc98..02e9576ece6b 100644
> --- a/drivers/resctrl/mpam_internal.h
> +++ b/drivers/resctrl/mpam_internal.h
> @@ -7,10 +7,29 @@
> #include <linux/arm_mpam.h>
> #include <linux/cpumask.h>
> #include <linux/io.h>
> +#include <linux/llist.h>
> #include <linux/mailbox_client.h>
> #include <linux/mutex.h>
> #include <linux/resctrl.h>
> #include <linux/sizes.h>
> +#include <linux/srcu.h>
> +
> +#define MPAM_MSC_MAX_NUM_RIS 16
> +
> +/*
> + * Structures protected by SRCU may not be freed for a surprising amount of
> + * time (especially if perf is running). To ensure the MPAM error interrupt can
> + * tear down all the structures, build a list of objects that can be gargbage
> + * collected once synchronize_srcu() has returned.
> + * If pdev is non-NULL, use devm_kfree().
> + */
> +struct mpam_garbage {
> + /* member of mpam_garbage */
> + struct llist_node llist;
> +
> + void *to_free;
> + struct platform_device *pdev;
> +};
>
> struct mpam_msc {
> /* member of mpam_all_msc */
> @@ -57,8 +76,79 @@ struct mpam_msc {
>
> void __iomem *mapped_hwpage;
> size_t mapped_hwpage_sz;
> +
> + struct mpam_garbage garbage;
> };
>
> +struct mpam_class {
> + /* mpam_components in this class */
> + struct list_head components;
> +
> + cpumask_t affinity;
> +
> + u8 level;
> + enum mpam_class_types type;
> +
> + /* member of mpam_classes */
> + struct list_head classes_list;
> +
> + struct mpam_garbage garbage;
> +};
> +
> +struct mpam_component {
> + u32 comp_id;
> +
> + /* mpam_vmsc in this component */
> + struct list_head vmsc;
> +
> + cpumask_t affinity;
> +
> + /* member of mpam_class:components */
> + struct list_head class_list;
> +
> + /* parent: */
> + struct mpam_class *class;
> +
> + struct mpam_garbage garbage;
> +};
> +
> +struct mpam_vmsc {
> + /* member of mpam_component:vmsc_list */
> + struct list_head comp_list;
> +
> + /* mpam_msc_ris in this dvmsc */
> + struct list_head ris;
> +
> + /* All RIS in this vMSC are members of this MSC */
> + struct mpam_msc *msc;
> +
> + /* parent: */
> + struct mpam_component *comp;
> +
> + struct mpam_garbage garbage;
> +};
> +
> +struct mpam_msc_ris {
> + u8 ris_idx;
> +
> + cpumask_t affinity;
> +
> + /* member of mpam_vmsc:ris */
> + struct list_head vmsc_list;
> +
> + /* member of mpam_msc:ris */
> + struct list_head msc_list;
> +
> + /* parent: */
> + struct mpam_vmsc *vmsc;
> +
> + struct mpam_garbage garbage;
> +};
> +
> +/* List of all classes - protected by srcu*/
> +extern struct srcu_struct mpam_srcu;
> +extern struct list_head mpam_classes;
> +
> int mpam_get_cpumask_from_cache_id(unsigned long cache_id, u32 cache_level,
> cpumask_t *affinity);
>
> diff --git a/include/linux/arm_mpam.h b/include/linux/arm_mpam.h
> index 3d6c39c667c3..3206f5ddc147 100644
> --- a/include/linux/arm_mpam.h
> +++ b/include/linux/arm_mpam.h
> @@ -38,11 +38,7 @@ static inline int acpi_mpam_parse_resources(struct mpam_msc *msc,
> static inline int acpi_mpam_count_msc(void) { return -EINVAL; }
> #endif
>
> -static inline int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx,
> - enum mpam_class_types type, u8 class_id,
> - int component_id)
> -{
> - return -EINVAL;
> -}
> +int mpam_ris_create(struct mpam_msc *msc, u8 ris_idx,
> + enum mpam_class_types type, u8 class_id, int component_id);
>
> #endif /* __LINUX_ARM_MPAM_H */
Thanks.
-Fenghua
Powered by blists - more mailing lists