[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <3359b947-6adb-4d77-97e1-5abb0b9d2a4e@nvidia.com>
Date: Wed, 24 Sep 2025 23:53:42 -0700
From: Fenghua Yu <fenghuay@...dia.com>
To: James Morse <james.morse@....com>, linux-kernel@...r.kernel.org,
linux-arm-kernel@...ts.infradead.org, linux-acpi@...r.kernel.org
Cc: D Scott Phillips OS <scott@...amperecomputing.com>,
carl@...amperecomputing.com, lcherian@...vell.com,
bobo.shaobowang@...wei.com, tan.shaopeng@...itsu.com,
baolin.wang@...ux.alibaba.com, Jamie Iles <quic_jiles@...cinc.com>,
Xin Hao <xhao@...ux.alibaba.com>, peternewman@...gle.com,
dfustini@...libre.com, amitsinght@...vell.com,
David Hildenbrand <david@...hat.com>, Dave Martin <dave.martin@....com>,
Koba Ko <kobak@...dia.com>, Shanker Donthineni <sdonthineni@...dia.com>,
baisheng.gao@...soc.com, Jonathan Cameron <jonathan.cameron@...wei.com>,
Rob Herring <robh@...nel.org>, Rohit Mathew <rohit.mathew@....com>,
Rafael Wysocki <rafael@...nel.org>, Len Brown <lenb@...nel.org>,
Lorenzo Pieralisi <lpieralisi@...nel.org>, Hanjun Guo
<guohanjun@...wei.com>, Sudeep Holla <sudeep.holla@....com>,
Catalin Marinas <catalin.marinas@....com>, Will Deacon <will@...nel.org>,
Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
Danilo Krummrich <dakr@...nel.org>
Subject: Re: [PATCH v2 20/29] arm_mpam: Allow configuration to be applied and
restored during cpu online
Hi, James,
On 9/10/25 13:43, James Morse wrote:
> When CPUs come online the MSC's original configuration should be restored.
>
> Add struct mpam_config to hold the configuration. This has a bitmap of
> features that were modified. Once the maximum partid is known, allocate
> a configuration array for each component, and reprogram each RIS
> configuration from this.
>
> CC: Dave Martin <Dave.Martin@....com>
> Signed-off-by: James Morse <james.morse@....com>
> ---
> Changes since v1:
> * Switched entry_rcu to srcu versions.
>
> Changes since RFC:
> * Added a comment about the ordering around max_partid.
> * Allocate configurations after interrupts are registered to reduce churn.
> * Added mpam_assert_partid_sizes_fixed();
> * Make reset use an all-ones instead of zero config.
> ---
> drivers/resctrl/mpam_devices.c | 269 +++++++++++++++++++++++++++++---
> drivers/resctrl/mpam_internal.h | 29 +++-
> 2 files changed, 271 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c
> index ec1db5f8b05c..7fd149109c75 100644
> --- a/drivers/resctrl/mpam_devices.c
> +++ b/drivers/resctrl/mpam_devices.c
> @@ -114,6 +114,16 @@ static LLIST_HEAD(mpam_garbage);
> /* When mpam is disabled, the printed reason to aid debugging */
> static char *mpam_disable_reason;
>
> +/*
> + * Once mpam is enabled, new requestors cannot further reduce the available
> + * partid. Assert that the size is fixed, and new requestors will be turned
> + * away.
> + */
> +static void mpam_assert_partid_sizes_fixed(void)
> +{
> + WARN_ON_ONCE(!partid_max_published);
> +}
> +
> static u32 __mpam_read_reg(struct mpam_msc *msc, u16 reg)
> {
> WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility));
> @@ -363,12 +373,16 @@ static void mpam_class_destroy(struct mpam_class *class)
> add_to_garbage(class);
> }
>
> +static void __destroy_component_cfg(struct mpam_component *comp);
> +
> static void mpam_comp_destroy(struct mpam_component *comp)
> {
> struct mpam_class *class = comp->class;
>
> lockdep_assert_held(&mpam_list_lock);
>
> + __destroy_component_cfg(comp);
> +
> list_del_rcu(&comp->class_list);
> add_to_garbage(comp);
>
> @@ -833,50 +847,105 @@ static void mpam_reset_msc_bitmap(struct mpam_msc *msc, u16 reg, u16 wd)
> __mpam_write_reg(msc, reg, bm);
> }
>
> -static void mpam_reset_ris_partid(struct mpam_msc_ris *ris, u16 partid)
> +/* Called via IPI. Call while holding an SRCU reference */
> +static void mpam_reprogram_ris_partid(struct mpam_msc_ris *ris, u16 partid,
> + struct mpam_config *cfg)
> {
> struct mpam_msc *msc = ris->vmsc->msc;
> struct mpam_props *rprops = &ris->props;
>
> - mpam_assert_srcu_read_lock_held();
> -
> mutex_lock(&msc->part_sel_lock);
> __mpam_part_sel(ris->ris_idx, partid, msc);
>
> - if (mpam_has_feature(mpam_feat_cpor_part, rprops))
> - mpam_reset_msc_bitmap(msc, MPAMCFG_CPBM, rprops->cpbm_wd);
> + if (mpam_has_feature(mpam_feat_cpor_part, rprops) &&
> + mpam_has_feature(mpam_feat_cpor_part, cfg)) {
> + if (cfg->reset_cpbm)
> + mpam_reset_msc_bitmap(msc, MPAMCFG_CPBM,
> + rprops->cpbm_wd);
> + else
> + mpam_write_partsel_reg(msc, CPBM, cfg->cpbm);
> + }
>
> - if (mpam_has_feature(mpam_feat_mbw_part, rprops))
> - mpam_reset_msc_bitmap(msc, MPAMCFG_MBW_PBM, rprops->mbw_pbm_bits);
> + if (mpam_has_feature(mpam_feat_mbw_part, rprops) &&
> + mpam_has_feature(mpam_feat_mbw_part, cfg)) {
> + if (cfg->reset_mbw_pbm)
> + mpam_reset_msc_bitmap(msc, MPAMCFG_MBW_PBM,
> + rprops->mbw_pbm_bits);
> + else
> + mpam_write_partsel_reg(msc, MBW_PBM, cfg->mbw_pbm);
> + }
>
> - if (mpam_has_feature(mpam_feat_mbw_min, rprops))
> + if (mpam_has_feature(mpam_feat_mbw_min, rprops) &&
> + mpam_has_feature(mpam_feat_mbw_min, cfg))
> mpam_write_partsel_reg(msc, MBW_MIN, 0);
>
> - if (mpam_has_feature(mpam_feat_mbw_max, rprops))
> - mpam_write_partsel_reg(msc, MBW_MAX, MPAMCFG_MBW_MAX_MAX);
> + if (mpam_has_feature(mpam_feat_mbw_max, rprops) &&
> + mpam_has_feature(mpam_feat_mbw_max, cfg))
> + mpam_write_partsel_reg(msc, MBW_MAX, cfg->mbw_max);
>
> - if (mpam_has_feature(mpam_feat_mbw_prop, rprops))
> + if (mpam_has_feature(mpam_feat_mbw_prop, rprops) &&
> + mpam_has_feature(mpam_feat_mbw_prop, cfg))
> mpam_write_partsel_reg(msc, MBW_PROP, 0);
> mutex_unlock(&msc->part_sel_lock);
> }
>
> +struct reprogram_ris {
> + struct mpam_msc_ris *ris;
> + struct mpam_config *cfg;
> +};
> +
> +/* Call with MSC lock held */
> +static int mpam_reprogram_ris(void *_arg)
> +{
> + u16 partid, partid_max;
> + struct reprogram_ris *arg = _arg;
> + struct mpam_msc_ris *ris = arg->ris;
> + struct mpam_config *cfg = arg->cfg;
> +
> + if (ris->in_reset_state)
> + return 0;
> +
> + spin_lock(&partid_max_lock);
> + partid_max = mpam_partid_max;
> + spin_unlock(&partid_max_lock);
> + for (partid = 0; partid <= partid_max; partid++)
> + mpam_reprogram_ris_partid(ris, partid, cfg);
> +
> + return 0;
> +}
> +
> +static void mpam_init_reset_cfg(struct mpam_config *reset_cfg)
> +{
> + memset(reset_cfg, 0, sizeof(*reset_cfg));
> +
> + reset_cfg->features = ~0;
> + reset_cfg->cpbm = ~0;
> + reset_cfg->mbw_pbm = ~0;
> + reset_cfg->mbw_max = MPAMCFG_MBW_MAX_MAX;
> +
> + reset_cfg->reset_cpbm = true;
> + reset_cfg->reset_mbw_pbm = true;
> +}
> +
> /*
> * Called via smp_call_on_cpu() to prevent migration, while still being
> * pre-emptible.
> */
> static int mpam_reset_ris(void *arg)
> {
> - u16 partid, partid_max;
> + struct mpam_config reset_cfg;
> struct mpam_msc_ris *ris = arg;
> + struct reprogram_ris reprogram_arg;
>
> if (ris->in_reset_state)
> return 0;
>
> - spin_lock(&partid_max_lock);
> - partid_max = mpam_partid_max;
> - spin_unlock(&partid_max_lock);
> - for (partid = 0; partid < partid_max; partid++)
> - mpam_reset_ris_partid(ris, partid);
> + mpam_init_reset_cfg(&reset_cfg);
> +
> + reprogram_arg.ris = ris;
> + reprogram_arg.cfg = &reset_cfg;
> +
> + mpam_reprogram_ris(&reprogram_arg);
>
> return 0;
> }
> @@ -922,6 +991,40 @@ static void mpam_reset_msc(struct mpam_msc *msc, bool online)
> }
> }
>
> +static void mpam_reprogram_msc(struct mpam_msc *msc)
> +{
> + u16 partid;
> + bool reset;
> + struct mpam_config *cfg;
> + struct mpam_msc_ris *ris;
> +
> + /*
> + * No lock for mpam_partid_max as partid_max_published has been
> + * set by mpam_enabled(), so the values can no longer change.
> + */
> + mpam_assert_partid_sizes_fixed();
> +
> + guard(srcu)(&mpam_srcu);
The mpam_srcu read lock is already held by the caller, mpam_cpu_online(), so
taking it again here with guard(srcu)(&mpam_srcu) is unnecessary. Dropping it
would simplify the logic and avoid the extra overhead.
> + list_for_each_entry_srcu(ris, &msc->ris, msc_list,
> + srcu_read_lock_held(&mpam_srcu)) {
> + if (!mpam_is_enabled() && !ris->in_reset_state) {
> + mpam_touch_msc(msc, &mpam_reset_ris, ris);
> + ris->in_reset_state = true;
> + continue;
> + }
> +
> + reset = true;
> + for (partid = 0; partid <= mpam_partid_max; partid++) {
> + cfg = &ris->vmsc->comp->cfg[partid];
> + if (cfg->features)
> + reset = false;
> +
> + mpam_reprogram_ris_partid(ris, partid, cfg);
> + }
> + ris->in_reset_state = reset;
> + }
> +}
> +
> static void _enable_percpu_irq(void *_irq)
> {
> int *irq = _irq;
> @@ -944,7 +1047,7 @@ static int mpam_cpu_online(unsigned int cpu)
> _enable_percpu_irq(&msc->reenable_error_ppi);
>
> if (atomic_fetch_inc(&msc->online_refs) == 0)
> - mpam_reset_msc(msc, true);
> + mpam_reprogram_msc(msc);
> }
> srcu_read_unlock(&mpam_srcu, idx);
[SNIP]
Thanks.
-Fenghua
Powered by blists - more mailing lists