Message-ID: <487a736c-27c8-427c-97d5-31fd2d97e919@arm.com>
Date: Fri, 12 Sep 2025 15:40:41 +0100
From: Ben Horgan <ben.horgan@....com>
To: James Morse <james.morse@....com>, linux-kernel@...r.kernel.org,
linux-arm-kernel@...ts.infradead.org, linux-acpi@...r.kernel.org
Cc: D Scott Phillips OS <scott@...amperecomputing.com>,
carl@...amperecomputing.com, lcherian@...vell.com,
bobo.shaobowang@...wei.com, tan.shaopeng@...itsu.com,
baolin.wang@...ux.alibaba.com, Jamie Iles <quic_jiles@...cinc.com>,
Xin Hao <xhao@...ux.alibaba.com>, peternewman@...gle.com,
dfustini@...libre.com, amitsinght@...vell.com,
David Hildenbrand <david@...hat.com>, Dave Martin <dave.martin@....com>,
Koba Ko <kobak@...dia.com>, Shanker Donthineni <sdonthineni@...dia.com>,
fenghuay@...dia.com, baisheng.gao@...soc.com,
Jonathan Cameron <jonathan.cameron@...wei.com>, Rob Herring
<robh@...nel.org>, Rohit Mathew <rohit.mathew@....com>,
Rafael Wysocki <rafael@...nel.org>, Len Brown <lenb@...nel.org>,
Lorenzo Pieralisi <lpieralisi@...nel.org>, Hanjun Guo
<guohanjun@...wei.com>, Sudeep Holla <sudeep.holla@....com>,
Catalin Marinas <catalin.marinas@....com>, Will Deacon <will@...nel.org>,
Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
Danilo Krummrich <dakr@...nel.org>
Subject: Re: [PATCH v2 18/29] arm_mpam: Register and enable IRQs
Hi James,
On 9/10/25 21:42, James Morse wrote:
> Register and enable error IRQs. All the MPAM error interrupts indicate a
> software bug, e.g. an out-of-range partid. If the error interrupt is ever
> signalled, attempt to disable MPAM.
>
> Only the irq handler accesses the ESR register, so no locking is needed.
> The work to disable MPAM after an error needs to happen in process
> context as it takes a mutex. It also unregisters the interrupts, meaning
> it can't be done from the threaded part of a threaded interrupt.
> Instead, mpam_disable() gets scheduled.
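(Aside on the pattern, for anyone following the thread: this is the
usual hard-irq to workqueue handoff. A minimal sketch of the wiring I
assume, given that mpam_disable() below takes a work_struct:

	static DECLARE_WORK(mpam_broken_work, &mpam_disable);

	/* from the hard irq handler */
	schedule_work(&mpam_broken_work);

free_irq() waits for in-flight handlers, including the threaded half of
a threaded interrupt, so the teardown can't run from there without
deadlocking.)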
>
> Enabling the IRQs in the MSC may involve cross-calling to a CPU that
> can access the MSC.
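(For readers: I assume mpam_touch_msc() builds on the standard
cross-call idiom; the _remote shim below is hypothetical, while
smp_call_function_any() is the real API:

	static void mpam_enable_msc_ecr_remote(void *_msc)
	{
		mpam_enable_msc_ecr(_msc);	/* int return discarded */
	}

	/* run on any one CPU in the MSC's accessibility mask */
	smp_call_function_any(&msc->accessibility,
			      mpam_enable_msc_ecr_remote, msc, true);
)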
>
> Once the IRQ is requested, the mpam_disable() path can be called
> asynchronously, which will walk structures sized by max_partid. Ensure
> this size is fixed before the interrupt is requested.
>
> CC: Rohit Mathew <rohit.mathew@....com>
> Tested-by: Rohit Mathew <rohit.mathew@....com>
> Signed-off-by: James Morse <james.morse@....com>
> ---
> Changes since v1:
> * Made mpam_unregister_irqs() safe to race with itself.
> * Removed threaded interrupts.
> * Schedule mpam_disable() from cpuhp callback in the case of an error.
> * Added mpam_disable_reason.
> * Use alloc_percpu()
>
> Changes since RFC:
> * Use guard() macro when walking the srcu list.
> * Use INTEN macro for enabling interrupts.
> * Move partid_max_published up earlier in mpam_enable_once().
> ---
> drivers/resctrl/mpam_devices.c | 277 +++++++++++++++++++++++++++++++-
> drivers/resctrl/mpam_internal.h | 10 ++
> 2 files changed, 284 insertions(+), 3 deletions(-)
>
>
> +static int __setup_ppi(struct mpam_msc *msc)
> +{
> + int cpu;
> + struct device *dev = &msc->pdev->dev;
> +
> + msc->error_dev_id = alloc_percpu(struct mpam_msc *);
> + if (!msc->error_dev_id)
> + return -ENOMEM;
> +
> + for_each_cpu(cpu, &msc->accessibility) {
> + struct mpam_msc *empty = *per_cpu_ptr(msc->error_dev_id, cpu);
> +
> + if (empty) {
I'm confused about how this if condition can be satisfied. Isn't the
alloc clearing msc->error_dev_id for each cpu, and then it only gets
set for each cpu later in the iteration?
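If the goal is to catch two MSCs wired to the same PPI, I'd have
expected the check to look at state shared between MSCs rather than at
this msc's freshly allocated table. A hypothetical sketch of what I
mean, with a single mpam_ppi_owner table shared by all MSCs:

	/* one percpu table for all MSCs, not one per msc */
	static struct mpam_msc * __percpu *mpam_ppi_owner;

	for_each_cpu(cpu, &msc->accessibility) {
		struct mpam_msc *owner = *per_cpu_ptr(mpam_ppi_owner, cpu);

		if (owner && owner != msc)
			return -EBUSY;
		*per_cpu_ptr(mpam_ppi_owner, cpu) = msc;
	}

As it stands, alloc_percpu() has just zeroed error_dev_id, so empty
looks like it is always NULL and the -EBUSY path unreachable.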
> + dev_err_once(dev, "MSC shares PPI with %s!\n",
> + dev_name(&empty->pdev->dev));
> + return -EBUSY;
> + }
> + *per_cpu_ptr(msc->error_dev_id, cpu) = msc;
> + }
> +
> + return 0;
> +}
> +
> +static int mpam_msc_setup_error_irq(struct mpam_msc *msc)
> +{
> + int irq;
> +
> + irq = platform_get_irq_byname_optional(msc->pdev, "error");
> + if (irq <= 0)
> + return 0;
> +
> + /* Allocate and initialise the percpu device pointer for PPI */
> + if (irq_is_percpu(irq))
> + return __setup_ppi(msc);
> +
> +	/* Sanity check: a shared interrupt can fire on any CPU, so the MSC must be reachable from all of them */
> + if (!cpumask_equal(&msc->accessibility, cpu_possible_mask)) {
> + pr_err_once("msc:%u is a private resource with a shared error interrupt",
> + msc->id);
> + return -EINVAL;
> + }
> +
> + return 0;
> +}
> +
> /*
> * An MSC can control traffic from a set of CPUs, but may only be accessible
> * from a (hopefully wider) set of CPUs. The common reason for this is power
> @@ -1060,6 +1143,10 @@ static int mpam_msc_drv_probe(struct platform_device *pdev)
> break;
> }
>
> + err = mpam_msc_setup_error_irq(msc);
> + if (err)
> + break;
> +
> if (device_property_read_u32(&pdev->dev, "pcc-channel",
> &msc->pcc_subspace_id))
> msc->iface = MPAM_IFACE_MMIO;
> @@ -1318,11 +1405,172 @@ static void mpam_enable_merge_features(struct list_head *all_classes_list)
> }
> }
>
> +static char *mpam_errcode_names[16] = {
> + [0] = "No error",
> + [1] = "PARTID_SEL_Range",
> + [2] = "Req_PARTID_Range",
> + [3] = "MSMONCFG_ID_RANGE",
> + [4] = "Req_PMG_Range",
> + [5] = "Monitor_Range",
> + [6] = "intPARTID_Range",
> + [7] = "Unexpected_INTERNAL",
> + [8] = "Undefined_RIS_PART_SEL",
> + [9] = "RIS_No_Control",
> + [10] = "Undefined_RIS_MON_SEL",
> + [11] = "RIS_No_Monitor",
> + [12 ... 15] = "Reserved"
> +};
> +
> +static int mpam_enable_msc_ecr(void *_msc)
> +{
> + struct mpam_msc *msc = _msc;
> +
> + __mpam_write_reg(msc, MPAMF_ECR, MPAMF_ECR_INTEN);
> +
> + return 0;
> +}
> +
> +/* This can run in mpam_disable(), and the interrupt handler on the same CPU */
> +static int mpam_disable_msc_ecr(void *_msc)
> +{
> + struct mpam_msc *msc = _msc;
> +
> + __mpam_write_reg(msc, MPAMF_ECR, 0);
> +
> + return 0;
> +}
> +
> +static irqreturn_t __mpam_irq_handler(int irq, struct mpam_msc *msc)
> +{
> + u64 reg;
> + u16 partid;
> + u8 errcode, pmg, ris;
> +
> + if (WARN_ON_ONCE(!msc) ||
> + WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(),
> + &msc->accessibility)))
> + return IRQ_NONE;
> +
> + reg = mpam_msc_read_esr(msc);
> +
> + errcode = FIELD_GET(MPAMF_ESR_ERRCODE, reg);
> + if (!errcode)
> + return IRQ_NONE;
> +
> + /* Clear level triggered irq */
> + mpam_msc_zero_esr(msc);
> +
> + partid = FIELD_GET(MPAMF_ESR_PARTID_MON, reg);
> + pmg = FIELD_GET(MPAMF_ESR_PMG, reg);
> + ris = FIELD_GET(MPAMF_ESR_RIS, reg);
> +
> + pr_err_ratelimited("error irq from msc:%u '%s', partid:%u, pmg: %u, ris: %u\n",
> + msc->id, mpam_errcode_names[errcode], partid, pmg,
> + ris);
> +
> + /* Disable this interrupt. */
> + mpam_disable_msc_ecr(msc);
> +
> + /*
> + * Schedule the teardown work. Don't use a threaded IRQ as we can't
> + * unregister the interrupt from the threaded part of the handler.
> + */
> + mpam_disable_reason = "hardware error interrupt";
> + schedule_work(&mpam_broken_work);
> +
> + return IRQ_HANDLED;
> +}
> +
> +static irqreturn_t mpam_ppi_handler(int irq, void *dev_id)
> +{
> + struct mpam_msc *msc = *(struct mpam_msc **)dev_id;
> +
> + return __mpam_irq_handler(irq, msc);
> +}
> +
> +static irqreturn_t mpam_spi_handler(int irq, void *dev_id)
> +{
> + struct mpam_msc *msc = dev_id;
> +
> + return __mpam_irq_handler(irq, msc);
> +}
> +
> +static int mpam_register_irqs(void)
> +{
> + int err, irq;
> + struct mpam_msc *msc;
> +
> + lockdep_assert_cpus_held();
> +
> + guard(srcu)(&mpam_srcu);
> + list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list,
> + srcu_read_lock_held(&mpam_srcu)) {
> + irq = platform_get_irq_byname_optional(msc->pdev, "error");
> + if (irq <= 0)
> + continue;
> +
> + /* The MPAM spec says the interrupt can be SPI, PPI or LPI */
> + /* We anticipate sharing the interrupt with other MSCs */
> + if (irq_is_percpu(irq)) {
> + err = request_percpu_irq(irq, &mpam_ppi_handler,
> + "mpam:msc:error",
> + msc->error_dev_id);
> + if (err)
> + return err;
> +
> + msc->reenable_error_ppi = irq;
> + smp_call_function_many(&msc->accessibility,
> + &_enable_percpu_irq, &irq,
> + true);
> + } else {
> +			err = devm_request_irq(&msc->pdev->dev, irq,
> + &mpam_spi_handler, IRQF_SHARED,
> + "mpam:msc:error", msc);
> + if (err)
> + return err;
> + }
> +
> + set_bit(MPAM_ERROR_IRQ_REQUESTED, &msc->error_irq_flags);
> + mpam_touch_msc(msc, mpam_enable_msc_ecr, msc);
> + set_bit(MPAM_ERROR_IRQ_HW_ENABLED, &msc->error_irq_flags);
> + }
> +
> + return 0;
> +}
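(Worth noting: request_percpu_irq() doesn't enable the interrupt
anywhere; each CPU must enable its own copy, hence the cross-call. I
assume _enable_percpu_irq() is roughly:

	static void _enable_percpu_irq(void *_irq)
	{
		int *irq = _irq;

		enable_percpu_irq(*irq, IRQ_TYPE_NONE);
	}
)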
> +
> +static void mpam_unregister_irqs(void)
> +{
> + int irq, idx;
> + struct mpam_msc *msc;
> +
> + cpus_read_lock();
> + /* take the lock as free_irq() can sleep */
> + idx = srcu_read_lock(&mpam_srcu);
> + list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list,
> + srcu_read_lock_held(&mpam_srcu)) {
> + irq = platform_get_irq_byname_optional(msc->pdev, "error");
> + if (irq <= 0)
> + continue;
> +
> + if (test_and_clear_bit(MPAM_ERROR_IRQ_HW_ENABLED, &msc->error_irq_flags))
> + mpam_touch_msc(msc, mpam_disable_msc_ecr, msc);
> +
> + if (test_and_clear_bit(MPAM_ERROR_IRQ_REQUESTED, &msc->error_irq_flags)) {
> + if (irq_is_percpu(irq)) {
> + msc->reenable_error_ppi = 0;
> + free_percpu_irq(irq, msc->error_dev_id);
> + } else {
> + devm_free_irq(&msc->pdev->dev, irq, msc);
> + }
> + }
> + }
> + srcu_read_unlock(&mpam_srcu, idx);
> + cpus_read_unlock();
> +}
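(The test_and_clear_bit() pairs are what make this safe to race with
itself, per the changelog: for each bit, only the caller that observes
it set performs the matching teardown. Schematically:

	/* atomic test-and-clear: true for exactly one racing caller */
	if (test_and_clear_bit(MPAM_ERROR_IRQ_REQUESTED, &msc->error_irq_flags))
		free_percpu_irq(irq, msc->error_dev_id);
)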
> +
> static void mpam_enable_once(void)
> {
> - mutex_lock(&mpam_list_lock);
> - mpam_enable_merge_features(&mpam_classes);
> - mutex_unlock(&mpam_list_lock);
> + int err;
>
> /*
> * Once the cpuhp callbacks have been changed, mpam_partid_max can no
> @@ -1332,6 +1580,27 @@ static void mpam_enable_once(void)
> partid_max_published = true;
> spin_unlock(&partid_max_lock);
>
> + /*
> +	 * If all the MSCs have been probed, enabling the IRQs happens next.
> + * That involves cross-calling to a CPU that can reach the MSC, and
> + * the locks must be taken in this order:
> + */
> + cpus_read_lock();
> + mutex_lock(&mpam_list_lock);
> + mpam_enable_merge_features(&mpam_classes);
> +
> + err = mpam_register_irqs();
> + if (err)
> + pr_warn("Failed to register irqs: %d\n", err);
> +
> + mutex_unlock(&mpam_list_lock);
> + cpus_read_unlock();
> +
> + if (err) {
> + schedule_work(&mpam_broken_work);
> + return;
> + }
> +
> mpam_register_cpuhp_callbacks(mpam_cpu_online, mpam_cpu_offline);
>
> printk(KERN_INFO "MPAM enabled with %u PARTIDs and %u PMGs\n",
> @@ -1397,6 +1666,8 @@ void mpam_disable(struct work_struct *ignored)
> }
> mutex_unlock(&mpam_cpuhp_state_lock);
>
> + mpam_unregister_irqs();
> +
> idx = srcu_read_lock(&mpam_srcu);
> list_for_each_entry_srcu(class, &mpam_classes, classes_list,
> srcu_read_lock_held(&mpam_srcu))
> diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h
> index 6e047fbd3512..f04a9ef189cf 100644
> --- a/drivers/resctrl/mpam_internal.h
> +++ b/drivers/resctrl/mpam_internal.h
> @@ -32,6 +32,10 @@ struct mpam_garbage {
> struct platform_device *pdev;
> };
>
> +/* Bit positions for error_irq_flags */
> +#define MPAM_ERROR_IRQ_REQUESTED 0
> +#define MPAM_ERROR_IRQ_HW_ENABLED 1
> +
> struct mpam_msc {
> /* member of mpam_all_msc */
> struct list_head all_msc_list;
> @@ -46,6 +50,11 @@ struct mpam_msc {
> struct pcc_mbox_chan *pcc_chan;
> u32 nrdy_usec;
> cpumask_t accessibility;
> + bool has_extd_esr;
> +
> + int reenable_error_ppi;
> + struct mpam_msc * __percpu *error_dev_id;
> +
> atomic_t online_refs;
>
> /*
> @@ -54,6 +63,7 @@ struct mpam_msc {
> */
> struct mutex probe_lock;
> bool probed;
> + unsigned long error_irq_flags;
> u16 partid_max;
> u8 pmg_max;
> unsigned long ris_idxs;
Thanks,
Ben