lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250912131219.00000938@huawei.com>
Date: Fri, 12 Sep 2025 13:12:19 +0100
From: Jonathan Cameron <jonathan.cameron@...wei.com>
To: James Morse <james.morse@....com>
CC: <linux-kernel@...r.kernel.org>, <linux-arm-kernel@...ts.infradead.org>,
	<linux-acpi@...r.kernel.org>, D Scott Phillips OS
	<scott@...amperecomputing.com>, <carl@...amperecomputing.com>,
	<lcherian@...vell.com>, <bobo.shaobowang@...wei.com>,
	<tan.shaopeng@...itsu.com>, <baolin.wang@...ux.alibaba.com>, Jamie Iles
	<quic_jiles@...cinc.com>, Xin Hao <xhao@...ux.alibaba.com>,
	<peternewman@...gle.com>, <dfustini@...libre.com>, <amitsinght@...vell.com>,
	David Hildenbrand <david@...hat.com>, Dave Martin <dave.martin@....com>, Koba
 Ko <kobak@...dia.com>, Shanker Donthineni <sdonthineni@...dia.com>,
	<fenghuay@...dia.com>, <baisheng.gao@...soc.com>, Rob Herring
	<robh@...nel.org>, Rohit Mathew <rohit.mathew@....com>, "Rafael Wysocki"
	<rafael@...nel.org>, Len Brown <lenb@...nel.org>, Lorenzo Pieralisi
	<lpieralisi@...nel.org>, Hanjun Guo <guohanjun@...wei.com>, Sudeep Holla
	<sudeep.holla@....com>, Catalin Marinas <catalin.marinas@....com>, "Will
 Deacon" <will@...nel.org>, Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
	Danilo Krummrich <dakr@...nel.org>
Subject: Re: [PATCH v2 18/29] arm_mpam: Register and enable IRQs

On Wed, 10 Sep 2025 20:42:58 +0000
James Morse <james.morse@....com> wrote:

> Register and enable error IRQs. All the MPAM error interrupts indicate a
> software bug, e.g. out of range partid. If the error interrupt is ever
> signalled, attempt to disable MPAM.
> 
> Only the irq handler accesses the ESR register, so no locking is needed.
> The work to disable MPAM after an error needs to happen at process
> context as it takes mutex. It also unregisters the interrupts, meaning
> it can't be done from the threaded part of a threaded interrupt.
> Instead, mpam_disable() gets scheduled.
> 
> Enabling the IRQs in the MSC may involve cross calling to a CPU that
> can access the MSC.
> 
> Once the IRQ is requested, the mpam_disable() path can be called
> asynchronously, which will walk structures sized by max_partid. Ensure
> this size is fixed before the interrupt is requested.
> 
> CC: Rohit Mathew <rohit.mathew@....com>
> Tested-by: Rohit Mathew <rohit.mathew@....com>
> Signed-off-by: James Morse <james.morse@....com>
A few comments inline.


> @@ -1318,11 +1405,172 @@ static void mpam_enable_merge_features(struct list_head *all_classes_list)
>  	}
>  }
>  
> +static char *mpam_errcode_names[16] = {
> +	[0] = "No error",

I think you had a bunch of defines for these in an earlier patch.  Can we use
that to index here instead of [0] etc. 

> +	[1] = "PARTID_SEL_Range",
> +	[2] = "Req_PARTID_Range",
> +	[3] = "MSMONCFG_ID_RANGE",
> +	[4] = "Req_PMG_Range",
> +	[5] = "Monitor_Range",
> +	[6] = "intPARTID_Range",
> +	[7] = "Unexpected_INTERNAL",
> +	[8] = "Undefined_RIS_PART_SEL",
> +	[9] = "RIS_No_Control",
> +	[10] = "Undefined_RIS_MON_SEL",
> +	[11] = "RIS_No_Monitor",
> +	[12 ... 15] = "Reserved"
> +};


> +static void mpam_unregister_irqs(void)
> +{
> +	int irq, idx;
> +	struct mpam_msc *msc;
> +
> +	cpus_read_lock();

	guard(cpus_read_lock)();
	guard(srcu)(&mpam_srcu);

> +	/* take the lock as free_irq() can sleep */
> +	idx = srcu_read_lock(&mpam_srcu);
> +	list_for_each_entry_srcu(msc, &mpam_all_msc, all_msc_list,
> +				 srcu_read_lock_held(&mpam_srcu)) {
> +		irq = platform_get_irq_byname_optional(msc->pdev, "error");
> +		if (irq <= 0)
> +			continue;
> +
> +		if (test_and_clear_bit(MPAM_ERROR_IRQ_HW_ENABLED, &msc->error_irq_flags))
> +			mpam_touch_msc(msc, mpam_disable_msc_ecr, msc);
> +
> +		if (test_and_clear_bit(MPAM_ERROR_IRQ_REQUESTED, &msc->error_irq_flags)) {
> +			if (irq_is_percpu(irq)) {
> +				msc->reenable_error_ppi = 0;
> +				free_percpu_irq(irq, msc->error_dev_id);
> +			} else {
> +				devm_free_irq(&msc->pdev->dev, irq, msc);
> +			}
> +		}
> +	}
> +	srcu_read_unlock(&mpam_srcu, idx);
> +	cpus_read_unlock();
> +}
> +
>  static void mpam_enable_once(void)
>  {
> -	mutex_lock(&mpam_list_lock);
> -	mpam_enable_merge_features(&mpam_classes);
> -	mutex_unlock(&mpam_list_lock);
> +	int err;
>  
>  	/*
>  	 * Once the cpuhp callbacks have been changed, mpam_partid_max can no
> @@ -1332,6 +1580,27 @@ static void mpam_enable_once(void)
>  	partid_max_published = true;
>  	spin_unlock(&partid_max_lock);
>  
> +	/*
> +	 * If all the MSC have been probed, enabling the IRQs happens next.
> +	 * That involves cross-calling to a CPU that can reach the MSC, and
> +	 * the locks must be taken in this order:
> +	 */
> +	cpus_read_lock();
> +	mutex_lock(&mpam_list_lock);
> +	mpam_enable_merge_features(&mpam_classes);
> +
> +	err = mpam_register_irqs();
> +	if (err)
> +		pr_warn("Failed to register irqs: %d\n", err);

Perhaps move the print into the if (err) below?

> +
> +	mutex_unlock(&mpam_list_lock);
> +	cpus_read_unlock();
> +
> +	if (err) {
> +		schedule_work(&mpam_broken_work);
> +		return;
> +	}

> diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h
> index 6e047fbd3512..f04a9ef189cf 100644
> --- a/drivers/resctrl/mpam_internal.h
> +++ b/drivers/resctrl/mpam_internal.h
> @@ -32,6 +32,10 @@ struct mpam_garbage {
>  	struct platform_device	*pdev;
>  };
>  
> +/* Bit positions for error_irq_flags */
> +#define	MPAM_ERROR_IRQ_REQUESTED  0
> +#define	MPAM_ERROR_IRQ_HW_ENABLED 1

If there aren't going to be load more of these (I've not really thought
about whether there might) then using a bitmap for these seems to add complexity
that we wouldn't see with 
bool error_irq_req;
bool error_irq_hw_enabled;


> +
>  struct mpam_msc {
>  	/* member of mpam_all_msc */
>  	struct list_head        all_msc_list;
> @@ -46,6 +50,11 @@ struct mpam_msc {
>  	struct pcc_mbox_chan	*pcc_chan;
>  	u32			nrdy_usec;
>  	cpumask_t		accessibility;
> +	bool			has_extd_esr;
> +
> +	int				reenable_error_ppi;
> +	struct mpam_msc * __percpu	*error_dev_id;
> +
>  	atomic_t		online_refs;
>  
>  	/*
> @@ -54,6 +63,7 @@ struct mpam_msc {
>  	 */
>  	struct mutex		probe_lock;
>  	bool			probed;
> +	unsigned long		error_irq_flags;
>  	u16			partid_max;
>  	u8			pmg_max;
>  	unsigned long		ris_idxs;


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ