lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <ed63e434-6c0b-4111-28a4-2a4a2c2f3725@linux.intel.com>
Date: Thu, 13 Mar 2025 16:26:47 +0200 (EET)
From: Ilpo Järvinen <ilpo.jarvinen@...ux.intel.com>
To: Lukas Wunner <lukas@...ner.de>, Bjorn Helgaas <bhelgaas@...gle.com>
cc: Jonathan Cameron <Jonathan.Cameron@...wei.com>, linux-pci@...r.kernel.org, 
    LKML <linux-kernel@...r.kernel.org>, 
    Joel Mathew Thomas <proxy0@...amail.com>, stable@...r.kernel.org
Subject: Re: [PATCH 1/1] PCI/bwctrl: Disable PCIe BW controller during
 reset

On Mon, 17 Feb 2025, Ilpo Järvinen wrote:

> PCIe BW controller enables BW notifications for Downstream Ports by
> setting Link Bandwidth Management Interrupt Enable (LBMIE) and Link
> Autonomous Bandwidth Interrupt Enable (LABIE) (PCIe Spec. r6.2 sec.
> 7.5.3.7).
> 
> It was discovered that performing a reset can lead to the device
> underneath the Downstream Port becoming unavailable if BW notifications
> are left enabled throughout the reset sequence (at least LBMIE was
> found to cause an issue).
> 
> While the PCIe Specifications do not indicate BW notifications could not
> be kept enabled during resets, the PCIe Link state during an
> intentional reset is not of large interest. Thus, disable BW controller
> for the bridge while reset is performed and re-enable it after the
> reset has completed to workaround the problems some devices encounter
> if BW notifications are left on throughout the reset sequence.
> 
> Keep a counter for the disable/enable because MFD will execute
> pci_dev_save_and_disable() and pci_dev_restore() back to back for
> sibling devices:
> 
> [   50.139010] vfio-pci 0000:01:00.0: resetting
> [   50.139053] vfio-pci 0000:01:00.1: resetting
> [   50.141126] pcieport 0000:00:01.1: PME: Spurious native interrupt!
> [   50.141133] pcieport 0000:00:01.1: PME: Spurious native interrupt!
> [   50.441466] vfio-pci 0000:01:00.0: reset done
> [   50.501534] vfio-pci 0000:01:00.1: reset done
> 
> Fixes: 665745f27487 ("PCI/bwctrl: Re-add BW notification portdrv as PCIe BW controller")
> Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219765
> Tested-by: Joel Mathew Thomas <proxy0@...amail.com>
> Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@...ux.intel.com>
> Cc: stable@...r.kernel.org
> ---
> 
> I suspect the root cause is some kind of violation of specifications.
> Resets shouldn't cause devices to become unavailable just because BW
> notifications have been enabled.
> 
> Before somebody comments on those dual rwsems, I do have yet to be
> submitted patch to simplify the locking as per Lukas Wunner's earlier
> suggestion. I've just focused on solving the regressions first.

Hi all,

This problem was root caused to a problem in hotplug code so this patch 
should not be applied. I've just sent a series to address the 
synchronization problems the hotplug code and it should be considered 
instead to fix the issues this patch attempted to workaround.

-- 
 i.

>  drivers/pci/pci.c         |  8 +++++++
>  drivers/pci/pci.h         |  4 ++++
>  drivers/pci/pcie/bwctrl.c | 49 ++++++++++++++++++++++++++++++++-------
>  3 files changed, 53 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index 869d204a70a3..7a53d7474175 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -5148,6 +5148,7 @@ static void pci_dev_save_and_disable(struct pci_dev *dev)
>  {
>  	const struct pci_error_handlers *err_handler =
>  			dev->driver ? dev->driver->err_handler : NULL;
> +	struct pci_dev *bridge = pci_upstream_bridge(dev);
>  
>  	/*
>  	 * dev->driver->err_handler->reset_prepare() is protected against
> @@ -5166,6 +5167,9 @@ static void pci_dev_save_and_disable(struct pci_dev *dev)
>  	 */
>  	pci_set_power_state(dev, PCI_D0);
>  
> +	if (bridge)
> +		pcie_bwnotif_disable(bridge);
> +
>  	pci_save_state(dev);
>  	/*
>  	 * Disable the device by clearing the Command register, except for
> @@ -5181,9 +5185,13 @@ static void pci_dev_restore(struct pci_dev *dev)
>  {
>  	const struct pci_error_handlers *err_handler =
>  			dev->driver ? dev->driver->err_handler : NULL;
> +	struct pci_dev *bridge = pci_upstream_bridge(dev);
>  
>  	pci_restore_state(dev);
>  
> +	if (bridge)
> +		pcie_bwnotif_enable(bridge);
> +
>  	/*
>  	 * dev->driver->err_handler->reset_done() is protected against
>  	 * races with ->remove() by the device lock, which must be held by
> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> index 01e51db8d285..856546f1aad9 100644
> --- a/drivers/pci/pci.h
> +++ b/drivers/pci/pci.h
> @@ -759,12 +759,16 @@ static inline void pcie_ecrc_get_policy(char *str) { }
>  #ifdef CONFIG_PCIEPORTBUS
>  void pcie_reset_lbms_count(struct pci_dev *port);
>  int pcie_lbms_count(struct pci_dev *port, unsigned long *val);
> +void pcie_bwnotif_enable(struct pci_dev *port);
> +void pcie_bwnotif_disable(struct pci_dev *port);
>  #else
>  static inline void pcie_reset_lbms_count(struct pci_dev *port) {}
>  static inline int pcie_lbms_count(struct pci_dev *port, unsigned long *val)
>  {
>  	return -EOPNOTSUPP;
>  }
> +static inline void pcie_bwnotif_enable(struct pci_dev *port) {}
> +static inline void pcie_bwnotif_disable(struct pci_dev *port) {}
>  #endif
>  
>  struct pci_dev_reset_methods {
> diff --git a/drivers/pci/pcie/bwctrl.c b/drivers/pci/pcie/bwctrl.c
> index 0a5e7efbce2c..a117f6f67c07 100644
> --- a/drivers/pci/pcie/bwctrl.c
> +++ b/drivers/pci/pcie/bwctrl.c
> @@ -40,11 +40,13 @@
>   * @set_speed_mutex:	Serializes link speed changes
>   * @lbms_count:		Count for LBMS (since last reset)
>   * @cdev:		Thermal cooling device associated with the port
> + * @disable_count:	BW notifications disabled/enabled counter
>   */
>  struct pcie_bwctrl_data {
>  	struct mutex set_speed_mutex;
>  	atomic_t lbms_count;
>  	struct thermal_cooling_device *cdev;
> +	int disable_count;
>  };
>  
>  /*
> @@ -200,10 +202,9 @@ int pcie_set_target_speed(struct pci_dev *port, enum pci_bus_speed speed_req,
>  	return ret;
>  }
>  
> -static void pcie_bwnotif_enable(struct pcie_device *srv)
> +static void __pcie_bwnotif_enable(struct pci_dev *port)
>  {
> -	struct pcie_bwctrl_data *data = srv->port->link_bwctrl;
> -	struct pci_dev *port = srv->port;
> +	struct pcie_bwctrl_data *data = port->link_bwctrl;
>  	u16 link_status;
>  	int ret;
>  
> @@ -224,12 +225,44 @@ static void pcie_bwnotif_enable(struct pcie_device *srv)
>  	pcie_update_link_speed(port->subordinate);
>  }
>  
> -static void pcie_bwnotif_disable(struct pci_dev *port)
> +void pcie_bwnotif_enable(struct pci_dev *port)
> +{
> +	guard(rwsem_read)(&pcie_bwctrl_setspeed_rwsem);
> +	guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem);
> +
> +	if (!port->link_bwctrl)
> +		return;
> +
> +	port->link_bwctrl->disable_count--;
> +	if (!port->link_bwctrl->disable_count) {
> +		__pcie_bwnotif_enable(port);
> +		pci_dbg(port, "BW notifications enabled\n");
> +	}
> +	WARN_ON_ONCE(port->link_bwctrl->disable_count < 0);
> +}
> +
> +static void __pcie_bwnotif_disable(struct pci_dev *port)
>  {
>  	pcie_capability_clear_word(port, PCI_EXP_LNKCTL,
>  				   PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE);
>  }
>  
> +void pcie_bwnotif_disable(struct pci_dev *port)
> +{
> +	guard(rwsem_read)(&pcie_bwctrl_setspeed_rwsem);
> +	guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem);
> +
> +	if (!port->link_bwctrl)
> +		return;
> +
> +	port->link_bwctrl->disable_count++;
> +
> +	if (port->link_bwctrl->disable_count == 1) {
> +		__pcie_bwnotif_disable(port);
> +		pci_dbg(port, "BW notifications disabled\n");
> +	}
> +}
> +
>  static irqreturn_t pcie_bwnotif_irq(int irq, void *context)
>  {
>  	struct pcie_device *srv = context;
> @@ -314,7 +347,7 @@ static int pcie_bwnotif_probe(struct pcie_device *srv)
>  				return ret;
>  			}
>  
> -			pcie_bwnotif_enable(srv);
> +			__pcie_bwnotif_enable(port);
>  		}
>  	}
>  
> @@ -336,7 +369,7 @@ static void pcie_bwnotif_remove(struct pcie_device *srv)
>  
>  	scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) {
>  		scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) {
> -			pcie_bwnotif_disable(srv->port);
> +			__pcie_bwnotif_disable(srv->port);
>  
>  			free_irq(srv->irq, srv);
>  
> @@ -347,13 +380,13 @@ static void pcie_bwnotif_remove(struct pcie_device *srv)
>  
>  static int pcie_bwnotif_suspend(struct pcie_device *srv)
>  {
> -	pcie_bwnotif_disable(srv->port);
> +	__pcie_bwnotif_disable(srv->port);
>  	return 0;
>  }
>  
>  static int pcie_bwnotif_resume(struct pcie_device *srv)
>  {
> -	pcie_bwnotif_enable(srv);
> +	__pcie_bwnotif_enable(srv->port);
>  	return 0;
>  }
>  
> 
> base-commit: 2014c95afecee3e76ca4a56956a936e23283f05b
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ