[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <74bcafc2-9d36-06d0-5ed4-66694356588d@linux.intel.com>
Date: Mon, 1 Dec 2025 11:21:49 +0200 (EET)
From: Ilpo Järvinen <ilpo.jarvinen@...ux.intel.com>
To: Guanghui Feng <guanghuifeng@...ux.alibaba.com>
cc: bhelgaas@...gle.com, linux-pci@...r.kernel.org,
LKML <linux-kernel@...r.kernel.org>, kanie@...ux.alibaba.com,
alikernel-developer@...ux.alibaba.com
Subject: Re: [PATCH v2] PCI: Fix PCIe SBR dev/link wait error
On Sun, 30 Nov 2025, Guanghui Feng wrote:
> When executing a PCIe secondary bus reset, all downstream switches and
> endpoints will generate reset events. Simultaneously, all PCIe links
> will undergo retraining, and each link will independently re-execute the
> LTSSM state machine training. Therefore, after executing the SBR, it is
> necessary to wait for all downstream links and devices to complete
> recovery. Otherwise, after the SBR returns, accessing devices with some
> links or endpoints not yet fully recovered may result in driver errors,
> or even trigger device offline issues.
>
> Signed-off-by: Guanghui Feng <guanghuifeng@...ux.alibaba.com>
> Reviewed-by: Guixin Liu <kanie@...ux.alibaba.com>
> ---
Hi,
In future, when posting an update, please always explain here
below the --- line what was changed between the versions.
> drivers/pci/pci.c | 138 ++++++++++++++++++++++++++++++++--------------
> 1 file changed, 97 insertions(+), 41 deletions(-)
>
> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
> index b14dd064006c..76afecb11164 100644
> --- a/drivers/pci/pci.c
> +++ b/drivers/pci/pci.c
> @@ -4788,6 +4788,63 @@ static int pci_bus_max_d3cold_delay(const struct pci_bus *bus)
> return max(min_delay, max_delay);
> }
>
> +static int pci_readiness_check(struct pci_dev *pdev, struct pci_dev *child,
> + unsigned long start_t, char *reset_type)
> +{
> + int elapsed = jiffies_to_msecs(jiffies - start_t);
> +
> + if (pci_dev_is_disconnected(pdev) || pci_dev_is_disconnected(child))
> + return 0;
> +
> + if (pcie_get_speed_cap(pdev) <= PCIE_SPEED_5_0GT) {
> + u16 status;
> +
> + pci_dbg(pdev, "waiting %d ms for downstream link\n", elapsed);
> +
> + if (!pci_dev_wait(child, reset_type, 0))
> + return 0;
> +
> + if (PCI_RESET_WAIT > elapsed)
> + return PCI_RESET_WAIT - elapsed;
> +
> + /*
> + * If the port supports active link reporting we now check
> + * whether the link is active and if not bail out early with
> + * the assumption that the device is not present anymore.
> + */
> + if (!pdev->link_active_reporting)
> + return -ENOTTY;
> +
> + pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &status);
> + if (!(status & PCI_EXP_LNKSTA_DLLLA))
> + return -ENOTTY;
> +
> + if (!pci_dev_wait(child, reset_type, 0))
> + return 0;
> +
> + if (PCIE_RESET_READY_POLL_MS > elapsed)
> + return PCIE_RESET_READY_POLL_MS - elapsed;
> +
> + return -ENOTTY;
> + }
> +
> + pci_dbg(pdev, "waiting %d ms for downstream link, after activation\n",
> + elapsed);
> + if (!pcie_wait_for_link_delay(pdev, true, 0)) {
> + /* Did not train, no need to wait any further */
> + pci_info(pdev, "Data Link Layer Link Active not set in %d msec\n", elapsed);
> + return -ENOTTY;
> + }
> +
> + if (!pci_dev_wait(child, reset_type, 0))
> + return 0;
> +
> + if (PCIE_RESET_READY_POLL_MS > elapsed)
> + return PCIE_RESET_READY_POLL_MS - elapsed;
> +
> + return -ENOTTY;
> +}
> +
> /**
> * pci_bridge_wait_for_secondary_bus - Wait for secondary bus to be accessible
> * @dev: PCI bridge
> @@ -4802,12 +4859,14 @@ static int pci_bus_max_d3cold_delay(const struct pci_bus *bus)
> * 4.3.2.
> *
> * Return 0 on success or -ENOTTY if the first device on the secondary bus
> - * failed to become accessible.
> + * failed to become accessible or a value greater than 0 indicates the
> + * left required waiting time..
> */
> -int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type)
> +static int __pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, unsigned long start_t,
> + char *reset_type)
> {
> - struct pci_dev *child __free(pci_dev_put) = NULL;
> - int delay;
> + struct pci_dev *child;
> + int delay, ret, elapsed = jiffies_to_msecs(jiffies - start_t);
>
> if (pci_dev_is_disconnected(dev))
> return 0;
> @@ -4835,8 +4894,6 @@ int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type)
> return 0;
> }
>
> - child = pci_dev_get(list_first_entry(&dev->subordinate->devices,
> - struct pci_dev, bus_list));
> up_read(&pci_bus_sem);
>
> /*
> @@ -4844,8 +4901,10 @@ int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type)
> * accessing the device after reset (that is 1000 ms + 100 ms).
> */
> if (!pci_is_pcie(dev)) {
> - pci_dbg(dev, "waiting %d ms for secondary bus\n", 1000 + delay);
> - msleep(1000 + delay);
> + if (1000 + delay > elapsed)
> + return 1000 + delay - elapsed;
> +
> + pci_dbg(dev, "waiting %d ms for secondary bus\n", elapsed);
> return 0;
> }
>
> @@ -4867,41 +4926,40 @@ int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type)
> if (!pcie_downstream_port(dev))
> return 0;
>
> - if (pcie_get_speed_cap(dev) <= PCIE_SPEED_5_0GT) {
> - u16 status;
> -
> - pci_dbg(dev, "waiting %d ms for downstream link\n", delay);
> - msleep(delay);
> -
> - if (!pci_dev_wait(child, reset_type, PCI_RESET_WAIT - delay))
> - return 0;
> + if (delay > elapsed)
> + return delay - elapsed;
>
> + down_read(&pci_bus_sem);
> + list_for_each_entry(child, &dev->subordinate->devices, bus_list) {
> /*
> - * If the port supports active link reporting we now check
> - * whether the link is active and if not bail out early with
> - * the assumption that the device is not present anymore.
> + * Check if all devices under the same bus have completed
> + * the reset process, including multifunction devices in
> + * the same bus.
> */
> - if (!dev->link_active_reporting)
> - return -ENOTTY;
> + ret = pci_readiness_check(dev, child, start_t, reset_type);
>
> - pcie_capability_read_word(dev, PCI_EXP_LNKSTA, &status);
> - if (!(status & PCI_EXP_LNKSTA_DLLLA))
> - return -ENOTTY;
> + if (ret == 0 && child->subordinate)
> + ret = __pci_bridge_wait_for_secondary_bus(child, start_t, reset_type);
>
> - return pci_dev_wait(child, reset_type,
> - PCIE_RESET_READY_POLL_MS - PCI_RESET_WAIT);
> + if(ret)
> + break;
> }
> + up_read(&pci_bus_sem);
>
> - pci_dbg(dev, "waiting %d ms for downstream link, after activation\n",
> - delay);
> - if (!pcie_wait_for_link_delay(dev, true, delay)) {
> - /* Did not train, no need to wait any further */
> - pci_info(dev, "Data Link Layer Link Active not set in %d msec\n", delay);
> - return -ENOTTY;
> - }
> + return ret;
> +}
>
> - return pci_dev_wait(child, reset_type,
> - PCIE_RESET_READY_POLL_MS - delay);
> +int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type)
> +{
> + int res = 0;
> + unsigned long start_t = jiffies;
> +
> + do {
> + msleep(res);
> + res = __pci_bridge_wait_for_secondary_bus(dev, start_t, reset_type);
> + } while (res > 0);
> +
> + return res;
> }
>
> void pci_reset_secondary_bus(struct pci_dev *dev)
> @@ -5542,10 +5600,8 @@ static void pci_bus_restore_locked(struct pci_bus *bus)
>
> list_for_each_entry(dev, &bus->devices, bus_list) {
> pci_dev_restore(dev);
> - if (dev->subordinate) {
> - pci_bridge_wait_for_secondary_bus(dev, "bus reset");
> + if (dev->subordinate)
> pci_bus_restore_locked(dev->subordinate);
> - }
> }
> }
???
Unfortunately, this takes a wrong turn and is very much against the
feedback I gave to you.
>
> @@ -5575,14 +5631,14 @@ static void pci_slot_restore_locked(struct pci_slot *slot)
> {
> struct pci_dev *dev;
>
> + pci_bridge_wait_for_secondary_bus(slot->bus->self, "slot reset");
> +
> list_for_each_entry(dev, &slot->bus->devices, bus_list) {
> if (!dev->slot || dev->slot != slot)
> continue;
> pci_dev_restore(dev);
> - if (dev->subordinate) {
> - pci_bridge_wait_for_secondary_bus(dev, "slot reset");
> + if (dev->subordinate)
> pci_bus_restore_locked(dev->subordinate);
> - }
--
i.
Powered by blists - more mailing lists