lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <8ad1dec8-79eb-4189-aab0-cd62a1ed0c37@linux.alibaba.com>
Date: Mon, 1 Dec 2025 20:21:51 +0800
From: "guanghuifeng@...ux.alibaba.com" <guanghuifeng@...ux.alibaba.com>
To: Ilpo Järvinen <ilpo.jarvinen@...ux.intel.com>
Cc: bhelgaas@...gle.com, linux-pci@...r.kernel.org,
 LKML <linux-kernel@...r.kernel.org>, kanie@...ux.alibaba.com,
 alikernel-developer@...ux.alibaba.com
Subject: Re: [PATCH v2] PCI: Fix PCIe SBR dev/link wait error


在 2025/12/1 17:21, Ilpo Järvinen 写道:
> On Sun, 30 Nov 2025, Guanghui Feng wrote:
>
>> When executing a PCIe secondary bus reset, all downstream switches and
>> endpoints will generate reset events. Simultaneously, all PCIe links
>> will undergo retraining, and each link will independently re-execute the
>> LTSSM state machine training. Therefore, after executing the SBR, it is
>> necessary to wait for all downstream links and devices to complete
>> recovery. Otherwise, after the SBR returns, accessing devices with some
>> links or endpoints not yet fully recovered may result in driver errors,
>> or even trigger device offline issues.
>>
>> Signed-off-by: Guanghui Feng <guanghuifeng@...ux.alibaba.com>
>> Reviewed-by: Guixin Liu <kanie@...ux.alibaba.com>
>> ---
> Hi,
>
> In future, when posting an update, please always explain here
> below the --- line what was changed between the versions.
OK
>>   drivers/pci/pci.c | 138 ++++++++++++++++++++++++++++++++--------------
>>   1 file changed, 97 insertions(+), 41 deletions(-)
>>
>> diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
>> index b14dd064006c..76afecb11164 100644
>> --- a/drivers/pci/pci.c
>> +++ b/drivers/pci/pci.c
>> @@ -4788,6 +4788,63 @@ static int pci_bus_max_d3cold_delay(const struct pci_bus *bus)
>>   	return max(min_delay, max_delay);
>>   }
>>   
>> +static int pci_readiness_check(struct pci_dev *pdev, struct pci_dev *child,
>> +		unsigned long start_t, char *reset_type)
>> +{
>> +	int elapsed = jiffies_to_msecs(jiffies - start_t);
>> +
>> +	if (pci_dev_is_disconnected(pdev) || pci_dev_is_disconnected(child))
>> +		return 0;
>> +
>> +	if (pcie_get_speed_cap(pdev) <= PCIE_SPEED_5_0GT) {
>> +		u16 status;
>> +
>> +		pci_dbg(pdev, "waiting %d ms for downstream link\n", elapsed);
>> +
>> +		if (!pci_dev_wait(child, reset_type, 0))
>> +			return 0;
>> +
>> +		if (PCI_RESET_WAIT > elapsed)
>> +			return PCI_RESET_WAIT - elapsed;
>> +
>> +		/*
>> +		 * If the port supports active link reporting we now check
>> +		 * whether the link is active and if not bail out early with
>> +		 * the assumption that the device is not present anymore.
>> +		 */
>> +		if (!pdev->link_active_reporting)
>> +			return -ENOTTY;
>> +
>> +		pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &status);
>> +		if (!(status & PCI_EXP_LNKSTA_DLLLA))
>> +			return -ENOTTY;
>> +
>> +		if (!pci_dev_wait(child, reset_type, 0))
>> +			return 0;
>> +
>> +		if (PCIE_RESET_READY_POLL_MS > elapsed)
>> +			return PCIE_RESET_READY_POLL_MS - elapsed;
>> +
>> +		return -ENOTTY;
>> +	}
>> +
>> +	pci_dbg(pdev, "waiting %d ms for downstream link, after activation\n",
>> +		elapsed);
>> +	if (!pcie_wait_for_link_delay(pdev, true, 0)) {
>> +		/* Did not train, no need to wait any further */
>> +		pci_info(pdev, "Data Link Layer Link Active not set in %d msec\n", elapsed);
>> +		return -ENOTTY;
>> +	}
>> +
>> +	if (!pci_dev_wait(child, reset_type, 0))
>> +		return 0;
>> +
>> +	if (PCIE_RESET_READY_POLL_MS > elapsed)
>> +		return PCIE_RESET_READY_POLL_MS - elapsed;
>> +
>> +	return -ENOTTY;
>> +}
>> +
>>   /**
>>    * pci_bridge_wait_for_secondary_bus - Wait for secondary bus to be accessible
>>    * @dev: PCI bridge
>> @@ -4802,12 +4859,14 @@ static int pci_bus_max_d3cold_delay(const struct pci_bus *bus)
>>    * 4.3.2.
>>    *
>>    * Return 0 on success or -ENOTTY if the first device on the secondary bus
>> - * failed to become accessible.
>> + * failed to become accessible or a value greater than 0 indicates the
>> + * left required waiting time..
>>    */
>> -int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type)
>> +static int __pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, unsigned long start_t,
>> +		char *reset_type)
>>   {
>> -	struct pci_dev *child __free(pci_dev_put) = NULL;
>> -	int delay;
>> +	struct pci_dev *child;
>> +	int delay, ret, elapsed = jiffies_to_msecs(jiffies - start_t);
>>   
>>   	if (pci_dev_is_disconnected(dev))
>>   		return 0;
>> @@ -4835,8 +4894,6 @@ int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type)
>>   		return 0;
>>   	}
>>   
>> -	child = pci_dev_get(list_first_entry(&dev->subordinate->devices,
>> -					     struct pci_dev, bus_list));
>>   	up_read(&pci_bus_sem);
>>   
>>   	/*
>> @@ -4844,8 +4901,10 @@ int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type)
>>   	 * accessing the device after reset (that is 1000 ms + 100 ms).
>>   	 */
>>   	if (!pci_is_pcie(dev)) {
>> -		pci_dbg(dev, "waiting %d ms for secondary bus\n", 1000 + delay);
>> -		msleep(1000 + delay);
>> +		if (1000 + delay > elapsed)
>> +			return 1000 + delay - elapsed;
>> +
>> +		pci_dbg(dev, "waiting %d ms for secondary bus\n", elapsed);
>>   		return 0;
>>   	}
>>   
>> @@ -4867,41 +4926,40 @@ int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type)
>>   	if (!pcie_downstream_port(dev))
>>   		return 0;
>>   
>> -	if (pcie_get_speed_cap(dev) <= PCIE_SPEED_5_0GT) {
>> -		u16 status;
>> -
>> -		pci_dbg(dev, "waiting %d ms for downstream link\n", delay);
>> -		msleep(delay);
>> -
>> -		if (!pci_dev_wait(child, reset_type, PCI_RESET_WAIT - delay))
>> -			return 0;
>> +	if (delay > elapsed)
>> +		return delay - elapsed;
>>   
>> +	down_read(&pci_bus_sem);
>> +	list_for_each_entry(child, &dev->subordinate->devices, bus_list) {
>>   		/*
>> -		 * If the port supports active link reporting we now check
>> -		 * whether the link is active and if not bail out early with
>> -		 * the assumption that the device is not present anymore.
>> +		 * Check if all devices under the same bus have completed
>> +		 * the reset process, including multifunction devices in
>> +		 * the same bus.
>>   		 */
>> -		if (!dev->link_active_reporting)
>> -			return -ENOTTY;
>> +		ret = pci_readiness_check(dev, child, start_t, reset_type);
>>   
>> -		pcie_capability_read_word(dev, PCI_EXP_LNKSTA, &status);
>> -		if (!(status & PCI_EXP_LNKSTA_DLLLA))
>> -			return -ENOTTY;
>> +		if (ret == 0 && child->subordinate)
>> +			ret = __pci_bridge_wait_for_secondary_bus(child, start_t, reset_type);
>>   
>> -		return pci_dev_wait(child, reset_type,
>> -				    PCIE_RESET_READY_POLL_MS - PCI_RESET_WAIT);
>> +		if(ret)
>> +			break;
>>   	}
>> +	up_read(&pci_bus_sem);
>>   
>> -	pci_dbg(dev, "waiting %d ms for downstream link, after activation\n",
>> -		delay);
>> -	if (!pcie_wait_for_link_delay(dev, true, delay)) {
>> -		/* Did not train, no need to wait any further */
>> -		pci_info(dev, "Data Link Layer Link Active not set in %d msec\n", delay);
>> -		return -ENOTTY;
>> -	}
>> +	return ret;
>> +}
>>   
>> -	return pci_dev_wait(child, reset_type,
>> -			    PCIE_RESET_READY_POLL_MS - delay);
>> +int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type)
>> +{
>> +	int res = 0;
>> +	unsigned long start_t = jiffies;
>> +
>> +	do {
>> +		msleep(res);
>> +		res = __pci_bridge_wait_for_secondary_bus(dev, start_t, reset_type);
>> +	} while (res > 0);
>> +
>> +	return res;
>>   }
>>   
>>   void pci_reset_secondary_bus(struct pci_dev *dev)
>> @@ -5542,10 +5600,8 @@ static void pci_bus_restore_locked(struct pci_bus *bus)
>>   
>>   	list_for_each_entry(dev, &bus->devices, bus_list) {
>>   		pci_dev_restore(dev);
>> -		if (dev->subordinate) {
>> -			pci_bridge_wait_for_secondary_bus(dev, "bus reset");
>> +		if (dev->subordinate)
>>   			pci_bus_restore_locked(dev->subordinate);
>> -		}
>>   	}
>>   }
> ???
>
> Unfortunately, this takes a wrong turn and is very much against the
> feedback I gave to you.

1. The implementation of 
pci_bridge_secondary_bus_reset/pci_bridge_wait_for_secondary_bus

    has been modified to wait for all downstream switches/link/devices 
to complete their recovery

    before returning.

2. Therefore, when pci_bus_restore_locked is called via __pci_reset_bus, 
the waiting process has

    already been completed via 
pci_bridge_secondary_bus_reset/pci_bridge_wait_for_secondary_bus,

    so pci_bus_restore_locked does not require additional waiting.

>>   
>> @@ -5575,14 +5631,14 @@ static void pci_slot_restore_locked(struct pci_slot *slot)
>>   {
>>   	struct pci_dev *dev;
>>   
>> +	pci_bridge_wait_for_secondary_bus(slot->bus->self, "slot reset");
>> +
>>   	list_for_each_entry(dev, &slot->bus->devices, bus_list) {
>>   		if (!dev->slot || dev->slot != slot)
>>   			continue;
>>   		pci_dev_restore(dev);
>> -		if (dev->subordinate) {
>> -			pci_bridge_wait_for_secondary_bus(dev, "slot reset");
>> +		if (dev->subordinate)
>>   			pci_bus_restore_locked(dev->subordinate);
>> -		}

1. When pci_slot_restore_locked is called for a PCIe slot reset, 
pci_bridge_wait_for_secondary_bus

    has already been executed to wait for all switches, links, and 
devices to complete their recovery firstly

>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ