lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20260121060857.237777-1-acelan.kao@canonical.com>
Date: Wed, 21 Jan 2026 14:08:57 +0800
From: "Chia-Lin Kao (AceLan)" <acelan.kao@...onical.com>
To: Andreas Noever <andreas.noever@...il.com>,
	Mika Westerberg <westeri@...nel.org>,
	Yehezkel Bernat <YehezkelShB@...il.com>,
	linux-usb@...r.kernel.org,
	linux-kernel@...r.kernel.org
Subject: [PATCH v2] thunderbolt: Fix PCIe device enumeration with delayed rescan

PCIe devices behind Thunderbolt tunnels may fail to enumerate when
spurious hotplug events prevent pciehp from detecting link-up.

Root cause:

Spurious unplug events occur immediately after tunnel activation:

  [  932.438] thunderbolt: acking hot unplug event on 702:2
  [  932.852] thunderbolt: PCIe Up path activation complete
  [  932.855] thunderbolt: hotplug event for upstream port 702:2
            (unplug: 0)
  [  932.855] thunderbolt: hotplug event for upstream port 702:2
            (unplug: 1)

These events disrupt pciehp timing, causing device enumeration to fail
~70% of the time on affected hardware. Manual PCI rescan succeeds,
proving devices are present and functional on the bus.

Solution:

Schedule delayed work (300ms) after tunnel activation to:
1. Check if pciehp successfully enumerated devices (device count increased)
2. If not, trigger pci_rescan_bus() to discover devices manually
3. Log results for observability

The delayed work approach is non-blocking and only rescans when actually
needed, avoiding overhead on systems where pciehp works correctly.

Signed-off-by: Chia-Lin Kao (AceLan) <acelan.kao@...onical.com>
---
Logs: https://people.canonical.com/~acelan/bugs/tbt_storage/
merged.out.bad: Plugged-in TBT storage, but eventually fails to enumerate
merged.out.good: Plugged-in TBT storage, and successfully enumerates
merged.out.patched: Plugged-in TBT storage, it should fail without this
                    patch, but it works now
---
 drivers/thunderbolt/tb.c | 95 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 95 insertions(+)

diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c
index 293fc9f258a5c..a3761be6eeea4 100644
--- a/drivers/thunderbolt/tb.c
+++ b/drivers/thunderbolt/tb.c
@@ -11,6 +11,7 @@
 #include <linux/delay.h>
 #include <linux/pm_runtime.h>
 #include <linux/platform_data/x86/apple.h>
+#include <linux/pci.h>
 
 #include "tb.h"
 #include "tb_regs.h"
@@ -18,6 +19,7 @@
 
 #define TB_TIMEOUT		100	/* ms */
 #define TB_RELEASE_BW_TIMEOUT	10000	/* ms */
+#define TB_PCIEHP_ENUMERATION_DELAY 300	/* ms */
 
 /*
  * How many time bandwidth allocation request from graphics driver is
@@ -83,6 +85,16 @@ struct tb_hotplug_event {
 	int retry;
 };
 
+/* Delayed work to verify PCIe enumeration after tunnel activation */
+struct tb_pci_rescan_work {
+	struct delayed_work work;
+	struct tb *tb;
+	struct pci_bus *bus;
+	int devices_before;
+	u64 route;
+	u8 port;
+};
+
 static void tb_scan_port(struct tb_port *port);
 static void tb_handle_hotplug(struct work_struct *work);
 static void tb_dp_resource_unavailable(struct tb *tb, struct tb_port *port,
@@ -90,6 +102,60 @@ static void tb_dp_resource_unavailable(struct tb *tb, struct tb_port *port,
 static void tb_queue_dp_bandwidth_request(struct tb *tb, u64 route, u8 port,
 					  int retry, unsigned long delay);
 
+static void tb_pci_rescan_work_fn(struct work_struct *work)
+{
+	struct tb_pci_rescan_work *rescan_work =
+		container_of(work, typeof(*rescan_work), work.work);
+	struct tb *tb = rescan_work->tb;
+	struct pci_bus *bus = rescan_work->bus;
+	int devices_after = 0;
+	struct pci_dev *dev;
+	struct tb_switch *sw;
+	struct tb_port *port;
+
+	mutex_lock(&tb->lock);
+
+	sw = tb_switch_find_by_route(tb, rescan_work->route);
+	if (!sw) {
+		tb_dbg(tb, "Switch at route %llx disappeared, skipping rescan\n",
+		       rescan_work->route);
+		goto out_unlock;
+	}
+
+	port = &sw->ports[rescan_work->port];
+
+	pci_lock_rescan_remove();
+	for_each_pci_dev(dev)
+		devices_after++;
+	pci_unlock_rescan_remove();
+
+	if (devices_after > rescan_work->devices_before) {
+		tb_port_dbg(port, "pciehp enumerated %d new device(s)\n",
+			    devices_after - rescan_work->devices_before);
+	} else {
+		tb_port_info(port, "pciehp failed to enumerate devices, triggering rescan\n");
+
+		pci_lock_rescan_remove();
+		pci_rescan_bus(bus);
+
+		devices_after = 0;
+		for_each_pci_dev(dev)
+			devices_after++;
+		pci_unlock_rescan_remove();
+
+		if (devices_after > rescan_work->devices_before)
+			tb_port_info(port, "rescan found %d new device(s)\n",
+				     devices_after - rescan_work->devices_before);
+		else
+			tb_port_warn(port, "no devices found even after rescan\n");
+	}
+
+	tb_switch_put(sw);
+out_unlock:
+	mutex_unlock(&tb->lock);
+	kfree(rescan_work);
+}
+
 static void tb_queue_hotplug(struct tb *tb, u64 route, u8 port, bool unplug)
 {
 	struct tb_hotplug_event *ev;
@@ -2400,6 +2466,35 @@ static int tb_tunnel_pci(struct tb *tb, struct tb_switch *sw)
 		tb_sw_warn(sw, "failed to connect xHCI\n");
 
 	list_add_tail(&tunnel->list, &tcm->tunnel_list);
+
+	/* Verify pciehp enumeration; trigger rescan if needed */
+	if (tb->nhi && tb->nhi->pdev && tb->nhi->pdev->bus) {
+		struct pci_bus *bus = tb->nhi->pdev->bus;
+		struct pci_bus *scan_bus = bus->parent ? bus->parent : bus;
+		struct tb_pci_rescan_work *rescan_work;
+		struct pci_dev *dev;
+		int devices_before = 0;
+
+		pci_lock_rescan_remove();
+		for_each_pci_dev(dev)
+			devices_before++;
+		pci_unlock_rescan_remove();
+
+		rescan_work = kmalloc(sizeof(*rescan_work), GFP_KERNEL);
+		if (!rescan_work)
+			return 0;
+
+		rescan_work->tb = tb;
+		rescan_work->bus = scan_bus;
+		rescan_work->devices_before = devices_before;
+		rescan_work->route = tb_route(sw);
+		rescan_work->port = up->port;
+
+		INIT_DELAYED_WORK(&rescan_work->work, tb_pci_rescan_work_fn);
+		queue_delayed_work(tb->wq, &rescan_work->work,
+				   msecs_to_jiffies(TB_PCIEHP_ENUMERATION_DELAY));
+	}
+
 	return 0;
 }
 
-- 
2.51.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ