[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20260121052744.233517-1-acelan.kao@canonical.com>
Date: Wed, 21 Jan 2026 13:27:44 +0800
From: "Chia-Lin Kao (AceLan)" <acelan.kao@...onical.com>
To: Andreas Noever <andreas.noever@...il.com>,
Mika Westerberg <westeri@...nel.org>,
Yehezkel Bernat <YehezkelShB@...il.com>,
linux-usb@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: [PATCH] thunderbolt: Fix PCIe device enumeration with delayed rescan
PCIe devices behind Thunderbolt tunnels may fail to enumerate when
spurious hotplug events prevent pciehp from detecting link-up.
Root cause:
Spurious unplug events occur immediately after tunnel activation:
[ 932.438] thunderbolt: acking hot unplug event on 702:2
[ 932.852] thunderbolt: PCIe Up path activation complete
[ 932.855] thunderbolt: hotplug event for upstream port 702:2
(unplug: 0)
[ 932.855] thunderbolt: hotplug event for upstream port 702:2
(unplug: 1)
These events disrupt pciehp timing, causing device enumeration to fail
~70% of the time on affected hardware. Manual PCI rescan succeeds,
proving devices are present and functional on the bus.
Solution:
Schedule delayed work (300ms) after tunnel activation to:
1. Check if pciehp successfully enumerated devices (device count increased)
2. If not, trigger pci_rescan_bus() to discover devices manually
3. Log results for observability
The delayed work approach is non-blocking and only rescans when actually
needed, avoiding overhead on systems where pciehp works correctly.
Signed-off-by: Chia-Lin Kao (AceLan) <acelan.kao@...onical.com>
---
Logs: https://people.canonical.com/~acelan/bugs/tbt_storage/
merged.out.bad: TBT storage plugged in, but it ultimately fails to enumerate
merged.out.good: TBT storage plugged in and enumerated successfully
merged.out.patched: TBT storage plugged in; enumeration would fail without
this patch, but succeeds with it applied
---
drivers/thunderbolt/tb.c | 95 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 95 insertions(+)
diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c
index 293fc9f258a5c..1cfc9a265c453 100644
--- a/drivers/thunderbolt/tb.c
+++ b/drivers/thunderbolt/tb.c
@@ -11,6 +11,7 @@
#include <linux/delay.h>
#include <linux/pm_runtime.h>
#include <linux/platform_data/x86/apple.h>
+#include <linux/pci.h>
#include "tb.h"
#include "tb_regs.h"
@@ -18,6 +19,7 @@
#define TB_TIMEOUT 100 /* ms */
#define TB_RELEASE_BW_TIMEOUT 10000 /* ms */
+#define TB_PCIEHP_ENUMERATION_DELAY 300 /* ms */
/*
* How many time bandwidth allocation request from graphics driver is
@@ -83,6 +85,16 @@ struct tb_hotplug_event {
int retry;
};
+/**
+ * struct tb_pci_rescan_work - Delayed check of PCIe enumeration after
+ *				tunnel activation
+ * @work: Delayed work item; handled by tb_pci_rescan_work_fn()
+ * @tb: Domain whose lock the handler takes and in which the switch is
+ *	looked up again
+ * @bus: PCI bus passed to pci_rescan_bus() when no new device showed up
+ * @devices_before: Number of devices for_each_pci_dev() visited when the
+ *		    work was queued
+ * @route: Route used to find the switch again when the work runs
+ * @port: Index into the switch's ports array; the port is only used for
+ *	  log messages
+ *
+ * Allocated in tb_tunnel_pci() and freed by the work handler.
+ */
+struct tb_pci_rescan_work {
+ struct delayed_work work;
+ struct tb *tb;
+ struct pci_bus *bus;
+ int devices_before;
+ u64 route;
+ u8 port;
+};
+
static void tb_scan_port(struct tb_port *port);
static void tb_handle_hotplug(struct work_struct *work);
static void tb_dp_resource_unavailable(struct tb *tb, struct tb_port *port,
@@ -90,6 +102,60 @@ static void tb_dp_resource_unavailable(struct tb *tb, struct tb_port *port,
static void tb_queue_dp_bandwidth_request(struct tb *tb, u64 route, u8 port,
int retry, unsigned long delay);
+/*
+ * tb_pci_rescan_work_fn() - Check PCIe enumeration after tunnel activation
+ * @work: Work embedded in the delayed work of a &struct tb_pci_rescan_work
+ *
+ * Counts the PCI devices again and compares the result with the count
+ * stored when the work was queued. If the count did not grow, the stored
+ * bus is rescanned with pci_rescan_bus() and the devices are counted once
+ * more. The outcome is logged against the port recorded in the work item.
+ *
+ * The work item is freed before returning on every path.
+ *
+ * NOTE(review): the PCI rescan/remove lock is taken while tb->lock is
+ * held. Confirm this ordering cannot deadlock against a path that takes
+ * the two locks the other way around (for instance removal of the NHI
+ * PCI device).
+ *
+ * NOTE(review): rescan_work->tb and rescan_work->bus are plain pointers
+ * with no reference taken when the work is queued. Confirm both are
+ * guaranteed to outlive the delayed work.
+ */
+static void tb_pci_rescan_work_fn(struct work_struct *work)
+{
+	struct tb_pci_rescan_work *rescan_work =
+		container_of(work, typeof(*rescan_work), work.work);
+	struct tb *tb = rescan_work->tb;
+	struct pci_bus *bus = rescan_work->bus;
+	int devices_after = 0;
+	struct pci_dev *dev;
+	struct tb_switch *sw;
+	struct tb_port *port;
+
+	mutex_lock(&tb->lock);
+
+	sw = tb_switch_find_by_route(tb, rescan_work->route);
+	if (!sw) {
+		tb_dbg(tb, "Switch at route %llx disappeared, skipping rescan\n",
+		       rescan_work->route);
+		goto out_unlock;
+	}
+
+	/*
+	 * The switch is looked up again by route, so it is not necessarily
+	 * the one that was present when the work was queued. Validate the
+	 * stored port number before indexing sw->ports with it.
+	 */
+	if (rescan_work->port > sw->config.max_port_number) {
+		tb_sw_dbg(sw, "port %u no longer exists, skipping rescan\n",
+			  rescan_work->port);
+		goto out_put;
+	}
+
+	port = &sw->ports[rescan_work->port];
+
+	pci_lock_rescan_remove();
+	for_each_pci_dev(dev)
+		devices_after++;
+	pci_unlock_rescan_remove();
+
+	if (devices_after > rescan_work->devices_before) {
+		tb_port_dbg(port, "pciehp enumerated %d new device(s)\n",
+			    devices_after - rescan_work->devices_before);
+	} else {
+		tb_port_info(port, "pciehp failed to enumerate devices, triggering rescan\n");
+
+		/* Rescan and recount under one hold of the rescan/remove lock */
+		pci_lock_rescan_remove();
+		pci_rescan_bus(bus);
+
+		devices_after = 0;
+		for_each_pci_dev(dev)
+			devices_after++;
+		pci_unlock_rescan_remove();
+
+		if (devices_after > rescan_work->devices_before)
+			tb_port_info(port, "rescan found %d new device(s)\n",
+				     devices_after - rescan_work->devices_before);
+		else
+			tb_port_warn(port, "no devices found even after rescan\n");
+	}
+
+out_put:
+	/* Drop the reference taken by tb_switch_find_by_route() */
+	tb_switch_put(sw);
+out_unlock:
+	mutex_unlock(&tb->lock);
+	kfree(rescan_work);
+}
+
static void tb_queue_hotplug(struct tb *tb, u64 route, u8 port, bool unplug)
{
struct tb_hotplug_event *ev;
@@ -2400,6 +2466,35 @@ static int tb_tunnel_pci(struct tb *tb, struct tb_switch *sw)
tb_sw_warn(sw, "failed to connect xHCI\n");
list_add_tail(&tunnel->list, &tcm->tunnel_list);
+
+	/*
+	 * Verify pciehp enumeration; trigger rescan if needed.
+	 *
+	 * Record how many PCI devices exist right now and queue delayed
+	 * work that compares against this count after
+	 * TB_PCIEHP_ENUMERATION_DELAY milliseconds. The work item is freed
+	 * by tb_pci_rescan_work_fn().
+	 */
+	if (tb->nhi && tb->nhi->pdev && tb->nhi->pdev->bus) {
+		struct pci_bus *bus = tb->nhi->pdev->bus;
+		/* Rescan from the parent of the NHI's bus when there is one */
+		struct pci_bus *scan_bus = bus->parent ? bus->parent : bus;
+		struct tb_pci_rescan_work *rescan_work;
+		struct pci_dev *dev;
+		int devices_before = 0;
+
+		pci_lock_rescan_remove();
+		for_each_pci_dev(dev)
+			devices_before++;
+		pci_unlock_rescan_remove();
+
+		/*
+		 * kmalloc_obj() sizes the allocation from the type of its
+		 * argument, so pass the pointed-to object, not the pointer.
+		 */
+		rescan_work = kmalloc_obj(*rescan_work, GFP_KERNEL);
+		if (!rescan_work)
+			return 0; /* tunnel is up; the check is best effort */
+
+		rescan_work->tb = tb;
+		rescan_work->bus = scan_bus;
+		rescan_work->devices_before = devices_before;
+		rescan_work->route = tb_route(sw);
+		rescan_work->port = up->port;
+
+		INIT_DELAYED_WORK(&rescan_work->work, tb_pci_rescan_work_fn);
+		queue_delayed_work(tb->wq, &rescan_work->work,
+				   msecs_to_jiffies(TB_PCIEHP_ENUMERATION_DELAY));
+	}
+
return 0;
}
--
2.51.0
Powered by blists - more mailing lists