[<prev] [next>] [day] [month] [year] [list]
Message-ID: <20260209120815.29545-1-harshankmatkar1304@outlook.com>
Date: Mon, 9 Feb 2026 12:08:45 +0000
From: Harshank Matkar <harshankmatkar1304@...look.com>
To: "anthony.l.nguyen@...el.com" <anthony.l.nguyen@...el.com>,
"przemyslaw.kitszel@...el.com" <przemyslaw.kitszel@...el.com>
CC: "intel-wired-lan@...ts.osuosl.org" <intel-wired-lan@...ts.osuosl.org>,
"netdev@...r.kernel.org" <netdev@...r.kernel.org>, Harshank Matkar
<harshankmatkar1304@...look.com>
Subject: [PATCH] igc: Add PCIe link recovery for I225/I226
From: Harshank Matkar <harshankmatkar1304@...look.com>
When ASPM L0s transitions occur on Intel I225/I226 controllers,
transient PCIe link instability can cause register read failures
(0xFFFFFFFF responses). Implement a multi-layer recovery strategy:
1. Immediate retries: 3 attempts with 100-200μs delays
2. Link retraining: Trigger PCIe link retraining via capabilities
3. Device detachment: Only as last resort after max attempts
The recovery mechanism includes rate limiting, maximum attempt
tracking, and device presence validation to prevent false detaches
on transient ASPM glitches while maintaining safety through
bounded retry limits.
Signed-off-by: Harshank Matkar <harshankmatkar1304@...look.com>
---
drivers/net/ethernet/intel/igc/igc.h | 6 +-
drivers/net/ethernet/intel/igc/igc_main.c | 155 ++++++++++++++++++++--
2 files changed, 149 insertions(+), 12 deletions(-)
diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index a427f05814c1..2ef488b279b9 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -346,6 +346,10 @@ struct igc_adapter {
struct mutex led_mutex;
struct igc_led_classdev *leds;
bool leds_available;
+
+ /* PCIe recovery tracking */
+ unsigned int pcie_recovery_attempts;
+ unsigned long last_recovery_time;
};
void igc_up(struct igc_adapter *adapter);
@@ -422,7 +426,7 @@ enum igc_rss_type_num {
IGC_RSS_TYPE_MAX = 10,
};
#define IGC_RSS_TYPE_MAX_TABLE 16
-#define IGC_RSS_TYPE_MASK GENMASK(3,0) /* 4-bits (3:0) = mask 0x0F */
+#define IGC_RSS_TYPE_MASK GENMASK(3, 0) /* 4-bits (3:0) = mask 0x0F */
/* igc_rss_type - Rx descriptor RSS type field */
static inline u32 igc_rss_type(const union igc_adv_rx_desc *rx_desc)
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 89a321a344d2..f0daa3edbb79 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -1411,9 +1411,8 @@ static int igc_tx_map(struct igc_ring *tx_ring,
/* Make sure there is space in the ring for the next send. */
igc_maybe_stop_tx(tx_ring, DESC_NEEDED);
- if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) {
+ if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more())
writel(i, tx_ring->tail);
- }
return 0;
dma_error:
@@ -1613,8 +1612,7 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
* otherwise try next time
*/
for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
- count += TXD_USE_COUNT(skb_frag_size(
- &skb_shinfo(skb)->frags[f]));
+ count += TXD_USE_COUNT(skb_frag_size(&skb_shinfo(skb)->frags[f]));
if (igc_maybe_stop_tx(tx_ring, count + 5)) {
/* this is a hard error */
@@ -2524,7 +2522,6 @@ static int __igc_xdp_run_prog(struct igc_adapter *adapter,
if (xdp_do_redirect(adapter->netdev, xdp, prog) < 0)
goto out_failure;
return IGC_XDP_REDIRECT;
- break;
default:
bpf_warn_invalid_xdp_action(adapter->netdev, prog, act);
fallthrough;
@@ -2791,7 +2788,7 @@ static struct igc_xdp_buff *xsk_buff_to_igc_ctx(struct xdp_buff *xdp)
* igc_xdp_buff shares its layout with xdp_buff_xsk and private
* igc_xdp_buff fields fall into xdp_buff_xsk->cb
*/
- return (struct igc_xdp_buff *)xdp;
+ return (struct igc_xdp_buff *)xdp;
}
static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget)
@@ -3895,9 +3892,8 @@ static int igc_enable_nfc_rule(struct igc_adapter *adapter,
{
int err;
- if (rule->flex) {
+ if (rule->flex)
return igc_add_flex_filter(adapter, rule);
- }
if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
err = igc_add_etype_filter(adapter, rule->filter.etype,
@@ -6984,11 +6980,112 @@ static const struct net_device_ops igc_netdev_ops = {
.ndo_hwtstamp_set = igc_ptp_hwtstamp_set,
};
+#define IGC_REGISTER_READ_RETRIES 3
+#define IGC_PCIE_RECOVERY_MAX_ATTEMPTS 10
+#define IGC_PCIE_RECOVERY_INTERVAL_MS 1000
+
+/**
+ * igc_pcie_link_recover - Attempt PCIe link recovery
+ * @adapter: board private structure
+ *
+ * Attempts to recover a failed PCIe link by triggering a link retrain.
+ * Rate-limited to 1 attempt per second, maximum 10 attempts.
+ *
+ * Returns true if recovery was successful, false otherwise.
+ */
+static bool igc_pcie_link_recover(struct igc_adapter *adapter)
+{
+ struct pci_dev *pdev = adapter->pdev;
+ unsigned long now = jiffies;
+ u16 lnksta, lnkctl;
+ int ret;
+ int i;
+
+ /* Rate limiting: no more than 1 attempt per second */
+ if (time_before(now, adapter->last_recovery_time +
+ msecs_to_jiffies(IGC_PCIE_RECOVERY_INTERVAL_MS)))
+ return false;
+
+ /* Maximum attempt limit */
+ if (adapter->pcie_recovery_attempts >= IGC_PCIE_RECOVERY_MAX_ATTEMPTS) {
+ netdev_err(adapter->netdev,
+ "Exceeded maximum PCIe recovery attempts (%d)\n",
+ IGC_PCIE_RECOVERY_MAX_ATTEMPTS);
+ return false;
+ }
+
+ adapter->last_recovery_time = now;
+ adapter->pcie_recovery_attempts++;
+
+ netdev_warn(adapter->netdev,
+ "Attempting PCIe link recovery (attempt %d/%d)\n",
+ adapter->pcie_recovery_attempts,
+ IGC_PCIE_RECOVERY_MAX_ATTEMPTS);
+
+ /* Check if device is still present on the bus */
+ if (!pci_device_is_present(pdev)) {
+ netdev_err(adapter->netdev, "Device not present on PCIe bus\n");
+ return false;
+ }
+
+ /* Check link status */
+ ret = pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnksta);
+ if (ret) {
+ netdev_err(adapter->netdev, "Failed to read link status\n");
+ return false;
+ }
+
+ /* Trigger link retrain if link appears down */
+ if (!(lnksta & PCI_EXP_LNKSTA_DLLLA)) {
+ netdev_info(adapter->netdev,
+ "Link down, attempting retrain\n");
+
+ /* Read link control register */
+ ret = pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &lnkctl);
+ if (ret == 0) {
+ /* Trigger retrain by setting RL bit */
+ pcie_capability_write_word(pdev, PCI_EXP_LNKCTL,
+ lnkctl | PCI_EXP_LNKCTL_RL);
+
+ /* Wait for retrain to complete (up to 1 second) */
+ for (i = 0; i < 100; i++) {
+ usleep_range(10000, 20000);
+ ret = pcie_capability_read_word(pdev, PCI_EXP_LNKSTA,
+ &lnksta);
+ if (ret == 0 && (lnksta & PCI_EXP_LNKSTA_DLLLA) &&
+ !(lnksta & PCI_EXP_LNKSTA_LT)) {
+ netdev_info(adapter->netdev,
+ "PCIe link recovery successful\n");
+ return true;
+ }
+ }
+ }
+ }
+
+ /* Give the link some additional time to recover on its own */
+ msleep(100);
+
+ /* Check if device is responding now */
+ if (pci_device_is_present(pdev)) {
+ ret = pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnksta);
+ if (ret == 0 && (lnksta & PCI_EXP_LNKSTA_DLLLA)) {
+ netdev_info(adapter->netdev,
+ "PCIe link recovered after delay\n");
+ return true;
+ }
+ }
+
+ netdev_warn(adapter->netdev, "PCIe link recovery failed\n");
+ return false;
+}
+
u32 igc_rd32(struct igc_hw *hw, u32 reg)
{
struct igc_adapter *igc = container_of(hw, struct igc_adapter, hw);
u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr);
+ struct net_device *netdev = igc->netdev;
u32 value = 0;
+ int retry;
if (IGC_REMOVED(hw_addr))
return ~value;
@@ -6997,13 +7094,49 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg)
/* reads should not return all F's */
if (!(~value) && (!reg || !(~readl(hw_addr)))) {
- struct net_device *netdev = igc->netdev;
+ /* Layer 1: Immediate retries with short delays (100-200μs) */
+ for (retry = 0; retry < IGC_REGISTER_READ_RETRIES; retry++) {
+ usleep_range(100, 200);
+ value = readl(&hw_addr[reg]);
+
+ /* If we got a valid read, return immediately */
+ if (~value || (reg && ~readl(hw_addr))) {
+ netdev_dbg(netdev,
+ "Register read recovered after %d retries\n",
+ retry + 1);
+ return value;
+ }
+ }
+
+ /* Layer 2: Attempt full PCIe link recovery */
+ netdev_warn(netdev,
+ "All immediate retries failed, attempting PCIe link recovery\n");
+
+ if (igc_pcie_link_recover(igc)) {
+ /* Recovery succeeded, re-read the register */
+ hw_addr = READ_ONCE(hw->hw_addr);
+ if (hw_addr && !IGC_REMOVED(hw_addr)) {
+ value = readl(&hw_addr[reg]);
+
+ /* Verify the read is valid */
+ if (~value || (reg && ~readl(hw_addr))) {
+ netdev_info(netdev,
+ "Register read successful after link recovery\n");
+ return value;
+ }
+ }
+ }
+
+ /* Layer 3: All recovery attempts failed, detach device */
+ netdev_err(netdev,
+ "PCIe link lost after %d retries and recovery attempts, device now detached\n",
+ IGC_REGISTER_READ_RETRIES);
hw->hw_addr = NULL;
netif_device_detach(netdev);
- netdev_err(netdev, "PCIe link lost, device now detached\n");
+
WARN(pci_device_is_present(igc->pdev),
- "igc: Failed to read reg 0x%x!\n", reg);
+ "igc: Failed to read reg 0x%x after all recovery attempts!\n", reg);
}
return value;
--
2.52.0
Powered by blists - more mailing lists