[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250429155205.1444438-1-maciej.fijalkowski@intel.com>
Date: Tue, 29 Apr 2025 17:52:05 +0200
From: Maciej Fijalkowski <maciej.fijalkowski@...el.com>
To: intel-wired-lan@...ts.osuosl.org
Cc: netdev@...r.kernel.org,
anthony.l.nguyen@...el.com,
magnus.karlsson@...el.com,
michal.kubiak@...el.com,
Maciej Fijalkowski <maciej.fijalkowski@...el.com>,
Tobias Böhm <tobias.boehm@...zner-cloud.de>,
Marcus Wichelmann <marcus.wichelmann@...zner-cloud.de>
Subject: [PATCH iwl-net] ixgbe: fix ndo_xdp_xmit() workloads
Currently ixgbe driver checks periodically in its watchdog subtask if
there is anything to be transmitted (consdidering both Tx and XDP rings)
under state of carrier not being 'ok'. Such event is interpreted as Tx
hang and therefore results in interface reset.
This is currently problematic for ndo_xdp_xmit() as it is allowed to
produce descriptors when interface is going through reset or its carrier
is turned off.
Furthermore, XDP rings should not really be objects of Tx hang
detection. This mechanism is rather a matter of ndo_tx_timeout() being
called from dev_watchdog against Tx rings exposed to networking stack.
Taking into account issues described above, let us have a two fold fix -
do not respect XDP rings in local ixgbe watchdog and do not produce Tx
descriptors in ndo_xdp_xmit callback when there is some problem with
carrier currently. For now, keep the Tx hang checks in clean Tx irq
routine, but adjust it to not execute it for XDP rings.
Cc: Tobias Böhm <tobias.boehm@...zner-cloud.de>
Reported-by: Marcus Wichelmann <marcus.wichelmann@...zner-cloud.de>
Closes: https://lore.kernel.org/netdev/eca1880f-253a-4955-afe6-732d7c6926ee@hetzner-cloud.de/
Fixes: 6453073987ba ("ixgbe: add initial support for xdp redirect")
Fixes: 33fdc82f0883 ("ixgbe: add support for XDP_TX action")
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@...el.com>
---
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 34 ++++++-------------
1 file changed, 11 insertions(+), 23 deletions(-)
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 467f81239e12..21bfea8aeb67 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -966,10 +966,6 @@ static void ixgbe_update_xoff_rx_lfc(struct ixgbe_adapter *adapter)
for (i = 0; i < adapter->num_tx_queues; i++)
clear_bit(__IXGBE_HANG_CHECK_ARMED,
&adapter->tx_ring[i]->state);
-
- for (i = 0; i < adapter->num_xdp_queues; i++)
- clear_bit(__IXGBE_HANG_CHECK_ARMED,
- &adapter->xdp_ring[i]->state);
}
static void ixgbe_update_xoff_received(struct ixgbe_adapter *adapter)
@@ -1263,10 +1259,13 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
total_bytes);
adapter->tx_ipsec += total_ipsec;
+ if (ring_is_xdp(tx_ring))
+ return !!budget;
+
if (check_for_tx_hang(tx_ring) && ixgbe_check_tx_hang(tx_ring)) {
/* schedule immediate reset if we believe we hung */
struct ixgbe_hw *hw = &adapter->hw;
- e_err(drv, "Detected Tx Unit Hang %s\n"
+ e_err(drv, "Detected Tx Unit Hang\n"
" Tx Queue <%d>\n"
" TDH, TDT <%x>, <%x>\n"
" next_to_use <%x>\n"
@@ -1274,16 +1273,14 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
"tx_buffer_info[next_to_clean]\n"
" time_stamp <%lx>\n"
" jiffies <%lx>\n",
- ring_is_xdp(tx_ring) ? "(XDP)" : "",
tx_ring->queue_index,
IXGBE_READ_REG(hw, IXGBE_TDH(tx_ring->reg_idx)),
IXGBE_READ_REG(hw, IXGBE_TDT(tx_ring->reg_idx)),
tx_ring->next_to_use, i,
tx_ring->tx_buffer_info[i].time_stamp, jiffies);
- if (!ring_is_xdp(tx_ring))
- netif_stop_subqueue(tx_ring->netdev,
- tx_ring->queue_index);
+ netif_stop_subqueue(tx_ring->netdev,
+ tx_ring->queue_index);
e_info(probe,
"tx hang %d detected on queue %d, resetting adapter\n",
@@ -1296,9 +1293,6 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
return true;
}
- if (ring_is_xdp(tx_ring))
- return !!budget;
-
#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
txq = netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index);
if (!__netif_txq_completed_wake(txq, total_packets, total_bytes,
@@ -7791,12 +7785,9 @@ static void ixgbe_check_hang_subtask(struct ixgbe_adapter *adapter)
return;
/* Force detection of hung controller */
- if (netif_carrier_ok(adapter->netdev)) {
+ if (netif_carrier_ok(adapter->netdev))
for (i = 0; i < adapter->num_tx_queues; i++)
set_check_for_tx_hang(adapter->tx_ring[i]);
- for (i = 0; i < adapter->num_xdp_queues; i++)
- set_check_for_tx_hang(adapter->xdp_ring[i]);
- }
if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED)) {
/*
@@ -8011,13 +8002,6 @@ static bool ixgbe_ring_tx_pending(struct ixgbe_adapter *adapter)
return true;
}
- for (i = 0; i < adapter->num_xdp_queues; i++) {
- struct ixgbe_ring *ring = adapter->xdp_ring[i];
-
- if (ring->next_to_use != ring->next_to_clean)
- return true;
- }
-
return false;
}
@@ -10742,6 +10726,10 @@ static int ixgbe_xdp_xmit(struct net_device *dev, int n,
if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state)))
return -ENETDOWN;
+ if (!netif_carrier_ok(adapter->netdev) ||
+ !netif_running(adapter->netdev))
+ return -ENETDOWN;
+
if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
return -EINVAL;
--
2.43.0
Powered by blists - more mailing lists