lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed, 18 Jan 2023 01:02:34 +0200
From:   Vladimir Oltean <vladimir.oltean@....com>
To:     netdev@...r.kernel.org
Cc:     "David S. Miller" <davem@...emloft.net>,
        Eric Dumazet <edumazet@...gle.com>,
        Jakub Kicinski <kuba@...nel.org>,
        Paolo Abeni <pabeni@...hat.com>,
        Claudiu Manoil <claudiu.manoil@....com>
Subject: [PATCH net-next 12/12] net: enetc: prioritize ability to go down over packet processing

napi_synchronize() from enetc_stop() waits until the softirq has
finished execution and no longer wants to be rescheduled. However under
high traffic load, this will never happen, and the interface can never
be closed.

The problem is the fact that the NAPI poll routine is written to update
the consumer index which makes the device want to put more buffers in
the RX ring, which restarts the madness again.

Browsing around, it seems that some drivers like i40e keep a bit
(__I40E_VSI_DOWN) which they use as communication between the control
path and the data path. But that isn't my first choice, because
complications ensue - since the enetc hardirq may trigger while we are
in a theoretical ENETC_DOWN state, it may happen that enetc_msix() masks
it, but enetc_poll() never unmasks it. To prevent a stall in that case,
one would need to schedule all NAPI instances when ENETC_DOWN gets
cleared, to process what's pending.

I find it more desirable for the control path - enetc_stop() - to just
quiesce the RX ring and let the softirq finish what remains there,
without any explicit communication, just by making hardware not provide
any more packets.

This seems possible with the Enable bit of the RX BD ring (RBaMR[EN]).
I can't seem to find an exact definition of what this bit does, but when
the RX ring is disabled, the port seems to no longer update the producer
index, and not react to software updates of the consumer index.

In fact, the RBaMR[EN] bit is already toggled by the driver, but too
late for what we want:

enetc_close()
-> enetc_stop()
   -> napi_synchronize()
-> enetc_clear_bdrs()
   -> enetc_clear_rxbdr()

The enetc_clear_bdrs() function contains not only logic to disable the
RX and TX rings, but also logic to wait for the TX ring stop being busy.

We split enetc_clear_bdrs() into enetc_disable_bdrs() and
enetc_wait_bdrs(). One needs to run before napi_synchronize() and the
other after (NAPI also processes TX completions, so we maximize our
chances of not waiting for the ENETC_TBSR_BUSY bit - unless a packet is
stuck for some reason, ofc).

We also split off enetc_enable_bdrs() from enetc_setup_bdrs(), and call
this from the mirror position in enetc_start() compared to enetc_stop(),
i.e. right before netif_tx_start_all_queues().

Signed-off-by: Vladimir Oltean <vladimir.oltean@....com>
---
 drivers/net/ethernet/freescale/enetc/enetc.c | 81 +++++++++++++++-----
 1 file changed, 63 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index eeff69336a80..6e54d49176ad 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -2088,7 +2088,7 @@ static void enetc_setup_txbdr(struct enetc_hw *hw, struct enetc_bdr *tx_ring)
 	/* enable Tx ints by setting pkt thr to 1 */
 	enetc_txbdr_wr(hw, idx, ENETC_TBICR0, ENETC_TBICR0_ICEN | 0x1);
 
-	tbmr = ENETC_TBMR_EN | ENETC_TBMR_SET_PRIO(tx_ring->prio);
+	tbmr = ENETC_TBMR_SET_PRIO(tx_ring->prio);
 	if (tx_ring->ndev->features & NETIF_F_HW_VLAN_CTAG_TX)
 		tbmr |= ENETC_TBMR_VIH;
 
@@ -2104,7 +2104,7 @@ static void enetc_setup_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring,
 			      bool extended)
 {
 	int idx = rx_ring->index;
-	u32 rbmr;
+	u32 rbmr = 0;
 
 	enetc_rxbdr_wr(hw, idx, ENETC_RBBAR0,
 		       lower_32_bits(rx_ring->bd_dma_base));
@@ -2131,8 +2131,6 @@ static void enetc_setup_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring,
 	/* enable Rx ints by setting pkt thr to 1 */
 	enetc_rxbdr_wr(hw, idx, ENETC_RBICR0, ENETC_RBICR0_ICEN | 0x1);
 
-	rbmr = ENETC_RBMR_EN;
-
 	rx_ring->ext_en = extended;
 	if (rx_ring->ext_en)
 		rbmr |= ENETC_RBMR_BDS;
@@ -2151,7 +2149,6 @@ static void enetc_setup_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring,
 	enetc_refill_rx_ring(rx_ring, enetc_bd_unused(rx_ring));
 	enetc_unlock_mdio();
 
-	/* enable ring */
 	enetc_rxbdr_wr(hw, idx, ENETC_RBMR, rbmr);
 }
 
@@ -2167,7 +2164,39 @@ static void enetc_setup_bdrs(struct enetc_ndev_priv *priv, bool extended)
 		enetc_setup_rxbdr(hw, priv->rx_ring[i], extended);
 }
 
-static void enetc_clear_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring)
+static void enetc_enable_txbdr(struct enetc_hw *hw, struct enetc_bdr *tx_ring)
+{
+	int idx = tx_ring->index;
+	u32 tbmr;
+
+	tbmr = enetc_txbdr_rd(hw, idx, ENETC_TBMR);
+	tbmr |= ENETC_TBMR_EN;
+	enetc_txbdr_wr(hw, idx, ENETC_TBMR, tbmr);
+}
+
+static void enetc_enable_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring)
+{
+	int idx = rx_ring->index;
+	u32 rbmr;
+
+	rbmr = enetc_rxbdr_rd(hw, idx, ENETC_RBMR);
+	rbmr |= ENETC_RBMR_EN;
+	enetc_rxbdr_wr(hw, idx, ENETC_RBMR, rbmr);
+}
+
+static void enetc_enable_bdrs(struct enetc_ndev_priv *priv)
+{
+	struct enetc_hw *hw = &priv->si->hw;
+	int i;
+
+	for (i = 0; i < priv->num_tx_rings; i++)
+		enetc_enable_txbdr(hw, priv->tx_ring[i]);
+
+	for (i = 0; i < priv->num_rx_rings; i++)
+		enetc_enable_rxbdr(hw, priv->rx_ring[i]);
+}
+
+static void enetc_disable_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring)
 {
 	int idx = rx_ring->index;
 
@@ -2175,13 +2204,30 @@ static void enetc_clear_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring)
 	enetc_rxbdr_wr(hw, idx, ENETC_RBMR, 0);
 }
 
-static void enetc_clear_txbdr(struct enetc_hw *hw, struct enetc_bdr *tx_ring)
+static void enetc_disable_txbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring)
 {
-	int delay = 8, timeout = 100;
-	int idx = tx_ring->index;
+	int idx = rx_ring->index;
 
 	/* disable EN bit on ring */
 	enetc_txbdr_wr(hw, idx, ENETC_TBMR, 0);
+}
+
+static void enetc_disable_bdrs(struct enetc_ndev_priv *priv)
+{
+	struct enetc_hw *hw = &priv->si->hw;
+	int i;
+
+	for (i = 0; i < priv->num_tx_rings; i++)
+		enetc_disable_txbdr(hw, priv->tx_ring[i]);
+
+	for (i = 0; i < priv->num_rx_rings; i++)
+		enetc_disable_rxbdr(hw, priv->rx_ring[i]);
+}
+
+static void enetc_wait_txbdr(struct enetc_hw *hw, struct enetc_bdr *tx_ring)
+{
+	int delay = 8, timeout = 100;
+	int idx = tx_ring->index;
 
 	/* wait for busy to clear */
 	while (delay < timeout &&
@@ -2195,18 +2241,13 @@ static void enetc_clear_txbdr(struct enetc_hw *hw, struct enetc_bdr *tx_ring)
 			    idx);
 }
 
-static void enetc_clear_bdrs(struct enetc_ndev_priv *priv)
+static void enetc_wait_bdrs(struct enetc_ndev_priv *priv)
 {
 	struct enetc_hw *hw = &priv->si->hw;
 	int i;
 
 	for (i = 0; i < priv->num_tx_rings; i++)
-		enetc_clear_txbdr(hw, priv->tx_ring[i]);
-
-	for (i = 0; i < priv->num_rx_rings; i++)
-		enetc_clear_rxbdr(hw, priv->rx_ring[i]);
-
-	udelay(1);
+		enetc_wait_txbdr(hw, priv->tx_ring[i]);
 }
 
 static int enetc_setup_irqs(struct enetc_ndev_priv *priv)
@@ -2381,6 +2422,8 @@ void enetc_start(struct net_device *ndev)
 		enable_irq(irq);
 	}
 
+	enetc_enable_bdrs(priv);
+
 	netif_tx_start_all_queues(ndev);
 }
 
@@ -2452,6 +2495,8 @@ void enetc_stop(struct net_device *ndev)
 
 	netif_tx_stop_all_queues(ndev);
 
+	enetc_disable_bdrs(priv);
+
 	for (i = 0; i < priv->bdr_int_num; i++) {
 		int irq = pci_irq_vector(priv->si->pdev,
 					 ENETC_BDR_INT_BASE_IDX + i);
@@ -2461,6 +2506,8 @@ void enetc_stop(struct net_device *ndev)
 		napi_disable(&priv->int_vector[i]->napi);
 	}
 
+	enetc_wait_bdrs(priv);
+
 	enetc_clear_interrupts(priv);
 }
 
@@ -2469,7 +2516,6 @@ int enetc_close(struct net_device *ndev)
 	struct enetc_ndev_priv *priv = netdev_priv(ndev);
 
 	enetc_stop(ndev);
-	enetc_clear_bdrs(priv);
 
 	if (priv->phylink) {
 		phylink_stop(priv->phylink);
@@ -2521,7 +2567,6 @@ static int enetc_reconfigure(struct enetc_ndev_priv *priv, bool extended,
 	}
 
 	enetc_stop(priv->ndev);
-	enetc_clear_bdrs(priv);
 	enetc_free_rxtx_rings(priv);
 
 	/* Interface is down, run optional callback now */
-- 
2.34.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ