lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Fri, 19 Jan 2018 23:34:53 +0200
From:   Saeed Mahameed <saeedm@...lanox.com>
To:     "David S. Miller" <davem@...emloft.net>
Cc:     netdev@...r.kernel.org, Eran Ben Elisha <eranbe@...lanox.com>,
        Saeed Mahameed <saeedm@...lanox.com>
Subject: [net-next 11/15] net/mlx5e: Poll event queue upon TX timeout before performing full channels recovery

From: Eran Ben Elisha <eranbe@...lanox.com>

Up until this patch, on every TX timeout we would try to do channels
recovery.  However, in case of a lost interrupt for an EQ, the channel
associated to it cannot be recovered if reopened as it would never get
another interrupt on sent/received traffic, and eventually ends up with
another TX timeout (Restarting the EQ is not part of channel recovery).

This patch adds a mechanism for explicitly polling EQ in case of a TX
timeout in order to recover from a lost interrupt. If this is not the
case (no pending EQEs), perform a channels full recovery as usual.

Once a lost EQE is recovered, it triggers the NAPI to run and handle all
pending completions. This will free some budget in the bql (via calling
netdev_tx_completed_queue) or by clearing pending TXWQEs and waking up
the queue.  One of the above actions will move the queue to be ready for
transmit again.

Signed-off-by: Eran Ben Elisha <eranbe@...lanox.com>
Reviewed-by: Tariq Toukan <tariqt@...lanox.com>
Signed-off-by: Saeed Mahameed <saeedm@...lanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 51 +++++++++++++++-------
 drivers/net/ethernet/mellanox/mlx5/core/eq.c       | 18 ++++++++
 .../net/ethernet/mellanox/mlx5/core/mlx5_core.h    |  1 +
 3 files changed, 55 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 71b8b171c565..12ea5e319e38 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3757,10 +3757,37 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb,
 	return features;
 }
 
+static bool mlx5e_tx_timeout_eq_recover(struct net_device *dev,
+					struct mlx5e_txqsq *sq)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct mlx5_core_dev *mdev = priv->mdev;
+	int irqn_not_used, eqn;
+	struct mlx5_eq *eq;
+	u32 eqe_count;
+
+	if (mlx5_vector2eqn(mdev, sq->cq.mcq.vector, &eqn, &irqn_not_used))
+		return false;
+
+	eq = mlx5_eqn2eq(mdev, eqn);
+	if (IS_ERR(eq))
+		return false;
+
+	netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
+		   eqn, eq->cons_index, eq->irqn);
+
+	eqe_count = mlx5_eq_poll_irq_disabled(eq);
+	if (!eqe_count)
+		return false;
+
+	netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->eqn);
+	return true;
+}
+
 static void mlx5e_tx_timeout(struct net_device *dev)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
-	bool sched_work = false;
+	bool reopen_channels = false;
 	int i;
 
 	netdev_err(dev, "TX timeout detected\n");
@@ -3768,29 +3795,23 @@ static void mlx5e_tx_timeout(struct net_device *dev)
 	for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; i++) {
 		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, i);
 		struct mlx5e_txqsq *sq = priv->txq2sq[i];
-		struct mlx5_core_dev *mdev = priv->mdev;
-		int irqn_not_used, eqn;
-		struct mlx5_eq *eq;
 
 		if (!netif_xmit_stopped(dev_queue))
 			continue;
-		sched_work = true;
-		clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
 		netdev_err(dev, "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n",
 			   i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
 			   jiffies_to_usecs(jiffies - dev_queue->trans_start));
 
-		if (mlx5_vector2eqn(mdev, sq->cq.mcq.vector, &eqn,
-				    &irqn_not_used))
-			continue;
-
-		eq = mlx5_eqn2eq(mdev, eqn);
-		if (!IS_ERR(eq))
-			netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
-				   eqn, eq->cons_index, eq->irqn);
+		/* If we recover a lost interrupt, most likely TX timeout will
+		 * be resolved, skip reopening channels
+		 */
+		if (!mlx5e_tx_timeout_eq_recover(dev, sq)) {
+			clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
+			reopen_channels = true;
+		}
 	}
 
-	if (sched_work && test_bit(MLX5E_STATE_OPENED, &priv->state))
+	if (reopen_channels && test_bit(MLX5E_STATE_OPENED, &priv->state))
 		schedule_work(&priv->tx_timeout_work);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index e7e7cef2bde4..4d98ce0901af 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -530,6 +530,24 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
 	return IRQ_HANDLED;
 }
 
+/* Some architectures don't latch interrupts when they are disabled, so using
+ * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to
+ * avoid losing them.  It is not recommended to use it, unless this is the last
+ * resort.
+ */
+u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq)
+{
+	u32 count_eqe;
+
+	disable_irq(eq->irqn);
+	count_eqe = eq->cons_index;
+	mlx5_eq_int(eq->irqn, eq);
+	count_eqe = eq->cons_index - count_eqe;
+	enable_irq(eq->irqn);
+
+	return count_eqe;
+}
+
 static void init_eq_buf(struct mlx5_eq *eq)
 {
 	struct mlx5_eqe *eqe;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index ff4a0b889a6f..b5a46c128b28 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -116,6 +116,7 @@ int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
 int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev);
 u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev);
 struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn);
+u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq);
 void mlx5_cq_tasklet_cb(unsigned long data);
 
 int mlx5_query_pcam_reg(struct mlx5_core_dev *dev, u32 *pcam, u8 feature_group,
-- 
2.13.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ