[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1763415729-1238421-5-git-send-email-tariqt@nvidia.com>
Date: Mon, 17 Nov 2025 23:42:08 +0200
From: Tariq Toukan <tariqt@...dia.com>
To: Eric Dumazet <edumazet@...gle.com>, Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>, Andrew Lunn <andrew+netdev@...n.ch>, "David
S. Miller" <davem@...emloft.net>
CC: Saeed Mahameed <saeedm@...dia.com>, Leon Romanovsky <leon@...nel.org>,
Tariq Toukan <tariqt@...dia.com>, Mark Bloch <mbloch@...dia.com>, "Richard
Cochran" <richardcochran@...il.com>, <netdev@...r.kernel.org>,
<linux-rdma@...r.kernel.org>, <linux-kernel@...r.kernel.org>, Gal Pressman
<gal@...dia.com>, Moshe Shemesh <moshe@...dia.com>, Dragos Tatulea
<dtatulea@...dia.com>
Subject: [PATCH net-next 4/5] net/mlx5: Abort new commands if all command slots are stalled
From: Saeed Mahameed <saeedm@...dia.com>
In case of a FW issue, FW might be not responding to FW commands,
causing kernel lockout for a long period of time, e.g. rtnl_lock held
while ethtool is trying to collect stats waiting for FW to respond to
multiple commands, when all of them will timeout.
While there's no immediate indication of the FW lockout, we can safely
assume that something is wrong when all command slots are busy and in
a timeout state and no FW completion was received on any of them.
In such case, start immediately failing new commands.
Signed-off-by: Saeed Mahameed <saeedm@...dia.com>
Reviewed-by: Moshe Shemesh <moshe@...dia.com>
Signed-off-by: Tariq Toukan <tariqt@...dia.com>
---
drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 55 +++++++++++++++++++
include/linux/mlx5/driver.h | 1 +
2 files changed, 56 insertions(+)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 722282cebce9..5b08e5ffe0e2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -181,6 +181,7 @@ static int cmd_alloc_index(struct mlx5_cmd *cmd, struct mlx5_cmd_work_ent *ent)
static void cmd_free_index(struct mlx5_cmd *cmd, int idx)
{
lockdep_assert_held(&cmd->alloc_lock);
+ cmd->ent_arr[idx] = NULL;
set_bit(idx, &cmd->vars.bitmask);
}
@@ -1200,6 +1201,44 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
return err;
}
+/* Check if all command slots are stalled (timed out and not recovered).
+ * returns true if all slots timed out on a recent command and have not been
+ * completed by FW yet. (stalled state)
+ * false otherwise (at least one slot is not stalled).
+ *
+ * In such odd situation "all_stalled", this serves as a protection mechanism
+ * to avoid blocking the kernel for long periods of time in case FW is not
+ * responding to commands.
+ */
+static bool mlx5_cmd_all_stalled(struct mlx5_core_dev *dev)
+{
+ struct mlx5_cmd *cmd = &dev->cmd;
+ bool all_stalled = true;
+ unsigned long flags;
+ int i;
+
+ spin_lock_irqsave(&cmd->alloc_lock, flags);
+
+ /* at least one command slot is free */
+ if (bitmap_weight(&cmd->vars.bitmask, cmd->vars.max_reg_cmds) > 0) {
+ all_stalled = false;
+ goto out;
+ }
+
+ for_each_clear_bit(i, &cmd->vars.bitmask, cmd->vars.max_reg_cmds) {
+ struct mlx5_cmd_work_ent *ent = dev->cmd.ent_arr[i];
+
+ if (!test_bit(MLX5_CMD_ENT_STATE_TIMEDOUT, &ent->state)) {
+ all_stalled = false;
+ break;
+ }
+ }
+out:
+ spin_unlock_irqrestore(&cmd->alloc_lock, flags);
+
+ return all_stalled;
+}
+
/* Notes:
* 1. Callback functions may not sleep
* 2. page queue commands do not support asynchrous completion
@@ -1230,6 +1269,15 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
if (callback && page_queue)
return -EINVAL;
+ if (!page_queue && mlx5_cmd_all_stalled(dev)) {
+ mlx5_core_err_rl(dev,
+ "All CMD slots are stalled, aborting command\n");
+ /* there's no reason to wait and block the whole kernel if FW
+ * isn't currently responding to all slots, fail immediately
+ */
+ return -EAGAIN;
+ }
+
ent = cmd_alloc_ent(cmd, in, out, uout, uout_size,
callback, context, page_queue);
if (IS_ERR(ent))
@@ -1700,6 +1748,13 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
if (test_bit(i, &vector)) {
ent = cmd->ent_arr[i];
+ if (forced && ent->ret == -ETIMEDOUT)
+ set_bit(MLX5_CMD_ENT_STATE_TIMEDOUT,
+ &ent->state);
+ else if (!forced) /* real FW completion */
+ clear_bit(MLX5_CMD_ENT_STATE_TIMEDOUT,
+ &ent->state);
+
/* if we already completed the command, ignore it */
if (!test_and_clear_bit(MLX5_CMD_ENT_STATE_PENDING_COMP,
&ent->state)) {
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 046396269ccf..7aec53371cf0 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -819,6 +819,7 @@ typedef void (*mlx5_cmd_cbk_t)(int status, void *context);
enum {
MLX5_CMD_ENT_STATE_PENDING_COMP,
+ MLX5_CMD_ENT_STATE_TIMEDOUT,
};
struct mlx5_cmd_work_ent {
--
2.31.1
Powered by blists - more mailing lists