[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220504070256.694458-15-saeedm@nvidia.com>
Date: Wed, 4 May 2022 00:02:55 -0700
From: Saeed Mahameed <saeedm@...dia.com>
To: "David S. Miller" <davem@...emloft.net>,
Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>
Cc: netdev@...r.kernel.org, Moshe Shemesh <moshe@...dia.com>,
Maher Sanalla <msanalla@...dia.com>,
Shay Drory <shayd@...dia.com>,
Saeed Mahameed <saeedm@...dia.com>
Subject: [net 14/15] net/mlx5: Avoid double clear or set of sync reset requested
From: Moshe Shemesh <moshe@...dia.com>
Clearing the reset requested state twice can lead to a NULL pointer
dereference, as it will try to delete the timer twice. This can happen,
for example, on a race between an abort from FW and a PCI error or reset.
Avoid this by using test_and_clear_bit() to ensure that the reset
requested state clear flow runs only once. Similarly, use
test_and_set_bit() to ensure that the reset requested state set flow
runs only once.
Fixes: 7dd6df329d4c ("net/mlx5: Handle sync reset abort event")
Signed-off-by: Moshe Shemesh <moshe@...dia.com>
Reviewed-by: Maher Sanalla <msanalla@...dia.com>
Reviewed-by: Shay Drory <shayd@...dia.com>
Signed-off-by: Saeed Mahameed <saeedm@...dia.com>
---
.../ethernet/mellanox/mlx5/core/fw_reset.c | 28 +++++++++++++------
1 file changed, 19 insertions(+), 9 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
index ec18d4ccbc11..ca1aba845dd6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
@@ -162,14 +162,19 @@ static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev)
del_timer_sync(&fw_reset->timer);
}
-static void mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health)
+static int mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health)
{
struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+ if (!test_and_clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) {
+ mlx5_core_warn(dev, "Reset request was already cleared\n");
+ return -EALREADY;
+ }
+
mlx5_stop_sync_reset_poll(dev);
- clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags);
if (poll_health)
mlx5_start_health_poll(dev);
+ return 0;
}
static void mlx5_sync_reset_reload_work(struct work_struct *work)
@@ -229,13 +234,17 @@ static int mlx5_fw_reset_set_reset_sync_nack(struct mlx5_core_dev *dev)
return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, 0, 2, false);
}
-static void mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev)
+static int mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev)
{
struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
+ if (test_and_set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) {
+ mlx5_core_warn(dev, "Reset request was already set\n");
+ return -EALREADY;
+ }
mlx5_stop_health_poll(dev, true);
- set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags);
mlx5_start_sync_reset_poll(dev);
+ return 0;
}
static void mlx5_fw_live_patch_event(struct work_struct *work)
@@ -264,7 +273,9 @@ static void mlx5_sync_reset_request_event(struct work_struct *work)
err ? "Failed" : "Sent");
return;
}
- mlx5_sync_reset_set_reset_requested(dev);
+ if (mlx5_sync_reset_set_reset_requested(dev))
+ return;
+
err = mlx5_fw_reset_set_reset_sync_ack(dev);
if (err)
mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack Failed. Error code: %d\n", err);
@@ -362,7 +373,8 @@ static void mlx5_sync_reset_now_event(struct work_struct *work)
struct mlx5_core_dev *dev = fw_reset->dev;
int err;
- mlx5_sync_reset_clear_reset_requested(dev, false);
+ if (mlx5_sync_reset_clear_reset_requested(dev, false))
+ return;
mlx5_core_warn(dev, "Sync Reset now. Device is going to reset.\n");
@@ -391,10 +403,8 @@ static void mlx5_sync_reset_abort_event(struct work_struct *work)
reset_abort_work);
struct mlx5_core_dev *dev = fw_reset->dev;
- if (!test_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags))
+ if (mlx5_sync_reset_clear_reset_requested(dev, true))
return;
-
- mlx5_sync_reset_clear_reset_requested(dev, true);
mlx5_core_warn(dev, "PCI Sync FW Update Reset Aborted.\n");
}
--
2.35.1
Powered by blists - more mailing lists