[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230814214144.159464-6-saeed@kernel.org>
Date: Mon, 14 Aug 2023 14:41:35 -0700
From: Saeed Mahameed <saeed@...nel.org>
To: "David S. Miller" <davem@...emloft.net>,
Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>,
Eric Dumazet <edumazet@...gle.com>
Cc: Saeed Mahameed <saeedm@...dia.com>,
netdev@...r.kernel.org,
Tariq Toukan <tariqt@...dia.com>,
Moshe Shemesh <moshe@...dia.com>,
Shay Drory <shayd@...dia.com>
Subject: [net-next 05/14] net/mlx5: Check with FW that sync reset completed successfully
From: Moshe Shemesh <moshe@...dia.com>
Even if the PF driver had no error on his part of the sync reset flow,
the firmware can see wider picture as it syncs all the PFs in the flow.
So add at end of sync reset flow check with firmware by reading MFRL
register and initialization segment that the flow had no issue from
firmware point of view too.
Signed-off-by: Moshe Shemesh <moshe@...dia.com>
Reviewed-by: Shay Drory <shayd@...dia.com>
Signed-off-by: Saeed Mahameed <saeedm@...dia.com>
---
.../net/ethernet/mellanox/mlx5/core/devlink.c | 3 ++
.../ethernet/mellanox/mlx5/core/fw_reset.c | 39 ++++++++++++++++---
.../ethernet/mellanox/mlx5/core/fw_reset.h | 2 +
include/linux/mlx5/mlx5_ifc.h | 3 +-
4 files changed, 40 insertions(+), 7 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index 3d82ec890666..af8460bb257b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -212,6 +212,9 @@ static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_a
/* On fw_activate action, also driver is reloaded and reinit performed */
*actions_performed |= BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT);
ret = mlx5_load_one_devl_locked(dev, true);
+ if (ret)
+ return ret;
+ ret = mlx5_fw_reset_verify_fw_complete(dev, extack);
break;
default:
/* Unsupported action should not get to this function */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
index 4804990b7f22..e87766f91150 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
@@ -127,17 +127,23 @@ static int mlx5_fw_reset_get_reset_state_err(struct mlx5_core_dev *dev,
if (mlx5_reg_mfrl_query(dev, NULL, NULL, &reset_state))
goto out;
+ if (!reset_state)
+ return 0;
+
switch (reset_state) {
case MLX5_MFRL_REG_RESET_STATE_IN_NEGOTIATION:
case MLX5_MFRL_REG_RESET_STATE_RESET_IN_PROGRESS:
- NL_SET_ERR_MSG_MOD(extack, "Sync reset was already triggered");
+ NL_SET_ERR_MSG_MOD(extack, "Sync reset still in progress");
return -EBUSY;
- case MLX5_MFRL_REG_RESET_STATE_TIMEOUT:
- NL_SET_ERR_MSG_MOD(extack, "Sync reset got timeout");
+ case MLX5_MFRL_REG_RESET_STATE_NEG_TIMEOUT:
+ NL_SET_ERR_MSG_MOD(extack, "Sync reset negotiation timeout");
return -ETIMEDOUT;
case MLX5_MFRL_REG_RESET_STATE_NACK:
NL_SET_ERR_MSG_MOD(extack, "One of the hosts disabled reset");
return -EPERM;
+ case MLX5_MFRL_REG_RESET_STATE_UNLOAD_TIMEOUT:
+ NL_SET_ERR_MSG_MOD(extack, "Sync reset unload timeout");
+ return -ETIMEDOUT;
}
out:
@@ -151,7 +157,7 @@ int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel,
struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {};
u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {};
- int err;
+ int err, rst_res;
set_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags);
@@ -164,13 +170,34 @@ int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel,
return 0;
clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags);
- if (err == -EREMOTEIO && MLX5_CAP_MCAM_FEATURE(dev, reset_state))
- return mlx5_fw_reset_get_reset_state_err(dev, extack);
+ if (err == -EREMOTEIO && MLX5_CAP_MCAM_FEATURE(dev, reset_state)) {
+ rst_res = mlx5_fw_reset_get_reset_state_err(dev, extack);
+ return rst_res ? rst_res : err;
+ }
NL_SET_ERR_MSG_MOD(extack, "Sync reset command failed");
return mlx5_cmd_check(dev, err, in, out);
}
+int mlx5_fw_reset_verify_fw_complete(struct mlx5_core_dev *dev,
+ struct netlink_ext_ack *extack)
+{
+ u8 rst_state;
+ int err;
+
+ err = mlx5_fw_reset_get_reset_state_err(dev, extack);
+ if (err)
+ return err;
+
+ rst_state = mlx5_get_fw_rst_state(dev);
+ if (!rst_state)
+ return 0;
+
+ mlx5_core_err(dev, "Sync reset did not complete, state=%d\n", rst_state);
+ NL_SET_ERR_MSG_MOD(extack, "Sync reset did not complete successfully");
+ return rst_state;
+}
+
int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev)
{
return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL0, 0, 0, false);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h
index c57465595f7c..ea527d06a85f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h
@@ -12,6 +12,8 @@ int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel,
int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev);
int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev);
+int mlx5_fw_reset_verify_fw_complete(struct mlx5_core_dev *dev,
+ struct netlink_ext_ack *extack);
void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev);
void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev);
void mlx5_drain_fw_reset(struct mlx5_core_dev *dev);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 87fd6f9ed82c..9aed7e9b9f29 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -10858,8 +10858,9 @@ enum {
MLX5_MFRL_REG_RESET_STATE_IDLE = 0,
MLX5_MFRL_REG_RESET_STATE_IN_NEGOTIATION = 1,
MLX5_MFRL_REG_RESET_STATE_RESET_IN_PROGRESS = 2,
- MLX5_MFRL_REG_RESET_STATE_TIMEOUT = 3,
+ MLX5_MFRL_REG_RESET_STATE_NEG_TIMEOUT = 3,
MLX5_MFRL_REG_RESET_STATE_NACK = 4,
+ MLX5_MFRL_REG_RESET_STATE_UNLOAD_TIMEOUT = 5,
};
enum {
--
2.41.0
Powered by blists - more mailing lists