[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-Id: <20211220105614.68622-5-idosch@nvidia.com>
Date: Mon, 20 Dec 2021 12:56:14 +0200
From: Ido Schimmel <idosch@...dia.com>
To: netdev@...r.kernel.org
Cc: davem@...emloft.net, kuba@...nel.org, petrm@...dia.com,
danieller@...dia.com, mlxsw@...dia.com,
Ido Schimmel <idosch@...dia.com>
Subject: [PATCH net-next 4/4] mlxsw: core: Extend devlink health reporter with new events and parameters
From: Danielle Ratson <danieller@...dia.com>
Extend the devlink health reporter registered by mlxsw to report new
health events and their related parameters. These are meant to aid in
debugging of hardware / firmware issues.
Beside the test event ('MLXSW_REG_MFDE_EVENT_ID_TEST') that is triggered
following the devlink health 'test' sub-command, the new events are used
to report the triggering of asserts in firmware code
('MLXSW_REG_MFDE_EVENT_ID_FW_ASSERT') and hardware issues
('MLXSW_REG_MFDE_EVENT_ID_FATAL_CAUSE').
Each event is accompanied with a severity parameter and per-event
parameters that are meant to help root cause the detected issue.
Signed-off-by: Danielle Ratson <danieller@...dia.com>
Signed-off-by: Ido Schimmel <idosch@...dia.com>
---
drivers/net/ethernet/mellanox/mlxsw/core.c | 131 +++++++++++++++++++++
1 file changed, 131 insertions(+)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index d9f12d9cd0ff..866b9357939b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -1708,12 +1708,93 @@ static void mlxsw_core_health_listener_func(const struct mlxsw_reg_info *reg,
static const struct mlxsw_listener mlxsw_core_health_listener =
MLXSW_EVENTL(mlxsw_core_health_listener_func, MFDE, MFDE);
+static int
+mlxsw_core_health_fw_fatal_dump_fatal_cause(const char *mfde_pl,
+ struct devlink_fmsg *fmsg)
+{
+ u32 val, tile_v;
+ int err;
+
+ val = mlxsw_reg_mfde_fatal_cause_id_get(mfde_pl);
+ err = devlink_fmsg_u32_pair_put(fmsg, "cause_id", val);
+ if (err)
+ return err;
+ tile_v = mlxsw_reg_mfde_fatal_cause_tile_v_get(mfde_pl);
+ if (tile_v) {
+ val = mlxsw_reg_mfde_fatal_cause_tile_index_get(mfde_pl);
+ err = devlink_fmsg_u8_pair_put(fmsg, "tile_index", val);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int
+mlxsw_core_health_fw_fatal_dump_fw_assert(const char *mfde_pl,
+ struct devlink_fmsg *fmsg)
+{
+ u32 val, tile_v;
+ int err;
+
+ val = mlxsw_reg_mfde_fw_assert_var0_get(mfde_pl);
+ err = devlink_fmsg_u32_pair_put(fmsg, "var0", val);
+ if (err)
+ return err;
+ val = mlxsw_reg_mfde_fw_assert_var1_get(mfde_pl);
+ err = devlink_fmsg_u32_pair_put(fmsg, "var1", val);
+ if (err)
+ return err;
+ val = mlxsw_reg_mfde_fw_assert_var2_get(mfde_pl);
+ err = devlink_fmsg_u32_pair_put(fmsg, "var2", val);
+ if (err)
+ return err;
+ val = mlxsw_reg_mfde_fw_assert_var3_get(mfde_pl);
+ err = devlink_fmsg_u32_pair_put(fmsg, "var3", val);
+ if (err)
+ return err;
+ val = mlxsw_reg_mfde_fw_assert_var4_get(mfde_pl);
+ err = devlink_fmsg_u32_pair_put(fmsg, "var4", val);
+ if (err)
+ return err;
+ val = mlxsw_reg_mfde_fw_assert_existptr_get(mfde_pl);
+ err = devlink_fmsg_u32_pair_put(fmsg, "existptr", val);
+ if (err)
+ return err;
+ val = mlxsw_reg_mfde_fw_assert_callra_get(mfde_pl);
+ err = devlink_fmsg_u32_pair_put(fmsg, "callra", val);
+ if (err)
+ return err;
+ val = mlxsw_reg_mfde_fw_assert_oe_get(mfde_pl);
+ err = devlink_fmsg_bool_pair_put(fmsg, "old_event", val);
+ if (err)
+ return err;
+ tile_v = mlxsw_reg_mfde_fw_assert_tile_v_get(mfde_pl);
+ if (tile_v) {
+ val = mlxsw_reg_mfde_fw_assert_tile_index_get(mfde_pl);
+ err = devlink_fmsg_u8_pair_put(fmsg, "tile_index", val);
+ if (err)
+ return err;
+ }
+ val = mlxsw_reg_mfde_fw_assert_ext_synd_get(mfde_pl);
+ err = devlink_fmsg_u32_pair_put(fmsg, "ext_synd", val);
+ if (err)
+ return err;
+
+ return 0;
+}
+
static int
mlxsw_core_health_fw_fatal_dump_kvd_im_stop(const char *mfde_pl,
struct devlink_fmsg *fmsg)
{
u32 val;
+ int err;
+ val = mlxsw_reg_mfde_kvd_im_stop_oe_get(mfde_pl);
+ err = devlink_fmsg_bool_pair_put(fmsg, "old_event", val);
+ if (err)
+ return err;
val = mlxsw_reg_mfde_kvd_im_stop_pipes_mask_get(mfde_pl);
return devlink_fmsg_u32_pair_put(fmsg, "pipes_mask", val);
}
@@ -1727,6 +1808,10 @@ mlxsw_core_health_fw_fatal_dump_crspace_to(const char *mfde_pl,
val = mlxsw_reg_mfde_crspace_to_log_address_get(mfde_pl);
err = devlink_fmsg_u32_pair_put(fmsg, "log_address", val);
+ if (err)
+ return err;
+ val = mlxsw_reg_mfde_crspace_to_oe_get(mfde_pl);
+ err = devlink_fmsg_bool_pair_put(fmsg, "old_event", val);
if (err)
return err;
val = mlxsw_reg_mfde_crspace_to_log_id_get(mfde_pl);
@@ -1774,6 +1859,15 @@ static int mlxsw_core_health_fw_fatal_dump(struct devlink_health_reporter *repor
case MLXSW_REG_MFDE_EVENT_ID_KVD_IM_STOP:
val_str = "KVD insertion machine stopped";
break;
+ case MLXSW_REG_MFDE_EVENT_ID_TEST:
+ val_str = "Test";
+ break;
+ case MLXSW_REG_MFDE_EVENT_ID_FW_ASSERT:
+ val_str = "FW assert";
+ break;
+ case MLXSW_REG_MFDE_EVENT_ID_FATAL_CAUSE:
+ val_str = "Fatal cause";
+ break;
default:
val_str = NULL;
}
@@ -1782,6 +1876,38 @@ static int mlxsw_core_health_fw_fatal_dump(struct devlink_health_reporter *repor
if (err)
return err;
}
+
+ err = devlink_fmsg_arr_pair_nest_end(fmsg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_arr_pair_nest_start(fmsg, "severity");
+ if (err)
+ return err;
+
+ val = mlxsw_reg_mfde_severity_get(mfde_pl);
+ err = devlink_fmsg_u8_pair_put(fmsg, "id", val);
+ if (err)
+ return err;
+ switch (val) {
+ case MLXSW_REG_MFDE_SEVERITY_FATL:
+ val_str = "Fatal";
+ break;
+ case MLXSW_REG_MFDE_SEVERITY_NRML:
+ val_str = "Normal";
+ break;
+ case MLXSW_REG_MFDE_SEVERITY_INTR:
+ val_str = "Debug";
+ break;
+ default:
+ val_str = NULL;
+ }
+ if (val_str) {
+ err = devlink_fmsg_string_pair_put(fmsg, "desc", val_str);
+ if (err)
+ return err;
+ }
+
err = devlink_fmsg_arr_pair_nest_end(fmsg);
if (err)
return err;
@@ -1840,6 +1966,11 @@ static int mlxsw_core_health_fw_fatal_dump(struct devlink_health_reporter *repor
case MLXSW_REG_MFDE_EVENT_ID_KVD_IM_STOP:
return mlxsw_core_health_fw_fatal_dump_kvd_im_stop(mfde_pl,
fmsg);
+ case MLXSW_REG_MFDE_EVENT_ID_FW_ASSERT:
+ return mlxsw_core_health_fw_fatal_dump_fw_assert(mfde_pl, fmsg);
+ case MLXSW_REG_MFDE_EVENT_ID_FATAL_CAUSE:
+ return mlxsw_core_health_fw_fatal_dump_fatal_cause(mfde_pl,
+ fmsg);
}
return 0;
--
2.33.1
Powered by blists - more mailing lists