[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20200915084058.18555-9-idosch@idosch.org>
Date: Tue, 15 Sep 2020 11:40:58 +0300
From: Ido Schimmel <idosch@...sch.org>
To: netdev@...r.kernel.org
Cc: davem@...emloft.net, kuba@...nel.org, jiri@...dia.com,
mlxsw@...dia.com, Ido Schimmel <idosch@...dia.com>
Subject: [PATCH net-next 8/8] mlxsw: core: Introduce fw_fatal health reporter
From: Jiri Pirko <jiri@...dia.com>
Introduce devlink health reporter to report FW fatal events. Implement
the event listener using MFDE trap and enable the events to be
propagated using MFGD register configuration.
Signed-off-by: Jiri Pirko <jiri@...dia.com>
Signed-off-by: Ido Schimmel <idosch@...dia.com>
---
drivers/net/ethernet/mellanox/mlxsw/core.c | 240 ++++++++++++++++++
drivers/net/ethernet/mellanox/mlxsw/core.h | 1 +
drivers/net/ethernet/mellanox/mlxsw/reg.h | 1 +
.../net/ethernet/mellanox/mlxsw/spectrum.c | 12 +
drivers/net/ethernet/mellanox/mlxsw/trap.h | 2 +
5 files changed, 256 insertions(+)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 7b5939e068d1..1bb21fe295b9 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -84,6 +84,9 @@ struct mlxsw_core {
struct mlxsw_core_port *ports;
unsigned int max_ports;
bool fw_flash_in_progress;
+ struct {
+ struct devlink_health_reporter *fw_fatal;
+ } health;
unsigned long driver_priv[];
/* driver_priv has to be always the last item */
};
@@ -1612,6 +1615,236 @@ static void mlxsw_core_params_unregister(struct mlxsw_core *mlxsw_core)
mlxsw_core->driver->params_unregister(mlxsw_core);
}
+struct mlxsw_core_health_event {
+ struct mlxsw_core *mlxsw_core;
+ char mfde_pl[MLXSW_REG_MFDE_LEN];
+ struct work_struct work;
+};
+
+static void mlxsw_core_health_event_work(struct work_struct *work)
+{
+ struct mlxsw_core_health_event *event;
+ struct mlxsw_core *mlxsw_core;
+
+ event = container_of(work, struct mlxsw_core_health_event, work);
+ mlxsw_core = event->mlxsw_core;
+ devlink_health_report(mlxsw_core->health.fw_fatal, "FW fatal event occurred",
+ event->mfde_pl);
+ kfree(event);
+}
+
+static void mlxsw_core_health_listener_func(const struct mlxsw_reg_info *reg,
+ char *mfde_pl, void *priv)
+{
+ struct mlxsw_core_health_event *event;
+ struct mlxsw_core *mlxsw_core = priv;
+
+ event = kmalloc(sizeof(*event), GFP_ATOMIC);
+ if (!event)
+ return;
+ event->mlxsw_core = mlxsw_core;
+ memcpy(event->mfde_pl, mfde_pl, sizeof(event->mfde_pl));
+ INIT_WORK(&event->work, mlxsw_core_health_event_work);
+ mlxsw_core_schedule_work(&event->work);
+}
+
+static const struct mlxsw_listener mlxsw_core_health_listener =
+ MLXSW_EVENTL(mlxsw_core_health_listener_func, MFDE, MFDE);
+
+static int mlxsw_core_health_fw_fatal_dump(struct devlink_health_reporter *reporter,
+ struct devlink_fmsg *fmsg, void *priv_ctx,
+ struct netlink_ext_ack *extack)
+{
+ char *mfde_pl = priv_ctx;
+ char *val_str;
+ u8 event_id;
+ u32 val;
+ int err;
+
+ if (!priv_ctx)
+ /* User-triggered dumps are not possible */
+ return -EOPNOTSUPP;
+
+ val = mlxsw_reg_mfde_irisc_id_get(mfde_pl);
+ err = devlink_fmsg_u8_pair_put(fmsg, "irisc_id", val);
+ if (err)
+ return err;
+ err = devlink_fmsg_arr_pair_nest_start(fmsg, "event");
+ if (err)
+ return err;
+
+ event_id = mlxsw_reg_mfde_event_id_get(mfde_pl);
+ err = devlink_fmsg_u8_pair_put(fmsg, "id", event_id);
+ if (err)
+ return err;
+ switch (event_id) {
+ case MLXSW_REG_MFDE_EVENT_ID_CRSPACE_TO:
+ val_str = "CR space timeout";
+ break;
+ case MLXSW_REG_MFDE_EVENT_ID_KVD_IM_STOP:
+ val_str = "KVD insertion machine stopped";
+ break;
+ default:
+ val_str = NULL;
+ }
+ if (val_str) {
+ err = devlink_fmsg_string_pair_put(fmsg, "desc", val_str);
+ if (err)
+ return err;
+ }
+ err = devlink_fmsg_arr_pair_nest_end(fmsg);
+ if (err)
+ return err;
+
+ val = mlxsw_reg_mfde_method_get(mfde_pl);
+ switch (val) {
+ case MLXSW_REG_MFDE_METHOD_QUERY:
+ val_str = "query";
+ break;
+ case MLXSW_REG_MFDE_METHOD_WRITE:
+ val_str = "write";
+ break;
+ default:
+ val_str = NULL;
+ }
+ if (val_str) {
+ err = devlink_fmsg_string_pair_put(fmsg, "method", val_str);
+ if (err)
+ return err;
+ }
+
+ val = mlxsw_reg_mfde_long_process_get(mfde_pl);
+ err = devlink_fmsg_bool_pair_put(fmsg, "long_process", val);
+ if (err)
+ return err;
+
+ val = mlxsw_reg_mfde_command_type_get(mfde_pl);
+ switch (val) {
+ case MLXSW_REG_MFDE_COMMAND_TYPE_MAD:
+ val_str = "mad";
+ break;
+ case MLXSW_REG_MFDE_COMMAND_TYPE_EMAD:
+ val_str = "emad";
+ break;
+ case MLXSW_REG_MFDE_COMMAND_TYPE_CMDIF:
+ val_str = "cmdif";
+ break;
+ default:
+ val_str = NULL;
+ }
+ if (val_str) {
+ err = devlink_fmsg_string_pair_put(fmsg, "command_type", val_str);
+ if (err)
+ return err;
+ }
+
+ val = mlxsw_reg_mfde_reg_attr_id_get(mfde_pl);
+ err = devlink_fmsg_u32_pair_put(fmsg, "reg_attr_id", val);
+ if (err)
+ return err;
+
+ if (event_id == MLXSW_REG_MFDE_EVENT_ID_CRSPACE_TO) {
+ val = mlxsw_reg_mfde_log_address_get(mfde_pl);
+ err = devlink_fmsg_u32_pair_put(fmsg, "log_address", val);
+ if (err)
+ return err;
+ val = mlxsw_reg_mfde_log_id_get(mfde_pl);
+ err = devlink_fmsg_u8_pair_put(fmsg, "log_irisc_id", val);
+ if (err)
+ return err;
+ } else if (event_id == MLXSW_REG_MFDE_EVENT_ID_KVD_IM_STOP) {
+ val = mlxsw_reg_mfde_pipes_mask_get(mfde_pl);
+ err = devlink_fmsg_u32_pair_put(fmsg, "pipes_mask", val);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int
+mlxsw_core_health_fw_fatal_test(struct devlink_health_reporter *reporter,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_core *mlxsw_core = devlink_health_reporter_priv(reporter);
+ char mfgd_pl[MLXSW_REG_MFGD_LEN];
+ int err;
+
+ /* Read the register first to make sure no other bits are changed. */
+ err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mfgd), mfgd_pl);
+ if (err)
+ return err;
+ mlxsw_reg_mfgd_trigger_test_set(mfgd_pl, true);
+ return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mfgd), mfgd_pl);
+}
+
+static const struct devlink_health_reporter_ops
+mlxsw_core_health_fw_fatal_ops = {
+ .name = "fw_fatal",
+ .dump = mlxsw_core_health_fw_fatal_dump,
+ .test = mlxsw_core_health_fw_fatal_test,
+};
+
+static int mlxsw_core_health_fw_fatal_config(struct mlxsw_core *mlxsw_core,
+ bool enable)
+{
+ char mfgd_pl[MLXSW_REG_MFGD_LEN];
+ int err;
+
+ /* Read the register first to make sure no other bits are changed. */
+ err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mfgd), mfgd_pl);
+ if (err)
+ return err;
+ mlxsw_reg_mfgd_fatal_event_mode_set(mfgd_pl, enable);
+ return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mfgd), mfgd_pl);
+}
+
+static int mlxsw_core_health_init(struct mlxsw_core *mlxsw_core)
+{
+ struct devlink *devlink = priv_to_devlink(mlxsw_core);
+ struct devlink_health_reporter *fw_fatal;
+ int err;
+
+ if (!mlxsw_core->driver->fw_fatal_enabled)
+ return 0;
+
+ fw_fatal = devlink_health_reporter_create(devlink, &mlxsw_core_health_fw_fatal_ops,
+ 0, mlxsw_core);
+ if (IS_ERR(fw_fatal)) {
+ dev_err(mlxsw_core->bus_info->dev, "Failed to create fw fatal reporter");
+ return PTR_ERR(fw_fatal);
+ }
+ mlxsw_core->health.fw_fatal = fw_fatal;
+
+ err = mlxsw_core_trap_register(mlxsw_core, &mlxsw_core_health_listener, mlxsw_core);
+ if (err)
+ goto err_trap_register;
+
+ err = mlxsw_core_health_fw_fatal_config(mlxsw_core, true);
+ if (err)
+ goto err_fw_fatal_config;
+
+ return 0;
+
+err_fw_fatal_config:
+ mlxsw_core_trap_unregister(mlxsw_core, &mlxsw_core_health_listener, mlxsw_core);
+err_trap_register:
+ devlink_health_reporter_destroy(mlxsw_core->health.fw_fatal);
+ return err;
+}
+
+static void mlxsw_core_health_fini(struct mlxsw_core *mlxsw_core)
+{
+ if (!mlxsw_core->driver->fw_fatal_enabled)
+ return;
+
+ mlxsw_core_health_fw_fatal_config(mlxsw_core, false);
+ mlxsw_core_trap_unregister(mlxsw_core, &mlxsw_core_health_listener, mlxsw_core);
+ /* Make sure there is no more event work scheduled */
+ mlxsw_core_flush_owq();
+ devlink_health_reporter_destroy(mlxsw_core->health.fw_fatal);
+}
+
static int
__mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
const struct mlxsw_bus *mlxsw_bus,
@@ -1695,6 +1928,10 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
if (err)
goto err_fw_rev_validate;
+ err = mlxsw_core_health_init(mlxsw_core);
+ if (err)
+ goto err_health_init;
+
if (mlxsw_driver->init) {
err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info, extack);
if (err)
@@ -1723,6 +1960,8 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
if (mlxsw_core->driver->fini)
mlxsw_core->driver->fini(mlxsw_core);
err_driver_init:
+ mlxsw_core_health_fini(mlxsw_core);
+err_health_init:
err_fw_rev_validate:
if (!reload)
mlxsw_core_params_unregister(mlxsw_core);
@@ -1795,6 +2034,7 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core,
mlxsw_hwmon_fini(mlxsw_core->hwmon);
if (mlxsw_core->driver->fini)
mlxsw_core->driver->fini(mlxsw_core);
+ mlxsw_core_health_fini(mlxsw_core);
if (!reload)
mlxsw_core_params_unregister(mlxsw_core);
if (!reload)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index 6ec769906637..2ca085a44774 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -370,6 +370,7 @@ struct mlxsw_driver {
u8 txhdr_len;
const struct mlxsw_config_profile *profile;
bool res_query_enabled;
+ bool fw_fatal_enabled;
};
int mlxsw_core_kvd_sizes_get(struct mlxsw_core *mlxsw_core,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 421f02eac20f..6e3d55006089 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -5579,6 +5579,7 @@ MLXSW_ITEM32(reg, htgt, type, 0x00, 8, 4);
enum mlxsw_reg_htgt_trap_group {
MLXSW_REG_HTGT_TRAP_GROUP_EMAD,
+ MLXSW_REG_HTGT_TRAP_GROUP_MFDE,
MLXSW_REG_HTGT_TRAP_GROUP_SP_STP,
MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP,
MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 18d2eacfae83..351d385158e6 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2529,11 +2529,20 @@ static void mlxsw_sp_lag_fini(struct mlxsw_sp *mlxsw_sp)
static int mlxsw_sp_basic_trap_groups_set(struct mlxsw_core *mlxsw_core)
{
char htgt_pl[MLXSW_REG_HTGT_LEN];
+ int err;
mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_EMAD,
MLXSW_REG_HTGT_INVALID_POLICER,
MLXSW_REG_HTGT_DEFAULT_PRIORITY,
MLXSW_REG_HTGT_DEFAULT_TC);
+ err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
+ if (err)
+ return err;
+
+ mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_MFDE,
+ MLXSW_REG_HTGT_INVALID_POLICER,
+ MLXSW_REG_HTGT_DEFAULT_PRIORITY,
+ MLXSW_REG_HTGT_DEFAULT_TC);
return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
}
@@ -3287,6 +3296,7 @@ static struct mlxsw_driver mlxsw_sp1_driver = {
.txhdr_len = MLXSW_TXHDR_LEN,
.profile = &mlxsw_sp1_config_profile,
.res_query_enabled = true,
+ .fw_fatal_enabled = true,
};
static struct mlxsw_driver mlxsw_sp2_driver = {
@@ -3326,6 +3336,7 @@ static struct mlxsw_driver mlxsw_sp2_driver = {
.txhdr_len = MLXSW_TXHDR_LEN,
.profile = &mlxsw_sp2_config_profile,
.res_query_enabled = true,
+ .fw_fatal_enabled = true,
};
static struct mlxsw_driver mlxsw_sp3_driver = {
@@ -3365,6 +3376,7 @@ static struct mlxsw_driver mlxsw_sp3_driver = {
.txhdr_len = MLXSW_TXHDR_LEN,
.profile = &mlxsw_sp2_config_profile,
.res_query_enabled = true,
+ .fw_fatal_enabled = true,
};
bool mlxsw_sp_port_dev_check(const struct net_device *dev)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h
index 33909887d0ac..fe0b8af287a7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h
@@ -120,6 +120,8 @@ enum {
};
enum mlxsw_event_trap_id {
+ /* Fatal Event generated by FW */
+ MLXSW_TRAP_ID_MFDE = 0x3,
/* Port Up/Down event generated by hardware */
MLXSW_TRAP_ID_PUDE = 0x8,
/* PTP Ingress FIFO has a new entry */
--
2.26.2
Powered by blists - more mailing lists