lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Sun, 5 May 2019 00:33:23 +0000
From:   Saeed Mahameed <saeedm@...lanox.com>
To:     "David S. Miller" <davem@...emloft.net>
CC:     "netdev@...r.kernel.org" <netdev@...r.kernel.org>,
        Jiri Pirko <jiri@...lanox.com>,
        Moshe Shemesh <moshe@...lanox.com>,
        Eran Ben Elisha <eranbe@...lanox.com>,
        Saeed Mahameed <saeedm@...lanox.com>
Subject: [net-next 09/15] net/mlx5: Create FW devlink health reporter

From: Moshe Shemesh <moshe@...lanox.com>

Create mlx5_devlink_health_reporter for the FW reporter. The FW reporter
implements the devlink_health_reporter diagnose callback.

The FW reporter diagnose command can be triggered at any time by the user
to check the current FW status.
In healthy status, it returns a clear (zero) syndrome. Otherwise, it dumps
the health info buffer.

Command example and output on healthy status:
$ devlink health diagnose pci/0000:82:00.0 reporter fw
Syndrome: 0

Command example and output on non-healthy status:
$ devlink health diagnose pci/0000:82:00.0 reporter fw
diagnose data:
assert_var[0] 0xfc3fc043
assert_var[1] 0x0001b41c
assert_var[2] 0x00000000
assert_var[3] 0x00000000
assert_var[4] 0x00000000
assert_exit_ptr 0x008033b4
assert_callra 0x0080365c
fw_ver 16.24.1000
hw_id 0x0000020d
irisc_index 0
synd 0x8: unrecoverable hardware error
ext_synd 0x003d
raw fw_ver 0x101803e8

Signed-off-by: Moshe Shemesh <moshe@...lanox.com>
Signed-off-by: Eran Ben Elisha <eranbe@...lanox.com>
Signed-off-by: Saeed Mahameed <saeedm@...lanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/health.c  | 55 +++++++++++++++++++
 include/linux/mlx5/driver.h                   |  1 +
 2 files changed, 56 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index a3c7e46aafd9..9ffa9c7f81a0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -428,6 +428,58 @@ static void mlx5_print_health_info(struct mlx5_core_dev *dev)
 	mutex_unlock(&health->info_buf_lock);
 }
 
+static int
+mlx5_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
+			  struct devlink_fmsg *fmsg)
+{
+	struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
+	struct mlx5_core_health *health = &dev->priv.health;
+	u8 synd;
+	int err;
+
+	mutex_lock(&health->info_buf_lock);
+	mlx5_get_health_info(dev, &synd);
+
+	if (!synd) {
+		mutex_unlock(&health->info_buf_lock);
+		return devlink_fmsg_u8_pair_put(fmsg, "Syndrome", synd);
+	}
+
+	err = devlink_fmsg_string_pair_put(fmsg, "diagnose data",
+					   health->info_buf);
+
+	mutex_unlock(&health->info_buf_lock);
+	return err;
+}
+
+static const struct devlink_health_reporter_ops mlx5_fw_reporter_ops = {
+		.name = "fw",
+		.diagnose = mlx5_fw_reporter_diagnose,
+};
+
+static void mlx5_fw_reporter_create(struct mlx5_core_dev *dev)
+{
+	struct mlx5_core_health *health = &dev->priv.health;
+	struct devlink *devlink = priv_to_devlink(dev);
+
+	health->fw_reporter =
+		devlink_health_reporter_create(devlink, &mlx5_fw_reporter_ops,
+					       0, false, dev);
+	if (IS_ERR(health->fw_reporter))
+		mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n",
+			       PTR_ERR(health->fw_reporter));
+}
+
+static void mlx5_fw_reporter_destroy(struct mlx5_core_dev *dev)
+{
+	struct mlx5_core_health *health = &dev->priv.health;
+
+	if (IS_ERR_OR_NULL(health->fw_reporter))
+		return;
+
+	devlink_health_reporter_destroy(health->fw_reporter);
+}
+
 static unsigned long get_next_poll_jiffies(void)
 {
 	unsigned long next;
@@ -539,6 +591,7 @@ void mlx5_health_cleanup(struct mlx5_core_dev *dev)
 
 	kfree(health->info_buf);
 	destroy_workqueue(health->wq);
+	mlx5_fw_reporter_destroy(dev);
 }
 
 int mlx5_health_init(struct mlx5_core_dev *dev)
@@ -565,6 +618,8 @@ int mlx5_health_init(struct mlx5_core_dev *dev)
 		goto err_alloc_buff;
 	mutex_init(&health->info_buf_lock);
 
+	mlx5_fw_reporter_create(dev);
+
 	return 0;
 
 err_alloc_buff:
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index df8f4c4e21c6..a362aa6c799c 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -448,6 +448,7 @@ struct mlx5_core_health {
 	int				info_buf_len;
 	/* protect info buf access */
 	struct mutex			info_buf_lock;
+	struct devlink_health_reporter *fw_reporter;
 };
 
 struct mlx5_qp_table {
-- 
2.20.1

Powered by blists - more mailing lists