[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20190613203825.31049-6-saeedm@mellanox.com>
Date: Thu, 13 Jun 2019 20:39:23 +0000
From: Saeed Mahameed <saeedm@...lanox.com>
To: "David S. Miller" <davem@...emloft.net>
CC: "netdev@...r.kernel.org" <netdev@...r.kernel.org>,
Jiri Pirko <jiri@...lanox.com>,
Alex Vesker <valex@...lanox.com>,
Moshe Shemesh <moshe@...lanox.com>,
Feras Daoud <ferasda@...lanox.com>,
Saeed Mahameed <saeedm@...lanox.com>
Subject: [net-next v2 05/15] net/mlx5: Add Crdump support
From: Alex Vesker <valex@...lanox.com>
Crdump allows the driver to retrieve a dump of the FW PCI crspace.
This is useful in case of catastrophic issues which may require FW
reset. The crspace dump can be used for later debug.
Signed-off-by: Alex Vesker <valex@...lanox.com>
Signed-off-by: Moshe Shemesh <moshe@...lanox.com>
Reviewed-by: Feras Daoud <ferasda@...lanox.com>
Signed-off-by: Saeed Mahameed <saeedm@...lanox.com>
---
.../net/ethernet/mellanox/mlx5/core/Makefile | 2 +-
.../ethernet/mellanox/mlx5/core/diag/crdump.c | 106 ++++++++++++++++++
.../ethernet/mellanox/mlx5/core/lib/mlx5.h | 3 +
.../net/ethernet/mellanox/mlx5/core/main.c | 5 +
include/linux/mlx5/driver.h | 1 +
5 files changed, 116 insertions(+), 1 deletion(-)
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 8e07354faea1..5fe2bf916c06 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -16,7 +16,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
lib/devcom.o lib/pci_vsc.o diag/fs_tracepoint.o \
- diag/fw_tracer.o devlink.o
+ diag/fw_tracer.o diag/crdump.o devlink.o
#
# Netdev basic
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c
new file mode 100644
index 000000000000..dfb34172c69b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies */
+
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+#include "lib/pci_vsc.h"
+#include "lib/mlx5.h"
+
+#define BAD_ACCESS 0xBADACCE5
+#define MLX5_PROTECTED_CR_SCAN_CRSPACE 0x7
+
+static bool mlx5_crdump_enabled(struct mlx5_core_dev *dev)
+{
+ return !!dev->priv.health.crdump_size;
+}
+
+static int mlx5_crdump_fill(struct mlx5_core_dev *dev, u32 *cr_data)
+{
+ u32 crdump_size = dev->priv.health.crdump_size;
+ int i, ret;
+
+ for (i = 0; i < (crdump_size / 4); i++)
+ cr_data[i] = BAD_ACCESS;
+
+ ret = mlx5_vsc_gw_read_block_fast(dev, cr_data, crdump_size);
+ if (ret <= 0) {
+ if (ret == 0)
+ return -EIO;
+ return ret;
+ }
+
+ if (crdump_size != ret) {
+ mlx5_core_warn(dev, "failed to read full dump, read %d out of %u\n",
+ ret, crdump_size);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int mlx5_crdump_collect(struct mlx5_core_dev *dev, u32 *cr_data)
+{
+ int ret;
+
+ if (!mlx5_crdump_enabled(dev))
+ return -ENODEV;
+
+ ret = mlx5_vsc_gw_lock(dev);
+ if (ret) {
+ mlx5_core_warn(dev, "crdump: failed to lock vsc gw err %d\n",
+ ret);
+ return ret;
+ }
+
+ ret = mlx5_vsc_gw_set_space(dev, MLX5_VSC_SPACE_SCAN_CRSPACE, NULL);
+ if (ret)
+ goto unlock;
+
+ ret = mlx5_crdump_fill(dev, cr_data);
+
+unlock:
+ mlx5_vsc_gw_unlock(dev);
+ return ret;
+}
+
+int mlx5_crdump_enable(struct mlx5_core_dev *dev)
+{
+ struct mlx5_priv *priv = &dev->priv;
+ u32 space_size;
+ int ret;
+
+ if (!mlx5_core_is_pf(dev) || !mlx5_vsc_accessible(dev) ||
+ mlx5_crdump_enabled(dev))
+ return 0;
+
+ ret = mlx5_vsc_gw_lock(dev);
+ if (ret)
+ return ret;
+
+ /* Check if space is supported and get space size */
+ ret = mlx5_vsc_gw_set_space(dev, MLX5_VSC_SPACE_SCAN_CRSPACE,
+ &space_size);
+ if (ret) {
+ /* Unlock and mask error since space is not supported */
+ mlx5_vsc_gw_unlock(dev);
+ return 0;
+ }
+
+ if (!space_size) {
+ mlx5_core_warn(dev, "Invalid Crspace size, zero\n");
+ mlx5_vsc_gw_unlock(dev);
+ return -EINVAL;
+ }
+
+ ret = mlx5_vsc_gw_unlock(dev);
+ if (ret)
+ return ret;
+
+ priv->health.crdump_size = space_size;
+ return 0;
+}
+
+void mlx5_crdump_disable(struct mlx5_core_dev *dev)
+{
+ dev->priv.health.crdump_size = 0;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
index 397a2847867a..d918e44491f4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h
@@ -41,6 +41,9 @@ int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count);
void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count);
int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index);
void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index);
+int mlx5_crdump_enable(struct mlx5_core_dev *dev);
+void mlx5_crdump_disable(struct mlx5_core_dev *dev);
+int mlx5_crdump_collect(struct mlx5_core_dev *dev, u32 *cr_data);
/* TODO move to lib/events.h */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 3adc09a1a312..c70e97071b87 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1313,6 +1313,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id)
if (err)
goto clean_load;
+ err = mlx5_crdump_enable(dev);
+ if (err)
+ dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err);
+
pci_save_state(pdev);
return 0;
@@ -1334,6 +1338,7 @@ static void remove_one(struct pci_dev *pdev)
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
struct devlink *devlink = priv_to_devlink(dev);
+ mlx5_crdump_disable(dev);
mlx5_devlink_unregister(devlink);
mlx5_unregister_device(dev);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index f732445bcbdb..4ae533b3da07 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -435,6 +435,7 @@ struct mlx5_core_health {
u32 prev;
int miss_counter;
bool sick;
+ u32 crdump_size;
/* wq spinlock to synchronize draining */
spinlock_t wq_lock;
struct workqueue_struct *wq;
--
2.21.0
Powered by blists - more mailing lists