lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Sun, 5 May 2019 00:33:27 +0000
From:   Saeed Mahameed <saeedm@...lanox.com>
To:     "David S. Miller" <davem@...emloft.net>
CC:     "netdev@...r.kernel.org" <netdev@...r.kernel.org>,
        Jiri Pirko <jiri@...lanox.com>,
        Moshe Shemesh <moshe@...lanox.com>,
        Eran Ben Elisha <eranbe@...lanox.com>,
        Saeed Mahameed <saeedm@...lanox.com>
Subject: [net-next 11/15] net/mlx5: Add support for FW reporter dump

From: Moshe Shemesh <moshe@...lanox.com>

Add support for the dump callback of the mlx5 FW reporter.
Once we trigger a FW dump, the FW writes the core dump to its raw data
buffer. The tracer translates the raw data into traces and saves them to
a buffer. Once the dump is done, the saved traces are filled as objects
into the dump buffer.

FW dump example:
$ devlink health dump show pci/0000:82:00.1 reporter fw
dump traces:
   trace: 0000:82:00.1 [0x69cd6c5283e] 0 [0xb8] dump general info
GVMI=0x0001
   trace: 0000:82:00.1 [0x69cd6c53bec] 0 [0xb8] GVMI management info,
gvmi_management context:
   trace: 0000:82:00.1 [0x69cd6c55eff] 0 [0xb8] [000]:  00000000
00000000  00000000  00000000
   trace: 0000:82:00.1 [0x69cd6c5657f] 0 [0xb8] [010]:  00000000
00000000  00000000  00000000
   trace: 0000:82:00.1 [0x69cd6c56608] 0 [0xb8] [020]:  00000000
00000000  00000000  00000000
   trace: 0000:82:00.1 [0x69cd6c566ff] 0 [0xb8] [030]:  00000000
00000000  00000000  00000000
   trace: 0000:82:00.1 [0x69cd6c5677f] 0 [0xb8] [040]:  00000000
00000000  00000000  00000000
   trace: 0000:82:00.1 [0x69cd6c5687f] 0 [0xb8] [050]:  00000000
00000000  00000000  00000000
   trace: 0000:82:00.1 [0x69cd6c568ff] 0 [0xb8] [060]:  00000000
00000000  00000000  00000000
   trace: 0000:82:00.1 [0x69cd6c569a5] 0 [0xb8] [070]:  00000000
00000000  00000000  00000000
   trace: 0000:82:00.1 [0x69cd6c57021] 0 [0xb8] CMDIF dbase from IRON:
active_dbase_slots = 0x00000000
   trace: 0000:82:00.1 [0x69cd6c58dae] 0 [0xb8] GVMI=0x0001 hw_toc
context:
   trace: 0000:82:00.1 [0x69cd6c58e7f] 0 [0xb8] [000]:  00400100
00000000  00000000  fffff000
   trace: 0000:82:00.1 [0x69cd6c58f7f] 0 [0xb8] [010]:  00000000
00000000  00000000  00000000
...
...

Signed-off-by: Moshe Shemesh <moshe@...lanox.com>
Signed-off-by: Eran Ben Elisha <eranbe@...lanox.com>
Signed-off-by: Saeed Mahameed <saeedm@...lanox.com>
---
 .../mellanox/mlx5/core/diag/fw_tracer.c       | 109 ++++++++++++++++++
 .../mellanox/mlx5/core/diag/fw_tracer.h       |  14 +++
 .../net/ethernet/mellanox/mlx5/core/health.c  |  46 ++++++++
 3 files changed, 169 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
index 56025797cd1e..8c3e6727a984 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
@@ -243,6 +243,27 @@ static int mlx5_fw_tracer_allocate_strings_db(struct mlx5_fw_tracer *tracer)
 	return -ENOMEM;
 }
 
+/*
+ * Allocate the zero-initialized buffer that holds the saved trace lines
+ * (SAVED_TRACES_BUFFER_SIZE_BYTE bytes), reset the write index and
+ * initialize the lock protecting the buffer.
+ *
+ * Return: 0 on success, -ENOMEM if the buffer cannot be allocated.
+ */
+static int
+mlx5_fw_tracer_allocate_saved_traces_buff(struct mlx5_fw_tracer *tracer)
+{
+	void *buff = kzalloc(SAVED_TRACES_BUFFER_SIZE_BYTE, GFP_KERNEL);
+
+	tracer->sbuff.traces_buff = buff;
+	if (!buff)
+		return -ENOMEM;
+
+	mutex_init(&tracer->sbuff.lock);
+	tracer->sbuff.saved_traces_index = 0;
+
+	return 0;
+}
+
+/*
+ * Release the saved traces buffer and the lock that protects it.
+ *
+ * Counterpart of mlx5_fw_tracer_allocate_saved_traces_buff(): the mutex
+ * initialized there is destroyed here, so mutex debugging can flag any
+ * late user of the buffer. The pointer is cleared to avoid leaving a
+ * dangling reference behind.
+ */
+static void
+mlx5_fw_tracer_free_saved_traces_buff(struct mlx5_fw_tracer *tracer)
+{
+	kfree(tracer->sbuff.traces_buff);
+	tracer->sbuff.traces_buff = NULL;
+	mutex_destroy(&tracer->sbuff.lock);
+}
+
 static void mlx5_tracer_read_strings_db(struct work_struct *work)
 {
 	struct mlx5_fw_tracer *tracer = container_of(work, struct mlx5_fw_tracer,
@@ -522,6 +543,24 @@ static void mlx5_fw_tracer_clean_ready_list(struct mlx5_fw_tracer *tracer)
 		list_del(&str_frmt->list);
 }
 
+/*
+ * Format one trace as "<device> [0x<timestamp>] <lost> [0x<event_id>] <msg>"
+ * into the current slot of the saved traces buffer, then advance the write
+ * index, wrapping back to slot 0 after SAVED_TRACES_NUM entries.
+ * Each slot is TRACE_STR_LINE bytes; longer lines are truncated by snprintf().
+ */
+static void mlx5_fw_tracer_save_trace(struct mlx5_fw_tracer *tracer,
+				      u64 timestamp, bool lost,
+				      u8 event_id, char *msg)
+{
+	const char *dev_str = dev_name(&tracer->dev->pdev->dev);
+	char *line;
+	u32 slot;
+
+	mutex_lock(&tracer->sbuff.lock);
+
+	slot = tracer->sbuff.saved_traces_index;
+	line = (char *)tracer->sbuff.traces_buff + slot * TRACE_STR_LINE;
+	snprintf(line, TRACE_STR_LINE, "%s [0x%llx] %d [0x%x] %s",
+		 dev_str, timestamp, lost, event_id, msg);
+
+	/* The mask keeps the index inside the ring */
+	tracer->sbuff.saved_traces_index = (slot + 1) & (SAVED_TRACES_NUM - 1);
+
+	mutex_unlock(&tracer->sbuff.lock);
+}
+
 static void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt,
 				    struct mlx5_core_dev *dev,
 				    u64 trace_timestamp)
@@ -540,6 +579,9 @@ static void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt,
 	trace_mlx5_fw(dev->tracer, trace_timestamp, str_frmt->lost,
 		      str_frmt->event_id, tmp);
 
+	mlx5_fw_tracer_save_trace(dev->tracer, trace_timestamp,
+				  str_frmt->lost, str_frmt->event_id, tmp);
+
 	/* remove it from hash */
 	mlx5_tracer_clean_message(str_frmt);
 }
@@ -820,6 +862,64 @@ int mlx5_fw_tracer_trigger_core_dump_general(struct mlx5_core_dev *dev)
 	return 0;
 }
 
+/*
+ * Append one saved trace line to @fmsg as a nested object holding a single
+ * "trace" string pair.
+ *
+ * Return: 0 on success, otherwise the error reported by the devlink fmsg
+ * helper that failed.
+ */
+static int
+mlx5_devlink_fmsg_fill_trace(struct devlink_fmsg *fmsg,
+			     char *trace)
+{
+	int ret = devlink_fmsg_obj_nest_start(fmsg);
+
+	if (ret)
+		return ret;
+
+	ret = devlink_fmsg_string_pair_put(fmsg, "trace", trace);
+	if (ret)
+		return ret;
+
+	return devlink_fmsg_obj_nest_end(fmsg);
+}
+
+/*
+ * Fill @fmsg with a "dump traces" array containing every saved trace line,
+ * oldest first, each as an object with a single "trace" string.
+ *
+ * The saved traces buffer is a ring of SAVED_TRACES_NUM lines of
+ * TRACE_STR_LINE bytes, and saved_traces_index is the slot the next trace
+ * will be written to. The buffer starts zeroed, so a non-empty line at that
+ * slot means the ring has wrapped and the oldest trace lives there;
+ * otherwise only slots [0, saved_traces_index) hold traces.
+ *
+ * Return: 0 on success, -ENOMSG if no trace has been saved yet, or the error
+ * returned by the devlink fmsg helpers.
+ */
+int mlx5_fw_tracer_get_saved_traces_objects(struct mlx5_fw_tracer *tracer,
+					    struct devlink_fmsg *fmsg)
+{
+	char *saved_traces = tracer->sbuff.traces_buff;
+	u32 start_index, count, i;
+	int err;
+
+	/* Inspect the buffer only under the lock; a trace may be mid-write */
+	mutex_lock(&tracer->sbuff.lock);
+	if (!saved_traces[0]) {
+		err = -ENOMSG;
+		goto unlock;
+	}
+
+	start_index = tracer->sbuff.saved_traces_index;
+	if (saved_traces[start_index * TRACE_STR_LINE]) {
+		/* Ring wrapped: every slot is valid, oldest at the write slot */
+		count = SAVED_TRACES_NUM;
+	} else {
+		count = start_index;
+		start_index = 0;
+	}
+
+	err = devlink_fmsg_arr_pair_nest_start(fmsg, "dump traces");
+	if (err)
+		goto unlock;
+
+	/* Walk by count so the newest trace (write slot - 1) is included */
+	for (i = 0; i < count; i++) {
+		u32 index = (start_index + i) & (SAVED_TRACES_NUM - 1);
+
+		err = mlx5_devlink_fmsg_fill_trace(fmsg,
+						   saved_traces + index * TRACE_STR_LINE);
+		if (err)
+			goto unlock;
+	}
+
+	err = devlink_fmsg_arr_pair_nest_end(fmsg);
+unlock:
+	mutex_unlock(&tracer->sbuff.lock);
+	return err;
+}
+
 /* Create software resources (Buffers, etc ..) */
 struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev)
 {
@@ -867,10 +967,18 @@ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev)
 		goto free_log_buf;
 	}
 
+	err = mlx5_fw_tracer_allocate_saved_traces_buff(tracer);
+	if (err) {
+		mlx5_core_warn(dev, "FWTracer: Create saved traces buffer failed %d\n", err);
+		goto free_strings_db;
+	}
+
 	mlx5_core_dbg(dev, "FWTracer: Tracer created\n");
 
 	return tracer;
 
+free_strings_db:
+	mlx5_fw_tracer_free_strings_db(tracer);
 free_log_buf:
 	mlx5_fw_tracer_destroy_log_buf(tracer);
 destroy_workqueue:
@@ -951,6 +1059,7 @@ void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer)
 	cancel_work_sync(&tracer->read_fw_strings_work);
 	mlx5_fw_tracer_clean_ready_list(tracer);
 	mlx5_fw_tracer_clean_print_hash(tracer);
+	mlx5_fw_tracer_free_saved_traces_buff(tracer);
 	mlx5_fw_tracer_free_strings_db(tracer);
 	mlx5_fw_tracer_destroy_log_buf(tracer);
 	flush_workqueue(tracer->work_queue);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
index a8b8747f2b61..9dcf40a43399 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
@@ -46,6 +46,10 @@
 #define TRACER_BLOCK_SIZE_BYTE 256
 #define TRACES_PER_BLOCK 32
 
+#define TRACE_STR_LINE 256 /* bytes reserved per saved trace line */
+#define SAVED_TRACES_NUM 1024 /* saved lines; indices wrap with & (SAVED_TRACES_NUM - 1), so keep a power of two */
+#define SAVED_TRACES_BUFFER_SIZE_BYTE (SAVED_TRACES_NUM * TRACE_STR_LINE) /* size of the whole saved traces buffer */
+
 #define TRACER_MAX_PARAMS 7
 #define MESSAGE_HASH_BITS 6
 #define MESSAGE_HASH_SIZE BIT(MESSAGE_HASH_BITS)
@@ -83,6 +87,13 @@ struct mlx5_fw_tracer {
 		u32 consumer_index;
 	} buff;
 
+	/* Saved Traces Buffer */
+	struct {
+		void *traces_buff;
+		u32 saved_traces_index;
+		struct mutex lock; /* Protect sbuff access */
+	} sbuff;
+
 	u64 last_timestamp;
 	struct work_struct handle_traces_work;
 	struct hlist_head hash[MESSAGE_HASH_SIZE];
@@ -171,5 +182,8 @@ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev);
 int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer);
 void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer);
 void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer);
+int mlx5_fw_tracer_trigger_core_dump_general(struct mlx5_core_dev *dev);
+int mlx5_fw_tracer_get_saved_traces_objects(struct mlx5_fw_tracer *tracer,
+					    struct devlink_fmsg *fmsg);
 
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 9ffa9c7f81a0..34b8252afad5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -41,6 +41,7 @@
 #include "lib/eq.h"
 #include "lib/mlx5.h"
 #include "lib/pci_vsc.h"
+#include "diag/fw_tracer.h"
 
 enum {
 	MLX5_HEALTH_POLL_INTERVAL	= 2 * HZ,
@@ -452,9 +453,54 @@ mlx5_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
 	return err;
 }
 
+struct mlx5_fw_reporter_ctx { /* optional priv_ctx consumed by the FW reporter dump callback */
+	u8 err_synd; /* emitted as the "Syndrome" u8 pair */
+	int miss_counter; /* emitted as the "fw_miss_counter" u32 pair */
+};
+
+/*
+ * Add the FW reporter context to @fmsg: the error syndrome as the u8 pair
+ * "Syndrome" and the miss counter as the u32 pair "fw_miss_counter".
+ *
+ * Return: 0 on success, otherwise the error from the devlink fmsg helper
+ * that failed.
+ */
+static int
+mlx5_fw_reporter_ctx_pairs_put(struct devlink_fmsg *fmsg,
+			       struct mlx5_fw_reporter_ctx *fw_reporter_ctx)
+{
+	int ret = devlink_fmsg_u8_pair_put(fmsg, "Syndrome",
+					   fw_reporter_ctx->err_synd);
+
+	if (ret)
+		return ret;
+
+	return devlink_fmsg_u32_pair_put(fmsg, "fw_miss_counter",
+					 fw_reporter_ctx->miss_counter);
+}
+
+/*
+ * devlink health "dump" callback of the FW reporter.
+ *
+ * Triggers a general FW core dump, adds the reporter context pairs when
+ * @priv_ctx is provided, then appends the traces saved by the FW tracer.
+ *
+ * Return: 0 on success, or the first error encountered.
+ */
+static int
+mlx5_fw_reporter_dump(struct devlink_health_reporter *reporter,
+		      struct devlink_fmsg *fmsg, void *priv_ctx)
+{
+	struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
+	struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;
+	int ret;
+
+	ret = mlx5_fw_tracer_trigger_core_dump_general(dev);
+	if (ret)
+		return ret;
+
+	/* The context is optional; without it only the traces are dumped */
+	if (fw_reporter_ctx) {
+		ret = mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx);
+		if (ret)
+			return ret;
+	}
+
+	return mlx5_fw_tracer_get_saved_traces_objects(dev->tracer, fmsg);
+}
+
 static const struct devlink_health_reporter_ops mlx5_fw_reporter_ops = {
 		.name = "fw",
 		.diagnose = mlx5_fw_reporter_diagnose,
+		.dump = mlx5_fw_reporter_dump, /* triggers a FW core dump and reports the saved tracer lines */
 };
 
 static void mlx5_fw_reporter_create(struct mlx5_core_dev *dev)
-- 
2.20.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ