lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <k24eu6e64meeunvif2g3m4xqkzj3h7jmgs4axvmcic2tjpxewx@d4uyeqraobuj>
Date: Tue, 11 Feb 2025 15:26:46 +0100
From: Jiri Pirko <jiri@...nulli.us>
To: Tony Nguyen <anthony.l.nguyen@...el.com>
Cc: davem@...emloft.net, kuba@...nel.org, pabeni@...hat.com, 
	edumazet@...gle.com, andrew+netdev@...n.ch, netdev@...r.kernel.org, 
	Przemek Kitszel <przemyslaw.kitszel@...el.com>, mateusz.polchlopek@...el.com, joe@...ches.com, horms@...nel.org, 
	apw@...onical.com, lukas.bulwahn@...il.com, dwaipayanray1@...il.com, 
	Igor Bagnucki <igor.bagnucki@...el.com>, Pucha Himasekhar Reddy <himasekharx.reddy.pucha@...el.com>
Subject: Re: [PATCH net-next v2 5/6] ice: add Tx hang devlink health reporter

Tue, Dec 17, 2024 at 10:08:32PM +0100, anthony.l.nguyen@...el.com wrote:
>From: Przemek Kitszel <przemyslaw.kitszel@...el.com>
>
>Add Tx hang devlink health reporter, see struct ice_tx_hang_event to see
>what exactly is reported. For now dump descriptors with little metadata
>and skb diagnostic information.
>
>Reviewed-by: Igor Bagnucki <igor.bagnucki@...el.com>
>Reviewed-by: Wojciech Drewek <wojciech.drewek@...el.com>
>Co-developed-by: Mateusz Polchlopek <mateusz.polchlopek@...el.com>
>Signed-off-by: Mateusz Polchlopek <mateusz.polchlopek@...el.com>
>Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@...el.com> (A Contingent worker at Intel)
>Signed-off-by: Przemek Kitszel <przemyslaw.kitszel@...el.com>
>Signed-off-by: Tony Nguyen <anthony.l.nguyen@...el.com>
>---
> drivers/net/ethernet/intel/ice/Makefile       |   1 +
> .../net/ethernet/intel/ice/devlink/health.c   | 192 ++++++++++++++++++
> .../net/ethernet/intel/ice/devlink/health.h   |  47 +++++
> drivers/net/ethernet/intel/ice/ice.h          |   2 +
> drivers/net/ethernet/intel/ice/ice_main.c     |  18 +-
> 5 files changed, 255 insertions(+), 5 deletions(-)
> create mode 100644 drivers/net/ethernet/intel/ice/devlink/health.c
> create mode 100644 drivers/net/ethernet/intel/ice/devlink/health.h
>
>diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
>index 56aa23aee472..9e0d9f710441 100644
>--- a/drivers/net/ethernet/intel/ice/Makefile
>+++ b/drivers/net/ethernet/intel/ice/Makefile
>@@ -32,6 +32,7 @@ ice-y := ice_main.o	\
> 	 ice_parser_rt.o \
> 	 ice_idc.o	\
> 	 devlink/devlink.o	\
>+	 devlink/health.o \
> 	 devlink/port.o \
> 	 ice_sf_eth.o	\
> 	 ice_sf_vsi_vlan_ops.o \
>diff --git a/drivers/net/ethernet/intel/ice/devlink/health.c b/drivers/net/ethernet/intel/ice/devlink/health.c
>new file mode 100644
>index 000000000000..984d910fc41d
>--- /dev/null
>+++ b/drivers/net/ethernet/intel/ice/devlink/health.c
>@@ -0,0 +1,192 @@
>+// SPDX-License-Identifier: GPL-2.0
>+/* Copyright (c) 2024, Intel Corporation. */
>+
>+#include "health.h"
>+#include "ice.h"
>+
>+#define ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, obj, name) \
>+	devlink_fmsg_put(fmsg, #name, (obj)->name)
>+
>+/**
>+ * ice_devlink_health_report - boilerplate to call given @reporter
>+ *
>+ * @reporter: devlink health reporter to call, do nothing on NULL
>+ * @msg: message to pass up, "event name" is fine
>+ * @priv_ctx: typically some event struct
>+ */
>+static void ice_devlink_health_report(struct devlink_health_reporter *reporter,
>+				      const char *msg, void *priv_ctx)
>+{
>+	if (!reporter)
>+		return;
>+
>+	/* We do not do auto recovering, so return value of the below function
>+	 * will always be 0, thus we do ignore it.
>+	 */
>+	devlink_health_report(reporter, msg, priv_ctx);
>+}
>+
>+/**
>+ * ice_fmsg_put_ptr - put hex value of pointer into fmsg
>+ *
>+ * @fmsg: devlink fmsg under construction
>+ * @name: name to pass
>+ * @ptr: 64 bit value to print as hex and put into fmsg
>+ */
>+static void ice_fmsg_put_ptr(struct devlink_fmsg *fmsg, const char *name,
>+			     void *ptr)
>+{
>+	char buf[sizeof(ptr) * 3];
>+
>+	sprintf(buf, "%p", ptr);
>+	devlink_fmsg_put(fmsg, name, buf);
>+}
>+
>+struct ice_tx_hang_event {
>+	u32 head;
>+	u32 intr;
>+	u16 vsi_num;
>+	u16 queue;
>+	u16 next_to_clean;
>+	u16 next_to_use;
>+	struct ice_tx_ring *tx_ring;
>+};
>+
>+static int ice_tx_hang_reporter_dump(struct devlink_health_reporter *reporter,
>+				     struct devlink_fmsg *fmsg, void *priv_ctx,
>+				     struct netlink_ext_ack *extack)
>+{
>+	struct ice_tx_hang_event *event = priv_ctx;
>+	struct sk_buff *skb;
>+
>+	if (!event)
>+		return 0;
>+
>+	skb = event->tx_ring->tx_buf->skb;
>+	devlink_fmsg_obj_nest_start(fmsg);
>+	ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, head);
>+	ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, intr);
>+	ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, vsi_num);
>+	ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, queue);
>+	ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, next_to_clean);
>+	ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, next_to_use);
>+	devlink_fmsg_put(fmsg, "irq-mapping", event->tx_ring->q_vector->name);
>+	ice_fmsg_put_ptr(fmsg, "desc-ptr", event->tx_ring->desc);
>+	ice_fmsg_put_ptr(fmsg, "dma-ptr", (void *)(long)event->tx_ring->dma);
>+	ice_fmsg_put_ptr(fmsg, "skb-ptr", skb);

Interesting. What is the kernel pointer put into the message good for?


>+	devlink_fmsg_binary_pair_put(fmsg, "desc", event->tx_ring->desc,
>+				     event->tx_ring->count * sizeof(struct ice_tx_desc));
>+	devlink_fmsg_dump_skb(fmsg, skb);
>+	devlink_fmsg_obj_nest_end(fmsg);
>+
>+	return 0;
>+}
>+
>+void ice_prep_tx_hang_report(struct ice_pf *pf, struct ice_tx_ring *tx_ring,
>+			     u16 vsi_num, u32 head, u32 intr)
>+{
>+	struct ice_health_tx_hang_buf *buf = &pf->health_reporters.tx_hang_buf;
>+
>+	buf->tx_ring = tx_ring;
>+	buf->vsi_num = vsi_num;
>+	buf->head = head;
>+	buf->intr = intr;
>+}
>+
>+void ice_report_tx_hang(struct ice_pf *pf)
>+{
>+	struct ice_health_tx_hang_buf *buf = &pf->health_reporters.tx_hang_buf;
>+	struct ice_tx_ring *tx_ring = buf->tx_ring;
>+
>+	struct ice_tx_hang_event ev = {
>+		.head = buf->head,
>+		.intr = buf->intr,
>+		.vsi_num = buf->vsi_num,
>+		.queue = tx_ring->q_index,
>+		.next_to_clean = tx_ring->next_to_clean,
>+		.next_to_use = tx_ring->next_to_use,
>+		.tx_ring = tx_ring,
>+	};
>+
>+	ice_devlink_health_report(pf->health_reporters.tx_hang, "Tx hang", &ev);
>+}
>+
>+static struct devlink_health_reporter *
>+ice_init_devlink_rep(struct ice_pf *pf,
>+		     const struct devlink_health_reporter_ops *ops)
>+{
>+	struct devlink *devlink = priv_to_devlink(pf);
>+	struct devlink_health_reporter *rep;
>+	const u64 graceful_period = 0;
>+
>+	rep = devl_health_reporter_create(devlink, ops, graceful_period, pf);

Why is this not per-port? devl_port_health_reporter_create()? Tx is
a port-related thing, isn't it?

Something like this is already implemented in mlx5:

auxiliary/mlx5_core.eth.1/131071:
  reporter tx
    state healthy error 0 recover 0 grace_period 500 auto_recover true auto_dump true
  reporter rx
    state healthy error 0 recover 0 grace_period 500 auto_recover true auto_dump true


See mlx5e_reporter_tx_timeout() for example.


>+	if (IS_ERR(rep)) {
>+		struct device *dev = ice_pf_to_dev(pf);
>+
>+		dev_err(dev, "failed to create devlink %s health report er",
>+			ops->name);
>+		return NULL;
>+	}
>+	return rep;
>+}
>+

[...]

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ