[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <k24eu6e64meeunvif2g3m4xqkzj3h7jmgs4axvmcic2tjpxewx@d4uyeqraobuj>
Date: Tue, 11 Feb 2025 15:26:46 +0100
From: Jiri Pirko <jiri@...nulli.us>
To: Tony Nguyen <anthony.l.nguyen@...el.com>
Cc: davem@...emloft.net, kuba@...nel.org, pabeni@...hat.com,
edumazet@...gle.com, andrew+netdev@...n.ch, netdev@...r.kernel.org,
Przemek Kitszel <przemyslaw.kitszel@...el.com>, mateusz.polchlopek@...el.com, joe@...ches.com, horms@...nel.org,
apw@...onical.com, lukas.bulwahn@...il.com, dwaipayanray1@...il.com,
Igor Bagnucki <igor.bagnucki@...el.com>, Pucha Himasekhar Reddy <himasekharx.reddy.pucha@...el.com>
Subject: Re: [PATCH net-next v2 5/6] ice: add Tx hang devlink health reporter
Tue, Dec 17, 2024 at 10:08:32PM +0100, anthony.l.nguyen@...el.com wrote:
>From: Przemek Kitszel <przemyslaw.kitszel@...el.com>
>
>Add Tx hang devlink health reporter, see struct ice_tx_hang_event to see
>what exactly is reported. For now dump descriptors with little metadata
>and skb diagnostic information.
>
>Reviewed-by: Igor Bagnucki <igor.bagnucki@...el.com>
>Reviewed-by: Wojciech Drewek <wojciech.drewek@...el.com>
>Co-developed-by: Mateusz Polchlopek <mateusz.polchlopek@...el.com>
>Signed-off-by: Mateusz Polchlopek <mateusz.polchlopek@...el.com>
>Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@...el.com> (A Contingent worker at Intel)
>Signed-off-by: Przemek Kitszel <przemyslaw.kitszel@...el.com>
>Signed-off-by: Tony Nguyen <anthony.l.nguyen@...el.com>
>---
> drivers/net/ethernet/intel/ice/Makefile | 1 +
> .../net/ethernet/intel/ice/devlink/health.c | 192 ++++++++++++++++++
> .../net/ethernet/intel/ice/devlink/health.h | 47 +++++
> drivers/net/ethernet/intel/ice/ice.h | 2 +
> drivers/net/ethernet/intel/ice/ice_main.c | 18 +-
> 5 files changed, 255 insertions(+), 5 deletions(-)
> create mode 100644 drivers/net/ethernet/intel/ice/devlink/health.c
> create mode 100644 drivers/net/ethernet/intel/ice/devlink/health.h
>
>diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
>index 56aa23aee472..9e0d9f710441 100644
>--- a/drivers/net/ethernet/intel/ice/Makefile
>+++ b/drivers/net/ethernet/intel/ice/Makefile
>@@ -32,6 +32,7 @@ ice-y := ice_main.o \
> ice_parser_rt.o \
> ice_idc.o \
> devlink/devlink.o \
>+ devlink/health.o \
> devlink/port.o \
> ice_sf_eth.o \
> ice_sf_vsi_vlan_ops.o \
>diff --git a/drivers/net/ethernet/intel/ice/devlink/health.c b/drivers/net/ethernet/intel/ice/devlink/health.c
>new file mode 100644
>index 000000000000..984d910fc41d
>--- /dev/null
>+++ b/drivers/net/ethernet/intel/ice/devlink/health.c
>@@ -0,0 +1,192 @@
>+// SPDX-License-Identifier: GPL-2.0
>+/* Copyright (c) 2024, Intel Corporation. */
>+
>+#include "health.h"
>+#include "ice.h"
>+
>+#define ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, obj, name) \
>+ devlink_fmsg_put(fmsg, #name, (obj)->name)
>+
>+/**
>+ * ice_devlink_health_report - boilerplate to call given @reporter
>+ *
>+ * @reporter: devlink health reporter to call, do nothing on NULL
>+ * @msg: message to pass up, "event name" is fine
>+ * @priv_ctx: typically some event struct
>+ */
>+static void ice_devlink_health_report(struct devlink_health_reporter *reporter,
>+ const char *msg, void *priv_ctx)
>+{
>+ if (!reporter)
>+ return;
>+
>+ /* We do not do auto recovering, so return value of the below function
>+ * will always be 0, thus we do ignore it.
>+ */
>+ devlink_health_report(reporter, msg, priv_ctx);
>+}
>+
>+/**
>+ * ice_fmsg_put_ptr - put hex value of pointer into fmsg
>+ *
>+ * @fmsg: devlink fmsg under construction
>+ * @name: name to pass
>+ * @ptr: 64 bit value to print as hex and put into fmsg
>+ */
>+static void ice_fmsg_put_ptr(struct devlink_fmsg *fmsg, const char *name,
>+ void *ptr)
>+{
>+ char buf[sizeof(ptr) * 3];
>+
>+ sprintf(buf, "%p", ptr);
>+ devlink_fmsg_put(fmsg, name, buf);
>+}
>+
>+struct ice_tx_hang_event {
>+ u32 head;
>+ u32 intr;
>+ u16 vsi_num;
>+ u16 queue;
>+ u16 next_to_clean;
>+ u16 next_to_use;
>+ struct ice_tx_ring *tx_ring;
>+};
>+
>+static int ice_tx_hang_reporter_dump(struct devlink_health_reporter *reporter,
>+ struct devlink_fmsg *fmsg, void *priv_ctx,
>+ struct netlink_ext_ack *extack)
>+{
>+ struct ice_tx_hang_event *event = priv_ctx;
>+ struct sk_buff *skb;
>+
>+ if (!event)
>+ return 0;
>+
>+ skb = event->tx_ring->tx_buf->skb;
>+ devlink_fmsg_obj_nest_start(fmsg);
>+ ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, head);
>+ ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, intr);
>+ ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, vsi_num);
>+ ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, queue);
>+ ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, next_to_clean);
>+ ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, next_to_use);
>+ devlink_fmsg_put(fmsg, "irq-mapping", event->tx_ring->q_vector->name);
>+ ice_fmsg_put_ptr(fmsg, "desc-ptr", event->tx_ring->desc);
>+ ice_fmsg_put_ptr(fmsg, "dma-ptr", (void *)(long)event->tx_ring->dma);
>+ ice_fmsg_put_ptr(fmsg, "skb-ptr", skb);
Interesting. What is the kernel pointer put into the message good for?
>+ devlink_fmsg_binary_pair_put(fmsg, "desc", event->tx_ring->desc,
>+ event->tx_ring->count * sizeof(struct ice_tx_desc));
>+ devlink_fmsg_dump_skb(fmsg, skb);
>+ devlink_fmsg_obj_nest_end(fmsg);
>+
>+ return 0;
>+}
>+
>+void ice_prep_tx_hang_report(struct ice_pf *pf, struct ice_tx_ring *tx_ring,
>+ u16 vsi_num, u32 head, u32 intr)
>+{
>+ struct ice_health_tx_hang_buf *buf = &pf->health_reporters.tx_hang_buf;
>+
>+ buf->tx_ring = tx_ring;
>+ buf->vsi_num = vsi_num;
>+ buf->head = head;
>+ buf->intr = intr;
>+}
>+
>+void ice_report_tx_hang(struct ice_pf *pf)
>+{
>+ struct ice_health_tx_hang_buf *buf = &pf->health_reporters.tx_hang_buf;
>+ struct ice_tx_ring *tx_ring = buf->tx_ring;
>+
>+ struct ice_tx_hang_event ev = {
>+ .head = buf->head,
>+ .intr = buf->intr,
>+ .vsi_num = buf->vsi_num,
>+ .queue = tx_ring->q_index,
>+ .next_to_clean = tx_ring->next_to_clean,
>+ .next_to_use = tx_ring->next_to_use,
>+ .tx_ring = tx_ring,
>+ };
>+
>+ ice_devlink_health_report(pf->health_reporters.tx_hang, "Tx hang", &ev);
>+}
>+
>+static struct devlink_health_reporter *
>+ice_init_devlink_rep(struct ice_pf *pf,
>+ const struct devlink_health_reporter_ops *ops)
>+{
>+ struct devlink *devlink = priv_to_devlink(pf);
>+ struct devlink_health_reporter *rep;
>+ const u64 graceful_period = 0;
>+
>+ rep = devl_health_reporter_create(devlink, ops, graceful_period, pf);
Why is this not per-port (devl_port_health_reporter_create())? Tx is a
port-related thing, isn't it?
Something like this is already implemented in mlx5:
auxiliary/mlx5_core.eth.1/131071:
reporter tx
state healthy error 0 recover 0 grace_period 500 auto_recover true auto_dump true
reporter rx
state healthy error 0 recover 0 grace_period 500 auto_recover true auto_dump true
See mlx5e_reporter_tx_timeout() for example.
>+ if (IS_ERR(rep)) {
>+ struct device *dev = ice_pf_to_dev(pf);
>+
>+ dev_err(dev, "failed to create devlink %s health report er",
>+ ops->name);
>+ return NULL;
>+ }
>+ return rep;
>+}
>+
[...]
Powered by blists - more mailing lists