[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <k2stckvckusd7pdjkvxpbfqabnarrqc7igcirnhorj2gobidgj@iugsqakc45b6>
Date: Tue, 11 Feb 2025 15:30:23 +0100
From: Jiri Pirko <jiri@...nulli.us>
To: Tony Nguyen <anthony.l.nguyen@...el.com>
Cc: davem@...emloft.net, kuba@...nel.org, pabeni@...hat.com,
edumazet@...gle.com, andrew+netdev@...n.ch, netdev@...r.kernel.org,
Ben Shelton <benjamin.h.shelton@...el.com>, przemyslaw.kitszel@...el.com, mateusz.polchlopek@...el.com,
joe@...ches.com, horms@...nel.org, apw@...onical.com, lukas.bulwahn@...il.com,
dwaipayanray1@...il.com, Igor Bagnucki <igor.bagnucki@...el.com>,
Pucha Himasekhar Reddy <himasekharx.reddy.pucha@...el.com>
Subject: Re: [PATCH net-next v2 6/6] ice: Add MDD logging via devlink health
Tue, Dec 17, 2024 at 10:08:33PM +0100, anthony.l.nguyen@...el.com wrote:
>From: Ben Shelton <benjamin.h.shelton@...el.com>
>
>Add a devlink health reporter for MDD events. The 'dump' handler will
>return the information captured in each call to ice_handle_mdd_event().
>A device reset (CORER/PFR) will put the reporter back in healthy state.
>
>Signed-off-by: Ben Shelton <benjamin.h.shelton@...el.com>
>Reviewed-by: Igor Bagnucki <igor.bagnucki@...el.com>
>Reviewed-by: Wojciech Drewek <wojciech.drewek@...el.com>
>Reviewed-by: Simon Horman <horms@...nel.org>
>Signed-off-by: Mateusz Polchlopek <mateusz.polchlopek@...el.com>
>Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@...el.com> (A Contingent worker at Intel)
>Co-developed-by: Przemek Kitszel <przemyslaw.kitszel@...el.com>
>Signed-off-by: Przemek Kitszel <przemyslaw.kitszel@...el.com>
>Signed-off-by: Tony Nguyen <anthony.l.nguyen@...el.com>
>---
> .../net/ethernet/intel/ice/devlink/health.c | 77 +++++++++++++++++++
> .../net/ethernet/intel/ice/devlink/health.h | 11 +++
> drivers/net/ethernet/intel/ice/ice_main.c | 6 ++
> 3 files changed, 94 insertions(+)
>
>diff --git a/drivers/net/ethernet/intel/ice/devlink/health.c b/drivers/net/ethernet/intel/ice/devlink/health.c
>index 984d910fc41d..d23ae3aafaa7 100644
>--- a/drivers/net/ethernet/intel/ice/devlink/health.c
>+++ b/drivers/net/ethernet/intel/ice/devlink/health.c
>@@ -26,6 +26,79 @@ static void ice_devlink_health_report(struct devlink_health_reporter *reporter,
> devlink_health_report(reporter, msg, priv_ctx);
> }
>
>+struct ice_mdd_event {
>+ enum ice_mdd_src src;
>+ u16 vf_num;
>+ u16 queue;
>+ u8 pf_num;
>+ u8 event;
>+};
>+
>+static const char *ice_mdd_src_to_str(enum ice_mdd_src src)
>+{
>+ switch (src) {
>+ case ICE_MDD_SRC_TX_PQM:
>+ return "tx_pqm";
>+ case ICE_MDD_SRC_TX_TCLAN:
>+ return "tx_tclan";
>+ case ICE_MDD_SRC_TX_TDPU:
>+ return "tx_tdpu";
>+ case ICE_MDD_SRC_RX:
>+ return "rx";
>+ default:
>+ return "invalid";
>+ }
>+}
>+
>+static int
>+ice_mdd_reporter_dump(struct devlink_health_reporter *reporter,
>+ struct devlink_fmsg *fmsg, void *priv_ctx,
>+ struct netlink_ext_ack *extack)
>+{
>+ struct ice_mdd_event *mdd_event = priv_ctx;
>+ const char *src;
>+
>+ if (!mdd_event)
>+ return 0;
>+
>+ src = ice_mdd_src_to_str(mdd_event->src);
>+
>+ devlink_fmsg_obj_nest_start(fmsg);
>+ devlink_fmsg_put(fmsg, "src", src);
>+ ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, mdd_event, pf_num);
>+ ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, mdd_event, vf_num);
Why don't you attach this reporter to the representor devlink port? I mean,
exposing pf/vf num just because the reporter is not attached to the proper
object looks wrong to me. We have an object hierarchy in devlink; benefit
from it.
>+ ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, mdd_event, event);
>+ ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, mdd_event, queue);
>+ devlink_fmsg_obj_nest_end(fmsg);
>+
>+ return 0;
>+}
>+
>+/**
>+ * ice_report_mdd_event - Report an MDD event through devlink health
>+ * @pf: the PF device structure
>+ * @src: the HW block that was the source of this MDD event
>+ * @pf_num: the pf_num on which the MDD event occurred
>+ * @vf_num: the vf_num on which the MDD event occurred
>+ * @event: the event type of the MDD event
>+ * @queue: the queue on which the MDD event occurred
>+ *
>+ * Report an MDD event that has occurred on this PF.
>+ */
>+void ice_report_mdd_event(struct ice_pf *pf, enum ice_mdd_src src, u8 pf_num,
>+ u16 vf_num, u8 event, u16 queue)
>+{
>+ struct ice_mdd_event ev = {
>+ .src = src,
>+ .pf_num = pf_num,
>+ .vf_num = vf_num,
>+ .event = event,
>+ .queue = queue,
>+ };
>+
>+ ice_devlink_health_report(pf->health_reporters.mdd, "MDD event", &ev);
>+}
>+
> /**
> * ice_fmsg_put_ptr - put hex value of pointer into fmsg
> *
>@@ -136,6 +209,7 @@ ice_init_devlink_rep(struct ice_pf *pf,
> .dump = ice_ ## _name ## _reporter_dump, \
> }
>
>+ICE_DEFINE_HEALTH_REPORTER_OPS(mdd);
> ICE_DEFINE_HEALTH_REPORTER_OPS(tx_hang);
>
> /**
>@@ -148,6 +222,7 @@ void ice_health_init(struct ice_pf *pf)
> {
> struct ice_health *reps = &pf->health_reporters;
>
>+ reps->mdd = ice_init_devlink_rep(pf, &ice_mdd_reporter_ops);
> reps->tx_hang = ice_init_devlink_rep(pf, &ice_tx_hang_reporter_ops);
> }
>
>@@ -169,6 +244,7 @@ static void ice_deinit_devl_reporter(struct devlink_health_reporter *reporter)
> */
> void ice_health_deinit(struct ice_pf *pf)
> {
>+ ice_deinit_devl_reporter(pf->health_reporters.mdd);
> ice_deinit_devl_reporter(pf->health_reporters.tx_hang);
> }
>
>@@ -188,5 +264,6 @@ void ice_health_assign_healthy_state(struct devlink_health_reporter *reporter)
> */
> void ice_health_clear(struct ice_pf *pf)
> {
>+ ice_health_assign_healthy_state(pf->health_reporters.mdd);
> ice_health_assign_healthy_state(pf->health_reporters.tx_hang);
> }
>diff --git a/drivers/net/ethernet/intel/ice/devlink/health.h b/drivers/net/ethernet/intel/ice/devlink/health.h
>index 5ce601227acb..532277fc57d7 100644
>--- a/drivers/net/ethernet/intel/ice/devlink/health.h
>+++ b/drivers/net/ethernet/intel/ice/devlink/health.h
>@@ -16,9 +16,17 @@
> struct ice_pf;
> struct ice_tx_ring;
>
>+enum ice_mdd_src {
>+ ICE_MDD_SRC_TX_PQM,
>+ ICE_MDD_SRC_TX_TCLAN,
>+ ICE_MDD_SRC_TX_TDPU,
>+ ICE_MDD_SRC_RX,
>+};
>+
> /**
> * struct ice_health - stores ice devlink health reporters and accompanied data
> * @tx_hang: devlink health reporter for tx_hang event
>+ * @mdd: devlink health reporter for MDD detection event
> * @tx_hang_buf: pre-allocated place to put info for Tx hang reporter from
> * non-sleeping context
> * @tx_ring: ring that the hang occurred on
>@@ -27,6 +35,7 @@ struct ice_tx_ring;
> * @vsi_num: VSI owning the queue that the hang occurred on
> */
> struct ice_health {
>+ struct devlink_health_reporter *mdd;
> struct devlink_health_reporter *tx_hang;
> struct_group_tagged(ice_health_tx_hang_buf, tx_hang_buf,
> struct ice_tx_ring *tx_ring;
>@@ -42,6 +51,8 @@ void ice_health_clear(struct ice_pf *pf);
>
> void ice_prep_tx_hang_report(struct ice_pf *pf, struct ice_tx_ring *tx_ring,
> u16 vsi_num, u32 head, u32 intr);
>+void ice_report_mdd_event(struct ice_pf *pf, enum ice_mdd_src src, u8 pf_num,
>+ u16 vf_num, u8 event, u16 queue);
> void ice_report_tx_hang(struct ice_pf *pf);
>
> #endif /* _HEALTH_H_ */
>diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
>index 316f5109bd3f..1701f7143f24 100644
>--- a/drivers/net/ethernet/intel/ice/ice_main.c
>+++ b/drivers/net/ethernet/intel/ice/ice_main.c
>@@ -1816,6 +1816,8 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
> if (netif_msg_tx_err(pf))
> dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
> event, queue, pf_num, vf_num);
>+ ice_report_mdd_event(pf, ICE_MDD_SRC_TX_PQM, pf_num, vf_num,
>+ event, queue);
> wr32(hw, GL_MDET_TX_PQM, 0xffffffff);
> }
>
>@@ -1829,6 +1831,8 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
> if (netif_msg_tx_err(pf))
> dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
> event, queue, pf_num, vf_num);
>+ ice_report_mdd_event(pf, ICE_MDD_SRC_TX_TCLAN, pf_num, vf_num,
>+ event, queue);
> wr32(hw, GL_MDET_TX_TCLAN_BY_MAC(hw), U32_MAX);
> }
>
>@@ -1842,6 +1846,8 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
> if (netif_msg_rx_err(pf))
> dev_info(dev, "Malicious Driver Detection event %d on RX queue %d PF# %d VF# %d\n",
> event, queue, pf_num, vf_num);
>+ ice_report_mdd_event(pf, ICE_MDD_SRC_RX, pf_num, vf_num, event,
>+ queue);
> wr32(hw, GL_MDET_RX, 0xffffffff);
> }
>
>--
>2.47.1
>
Powered by blists - more mailing lists