lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20260123090741.1566469-4-o.rempel@pengutronix.de>
Date: Fri, 23 Jan 2026 10:07:39 +0100
From: Oleksij Rempel <o.rempel@...gutronix.de>
To: "David S. Miller" <davem@...emloft.net>,
	Eric Dumazet <edumazet@...gle.com>,
	Jakub Kicinski <kuba@...nel.org>,
	Paolo Abeni <pabeni@...hat.com>,
	Andrew Lunn <andrew+netdev@...n.ch>,
	Thangaraj Samynathan <Thangaraj.S@...rochip.com>,
	Rengarajan Sundararajan <Rengarajan.S@...rochip.com>
Cc: Oleksij Rempel <o.rempel@...gutronix.de>,
	kernel@...gutronix.de,
	linux-kernel@...r.kernel.org,
	netdev@...r.kernel.org,
	UNGLinuxDriver@...rochip.com
Subject: [RFC PATCH 3/4] net: lan78xx: Enhance health reporting with workqueue and detailed flow control stats

Refactor the health reporting to:

1. Introduce a dedicated work item for TX timeouts, scheduled on the
   system workqueue. This prevents calling devlink_health_report()
   (which may sleep) from an atomic context (netdev tx_timeout).

2. Update statistics tracking and reporting context to separate TX Pause
   and RX Pause frames, allowing finer-grained stall analysis (local vs.
   link partner induced flow control storm).

3. Change the devlink recovery function to call
   phylink_mac_change(dev->phylink, false) instead of calling
   lan78xx_reset() directly. This leverages the newly robust link_down
   path which performs the necessary locking and conditional Lite Reset.

Signed-off-by: Oleksij Rempel <o.rempel@...gutronix.de>
---
 drivers/net/usb/lan78xx.c | 133 +++++++++++++++++++++++++-------------
 1 file changed, 87 insertions(+), 46 deletions(-)

diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 9dadca4101bc..316a3a8d0534 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -425,15 +425,36 @@ struct lan78xx_stat_snapshot {
 	ktime_t time;
 
 	u64 tx_pause_total;
+	u64 rx_pause_total;
 	u64 tx_unicast_total;
 	u64 rx_total_frames;
 	u64 rx_hw_drop_total;
 	u64 rx_sw_packets_total;
 
-	u32 last_delta_pause;
+	u32 last_delta_rx_pause;
+	u32 last_delta_tx_pause;
 	u32 last_delta_drops;
 };
 
+struct lan78xx_dump_ctx {
+	const char *msg;
+	ktime_t ts; /* Timestamp of detection */
+
+	union {
+		struct {
+			u64 delta_tx_pause;
+			u64 delta_rx_pause;
+			u64 delta_rx;
+			u64 delta_hw_drop;
+			u64 delta_sw_rx;
+		} fifo;
+		struct {
+			u32 int_sts; /* The ISR's view of INT_STS */
+			u32 int_enp; /* The ISR's view of INT_ENP_CTL */
+		} err;
+	};
+};
+
 struct irq_domain_data {
 	struct irq_domain	*irqdomain;
 	unsigned int		phyirq;
@@ -505,27 +526,10 @@ struct lan78xx_net {
 	struct devlink_health_reporter	*fifo_reporter;
 	struct devlink_health_reporter	*internal_err_reporter;
 	struct lan78xx_stat_snapshot	snapshot;
+	struct work_struct		tx_timeout_work;
+	struct lan78xx_dump_ctx		timeout_ctx;
 };
 
-struct lan78xx_dump_ctx {
-	const char *msg;
-	ktime_t ts; /* Timestamp of detection */
-
-	union {
-		struct {
-			u64 delta_pause;
-			u64 delta_rx;
-			u64 delta_hw_drop;
-			u64 delta_sw_rx;
-		} fifo;
-		struct {
-			u32 int_sts; /* The ISR's view of INT_STS */
-			u32 int_enp; /* The ISR's view of INT_ENP_CTL */
-		} err;
-	};
-};
-
-/* Register Dump Map Structure */
 struct lan78xx_reg_map {
 	u32 reg;
 	const char *name;
@@ -966,7 +970,7 @@ static void lan78xx_check_stat_rollover(struct lan78xx_net *dev,
 
 static void lan78xx_check_stat_anomalies(struct lan78xx_net *dev)
 {
-	u64 delta_pause, delta_rx, delta_hw_drop, delta_sw_rx;
+	u64 delta_tx_pause, delta_rx_pause, delta_rx, delta_hw_drop, delta_sw_rx;
 	struct lan78xx_dump_ctx ctx = {0};
 	struct lan78xx_stat_snapshot now;
 	const char *anomaly_msg = NULL;
@@ -976,6 +980,7 @@ static void lan78xx_check_stat_anomalies(struct lan78xx_net *dev)
 
 	mutex_lock(&dev->stats.access_lock);
 	now.tx_pause_total = dev->stats.curr_stat.tx_pause_frames;
+	now.rx_pause_total = dev->stats.curr_stat.rx_pause_frames;
 	now.rx_total_frames = dev->stats.curr_stat.rx_unicast_frames +
 			      dev->stats.curr_stat.rx_broadcast_frames +
 			      dev->stats.curr_stat.rx_multicast_frames;
@@ -985,17 +990,19 @@ static void lan78xx_check_stat_anomalies(struct lan78xx_net *dev)
 
 	now.rx_sw_packets_total = dev->net->stats.rx_packets;
 
-	delta_pause = now.tx_pause_total - dev->snapshot.tx_pause_total;
+	delta_tx_pause = now.tx_pause_total - dev->snapshot.tx_pause_total;
+	delta_rx_pause = now.rx_pause_total - dev->snapshot.rx_pause_total;
 	delta_rx = now.rx_total_frames - dev->snapshot.rx_total_frames;
 	delta_hw_drop = now.rx_hw_drop_total - dev->snapshot.rx_hw_drop_total;
 	delta_sw_rx = now.rx_sw_packets_total - dev->snapshot.rx_sw_packets_total;
 
-	now.last_delta_pause = (u32)delta_pause;
+	now.last_delta_tx_pause = (u32)delta_tx_pause;
+	now.last_delta_rx_pause = (u32)delta_rx_pause;
 	now.last_delta_drops = (u32)delta_hw_drop;
 
 	dev->snapshot = now;
 
-	if (delta_pause > LAN78XX_STALL_PAUSE_THRESH && delta_rx == 0) {
+	if (delta_tx_pause > LAN78XX_STALL_PAUSE_THRESH && delta_rx == 0) {
 		anomaly_msg = "Stall: Pause Storm & No RX";
 	} else if (delta_hw_drop > LAN78XX_LIVELOCK_DROP_THRESH &&
 		   delta_hw_drop > (delta_sw_rx * LAN78XX_LIVELOCK_DROP_RATIO)) {
@@ -1008,10 +1015,11 @@ static void lan78xx_check_stat_anomalies(struct lan78xx_net *dev)
 	/* 5. Reporting */
 	ctx.msg = anomaly_msg;
 	ctx.ts = now.time;
-	ctx.fifo.delta_pause   = delta_pause;
-	ctx.fifo.delta_rx      = delta_rx;
+	ctx.fifo.delta_tx_pause = delta_tx_pause;
+	ctx.fifo.delta_rx_pause = delta_rx_pause;
+	ctx.fifo.delta_rx = delta_rx;
 	ctx.fifo.delta_hw_drop = delta_hw_drop;
-	ctx.fifo.delta_sw_rx   = delta_sw_rx;
+	ctx.fifo.delta_sw_rx = delta_sw_rx;
 
 	netdev_warn(dev->net, "%s (HW Drops: +%llu, SW RX: +%llu)\n",
 		    ctx.msg, delta_hw_drop, delta_sw_rx);
@@ -2495,6 +2503,24 @@ static void lan78xx_mac_config(struct phylink_config *config, unsigned int mode,
 			   ERR_PTR(ret));
 }
 
+static int lan78xx_configure_flowcontrol(struct lan78xx_net *dev,
+					 bool tx_pause, bool rx_pause);
+static int lan78xx_reset(struct lan78xx_net *dev);
+
+static void lan78xx_dump_status(struct lan78xx_net *dev, const char *msg)
+{
+	u32 int_sts, mac_tx, fct_tx_ctl, mac_rx, fct_rx_ctl;
+
+	lan78xx_read_reg(dev, INT_STS, &int_sts);
+	lan78xx_read_reg(dev, MAC_TX, &mac_tx);
+	lan78xx_read_reg(dev, FCT_TX_CTL, &fct_tx_ctl);
+	lan78xx_read_reg(dev, MAC_RX, &mac_rx);
+	lan78xx_read_reg(dev, FCT_RX_CTL, &fct_rx_ctl);
+
+	netdev_info(dev->net, "[%s] INT_STS: 0x%08x, MAC_TX: 0x%08x, FCT_TX: 0x%08x, MAC_RX: 0x%08x, FCT_RX: 0x%08x\n",
+		    msg, int_sts, mac_tx, fct_tx_ctl, mac_rx, fct_rx_ctl);
+}
+
 static void lan78xx_mac_link_down(struct phylink_config *config,
 				  unsigned int mode, phy_interface_t interface)
 {
@@ -4939,8 +4965,10 @@ static int lan78xx_fifo_dump(struct devlink_health_reporter *reporter,
 					  ktime_to_ns(ctx->ts));
 
 		devlink_fmsg_obj_nest_start(fmsg);
-		devlink_fmsg_u64_pair_put(fmsg, "trigger_delta_pause",
-					  ctx->fifo.delta_pause);
+		devlink_fmsg_u64_pair_put(fmsg, "trigger_delta_tx_pause",
+					  ctx->fifo.delta_tx_pause);
+		devlink_fmsg_u64_pair_put(fmsg, "trigger_delta_rx_pause",
+					  ctx->fifo.delta_rx_pause);
 		devlink_fmsg_u64_pair_put(fmsg, "trigger_delta_rx",
 					  ctx->fifo.delta_rx);
 		devlink_fmsg_u64_pair_put(fmsg, "trigger_delta_hw_drop",
@@ -4989,8 +5017,9 @@ static int lan78xx_fifo_recover(struct devlink_health_reporter *reporter,
 {
 	struct lan78xx_net *dev = devlink_health_reporter_priv(reporter);
 
-	netdev_warn(dev->net, "Recovering from FIFO stall via Lite Reset\n");
-	return lan78xx_reset(dev);
+	netdev_warn(dev->net, "Recovering via Lite Reset\n");
+	phylink_mac_change(dev->phylink, false);
+	return 0;
 }
 
 static const struct devlink_health_reporter_ops lan78xx_fifo_ops = {
@@ -5075,6 +5104,7 @@ static void lan78xx_disconnect(struct usb_interface *intf)
 
 	lan78xx_health_cleanup(dev);
 	if (dev->devlink) {
+		cancel_work_sync(&dev->tx_timeout_work);
 		devlink_unregister(dev->devlink);
 		devlink_free(dev->devlink);
 		dev->devlink = NULL;
@@ -5107,36 +5137,45 @@ static void lan78xx_disconnect(struct usb_interface *intf)
 	usb_put_dev(udev);
 }
 
+static void lan78xx_tx_timeout_work(struct work_struct *work)
+{
+	struct lan78xx_net *dev = container_of(work, struct lan78xx_net,
+					       tx_timeout_work);
+
+	devlink_health_report(dev->fifo_reporter, dev->timeout_ctx.msg,
+			      &dev->timeout_ctx);
+}
+
 static void lan78xx_tx_timeout(struct net_device *net, unsigned int txqueue)
 {
 	struct lan78xx_net *dev = netdev_priv(net);
-	struct lan78xx_dump_ctx ctx = {0};
-	s64 diff_ms;
+	s64 diff_ms = 0;
 
 	/* Calculate time since last health check */
-	ctx.ts = ktime_get_real();
-	diff_ms = ktime_ms_delta(ctx.ts, dev->snapshot.time);
+	dev->timeout_ctx.ts = ktime_get_real();
+	diff_ms = ktime_ms_delta(dev->timeout_ctx.ts, dev->snapshot.time);
 
 	/* We rely on the trend data captured during the last valid stat update
 	 * to infer the system state before the crash.
 	 */
-	if (dev->snapshot.last_delta_pause > LAN78XX_STALL_PAUSE_THRESH)
-		ctx.msg = "TX Timeout (Flow Control Storm?)";
+	if (dev->snapshot.last_delta_rx_pause > LAN78XX_STALL_PAUSE_THRESH)
+		dev->timeout_ctx.msg = "TX Timeout (Link Partner Pause Storm?)";
+	else if (dev->snapshot.last_delta_tx_pause > LAN78XX_STALL_PAUSE_THRESH)
+		dev->timeout_ctx.msg = "TX Timeout (Local Flow Control Storm?)";
 	else if (dev->snapshot.last_delta_drops > LAN78XX_TX_TIMEOUT_DROP_THRESH)
-		ctx.msg = "TX Timeout (FIFO Drop Storm?)";
+		dev->timeout_ctx.msg = "TX Timeout (FIFO Drop Storm?)";
 	else
-		ctx.msg = "TX Timeout";
+		dev->timeout_ctx.msg = "TX Timeout";
 
-	ctx.fifo.delta_pause = dev->snapshot.last_delta_pause;
-	ctx.fifo.delta_hw_drop = dev->snapshot.last_delta_drops;
+	dev->timeout_ctx.fifo.delta_rx_pause = dev->snapshot.last_delta_rx_pause;
+	dev->timeout_ctx.fifo.delta_tx_pause = dev->snapshot.last_delta_tx_pause;
+	dev->timeout_ctx.fifo.delta_hw_drop = dev->snapshot.last_delta_drops;
 
 	netdev_warn(dev->net, "%s (Last stat update: %lld ms ago)\n",
-		    ctx.msg, diff_ms);
+		    dev->timeout_ctx.msg, diff_ms);
 
-	devlink_health_report(dev->fifo_reporter, ctx.msg, &ctx);
-
-	unlink_urbs(dev, &dev->txq);
-	napi_schedule(&dev->napi);
+	/* Defer report to worker to avoid sleeping in atomic context */
+	schedule_work(&dev->tx_timeout_work);
 }
 
 static netdev_features_t lan78xx_features_check(struct sk_buff *skb,
@@ -5542,6 +5581,8 @@ static int lan78xx_probe(struct usb_interface *intf,
 	pm_runtime_set_autosuspend_delay(&udev->dev,
 					 DEFAULT_AUTOSUSPEND_DELAY);
 
+	INIT_WORK(&dev->tx_timeout_work, lan78xx_tx_timeout_work);
+
 	dev->devlink = devlink_alloc(&lan78xx_devlink_ops,
 				     sizeof(struct lan78xx_devlink_priv),
 				     &udev->dev);
-- 
2.47.3


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ