lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ZczSEBFtq6E6APUJ@gmail.com>
Date: Wed, 14 Feb 2024 06:45:36 -0800
From: Breno Leitao <leitao@...ian.org>
To: Jakub Kicinski <kuba@...nel.org>, edumazet@...gle.com
Cc: Eric Dumazet <edumazet@...gle.com>, davem@...emloft.net,
	pabeni@...hat.com, Steven Rostedt <rostedt@...dmis.org>,
	Masami Hiramatsu <mhiramat@...nel.org>,
	Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
	Andrew Morton <akpm@...ux-foundation.org>, weiwan@...gle.com,
	netdev@...r.kernel.org, linux-kernel@...r.kernel.org,
	horms@...nel.org, Jonathan Corbet <corbet@....net>,
	Randy Dunlap <rdunlap@...radead.org>,
	Bjorn Helgaas <bhelgaas@...gle.com>,
	Johannes Berg <johannes.berg@...el.com>,
	Thomas Weißschuh <linux@...ssschuh.net>,
	"open list:TRACING" <linux-trace-kernel@...r.kernel.org>
Subject: Re: [PATCH net-next v3] net: dqs: add NIC stall detector based on BQL

On Tue, Feb 13, 2024 at 10:04:57AM -0800, Jakub Kicinski wrote:
> On Tue, 13 Feb 2024 14:57:49 +0100 Eric Dumazet wrote:
> > Please note that adding other sysfs entries is expensive for workloads
> > creating/deleting netdev and netns often.
> > 
> > I _think_ we should find a way for not creating
> > /sys/class/net/<interface>/queues/tx-{Q}/byte_queue_limits  directory
> > and files
> > for non BQL enabled devices (like loopback !)
> 
> We should try, see if anyone screams. We could use IFF_NO_QUEUE, and
> NETIF_F_LLTX as a proxy for "device doesn't have a real queue so BQL 
> would be pointless"? Obviously better to annotate the drivers which
> do have BQL support, but there's >50 of them on a quick count..

Let me make sure I understand the suggestion above. We want to disable
BQL completely for devices that has dev->features & NETIF_F_LLTX or
dev->priv_flags & IFF_NO_QUEUE, right?

Maybe we can add a ->enabled field in struct dql, and set it according
to the features above. Then we can created the sysfs and process the dql
operations based on that field. This should avoid some unnecessary calls
also, if we are not display sysfs.

Here is a very simple PoC to represent what I had in mind. Am I in the
right direction?

diff --git a/include/linux/dynamic_queue_limits.h b/include/linux/dynamic_queue_limits.h
index 407c2f281b64..a9d17597ea80 100644
--- a/include/linux/dynamic_queue_limits.h
+++ b/include/linux/dynamic_queue_limits.h
@@ -62,6 +62,7 @@ struct dql {
 	unsigned int	max_limit;		/* Max limit */
 	unsigned int	min_limit;		/* Minimum limit */
 	unsigned int	slack_hold_time;	/* Time to measure slack */
+	bool		enabled;		/* Queue is active */
 };
 
 /* Set some static maximums */
@@ -101,7 +102,7 @@ void dql_completed(struct dql *dql, unsigned int count);
 void dql_reset(struct dql *dql);
 
 /* Initialize dql state */
-void dql_init(struct dql *dql, unsigned int hold_time);
+void dql_init(struct net_device *dev, struct dql *dql, unsigned int hold_time);
 
 #endif /* _KERNEL_ */
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ef7bfbb98497..5c69bbf4267d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3541,6 +3541,9 @@ static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue,
 					unsigned int bytes)
 {
 #ifdef CONFIG_BQL
+	if (!dev_queue->dql.enabled)
+		return
+
 	dql_queued(&dev_queue->dql, bytes);
 
 	if (likely(dql_avail(&dev_queue->dql) >= 0))
@@ -3573,7 +3576,8 @@ static inline bool __netdev_tx_sent_queue(struct netdev_queue *dev_queue,
 {
 	if (xmit_more) {
 #ifdef CONFIG_BQL
-		dql_queued(&dev_queue->dql, bytes);
+		if (dev_queue->dql.enabled)
+			dql_queued(&dev_queue->dql, bytes);
 #endif
 		return netif_tx_queue_stopped(dev_queue);
 	}
@@ -3617,7 +3621,7 @@ static inline void netdev_tx_completed_queue(struct netdev_queue *dev_queue,
 					     unsigned int pkts, unsigned int bytes)
 {
 #ifdef CONFIG_BQL
-	if (unlikely(!bytes))
+	if (unlikely(!bytes) || !dev_queue->dql.enabled)
 		return;
 
 	dql_completed(&dev_queue->dql, bytes);
@@ -3656,6 +3660,9 @@ static inline void netdev_completed_queue(struct net_device *dev,
 static inline void netdev_tx_reset_queue(struct netdev_queue *q)
 {
 #ifdef CONFIG_BQL
+	if (!q->dql.enabled)
+		return;
+
 	clear_bit(__QUEUE_STATE_STACK_XOFF, &q->state);
 	dql_reset(&q->dql);
 #endif
diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c
index fde0aa244148..0a0a51f06c3b 100644
--- a/lib/dynamic_queue_limits.c
+++ b/lib/dynamic_queue_limits.c
@@ -10,6 +10,7 @@
 #include <linux/dynamic_queue_limits.h>
 #include <linux/compiler.h>
 #include <linux/export.h>
+#include <linux/netdevice.h>
 
 #define POSDIFF(A, B) ((int)((A) - (B)) > 0 ? (A) - (B) : 0)
 #define AFTER_EQ(A, B) ((int)((A) - (B)) >= 0)
@@ -128,11 +129,21 @@ void dql_reset(struct dql *dql)
 }
 EXPORT_SYMBOL(dql_reset);
 
-void dql_init(struct dql *dql, unsigned int hold_time)
+static bool netdev_dql_supported(struct net_device *dev)
+{
+	if (dev->features & NETIF_F_LLTX ||
+	    dev->priv_flags & IFF_NO_QUEUE)
+		return false;
+
+	return true;
+}
+
+void dql_init(struct net_device *dev, struct dql *dql, unsigned int hold_time)
 {
 	dql->max_limit = DQL_MAX_LIMIT;
 	dql->min_limit = 0;
 	dql->slack_hold_time = hold_time;
 	dql_reset(dql);
+	dql->enabled = netdev_dql_supported(dev);
 }
 EXPORT_SYMBOL(dql_init);
diff --git a/net/core/dev.c b/net/core/dev.c
index 9bb792cecc16..76aa70ee2c87 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -10052,7 +10052,7 @@ static void netdev_init_one_queue(struct net_device *dev,
 	netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
 	queue->dev = dev;
 #ifdef CONFIG_BQL
-	dql_init(&queue->dql, HZ);
+	dql_init(dev, &queue->dql, HZ);
 #endif
 }
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index a09d507c5b03..144ce4bb57bc 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1709,9 +1709,11 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index)
 		goto err;
 
 #ifdef CONFIG_BQL
-	error = sysfs_create_group(kobj, &dql_group);
-	if (error)
-		goto err;
+	if (queue->dql.enabled) {
+		error = sysfs_create_group(kobj, &dql_group);
+		if (error)
+			goto err;
+	}
 #endif
 
 	kobject_uevent(kobj, KOBJ_ADD);

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ