[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ZczSEBFtq6E6APUJ@gmail.com>
Date: Wed, 14 Feb 2024 06:45:36 -0800
From: Breno Leitao <leitao@...ian.org>
To: Jakub Kicinski <kuba@...nel.org>, edumazet@...gle.com
Cc: Eric Dumazet <edumazet@...gle.com>, davem@...emloft.net,
pabeni@...hat.com, Steven Rostedt <rostedt@...dmis.org>,
Masami Hiramatsu <mhiramat@...nel.org>,
Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
Andrew Morton <akpm@...ux-foundation.org>, weiwan@...gle.com,
netdev@...r.kernel.org, linux-kernel@...r.kernel.org,
horms@...nel.org, Jonathan Corbet <corbet@....net>,
Randy Dunlap <rdunlap@...radead.org>,
Bjorn Helgaas <bhelgaas@...gle.com>,
Johannes Berg <johannes.berg@...el.com>,
Thomas Weißschuh <linux@...ssschuh.net>,
"open list:TRACING" <linux-trace-kernel@...r.kernel.org>
Subject: Re: [PATCH net-next v3] net: dqs: add NIC stall detector based on BQL
On Tue, Feb 13, 2024 at 10:04:57AM -0800, Jakub Kicinski wrote:
> On Tue, 13 Feb 2024 14:57:49 +0100 Eric Dumazet wrote:
> > Please note that adding other sysfs entries is expensive for workloads
> > creating/deleting netdev and netns often.
> >
> > I _think_ we should find a way for not creating
> > /sys/class/net/<interface>/queues/tx-{Q}/byte_queue_limits directory
> > and files
> > for non BQL enabled devices (like loopback !)
>
> We should try, see if anyone screams. We could use IFF_NO_QUEUE, and
> NETIF_F_LLTX as a proxy for "device doesn't have a real queue so BQL
> would be pointless"? Obviously better to annotate the drivers which
> do have BQL support, but there's >50 of them on a quick count..
Let me make sure I understand the suggestion above. We want to disable
BQL completely for devices that have dev->features & NETIF_F_LLTX or
dev->priv_flags & IFF_NO_QUEUE set, right?
Maybe we can add an ->enabled field in struct dql, and set it according
to the features above. Then we can create the sysfs entries and process
the dql operations based on that field. This should also avoid some
unnecessary calls when we are not displaying the sysfs entries.
Here is a very simple PoC to represent what I had in mind. Am I heading
in the right direction?
diff --git a/include/linux/dynamic_queue_limits.h b/include/linux/dynamic_queue_limits.h
index 407c2f281b64..a9d17597ea80 100644
--- a/include/linux/dynamic_queue_limits.h
+++ b/include/linux/dynamic_queue_limits.h
@@ -62,6 +62,7 @@ struct dql {
unsigned int max_limit; /* Max limit */
unsigned int min_limit; /* Minimum limit */
unsigned int slack_hold_time; /* Time to measure slack */
+ bool enabled; /* Queue is active */
};
/* Set some static maximums */
@@ -101,7 +102,7 @@ void dql_completed(struct dql *dql, unsigned int count);
void dql_reset(struct dql *dql);
/* Initialize dql state */
-void dql_init(struct dql *dql, unsigned int hold_time);
+void dql_init(struct net_device *dev, struct dql *dql, unsigned int hold_time);
#endif /* _KERNEL_ */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ef7bfbb98497..5c69bbf4267d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3541,6 +3541,9 @@ static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue,
unsigned int bytes)
{
#ifdef CONFIG_BQL
+ if (!dev_queue->dql.enabled)
+ return
+
dql_queued(&dev_queue->dql, bytes);
if (likely(dql_avail(&dev_queue->dql) >= 0))
@@ -3573,7 +3576,8 @@ static inline bool __netdev_tx_sent_queue(struct netdev_queue *dev_queue,
{
if (xmit_more) {
#ifdef CONFIG_BQL
- dql_queued(&dev_queue->dql, bytes);
+ if (dev_queue->dql.enabled)
+ dql_queued(&dev_queue->dql, bytes);
#endif
return netif_tx_queue_stopped(dev_queue);
}
@@ -3617,7 +3621,7 @@ static inline void netdev_tx_completed_queue(struct netdev_queue *dev_queue,
unsigned int pkts, unsigned int bytes)
{
#ifdef CONFIG_BQL
- if (unlikely(!bytes))
+ if (unlikely(!bytes) || !dev_queue->dql.enabled)
return;
dql_completed(&dev_queue->dql, bytes);
@@ -3656,6 +3660,9 @@ static inline void netdev_completed_queue(struct net_device *dev,
static inline void netdev_tx_reset_queue(struct netdev_queue *q)
{
#ifdef CONFIG_BQL
+ if (!q->dql.enabled)
+ return;
+
clear_bit(__QUEUE_STATE_STACK_XOFF, &q->state);
dql_reset(&q->dql);
#endif
diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c
index fde0aa244148..0a0a51f06c3b 100644
--- a/lib/dynamic_queue_limits.c
+++ b/lib/dynamic_queue_limits.c
@@ -10,6 +10,7 @@
#include <linux/dynamic_queue_limits.h>
#include <linux/compiler.h>
#include <linux/export.h>
+#include <linux/netdevice.h>
#define POSDIFF(A, B) ((int)((A) - (B)) > 0 ? (A) - (B) : 0)
#define AFTER_EQ(A, B) ((int)((A) - (B)) >= 0)
@@ -128,11 +129,21 @@ void dql_reset(struct dql *dql)
}
EXPORT_SYMBOL(dql_reset);
-void dql_init(struct dql *dql, unsigned int hold_time)
+static bool netdev_dql_supported(struct net_device *dev)
+{
+ if (dev->features & NETIF_F_LLTX ||
+ dev->priv_flags & IFF_NO_QUEUE)
+ return false;
+
+ return true;
+}
+
+void dql_init(struct net_device *dev, struct dql *dql, unsigned int hold_time)
{
dql->max_limit = DQL_MAX_LIMIT;
dql->min_limit = 0;
dql->slack_hold_time = hold_time;
dql_reset(dql);
+ dql->enabled = netdev_dql_supported(dev);
}
EXPORT_SYMBOL(dql_init);
diff --git a/net/core/dev.c b/net/core/dev.c
index 9bb792cecc16..76aa70ee2c87 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -10052,7 +10052,7 @@ static void netdev_init_one_queue(struct net_device *dev,
netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
queue->dev = dev;
#ifdef CONFIG_BQL
- dql_init(&queue->dql, HZ);
+ dql_init(dev, &queue->dql, HZ);
#endif
}
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index a09d507c5b03..144ce4bb57bc 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1709,9 +1709,11 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index)
goto err;
#ifdef CONFIG_BQL
- error = sysfs_create_group(kobj, &dql_group);
- if (error)
- goto err;
+ if (queue->dql.enabled) {
+ error = sysfs_create_group(kobj, &dql_group);
+ if (error)
+ goto err;
+ }
#endif
kobject_uevent(kobj, KOBJ_ADD);
Powered by blists - more mailing lists