[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240506203207.1307971-1-witu@nvidia.com>
Date: Mon, 6 May 2024 20:32:07 +0000
From: William Tu <witu@...dia.com>
To: <netdev@...r.kernel.org>
CC: <jiri@...dia.com>, <bodong@...dia.com>, <kuba@...nel.org>,
<witu@...dia.com>
Subject: [PATCH RFC net-next] net: cache the __dev_alloc_name()
When a system has around 1000 netdevs, adding the 1001st device becomes
very slow. The devlink command to create an SF
$ devlink port add pci/0000:03:00.0 flavour pcisf \
pfnum 0 sfnum 1001
takes around 5 seconds, and Linux perf and flamegraph show 19% of time
spent on __dev_alloc_name() [1].
The reason is that devlink first requests for next available "eth%d".
And __dev_alloc_name will scan all existing netdev to match on "ethN",
set N to a 'inuse' bitmap, and find/return next available number,
in our case eth0.
And later on based on udev rule, we renamed it from eth0 to
"en3f0pf0sf1001" and with altname below
14: en3f0pf0sf1001: <BROADCAST,MULTICAST,UP,LOWER_UP> ...
altname enp3s0f0npf0sf1001
So eth0 is actually never being used, but as we have 1k "en3f0pf0sfN"
devices + 1k altnames, the __dev_alloc_name spends lots of time goint
through all existing netdev and try to build the 'inuse' bitmap of
pattern 'eth%d'. And the bitmap barely has any bit set, and it rescanes
every time.
I want to see if it makes sense to save/cache the result, or is there
any way to not go through the 'eth%d' pattern search. The RFC patch
adds name_pat (name pattern) hlist and saves the 'inuse' bitmap. It saves
pattens, ex: "eth%d", "veth%d", with the bitmap, and lookup before
scanning all existing netdevs.
Note: code is working just for quick performance benchmark, and still
missing lots of stuff. Using hlist seems to overkill, as I think
we only have few patterns
$ git grep alloc_netdev drivers/ net/ | grep %d
1. https://github.com/williamtu/net-next/issues/1
Signed-off-by: William Tu <witu@...dia.com>
---
include/net/net_namespace.h | 1 +
net/core/dev.c | 103 ++++++++++++++++++++++++++++++++++--
net/core/dev.h | 6 +++
3 files changed, 106 insertions(+), 4 deletions(-)
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 20c34bd7a077..82c15020916b 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -109,6 +109,7 @@ struct net {
struct hlist_head *dev_name_head;
struct hlist_head *dev_index_head;
+ struct hlist_head *dev_name_pat_head;
struct xarray dev_by_index;
struct raw_notifier_head netdev_chain;
diff --git a/net/core/dev.c b/net/core/dev.c
index 331848eca7d3..08c988cac7a2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -197,6 +197,14 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
}
+static inline struct hlist_head *
+dev_name_pat_hash(struct net *net, const char *pat)
+{
+ unsigned int hash = full_name_hash(net, pat, strnlen(pat, IFNAMSIZ));
+
+ return &net->dev_name_pat_head[hash_32(hash, NETDEV_HASHBITS)];
+}
+
static inline void rps_lock_irqsave(struct softnet_data *sd,
unsigned long *flags)
{
@@ -231,6 +239,64 @@ static inline void rps_unlock_irq_enable(struct softnet_data *sd)
local_irq_enable();
}
+static struct netdev_name_pat_node *
+netdev_name_pat_node_alloc(const char *pattern)
+{
+ struct netdev_name_pat_node *pat_node;
+ const int max_netdevices = 8*PAGE_SIZE;
+
+ pat_node = kmalloc(sizeof(*pat_node), GFP_KERNEL);
+ if (!pat_node)
+ return NULL;
+
+ pat_node->inuse = bitmap_zalloc(max_netdevices, GFP_ATOMIC);
+ if (!pat_node->inuse) {
+ kfree(pat_node);
+ return NULL;
+ }
+
+ INIT_HLIST_NODE(&pat_node->hlist);
+ strscpy(pat_node->name_pat, pattern, IFNAMSIZ);
+
+ return pat_node;
+}
+
+static void netdev_name_pat_node_free(struct netdev_name_pat_node *pat_node)
+{
+ bitmap_free(pat_node->inuse);
+ kfree(pat_node);
+}
+
+static void netdev_name_pat_node_add(struct net *net,
+ struct netdev_name_pat_node *pat_node)
+{
+ hlist_add_head(&pat_node->hlist,
+ dev_name_pat_hash(net, pat_node->name_pat));
+}
+
+static void netdev_name_pat_node_del(struct netdev_name_pat_node *pat_node)
+{
+ hlist_del_rcu(&pat_node->hlist);
+}
+
+static struct netdev_name_pat_node *
+netdev_name_pat_node_lookup(struct net *net, const char *pat)
+{
+ struct hlist_head *head = dev_name_pat_hash(net, pat);
+ struct netdev_name_pat_node *pat_node;
+
+ hlist_for_each_entry(pat_node, head, hlist) {
+ if (!strcmp(pat_node->name_pat, pat))
+ return pat_node;
+ }
+ return NULL;
+}
+
+static bool netdev_name_pat_in_use(struct net *net, const char *pat) // eth%d
+{
+ return netdev_name_pat_node_lookup(net, pat);
+}
+
static struct netdev_name_node *netdev_name_node_alloc(struct net_device *dev,
const char *name)
{
@@ -1057,6 +1123,7 @@ EXPORT_SYMBOL(dev_valid_name);
static int __dev_alloc_name(struct net *net, const char *name, char *res)
{
+ struct netdev_name_pat_node *pat_node;
int i = 0;
const char *p;
const int max_netdevices = 8*PAGE_SIZE;
@@ -1071,10 +1138,21 @@ static int __dev_alloc_name(struct net *net, const char *name, char *res)
if (!p || p[1] != 'd' || strchr(p + 2, '%'))
return -EINVAL;
- /* Use one page as a bit array of possible slots */
- inuse = bitmap_zalloc(max_netdevices, GFP_ATOMIC);
- if (!inuse)
+ pat_node = netdev_name_pat_node_lookup(net, name);
+ if (pat_node) {
+ i = find_first_zero_bit(pat_node->inuse, max_netdevices);
+ __set_bit(i, pat_node->inuse);
+ strscpy(buf, name, IFNAMSIZ);
+ snprintf(res, IFNAMSIZ, buf, i);
+
+ return i;
+ }
+
+ pat_node = netdev_name_pat_node_alloc(name);
+ if (!pat_node)
return -ENOMEM;
+ netdev_name_pat_node_add(net, pat_node);
+ inuse = pat_node->inuse;
for_each_netdev(net, d) {
struct netdev_name_node *name_node;
@@ -1082,6 +1160,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *res)
netdev_for_each_altname(d, name_node) {
if (!sscanf(name_node->name, name, &i))
continue;
+
if (i < 0 || i >= max_netdevices)
continue;
@@ -1102,13 +1181,14 @@ static int __dev_alloc_name(struct net *net, const char *name, char *res)
}
i = find_first_zero_bit(inuse, max_netdevices);
- bitmap_free(inuse);
+ __set_bit(i, inuse);
if (i == max_netdevices)
return -ENFILE;
/* 'res' and 'name' could overlap, use 'buf' as an intermediate buffer */
strscpy(buf, name, IFNAMSIZ);
snprintf(res, IFNAMSIZ, buf, i);
+
return i;
}
@@ -11464,12 +11544,20 @@ static int __net_init netdev_init(struct net *net)
if (net->dev_index_head == NULL)
goto err_idx;
+ net->dev_name_pat_head = netdev_create_hash();
+ if (net->dev_name_pat_head == NULL)
+ goto err_name_pat;
+
+ printk("%s head %px\n", __func__, net->dev_name_pat_head);
+
xa_init_flags(&net->dev_by_index, XA_FLAGS_ALLOC1);
RAW_INIT_NOTIFIER_HEAD(&net->netdev_chain);
return 0;
+err_name_pat:
+ kfree(net->dev_index_head);
err_idx:
kfree(net->dev_name_head);
err_name:
@@ -11563,6 +11651,7 @@ static void __net_exit netdev_exit(struct net *net)
{
kfree(net->dev_name_head);
kfree(net->dev_index_head);
+ kfree(net->dev_name_pat_head);
xa_destroy(&net->dev_by_index);
if (net != &init_net)
WARN_ON_ONCE(!list_empty(&net->dev_base_head));
@@ -11610,6 +11699,12 @@ static void __net_exit default_device_exit_net(struct net *net)
BUG();
}
}
+/*
+ hlist_for_each(pat_node, head, hlist) {
+ netdev_name_pat_node_del(pat_node);
+ netdev_name_pat_node_free(pat_node);
+ }
+*/
}
static void __net_exit default_device_exit_batch(struct list_head *net_list)
diff --git a/net/core/dev.h b/net/core/dev.h
index 2bcaf8eee50c..62133584dc14 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -59,6 +59,12 @@ struct netdev_name_node {
struct rcu_head rcu;
};
+struct netdev_name_pat_node {
+ struct hlist_node hlist;
+ char name_pat[IFNAMSIZ];
+ unsigned long *inuse;
+};
+
int netdev_get_name(struct net *net, char *name, int ifindex);
int dev_change_name(struct net_device *dev, const char *newname);
--
2.34.1
Powered by blists - more mailing lists