lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240506203207.1307971-1-witu@nvidia.com>
Date: Mon, 6 May 2024 20:32:07 +0000
From: William Tu <witu@...dia.com>
To: <netdev@...r.kernel.org>
CC: <jiri@...dia.com>, <bodong@...dia.com>, <kuba@...nel.org>,
	<witu@...dia.com>
Subject: [PATCH RFC net-next] net: cache the __dev_alloc_name()

When a system has around 1000 netdevs, adding the 1001st device becomes
very slow. The devlink command to create an SF
  $ devlink port add pci/0000:03:00.0 flavour pcisf \
    pfnum 0 sfnum 1001
takes around 5 seconds, and Linux perf and flamegraph show 19% of time
spent on __dev_alloc_name() [1].

The reason is that devlink first requests for next available "eth%d".
And __dev_alloc_name will scan all existing netdev to match on "ethN",
set N to a 'inuse' bitmap, and find/return next available number,
in our case eth0.

And later on based on udev rule, we renamed it from eth0 to
"en3f0pf0sf1001" and with altname below
  14: en3f0pf0sf1001: <BROADCAST,MULTICAST,UP,LOWER_UP> ...
      altname enp3s0f0npf0sf1001

So eth0 is actually never being used, but as we have 1k "en3f0pf0sfN"
devices + 1k altnames, the __dev_alloc_name spends lots of time goint
through all existing netdev and try to build the 'inuse' bitmap of
pattern 'eth%d'. And the bitmap barely has any bit set, and it rescanes
every time.

I want to see if it makes sense to save/cache the result, or is there
any way to not go through the 'eth%d' pattern search. The RFC patch
adds name_pat (name pattern) hlist and saves the 'inuse' bitmap. It saves
pattens, ex: "eth%d", "veth%d", with the bitmap, and lookup before
scanning all existing netdevs.

Note: code is working just for quick performance benchmark, and still
missing lots of stuff. Using hlist seems to overkill, as I think
we only have few patterns
$ git grep alloc_netdev drivers/ net/ | grep %d

1. https://github.com/williamtu/net-next/issues/1

Signed-off-by: William Tu <witu@...dia.com>
---
 include/net/net_namespace.h |   1 +
 net/core/dev.c              | 103 ++++++++++++++++++++++++++++++++++--
 net/core/dev.h              |   6 +++
 3 files changed, 106 insertions(+), 4 deletions(-)

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 20c34bd7a077..82c15020916b 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -109,6 +109,7 @@ struct net {
 
 	struct hlist_head 	*dev_name_head;
 	struct hlist_head	*dev_index_head;
+	struct hlist_head 	*dev_name_pat_head;
 	struct xarray		dev_by_index;
 	struct raw_notifier_head	netdev_chain;
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 331848eca7d3..08c988cac7a2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -197,6 +197,14 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
 	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
 }
 
+static inline struct hlist_head *
+dev_name_pat_hash(struct net *net, const char *pat)
+{
+	unsigned int hash = full_name_hash(net, pat, strnlen(pat, IFNAMSIZ));
+
+	return &net->dev_name_pat_head[hash_32(hash, NETDEV_HASHBITS)];
+}
+
 static inline void rps_lock_irqsave(struct softnet_data *sd,
 				    unsigned long *flags)
 {
@@ -231,6 +239,64 @@ static inline void rps_unlock_irq_enable(struct softnet_data *sd)
 		local_irq_enable();
 }
 
+static struct netdev_name_pat_node *
+netdev_name_pat_node_alloc(const char *pattern)
+{
+	struct netdev_name_pat_node *pat_node;
+	const int max_netdevices = 8*PAGE_SIZE;
+
+	pat_node = kmalloc(sizeof(*pat_node), GFP_KERNEL);
+	if (!pat_node)
+		return NULL;
+
+	pat_node->inuse = bitmap_zalloc(max_netdevices, GFP_ATOMIC);
+	if (!pat_node->inuse) {
+		kfree(pat_node);
+		return NULL;
+	}
+
+	INIT_HLIST_NODE(&pat_node->hlist);
+	strscpy(pat_node->name_pat, pattern, IFNAMSIZ);
+
+	return pat_node;
+}
+
+static void netdev_name_pat_node_free(struct netdev_name_pat_node *pat_node)
+{
+	bitmap_free(pat_node->inuse);
+	kfree(pat_node);
+}
+
+static void netdev_name_pat_node_add(struct net *net,
+				     struct netdev_name_pat_node *pat_node)
+{
+	hlist_add_head(&pat_node->hlist,
+		       dev_name_pat_hash(net, pat_node->name_pat));
+}
+
+static void netdev_name_pat_node_del(struct netdev_name_pat_node *pat_node)
+{
+	hlist_del_rcu(&pat_node->hlist);
+}
+
+static struct netdev_name_pat_node *
+netdev_name_pat_node_lookup(struct net *net, const char *pat)
+{
+	struct hlist_head *head = dev_name_pat_hash(net, pat);
+	struct netdev_name_pat_node *pat_node;
+
+	hlist_for_each_entry(pat_node, head, hlist) {
+		if (!strcmp(pat_node->name_pat, pat))
+			return pat_node;
+	}
+	return NULL;
+}
+
+static bool netdev_name_pat_in_use(struct net *net, const char *pat) // eth%d
+{
+	return netdev_name_pat_node_lookup(net, pat);
+}
+
 static struct netdev_name_node *netdev_name_node_alloc(struct net_device *dev,
 						       const char *name)
 {
@@ -1057,6 +1123,7 @@ EXPORT_SYMBOL(dev_valid_name);
 
 static int __dev_alloc_name(struct net *net, const char *name, char *res)
 {
+	struct netdev_name_pat_node *pat_node;
 	int i = 0;
 	const char *p;
 	const int max_netdevices = 8*PAGE_SIZE;
@@ -1071,10 +1138,21 @@ static int __dev_alloc_name(struct net *net, const char *name, char *res)
 	if (!p || p[1] != 'd' || strchr(p + 2, '%'))
 		return -EINVAL;
 
-	/* Use one page as a bit array of possible slots */
-	inuse = bitmap_zalloc(max_netdevices, GFP_ATOMIC);
-	if (!inuse)
+	pat_node = netdev_name_pat_node_lookup(net, name);
+	if (pat_node) {
+		i = find_first_zero_bit(pat_node->inuse, max_netdevices);
+		__set_bit(i, pat_node->inuse);
+		strscpy(buf, name, IFNAMSIZ);
+		snprintf(res, IFNAMSIZ, buf, i);
+
+		return i;
+	}
+
+	pat_node = netdev_name_pat_node_alloc(name);
+	if (!pat_node)
 		return -ENOMEM;
+	netdev_name_pat_node_add(net, pat_node);
+	inuse = pat_node->inuse;
 
 	for_each_netdev(net, d) {
 		struct netdev_name_node *name_node;
@@ -1082,6 +1160,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *res)
 		netdev_for_each_altname(d, name_node) {
 			if (!sscanf(name_node->name, name, &i))
 				continue;
+
 			if (i < 0 || i >= max_netdevices)
 				continue;
 
@@ -1102,13 +1181,14 @@ static int __dev_alloc_name(struct net *net, const char *name, char *res)
 	}
 
 	i = find_first_zero_bit(inuse, max_netdevices);
-	bitmap_free(inuse);
+	__set_bit(i, inuse);
 	if (i == max_netdevices)
 		return -ENFILE;
 
 	/* 'res' and 'name' could overlap, use 'buf' as an intermediate buffer */
 	strscpy(buf, name, IFNAMSIZ);
 	snprintf(res, IFNAMSIZ, buf, i);
+
 	return i;
 }
 
@@ -11464,12 +11544,20 @@ static int __net_init netdev_init(struct net *net)
 	if (net->dev_index_head == NULL)
 		goto err_idx;
 
+	net->dev_name_pat_head = netdev_create_hash();
+	if (net->dev_name_pat_head == NULL)
+		goto err_name_pat;
+
+	printk("%s head %px\n", __func__, net->dev_name_pat_head);
+
 	xa_init_flags(&net->dev_by_index, XA_FLAGS_ALLOC1);
 
 	RAW_INIT_NOTIFIER_HEAD(&net->netdev_chain);
 
 	return 0;
 
+err_name_pat:
+	kfree(net->dev_index_head);
 err_idx:
 	kfree(net->dev_name_head);
 err_name:
@@ -11563,6 +11651,7 @@ static void __net_exit netdev_exit(struct net *net)
 {
 	kfree(net->dev_name_head);
 	kfree(net->dev_index_head);
+	kfree(net->dev_name_pat_head);
 	xa_destroy(&net->dev_by_index);
 	if (net != &init_net)
 		WARN_ON_ONCE(!list_empty(&net->dev_base_head));
@@ -11610,6 +11699,12 @@ static void __net_exit default_device_exit_net(struct net *net)
 			BUG();
 		}
 	}
+/*
+	hlist_for_each(pat_node, head, hlist) {
+		netdev_name_pat_node_del(pat_node);
+		netdev_name_pat_node_free(pat_node);
+	}
+*/
 }
 
 static void __net_exit default_device_exit_batch(struct list_head *net_list)
diff --git a/net/core/dev.h b/net/core/dev.h
index 2bcaf8eee50c..62133584dc14 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -59,6 +59,12 @@ struct netdev_name_node {
 	struct rcu_head rcu;
 };
 
+struct netdev_name_pat_node {
+	struct hlist_node hlist;
+	char name_pat[IFNAMSIZ];
+	unsigned long *inuse;
+};
+
 int netdev_get_name(struct net *net, char *name, int ifindex);
 int dev_change_name(struct net_device *dev, const char *newname);
 
-- 
2.34.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ