[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20181024015832.16607-1-0xeffeff@gmail.com>
Date: Wed, 24 Oct 2018 01:58:32 +0000
From: Jeff Barnhill <0xeffeff@...il.com>
To: netdev@...r.kernel.org
Cc: davem@...emloft.net, kuznet@....inr.ac.ru, yoshfuji@...ux-ipv6.org,
Jeff Barnhill <0xeffeff@...il.com>
Subject: [PATCH net v2] net/ipv6: Add anycast addresses to a global hashtable
icmp6_send() function is expensive on systems with a large number of
interfaces. Every time it’s called, it has to verify that the source
address does not correspond to an existing anycast address by looping
through every device and every anycast address on the device. This can
result in significant delays for a CPU when there are a large number of
neighbors and ND timers are frequently timing out and calling
neigh_invalidate().
Add anycast addresses to a global hashtable to allow quick searching for
matching anycast addresses. This is based on inet6_addr_lst in addrconf.c.
Signed-off-by: Jeff Barnhill <0xeffeff@...il.com>
---
include/net/addrconf.h | 2 +
include/net/if_inet6.h | 8 +++
net/ipv6/af_inet6.c | 5 ++
net/ipv6/anycast.c | 132 ++++++++++++++++++++++++++++++++++++++++++++++++-
4 files changed, 145 insertions(+), 2 deletions(-)
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 6def0351bcc3..0cee3f99c41d 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -312,6 +312,8 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
const struct in6_addr *addr);
bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev,
const struct in6_addr *addr);
+int anycast_init(void);
+void anycast_cleanup(void);
/* Device notifier */
int register_inet6addr_notifier(struct notifier_block *nb);
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index d7578cf49c3a..55a4a1d8cebc 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -142,6 +142,14 @@ struct ipv6_ac_socklist {
struct ipv6_ac_socklist *acl_next;
};
+struct ipv6_ac_addrlist {
+ struct in6_addr acal_addr;
+ possible_net_t acal_pnet;
+ int acal_users;
+ struct hlist_node acal_lst; /* inet6_acaddr_lst */
+ struct rcu_head rcu;
+};
+
struct ifacaddr6 {
struct in6_addr aca_addr;
struct fib6_info *aca_rt;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 9a4261e50272..971a05fdd3bd 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1001,6 +1001,9 @@ static int __init inet6_init(void)
err = ip6_flowlabel_init();
if (err)
goto ip6_flowlabel_fail;
+ err = anycast_init();
+ if (err)
+ goto anycast_fail;
err = addrconf_init();
if (err)
goto addrconf_fail;
@@ -1091,6 +1094,8 @@ static int __init inet6_init(void)
ipv6_exthdrs_fail:
addrconf_cleanup();
addrconf_fail:
+ anycast_cleanup();
+anycast_fail:
ip6_flowlabel_cleanup();
ip6_flowlabel_fail:
ndisc_late_cleanup();
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 4e0ff7031edd..def1e156d857 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -44,8 +44,22 @@
#include <net/checksum.h>
+#define IN6_ADDR_HSIZE_SHIFT 8
+#define IN6_ADDR_HSIZE BIT(IN6_ADDR_HSIZE_SHIFT)
+/* anycast address hash table
+ */
+static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE];
+static DEFINE_SPINLOCK(acaddr_hash_lock);
+
static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
+static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr)
+{
+ u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
+
+ return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
+}
+
/*
* socket join an anycast group
*/
@@ -204,6 +218,83 @@ void ipv6_sock_ac_close(struct sock *sk)
rtnl_unlock();
}
+static struct ipv6_ac_addrlist *acal_alloc(struct net *net,
+ const struct in6_addr *addr)
+{
+ struct ipv6_ac_addrlist *acal;
+
+ acal = kzalloc(sizeof(*acal), GFP_ATOMIC);
+ if (!acal)
+ return NULL;
+
+ acal->acal_addr = *addr;
+ write_pnet(&acal->acal_pnet, get_net(net));
+ acal->acal_users = 1;
+ INIT_HLIST_NODE(&acal->acal_lst);
+
+ return acal;
+}
+
+static void acal_free_rcu(struct rcu_head *h)
+{
+ struct ipv6_ac_addrlist *acal;
+
+ acal = container_of(h, struct ipv6_ac_addrlist, rcu);
+ WARN_ON(acal->acal_users);
+ put_net(read_pnet(&acal->acal_pnet));
+ kfree(acal);
+}
+
+static int ipv6_add_acaddr_hash(struct net *net, const struct in6_addr *addr)
+{
+ unsigned int hash = inet6_acaddr_hash(net, addr);
+ struct ipv6_ac_addrlist *acal;
+ int err = 0;
+
+ spin_lock(&acaddr_hash_lock);
+ hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) {
+ if (!net_eq(read_pnet(&acal->acal_pnet), net))
+ continue;
+ if (ipv6_addr_equal(&acal->acal_addr, addr)) {
+ acal->acal_users++;
+ goto out;
+ }
+ }
+
+ acal = acal_alloc(net, addr);
+ if (!acal) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ hlist_add_head_rcu(&acal->acal_lst, &inet6_acaddr_lst[hash]);
+
+out:
+ spin_unlock(&acaddr_hash_lock);
+ return err;
+}
+
+static void ipv6_del_acaddr_hash(struct net *net, const struct in6_addr *addr)
+{
+ unsigned int hash = inet6_acaddr_hash(net, addr);
+ struct ipv6_ac_addrlist *acal;
+
+ spin_lock(&acaddr_hash_lock);
+ hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) {
+ if (!net_eq(read_pnet(&acal->acal_pnet), net))
+ continue;
+ if (ipv6_addr_equal(&acal->acal_addr, addr)) {
+ if (--acal->acal_users < 1) {
+ hlist_del_init_rcu(&acal->acal_lst);
+ call_rcu(&acal->rcu, acal_free_rcu);
+ }
+ spin_unlock(&acaddr_hash_lock);
+ return;
+ }
+ }
+ spin_unlock(&acaddr_hash_lock);
+}
+
static void aca_get(struct ifacaddr6 *aca)
{
refcount_inc(&aca->aca_refcnt);
@@ -275,6 +366,13 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
err = -ENOMEM;
goto out;
}
+ err = ipv6_add_acaddr_hash(dev_net(idev->dev), addr);
+ if (err) {
+ fib6_info_release(f6i);
+ fib6_info_release(f6i);
+ kfree(aca);
+ goto out;
+ }
aca->aca_next = idev->ac_list;
idev->ac_list = aca;
@@ -324,6 +422,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
prev_aca->aca_next = aca->aca_next;
else
idev->ac_list = aca->aca_next;
+ ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr);
write_unlock_bh(&idev->lock);
addrconf_leave_solict(idev, &aca->aca_addr);
@@ -350,6 +449,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev)
write_lock_bh(&idev->lock);
while ((aca = idev->ac_list) != NULL) {
idev->ac_list = aca->aca_next;
+ ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr);
+
write_unlock_bh(&idev->lock);
addrconf_leave_solict(idev, &aca->aca_addr);
@@ -391,16 +492,22 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
const struct in6_addr *addr)
{
bool found = false;
+ unsigned int hash = inet6_acaddr_hash(net, addr);
+ struct ipv6_ac_addrlist *acal;
rcu_read_lock();
if (dev)
found = ipv6_chk_acast_dev(dev, addr);
else
- for_each_netdev_rcu(net, dev)
- if (ipv6_chk_acast_dev(dev, addr)) {
+ hlist_for_each_entry_rcu(acal, &inet6_acaddr_lst[hash],
+ acal_lst) {
+ if (!net_eq(read_pnet(&acal->acal_pnet), net))
+ continue;
+ if (ipv6_addr_equal(&acal->acal_addr, addr)) {
found = true;
break;
}
+ }
rcu_read_unlock();
return found;
}
@@ -539,4 +646,25 @@ void ac6_proc_exit(struct net *net)
{
remove_proc_entry("anycast6", net->proc_net);
}
+
+/* Init / cleanup code
+ */
+int __init anycast_init(void)
+{
+ int i;
+
+ for (i = 0; i < IN6_ADDR_HSIZE; i++)
+ INIT_HLIST_HEAD(&inet6_acaddr_lst[i]);
+ return 0;
+}
+
+void anycast_cleanup(void)
+{
+ int i;
+
+ spin_lock(&acaddr_hash_lock);
+ for (i = 0; i < IN6_ADDR_HSIZE; i++)
+ WARN_ON(!hlist_empty(&inet6_acaddr_lst[i]));
+ spin_unlock(&acaddr_hash_lock);
+}
#endif
--
2.14.1
Powered by blists - more mailing lists