[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20181028015159.10636-1-0xeffeff@gmail.com>
Date: Sun, 28 Oct 2018 01:51:59 +0000
From: Jeff Barnhill <0xeffeff@...il.com>
To: netdev@...r.kernel.org
Cc: davem@...emloft.net, kuznet@....inr.ac.ru, yoshfuji@...ux-ipv6.org,
Jeff Barnhill <0xeffeff@...il.com>
Subject: [PATCH net v5] net/ipv6: Add anycast addresses to a global hashtable
icmp6_send() function is expensive on systems with a large number of
interfaces. Every time it’s called, it has to verify that the source
address does not correspond to an existing anycast address by looping
through every device and every anycast address on the device. This can
result in significant delays for a CPU when there are a large number of
neighbors and ND timers are frequently timing out and calling
neigh_invalidate().
Add anycast addresses to a global hashtable to allow quick searching for
matching anycast addresses. This is based on inet6_addr_lst in addrconf.c.
Signed-off-by: Jeff Barnhill <0xeffeff@...il.com>
---
include/net/addrconf.h | 2 +
include/net/if_inet6.h | 8 ++++
net/ipv6/af_inet6.c | 5 ++
net/ipv6/anycast.c | 121 ++++++++++++++++++++++++++++++++++++++++++++++++-
4 files changed, 134 insertions(+), 2 deletions(-)
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 14b789a123e7..799af1a037d1 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -317,6 +317,8 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
const struct in6_addr *addr);
bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev,
const struct in6_addr *addr);
+int anycast_init(void);
+void anycast_cleanup(void);
/* Device notifier */
int register_inet6addr_notifier(struct notifier_block *nb);
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index d7578cf49c3a..a445014b981d 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -142,6 +142,14 @@ struct ipv6_ac_socklist {
struct ipv6_ac_socklist *acl_next;
};
+struct ipv6_ac_addrlist {
+ struct in6_addr acal_addr;
+ possible_net_t acal_pnet;
+ refcount_t acal_users;
+ struct hlist_node acal_lst; /* inet6_acaddr_lst */
+ struct rcu_head rcu;
+};
+
struct ifacaddr6 {
struct in6_addr aca_addr;
struct fib6_info *aca_rt;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 3f4d61017a69..ddc8a6dbfba2 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1001,6 +1001,9 @@ static int __init inet6_init(void)
err = ip6_flowlabel_init();
if (err)
goto ip6_flowlabel_fail;
+ err = anycast_init();
+ if (err)
+ goto anycast_fail;
err = addrconf_init();
if (err)
goto addrconf_fail;
@@ -1091,6 +1094,8 @@ static int __init inet6_init(void)
ipv6_exthdrs_fail:
addrconf_cleanup();
addrconf_fail:
+ anycast_cleanup();
+anycast_fail:
ip6_flowlabel_cleanup();
ip6_flowlabel_fail:
ndisc_late_cleanup();
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 4e0ff7031edd..ca51c9d57ce5 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -44,8 +44,22 @@
#include <net/checksum.h>
+#define IN6_ADDR_HSIZE_SHIFT 8
+#define IN6_ADDR_HSIZE BIT(IN6_ADDR_HSIZE_SHIFT)
+/* anycast address hash table
+ */
+static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE];
+static DEFINE_SPINLOCK(acaddr_hash_lock);
+
static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
+static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr)
+{
+ u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
+
+ return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
+}
+
/*
* socket join an anycast group
*/
@@ -204,6 +218,73 @@ void ipv6_sock_ac_close(struct sock *sk)
rtnl_unlock();
}
+static struct ipv6_ac_addrlist *acal_alloc(struct net *net,
+ const struct in6_addr *addr)
+{
+ struct ipv6_ac_addrlist *acal;
+
+ acal = kzalloc(sizeof(*acal), GFP_ATOMIC);
+ if (!acal)
+ return NULL;
+
+ acal->acal_addr = *addr;
+ write_pnet(&acal->acal_pnet, net);
+ refcount_set(&acal->acal_users, 1);
+ INIT_HLIST_NODE(&acal->acal_lst);
+
+ return acal;
+}
+
+static int ipv6_add_acaddr_hash(struct net *net, const struct in6_addr *addr)
+{
+ unsigned int hash = inet6_acaddr_hash(net, addr);
+ struct ipv6_ac_addrlist *acal;
+ int err = 0;
+
+ spin_lock(&acaddr_hash_lock);
+ hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) {
+ if (!net_eq(read_pnet(&acal->acal_pnet), net))
+ continue;
+ if (ipv6_addr_equal(&acal->acal_addr, addr)) {
+ refcount_inc(&acal->acal_users);
+ goto out;
+ }
+ }
+
+ acal = acal_alloc(net, addr);
+ if (!acal) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ hlist_add_head_rcu(&acal->acal_lst, &inet6_acaddr_lst[hash]);
+
+out:
+ spin_unlock(&acaddr_hash_lock);
+ return err;
+}
+
+static void ipv6_del_acaddr_hash(struct net *net, const struct in6_addr *addr)
+{
+ unsigned int hash = inet6_acaddr_hash(net, addr);
+ struct ipv6_ac_addrlist *acal;
+
+ spin_lock(&acaddr_hash_lock);
+ hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) {
+ if (!net_eq(read_pnet(&acal->acal_pnet), net))
+ continue;
+ if (ipv6_addr_equal(&acal->acal_addr, addr)) {
+ if (refcount_dec_and_test(&acal->acal_users)) {
+ hlist_del_init_rcu(&acal->acal_lst);
+ kfree_rcu(acal, rcu);
+ }
+ spin_unlock(&acaddr_hash_lock);
+ return;
+ }
+ }
+ spin_unlock(&acaddr_hash_lock);
+}
+
static void aca_get(struct ifacaddr6 *aca)
{
refcount_inc(&aca->aca_refcnt);
@@ -275,6 +356,12 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
err = -ENOMEM;
goto out;
}
+ err = ipv6_add_acaddr_hash(dev_net(idev->dev), addr);
+ if (err) {
+ aca_put(aca);
+ fib6_info_release(f6i);
+ goto out;
+ }
aca->aca_next = idev->ac_list;
idev->ac_list = aca;
@@ -324,6 +411,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
prev_aca->aca_next = aca->aca_next;
else
idev->ac_list = aca->aca_next;
+ ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr);
write_unlock_bh(&idev->lock);
addrconf_leave_solict(idev, &aca->aca_addr);
@@ -350,6 +438,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev)
write_lock_bh(&idev->lock);
while ((aca = idev->ac_list) != NULL) {
idev->ac_list = aca->aca_next;
+ ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr);
+
write_unlock_bh(&idev->lock);
addrconf_leave_solict(idev, &aca->aca_addr);
@@ -390,17 +480,23 @@ static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *ad
bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
const struct in6_addr *addr)
{
+ unsigned int hash = inet6_acaddr_hash(net, addr);
+ struct ipv6_ac_addrlist *acal;
bool found = false;
rcu_read_lock();
if (dev)
found = ipv6_chk_acast_dev(dev, addr);
else
- for_each_netdev_rcu(net, dev)
- if (ipv6_chk_acast_dev(dev, addr)) {
+ hlist_for_each_entry_rcu(acal, &inet6_acaddr_lst[hash],
+ acal_lst) {
+ if (!net_eq(read_pnet(&acal->acal_pnet), net))
+ continue;
+ if (ipv6_addr_equal(&acal->acal_addr, addr)) {
found = true;
break;
}
+ }
rcu_read_unlock();
return found;
}
@@ -539,4 +635,25 @@ void ac6_proc_exit(struct net *net)
{
remove_proc_entry("anycast6", net->proc_net);
}
+
+/* Init / cleanup code
+ */
+int __init anycast_init(void)
+{
+ int i;
+
+ for (i = 0; i < IN6_ADDR_HSIZE; i++)
+ INIT_HLIST_HEAD(&inet6_acaddr_lst[i]);
+ return 0;
+}
+
+void anycast_cleanup(void)
+{
+ int i;
+
+ spin_lock(&acaddr_hash_lock);
+ for (i = 0; i < IN6_ADDR_HSIZE; i++)
+ WARN_ON(!hlist_empty(&inet6_acaddr_lst[i]));
+ spin_unlock(&acaddr_hash_lock);
+}
#endif
--
2.14.1
Powered by blists - more mailing lists