lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <f9723555-8b1e-d4bb-1937-1c2edffad8b1@gmail.com>
Date:   Tue, 23 Oct 2018 20:12:10 -0700
From:   Eric Dumazet <eric.dumazet@...il.com>
To:     Jeff Barnhill <0xeffeff@...il.com>, netdev@...r.kernel.org
Cc:     davem@...emloft.net, kuznet@....inr.ac.ru, yoshfuji@...ux-ipv6.org
Subject: Re: [PATCH net v2] net/ipv6: Add anycast addresses to a global
 hashtable



On 10/23/2018 06:58 PM, Jeff Barnhill wrote:
> icmp6_send() function is expensive on systems with a large number of
> interfaces. Every time it’s called, it has to verify that the source
> address does not correspond to an existing anycast address by looping
> through every device and every anycast address on the device.  This can
> result in significant delays for a CPU when there are a large number of
> neighbors and ND timers are frequently timing out and calling
> neigh_invalidate().
> 
> Add anycast addresses to a global hashtable to allow quick searching for
> matching anycast addresses.  This is based on inet6_addr_lst in addrconf.c.
> 
> Signed-off-by: Jeff Barnhill <0xeffeff@...il.com>
> ---
>  include/net/addrconf.h |   2 +
>  include/net/if_inet6.h |   8 +++
>  net/ipv6/af_inet6.c    |   5 ++
>  net/ipv6/anycast.c     | 132 ++++++++++++++++++++++++++++++++++++++++++++++++-
>  4 files changed, 145 insertions(+), 2 deletions(-)
> 
> diff --git a/include/net/addrconf.h b/include/net/addrconf.h
> index 6def0351bcc3..0cee3f99c41d 100644
> --- a/include/net/addrconf.h
> +++ b/include/net/addrconf.h
> @@ -312,6 +312,8 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
>  			 const struct in6_addr *addr);
>  bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev,
>  			     const struct in6_addr *addr);
> +int anycast_init(void);
> +void anycast_cleanup(void);
>  
>  /* Device notifier */
>  int register_inet6addr_notifier(struct notifier_block *nb);
> diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
> index d7578cf49c3a..55a4a1d8cebc 100644
> --- a/include/net/if_inet6.h
> +++ b/include/net/if_inet6.h
> @@ -142,6 +142,14 @@ struct ipv6_ac_socklist {
>  	struct ipv6_ac_socklist *acl_next;
>  };
>  
> +struct ipv6_ac_addrlist {
> +	struct in6_addr		acal_addr;
> +	possible_net_t		acal_pnet;
> +	int			acal_users;

That would be a refcount_t acal_users; so that CONFIG_REFCOUNT_FULL brings debugging for free.

> +	struct hlist_node	acal_lst; /* inet6_acaddr_lst */
> +	struct rcu_head		rcu;
> +};
> +
>  struct ifacaddr6 {
>  	struct in6_addr		aca_addr;
>  	struct fib6_info	*aca_rt;
> diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
> index 9a4261e50272..971a05fdd3bd 100644
> --- a/net/ipv6/af_inet6.c
> +++ b/net/ipv6/af_inet6.c
> @@ -1001,6 +1001,9 @@ static int __init inet6_init(void)
>  	err = ip6_flowlabel_init();
>  	if (err)
>  		goto ip6_flowlabel_fail;
> +	err = anycast_init();
> +	if (err)
> +		goto anycast_fail;
>  	err = addrconf_init();
>  	if (err)
>  		goto addrconf_fail;
> @@ -1091,6 +1094,8 @@ static int __init inet6_init(void)
>  ipv6_exthdrs_fail:
>  	addrconf_cleanup();
>  addrconf_fail:
> +	anycast_cleanup();
> +anycast_fail:
>  	ip6_flowlabel_cleanup();
>  ip6_flowlabel_fail:
>  	ndisc_late_cleanup();
> diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
> index 4e0ff7031edd..def1e156d857 100644
> --- a/net/ipv6/anycast.c
> +++ b/net/ipv6/anycast.c
> @@ -44,8 +44,22 @@
>  
>  #include <net/checksum.h>
>  
> +#define IN6_ADDR_HSIZE_SHIFT	8
> +#define IN6_ADDR_HSIZE		BIT(IN6_ADDR_HSIZE_SHIFT)
> +/*	anycast address hash table
> + */
> +static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE];
> +static DEFINE_SPINLOCK(acaddr_hash_lock);
> +
>  static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr);
>  
> +static u32 inet6_acaddr_hash(struct net *net, const struct in6_addr *addr)
> +{
> +	u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net);
> +
> +	return hash_32(val, IN6_ADDR_HSIZE_SHIFT);
> +}
> +
>  /*
>   *	socket join an anycast group
>   */
> @@ -204,6 +218,83 @@ void ipv6_sock_ac_close(struct sock *sk)
>  	rtnl_unlock();
>  }
>  
> +static struct ipv6_ac_addrlist *acal_alloc(struct net *net,
> +					   const struct in6_addr *addr)
> +{
> +	struct ipv6_ac_addrlist *acal;
> +
> +	acal = kzalloc(sizeof(*acal), GFP_ATOMIC);
> +	if (!acal)
> +		return NULL;
> +
> +	acal->acal_addr = *addr;
> +	write_pnet(&acal->acal_pnet, get_net(net));

I am not sure why you grab a reference on the netns.

The ipv6 address will be freed at some point before the netns disappears.
It would automatically remove the associated struct ipv6_ac_addrlist.

> +	acal->acal_users = 1;
> +	INIT_HLIST_NODE(&acal->acal_lst);
> +
> +	return acal;
> +}
> +
> +static void acal_free_rcu(struct rcu_head *h)
> +{
> +	struct ipv6_ac_addrlist *acal;
> +
> +	acal = container_of(h, struct ipv6_ac_addrlist, rcu);
> +	WARN_ON(acal->acal_users);

Not needed with refcount_t debugging infra.

> +	put_net(read_pnet(&acal->acal_pnet));
> +	kfree(acal);

So this could use kfree_rcu() in the caller, and get rid of acal_free_rcu() completely.

> +}
> +
> +static int ipv6_add_acaddr_hash(struct net *net, const struct in6_addr *addr)
> +{
> +	unsigned int hash = inet6_acaddr_hash(net, addr);
> +	struct ipv6_ac_addrlist *acal;
> +	int err = 0;
> +
> +	spin_lock(&acaddr_hash_lock);
> +	hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) {
> +		if (!net_eq(read_pnet(&acal->acal_pnet), net))
> +			continue;
> +		if (ipv6_addr_equal(&acal->acal_addr, addr)) {
> +			acal->acal_users++;
> +			goto out;
> +		}
> +	}
> +
> +	acal = acal_alloc(net, addr);
> +	if (!acal) {
> +		err = -ENOMEM;
> +		goto out;
> +	}
> +
> +	hlist_add_head_rcu(&acal->acal_lst, &inet6_acaddr_lst[hash]);
> +
> +out:
> +	spin_unlock(&acaddr_hash_lock);
> +	return err;
> +}
> +
> +static void ipv6_del_acaddr_hash(struct net *net, const struct in6_addr *addr)
> +{
> +	unsigned int hash = inet6_acaddr_hash(net, addr);
> +	struct ipv6_ac_addrlist *acal;
> +
> +	spin_lock(&acaddr_hash_lock);
> +	hlist_for_each_entry(acal, &inet6_acaddr_lst[hash], acal_lst) {
> +		if (!net_eq(read_pnet(&acal->acal_pnet), net))
> +			continue;
> +		if (ipv6_addr_equal(&acal->acal_addr, addr)) {
> +			if (--acal->acal_users < 1) {
> +				hlist_del_init_rcu(&acal->acal_lst);
> +				call_rcu(&acal->rcu, acal_free_rcu);
> +			}
> +			spin_unlock(&acaddr_hash_lock);
> +			return;
> +		}
> +	}
> +	spin_unlock(&acaddr_hash_lock);
> +}
> +
>  static void aca_get(struct ifacaddr6 *aca)
>  {
>  	refcount_inc(&aca->aca_refcnt);
> @@ -275,6 +366,13 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
>  		err = -ENOMEM;
>  		goto out;
>  	}
> +	err = ipv6_add_acaddr_hash(dev_net(idev->dev), addr);
> +	if (err) {
> +		fib6_info_release(f6i);
> +		fib6_info_release(f6i);

Double call to fib6_info_release() ? Why ?

> +		kfree(aca);
> +		goto out;
> +	}
>  
>  	aca->aca_next = idev->ac_list;
>  	idev->ac_list = aca;
> @@ -324,6 +422,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
>  		prev_aca->aca_next = aca->aca_next;
>  	else
>  		idev->ac_list = aca->aca_next;
> +	ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr);
>  	write_unlock_bh(&idev->lock);
>  	addrconf_leave_solict(idev, &aca->aca_addr);
>  
> @@ -350,6 +449,8 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev)
>  	write_lock_bh(&idev->lock);
>  	while ((aca = idev->ac_list) != NULL) {
>  		idev->ac_list = aca->aca_next;
> +		ipv6_del_acaddr_hash(dev_net(idev->dev), &aca->aca_addr);
> +
>  		write_unlock_bh(&idev->lock);
>  
>  		addrconf_leave_solict(idev, &aca->aca_addr);
> @@ -391,16 +492,22 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,
>  			 const struct in6_addr *addr)
>  {
>  	bool found = false;
> +	unsigned int hash = inet6_acaddr_hash(net, addr);
> +	struct ipv6_ac_addrlist *acal;

Reorder variable declaration in longest to shortest (reverse xmas tree),
per David Miller request :)

>  
>  	rcu_read_lock();
>  	if (dev)
>  		found = ipv6_chk_acast_dev(dev, addr);
>  	else
> -		for_each_netdev_rcu(net, dev)
> -			if (ipv6_chk_acast_dev(dev, addr)) {
> +		hlist_for_each_entry_rcu(acal, &inet6_acaddr_lst[hash],
> +					 acal_lst) {
> +			if (!net_eq(read_pnet(&acal->acal_pnet), net))
> +				continue;
> +			if (ipv6_addr_equal(&acal->acal_addr, addr)) {
>  				found = true;
>  				break;
>  			}
> +		}
>  	rcu_read_unlock();
>  	return found;
>  }
> @@ -539,4 +646,25 @@ void ac6_proc_exit(struct net *net)
>  {
>  	remove_proc_entry("anycast6", net->proc_net);
>  }
> +
> +/*	Init / cleanup code
> + */
> +int __init anycast_init(void)
> +{
> +	int i;
> +
> +	for (i = 0; i < IN6_ADDR_HSIZE; i++)
> +		INIT_HLIST_HEAD(&inet6_acaddr_lst[i]);
> +	return 0;
> +}
> +
> +void anycast_cleanup(void)
> +{
> +	int i;
> +
> +	spin_lock(&acaddr_hash_lock);
> +	for (i = 0; i < IN6_ADDR_HSIZE; i++)
> +		WARN_ON(!hlist_empty(&inet6_acaddr_lst[i]));
> +	spin_unlock(&acaddr_hash_lock);
> +}
>  #endif
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ