lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <m1ws14kwln.fsf@fess.ebiederm.org>
Date:	Thu, 03 Dec 2009 05:36:52 -0800
From:	ebiederm@...ssion.com (Eric W. Biederman)
To:	Eric Dumazet <eric.dumazet@...il.com>
Cc:	David Miller <davem@...emloft.net>, netdev@...r.kernel.org,
	jamal <hadi@...erus.ca>, Daniel Lezcano <dlezcano@...ibm.com>
Subject: Re: [PATCH 7/7] net: Batch inet_twsk_purge

Eric Dumazet <eric.dumazet@...il.com> writes:

> Eric W. Biederman a écrit :
>> From: Eric W. Biederman <ebiederm@...ssion.com>
>> 
>> This function walks the whole hashtable so there is no point in
>> passing it a network namespace.  Instead I purge all timewait
>> sockets from dead network namespaces that I find.  If the namespace
>> is one of the once I am trying to purge I am guaranteed no new timewait
>> sockets can be formed so this will get them all.  If the namespace
>> is one I am not acting for it might form a few more but I will
>> call inet_twsk_purge again and  shortly to get rid of them.  In
>> any even if the network namespace is dead timewait sockets are
>> useless.
>> 
>> Move the calls of inet_twsk_purge into batch_exit routines so
>> that if I am killing a bunch of namespaces at once I will just
>> call inet_twsk_purge once and save a lot of redundant unnecessary
>> work.
>> 
>> My simple 4k network namespace exit test the cleanup time dropped from
>> roughly 8.2s to 1.6s.  While the time spent running inet_twsk_purge fell
>> to about 2ms.  1ms for ipv4 and 1ms for ipv6.
>> 
>> Signed-off-by: Eric W. Biederman <ebiederm@...ssion.com>
>> ---
>>  include/net/inet_timewait_sock.h |    6 +++---
>>  net/ipv4/inet_timewait_sock.c    |   10 +++++-----
>>  net/ipv4/tcp_ipv4.c              |   11 ++++++++---
>>  net/ipv6/tcp_ipv6.c              |   11 ++++++++---
>>  4 files changed, 24 insertions(+), 14 deletions(-)
>> 
>> diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
>> index 773b10f..4fd007f 100644
>> --- a/include/net/inet_timewait_sock.h
>> +++ b/include/net/inet_timewait_sock.h
>> @@ -212,14 +212,14 @@ extern void inet_twsk_schedule(struct inet_timewait_sock *tw,
>>  extern void inet_twsk_deschedule(struct inet_timewait_sock *tw,
>>  				 struct inet_timewait_death_row *twdr);
>>  
>> -extern void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo,
>> +extern void inet_twsk_purge(struct inet_hashinfo *hashinfo,
>>  			    struct inet_timewait_death_row *twdr, int family);
>>  
>>  static inline
>>  struct net *twsk_net(const struct inet_timewait_sock *twsk)
>>  {
>>  #ifdef CONFIG_NET_NS
>> -	return twsk->tw_net;
>> +	return rcu_dereference(twsk->tw_net);
>>  #else
>>  	return &init_net;
>>  #endif
>> @@ -229,7 +229,7 @@ static inline
>>  void twsk_net_set(struct inet_timewait_sock *twsk, struct net *net)
>>  {
>>  #ifdef CONFIG_NET_NS
>> -	twsk->tw_net = net;
>> +	rcu_assign_pointer(twsk->tw_net, net);
>>  #endif
>>  }
>>  #endif	/* _INET_TIMEWAIT_SOCK_ */
>> diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
>> index 683ecec..a3699ac 100644
>> --- a/net/ipv4/inet_timewait_sock.c
>> +++ b/net/ipv4/inet_timewait_sock.c
>> @@ -421,7 +421,7 @@ out:
>>  
>>  EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick);
>>  
>> -void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo,
>> +void inet_twsk_purge(struct inet_hashinfo *hashinfo,
>>  		     struct inet_timewait_death_row *twdr, int family)
>>  {
>>  	struct inet_timewait_sock *tw;
>> @@ -436,15 +436,15 @@ restart_rcu:
>>  restart:
>>  		sk_nulls_for_each_rcu(sk, node, &head->twchain) {
>>  			tw = inet_twsk(sk);
>> -			if (!net_eq(twsk_net(tw), net) ||
>> -			    tw->tw_family != family)
>> +			if ((tw->tw_family != family) ||
>> +				atomic_read(&twsk_net(tw)->count))
>>  				continue;
>>  
>>  			if (unlikely(!atomic_inc_not_zero(&tw->tw_refcnt)))
>>  				continue;
>>  
>> -			if (unlikely(!net_eq(twsk_net(tw), net) ||
>> -				     tw->tw_family != family)) {
>> +			if (unlikely((tw->tw_family != family) ||
>> +				     atomic_read(&twsk_net(tw)->count))) {
>>  				inet_twsk_put(tw);
>>  				goto restart;
>>  			}
>> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
>> index df18ce0..e30f026 100644
>> --- a/net/ipv4/tcp_ipv4.c
>> +++ b/net/ipv4/tcp_ipv4.c
>> @@ -2468,12 +2468,17 @@ static int __net_init tcp_sk_init(struct net *net)
>>  static void __net_exit tcp_sk_exit(struct net *net)
>>  {
>>  	inet_ctl_sock_destroy(net->ipv4.tcp_sock);
>> -	inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET);
>> +}
>> +
>> +static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
>> +{
>> +	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
>>  }
>>  
>>  static struct pernet_operations __net_initdata tcp_sk_ops = {
>> -       .init = tcp_sk_init,
>> -       .exit = tcp_sk_exit,
>> +       .init	   = tcp_sk_init,
>> +       .exit	   = tcp_sk_exit,
>> +       .exit_batch = tcp_sk_exit_batch,
>>  };
>>  
>>  void __init tcp_v4_init(void)
>> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
>> index de70909..5f46d36 100644
>> --- a/net/ipv6/tcp_ipv6.c
>> +++ b/net/ipv6/tcp_ipv6.c
>> @@ -2126,12 +2126,17 @@ static int tcpv6_net_init(struct net *net)
>>  static void tcpv6_net_exit(struct net *net)
>>  {
>>  	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
>> -	inet_twsk_purge(net, &tcp_hashinfo, &tcp_death_row, AF_INET6);
>> +}
>> +
>> +static void tcpv6_net_exit_batch(struct list_head *net_exit_list)
>> +{
>> +	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET6);
>>  }
>>  
>>  static struct pernet_operations tcpv6_net_ops = {
>> -	.init = tcpv6_net_init,
>> -	.exit = tcpv6_net_exit,
>> +	.init	    = tcpv6_net_init,
>> +	.exit	    = tcpv6_net_exit,
>> +	.exit_batch = tcpv6_net_exit_batch,
>>  };
>>  
>>  int __init tcpv6_init(void)
>
>
> OK, but why calling inet_twsk_purge() twice, one for AF_INET, once for AF_INET6
>
> I believe you could zap family check as well in inet_twsk_purge(), and not
> need tcpv6_net_ops.exit_batch = tcpv6_net_exit_batch

Technically it is needed if someone does rmmod ipv6.  rmmod ipv6 didn't seem
to work for me, but if it ever does...  That and the cost is now in the noise
in human terms.

I think dccp may also need a inet_twsk_purge as well, but I couldn't figure out what
it was doing with timewait sockets.

Eric


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ