lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Wed, 02 Dec 2009 16:08:59 +0100
From:	Eric Dumazet <eric.dumazet@...il.com>
To:	David Miller <davem@...emloft.net>
CC:	kdakhane@...il.com, netdev@...r.kernel.org,
	netfilter@...r.kernel.org, zbr@...emap.net,
	Evgeniy Polyakov <zbr@...emap.net>
Subject: [PATCH net-next-2.6] tcp: connect() race with timewait reuse

Eric Dumazet a écrit :
> Eric Dumazet a écrit :
>> But even if sysctl_tw_reuse is cleared, we might trigger the bug if
>> local port is bound to a value.
> 
> Oh well, that's more subtle than that.
> 
> __inet_check_established() is called not only with bh disabled,
> but also with a lock on bind list if twp != NULL.
> 
> However, if twp is NULL, lock is not held by caller.
> 
> [ Thats the final
>   ret = check_established(death_row, sk, snum, NULL);
>   in __inet_hash_connect()]
> 
> So triggering this bug with tw_reuse clear is tricky :
> 
> You need several threads, using sockets with REUSEADDR set,
> and bind() to same address/port before connect() to same target.
> 
> We need another patch to correct this.
> 

Here is a separate patch for this issue, cooked on top of net-next-2.6
for testing purposes, and public discussion.

Thanks

[PATCH net-next-2.6] tcp: connect() race with timewait reuse

Its currently possible that several threads issuing a connect() find the same
timewait socket and try to reuse it, leading to list corruptions.

Condition for bug is that these threads bound their socket on same address/port
of to be found timewait socket, and connected to same target. (SO_REUSEADDR needed)

To fix this problem, we could unhash timewait socket while holding ehash lock,
to make sure lookups/changes will be serialized. Only first one find the timewait
socket, other ones find the established socket and return an EADDRNOTAVAIL error.

Signed-off-by: Eric Dumazet <eric.dumazet@...il.com>
---
 include/net/inet_timewait_sock.h |    2 +
 net/ipv4/inet_hashtables.c       |    7 +++--
 net/ipv4/inet_timewait_sock.c    |   36 ++++++++++++++++++++---------
 net/ipv6/inet6_hashtables.c      |   12 +++++----
 4 files changed, 39 insertions(+), 18 deletions(-)

diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 773b10f..59c80a0 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -199,6 +199,8 @@ static inline __be32 inet_rcv_saddr(const struct sock *sk)
 
 extern void inet_twsk_put(struct inet_timewait_sock *tw);
 
+extern void inet_twsk_unhash(struct inet_timewait_sock *tw);
+
 extern struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
 						  const int state);
 
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 94ef51a..143ddb4 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -318,20 +318,21 @@ unique:
 	sk->sk_hash = hash;
 	WARN_ON(!sk_unhashed(sk));
 	__sk_nulls_add_node_rcu(sk, &head->chain);
+	if (tw) {
+		inet_twsk_unhash(tw);
+		NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
+	}
 	spin_unlock(lock);
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 
 	if (twp) {
 		*twp = tw;
-		NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
 	} else if (tw) {
 		/* Silly. Should hash-dance instead... */
 		inet_twsk_deschedule(tw, death_row);
-		NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
 
 		inet_twsk_put(tw);
 	}
-
 	return 0;
 
 not_unique:
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 1f5d508..680d09b 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -14,6 +14,21 @@
 #include <net/inet_timewait_sock.h>
 #include <net/ip.h>
 
+
+/*
+ * unhash a timewait socket from established hash
+ * lock must be hold by caller
+ */
+void inet_twsk_unhash(struct inet_timewait_sock *tw)
+{
+	if (hlist_nulls_unhashed(&tw->tw_node))
+		return;
+
+	hlist_nulls_del_rcu(&tw->tw_node);
+	sk_nulls_node_init(&tw->tw_node);
+	inet_twsk_put(tw);
+}
+
 /* Must be called with locally disabled BHs. */
 static void __inet_twsk_kill(struct inet_timewait_sock *tw,
 			     struct inet_hashinfo *hashinfo)
@@ -24,12 +39,9 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
 	spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);
 
 	spin_lock(lock);
-	if (hlist_nulls_unhashed(&tw->tw_node)) {
-		spin_unlock(lock);
-		return;
-	}
-	hlist_nulls_del_rcu(&tw->tw_node);
-	sk_nulls_node_init(&tw->tw_node);
+
+	inet_twsk_unhash(tw);
+
 	spin_unlock(lock);
 
 	/* Disassociate with bind bucket. */
@@ -37,9 +49,11 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
 			hashinfo->bhash_size)];
 	spin_lock(&bhead->lock);
 	tb = tw->tw_tb;
-	__hlist_del(&tw->tw_bind_node);
-	tw->tw_tb = NULL;
-	inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
+	if (tb) {
+		__hlist_del(&tw->tw_bind_node);
+		tw->tw_tb = NULL;
+		inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
+	}
 	spin_unlock(&bhead->lock);
 #ifdef SOCK_REFCNT_DEBUG
 	if (atomic_read(&tw->tw_refcnt) != 1) {
@@ -47,7 +61,8 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
 		       tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt));
 	}
 #endif
-	inet_twsk_put(tw);
+	if (tb)
+		inet_twsk_put(tw);
 }
 
 static noinline void inet_twsk_free(struct inet_timewait_sock *tw)
@@ -92,6 +107,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
 	tw->tw_tb = icsk->icsk_bind_hash;
 	WARN_ON(!icsk->icsk_bind_hash);
 	inet_twsk_add_bind_node(tw, &tw->tw_tb->owners);
+	atomic_inc(&tw->tw_refcnt);
 	spin_unlock(&bhead->lock);
 
 	spin_lock(lock);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 00c6a3e..3681c00 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -250,19 +250,21 @@ unique:
 	 * in hash table socket with a funny identity. */
 	inet->inet_num = lport;
 	inet->inet_sport = htons(lport);
+	sk->sk_hash = hash;
 	WARN_ON(!sk_unhashed(sk));
 	__sk_nulls_add_node_rcu(sk, &head->chain);
-	sk->sk_hash = hash;
+	if (tw) {
+		inet_twsk_unhash(tw);
+		NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
+	}
 	spin_unlock(lock);
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 
-	if (twp != NULL) {
+	if (twp) {
 		*twp = tw;
-		NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
-	} else if (tw != NULL) {
+	} else if (tw) {
 		/* Silly. Should hash-dance instead... */
 		inet_twsk_deschedule(tw, death_row);
-		NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
 
 		inet_twsk_put(tw);
 	}
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ