[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4B16830B.4010801@gmail.com>
Date: Wed, 02 Dec 2009 16:08:59 +0100
From: Eric Dumazet <eric.dumazet@...il.com>
To: David Miller <davem@...emloft.net>
CC: kdakhane@...il.com, netdev@...r.kernel.org,
netfilter@...r.kernel.org, zbr@...emap.net,
Evgeniy Polyakov <zbr@...emap.net>
Subject: [PATCH net-next-2.6] tcp: connect() race with timewait reuse
Eric Dumazet a écrit :
> Eric Dumazet a écrit :
>> But even if sysctl_tw_reuse is cleared, we might trigger the bug if
>> local port is bound to a value.
>
> Oh well, that's more subtle than that.
>
> __inet_check_established() is called not only with bh disabled,
> but also with a lock on bind list if twp != NULL.
>
> However, if twp is NULL, lock is not held by caller.
>
> [ That's the final
> ret = check_established(death_row, sk, snum, NULL);
> in __inet_hash_connect()]
>
> So triggering this bug with tw_reuse clear is tricky :
>
> You need several threads, using sockets with REUSEADDR set,
> and bind() to same address/port before connect() to same target.
>
> We need another patch to correct this.
>
Here is a separate patch for this issue, cooked on top of net-next-2.6
for testing purposes, and public discussion.
Thanks
[PATCH net-next-2.6] tcp: connect() race with timewait reuse
It is currently possible for several threads issuing a connect() to find the same
timewait socket and try to reuse it, leading to list corruption.
The condition for the bug is that these threads bound their sockets to the same address/port
as the timewait socket to be found, and connected to the same target (SO_REUSEADDR is needed).
To fix this problem, we could unhash the timewait socket while holding the ehash lock,
to make sure lookups/changes will be serialized. Only the first thread finds the timewait
socket; the other ones find the established socket and return an EADDRNOTAVAIL error.
Signed-off-by: Eric Dumazet <eric.dumazet@...il.com>
---
include/net/inet_timewait_sock.h | 2 +
net/ipv4/inet_hashtables.c | 7 +++--
net/ipv4/inet_timewait_sock.c | 36 ++++++++++++++++++++---------
net/ipv6/inet6_hashtables.c | 12 +++++----
4 files changed, 39 insertions(+), 18 deletions(-)
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 773b10f..59c80a0 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -199,6 +199,8 @@ static inline __be32 inet_rcv_saddr(const struct sock *sk)
extern void inet_twsk_put(struct inet_timewait_sock *tw);
+extern void inet_twsk_unhash(struct inet_timewait_sock *tw);
+
extern struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
const int state);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 94ef51a..143ddb4 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -318,20 +318,21 @@ unique:
sk->sk_hash = hash;
WARN_ON(!sk_unhashed(sk));
__sk_nulls_add_node_rcu(sk, &head->chain);
+ if (tw) {
+ inet_twsk_unhash(tw);
+ NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
+ }
spin_unlock(lock);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
if (twp) {
*twp = tw;
- NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
} else if (tw) {
/* Silly. Should hash-dance instead... */
inet_twsk_deschedule(tw, death_row);
- NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
inet_twsk_put(tw);
}
-
return 0;
not_unique:
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 1f5d508..680d09b 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -14,6 +14,21 @@
#include <net/inet_timewait_sock.h>
#include <net/ip.h>
+
+/*
+ * unhash a timewait socket from established hash
+ * lock must be hold by caller
+ */
+void inet_twsk_unhash(struct inet_timewait_sock *tw)
+{
+ if (hlist_nulls_unhashed(&tw->tw_node))
+ return;
+
+ hlist_nulls_del_rcu(&tw->tw_node);
+ sk_nulls_node_init(&tw->tw_node);
+ inet_twsk_put(tw);
+}
+
/* Must be called with locally disabled BHs. */
static void __inet_twsk_kill(struct inet_timewait_sock *tw,
struct inet_hashinfo *hashinfo)
@@ -24,12 +39,9 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);
spin_lock(lock);
- if (hlist_nulls_unhashed(&tw->tw_node)) {
- spin_unlock(lock);
- return;
- }
- hlist_nulls_del_rcu(&tw->tw_node);
- sk_nulls_node_init(&tw->tw_node);
+
+ inet_twsk_unhash(tw);
+
spin_unlock(lock);
/* Disassociate with bind bucket. */
@@ -37,9 +49,11 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
hashinfo->bhash_size)];
spin_lock(&bhead->lock);
tb = tw->tw_tb;
- __hlist_del(&tw->tw_bind_node);
- tw->tw_tb = NULL;
- inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
+ if (tb) {
+ __hlist_del(&tw->tw_bind_node);
+ tw->tw_tb = NULL;
+ inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
+ }
spin_unlock(&bhead->lock);
#ifdef SOCK_REFCNT_DEBUG
if (atomic_read(&tw->tw_refcnt) != 1) {
@@ -47,7 +61,8 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt));
}
#endif
- inet_twsk_put(tw);
+ if (tb)
+ inet_twsk_put(tw);
}
static noinline void inet_twsk_free(struct inet_timewait_sock *tw)
@@ -92,6 +107,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
tw->tw_tb = icsk->icsk_bind_hash;
WARN_ON(!icsk->icsk_bind_hash);
inet_twsk_add_bind_node(tw, &tw->tw_tb->owners);
+ atomic_inc(&tw->tw_refcnt);
spin_unlock(&bhead->lock);
spin_lock(lock);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 00c6a3e..3681c00 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -250,19 +250,21 @@ unique:
* in hash table socket with a funny identity. */
inet->inet_num = lport;
inet->inet_sport = htons(lport);
+ sk->sk_hash = hash;
WARN_ON(!sk_unhashed(sk));
__sk_nulls_add_node_rcu(sk, &head->chain);
- sk->sk_hash = hash;
+ if (tw) {
+ inet_twsk_unhash(tw);
+ NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
+ }
spin_unlock(lock);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
- if (twp != NULL) {
+ if (twp) {
*twp = tw;
- NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
- } else if (tw != NULL) {
+ } else if (tw) {
/* Silly. Should hash-dance instead... */
inet_twsk_deschedule(tw, death_row);
- NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
inet_twsk_put(tw);
}
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists