lists.openwall.net | lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening PHC | |
Open Source and information security mailing list archives
| ||
|
Date: Thu, 22 Dec 2022 00:12:57 +0900 From: Kuniyuki Iwashima <kuniyu@...zon.com> To: "David S. Miller" <davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>, Jakub Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com> CC: Jiri Slaby <jirislaby@...nel.org>, Joanne Koong <joannelkoong@...il.com>, Kuniyuki Iwashima <kuniyu@...zon.com>, Kuniyuki Iwashima <kuni1840@...il.com>, <netdev@...r.kernel.org> Subject: [PATCH RFC net 1/2] tcp: Add TIME_WAIT sockets in bhash2. Jiri Slaby reported regression of bind() with a simple repro. [0] The repro creates a TIME_WAIT socket and tries to bind() a new socket with the same local address and port. Before commit 28044fc1d495 ("net: Add a bhash2 table hashed by port and address"), the bind() failed with -EADDRINUSE, but now it succeeds. The cited commit should have put TIME_WAIT sockets into bhash2; otherwise, inet_bhash2_conflict() misses TIME_WAIT sockets when validating bind() requests if the address is not a wildcard one. [0]: https://lore.kernel.org/netdev/6b971a4e-c7d8-411e-1f92-fda29b5b2fb9@kernel.org/ Fixes: 28044fc1d495 ("net: Add a bhash2 table hashed by port and address") Reported-by: Jiri Slaby <jirislaby@...nel.org> Signed-off-by: Kuniyuki Iwashima <kuniyu@...zon.com> --- include/net/inet_timewait_sock.h | 2 ++ include/net/sock.h | 5 +++-- net/ipv4/inet_hashtables.c | 5 +++-- net/ipv4/inet_timewait_sock.c | 31 +++++++++++++++++++++++++++++-- 4 files changed, 37 insertions(+), 6 deletions(-) diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 5b47545f22d3..c46ed239ad9a 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -44,6 +44,7 @@ struct inet_timewait_sock { #define tw_bound_dev_if __tw_common.skc_bound_dev_if #define tw_node __tw_common.skc_nulls_node #define tw_bind_node __tw_common.skc_bind_node +#define tw_bind2_node __tw_common.skc_bind2_node #define tw_refcnt __tw_common.skc_refcnt #define tw_hash __tw_common.skc_hash #define tw_prot __tw_common.skc_prot @@ -73,6 +74,7 @@ struct inet_timewait_sock { u32 tw_priority; struct timer_list tw_timer; struct inet_bind_bucket *tw_tb; + struct inet_bind2_bucket *tw_tb2; }; #define tw_tclass tw_tos diff --git a/include/net/sock.h b/include/net/sock.h index dcd72e6285b2..aaec985c1b5b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -156,6 +156,7 @@ typedef __u64 __bitwise __addrpair; * @skc_tw_rcv_nxt: (aka tw_rcv_nxt) TCP window next expected seq number * [union with @skc_incoming_cpu] * @skc_refcnt: reference count + * @skc_bind2_node: bind node in the bhash2 table * * This is the minimal network layer representation of sockets, the header * for struct sock and struct inet_timewait_sock. @@ -241,6 +242,7 @@ struct sock_common { u32 skc_window_clamp; u32 skc_tw_snd_nxt; /* struct tcp_timewait_sock */ }; + struct hlist_node skc_bind2_node; /* public: */ }; @@ -351,7 +353,6 @@ struct sk_filter; * @sk_txtime_report_errors: set report errors mode for SO_TXTIME * @sk_txtime_unused: unused txtime flags * @ns_tracker: tracker for netns reference - * @sk_bind2_node: bind node in the bhash2 table */ struct sock { /* @@ -384,6 +385,7 @@ struct sock { #define sk_net_refcnt __sk_common.skc_net_refcnt #define sk_bound_dev_if __sk_common.skc_bound_dev_if #define sk_bind_node __sk_common.skc_bind_node +#define sk_bind2_node __sk_common.skc_bind2_node #define sk_prot __sk_common.skc_prot #define sk_net __sk_common.skc_net #define sk_v6_daddr __sk_common.skc_v6_daddr @@ -542,7 +544,6 @@ struct sock { #endif struct rcu_head sk_rcu; netns_tracker ns_tracker; - struct hlist_node sk_bind2_node; }; enum sk_pacing { diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index d039b4e732a3..1e81dc7c6de4 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -1103,15 +1103,16 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, /* Head lock still held and bh's disabled */ inet_bind_hash(sk, tb, tb2, port); - spin_unlock(&head2->lock); - if (sk_unhashed(sk)) { inet_sk(sk)->inet_sport = htons(port); inet_ehash_nolisten(sk, (struct sock *)tw, NULL); } if (tw) inet_twsk_bind_unhash(tw, hinfo); + + spin_unlock(&head2->lock); spin_unlock(&head->lock); + if (tw) inet_twsk_deschedule_put(tw); local_bh_enable(); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 66fc940f9521..bec037d9ab8e 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -29,6 +29,7 @@ void inet_twsk_bind_unhash(struct inet_timewait_sock *tw, struct inet_hashinfo *hashinfo) { + struct inet_bind2_bucket *tb2 = tw->tw_tb2; struct inet_bind_bucket *tb = tw->tw_tb; if (!tb) @@ -37,6 +38,11 @@ void inet_twsk_bind_unhash(struct inet_timewait_sock *tw, __hlist_del(&tw->tw_bind_node); tw->tw_tb = NULL; inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); + + __hlist_del(&tw->tw_bind2_node); + tw->tw_tb2 = NULL; + inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2); + __sock_put((struct sock *)tw); } @@ -45,7 +51,7 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw) { struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo; spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); - struct inet_bind_hashbucket *bhead; + struct inet_bind_hashbucket *bhead, *bhead2; spin_lock(lock); sk_nulls_del_node_init_rcu((struct sock *)tw); @@ -54,9 +60,13 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw) /* Disassociate with bind bucket. */ bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num, hashinfo->bhash_size)]; + bhead2 = inet_bhashfn_portaddr(hashinfo, (struct sock *)tw, + twsk_net(tw), tw->tw_num); spin_lock(&bhead->lock); + spin_lock(&bhead2->lock); inet_twsk_bind_unhash(tw, hashinfo); + spin_unlock(&bhead2->lock); spin_unlock(&bhead->lock); refcount_dec(&tw->tw_dr->tw_refcount); @@ -93,6 +103,12 @@ static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, hlist_add_head(&tw->tw_bind_node, list); } +static void inet_twsk_add_bind2_node(struct inet_timewait_sock *tw, + struct hlist_head *list) +{ + hlist_add_head(&tw->tw_bind2_node, list); +} + /* * Enter the time wait state. This is called with locally disabled BH. * Essentially we whip up a timewait bucket, copy the relevant info into it @@ -105,17 +121,28 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, const struct inet_connection_sock *icsk = inet_csk(sk); struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash); spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); - struct inet_bind_hashbucket *bhead; + struct inet_bind_hashbucket *bhead, *bhead2; + /* Step 1: Put TW into bind hash. Original socket stays there too. Note, that any socket with inet->num != 0 MUST be bound in binding cache, even if it is closed. */ bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num, hashinfo->bhash_size)]; + bhead2 = inet_bhashfn_portaddr(hashinfo, sk, twsk_net(tw), inet->inet_num); + spin_lock(&bhead->lock); + spin_lock(&bhead2->lock); + tw->tw_tb = icsk->icsk_bind_hash; WARN_ON(!icsk->icsk_bind_hash); inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); + + tw->tw_tb2 = icsk->icsk_bind2_hash; + WARN_ON(!icsk->icsk_bind2_hash); + inet_twsk_add_bind2_node(tw, &tw->tw_tb2->owners); + + spin_unlock(&bhead2->lock); spin_unlock(&bhead->lock); spin_lock(lock); -- 2.30.2
Powered by blists - more mailing lists