lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed, 21 Dec 2022 17:37:26 +0100
From:   Jiri Slaby <jirislaby@...nel.org>
To:     Kuniyuki Iwashima <kuniyu@...zon.com>,
        "David S. Miller" <davem@...emloft.net>,
        Eric Dumazet <edumazet@...gle.com>,
        Jakub Kicinski <kuba@...nel.org>,
        Paolo Abeni <pabeni@...hat.com>
Cc:     Joanne Koong <joannelkoong@...il.com>,
        Kuniyuki Iwashima <kuni1840@...il.com>, netdev@...r.kernel.org
Subject: Re: [PATCH RFC net 1/2] tcp: Add TIME_WAIT sockets in bhash2.

On 21. 12. 22, 16:12, Kuniyuki Iwashima wrote:
> Jiri Slaby reported regression of bind() with a simple repro. [0]
> 
> The repro creates a TIME_WAIT socket and tries to bind() a new socket
> with the same local address and port.  Before commit 28044fc1d495 ("net:
> Add a bhash2 table hashed by port and address"), the bind() failed with
> -EADDRINUSE, but now it succeeds.
> 
> The cited commit should have put TIME_WAIT sockets into bhash2; otherwise,
> inet_bhash2_conflict() misses TIME_WAIT sockets when validating bind()
> requests if the address is not a wildcard one.
> 
> [0]: https://lore.kernel.org/netdev/6b971a4e-c7d8-411e-1f92-fda29b5b2fb9@kernel.org/
> 
> Fixes: 28044fc1d495 ("net: Add a bhash2 table hashed by port and address")
> Reported-by: Jiri Slaby <jirislaby@...nel.org>
> Signed-off-by: Kuniyuki Iwashima <kuniyu@...zon.com>

Tested-by: Jiri Slaby <jirislaby@...nel.org>


> ---
>   include/net/inet_timewait_sock.h |  2 ++
>   include/net/sock.h               |  5 +++--
>   net/ipv4/inet_hashtables.c       |  5 +++--
>   net/ipv4/inet_timewait_sock.c    | 31 +++++++++++++++++++++++++++++--
>   4 files changed, 37 insertions(+), 6 deletions(-)
> 
> diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
> index 5b47545f22d3..c46ed239ad9a 100644
> --- a/include/net/inet_timewait_sock.h
> +++ b/include/net/inet_timewait_sock.h
> @@ -44,6 +44,7 @@ struct inet_timewait_sock {
>   #define tw_bound_dev_if		__tw_common.skc_bound_dev_if
>   #define tw_node			__tw_common.skc_nulls_node
>   #define tw_bind_node		__tw_common.skc_bind_node
> +#define tw_bind2_node		__tw_common.skc_bind2_node
>   #define tw_refcnt		__tw_common.skc_refcnt
>   #define tw_hash			__tw_common.skc_hash
>   #define tw_prot			__tw_common.skc_prot
> @@ -73,6 +74,7 @@ struct inet_timewait_sock {
>   	u32			tw_priority;
>   	struct timer_list	tw_timer;
>   	struct inet_bind_bucket	*tw_tb;
> +	struct inet_bind2_bucket	*tw_tb2;
>   };
>   #define tw_tclass tw_tos
>   
> diff --git a/include/net/sock.h b/include/net/sock.h
> index dcd72e6285b2..aaec985c1b5b 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -156,6 +156,7 @@ typedef __u64 __bitwise __addrpair;
>    *	@skc_tw_rcv_nxt: (aka tw_rcv_nxt) TCP window next expected seq number
>    *		[union with @skc_incoming_cpu]
>    *	@skc_refcnt: reference count
> + *	@skc_bind2_node: bind node in the bhash2 table
>    *
>    *	This is the minimal network layer representation of sockets, the header
>    *	for struct sock and struct inet_timewait_sock.
> @@ -241,6 +242,7 @@ struct sock_common {
>   		u32		skc_window_clamp;
>   		u32		skc_tw_snd_nxt; /* struct tcp_timewait_sock */
>   	};
> +	struct hlist_node	skc_bind2_node;
>   	/* public: */
>   };
>   
> @@ -351,7 +353,6 @@ struct sk_filter;
>     *	@sk_txtime_report_errors: set report errors mode for SO_TXTIME
>     *	@sk_txtime_unused: unused txtime flags
>     *	@ns_tracker: tracker for netns reference
> -  *	@sk_bind2_node: bind node in the bhash2 table
>     */
>   struct sock {
>   	/*
> @@ -384,6 +385,7 @@ struct sock {
>   #define sk_net_refcnt		__sk_common.skc_net_refcnt
>   #define sk_bound_dev_if		__sk_common.skc_bound_dev_if
>   #define sk_bind_node		__sk_common.skc_bind_node
> +#define sk_bind2_node		__sk_common.skc_bind2_node
>   #define sk_prot			__sk_common.skc_prot
>   #define sk_net			__sk_common.skc_net
>   #define sk_v6_daddr		__sk_common.skc_v6_daddr
> @@ -542,7 +544,6 @@ struct sock {
>   #endif
>   	struct rcu_head		sk_rcu;
>   	netns_tracker		ns_tracker;
> -	struct hlist_node	sk_bind2_node;
>   };
>   
>   enum sk_pacing {
> diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
> index d039b4e732a3..1e81dc7c6de4 100644
> --- a/net/ipv4/inet_hashtables.c
> +++ b/net/ipv4/inet_hashtables.c
> @@ -1103,15 +1103,16 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
>   	/* Head lock still held and bh's disabled */
>   	inet_bind_hash(sk, tb, tb2, port);
>   
> -	spin_unlock(&head2->lock);
> -
>   	if (sk_unhashed(sk)) {
>   		inet_sk(sk)->inet_sport = htons(port);
>   		inet_ehash_nolisten(sk, (struct sock *)tw, NULL);
>   	}
>   	if (tw)
>   		inet_twsk_bind_unhash(tw, hinfo);
> +
> +	spin_unlock(&head2->lock);
>   	spin_unlock(&head->lock);
> +
>   	if (tw)
>   		inet_twsk_deschedule_put(tw);
>   	local_bh_enable();
> diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
> index 66fc940f9521..bec037d9ab8e 100644
> --- a/net/ipv4/inet_timewait_sock.c
> +++ b/net/ipv4/inet_timewait_sock.c
> @@ -29,6 +29,7 @@
>   void inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
>   			  struct inet_hashinfo *hashinfo)
>   {
> +	struct inet_bind2_bucket *tb2 = tw->tw_tb2;
>   	struct inet_bind_bucket *tb = tw->tw_tb;
>   
>   	if (!tb)
> @@ -37,6 +38,11 @@ void inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
>   	__hlist_del(&tw->tw_bind_node);
>   	tw->tw_tb = NULL;
>   	inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
> +
> +	__hlist_del(&tw->tw_bind2_node);
> +	tw->tw_tb2 = NULL;
> +	inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2);
> +
>   	__sock_put((struct sock *)tw);
>   }
>   
> @@ -45,7 +51,7 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw)
>   {
>   	struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo;
>   	spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);
> -	struct inet_bind_hashbucket *bhead;
> +	struct inet_bind_hashbucket *bhead, *bhead2;
>   
>   	spin_lock(lock);
>   	sk_nulls_del_node_init_rcu((struct sock *)tw);
> @@ -54,9 +60,13 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw)
>   	/* Disassociate with bind bucket. */
>   	bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num,
>   			hashinfo->bhash_size)];
> +	bhead2 = inet_bhashfn_portaddr(hashinfo, (struct sock *)tw,
> +				       twsk_net(tw), tw->tw_num);
>   
>   	spin_lock(&bhead->lock);
> +	spin_lock(&bhead2->lock);
>   	inet_twsk_bind_unhash(tw, hashinfo);
> +	spin_unlock(&bhead2->lock);
>   	spin_unlock(&bhead->lock);
>   
>   	refcount_dec(&tw->tw_dr->tw_refcount);
> @@ -93,6 +103,12 @@ static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
>   	hlist_add_head(&tw->tw_bind_node, list);
>   }
>   
> +static void inet_twsk_add_bind2_node(struct inet_timewait_sock *tw,
> +				     struct hlist_head *list)
> +{
> +	hlist_add_head(&tw->tw_bind2_node, list);
> +}
> +
>   /*
>    * Enter the time wait state. This is called with locally disabled BH.
>    * Essentially we whip up a timewait bucket, copy the relevant info into it
> @@ -105,17 +121,28 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
>   	const struct inet_connection_sock *icsk = inet_csk(sk);
>   	struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash);
>   	spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
> -	struct inet_bind_hashbucket *bhead;
> +	struct inet_bind_hashbucket *bhead, *bhead2;
> +
>   	/* Step 1: Put TW into bind hash. Original socket stays there too.
>   	   Note, that any socket with inet->num != 0 MUST be bound in
>   	   binding cache, even if it is closed.
>   	 */
>   	bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num,
>   			hashinfo->bhash_size)];
> +	bhead2 = inet_bhashfn_portaddr(hashinfo, sk, twsk_net(tw), inet->inet_num);
> +
>   	spin_lock(&bhead->lock);
> +	spin_lock(&bhead2->lock);
> +
>   	tw->tw_tb = icsk->icsk_bind_hash;
>   	WARN_ON(!icsk->icsk_bind_hash);
>   	inet_twsk_add_bind_node(tw, &tw->tw_tb->owners);
> +
> +	tw->tw_tb2 = icsk->icsk_bind2_hash;
> +	WARN_ON(!icsk->icsk_bind2_hash);
> +	inet_twsk_add_bind2_node(tw, &tw->tw_tb2->owners);
> +
> +	spin_unlock(&bhead2->lock);
>   	spin_unlock(&bhead->lock);
>   
>   	spin_lock(lock);

-- 
js
suse labs

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ