lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Mon, 17 Jun 2024 10:59:07 -0700
From: Kuniyuki Iwashima <kuniyu@...zon.com>
To: <luoxuanqiang@...inos.cn>
CC: <alexandre.ferrieux@...nge.com>, <davem@...emloft.net>,
	<dccp@...r.kernel.org>, <dsahern@...nel.org>, <edumazet@...gle.com>,
	<fw@...len.de>, <kuba@...nel.org>, <kuniyu@...zon.com>,
	<linux-kernel@...r.kernel.org>, <netdev@...r.kernel.org>, <pabeni@...hat.com>
Subject: Re: [PATCH net v3] Fix race for duplicate reqsk on identical SYN

From: luoxuanqiang <luoxuanqiang@...inos.cn>
Date: Mon, 17 Jun 2024 15:56:40 +0800
> When bonding is configured in BOND_MODE_BROADCAST mode, if two identical
> SYN packets are received at the same time and processed on different CPUs,
> it can potentially create the same sk (sock) but two different reqsk
> (request_sock) in tcp_conn_request().
> 
> These two different reqsk will respond with two SYNACK packets, and since
> the generation of the seq (ISN) incorporates a timestamp, the final two
> SYNACK packets will have different seq values.
> 
> The consequence is that when the Client receives and replies with an ACK
> to the earlier SYNACK packet, we will reset(RST) it.
> 
> ========================================================================
> 
> This behavior is consistently reproducible in my local setup,
> which comprises:
> 
>                   | NETA1 ------ NETB1 |
> PC_A --- bond --- |                    | --- bond --- PC_B
>                   | NETA2 ------ NETB2 |
> 
> - PC_A is the Server and has two network cards, NETA1 and NETA2. I have
>   bonded these two cards using BOND_MODE_BROADCAST mode and configured
>   them to be handled by different CPU.
> 
> - PC_B is the Client, also equipped with two network cards, NETB1 and
>   NETB2, which are also bonded and configured in BOND_MODE_BROADCAST mode.
> 
> If the client attempts a TCP connection to the server, it might encounter
> a failure. Capturing packets from the server side reveals:
> 
> 10.10.10.10.45182 > localhost: Flags [S], seq 320236027,
> 10.10.10.10.45182 > localhost: Flags [S], seq 320236027,
> localhost > 10.10.10.10.45182: Flags [S.], seq 2967855116,
> localhost > 10.10.10.10.45182: Flags [S.], seq 2967855123, <==
> 10.10.10.10.45182 > localhost: Flags [.], ack 4294967290,
> 10.10.10.10.45182 > localhost: Flags [.], ack 4294967290,
> localhost > 10.10.10.10.45182: Flags [R], seq 2967855117, <==
> localhost > 10.10.10.10.45182: Flags [R], seq 2967855117,
> 
> Two SYNACKs with different seq numbers are sent by localhost,
> resulting in an anomaly.
> 
> ========================================================================
> 
> The attempted solution is as follows:
> In the tcp_conn_request(), while inserting reqsk into the ehash table,
> it also checks if an entry already exists. If found, it avoids
> reinsertion and releases it.
> 
> Simultaneously, In the reqsk_queue_hash_req(), the start of the
> req->rsk_timer is adjusted to be after successful insertion.
> 
> Signed-off-by: luoxuanqiang <luoxuanqiang@...inos.cn>
> ---
>  include/net/inet_connection_sock.h |  4 ++--
>  net/dccp/ipv4.c                    |  2 +-
>  net/dccp/ipv6.c                    |  2 +-
>  net/ipv4/inet_connection_sock.c    | 19 +++++++++++++------
>  net/ipv4/tcp_input.c               |  9 ++++++++-
>  5 files changed, 25 insertions(+), 11 deletions(-)
> 
> diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
> index 7d6b1254c92d..8ebab6220dbc 100644
> --- a/include/net/inet_connection_sock.h
> +++ b/include/net/inet_connection_sock.h
> @@ -263,8 +263,8 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
>  struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
>  				      struct request_sock *req,
>  				      struct sock *child);
> -void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
> -				   unsigned long timeout);
> +bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
> +				   unsigned long timeout, bool *found_dup_sk);
>  struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
>  					 struct request_sock *req,
>  					 bool own_req);
> diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
> index ff41bd6f99c3..13aafdeb9205 100644
> --- a/net/dccp/ipv4.c
> +++ b/net/dccp/ipv4.c
> @@ -657,7 +657,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
>  	if (dccp_v4_send_response(sk, req))
>  		goto drop_and_free;
>  
> -	inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
> +	inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT, NULL);
>  	reqsk_put(req);
>  	return 0;
>  
> diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
> index 85f4b8fdbe5e..493cdb12ce2b 100644
> --- a/net/dccp/ipv6.c
> +++ b/net/dccp/ipv6.c
> @@ -400,7 +400,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
>  	if (dccp_v6_send_response(sk, req))
>  		goto drop_and_free;
>  
> -	inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
> +	inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT, NULL);
>  	reqsk_put(req);
>  	return 0;
>  
> diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
> index d81f74ce0f02..2fa9b33ae26a 100644
> --- a/net/ipv4/inet_connection_sock.c
> +++ b/net/ipv4/inet_connection_sock.c
> @@ -1122,25 +1122,32 @@ static void reqsk_timer_handler(struct timer_list *t)
>  	inet_csk_reqsk_queue_drop_and_put(oreq->rsk_listener, oreq);
>  }
>  
> -static void reqsk_queue_hash_req(struct request_sock *req,
> -				 unsigned long timeout)
> +static bool reqsk_queue_hash_req(struct request_sock *req,
> +				 unsigned long timeout, bool *found_dup_sk)
>  {

Given any changes here in reqsk_queue_hash_req() conflicts with 4.19
(oldest stable) and DCCP does not check found_dup_sk, you can define
found_dup_sk here, then you need not touch DCCP at all.


> +	if (!inet_ehash_insert(req_to_sk(req), NULL, found_dup_sk))
> +		return false;
> +
> +	/* The timer needs to be setup after a successful insertion. */
>  	timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
>  	mod_timer(&req->rsk_timer, jiffies + timeout);
>  
> -	inet_ehash_insert(req_to_sk(req), NULL, NULL);
>  	/* before letting lookups find us, make sure all req fields
>  	 * are committed to memory and refcnt initialized.
>  	 */
>  	smp_wmb();
>  	refcount_set(&req->rsk_refcnt, 2 + 1);
> +	return true;
>  }
>  
> -void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
> -				   unsigned long timeout)
> +bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
> +				   unsigned long timeout, bool *found_dup_sk)
>  {
> -	reqsk_queue_hash_req(req, timeout);
> +	if (!reqsk_queue_hash_req(req, timeout, found_dup_sk))
> +		return false;
> +
>  	inet_csk_reqsk_queue_added(sk);
> +	return true;
>  }
>  EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
>  
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 9c04a9c8be9d..e006c374f781 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -7255,8 +7255,15 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
>  	} else {
>  		tcp_rsk(req)->tfo_listener = false;
>  		if (!want_cookie) {
> +			bool found_dup_sk = false;
> +
>  			req->timeout = tcp_timeout_init((struct sock *)req);
> -			inet_csk_reqsk_queue_hash_add(sk, req, req->timeout);
> +			if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, req->timeout,
> +								    &found_dup_sk))) {
> +				reqsk_free(req);
> +				return 0;
> +			}
> +
>  		}
>  		af_ops->send_synack(sk, dst, &fl, req, &foc,
>  				    !want_cookie ? TCP_SYNACK_NORMAL :
> -- 
> 2.25.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ