lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20210505045618.flihfg3hcesdyfak@kafai-mbp.dhcp.thefacebook.com>
Date:   Tue, 4 May 2021 21:56:18 -0700
From:   Martin KaFai Lau <kafai@...com>
To:     Kuniyuki Iwashima <kuniyu@...zon.co.jp>
CC:     "David S . Miller" <davem@...emloft.net>,
        Jakub Kicinski <kuba@...nel.org>,
        Eric Dumazet <edumazet@...gle.com>,
        Alexei Starovoitov <ast@...nel.org>,
        Daniel Borkmann <daniel@...earbox.net>,
        Andrii Nakryiko <andrii@...nel.org>,
        Benjamin Herrenschmidt <benh@...zon.com>,
        Kuniyuki Iwashima <kuni1840@...il.com>, <bpf@...r.kernel.org>,
        <netdev@...r.kernel.org>, <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH v4 bpf-next 06/11] tcp: Migrate TCP_NEW_SYN_RECV requests
 at retransmitting SYN+ACKs.

On Tue, Apr 27, 2021 at 12:46:18PM +0900, Kuniyuki Iwashima wrote:
[ ... ]

> diff --git a/net/core/request_sock.c b/net/core/request_sock.c
> index 82cf9fbe2668..08c37ecd923b 100644
> --- a/net/core/request_sock.c
> +++ b/net/core/request_sock.c
> @@ -151,6 +151,7 @@ struct request_sock *reqsk_clone(struct request_sock *req, struct sock *sk)
>  	memcpy(&nreq_sk->sk_dontcopy_end, &req_sk->sk_dontcopy_end,
>  	       req->rsk_ops->obj_size - offsetof(struct sock, sk_dontcopy_end));
>  
> +	sk_node_init(&nreq_sk->sk_node);
This belongs in patch 5.
"rsk_refcnt" also needs to be set to 0 instead of being left uninitialized
after reqsk_clone() returns.

>  	nreq_sk->sk_tx_queue_mapping = req_sk->sk_tx_queue_mapping;
>  #ifdef CONFIG_XPS
>  	nreq_sk->sk_rx_queue_mapping = req_sk->sk_rx_queue_mapping;
> diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
> index 851992405826..dc984d1f352e 100644
> --- a/net/ipv4/inet_connection_sock.c
> +++ b/net/ipv4/inet_connection_sock.c
> @@ -695,10 +695,20 @@ int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req)
>  }
>  EXPORT_SYMBOL(inet_rtx_syn_ack);
>  
> +static void reqsk_queue_migrated(struct request_sock_queue *queue,
> +				 const struct request_sock *req)
> +{
> +	if (req->num_timeout == 0)
> +		atomic_inc(&queue->young);
> +	atomic_inc(&queue->qlen);
> +}
> +
>  static void reqsk_migrate_reset(struct request_sock *req)
>  {
> +	req->saved_syn = NULL;
> +	inet_rsk(req)->ireq_opt = NULL;
>  #if IS_ENABLED(CONFIG_IPV6)
> -	inet_rsk(req)->ipv6_opt = NULL;
> +	inet_rsk(req)->pktopts = NULL;
>  #endif
>  }
>  
> @@ -741,16 +751,37 @@ EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put);
>  
>  static void reqsk_timer_handler(struct timer_list *t)
>  {
> -	struct request_sock *req = from_timer(req, t, rsk_timer);
> -	struct sock *sk_listener = req->rsk_listener;
> -	struct net *net = sock_net(sk_listener);
> -	struct inet_connection_sock *icsk = inet_csk(sk_listener);
> -	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
> +	struct request_sock *req = from_timer(req, t, rsk_timer), *nreq = NULL, *oreq = req;
nit. This line is too long.
Let's move the new "*nreq" and "*oreq" to a new line and keep the current
"*req" line as is:
	struct request_sock *req = from_timer(req, t, rsk_timer);
	struct request_sock *oreq = req, *nreq = NULL;

> +	struct sock *sk_listener = req->rsk_listener, *nsk = NULL;
"*nsk" can be moved into the following "!= TCP_LISTEN" case below.
Keep the current "*sk_listener" line as is.

> +	struct inet_connection_sock *icsk;
> +	struct request_sock_queue *queue;
> +	struct net *net;
>  	int max_syn_ack_retries, qlen, expire = 0, resend = 0;
>  
> -	if (inet_sk_state_load(sk_listener) != TCP_LISTEN)
> -		goto drop;
> +	if (inet_sk_state_load(sk_listener) != TCP_LISTEN) {

		struct sock *nsk;

> +		nsk = reuseport_migrate_sock(sk_listener, req_to_sk(req), NULL);
> +		if (!nsk)
> +			goto drop;
> +
> +		nreq = reqsk_clone(req, nsk);
> +		if (!nreq)
> +			goto drop;
> +
> +		/* The new timer for the cloned req can decrease the 2
> +		 * by calling inet_csk_reqsk_queue_drop_and_put(), so
> +		 * hold another count to prevent use-after-free and
> +		 * call reqsk_put() just before return.
> +		 */
> +		refcount_set(&nreq->rsk_refcnt, 2 + 1);
> +		timer_setup(&nreq->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
> +		reqsk_queue_migrated(&inet_csk(nsk)->icsk_accept_queue, req);
> +
> +		req = nreq;
> +		sk_listener = nsk;
> +	}

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ