lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230124175712.38112-1-kuniyu@amazon.com>
Date:   Tue, 24 Jan 2023 09:57:12 -0800
From:   Kuniyuki Iwashima <kuniyu@...zon.com>
To:     <tkhai@...ru>
CC:     <davem@...emloft.net>, <edumazet@...gle.com>, <gorcunov@...il.com>,
        <kuba@...nel.org>, <kuniyu@...zon.com>, <netdev@...r.kernel.org>,
        <pabeni@...hat.com>
Subject: Re: [PATCH net-next] unix: Guarantee sk_state relevance in case of it was assigned by a task on other cpu

From:   Kirill Tkhai <tkhai@...ru>
Date:   Mon, 23 Jan 2023 01:21:20 +0300
> Some functions use unlocked check for sock::sk_state. This does not guarantee
> a new value assigned to sk_state on some CPU is already visible on this CPU.
> 
> Example:
> 
> [CPU0:Task0]                    [CPU1:Task1]
> unix_listen()
>   unix_state_lock(sk);
>   sk->sk_state = TCP_LISTEN;
>   unix_state_unlock(sk);
>                                 unix_accept()
>                                   if (sk->sk_state != TCP_LISTEN) /* not visible */
>                                      goto out;                    /* return error */
> 
> Task1 may miss the new sk->sk_state value, and then unix_accept() returns an
> error. Since in this situation unix_accept() is called chronologically later,
> such behavior is not obvious and it is wrong.

Have you seen this on a real workload?

It sounds like a userspace bug if accept() is called on a different
CPU before listen() returns.  In that case accept() is fetching sk
concurrently with listen(), so I think there should be no guarantee
that sk_state is already TCP_LISTEN.

The same goes for the other TCP_ESTABLISHED tests: it seems the program
is calling sendmsg()/recvmsg() while sk is still TCP_CLOSE and betting
that a concurrent connect() will finish first.


> 
> This patch aims to fix the problem. A new function unix_sock_state() is
> introduced, and it makes sure a user never misses a new state assigned just
> before the function is called. We will use it in the places where unlocked
> sk_state dereferencing was used before.
> 
> Note that there remain some more places with sk_state unfixed. Also, the same
> problem exists with unix_peer(). This will be a subject for future patches.
> 
> Signed-off-by: Kirill Tkhai <tkhai@...ru>
> ---
>  net/unix/af_unix.c |   43 +++++++++++++++++++++++++++++++------------
>  1 file changed, 31 insertions(+), 12 deletions(-)
> 
> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> index 009616fa0256..f53e09a0753b 100644
> --- a/net/unix/af_unix.c
> +++ b/net/unix/af_unix.c
> @@ -247,6 +247,28 @@ struct sock *unix_peer_get(struct sock *s)
>  }
>  EXPORT_SYMBOL_GPL(unix_peer_get);
>  
> +/* This function returns current sk->sk_state guaranteeing
> + * its relevance in case of assignment was made on other CPU.
> + */
> +static unsigned char unix_sock_state(struct sock *sk)
> +{
> +	unsigned char s_state = READ_ONCE(sk->sk_state);
> +
> +	/* SOCK_STREAM and SOCK_SEQPACKET sockets never change their
> +	 * sk_state after switching to TCP_ESTABLISHED or TCP_LISTEN.
> +	 * We may avoid taking the lock in case of those states are
> +	 * already visible.
> +	 */
> +	if ((s_state == TCP_ESTABLISHED || s_state == TCP_LISTEN)
> +	    && sk->sk_type != SOCK_DGRAM)
> +		return s_state;
> +
> +	unix_state_lock(sk);
> +	s_state = sk->sk_state;
> +	unix_state_unlock(sk);
> +	return s_state;
> +}
> +
>  static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr,
>  					     int addr_len)
>  {
> @@ -812,13 +834,9 @@ static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
>  	int nr_fds = 0;
>  
>  	if (sk) {
> -		s_state = READ_ONCE(sk->sk_state);
> +		s_state = unix_sock_state(sk);
>  		u = unix_sk(sk);
>  
> -		/* SOCK_STREAM and SOCK_SEQPACKET sockets never change their
> -		 * sk_state after switching to TCP_ESTABLISHED or TCP_LISTEN.
> -		 * SOCK_DGRAM is ordinary. So, no lock is needed.
> -		 */
>  		if (sock->type == SOCK_DGRAM || s_state == TCP_ESTABLISHED)
>  			nr_fds = atomic_read(&u->scm_stat.nr_fds);
>  		else if (s_state == TCP_LISTEN)
> @@ -1686,7 +1704,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
>  		goto out;
>  
>  	err = -EINVAL;
> -	if (sk->sk_state != TCP_LISTEN)
> +	if (unix_sock_state(sk) != TCP_LISTEN)
>  		goto out;
>  
>  	/* If socket state is TCP_LISTEN it cannot change (for now...),
> @@ -2178,7 +2196,8 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
>  	}
>  
>  	if (msg->msg_namelen) {
> -		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
> +		unsigned char s_state = unix_sock_state(sk);
> +		err = s_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;

There is no need to define s_state here; if you do keep it, a blank
line is needed after the declaration.
https://patchwork.kernel.org/project/netdevbpf/patch/72ae40ef-2d68-2e89-46d3-fc8f820db42a@ya.ru/

>  		goto out_err;
>  	} else {
>  		err = -ENOTCONN;
> @@ -2279,7 +2298,7 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
>  		return -EOPNOTSUPP;
>  
>  	other = unix_peer(sk);
> -	if (!other || sk->sk_state != TCP_ESTABLISHED)
> +	if (!other || unix_sock_state(sk) != TCP_ESTABLISHED)
>  		return -ENOTCONN;
>  
>  	if (false) {
> @@ -2391,7 +2410,7 @@ static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
>  	if (err)
>  		return err;
>  
> -	if (sk->sk_state != TCP_ESTABLISHED)
> +	if (unix_sock_state(sk) != TCP_ESTABLISHED)
>  		return -ENOTCONN;
>  
>  	if (msg->msg_namelen)
> @@ -2405,7 +2424,7 @@ static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
>  {
>  	struct sock *sk = sock->sk;
>  
> -	if (sk->sk_state != TCP_ESTABLISHED)
> +	if (unix_sock_state(sk) != TCP_ESTABLISHED)
>  		return -ENOTCONN;
>  
>  	return unix_dgram_recvmsg(sock, msg, size, flags);
> @@ -2689,7 +2708,7 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
>  
>  static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
>  {
> -	if (unlikely(sk->sk_state != TCP_ESTABLISHED))
> +	if (unlikely(unix_sock_state(sk) != TCP_ESTABLISHED))
>  		return -ENOTCONN;
>  
>  	return unix_read_skb(sk, recv_actor);
> @@ -2713,7 +2732,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
>  	size_t size = state->size;
>  	unsigned int last_len;
>  
> -	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
> +	if (unlikely(unix_sock_state(sk) != TCP_ESTABLISHED)) {
>  		err = -EINVAL;
>  		goto out;
>  	}

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ