[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230124175712.38112-1-kuniyu@amazon.com>
Date: Tue, 24 Jan 2023 09:57:12 -0800
From: Kuniyuki Iwashima <kuniyu@...zon.com>
To: <tkhai@...ru>
CC: <davem@...emloft.net>, <edumazet@...gle.com>, <gorcunov@...il.com>,
<kuba@...nel.org>, <kuniyu@...zon.com>, <netdev@...r.kernel.org>,
<pabeni@...hat.com>
Subject: Re: [PATCH net-next] unix: Guarantee sk_state relevance in case of it was assigned by a task on other cpu
From: Kirill Tkhai <tkhai@...ru>
Date: Mon, 23 Jan 2023 01:21:20 +0300
> Some functions use unlocked check for sock::sk_state. This does not guarantee
> a new value assigned to sk_state on some CPU is already visible on this CPU.
>
> Example:
>
> [CPU0:Task0] [CPU1:Task1]
> unix_listen()
> unix_state_lock(sk);
> sk->sk_state = TCP_LISTEN;
> unix_state_unlock(sk);
> unix_accept()
> if (sk->sk_state != TCP_LISTEN) /* not visible */
> goto out; /* return error */
>
> Task1 may miss new sk->sk_state value, and then unix_accept() returns error.
> Since in this situation unix_accept() is called chronologically later, such
> behavior is not obvious and it is wrong.
Have you seen this on a real workload?
It sounds like a userspace bug that accept() is called on a different
CPU before listen() returns. At best, accept() is fetching sk_state at
the same time as listen() updates it, so I think there should be no
guarantee that sk_state is TCP_LISTEN.
The same applies to the other TCP_ESTABLISHED tests: it seems a program
is calling sendmsg()/recvmsg() while sk is still TCP_CLOSE and betting
that a concurrent connect() will finish earlier.
>
> This patch aims to fix the problem. A new function unix_sock_state() is
> introduced, and it makes sure a user never misses a new state assigned just
> before the function is called. We will use it in the places, where unlocked
> sk_state dereferencing was used before.
>
> Note, that there remain some more places with sk_state unfixed. Also, the same
> problem is with unix_peer(). This will be a subject for future patches.
>
> Signed-off-by: Kirill Tkhai <tkhai@...ru>
> ---
> net/unix/af_unix.c | 43 +++++++++++++++++++++++++++++++------------
> 1 file changed, 31 insertions(+), 12 deletions(-)
>
> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
> index 009616fa0256..f53e09a0753b 100644
> --- a/net/unix/af_unix.c
> +++ b/net/unix/af_unix.c
> @@ -247,6 +247,28 @@ struct sock *unix_peer_get(struct sock *s)
> }
> EXPORT_SYMBOL_GPL(unix_peer_get);
>
> +/* This function returns current sk->sk_state guaranteeing
> + * its relevance in case of assignment was made on other CPU.
> + */
> +static unsigned char unix_sock_state(struct sock *sk)
> +{
> + unsigned char s_state = READ_ONCE(sk->sk_state);
> +
> + /* SOCK_STREAM and SOCK_SEQPACKET sockets never change their
> + * sk_state after switching to TCP_ESTABLISHED or TCP_LISTEN.
> + * We may avoid taking the lock in case of those states are
> + * already visible.
> + */
> + if ((s_state == TCP_ESTABLISHED || s_state == TCP_LISTEN)
> + && sk->sk_type != SOCK_DGRAM)
> + return s_state;
> +
> + unix_state_lock(sk);
> + s_state = sk->sk_state;
> + unix_state_unlock(sk);
> + return s_state;
> +}
> +
> static struct unix_address *unix_create_addr(struct sockaddr_un *sunaddr,
> int addr_len)
> {
> @@ -812,13 +834,9 @@ static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
> int nr_fds = 0;
>
> if (sk) {
> - s_state = READ_ONCE(sk->sk_state);
> + s_state = unix_sock_state(sk);
> u = unix_sk(sk);
>
> - /* SOCK_STREAM and SOCK_SEQPACKET sockets never change their
> - * sk_state after switching to TCP_ESTABLISHED or TCP_LISTEN.
> - * SOCK_DGRAM is ordinary. So, no lock is needed.
> - */
> if (sock->type == SOCK_DGRAM || s_state == TCP_ESTABLISHED)
> nr_fds = atomic_read(&u->scm_stat.nr_fds);
> else if (s_state == TCP_LISTEN)
> @@ -1686,7 +1704,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
> goto out;
>
> err = -EINVAL;
> - if (sk->sk_state != TCP_LISTEN)
> + if (unix_sock_state(sk) != TCP_LISTEN)
> goto out;
>
> /* If socket state is TCP_LISTEN it cannot change (for now...),
> @@ -2178,7 +2196,8 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
> }
>
> if (msg->msg_namelen) {
> - err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
> + unsigned char s_state = unix_sock_state(sk);
> + err = s_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
There is no need to define s_state here; if you do keep it, a blank
line is needed after the definition.
https://patchwork.kernel.org/project/netdevbpf/patch/72ae40ef-2d68-2e89-46d3-fc8f820db42a@ya.ru/
> goto out_err;
> } else {
> err = -ENOTCONN;
> @@ -2279,7 +2298,7 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
> return -EOPNOTSUPP;
>
> other = unix_peer(sk);
> - if (!other || sk->sk_state != TCP_ESTABLISHED)
> + if (!other || unix_sock_state(sk) != TCP_ESTABLISHED)
> return -ENOTCONN;
>
> if (false) {
> @@ -2391,7 +2410,7 @@ static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
> if (err)
> return err;
>
> - if (sk->sk_state != TCP_ESTABLISHED)
> + if (unix_sock_state(sk) != TCP_ESTABLISHED)
> return -ENOTCONN;
>
> if (msg->msg_namelen)
> @@ -2405,7 +2424,7 @@ static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
> {
> struct sock *sk = sock->sk;
>
> - if (sk->sk_state != TCP_ESTABLISHED)
> + if (unix_sock_state(sk) != TCP_ESTABLISHED)
> return -ENOTCONN;
>
> return unix_dgram_recvmsg(sock, msg, size, flags);
> @@ -2689,7 +2708,7 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
>
> static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
> {
> - if (unlikely(sk->sk_state != TCP_ESTABLISHED))
> + if (unlikely(unix_sock_state(sk) != TCP_ESTABLISHED))
> return -ENOTCONN;
>
> return unix_read_skb(sk, recv_actor);
> @@ -2713,7 +2732,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
> size_t size = state->size;
> unsigned int last_len;
>
> - if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
> + if (unlikely(unix_sock_state(sk) != TCP_ESTABLISHED)) {
> err = -EINVAL;
> goto out;
> }
Powered by blists - more mailing lists