[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CANn89iJn-T_rKg67h6deW0Oyh=X4kWXVBrtvUJU+VpDTfpde0w@mail.gmail.com>
Date: Wed, 12 Oct 2022 11:59:43 -0700
From: Eric Dumazet <edumazet@...gle.com>
To: Kuniyuki Iwashima <kuniyu@...zon.com>
Cc: "David S. Miller" <davem@...emloft.net>,
Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>,
Hideaki YOSHIFUJI <yoshfuji@...ux-ipv6.org>,
David Ahern <dsahern@...nel.org>,
Martin KaFai Lau <martin.lau@...nel.org>,
Craig Gallek <kraig@...gle.com>,
Willem de Bruijn <willemb@...gle.com>,
Kuniyuki Iwashima <kuni1840@...il.com>, netdev@...r.kernel.org
Subject: Re: [PATCH v2 net] udp: Update reuse->has_conns under reuseport_lock.
On Wed, Oct 12, 2022 at 11:53 AM Kuniyuki Iwashima <kuniyu@...zon.com> wrote:
>
> When we call connect() for a UDP socket in a reuseport group, we have
> to update sk->sk_reuseport_cb->has_conns to 1. Otherwise, the kernel
> could wrongly select an unconnected socket for packets sent to the
> connected socket.
>
> However, the current way of setting has_conns is illegal and can
> trigger that problem. reuseport_has_conns() changes has_conns under
> rcu_read_lock(), which upgrades the RCU reader to an updater. It must
> then do the update under the updater's lock, reuseport_lock, but it
> currently does not.
>
> For this reason, there is a race below where we fail to set has_conns
> resulting in the wrong socket selection. To avoid the race, let's split
> the reader and updater with proper locking.
>
>   cpu1                               cpu2
> +----+                             +----+
>
> __ip[46]_datagram_connect()        reuseport_grow()
> .                                  .
> |- reuseport_has_conns(sk, true)   |- more_reuse = __reuseport_alloc(more_socks_size)
> |  .                               |
> |  |- rcu_read_lock()
> |  |- reuse = rcu_dereference(sk->sk_reuseport_cb)
> |  |
> |  |                               |  /* reuse->has_conns == 0 here */
> |  |                               |- more_reuse->has_conns = reuse->has_conns
> |  |- reuse->has_conns = 1         |  /* more_reuse->has_conns SHOULD BE 1 HERE */
> |  |                               |
> |  |                               |- rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb,
> |  |                               |                     more_reuse)
> |  `- rcu_read_unlock()            `- kfree_rcu(reuse, rcu)
> |
> |- sk->sk_state = TCP_ESTABLISHED
>
> Fixes: acdcecc61285 ("udp: correct reuseport selection with connected sockets")
> Signed-off-by: Kuniyuki Iwashima <kuniyu@...zon.com>
> ---
> v2:
> * Fix build failure for CONFIG_IPV6=m
> * Drop SO_INCOMING_CPU fix, which will be sent for net-next
> after the v6.1 merge window
>
> v1: https://lore.kernel.org/netdev/20221010174351.11024-1-kuniyu@amazon.com/
> ---
> include/net/sock_reuseport.h | 11 +++++------
> net/core/sock_reuseport.c | 15 +++++++++++++++
> net/ipv4/datagram.c | 2 +-
> net/ipv4/udp.c | 2 +-
> net/ipv6/datagram.c | 2 +-
> net/ipv6/udp.c | 2 +-
> 6 files changed, 24 insertions(+), 10 deletions(-)
>
> diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
> index 473b0b0fa4ab..efc9085c6892 100644
> --- a/include/net/sock_reuseport.h
> +++ b/include/net/sock_reuseport.h
> @@ -43,21 +43,20 @@ struct sock *reuseport_migrate_sock(struct sock *sk,
> extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
> extern int reuseport_detach_prog(struct sock *sk);
>
> -static inline bool reuseport_has_conns(struct sock *sk, bool set)
> +static inline bool reuseport_has_conns(struct sock *sk)
> {
> struct sock_reuseport *reuse;
> bool ret = false;
>
> rcu_read_lock();
> reuse = rcu_dereference(sk->sk_reuseport_cb);
> - if (reuse) {
> - if (set)
> - reuse->has_conns = 1;
> - ret = reuse->has_conns;
> - }
> + if (reuse && reuse->has_conns)
> + ret = true;
> rcu_read_unlock();
>
> return ret;
> }
>
> +void reuseport_has_conns_set(struct sock *sk);
> +
> #endif /* _SOCK_REUSEPORT_H */
> diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
> index 5daa1fa54249..abb414ed4aa7 100644
> --- a/net/core/sock_reuseport.c
> +++ b/net/core/sock_reuseport.c
> @@ -21,6 +21,21 @@ static DEFINE_IDA(reuseport_ida);
> static int reuseport_resurrect(struct sock *sk, struct sock_reuseport *old_reuse,
> struct sock_reuseport *reuse, bool bind_inany);
>
> +void reuseport_has_conns_set(struct sock *sk)
> +{
> + struct sock_reuseport *reuse;
> +
> + if (!rcu_access_pointer(sk->sk_reuseport_cb))
> + return;
> +
> + spin_lock(&reuseport_lock);
> + reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
> + lockdep_is_held(&reuseport_lock));
Could @reuse be NULL at this point?
The previous rcu_access_pointer() check was performed without reuseport_lock being held.
> + reuse->has_conns = 1;
> + spin_unlock(&reuseport_lock);
> +}
> +EXPORT_SYMBOL(reuseport_has_conns_set);
> +
> static int reuseport_sock_index(struct sock *sk,
> const struct sock_reuseport *reuse,
> bool closed)
> diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
> index 405a8c2aea64..5e66add7befa 100644
> --- a/net/ipv4/datagram.c
> +++ b/net/ipv4/datagram.c
> @@ -70,7 +70,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
> }
> inet->inet_daddr = fl4->daddr;
> inet->inet_dport = usin->sin_port;
> - reuseport_has_conns(sk, true);
> + reuseport_has_conns_set(sk);
> sk->sk_state = TCP_ESTABLISHED;
> sk_set_txhash(sk);
> inet->inet_id = prandom_u32();
> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> index d63118ce5900..29228231b058 100644
> --- a/net/ipv4/udp.c
> +++ b/net/ipv4/udp.c
> @@ -448,7 +448,7 @@ static struct sock *udp4_lib_lookup2(struct net *net,
> result = lookup_reuseport(net, sk, skb,
> saddr, sport, daddr, hnum);
> /* Fall back to scoring if group has connections */
> - if (result && !reuseport_has_conns(sk, false))
> + if (result && !reuseport_has_conns(sk))
> return result;
>
> result = result ? : sk;
> diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
> index df665d4e8f0f..5ecb56522f9d 100644
> --- a/net/ipv6/datagram.c
> +++ b/net/ipv6/datagram.c
> @@ -256,7 +256,7 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr,
> goto out;
> }
>
> - reuseport_has_conns(sk, true);
> + reuseport_has_conns_set(sk);
> sk->sk_state = TCP_ESTABLISHED;
> sk_set_txhash(sk);
> out:
> diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
> index 91e795bb9ade..56e4523a3004 100644
> --- a/net/ipv6/udp.c
> +++ b/net/ipv6/udp.c
> @@ -182,7 +182,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
> result = lookup_reuseport(net, sk, skb,
> saddr, sport, daddr, hnum);
> /* Fall back to scoring if group has connections */
> - if (result && !reuseport_has_conns(sk, false))
> + if (result && !reuseport_has_conns(sk))
> return result;
>
> result = result ? : sk;
> --
> 2.30.2
>
Powered by blists - more mailing lists