lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Fri, 13 Sep 2019 16:47:50 +0200
From:   Paolo Abeni <pabeni@...hat.com>
To:     Willem de Bruijn <willemdebruijn.kernel@...il.com>,
        netdev@...r.kernel.org
Cc:     davem@...emloft.net, edumazet@...gle.com, kraig@...gle.com,
        zabele@...cast.net, mark.keaton@...theon.com,
        Willem de Bruijn <willemb@...gle.com>
Subject: Re: [PATCH net] udp: correct reuseport selection with connected
 sockets

On Thu, 2019-09-12 at 21:16 -0400, Willem de Bruijn wrote:
> From: Willem de Bruijn <willemb@...gle.com>
> 
> UDP reuseport groups can hold a mix of unconnected and connected sockets.
> Ensure that connections only receive all traffic to their 4-tuple.
> 
> Fast reuseport returns on the first reuseport match on the assumption
> that all matches are equal. Only if connections are present, return to
> the previous behavior of scoring all sockets.
> 
> Record if connections are present and if so (1) treat such connected
> sockets as an independent match from the group, (2) only return
> 2-tuple matches from reuseport and (3) do not return on the first
> 2-tuple reuseport match to allow for a higher scoring match later.
> 
> New field has_conns is set without locks. No other fields in the
> bitmap are modified at runtime and the field is only ever set
> unconditionally, so an RMW cannot miss a change.
> 
> Fixes: e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection")
> Link: http://lkml.kernel.org/r/CA+FuTSfRP09aJNYRt04SS6qj22ViiOEWaWmLAwX0psk8-PGNxw@mail.gmail.com
> Signed-off-by: Willem de Bruijn <willemb@...gle.com>
> 
> ---
> 
> I was unable to compile some older kernels, so the Fixes tag is based
> on basic analysis rather than on bisecting with the regression test.
> ---
>  include/net/sock_reuseport.h | 20 +++++++++++++++++++-
>  net/core/sock_reuseport.c    | 15 +++++++++++++--
>  net/ipv4/datagram.c          |  2 ++
>  net/ipv4/udp.c               |  5 +++--
>  net/ipv6/datagram.c          |  2 ++
>  net/ipv6/udp.c               |  5 +++--
>  6 files changed, 42 insertions(+), 7 deletions(-)
> 
> diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
> index d9112de85261..43f4a818d88f 100644
> --- a/include/net/sock_reuseport.h
> +++ b/include/net/sock_reuseport.h
> @@ -21,7 +21,8 @@ struct sock_reuseport {
>  	unsigned int		synq_overflow_ts;
>  	/* ID stays the same even after the size of socks[] grows. */
>  	unsigned int		reuseport_id;
> -	bool			bind_inany;
> +	unsigned int		bind_inany:1;
> +	unsigned int		has_conns:1;
>  	struct bpf_prog __rcu	*prog;		/* optional BPF sock selector */
>  	struct sock		*socks[0];	/* array of sock pointers */
>  };
> @@ -37,6 +38,23 @@ extern struct sock *reuseport_select_sock(struct sock *sk,
>  extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
>  extern int reuseport_detach_prog(struct sock *sk);
>  
> +static inline bool reuseport_has_conns(struct sock *sk, bool set)
> +{
> +	struct sock_reuseport *reuse;
> +	bool ret = false;
> +
> +	rcu_read_lock();
> +	reuse = rcu_dereference(sk->sk_reuseport_cb);
> +	if (reuse) {
> +		if (set)
> +			reuse->has_conns = 1;
> +		ret = reuse->has_conns;
> +	}
> +	rcu_read_unlock();
> +
> +	return ret;
> +}
> +
>  int reuseport_get_id(struct sock_reuseport *reuse);
>  
>  #endif  /* _SOCK_REUSEPORT_H */
> diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
> index 9408f9264d05..f3ceec93f392 100644
> --- a/net/core/sock_reuseport.c
> +++ b/net/core/sock_reuseport.c
> @@ -295,8 +295,19 @@ struct sock *reuseport_select_sock(struct sock *sk,
>  
>  select_by_hash:
>  		/* no bpf or invalid bpf result: fall back to hash usage */
> -		if (!sk2)
> -			sk2 = reuse->socks[reciprocal_scale(hash, socks)];
> +		if (!sk2) {
> +			int i, j;
> +
> +			i = j = reciprocal_scale(hash, socks);
> +			while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) {
> +				i++;
> +				if (i >= reuse->num_socks)
> +					i = 0;
> +				if (i == j)
> +					goto out;
> +			}
> +			sk2 = reuse->socks[i];
> +		}
>  	}
>  
>  out:
> diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
> index 7bd29e694603..9a0fe0c2fa02 100644
> --- a/net/ipv4/datagram.c
> +++ b/net/ipv4/datagram.c
> @@ -15,6 +15,7 @@
>  #include <net/sock.h>
>  #include <net/route.h>
>  #include <net/tcp_states.h>
> +#include <net/sock_reuseport.h>
>  
>  int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
>  {
> @@ -69,6 +70,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
>  	}
>  	inet->inet_daddr = fl4->daddr;
>  	inet->inet_dport = usin->sin_port;
> +	reuseport_has_conns(sk, true);
>  	sk->sk_state = TCP_ESTABLISHED;
>  	sk_set_txhash(sk);
>  	inet->inet_id = jiffies;
> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> index d88821c794fb..16486c8b708b 100644
> --- a/net/ipv4/udp.c
> +++ b/net/ipv4/udp.c
> @@ -423,12 +423,13 @@ static struct sock *udp4_lib_lookup2(struct net *net,
>  		score = compute_score(sk, net, saddr, sport,
>  				      daddr, hnum, dif, sdif);
>  		if (score > badness) {
> -			if (sk->sk_reuseport) {
> +			if (sk->sk_reuseport &&
> +			    sk->sk_state != TCP_ESTABLISHED) {
>  				hash = udp_ehashfn(net, daddr, hnum,
>  						   saddr, sport);
>  				result = reuseport_select_sock(sk, hash, skb,
>  							sizeof(struct udphdr));
> -				if (result)
> +				if (result && !reuseport_has_conns(sk, false))
>  					return result;
>  			}
>  			badness = score;
> diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
> index 9ab897ded4df..96f939248d2f 100644
> --- a/net/ipv6/datagram.c
> +++ b/net/ipv6/datagram.c
> @@ -27,6 +27,7 @@
>  #include <net/ip6_route.h>
>  #include <net/tcp_states.h>
>  #include <net/dsfield.h>
> +#include <net/sock_reuseport.h>
>  
>  #include <linux/errqueue.h>
>  #include <linux/uaccess.h>
> @@ -254,6 +255,7 @@ int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr,
>  		goto out;
>  	}
>  
> +	reuseport_has_conns(sk, true);
>  	sk->sk_state = TCP_ESTABLISHED;
>  	sk_set_txhash(sk);
>  out:
> diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
> index 827fe7385078..5995fdc99d3f 100644
> --- a/net/ipv6/udp.c
> +++ b/net/ipv6/udp.c
> @@ -158,13 +158,14 @@ static struct sock *udp6_lib_lookup2(struct net *net,
>  		score = compute_score(sk, net, saddr, sport,
>  				      daddr, hnum, dif, sdif);
>  		if (score > badness) {
> -			if (sk->sk_reuseport) {
> +			if (sk->sk_reuseport &&
> +			    sk->sk_state != TCP_ESTABLISHED) {
>  				hash = udp6_ehashfn(net, daddr, hnum,
>  						    saddr, sport);
>  
>  				result = reuseport_select_sock(sk, hash, skb,
>  							sizeof(struct udphdr));
> -				if (result)
> +				if (result && !reuseport_has_conns(sk, false))
>  					return result;
>  			}
>  			result = sk;

The patch LGTM,

Acked-by: Paolo Abeni <pabeni@...hat.com>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ