lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <896de7c3-5d7f-4b06-5159-ed58c350bafc@kernel.org>
Date: Fri, 1 Dec 2023 15:45:09 -0800 (PST)
From: Mat Martineau <martineau@...nel.org>
To: David Laight <David.Laight@...lab.com>
cc: "netdev@...r.kernel.org" <netdev@...r.kernel.org>, 
    Jakub Kicinski <kuba@...nel.org>, "David S. Miller" <davem@...emloft.net>, 
    Stephen Hemminger <stephen@...workplumber.org>, 
    Eric Dumazet <edumazet@...gle.com>, David Ahern <dsahern@...nel.org>, 
    Paolo Abeni <pabeni@...hat.com>, 
    "jakub@...udflare.com" <jakub@...udflare.com>
Subject: Re: [PATCH net-next] ipv4: Use READ/WRITE_ONCE() for IP
 local_port_range

On Wed, 29 Nov 2023, David Laight wrote:

> Commit 227b60f5102cd added a seqlock to ensure that the low and high
> port numbers were always updated together.
> This is overkill because the two 16bit port numbers can be held in
> a u32 and read/written in a single instruction.
>
> More recently 91d0b78c5177f added support for finer per-socket limits.
> The user-supplied value is 'high << 16 | low' but they are held
> separately and the socket options protected by the socket lock.
>
> Use a u32 containing 'high << 16 | low' for both the 'net' and 'sk'
> fields and use READ_ONCE()/WRITE_ONCE() to ensure both values are
> always updated together.
>
> Change (the now trival) inet_get_local_port_range() to a static inline
> to optimise the calling code.
> (In particular avoiding returning integers by reference.)
>
> Signed-off-by: David Laight <david.laight@...lab.com>
> ---
> include/net/inet_sock.h         |  5 +----
> include/net/ip.h                |  7 ++++++-
> include/net/netns/ipv4.h        |  3 +--
> net/ipv4/af_inet.c              |  4 +---
> net/ipv4/inet_connection_sock.c | 29 ++++++++++------------------
> net/ipv4/ip_sockglue.c          | 34 ++++++++++++++++-----------------
> net/ipv4/sysctl_net_ipv4.c      | 12 ++++--------
> 7 files changed, 40 insertions(+), 54 deletions(-)
>
> diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
> index 74db6d97cae1..ebf71410aa2b 100644
> --- a/include/net/inet_sock.h
> +++ b/include/net/inet_sock.h
> @@ -234,10 +234,7 @@ struct inet_sock {
> 	int			uc_index;
> 	int			mc_index;
> 	__be32			mc_addr;
> -	struct {
> -		__u16 lo;
> -		__u16 hi;
> -	}			local_port_range;
> +	u32			local_port_range;
>
> 	struct ip_mc_socklist __rcu	*mc_list;
> 	struct inet_cork_full	cork;
> diff --git a/include/net/ip.h b/include/net/ip.h
> index 1fc4c8d69e33..154f9dd75fe6 100644
> --- a/include/net/ip.h
> +++ b/include/net/ip.h
> @@ -349,7 +349,12 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o
> 	} \
> }
>
> -void inet_get_local_port_range(const struct net *net, int *low, int *high);
> +static inline void inet_get_local_port_range(const struct net *net, int *low, int *high)
> +{
> +	u32 range = READ_ONCE(net->ipv4.ip_local_ports.range);
> +	*low = range & 0xffff;
> +	*high = range >> 16;
> +}
> void inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high);
>
> #ifdef CONFIG_SYSCTL
> diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
> index 73f43f699199..30ba5359da84 100644
> --- a/include/net/netns/ipv4.h
> +++ b/include/net/netns/ipv4.h
> @@ -19,8 +19,7 @@ struct hlist_head;
> struct fib_table;
> struct sock;
> struct local_ports {
> -	seqlock_t	lock;
> -	int		range[2];
> +	u32		range;	/* high << 16 | low */
> 	bool		warned;
> };
>
> diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
> index fb81de10d332..b8964b40c3c0 100644
> --- a/net/ipv4/af_inet.c
> +++ b/net/ipv4/af_inet.c
> @@ -1847,9 +1847,7 @@ static __net_init int inet_init_net(struct net *net)
> 	/*
> 	 * Set defaults for local port range
> 	 */
> -	seqlock_init(&net->ipv4.ip_local_ports.lock);
> -	net->ipv4.ip_local_ports.range[0] =  32768;
> -	net->ipv4.ip_local_ports.range[1] =  60999;
> +	net->ipv4.ip_local_ports.range =  60999 << 16 | 32768;

Hi David -

Better to use unsigned integer constants here, since 60999 << 16 doesn't 
fit in a signed int on 32-bit platforms.

>
> 	seqlock_init(&net->ipv4.ping_group_range.lock);
> 	/*
> diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
> index 394a498c2823..1a45d41f8b39 100644
> --- a/net/ipv4/inet_connection_sock.c
> +++ b/net/ipv4/inet_connection_sock.c
> @@ -117,34 +117,25 @@ bool inet_rcv_saddr_any(const struct sock *sk)
> 	return !sk->sk_rcv_saddr;
> }
>
> -void inet_get_local_port_range(const struct net *net, int *low, int *high)
> -{
> -	unsigned int seq;
> -
> -	do {
> -		seq = read_seqbegin(&net->ipv4.ip_local_ports.lock);
> -
> -		*low = net->ipv4.ip_local_ports.range[0];
> -		*high = net->ipv4.ip_local_ports.range[1];
> -	} while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq));
> -}
> -EXPORT_SYMBOL(inet_get_local_port_range);
> -
> void inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high)
> {
> 	const struct inet_sock *inet = inet_sk(sk);
> 	const struct net *net = sock_net(sk);
> 	int lo, hi, sk_lo, sk_hi;
> +	u32 sk_range;
>
> 	inet_get_local_port_range(net, &lo, &hi);
>
> -	sk_lo = inet->local_port_range.lo;
> -	sk_hi = inet->local_port_range.hi;
> +	sk_range = READ_ONCE(inet->local_port_range);
> +	if (unlikely(sk_range)) {
> +		sk_lo = sk_range & 0xffff;
> +		sk_hi = sk_range >> 16;
>
> -	if (unlikely(lo <= sk_lo && sk_lo <= hi))
> -		lo = sk_lo;
> -	if (unlikely(lo <= sk_hi && sk_hi <= hi))
> -		hi = sk_hi;
> +		if (unlikely(lo <= sk_lo && sk_lo <= hi))
> +			lo = sk_lo;
> +		if (unlikely(lo <= sk_hi && sk_hi <= hi))
> +			hi = sk_hi;
> +	}
>
> 	*low = lo;
> 	*high = hi;
> diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
> index 2efc53526a38..bf940fe249a5 100644
> --- a/net/ipv4/ip_sockglue.c
> +++ b/net/ipv4/ip_sockglue.c
> @@ -1055,6 +1055,20 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
> 	case IP_TOS:	/* This sets both TOS and Precedence */
> 		ip_sock_set_tos(sk, val);
> 		return 0;
> +
> +	case IP_LOCAL_PORT_RANGE:
> +	{
> +		const __u16 lo = val;
> +		const __u16 hi = val >> 16;

Suggest casting 'val' to an unsigned int before shifting right, even 
though assigning to a __u16 will mask off any surprising bits introduced 
by an arithmetic right shift of a 32-bit signed int.

> +
> +		if (optlen != sizeof(__u32))
> +			return -EINVAL;
> +		if (lo != 0 && hi != 0 && lo > hi)
> +			return -EINVAL;
> +
> +		WRITE_ONCE(inet->local_port_range, val);
> +		return 0;
> +	}
> 	}
>
> 	err = 0;
> @@ -1332,20 +1346,6 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
> 		err = xfrm_user_policy(sk, optname, optval, optlen);
> 		break;
>
> -	case IP_LOCAL_PORT_RANGE:
> -	{
> -		const __u16 lo = val;
> -		const __u16 hi = val >> 16;
> -
> -		if (optlen != sizeof(__u32))
> -			goto e_inval;
> -		if (lo != 0 && hi != 0 && lo > hi)
> -			goto e_inval;
> -
> -		inet->local_port_range.lo = lo;
> -		inet->local_port_range.hi = hi;
> -		break;
> -	}
> 	default:
> 		err = -ENOPROTOOPT;
> 		break;
> @@ -1692,6 +1692,9 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
> 			return -EFAULT;
> 		return 0;
> 	}
> +	case IP_LOCAL_PORT_RANGE:
> +		val = READ_ONCE(inet->local_port_range);
> +		goto copyval;
> 	}
>
> 	if (needs_rtnl)
> @@ -1721,9 +1724,6 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
> 		else
> 			err = ip_get_mcast_msfilter(sk, optval, optlen, len);
> 		goto out;
> -	case IP_LOCAL_PORT_RANGE:
> -		val = inet->local_port_range.hi << 16 | inet->local_port_range.lo;
> -		break;
> 	case IP_PROTOCOL:
> 		val = inet_sk(sk)->inet_num;
> 		break;
> diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
> index f63a545a7374..b008b6b5e85d 100644
> --- a/net/ipv4/sysctl_net_ipv4.c
> +++ b/net/ipv4/sysctl_net_ipv4.c
> @@ -54,22 +54,18 @@ static void set_local_port_range(struct net *net, int range[2])
> {
> 	bool same_parity = !((range[0] ^ range[1]) & 1);
>
> -	write_seqlock_bh(&net->ipv4.ip_local_ports.lock);
> 	if (same_parity && !net->ipv4.ip_local_ports.warned) {
> 		net->ipv4.ip_local_ports.warned = true;
> 		pr_err_ratelimited("ip_local_port_range: prefer different parity for start/end values.\n");
> 	}
> -	net->ipv4.ip_local_ports.range[0] = range[0];
> -	net->ipv4.ip_local_ports.range[1] = range[1];
> -	write_sequnlock_bh(&net->ipv4.ip_local_ports.lock);
> +	WRITE_ONCE(net->ipv4.ip_local_ports.range, range[1] << 16 | range[0]);

Similar, make sure the value is cast to unsigned before shifting here.


- Mat


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ