[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CANn89iJ_uWADjV-JytShdDXYa=nDghEYS9Sy+EBu=tUWgwuS9w@mail.gmail.com>
Date: Thu, 30 Nov 2023 10:28:23 +0100
From: Eric Dumazet <edumazet@...gle.com>
To: David Laight <David.Laight@...lab.com>
Cc: "netdev@...r.kernel.org" <netdev@...r.kernel.org>, Jakub Kicinski <kuba@...nel.org>,
"David S. Miller" <davem@...emloft.net>, Stephen Hemminger <stephen@...workplumber.org>,
David Ahern <dsahern@...nel.org>, Paolo Abeni <pabeni@...hat.com>,
"jakub@...udflare.com" <jakub@...udflare.com>
Subject: Re: [PATCH net-next] ipv4: Use READ/WRITE_ONCE() for IP local_port_range
On Wed, Nov 29, 2023 at 8:26 PM David Laight <David.Laight@...lab.com> wrote:
>
> Commit 227b60f5102cd added a seqlock to ensure that the low and high
> port numbers were always updated together.
> This is overkill because the two 16bit port numbers can be held in
> a u32 and read/written in a single instruction.
>
> More recently 91d0b78c5177f added support for finer per-socket limits.
> The user-supplied value is 'high << 16 | low' but they are held
> separately and the socket options protected by the socket lock.
>
> Use a u32 containing 'high << 16 | low' for both the 'net' and 'sk'
> fields and use READ_ONCE()/WRITE_ONCE() to ensure both values are
> always updated together.
>
> Change (the now trival) inet_get_local_port_range() to a static inline
> to optimise the calling code.
> (In particular avoiding returning integers by reference.)
>
> Signed-off-by: David Laight <david.laight@...lab.com>
> ---
> include/net/inet_sock.h | 5 +----
> include/net/ip.h | 7 ++++++-
> include/net/netns/ipv4.h | 3 +--
> net/ipv4/af_inet.c | 4 +---
> net/ipv4/inet_connection_sock.c | 29 ++++++++++------------------
> net/ipv4/ip_sockglue.c | 34 ++++++++++++++++-----------------
> net/ipv4/sysctl_net_ipv4.c | 12 ++++--------
> 7 files changed, 40 insertions(+), 54 deletions(-)
>
> diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
> index 74db6d97cae1..ebf71410aa2b 100644
> --- a/include/net/inet_sock.h
> +++ b/include/net/inet_sock.h
> @@ -234,10 +234,7 @@ struct inet_sock {
> int uc_index;
> int mc_index;
> __be32 mc_addr;
> - struct {
> - __u16 lo;
> - __u16 hi;
> - } local_port_range;
> + u32 local_port_range;
>
> struct ip_mc_socklist __rcu *mc_list;
> struct inet_cork_full cork;
> diff --git a/include/net/ip.h b/include/net/ip.h
> index 1fc4c8d69e33..154f9dd75fe6 100644
> --- a/include/net/ip.h
> +++ b/include/net/ip.h
> @@ -349,7 +349,12 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o
> } \
> }
>
> -void inet_get_local_port_range(const struct net *net, int *low, int *high);
> +static inline void inet_get_local_port_range(const struct net *net, int *low, int *high)
> +{
> + u32 range = READ_ONCE(net->ipv4.ip_local_ports.range);
Please insert an empty line here.
> + *low = range & 0xffff;
> + *high = range >> 16;
> +}
> void inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high);
>
> #ifdef CONFIG_SYSCTL
> diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
> index 73f43f699199..30ba5359da84 100644
> --- a/include/net/netns/ipv4.h
> +++ b/include/net/netns/ipv4.h
> @@ -19,8 +19,7 @@ struct hlist_head;
> struct fib_table;
> struct sock;
> struct local_ports {
> - seqlock_t lock;
> - int range[2];
> + u32 range; /* high << 16 | low */
> bool warned;
> };
>
> diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
> index fb81de10d332..b8964b40c3c0 100644
> --- a/net/ipv4/af_inet.c
> +++ b/net/ipv4/af_inet.c
> @@ -1847,9 +1847,7 @@ static __net_init int inet_init_net(struct net *net)
> /*
> * Set defaults for local port range
> */
> - seqlock_init(&net->ipv4.ip_local_ports.lock);
> - net->ipv4.ip_local_ports.range[0] = 32768;
> - net->ipv4.ip_local_ports.range[1] = 60999;
> + net->ipv4.ip_local_ports.range = 60999 << 16 | 32768;
>
> seqlock_init(&net->ipv4.ping_group_range.lock);
> /*
> diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
> index 394a498c2823..1a45d41f8b39 100644
> --- a/net/ipv4/inet_connection_sock.c
> +++ b/net/ipv4/inet_connection_sock.c
> @@ -117,34 +117,25 @@ bool inet_rcv_saddr_any(const struct sock *sk)
> return !sk->sk_rcv_saddr;
> }
>
> -void inet_get_local_port_range(const struct net *net, int *low, int *high)
> -{
> - unsigned int seq;
> -
> - do {
> - seq = read_seqbegin(&net->ipv4.ip_local_ports.lock);
> -
> - *low = net->ipv4.ip_local_ports.range[0];
> - *high = net->ipv4.ip_local_ports.range[1];
> - } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq));
> -}
> -EXPORT_SYMBOL(inet_get_local_port_range);
> -
> void inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high)
> {
> const struct inet_sock *inet = inet_sk(sk);
> const struct net *net = sock_net(sk);
> int lo, hi, sk_lo, sk_hi;
> + u32 sk_range;
>
> inet_get_local_port_range(net, &lo, &hi);
>
> - sk_lo = inet->local_port_range.lo;
> - sk_hi = inet->local_port_range.hi;
> + sk_range = READ_ONCE(inet->local_port_range);
> + if (unlikely(sk_range)) {
> + sk_lo = sk_range & 0xffff;
> + sk_hi = sk_range >> 16;
>
> - if (unlikely(lo <= sk_lo && sk_lo <= hi))
> - lo = sk_lo;
> - if (unlikely(lo <= sk_hi && sk_hi <= hi))
> - hi = sk_hi;
> + if (unlikely(lo <= sk_lo && sk_lo <= hi))
> + lo = sk_lo;
> + if (unlikely(lo <= sk_hi && sk_hi <= hi))
> + hi = sk_hi;
> + }
>
> *low = lo;
> *high = hi;
> diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
> index 2efc53526a38..bf940fe249a5 100644
> --- a/net/ipv4/ip_sockglue.c
> +++ b/net/ipv4/ip_sockglue.c
> @@ -1055,6 +1055,20 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
> case IP_TOS: /* This sets both TOS and Precedence */
> ip_sock_set_tos(sk, val);
> return 0;
> +
> + case IP_LOCAL_PORT_RANGE:
> + {
> + const __u16 lo = val;
> + const __u16 hi = val >> 16;
I know that we use __u16 and __u32 already, but I think we should
reserve them for exported fields in uapi.
New code should use u16 and u32, no need for __ prefixes.
> +
> + if (optlen != sizeof(__u32))
> + return -EINVAL;
> + if (lo != 0 && hi != 0 && lo > hi)
> + return -EINVAL;
> +
> + WRITE_ONCE(inet->local_port_range, val);
> + return 0;
> + }
>
Powered by blists - more mailing lists