lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Thu, 30 Nov 2023 10:28:23 +0100
From: Eric Dumazet <edumazet@...gle.com>
To: David Laight <David.Laight@...lab.com>
Cc: "netdev@...r.kernel.org" <netdev@...r.kernel.org>, Jakub Kicinski <kuba@...nel.org>, 
	"David S. Miller" <davem@...emloft.net>, Stephen Hemminger <stephen@...workplumber.org>, 
	David Ahern <dsahern@...nel.org>, Paolo Abeni <pabeni@...hat.com>, 
	"jakub@...udflare.com" <jakub@...udflare.com>
Subject: Re: [PATCH net-next] ipv4: Use READ/WRITE_ONCE() for IP local_port_range

On Wed, Nov 29, 2023 at 8:26 PM David Laight <David.Laight@...lab.com> wrote:
>
> Commit 227b60f5102cd added a seqlock to ensure that the low and high
> port numbers were always updated together.
> This is overkill because the two 16bit port numbers can be held in
> a u32 and read/written in a single instruction.
>
> More recently 91d0b78c5177f added support for finer per-socket limits.
> The user-supplied value is 'high << 16 | low' but they are held
> separately and the socket options protected by the socket lock.
>
> Use a u32 containing 'high << 16 | low' for both the 'net' and 'sk'
> fields and use READ_ONCE()/WRITE_ONCE() to ensure both values are
> always updated together.
>
> Change (the now trival) inet_get_local_port_range() to a static inline
> to optimise the calling code.
> (In particular avoiding returning integers by reference.)
>
> Signed-off-by: David Laight <david.laight@...lab.com>
> ---
>  include/net/inet_sock.h         |  5 +----
>  include/net/ip.h                |  7 ++++++-
>  include/net/netns/ipv4.h        |  3 +--
>  net/ipv4/af_inet.c              |  4 +---
>  net/ipv4/inet_connection_sock.c | 29 ++++++++++------------------
>  net/ipv4/ip_sockglue.c          | 34 ++++++++++++++++-----------------
>  net/ipv4/sysctl_net_ipv4.c      | 12 ++++--------
>  7 files changed, 40 insertions(+), 54 deletions(-)
>
> diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
> index 74db6d97cae1..ebf71410aa2b 100644
> --- a/include/net/inet_sock.h
> +++ b/include/net/inet_sock.h
> @@ -234,10 +234,7 @@ struct inet_sock {
>         int                     uc_index;
>         int                     mc_index;
>         __be32                  mc_addr;
> -       struct {
> -               __u16 lo;
> -               __u16 hi;
> -       }                       local_port_range;
> +       u32                     local_port_range;
>
>         struct ip_mc_socklist __rcu     *mc_list;
>         struct inet_cork_full   cork;
> diff --git a/include/net/ip.h b/include/net/ip.h
> index 1fc4c8d69e33..154f9dd75fe6 100644
> --- a/include/net/ip.h
> +++ b/include/net/ip.h
> @@ -349,7 +349,12 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o
>         } \
>  }
>
> -void inet_get_local_port_range(const struct net *net, int *low, int *high);
> +static inline void inet_get_local_port_range(const struct net *net, int *low, int *high)
> +{
> +       u32 range = READ_ONCE(net->ipv4.ip_local_ports.range);

Please insert an empty line here.

> +       *low = range & 0xffff;
> +       *high = range >> 16;
> +}
>  void inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high);
>
>  #ifdef CONFIG_SYSCTL
> diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
> index 73f43f699199..30ba5359da84 100644
> --- a/include/net/netns/ipv4.h
> +++ b/include/net/netns/ipv4.h
> @@ -19,8 +19,7 @@ struct hlist_head;
>  struct fib_table;
>  struct sock;
>  struct local_ports {
> -       seqlock_t       lock;
> -       int             range[2];
> +       u32             range;  /* high << 16 | low */
>         bool            warned;
>  };
>
> diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
> index fb81de10d332..b8964b40c3c0 100644
> --- a/net/ipv4/af_inet.c
> +++ b/net/ipv4/af_inet.c
> @@ -1847,9 +1847,7 @@ static __net_init int inet_init_net(struct net *net)
>         /*
>          * Set defaults for local port range
>          */
> -       seqlock_init(&net->ipv4.ip_local_ports.lock);
> -       net->ipv4.ip_local_ports.range[0] =  32768;
> -       net->ipv4.ip_local_ports.range[1] =  60999;
> +       net->ipv4.ip_local_ports.range =  60999 << 16 | 32768;
>
>         seqlock_init(&net->ipv4.ping_group_range.lock);
>         /*
> diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
> index 394a498c2823..1a45d41f8b39 100644
> --- a/net/ipv4/inet_connection_sock.c
> +++ b/net/ipv4/inet_connection_sock.c
> @@ -117,34 +117,25 @@ bool inet_rcv_saddr_any(const struct sock *sk)
>         return !sk->sk_rcv_saddr;
>  }
>
> -void inet_get_local_port_range(const struct net *net, int *low, int *high)
> -{
> -       unsigned int seq;
> -
> -       do {
> -               seq = read_seqbegin(&net->ipv4.ip_local_ports.lock);
> -
> -               *low = net->ipv4.ip_local_ports.range[0];
> -               *high = net->ipv4.ip_local_ports.range[1];
> -       } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq));
> -}
> -EXPORT_SYMBOL(inet_get_local_port_range);
> -
>  void inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high)
>  {
>         const struct inet_sock *inet = inet_sk(sk);
>         const struct net *net = sock_net(sk);
>         int lo, hi, sk_lo, sk_hi;
> +       u32 sk_range;
>
>         inet_get_local_port_range(net, &lo, &hi);
>
> -       sk_lo = inet->local_port_range.lo;
> -       sk_hi = inet->local_port_range.hi;
> +       sk_range = READ_ONCE(inet->local_port_range);
> +       if (unlikely(sk_range)) {
> +               sk_lo = sk_range & 0xffff;
> +               sk_hi = sk_range >> 16;
>
> -       if (unlikely(lo <= sk_lo && sk_lo <= hi))
> -               lo = sk_lo;
> -       if (unlikely(lo <= sk_hi && sk_hi <= hi))
> -               hi = sk_hi;
> +               if (unlikely(lo <= sk_lo && sk_lo <= hi))
> +                       lo = sk_lo;
> +               if (unlikely(lo <= sk_hi && sk_hi <= hi))
> +                       hi = sk_hi;
> +       }
>
>         *low = lo;
>         *high = hi;
> diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
> index 2efc53526a38..bf940fe249a5 100644
> --- a/net/ipv4/ip_sockglue.c
> +++ b/net/ipv4/ip_sockglue.c
> @@ -1055,6 +1055,20 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
>         case IP_TOS:    /* This sets both TOS and Precedence */
>                 ip_sock_set_tos(sk, val);
>                 return 0;
> +
> +       case IP_LOCAL_PORT_RANGE:
> +       {
> +               const __u16 lo = val;
> +               const __u16 hi = val >> 16;

I know that we use __u16 and __u32 already, but I think we should
reserve them for exported fields in uapi.

New code  should use u16 and u32, no need for __ prefixes.

> +
> +               if (optlen != sizeof(__u32))
> +                       return -EINVAL;
> +               if (lo != 0 && hi != 0 && lo > hi)
> +                       return -EINVAL;
> +
> +               WRITE_ONCE(inet->local_port_range, val);
> +               return 0;
> +       }
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ