lists.openwall.net | lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC | |
Open Source and information security mailing list archives
| ||
|
Message-ID: <CANn89iJ_uWADjV-JytShdDXYa=nDghEYS9Sy+EBu=tUWgwuS9w@mail.gmail.com> Date: Thu, 30 Nov 2023 10:28:23 +0100 From: Eric Dumazet <edumazet@...gle.com> To: David Laight <David.Laight@...lab.com> Cc: "netdev@...r.kernel.org" <netdev@...r.kernel.org>, Jakub Kicinski <kuba@...nel.org>, "David S. Miller" <davem@...emloft.net>, Stephen Hemminger <stephen@...workplumber.org>, David Ahern <dsahern@...nel.org>, Paolo Abeni <pabeni@...hat.com>, "jakub@...udflare.com" <jakub@...udflare.com> Subject: Re: [PATCH net-next] ipv4: Use READ/WRITE_ONCE() for IP local_port_range On Wed, Nov 29, 2023 at 8:26 PM David Laight <David.Laight@...lab.com> wrote: > > Commit 227b60f5102cd added a seqlock to ensure that the low and high > port numbers were always updated together. > This is overkill because the two 16bit port numbers can be held in > a u32 and read/written in a single instruction. > > More recently 91d0b78c5177f added support for finer per-socket limits. > The user-supplied value is 'high << 16 | low' but they are held > separately and the socket options protected by the socket lock. > > Use a u32 containing 'high << 16 | low' for both the 'net' and 'sk' > fields and use READ_ONCE()/WRITE_ONCE() to ensure both values are > always updated together. > > Change (the now trival) inet_get_local_port_range() to a static inline > to optimise the calling code. > (In particular avoiding returning integers by reference.) > > Signed-off-by: David Laight <david.laight@...lab.com> > --- > include/net/inet_sock.h | 5 +---- > include/net/ip.h | 7 ++++++- > include/net/netns/ipv4.h | 3 +-- > net/ipv4/af_inet.c | 4 +--- > net/ipv4/inet_connection_sock.c | 29 ++++++++++------------------ > net/ipv4/ip_sockglue.c | 34 ++++++++++++++++----------------- > net/ipv4/sysctl_net_ipv4.c | 12 ++++-------- > 7 files changed, 40 insertions(+), 54 deletions(-) > > diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h > index 74db6d97cae1..ebf71410aa2b 100644 > --- a/include/net/inet_sock.h > +++ b/include/net/inet_sock.h > @@ -234,10 +234,7 @@ struct inet_sock { > int uc_index; > int mc_index; > __be32 mc_addr; > - struct { > - __u16 lo; > - __u16 hi; > - } local_port_range; > + u32 local_port_range; > > struct ip_mc_socklist __rcu *mc_list; > struct inet_cork_full cork; > diff --git a/include/net/ip.h b/include/net/ip.h > index 1fc4c8d69e33..154f9dd75fe6 100644 > --- a/include/net/ip.h > +++ b/include/net/ip.h > @@ -349,7 +349,12 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o > } \ > } > > -void inet_get_local_port_range(const struct net *net, int *low, int *high); > +static inline void inet_get_local_port_range(const struct net *net, int *low, int *high) > +{ > + u32 range = READ_ONCE(net->ipv4.ip_local_ports.range); Please insert an empty line here. > + *low = range & 0xffff; > + *high = range >> 16; > +} > void inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high); > > #ifdef CONFIG_SYSCTL > diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h > index 73f43f699199..30ba5359da84 100644 > --- a/include/net/netns/ipv4.h > +++ b/include/net/netns/ipv4.h > @@ -19,8 +19,7 @@ struct hlist_head; > struct fib_table; > struct sock; > struct local_ports { > - seqlock_t lock; > - int range[2]; > + u32 range; /* high << 16 | low */ > bool warned; > }; > > diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c > index fb81de10d332..b8964b40c3c0 100644 > --- a/net/ipv4/af_inet.c > +++ b/net/ipv4/af_inet.c > @@ -1847,9 +1847,7 @@ static __net_init int inet_init_net(struct net *net) > /* > * Set defaults for local port range > */ > - seqlock_init(&net->ipv4.ip_local_ports.lock); > - net->ipv4.ip_local_ports.range[0] = 32768; > - net->ipv4.ip_local_ports.range[1] = 60999; > + net->ipv4.ip_local_ports.range = 60999 << 16 | 32768; > > seqlock_init(&net->ipv4.ping_group_range.lock); > /* > diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c > index 394a498c2823..1a45d41f8b39 100644 > --- a/net/ipv4/inet_connection_sock.c > +++ b/net/ipv4/inet_connection_sock.c > @@ -117,34 +117,25 @@ bool inet_rcv_saddr_any(const struct sock *sk) > return !sk->sk_rcv_saddr; > } > > -void inet_get_local_port_range(const struct net *net, int *low, int *high) > -{ > - unsigned int seq; > - > - do { > - seq = read_seqbegin(&net->ipv4.ip_local_ports.lock); > - > - *low = net->ipv4.ip_local_ports.range[0]; > - *high = net->ipv4.ip_local_ports.range[1]; > - } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq)); > -} > -EXPORT_SYMBOL(inet_get_local_port_range); > - > void inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high) > { > const struct inet_sock *inet = inet_sk(sk); > const struct net *net = sock_net(sk); > int lo, hi, sk_lo, sk_hi; > + u32 sk_range; > > inet_get_local_port_range(net, &lo, &hi); > > - sk_lo = inet->local_port_range.lo; > - sk_hi = inet->local_port_range.hi; > + sk_range = READ_ONCE(inet->local_port_range); > + if (unlikely(sk_range)) { > + sk_lo = sk_range & 0xffff; > + sk_hi = sk_range >> 16; > > - if (unlikely(lo <= sk_lo && sk_lo <= hi)) > - lo = sk_lo; > - if (unlikely(lo <= sk_hi && sk_hi <= hi)) > - hi = sk_hi; > + if (unlikely(lo <= sk_lo && sk_lo <= hi)) > + lo = sk_lo; > + if (unlikely(lo <= sk_hi && sk_hi <= hi)) > + hi = sk_hi; > + } > > *low = lo; > *high = hi; > diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c > index 2efc53526a38..bf940fe249a5 100644 > --- a/net/ipv4/ip_sockglue.c > +++ b/net/ipv4/ip_sockglue.c > @@ -1055,6 +1055,20 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname, > case IP_TOS: /* This sets both TOS and Precedence */ > ip_sock_set_tos(sk, val); > return 0; > + > + case IP_LOCAL_PORT_RANGE: > + { > + const __u16 lo = val; > + const __u16 hi = val >> 16; I know that we use __u16 and __u32 already, but I think we should reserve them for exported fields in uapi. New code should use u16 and u32, no need for __ prefixes. > + > + if (optlen != sizeof(__u32)) > + return -EINVAL; > + if (lo != 0 && hi != 0 && lo > hi) > + return -EINVAL; > + > + WRITE_ONCE(inet->local_port_range, val); > + return 0; > + } >
Powered by blists - more mailing lists