[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <a4f5b61c9cd44eada41d8f09d3848806@AcuMS.aculab.com>
Date: Wed, 29 Nov 2023 19:26:02 +0000
From: David Laight <David.Laight@...LAB.COM>
To: "netdev@...r.kernel.org" <netdev@...r.kernel.org>, 'Jakub Kicinski'
<kuba@...nel.org>, "David S. Miller" <davem@...emloft.net>
CC: Stephen Hemminger <stephen@...workplumber.org>, Eric Dumazet
<edumazet@...gle.com>, 'David Ahern' <dsahern@...nel.org>, Paolo Abeni
<pabeni@...hat.com>, "'jakub@...udflare.com'" <jakub@...udflare.com>
Subject: [PATCH net-next] ipv4: Use READ/WRITE_ONCE() for IP local_port_range
Commit 227b60f5102cd added a seqlock to ensure that the low and high
port numbers were always updated together.
This is overkill because the two 16bit port numbers can be held in
a u32 and read/written in a single instruction.
More recently 91d0b78c5177f added support for finer per-socket limits.
The user-supplied value is 'high << 16 | low' but they are held
separately and the socket options protected by the socket lock.
Use a u32 containing 'high << 16 | low' for both the 'net' and 'sk'
fields and use READ_ONCE()/WRITE_ONCE() to ensure both values are
always updated together.
Change (the now trival) inet_get_local_port_range() to a static inline
to optimise the calling code.
(In particular avoiding returning integers by reference.)
Signed-off-by: David Laight <david.laight@...lab.com>
---
include/net/inet_sock.h | 5 +----
include/net/ip.h | 7 ++++++-
include/net/netns/ipv4.h | 3 +--
net/ipv4/af_inet.c | 4 +---
net/ipv4/inet_connection_sock.c | 29 ++++++++++------------------
net/ipv4/ip_sockglue.c | 34 ++++++++++++++++-----------------
net/ipv4/sysctl_net_ipv4.c | 12 ++++--------
7 files changed, 40 insertions(+), 54 deletions(-)
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 74db6d97cae1..ebf71410aa2b 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -234,10 +234,7 @@ struct inet_sock {
int uc_index;
int mc_index;
__be32 mc_addr;
- struct {
- __u16 lo;
- __u16 hi;
- } local_port_range;
+ u32 local_port_range;
struct ip_mc_socklist __rcu *mc_list;
struct inet_cork_full cork;
diff --git a/include/net/ip.h b/include/net/ip.h
index 1fc4c8d69e33..154f9dd75fe6 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -349,7 +349,12 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o
} \
}
-void inet_get_local_port_range(const struct net *net, int *low, int *high);
+static inline void inet_get_local_port_range(const struct net *net, int *low, int *high)
+{
+ u32 range = READ_ONCE(net->ipv4.ip_local_ports.range);
+ *low = range & 0xffff;
+ *high = range >> 16;
+}
void inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high);
#ifdef CONFIG_SYSCTL
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 73f43f699199..30ba5359da84 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -19,8 +19,7 @@ struct hlist_head;
struct fib_table;
struct sock;
struct local_ports {
- seqlock_t lock;
- int range[2];
+ u32 range; /* high << 16 | low */
bool warned;
};
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index fb81de10d332..b8964b40c3c0 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1847,9 +1847,7 @@ static __net_init int inet_init_net(struct net *net)
/*
* Set defaults for local port range
*/
- seqlock_init(&net->ipv4.ip_local_ports.lock);
- net->ipv4.ip_local_ports.range[0] = 32768;
- net->ipv4.ip_local_ports.range[1] = 60999;
+ net->ipv4.ip_local_ports.range = 60999 << 16 | 32768;
seqlock_init(&net->ipv4.ping_group_range.lock);
/*
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 394a498c2823..1a45d41f8b39 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -117,34 +117,25 @@ bool inet_rcv_saddr_any(const struct sock *sk)
return !sk->sk_rcv_saddr;
}
-void inet_get_local_port_range(const struct net *net, int *low, int *high)
-{
- unsigned int seq;
-
- do {
- seq = read_seqbegin(&net->ipv4.ip_local_ports.lock);
-
- *low = net->ipv4.ip_local_ports.range[0];
- *high = net->ipv4.ip_local_ports.range[1];
- } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq));
-}
-EXPORT_SYMBOL(inet_get_local_port_range);
-
void inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high)
{
const struct inet_sock *inet = inet_sk(sk);
const struct net *net = sock_net(sk);
int lo, hi, sk_lo, sk_hi;
+ u32 sk_range;
inet_get_local_port_range(net, &lo, &hi);
- sk_lo = inet->local_port_range.lo;
- sk_hi = inet->local_port_range.hi;
+ sk_range = READ_ONCE(inet->local_port_range);
+ if (unlikely(sk_range)) {
+ sk_lo = sk_range & 0xffff;
+ sk_hi = sk_range >> 16;
- if (unlikely(lo <= sk_lo && sk_lo <= hi))
- lo = sk_lo;
- if (unlikely(lo <= sk_hi && sk_hi <= hi))
- hi = sk_hi;
+ if (unlikely(lo <= sk_lo && sk_lo <= hi))
+ lo = sk_lo;
+ if (unlikely(lo <= sk_hi && sk_hi <= hi))
+ hi = sk_hi;
+ }
*low = lo;
*high = hi;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 2efc53526a38..bf940fe249a5 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1055,6 +1055,20 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
case IP_TOS: /* This sets both TOS and Precedence */
ip_sock_set_tos(sk, val);
return 0;
+
+ case IP_LOCAL_PORT_RANGE:
+ {
+ const __u16 lo = val;
+ const __u16 hi = val >> 16;
+
+ if (optlen != sizeof(__u32))
+ return -EINVAL;
+ if (lo != 0 && hi != 0 && lo > hi)
+ return -EINVAL;
+
+ WRITE_ONCE(inet->local_port_range, val);
+ return 0;
+ }
}
err = 0;
@@ -1332,20 +1346,6 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
err = xfrm_user_policy(sk, optname, optval, optlen);
break;
- case IP_LOCAL_PORT_RANGE:
- {
- const __u16 lo = val;
- const __u16 hi = val >> 16;
-
- if (optlen != sizeof(__u32))
- goto e_inval;
- if (lo != 0 && hi != 0 && lo > hi)
- goto e_inval;
-
- inet->local_port_range.lo = lo;
- inet->local_port_range.hi = hi;
- break;
- }
default:
err = -ENOPROTOOPT;
break;
@@ -1692,6 +1692,9 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
return -EFAULT;
return 0;
}
+ case IP_LOCAL_PORT_RANGE:
+ val = READ_ONCE(inet->local_port_range);
+ goto copyval;
}
if (needs_rtnl)
@@ -1721,9 +1724,6 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
else
err = ip_get_mcast_msfilter(sk, optval, optlen, len);
goto out;
- case IP_LOCAL_PORT_RANGE:
- val = inet->local_port_range.hi << 16 | inet->local_port_range.lo;
- break;
case IP_PROTOCOL:
val = inet_sk(sk)->inet_num;
break;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index f63a545a7374..b008b6b5e85d 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -54,22 +54,18 @@ static void set_local_port_range(struct net *net, int range[2])
{
bool same_parity = !((range[0] ^ range[1]) & 1);
- write_seqlock_bh(&net->ipv4.ip_local_ports.lock);
if (same_parity && !net->ipv4.ip_local_ports.warned) {
net->ipv4.ip_local_ports.warned = true;
pr_err_ratelimited("ip_local_port_range: prefer different parity for start/end values.\n");
}
- net->ipv4.ip_local_ports.range[0] = range[0];
- net->ipv4.ip_local_ports.range[1] = range[1];
- write_sequnlock_bh(&net->ipv4.ip_local_ports.lock);
+ WRITE_ONCE(net->ipv4.ip_local_ports.range, range[1] << 16 | range[0]);
}
/* Validate changes from /proc interface. */
static int ipv4_local_port_range(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
- struct net *net =
- container_of(table->data, struct net, ipv4.ip_local_ports.range);
+ struct net *net = table->data;
int ret;
int range[2];
struct ctl_table tmp = {
@@ -733,8 +729,8 @@ static struct ctl_table ipv4_net_table[] = {
},
{
.procname = "ip_local_port_range",
- .maxlen = sizeof(init_net.ipv4.ip_local_ports.range),
- .data = &init_net.ipv4.ip_local_ports.range,
+ .maxlen = 0,
+ .data = &init_net,
.mode = 0644,
.proc_handler = ipv4_local_port_range,
},
--
2.17.1
-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
Powered by blists - more mailing lists