[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230912160212.3467976-2-edumazet@google.com>
Date: Tue, 12 Sep 2023 16:01:59 +0000
From: Eric Dumazet <edumazet@...gle.com>
To: "David S . Miller" <davem@...emloft.net>, Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>
Cc: David Ahern <dsahern@...nel.org>, netdev@...r.kernel.org, eric.dumazet@...il.com,
Eric Dumazet <edumazet@...gle.com>
Subject: [PATCH net-next 01/14] ipv6: lockless IPV6_UNICAST_HOPS implementation
Some np->hop_limit accesses are racy, when socket lock is not held.
Add missing annotations and switch to full lockless implementation.
Signed-off-by: Eric Dumazet <edumazet@...gle.com>
---
include/linux/ipv6.h | 12 +-----------
include/net/ipv6.h | 2 +-
net/ipv6/ip6_output.c | 2 +-
net/ipv6/ipv6_sockglue.c | 20 +++++++++++---------
net/ipv6/mcast.c | 2 +-
net/ipv6/ndisc.c | 2 +-
6 files changed, 16 insertions(+), 24 deletions(-)
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index af8a771a053c51eed297516f927a5fd003315ef4..c2e0870713849fbbf1a8ec2d60cca80caab0cb98 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -213,17 +213,7 @@ struct ipv6_pinfo {
__be32 flow_label;
__u32 frag_size;
- /*
- * Packed in 16bits.
- * Omit one shift by putting the signed field at MSB.
- */
-#if defined(__BIG_ENDIAN_BITFIELD)
- __s16 hop_limit:9;
- __u16 __unused_1:7;
-#else
- __u16 __unused_1:7;
- __s16 hop_limit:9;
-#endif
+ s16 hop_limit;
#if defined(__BIG_ENDIAN_BITFIELD)
/* Packed in 16bits. */
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 0675be0f3fa0efc55575bb5b2569dc8a1dbb9f24..61007db0036482e27121747add0eec77f912b54a 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -911,7 +911,7 @@ static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6,
if (ipv6_addr_is_multicast(&fl6->daddr))
hlimit = np->mcast_hops;
else
- hlimit = np->hop_limit;
+ hlimit = READ_ONCE(np->hop_limit);
if (hlimit < 0)
hlimit = ip6_dst_hoplimit(dst);
return hlimit;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 54fc4c711f2c545f2ca625d6b0e09f2bb8e6d513..1e16d56d8c38ac51bd999038ae4e8478bf2f5f8c 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -309,7 +309,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
* Fill in the IPv6 header
*/
if (np)
- hlimit = np->hop_limit;
+ hlimit = READ_ONCE(np->hop_limit);
if (hlimit < 0)
hlimit = ip6_dst_hoplimit(dst);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 0e2a0847b387f0f6f50211b89f92ac1e00a0b07a..f27993a1470dddd876f34f65c1f171c576eca272 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -415,6 +415,16 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
if (ip6_mroute_opt(optname))
return ip6_mroute_setsockopt(sk, optname, optval, optlen);
+ /* Handle options that can be set without locking the socket. */
+ switch (optname) {
+ case IPV6_UNICAST_HOPS:
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ if (val > 255 || val < -1)
+ return -EINVAL;
+ WRITE_ONCE(np->hop_limit, val);
+ return 0;
+ }
if (needs_rtnl)
rtnl_lock();
sockopt_lock_sock(sk);
@@ -733,14 +743,6 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
}
break;
}
- case IPV6_UNICAST_HOPS:
- if (optlen < sizeof(int))
- goto e_inval;
- if (val > 255 || val < -1)
- goto e_inval;
- np->hop_limit = val;
- retv = 0;
- break;
case IPV6_MULTICAST_HOPS:
if (sk->sk_type == SOCK_STREAM)
@@ -1347,7 +1349,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
struct dst_entry *dst;
if (optname == IPV6_UNICAST_HOPS)
- val = np->hop_limit;
+ val = READ_ONCE(np->hop_limit);
else
val = np->mcast_hops;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 5ce25bcb9974de97f26635d0d3d54695af3070a7..6a33a50687bcf7201e75574f03e619fe89636068 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1716,7 +1716,7 @@ static void ip6_mc_hdr(const struct sock *sk, struct sk_buff *skb,
hdr->payload_len = htons(len);
hdr->nexthdr = proto;
- hdr->hop_limit = inet6_sk(sk)->hop_limit;
+ hdr->hop_limit = READ_ONCE(inet6_sk(sk)->hop_limit);
hdr->saddr = *saddr;
hdr->daddr = *daddr;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 553c8664e0a7a37d7858393ab6a30616ab13a3bf..b554fd40bdc3787eb3bafa1d9923076d6078217e 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -500,7 +500,7 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr,
csum_partial(icmp6h,
skb->len, 0));
- ip6_nd_hdr(skb, saddr, daddr, inet6_sk(sk)->hop_limit, skb->len);
+ ip6_nd_hdr(skb, saddr, daddr, READ_ONCE(inet6_sk(sk)->hop_limit), skb->len);
rcu_read_lock();
idev = __in6_dev_get(dst->dev);
--
2.42.0.283.g2d96d420d3-goog
Powered by blists - more mailing lists