[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CANn89iJvxPyYprrUjdD3JOkU-nEcEwmMDiSF0izXHkfi5MLYyw@mail.gmail.com>
Date: Thu, 26 Jun 2025 07:54:14 -0700
From: Eric Dumazet <edumazet@...gle.com>
To: Kuniyuki Iwashima <kuni1840@...il.com>
Cc: "David S. Miller" <davem@...emloft.net>, David Ahern <dsahern@...nel.org>,
Jakub Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>, Simon Horman <horms@...nel.org>,
Kuniyuki Iwashima <kuniyu@...gle.com>, netdev@...r.kernel.org
Subject: Re: [PATCH v2 net-next 14/15] ipv6: anycast: Don't hold RTNL for IPV6_JOIN_ANYCAST.
On Tue, Jun 24, 2025 at 1:26 PM Kuniyuki Iwashima <kuni1840@...il.com> wrote:
>
> From: Kuniyuki Iwashima <kuniyu@...gle.com>
>
> inet6_sk(sk)->ipv6_ac_list is protected by lock_sock().
>
> In ipv6_sock_ac_join(), only __dev_get_by_index(), __dev_get_by_flags(),
> and __in6_dev_get() require RTNL.
>
> __dev_get_by_flags() is only used by ipv6_sock_ac_join() and can be
> converted to an RCU version.
>
> Let's switch to the RCU version helpers and drop RTNL from IPV6_JOIN_ANYCAST.
>
> setsockopt_needs_rtnl() will be removed in the next patch.
>
> Signed-off-by: Kuniyuki Iwashima <kuniyu@...gle.com>
> ---
> v2: Hold rcu_read_lock() around rt6_lookup & dev_hold()
> ---
> include/linux/netdevice.h | 4 ++--
> net/core/dev.c | 38 ++++++++++++++++++--------------------
> net/ipv6/anycast.c | 20 +++++++++++++-------
> net/ipv6/ipv6_sockglue.c | 4 ----
> 4 files changed, 33 insertions(+), 33 deletions(-)
>
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 03c26bb0fbbe..68f874a58c92 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -3339,8 +3339,8 @@ int dev_get_iflink(const struct net_device *dev);
> int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
> int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
> struct net_device_path_stack *stack);
> -struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags,
> - unsigned short mask);
> +struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short flags,
> + unsigned short mask);
> struct net_device *dev_get_by_name(struct net *net, const char *name);
> struct net_device *dev_get_by_name_rcu(struct net *net, const char *name);
> struct net_device *__dev_get_by_name(struct net *net, const char *name);
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 7ee808eb068e..553c654e6f77 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -1267,33 +1267,31 @@ struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
> EXPORT_SYMBOL(dev_getfirstbyhwtype);
>
> /**
> - * __dev_get_by_flags - find any device with given flags
> - * @net: the applicable net namespace
> - * @if_flags: IFF_* values
> - * @mask: bitmask of bits in if_flags to check
> + * dev_get_by_flags_rcu - find any device with given flags
> + * @net: the applicable net namespace
> + * @if_flags: IFF_* values
> + * @mask: bitmask of bits in if_flags to check
> *
> - * Search for any interface with the given flags. Returns NULL if a device
> - * is not found or a pointer to the device. Must be called inside
> - * rtnl_lock(), and result refcount is unchanged.
> + * Search for any interface with the given flags.
> + *
> + * Context: rcu_read_lock() must be held.
> + * Returns: NULL if a device is not found or a pointer to the device.
> */
> -
> -struct net_device *__dev_get_by_flags(struct net *net, unsigned short if_flags,
> - unsigned short mask)
> +struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags,
> + unsigned short mask)
> {
> - struct net_device *dev, *ret;
> -
> - ASSERT_RTNL();
> + struct net_device *dev;
>
> - ret = NULL;
> - for_each_netdev(net, dev) {
> - if (((dev->flags ^ if_flags) & mask) == 0) {
> - ret = dev;
> - break;
> + for_each_netdev_rcu(net, dev) {
> + if (((READ_ONCE(dev->flags) ^ if_flags) & mask) == 0) {
> + dev_hold(dev);
> + return dev;
> }
> }
> - return ret;
> +
> + return NULL;
> }
> -EXPORT_SYMBOL(__dev_get_by_flags);
> +EXPORT_IPV6_MOD(dev_get_by_flags_rcu);
>
> /**
> * dev_valid_name - check if name is okay for network device
> diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
> index e0a1f9d7622c..427fa95018b7 100644
> --- a/net/ipv6/anycast.c
> +++ b/net/ipv6/anycast.c
> @@ -73,15 +73,13 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
> struct inet6_dev *idev;
> int err = 0, ishost;
>
> - ASSERT_RTNL();
> -
> if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
> return -EPERM;
> if (ipv6_addr_is_multicast(addr))
> return -EINVAL;
>
> if (ifindex)
> - dev = __dev_get_by_index(net, ifindex);
> + dev = dev_get_by_index(net, ifindex);
>
> if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE)) {
> err = -EINVAL;
> @@ -102,18 +100,22 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
> if (ifindex == 0) {
> struct rt6_info *rt;
>
> + rcu_read_lock();
> rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
> if (rt) {
> dev = rt->dst.dev;
Please use READ_ONCE(rt->dst.dev) here.
Reviewed-by: Eric Dumazet <edumazet@...gle.com>
> + dev_hold(dev);
> ip6_rt_put(rt);
> } else if (ishost) {
> + rcu_read_unlock();
> err = -EADDRNOTAVAIL;
> goto error;
> } else {
> /* router, no matching interface: just pick one */
> - dev = __dev_get_by_flags(net, IFF_UP,
> - IFF_UP | IFF_LOOPBACK);
> + dev = dev_get_by_flags_rcu(net, IFF_UP,
> + IFF_UP | IFF_LOOPBACK);
> }
> + rcu_read_unlock();
> }
>
> if (!dev) {
> @@ -121,7 +123,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
> goto error;
> }
>
> - idev = __in6_dev_get(dev);
> + idev = in6_dev_get(dev);
> if (!idev) {
> if (ifindex)
> err = -ENODEV;
> @@ -143,7 +145,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
> if (ishost)
> err = -EADDRNOTAVAIL;
> if (err)
> - goto error;
> + goto error_idev;
> }
>
> err = __ipv6_dev_ac_inc(idev, addr);
> @@ -153,7 +155,11 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
> pac = NULL;
> }
>
> +error_idev:
> + in6_dev_put(idev);
> error:
> + dev_put(dev);
> +
> if (pac)
> sock_kfree_s(sk, pac, sizeof(*pac));
> return err;
> diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
> index 3d891aa6e7f5..702dc33e50ad 100644
> --- a/net/ipv6/ipv6_sockglue.c
> +++ b/net/ipv6/ipv6_sockglue.c
> @@ -119,10 +119,6 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
>
> static bool setsockopt_needs_rtnl(int optname)
> {
> - switch (optname) {
> - case IPV6_JOIN_ANYCAST:
> - return true;
> - }
> return false;
> }
>
> --
> 2.49.0
>
Powered by blists - more mailing lists