[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <1519752620.2760.10.camel@redhat.com>
Date: Tue, 27 Feb 2018 18:30:20 +0100
From: Paolo Abeni <pabeni@...hat.com>
To: "David S. Miller" <davem@...emloft.net>
Cc: Mahesh Bandewar <maheshb@...gle.com>, Jiri Benc <jbenc@...hat.com>,
netdev@...r.kernel.org
Subject: Re: [PATCH net-next] ipvlan: use per device spinlock to protect
addrs list updates
On Mon, 2018-02-26 at 22:42 +0100, Paolo Abeni wrote:
> This changeset moves ipvlan address under RCU protection, using
> a per ipvlan device spinlock to protect list mutation and RCU
> read access to protect list traversal.
>
> Also explicitly use RCU read lock to traverse the per port
> ipvlans list, so that we can now perform a full address lookup
> without asserting the RTNL lock.
>
> Overall this allows the ipvlan driver to check fully for duplicate
> addresses - before this commit ipv6 addresses assigned by autoconf
> via prefix delegation were accepted without any check - and avoid
> the following rtnl assertion failure still in the same code path:
>
> RTNL: assertion failed at drivers/net/ipvlan/ipvlan_core.c (124)
> WARNING: CPU: 15 PID: 0 at drivers/net/ipvlan/ipvlan_core.c:124 ipvlan_addr_busy+0x97/0xa0 [ipvlan]
> Modules linked in: ipvlan(E) ixgbe
> CPU: 15 PID: 0 Comm: swapper/15 Tainted: G E 4.16.0-rc2.ipvlan+ #1782
> Hardware name: Dell Inc. PowerEdge R730/072T6D, BIOS 2.1.7 06/16/2016
> RIP: 0010:ipvlan_addr_busy+0x97/0xa0 [ipvlan]
> RSP: 0018:ffff881ff9e03768 EFLAGS: 00010286
> RAX: 0000000000000000 RBX: ffff881fdf2a9000 RCX: 0000000000000000
> RDX: 0000000000000001 RSI: 00000000000000f6 RDI: 0000000000000300
> RBP: ffff881fdf2a8000 R08: 0000000000000000 R09: 0000000000000000
> R10: 0000000000000001 R11: ffff881ff9e034c0 R12: ffff881fe07bcc00
> R13: 0000000000000001 R14: ffffffffa02002b0 R15: 0000000000000001
> FS: 0000000000000000(0000) GS:ffff881ff9e00000(0000) knlGS:0000000000000000
> CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 00007fc5c1a4f248 CR3: 000000207e012005 CR4: 00000000001606e0
> Call Trace:
> <IRQ>
> ipvlan_addr6_event+0x6c/0xd0 [ipvlan]
> notifier_call_chain+0x49/0x90
> atomic_notifier_call_chain+0x6a/0x100
> ipv6_add_addr+0x5f9/0x720
> addrconf_prefix_rcv_add_addr+0x244/0x3c0
> addrconf_prefix_rcv+0x2f3/0x790
> ndisc_router_discovery+0x633/0xb70
> ndisc_rcv+0x155/0x180
> icmpv6_rcv+0x4ac/0x5f0
> ip6_input_finish+0x138/0x6a0
> ip6_input+0x41/0x1f0
> ipv6_rcv+0x4db/0x8d0
> __netif_receive_skb_core+0x3d5/0xe40
> netif_receive_skb_internal+0x89/0x370
> napi_gro_receive+0x14f/0x1e0
> ixgbe_clean_rx_irq+0x4ce/0x1020 [ixgbe]
> ixgbe_poll+0x31a/0x7a0 [ixgbe]
> net_rx_action+0x296/0x4f0
> __do_softirq+0xcf/0x4f5
> irq_exit+0xf5/0x110
> do_IRQ+0x62/0x110
> common_interrupt+0x91/0x91
> </IRQ>
>
> Fixes: e9997c2938b2 ("ipvlan: fix check for IP addresses in control path")
> Signed-off-by: Paolo Abeni <pabeni@...hat.com>
> ---
> drivers/net/ipvlan/ipvlan.h | 1 +
> drivers/net/ipvlan/ipvlan_core.c | 30 +++++++++++++--------
> drivers/net/ipvlan/ipvlan_main.c | 56 ++++++++++++++++++++++++++--------------
> 3 files changed, 56 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h
> index 5166575a164d..a115f12bf130 100644
> --- a/drivers/net/ipvlan/ipvlan.h
> +++ b/drivers/net/ipvlan/ipvlan.h
> @@ -74,6 +74,7 @@ struct ipvl_dev {
> DECLARE_BITMAP(mac_filters, IPVLAN_MAC_FILTER_SIZE);
> netdev_features_t sfeatures;
> u32 msg_enable;
> + spinlock_t addrs_lock;
> };
>
> struct ipvl_addr {
> diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
> index 1b5dc200b573..c1781e698b0b 100644
> --- a/drivers/net/ipvlan/ipvlan_core.c
> +++ b/drivers/net/ipvlan/ipvlan_core.c
> @@ -109,25 +109,33 @@ void ipvlan_ht_addr_del(struct ipvl_addr *addr)
> struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
> const void *iaddr, bool is_v6)
> {
> - struct ipvl_addr *addr;
> + struct ipvl_addr *addr, *ret = NULL;
>
> - list_for_each_entry(addr, &ipvlan->addrs, anode)
> - if (addr_equal(is_v6, addr, iaddr))
> - return addr;
> - return NULL;
> + rcu_read_lock();
> + list_for_each_entry_rcu(addr, &ipvlan->addrs, anode) {
> + if (addr_equal(is_v6, addr, iaddr)) {
> + ret = addr;
> + break;
> + }
> + }
> + rcu_read_unlock();
> + return ret;
> }
>
> bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6)
> {
> struct ipvl_dev *ipvlan;
> + bool ret = false;
>
> - ASSERT_RTNL();
> -
> - list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
> - if (ipvlan_find_addr(ipvlan, iaddr, is_v6))
> - return true;
> + rcu_read_lock();
> + list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
> + if (ipvlan_find_addr(ipvlan, iaddr, is_v6)) {
> + ret = true;
> + break;
> + }
> }
> - return false;
> + rcu_read_unlock();
> + return ret;
> }
>
> static void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type)
> diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
> index 67c91ceda979..45b50c778204 100644
> --- a/drivers/net/ipvlan/ipvlan_main.c
> +++ b/drivers/net/ipvlan/ipvlan_main.c
> @@ -227,8 +227,10 @@ static int ipvlan_open(struct net_device *dev)
> else
> dev->flags &= ~IFF_NOARP;
>
> - list_for_each_entry(addr, &ipvlan->addrs, anode)
> + rcu_read_lock();
> + list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
> ipvlan_ht_addr_add(ipvlan, addr);
> + rcu_read_unlock();
>
> return dev_uc_add(phy_dev, phy_dev->dev_addr);
> }
> @@ -244,8 +246,10 @@ static int ipvlan_stop(struct net_device *dev)
>
> dev_uc_del(phy_dev, phy_dev->dev_addr);
>
> - list_for_each_entry(addr, &ipvlan->addrs, anode)
> + rcu_read_lock();
> + list_for_each_entry_rcu(addr, &ipvlan->addrs, anode)
> ipvlan_ht_addr_del(addr);
> + rcu_read_unlock();
>
> return 0;
> }
> @@ -588,6 +592,7 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev,
> ipvlan->sfeatures = IPVLAN_FEATURES;
> ipvlan_adjust_mtu(ipvlan, phy_dev);
> INIT_LIST_HEAD(&ipvlan->addrs);
> + spin_lock_init(&ipvlan->addrs_lock);
>
> /* TODO Probably put random address here to be presented to the
> * world but keep using the physical-dev address for the outgoing
> @@ -665,11 +670,13 @@ void ipvlan_link_delete(struct net_device *dev, struct list_head *head)
> struct ipvl_dev *ipvlan = netdev_priv(dev);
> struct ipvl_addr *addr, *next;
>
> + spin_lock_bh(&ipvlan->addrs_lock);
> list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) {
> ipvlan_ht_addr_del(addr);
> - list_del(&addr->anode);
> + list_del_rcu(&addr->anode);
> kfree_rcu(addr, rcu);
> }
> + spin_unlock_bh(&ipvlan->addrs_lock);
>
> ida_simple_remove(&ipvlan->port->ida, dev->dev_id);
> list_del_rcu(&ipvlan->pnode);
> @@ -760,8 +767,7 @@ static int ipvlan_device_event(struct notifier_block *unused,
> if (dev->reg_state != NETREG_UNREGISTERING)
> break;
>
> - list_for_each_entry_safe(ipvlan, next, &port->ipvlans,
> - pnode)
> + list_for_each_entry_safe(ipvlan, next, &port->ipvlans, pnode)
> ipvlan->dev->rtnl_link_ops->dellink(ipvlan->dev,
> &lst_kill);
> unregister_netdevice_many(&lst_kill);
> @@ -793,6 +799,7 @@ static int ipvlan_device_event(struct notifier_block *unused,
> return NOTIFY_DONE;
> }
>
> +/* the caller must hold the addrs lock */
> static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
> {
> struct ipvl_addr *addr;
> @@ -811,7 +818,8 @@ static int ipvlan_add_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
> addr->atype = IPVL_IPV6;
> #endif
> }
> - list_add_tail(&addr->anode, &ipvlan->addrs);
> +
> + list_add_tail_rcu(&addr->anode, &ipvlan->addrs);
>
> /* If the interface is not up, the address will be added to the hash
> * list by ipvlan_open.
> @@ -826,15 +834,17 @@ static void ipvlan_del_addr(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
> {
> struct ipvl_addr *addr;
>
> + spin_lock_bh(&ipvlan->addrs_lock);
> addr = ipvlan_find_addr(ipvlan, iaddr, is_v6);
> - if (!addr)
> + if (!addr) {
> + spin_unlock_bh(&ipvlan->addrs_lock);
> return;
> + }
>
> ipvlan_ht_addr_del(addr);
> - list_del(&addr->anode);
> + list_del_rcu(&addr->anode);
> + spin_unlock_bh(&ipvlan->addrs_lock);
> kfree_rcu(addr, rcu);
> -
> - return;
> }
>
> static bool ipvlan_is_valid_dev(const struct net_device *dev)
> @@ -853,14 +863,17 @@ static bool ipvlan_is_valid_dev(const struct net_device *dev)
> #if IS_ENABLED(CONFIG_IPV6)
> static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
> {
> - if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) {
> + int ret = -EINVAL;
> +
> + spin_lock_bh(&ipvlan->addrs_lock);
> + if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true))
> netif_err(ipvlan, ifup, ipvlan->dev,
> "Failed to add IPv6=%pI6c addr for %s intf\n",
> ip6_addr, ipvlan->dev->name);
> - return -EINVAL;
> - }
> -
> - return ipvlan_add_addr(ipvlan, ip6_addr, true);
> + else
> + ret = ipvlan_add_addr(ipvlan, ip6_addr, true);
> + spin_unlock_bh(&ipvlan->addrs_lock);
> + return ret;
> }
>
> static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
> @@ -922,14 +935,17 @@ static int ipvlan_addr6_validator_event(struct notifier_block *unused,
>
> static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
> {
> - if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false)) {
> + int ret = -EINVAL;
> +
> + spin_lock_bh(&ipvlan->addrs_lock);
> + if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false))
> netif_err(ipvlan, ifup, ipvlan->dev,
> "Failed to add IPv4=%pI4 on %s intf.\n",
> ip4_addr, ipvlan->dev->name);
> - return -EINVAL;
> - }
> -
> - return ipvlan_add_addr(ipvlan, ip4_addr, false);
> + else
> + ret = ipvlan_add_addr(ipvlan, ip4_addr, false);
> + spin_unlock_bh(&ipvlan->addrs_lock);
> + return ret;
> }
>
> static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
Oops... I forgot to drop the now unneeded 'if (in_softirq())' ... in
ipvlan_addr6_validator_event(). I'll send a v2.
Also, I explicitly targeted net-next, despite the 'Fixes' tag, as this
feels a little too invasive for a -net patch: it mostly
extends/completes an existing feature - ipv6 duplicate address
checking - and a net-next patch will avoid several conflicts on the next
merge from -net.
Please advise if you prefer otherwise, thanks!
Paolo
Powered by blists - more mailing lists