[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20110322101648.GB3210@psychotron.brq.redhat.com>
Date: Tue, 22 Mar 2011 11:16:48 +0100
From: Jiri Pirko <jpirko@...hat.com>
To: Nicolas de Pesloüan
<nicolas.2p.debian@...il.com>
Cc: Jay Vosburgh <fubar@...ibm.com>,
Andy Gospodarek <andy@...yhouse.net>,
"netdev@...r.kernel.org" <netdev@...r.kernel.org>
Subject: Re: oops / kernel panic in bonding.
Sun, Mar 20, 2011 at 10:19:21PM CET, nicolas.2p.debian@...il.com wrote:
>Le 20/03/2011 21:17, Nicolas de Pesloüan a écrit :
>>Hi Jiri,
>>
>>I suspect we have a race condition somewhere in the new
>>bond_handle_frame function:
>>
>>The following commands produce one of the following errors:
>>
>>modprobe bonding max_bonds=0
>>echo +bond0>/sys/class/net/bonding_masters
>>echo +bond1>/sys/class/net/bonding_masters
>>echo +eth1>/sys/class/net/bond1/bonding/slaves
>>
>>This is mostly reproducible, under VirtualBox.
>>
>>All tests done with 08351fc6a75731226e1112fc7254542bd3a2912e at the top
>>commit (current net-next-2.6).
>
>I suspect netdev_rx_handler_register is called too early in bond_enslave.
>
>I think it should be the last thing we do in bond_enslave, if we
>don't want to risk having bond_handle_frame called before
>everything is properly set up.
>
> Nicolas.
Nicolas, would you please give the following patch a test drive?
Signed-off-by: Jiri Pirko <jpirko@...hat.com>
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 1a6e9eb..c339eb1 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1482,21 +1482,16 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb)
{
struct sk_buff *skb = *pskb;
struct slave *slave;
- struct net_device *bond_dev;
struct bonding *bond;
- slave = bond_slave_get_rcu(skb->dev);
- bond_dev = ACCESS_ONCE(slave->dev->master);
- if (unlikely(!bond_dev))
- return RX_HANDLER_PASS;
-
skb = skb_share_check(skb, GFP_ATOMIC);
if (unlikely(!skb))
return RX_HANDLER_CONSUMED;
*pskb = skb;
- bond = netdev_priv(bond_dev);
+ slave = bond_slave_get_rcu(skb->dev);
+ bond = slave->bond;
if (bond->params.arp_interval)
slave->dev->last_rx = jiffies;
@@ -1505,10 +1500,10 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb)
return RX_HANDLER_EXACT;
}
- skb->dev = bond_dev;
+ skb->dev = bond->dev;
if (bond->params.mode == BOND_MODE_ALB &&
- bond_dev->priv_flags & IFF_BRIDGE_PORT &&
+ bond->dev->priv_flags & IFF_BRIDGE_PORT &&
skb->pkt_type == PACKET_HOST) {
if (unlikely(skb_cow_head(skb,
@@ -1516,7 +1511,7 @@ static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb)
kfree_skb(skb);
return RX_HANDLER_CONSUMED;
}
- memcpy(eth_hdr(skb)->h_dest, bond_dev->dev_addr, ETH_ALEN);
+ memcpy(eth_hdr(skb)->h_dest, bond->dev->dev_addr, ETH_ALEN);
}
return RX_HANDLER_ANOTHER;
@@ -1698,20 +1693,15 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
pr_debug("Error %d calling netdev_set_bond_master\n", res);
goto err_restore_mac;
}
- res = netdev_rx_handler_register(slave_dev, bond_handle_frame,
- new_slave);
- if (res) {
- pr_debug("Error %d calling netdev_rx_handler_register\n", res);
- goto err_unset_master;
- }
/* open the slave since the application closed it */
res = dev_open(slave_dev);
if (res) {
pr_debug("Opening slave %s failed\n", slave_dev->name);
- goto err_unreg_rxhandler;
+ goto err_unset_master;
}
+ new_slave->bond = bond;
new_slave->dev = slave_dev;
slave_dev->priv_flags |= IFF_BONDING;
@@ -1907,6 +1897,13 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
if (res)
goto err_close;
+ res = netdev_rx_handler_register(slave_dev, bond_handle_frame,
+ new_slave);
+ if (res) {
+ pr_debug("Error %d calling netdev_rx_handler_register\n", res);
+ goto err_dest_symlinks;
+ }
+
pr_info("%s: enslaving %s as a%s interface with a%s link.\n",
bond_dev->name, slave_dev->name,
bond_is_active_slave(new_slave) ? "n active" : " backup",
@@ -1916,13 +1913,12 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
return 0;
/* Undo stages on error */
+err_dest_symlinks:
+ bond_destroy_slave_symlinks(bond_dev, slave_dev);
+
err_close:
dev_close(slave_dev);
-err_unreg_rxhandler:
- netdev_rx_handler_unregister(slave_dev);
- synchronize_net();
-
err_unset_master:
netdev_set_bond_master(slave_dev, NULL);
@@ -1988,6 +1984,12 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
return -EINVAL;
}
+ /* unregister rx_handler early so bond_handle_frame wouldn't be called
+ * for this slave anymore.
+ */
+ netdev_rx_handler_unregister(slave_dev);
+ synchronize_net();
+
if (!bond->params.fail_over_mac) {
if (!compare_ether_addr(bond_dev->dev_addr, slave->perm_hwaddr) &&
bond->slave_cnt > 1)
@@ -2104,8 +2106,6 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
netif_addr_unlock_bh(bond_dev);
}
- netdev_rx_handler_unregister(slave_dev);
- synchronize_net();
netdev_set_bond_master(slave_dev, NULL);
slave_disable_netpoll(slave);
@@ -2171,13 +2171,20 @@ static int bond_release_all(struct net_device *bond_dev)
bond_change_active_slave(bond, NULL);
while ((slave = bond->first_slave) != NULL) {
+ slave_dev = slave->dev;
+
+ /* unregister rx_handler early so bond_handle_frame wouldn't
+ * be called for this slave anymore.
+ */
+ netdev_rx_handler_unregister(slave_dev);
+ synchronize_net();
+
/* Inform AD package of unbinding of slave
* before slave is detached from the list.
*/
if (bond->params.mode == BOND_MODE_8023AD)
bond_3ad_unbind_slave(slave);
- slave_dev = slave->dev;
bond_detach_slave(bond, slave);
/* now that the slave is detached, unlock and perform
@@ -2217,8 +2224,6 @@ static int bond_release_all(struct net_device *bond_dev)
netif_addr_unlock_bh(bond_dev);
}
- netdev_rx_handler_unregister(slave_dev);
- synchronize_net();
netdev_set_bond_master(slave_dev, NULL);
slave_disable_netpoll(slave);
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 6b26962..90736cb 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -187,6 +187,7 @@ struct slave {
struct net_device *dev; /* first - useful for panic debug */
struct slave *next;
struct slave *prev;
+ struct bonding *bond; /* our master */
int delay;
unsigned long jiffies;
unsigned long last_arp_rx;
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists