[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20090928220020.GC4436@gospo.rdu.redhat.com>
Date: Mon, 28 Sep 2009 18:00:20 -0400
From: Andy Gospodarek <andy@...yhouse.net>
To: Jay Vosburgh <fubar@...ibm.com>, netdev@...r.kernel.org,
bonding-devel@...ts.sourceforge.net
Subject: Re: [PATCH 2/4 v3] bonding: make sure tx and rx hash tables stay
in sync when using alb mode
On Fri, Sep 18, 2009 at 11:56:45AM -0400, Andy Gospodarek wrote:
> On Fri, Sep 18, 2009 at 11:36:22AM -0400, Andy Gospodarek wrote:
> > On Wed, Sep 16, 2009 at 04:36:09PM -0700, Jay Vosburgh wrote:
> > > Andy Gospodarek <andy@...yhouse.net> wrote:
> > >
> > > >
> > > >Subject: [PATCH] bonding: make sure tx and rx hash tables stay in sync when using alb mode
> > >
> > > When testing this, I'm getting a lockdep warning. It appears to
> > > be unhappy that tlb_choose_channel acquires the tx / rx hash table locks
> > > in the order tx then rx, but rlb_choose_channel -> alb_get_best_slave
> > > acquires the locks in the other order. I applied all four patches, but
> > > it looks like the change that trips lockdep is in this patch (#2).
> > >
> > > I haven't gotten an actual deadlock from this, although it seems
> > > plausible if there are two cpus in bond_alb_xmit at the same time, and
> > > one of them is sending an ARP.
> > >
> > > One fairly straightforward fix would be to combine the rx and tx
> > > hash table locks into a single lock. I suspect that wouldn't have any
> > > real performance penalty, since the rx hash table lock is generally not
> > > acquired very often (unlike the tx lock, which is taken for every packet
> > > that goes out).
> > >
> > > Also, FYI, two of the four patches had trailing whitespace. I
> > > believe it was #2 and #4.
> > >
> > > Thoughts?
> >
> > Jay,
> >
> > This patch should address both the deadlock and whitespace concerns.
> > I ran a kernel with LOCKDEP enabled and saw no warnings while passing
> > traffic on the bond while pulling cables and while removing the module.
> > Here it is....
> >
>
> Adding the version and signed-off-by lines might be nice, eh?
>
> [PATCH v3] bonding: make sure tx and rx hash tables stay in sync when using alb mode
>
> I noticed that it was easy for alb (mode 6) bonding to get into a state
> where the tx hash-table and rx hash-table are out of sync (there is
> really nothing to keep them synchronized), and we will transmit traffic
> destined for a host on one slave and send ARP frames to the same slave
> from another interface using a different source MAC.
>
> There is no compelling reason to do this, so this patch makes sure the
> rx hash-table changes whenever the tx hash-table is updated based on
> device load. This patch also drops the code that does rlb re-balancing
> since the balancing will now be controlled by the tx hash-table based on
> transmit load. In order to address an issue found with the initial
> patch, I have also combined the rx and tx hash table lock into a single
> lock. This will facilitate moving these into a single table at some
> point.
>
> Signed-off-by: Andy Gospodarek <andy@...yhouse.net>
>
> ---
> drivers/net/bonding/bond_alb.c | 203 +++++++++++++++-------------------------
> drivers/net/bonding/bond_alb.h | 3 +-
> 2 files changed, 75 insertions(+), 131 deletions(-)
>
> diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
> index bcf25c6..04b7055 100644
> --- a/drivers/net/bonding/bond_alb.c
> +++ b/drivers/net/bonding/bond_alb.c
> @@ -111,6 +111,7 @@ static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb)
>
> /* Forward declaration */
> static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]);
> +static struct slave *alb_get_best_slave(struct bonding *bond, u32 hash_index);
>
> static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
> {
> @@ -124,18 +125,18 @@ static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
> return hash;
> }
>
> -/*********************** tlb specific functions ***************************/
> -
> -static inline void _lock_tx_hashtbl(struct bonding *bond)
> +/********************* hash table lock functions *************************/
> +static inline void _lock_hashtbl(struct bonding *bond)
> {
> - spin_lock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
> + spin_lock_bh(&(BOND_ALB_INFO(bond).hashtbl_lock));
> }
>
> -static inline void _unlock_tx_hashtbl(struct bonding *bond)
> +static inline void _unlock_hashtbl(struct bonding *bond)
> {
> - spin_unlock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
> + spin_unlock_bh(&(BOND_ALB_INFO(bond).hashtbl_lock));
> }
>
> +/*********************** tlb specific functions ***************************/
> /* Caller must hold tx_hashtbl lock */
> static inline void tlb_init_table_entry(struct tlb_client_info *entry, int save_load)
> {
> @@ -163,7 +164,7 @@ static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_
> struct tlb_client_info *tx_hash_table;
> u32 index;
>
> - _lock_tx_hashtbl(bond);
> + _lock_hashtbl(bond);
>
> /* clear slave from tx_hashtbl */
> tx_hash_table = BOND_ALB_INFO(bond).tx_hashtbl;
> @@ -180,7 +181,7 @@ static void tlb_clear_slave(struct bonding *bond, struct slave *slave, int save_
>
> tlb_init_slave(slave);
>
> - _unlock_tx_hashtbl(bond);
> + _unlock_hashtbl(bond);
> }
>
> /* Must be called before starting the monitor timer */
> @@ -191,7 +192,7 @@ static int tlb_initialize(struct bonding *bond)
> struct tlb_client_info *new_hashtbl;
> int i;
>
> - spin_lock_init(&(bond_info->tx_hashtbl_lock));
> + spin_lock_init(&(bond_info->hashtbl_lock));
>
> new_hashtbl = kzalloc(size, GFP_KERNEL);
> if (!new_hashtbl) {
> @@ -200,7 +201,7 @@ static int tlb_initialize(struct bonding *bond)
> bond->dev->name);
> return -1;
> }
> - _lock_tx_hashtbl(bond);
> + _lock_hashtbl(bond);
>
> bond_info->tx_hashtbl = new_hashtbl;
>
> @@ -208,7 +209,7 @@ static int tlb_initialize(struct bonding *bond)
> tlb_init_table_entry(&bond_info->tx_hashtbl[i], 1);
> }
>
> - _unlock_tx_hashtbl(bond);
> + _unlock_hashtbl(bond);
>
> return 0;
> }
> @@ -218,12 +219,12 @@ static void tlb_deinitialize(struct bonding *bond)
> {
> struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>
> - _lock_tx_hashtbl(bond);
> + _lock_hashtbl(bond);
>
> kfree(bond_info->tx_hashtbl);
> bond_info->tx_hashtbl = NULL;
>
> - _unlock_tx_hashtbl(bond);
> + _unlock_hashtbl(bond);
> }
>
> /* Caller must hold bond lock for read */
> @@ -264,24 +265,6 @@ static struct slave *tlb_get_least_loaded_slave(struct bonding *bond)
> return least_loaded;
> }
>
> -/* Caller must hold bond lock for read and hashtbl lock */
> -static struct slave *tlb_get_best_slave(struct bonding *bond, u32 hash_index)
> -{
> - struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
> - struct tlb_client_info *tx_hash_table = bond_info->tx_hashtbl;
> - struct slave *last_slave = tx_hash_table[hash_index].last_slave;
> - struct slave *next_slave = NULL;
> -
> - if (last_slave && SLAVE_IS_OK(last_slave)) {
> - /* Use the last slave listed in the tx hashtbl if:
> - the last slave currently is essentially unloaded. */
> - if (SLAVE_TLB_INFO(last_slave).load < 10)
> - next_slave = last_slave;
> - }
> -
> - return next_slave ? next_slave : tlb_get_least_loaded_slave(bond);
> -}
> -
> /* Caller must hold bond lock for read */
> static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u32 skb_len)
> {
> @@ -289,13 +272,12 @@ static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u3
> struct tlb_client_info *hash_table;
> struct slave *assigned_slave;
>
> - _lock_tx_hashtbl(bond);
> + _lock_hashtbl(bond);
>
> hash_table = bond_info->tx_hashtbl;
> assigned_slave = hash_table[hash_index].tx_slave;
> if (!assigned_slave) {
> - assigned_slave = tlb_get_best_slave(bond, hash_index);
> -
> + assigned_slave = alb_get_best_slave(bond, hash_index);
> if (assigned_slave) {
> struct tlb_slave_info *slave_info =
> &(SLAVE_TLB_INFO(assigned_slave));
> @@ -319,20 +301,52 @@ static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index, u3
> hash_table[hash_index].tx_bytes += skb_len;
> }
>
> - _unlock_tx_hashtbl(bond);
> + _unlock_hashtbl(bond);
>
> return assigned_slave;
> }
>
> /*********************** rlb specific functions ***************************/
> -static inline void _lock_rx_hashtbl(struct bonding *bond)
> +
> +/* Caller must hold bond lock for read and hashtbl lock */
> +static struct slave *rlb_update_rx_table(struct bonding *bond, struct slave *next_slave, u32 hash_index)
> {
> - spin_lock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
> + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
> +
> + /* check rlb table and correct it if wrong */
> + if (bond_info->rlb_enabled) {
> + struct rlb_client_info *rx_client_info = &(bond_info->rx_hashtbl[hash_index]);
> +
> + /* if the new slave computed by tlb checks doesn't match rlb, stop rlb from using it */
> + if (next_slave && (next_slave != rx_client_info->slave))
> + rx_client_info->slave = next_slave;
> + }
> + return next_slave;
> }
>
> -static inline void _unlock_rx_hashtbl(struct bonding *bond)
> +/* Caller must hold bond lock for read and hashtbl lock */
> +static struct slave *alb_get_best_slave(struct bonding *bond, u32 hash_index)
> {
> - spin_unlock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
> + struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
> + struct tlb_client_info *tx_hash_table = bond_info->tx_hashtbl;
> + struct slave *last_slave = tx_hash_table[hash_index].last_slave;
> + struct slave *next_slave = NULL;
> +
> + /* presume the next slave will be the least loaded one */
> + next_slave = tlb_get_least_loaded_slave(bond);
> +
> + if (last_slave && SLAVE_IS_OK(last_slave)) {
> + /* Use the last slave listed in the tx hashtbl if:
> + the last slave currently is essentially unloaded. */
> + if (SLAVE_TLB_INFO(last_slave).load < 10)
> + next_slave = last_slave;
> + }
> +
> + /* update the rlb hashtbl if there was a previous entry */
> + if (bond_info->rlb_enabled)
> + rlb_update_rx_table(bond, next_slave, hash_index);
> +
> + return next_slave;
> }
>
> /* when an ARP REPLY is received from a client update its info
> @@ -344,7 +358,7 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)
> struct rlb_client_info *client_info;
> u32 hash_index;
>
> - _lock_rx_hashtbl(bond);
> + _lock_hashtbl(bond);
>
> hash_index = _simple_hash((u8*)&(arp->ip_src), sizeof(arp->ip_src));
> client_info = &(bond_info->rx_hashtbl[hash_index]);
> @@ -358,7 +372,7 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)
> bond_info->rx_ntt = 1;
> }
>
> - _unlock_rx_hashtbl(bond);
> + _unlock_hashtbl(bond);
> }
>
> static int rlb_arp_recv(struct sk_buff *skb, struct net_device *bond_dev, struct packet_type *ptype, struct net_device *orig_dev)
> @@ -402,38 +416,6 @@ out:
> return res;
> }
>
> -/* Caller must hold bond lock for read */
> -static struct slave *rlb_next_rx_slave(struct bonding *bond)
> -{
> - struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
> - struct slave *rx_slave, *slave, *start_at;
> - int i = 0;
> -
> - if (bond_info->next_rx_slave) {
> - start_at = bond_info->next_rx_slave;
> - } else {
> - start_at = bond->first_slave;
> - }
> -
> - rx_slave = NULL;
> -
> - bond_for_each_slave_from(bond, slave, i, start_at) {
> - if (SLAVE_IS_OK(slave)) {
> - if (!rx_slave) {
> - rx_slave = slave;
> - } else if (slave->speed > rx_slave->speed) {
> - rx_slave = slave;
> - }
> - }
> - }
> -
> - if (rx_slave) {
> - bond_info->next_rx_slave = rx_slave->next;
> - }
> -
> - return rx_slave;
> -}
> -
> /* teach the switch the mac of a disabled slave
> * on the primary for fault tolerance
> *
> @@ -468,14 +450,14 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave)
> u32 index, next_index;
>
> /* clear slave from rx_hashtbl */
> - _lock_rx_hashtbl(bond);
> + _lock_hashtbl(bond);
>
> rx_hash_table = bond_info->rx_hashtbl;
> index = bond_info->rx_hashtbl_head;
> for (; index != RLB_NULL_INDEX; index = next_index) {
> next_index = rx_hash_table[index].next;
> if (rx_hash_table[index].slave == slave) {
> - struct slave *assigned_slave = rlb_next_rx_slave(bond);
> + struct slave *assigned_slave = alb_get_best_slave(bond, index);
>
> if (assigned_slave) {
> rx_hash_table[index].slave = assigned_slave;
> @@ -499,7 +481,7 @@ static void rlb_clear_slave(struct bonding *bond, struct slave *slave)
> }
> }
>
> - _unlock_rx_hashtbl(bond);
> + _unlock_hashtbl(bond);
>
> write_lock_bh(&bond->curr_slave_lock);
>
> @@ -558,7 +540,7 @@ static void rlb_update_rx_clients(struct bonding *bond)
> struct rlb_client_info *client_info;
> u32 hash_index;
>
> - _lock_rx_hashtbl(bond);
> + _lock_hashtbl(bond);
>
> hash_index = bond_info->rx_hashtbl_head;
> for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
> @@ -576,7 +558,7 @@ static void rlb_update_rx_clients(struct bonding *bond)
> */
> bond_info->rlb_update_delay_counter = RLB_UPDATE_DELAY;
>
> - _unlock_rx_hashtbl(bond);
> + _unlock_hashtbl(bond);
> }
>
> /* The slave was assigned a new mac address - update the clients */
> @@ -587,7 +569,7 @@ static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *sla
> int ntt = 0;
> u32 hash_index;
>
> - _lock_rx_hashtbl(bond);
> + _lock_hashtbl(bond);
>
> hash_index = bond_info->rx_hashtbl_head;
> for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
> @@ -607,7 +589,7 @@ static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *sla
> bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY;
> }
>
> - _unlock_rx_hashtbl(bond);
> + _unlock_hashtbl(bond);
> }
>
> /* mark all clients using src_ip to be updated */
> @@ -617,7 +599,7 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)
> struct rlb_client_info *client_info;
> u32 hash_index;
>
> - _lock_rx_hashtbl(bond);
> + _lock_hashtbl(bond);
>
> hash_index = bond_info->rx_hashtbl_head;
> for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
> @@ -643,7 +625,7 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)
> }
> }
>
> - _unlock_rx_hashtbl(bond);
> + _unlock_hashtbl(bond);
> }
>
> /* Caller must hold both bond and ptr locks for read */
> @@ -655,7 +637,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
> struct rlb_client_info *client_info;
> u32 hash_index = 0;
>
> - _lock_rx_hashtbl(bond);
> + _lock_hashtbl(bond);
>
> hash_index = _simple_hash((u8 *)&arp->ip_dst, sizeof(arp->ip_src));
> client_info = &(bond_info->rx_hashtbl[hash_index]);
> @@ -671,7 +653,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
>
> assigned_slave = client_info->slave;
> if (assigned_slave) {
> - _unlock_rx_hashtbl(bond);
> + _unlock_hashtbl(bond);
> return assigned_slave;
> }
> } else {
> @@ -687,7 +669,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
> }
> }
> /* assign a new slave */
> - assigned_slave = rlb_next_rx_slave(bond);
> + assigned_slave = alb_get_best_slave(bond, hash_index);
>
> if (assigned_slave) {
> client_info->ip_src = arp->ip_src;
> @@ -723,7 +705,7 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
> }
> }
>
> - _unlock_rx_hashtbl(bond);
> + _unlock_hashtbl(bond);
>
> return assigned_slave;
> }
> @@ -771,36 +753,6 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
> return tx_slave;
> }
>
> -/* Caller must hold bond lock for read */
> -static void rlb_rebalance(struct bonding *bond)
> -{
> - struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
> - struct slave *assigned_slave;
> - struct rlb_client_info *client_info;
> - int ntt;
> - u32 hash_index;
> -
> - _lock_rx_hashtbl(bond);
> -
> - ntt = 0;
> - hash_index = bond_info->rx_hashtbl_head;
> - for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->next) {
> - client_info = &(bond_info->rx_hashtbl[hash_index]);
> - assigned_slave = rlb_next_rx_slave(bond);
> - if (assigned_slave && (client_info->slave != assigned_slave)) {
> - client_info->slave = assigned_slave;
> - client_info->ntt = 1;
> - ntt = 1;
> - }
> - }
> -
> - /* update the team's flag only after the whole iteration */
> - if (ntt) {
> - bond_info->rx_ntt = 1;
> - }
> - _unlock_rx_hashtbl(bond);
> -}
> -
> /* Caller must hold rx_hashtbl lock */
> static void rlb_init_table_entry(struct rlb_client_info *entry)
> {
> @@ -817,8 +769,6 @@ static int rlb_initialize(struct bonding *bond)
> int size = RLB_HASH_TABLE_SIZE * sizeof(struct rlb_client_info);
> int i;
>
> - spin_lock_init(&(bond_info->rx_hashtbl_lock));
> -
> new_hashtbl = kmalloc(size, GFP_KERNEL);
> if (!new_hashtbl) {
> printk(KERN_ERR DRV_NAME
> @@ -826,7 +776,7 @@ static int rlb_initialize(struct bonding *bond)
> bond->dev->name);
> return -1;
> }
> - _lock_rx_hashtbl(bond);
> + _lock_hashtbl(bond);
>
> bond_info->rx_hashtbl = new_hashtbl;
>
> @@ -836,7 +786,7 @@ static int rlb_initialize(struct bonding *bond)
> rlb_init_table_entry(bond_info->rx_hashtbl + i);
> }
>
> - _unlock_rx_hashtbl(bond);
> + _unlock_hashtbl(bond);
>
> /*initialize packet type*/
> pk_type->type = cpu_to_be16(ETH_P_ARP);
> @@ -855,13 +805,13 @@ static void rlb_deinitialize(struct bonding *bond)
>
> dev_remove_pack(&(bond_info->rlb_pkt_type));
>
> - _lock_rx_hashtbl(bond);
> + _lock_hashtbl(bond);
>
> kfree(bond_info->rx_hashtbl);
> bond_info->rx_hashtbl = NULL;
> bond_info->rx_hashtbl_head = RLB_NULL_INDEX;
>
> - _unlock_rx_hashtbl(bond);
> + _unlock_hashtbl(bond);
> }
>
> static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
> @@ -869,7 +819,7 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
> struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
> u32 curr_index;
>
> - _lock_rx_hashtbl(bond);
> + _lock_hashtbl(bond);
>
> curr_index = bond_info->rx_hashtbl_head;
> while (curr_index != RLB_NULL_INDEX) {
> @@ -894,7 +844,7 @@ static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
> curr_index = next_index;
> }
>
> - _unlock_rx_hashtbl(bond);
> + _unlock_hashtbl(bond);
> }
>
> /*********************** tlb/rlb shared functions *********************/
> @@ -1521,11 +1471,6 @@ void bond_alb_monitor(struct work_struct *work)
> read_lock(&bond->lock);
> }
>
> - if (bond_info->rlb_rebalance) {
> - bond_info->rlb_rebalance = 0;
> - rlb_rebalance(bond);
> - }
> -
> /* check if clients need updating */
> if (bond_info->rx_ntt) {
> if (bond_info->rlb_update_delay_counter) {
> diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h
> index b65fd29..09d755a 100644
> --- a/drivers/net/bonding/bond_alb.h
> +++ b/drivers/net/bonding/bond_alb.h
> @@ -90,7 +90,7 @@ struct tlb_slave_info {
> struct alb_bond_info {
> struct timer_list alb_timer;
> struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */
> - spinlock_t tx_hashtbl_lock;
> + spinlock_t hashtbl_lock; /* lock for both tables */
> u32 unbalanced_load;
> int tx_rebalance_counter;
> int lp_counter;
> @@ -98,7 +98,6 @@ struct alb_bond_info {
> int rlb_enabled;
> struct packet_type rlb_pkt_type;
> struct rlb_client_info *rx_hashtbl; /* Receive hash table */
> - spinlock_t rx_hashtbl_lock;
> u32 rx_hashtbl_head;
> u8 rx_ntt; /* flag - need to transmit
> * to all rx clients
Any thoughts on this, Jay?
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists