[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <6356.1412180365@famine>
Date: Wed, 01 Oct 2014 09:19:25 -0700
From: Jay Vosburgh <jay.vosburgh@...onical.com>
To: Mahesh Bandewar <maheshb@...gle.com>
cc: Veaceslav Falico <vfalico@...hat.com>,
Andy Gospodarek <andy@...yhouse.net>,
David Miller <davem@...emloft.net>,
netdev <netdev@...r.kernel.org>,
Eric Dumazet <edumazet@...gle.com>,
Maciej Zenczykowski <maze@...gle.com>
Subject: Re: [PATCH net-next v6 2/2] bonding: Simplify the xmit function for modes that use xmit_hash
Mahesh Bandewar <maheshb@...gle.com> wrote:
>Earlier change to use usable slave array for TLB mode had an additional
>performance advantage. So extending the same logic to all other modes
>that use xmit-hash for slave selection (viz 802.3AD, and XOR modes).
>Also consolidating this with the earlier TLB change.
>
>The main idea is to build the usable slaves array in the control path
>and use that array for slave selection during xmit operation.
>
>Measured performance in a setup with a bond of 4x1G NICs with 200
>instances of netperf for the modes involved (3ad, xor, tlb)
>cmd: netperf -t TCP_RR -H <TargetHost> -l 60 -s 5
>
>Mode TPS-Before TPS-After
>
>802.3ad : 468,694 493,101
>TLB (lb=0): 392,583 392,965
>XOR : 475,696 484,517
>
>Signed-off-by: Mahesh Bandewar <maheshb@...gle.com>
>---
>v1:
> (a) If bond_update_slave_arr() fails to allocate memory, it will overwrite
> the slave that need to be removed.
> (b) Freeing of array will assign NULL (to handle bond->down to bond->up
> transition gracefully.
> (c) Change from pr_debug() to pr_err() if bond_update_slave_arr() returns
> failure.
> (d) XOR: bond_update_slave_arr() will consider mii-mon, arp-mon cases and
> will populate the array even if these parameters are not used.
> (e) 3AD: Should handle the ad_agg_selection_logic correctly.
>v2:
> (a) Removed rcu_read_{un}lock() calls from array manipulation code.
> (b) Slave link-events now refresh array for all these modes.
> (c) Moved free-array call from bond_close() to bond_uninit().
>v3:
> (a) Fixed null pointer dereference.
> (b) Removed bond->lock lockdep dependency.
>v4:
> (a) Made to changes to comply with Nikolay's locking changes
> (b) Added a work-queue to refresh slave-array when RTNL is not held
> (c) Array refresh happens ONLY with RTNL now.
> (d) alloc changed from GFP_ATOMIC to GFP_KERNEL
>v5:
> (a) Consolidated all delayed slave-array updates at one place in
> 3ad_state_machine_handler()
>v6:
> (a) Free slave array when there is no active aggregator
>
> drivers/net/bonding/bond_3ad.c | 140 +++++++++++------------------
> drivers/net/bonding/bond_alb.c | 51 ++---------
> drivers/net/bonding/bond_alb.h | 8 --
> drivers/net/bonding/bond_main.c | 192 +++++++++++++++++++++++++++++++++++++---
> drivers/net/bonding/bonding.h | 10 +++
> 5 files changed, 249 insertions(+), 152 deletions(-)
>
>diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
>index 7e9e522fd476..2110215f3528 100644
>--- a/drivers/net/bonding/bond_3ad.c
>+++ b/drivers/net/bonding/bond_3ad.c
>@@ -102,17 +102,20 @@ static const u8 lacpdu_mcast_addr[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
> /* ================= main 802.3ad protocol functions ================== */
> static int ad_lacpdu_send(struct port *port);
> static int ad_marker_send(struct port *port, struct bond_marker *marker);
>-static void ad_mux_machine(struct port *port);
>+static void ad_mux_machine(struct port *port, bool *update_slave_arr);
> static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port);
> static void ad_tx_machine(struct port *port);
> static void ad_periodic_machine(struct port *port);
>-static void ad_port_selection_logic(struct port *port);
>-static void ad_agg_selection_logic(struct aggregator *aggregator);
>+static void ad_port_selection_logic(struct port *port, bool *update_slave_arr);
>+static void ad_agg_selection_logic(struct aggregator *aggregator,
>+ bool *update_slave_arr);
> static void ad_clear_agg(struct aggregator *aggregator);
> static void ad_initialize_agg(struct aggregator *aggregator);
> static void ad_initialize_port(struct port *port, int lacp_fast);
>-static void ad_enable_collecting_distributing(struct port *port);
>-static void ad_disable_collecting_distributing(struct port *port);
>+static void ad_enable_collecting_distributing(struct port *port,
>+ bool *update_slave_arr);
>+static void ad_disable_collecting_distributing(struct port *port,
>+ bool *update_slave_arr);
> static void ad_marker_info_received(struct bond_marker *marker_info,
> struct port *port);
> static void ad_marker_response_received(struct bond_marker *marker,
>@@ -796,8 +799,9 @@ static int ad_marker_send(struct port *port, struct bond_marker *marker)
> /**
> * ad_mux_machine - handle a port's mux state machine
> * @port: the port we're looking at
>+ * @update_slave_arr: Does slave array need update?
> */
>-static void ad_mux_machine(struct port *port)
>+static void ad_mux_machine(struct port *port, bool *update_slave_arr)
> {
> mux_states_t last_state;
>
>@@ -901,7 +905,8 @@ static void ad_mux_machine(struct port *port)
> switch (port->sm_mux_state) {
> case AD_MUX_DETACHED:
> port->actor_oper_port_state &= ~AD_STATE_SYNCHRONIZATION;
>- ad_disable_collecting_distributing(port);
>+ ad_disable_collecting_distributing(port,
>+ update_slave_arr);
> port->actor_oper_port_state &= ~AD_STATE_COLLECTING;
> port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING;
> port->ntt = true;
>@@ -913,13 +918,15 @@ static void ad_mux_machine(struct port *port)
> port->actor_oper_port_state |= AD_STATE_SYNCHRONIZATION;
> port->actor_oper_port_state &= ~AD_STATE_COLLECTING;
> port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING;
>- ad_disable_collecting_distributing(port);
>+ ad_disable_collecting_distributing(port,
>+ update_slave_arr);
> port->ntt = true;
> break;
> case AD_MUX_COLLECTING_DISTRIBUTING:
> port->actor_oper_port_state |= AD_STATE_COLLECTING;
> port->actor_oper_port_state |= AD_STATE_DISTRIBUTING;
>- ad_enable_collecting_distributing(port);
>+ ad_enable_collecting_distributing(port,
>+ update_slave_arr);
> port->ntt = true;
> break;
> default:
>@@ -1187,12 +1194,13 @@ static void ad_periodic_machine(struct port *port)
> /**
> * ad_port_selection_logic - select aggregation groups
> * @port: the port we're looking at
>+ * @update_slave_arr: Does slave array need update?
> *
> * Select aggregation groups, and assign each port for it's aggregetor. The
> * selection logic is called in the inititalization (after all the handshkes),
> * and after every lacpdu receive (if selected is off).
> */
>-static void ad_port_selection_logic(struct port *port)
>+static void ad_port_selection_logic(struct port *port, bool *update_slave_arr)
Since this function is void, why not have it return a value
instead of the bool *update_slave_arr? That would eliminate the need
for some call sites to pass a "dummy" to the function. This comment
applies to ad_agg_selection_logic and ad_enable_collecting_distributing
as well.
-J
> {
> struct aggregator *aggregator, *free_aggregator = NULL, *temp_aggregator;
> struct port *last_port = NULL, *curr_port;
>@@ -1347,7 +1355,7 @@ static void ad_port_selection_logic(struct port *port)
> __agg_ports_are_ready(port->aggregator));
>
> aggregator = __get_first_agg(port);
>- ad_agg_selection_logic(aggregator);
>+ ad_agg_selection_logic(aggregator, update_slave_arr);
> }
>
> /* Decide if "agg" is a better choice for the new active aggregator that
>@@ -1435,6 +1443,7 @@ static int agg_device_up(const struct aggregator *agg)
> /**
> * ad_agg_selection_logic - select an aggregation group for a team
> * @aggregator: the aggregator we're looking at
>+ * @update_slave_arr: Does slave array need update?
> *
> * It is assumed that only one aggregator may be selected for a team.
> *
>@@ -1457,7 +1466,8 @@ static int agg_device_up(const struct aggregator *agg)
> * __get_active_agg() won't work correctly. This function should be better
> * called with the bond itself, and retrieve the first agg from it.
> */
>-static void ad_agg_selection_logic(struct aggregator *agg)
>+static void ad_agg_selection_logic(struct aggregator *agg,
>+ bool *update_slave_arr)
> {
> struct aggregator *best, *active, *origin;
> struct bonding *bond = agg->slave->bond;
>@@ -1550,6 +1560,8 @@ static void ad_agg_selection_logic(struct aggregator *agg)
> __disable_port(port);
> }
> }
>+ /* Slave array needs update. */
>+ *update_slave_arr = true;
> }
>
> /* if the selected aggregator is of join individuals
>@@ -1678,24 +1690,30 @@ static void ad_initialize_port(struct port *port, int lacp_fast)
> /**
> * ad_enable_collecting_distributing - enable a port's transmit/receive
> * @port: the port we're looking at
>+ * @update_slave_arr: Does slave array need update?
> *
> * Enable @port if it's in an active aggregator
> */
>-static void ad_enable_collecting_distributing(struct port *port)
>+static void ad_enable_collecting_distributing(struct port *port,
>+ bool *update_slave_arr)
> {
> if (port->aggregator->is_active) {
> pr_debug("Enabling port %d(LAG %d)\n",
> port->actor_port_number,
> port->aggregator->aggregator_identifier);
> __enable_port(port);
>+ /* Slave array needs update */
>+ *update_slave_arr = true;
> }
> }
>
> /**
> * ad_disable_collecting_distributing - disable a port's transmit/receive
> * @port: the port we're looking at
>+ * @update_slave_arr: Does slave array need update?
> */
>-static void ad_disable_collecting_distributing(struct port *port)
>+static void ad_disable_collecting_distributing(struct port *port,
>+ bool *update_slave_arr)
> {
> if (port->aggregator &&
> !MAC_ADDRESS_EQUAL(&(port->aggregator->partner_system),
>@@ -1704,6 +1722,8 @@ static void ad_disable_collecting_distributing(struct port *port)
> port->actor_port_number,
> port->aggregator->aggregator_identifier);
> __disable_port(port);
>+ /* Slave array needs an update */
>+ *update_slave_arr = true;
> }
> }
>
>@@ -1868,6 +1888,7 @@ void bond_3ad_unbind_slave(struct slave *slave)
> struct bonding *bond = slave->bond;
> struct slave *slave_iter;
> struct list_head *iter;
>+ bool dummy_slave_update; /* Ignore this value as caller updates array */
>
> /* Sync against bond_3ad_state_machine_handler() */
> spin_lock_bh(&bond->mode_lock);
>@@ -1951,7 +1972,8 @@ void bond_3ad_unbind_slave(struct slave *slave)
> ad_clear_agg(aggregator);
>
> if (select_new_active_agg)
>- ad_agg_selection_logic(__get_first_agg(port));
>+ ad_agg_selection_logic(__get_first_agg(port),
>+ &dummy_slave_update);
> } else {
> netdev_warn(bond->dev, "unbinding aggregator, and could not find a new aggregator for its ports\n");
> }
>@@ -1966,7 +1988,8 @@ void bond_3ad_unbind_slave(struct slave *slave)
> /* select new active aggregator */
> temp_aggregator = __get_first_agg(port);
> if (temp_aggregator)
>- ad_agg_selection_logic(temp_aggregator);
>+ ad_agg_selection_logic(temp_aggregator,
>+ &dummy_slave_update);
> }
> }
> }
>@@ -1996,7 +2019,8 @@ void bond_3ad_unbind_slave(struct slave *slave)
> if (select_new_active_agg) {
> netdev_info(bond->dev, "Removing an active aggregator\n");
> /* select new active aggregator */
>- ad_agg_selection_logic(__get_first_agg(port));
>+ ad_agg_selection_logic(__get_first_agg(port),
>+ &dummy_slave_update);
> }
> }
> break;
>@@ -2031,6 +2055,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
> struct slave *slave;
> struct port *port;
> bool should_notify_rtnl = BOND_SLAVE_NOTIFY_LATER;
>+ bool update_slave_arr = false;
>
> /* Lock to protect data accessed by all (e.g., port->sm_vars) and
> * against running with bond_3ad_unbind_slave. ad_rx_machine may run
>@@ -2058,7 +2083,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
> }
>
> aggregator = __get_first_agg(port);
>- ad_agg_selection_logic(aggregator);
>+ ad_agg_selection_logic(aggregator, &update_slave_arr);
> }
> bond_3ad_set_carrier(bond);
> }
>@@ -2074,8 +2099,8 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
>
> ad_rx_machine(NULL, port);
> ad_periodic_machine(port);
>- ad_port_selection_logic(port);
>- ad_mux_machine(port);
>+ ad_port_selection_logic(port, &update_slave_arr);
>+ ad_mux_machine(port, &update_slave_arr);
> ad_tx_machine(port);
>
> /* turn off the BEGIN bit, since we already handled it */
>@@ -2093,6 +2118,9 @@ re_arm:
> rcu_read_unlock();
> spin_unlock_bh(&bond->mode_lock);
>
>+ if (update_slave_arr)
>+ bond_slave_arr_work_rearm(bond, 0);
>+
> if (should_notify_rtnl && rtnl_trylock()) {
> bond_slave_state_notify(bond);
> rtnl_unlock();
>@@ -2283,6 +2311,11 @@ void bond_3ad_handle_link_change(struct slave *slave, char link)
> port->sm_vars |= AD_PORT_BEGIN;
>
> spin_unlock_bh(&slave->bond->mode_lock);
>+
>+ /* RTNL is held and mode_lock is released so it's safe
>+ * to update slave_array here.
>+ */
>+ bond_update_slave_arr(slave->bond, NULL);
> }
>
> /**
>@@ -2377,73 +2410,6 @@ int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info)
> return ret;
> }
>
>-int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
>-{
>- struct bonding *bond = netdev_priv(dev);
>- struct slave *slave, *first_ok_slave;
>- struct aggregator *agg;
>- struct ad_info ad_info;
>- struct list_head *iter;
>- int slaves_in_agg;
>- int slave_agg_no;
>- int agg_id;
>-
>- if (__bond_3ad_get_active_agg_info(bond, &ad_info)) {
>- netdev_dbg(dev, "__bond_3ad_get_active_agg_info failed\n");
>- goto err_free;
>- }
>-
>- slaves_in_agg = ad_info.ports;
>- agg_id = ad_info.aggregator_id;
>-
>- if (slaves_in_agg == 0) {
>- netdev_dbg(dev, "active aggregator is empty\n");
>- goto err_free;
>- }
>-
>- slave_agg_no = bond_xmit_hash(bond, skb) % slaves_in_agg;
>- first_ok_slave = NULL;
>-
>- bond_for_each_slave_rcu(bond, slave, iter) {
>- agg = SLAVE_AD_INFO(slave)->port.aggregator;
>- if (!agg || agg->aggregator_identifier != agg_id)
>- continue;
>-
>- if (slave_agg_no >= 0) {
>- if (!first_ok_slave && bond_slave_can_tx(slave))
>- first_ok_slave = slave;
>- slave_agg_no--;
>- continue;
>- }
>-
>- if (bond_slave_can_tx(slave)) {
>- bond_dev_queue_xmit(bond, skb, slave->dev);
>- goto out;
>- }
>- }
>-
>- if (slave_agg_no >= 0) {
>- netdev_err(dev, "Couldn't find a slave to tx on for aggregator ID %d\n",
>- agg_id);
>- goto err_free;
>- }
>-
>- /* we couldn't find any suitable slave after the agg_no, so use the
>- * first suitable found, if found.
>- */
>- if (first_ok_slave)
>- bond_dev_queue_xmit(bond, skb, first_ok_slave->dev);
>- else
>- goto err_free;
>-
>-out:
>- return NETDEV_TX_OK;
>-err_free:
>- /* no suitable interface, frame not sent */
>- dev_kfree_skb_any(skb);
>- goto out;
>-}
>-
> int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,
> struct slave *slave)
> {
>diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
>index 615f3bebd019..d2eadab787c5 100644
>--- a/drivers/net/bonding/bond_alb.c
>+++ b/drivers/net/bonding/bond_alb.c
>@@ -177,7 +177,6 @@ static int tlb_initialize(struct bonding *bond)
> static void tlb_deinitialize(struct bonding *bond)
> {
> struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>- struct tlb_up_slave *arr;
>
> spin_lock_bh(&bond->mode_lock);
>
>@@ -185,10 +184,6 @@ static void tlb_deinitialize(struct bonding *bond)
> bond_info->tx_hashtbl = NULL;
>
> spin_unlock_bh(&bond->mode_lock);
>-
>- arr = rtnl_dereference(bond_info->slave_arr);
>- if (arr)
>- kfree_rcu(arr, rcu);
> }
>
> static long long compute_gap(struct slave *slave)
>@@ -1336,39 +1331,9 @@ out:
> return NETDEV_TX_OK;
> }
>
>-static int bond_tlb_update_slave_arr(struct bonding *bond,
>- struct slave *skipslave)
>-{
>- struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>- struct slave *tx_slave;
>- struct list_head *iter;
>- struct tlb_up_slave *new_arr, *old_arr;
>-
>- new_arr = kzalloc(offsetof(struct tlb_up_slave, arr[bond->slave_cnt]),
>- GFP_ATOMIC);
>- if (!new_arr)
>- return -ENOMEM;
>-
>- bond_for_each_slave(bond, tx_slave, iter) {
>- if (!bond_slave_can_tx(tx_slave))
>- continue;
>- if (skipslave == tx_slave)
>- continue;
>- new_arr->arr[new_arr->count++] = tx_slave;
>- }
>-
>- old_arr = rtnl_dereference(bond_info->slave_arr);
>- rcu_assign_pointer(bond_info->slave_arr, new_arr);
>- if (old_arr)
>- kfree_rcu(old_arr, rcu);
>-
>- return 0;
>-}
>-
> int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
> {
> struct bonding *bond = netdev_priv(bond_dev);
>- struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
> struct ethhdr *eth_data;
> struct slave *tx_slave = NULL;
> u32 hash_index;
>@@ -1389,12 +1354,14 @@ int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
> hash_index & 0xFF,
> skb->len);
> } else {
>- struct tlb_up_slave *slaves;
>+ struct bond_up_slave *slaves;
>+ unsigned int count;
>
>- slaves = rcu_dereference(bond_info->slave_arr);
>- if (slaves && slaves->count)
>+ slaves = rcu_dereference(bond->slave_arr);
>+ count = slaves ? ACCESS_ONCE(slaves->count) : 0;
>+ if (likely(count))
> tx_slave = slaves->arr[hash_index %
>- slaves->count];
>+ count];
> }
> break;
> }
>@@ -1641,10 +1608,6 @@ void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave)
> rlb_clear_slave(bond, slave);
> }
>
>- if (bond_is_nondyn_tlb(bond))
>- if (bond_tlb_update_slave_arr(bond, slave))
>- pr_err("Failed to build slave-array for TLB mode.\n");
>-
> }
>
> void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link)
>@@ -1669,7 +1632,7 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char
> }
>
> if (bond_is_nondyn_tlb(bond)) {
>- if (bond_tlb_update_slave_arr(bond, NULL))
>+ if (bond_update_slave_arr(bond, NULL))
> pr_err("Failed to build slave-array for TLB mode.\n");
> }
> }
>diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h
>index 3c6a7ff974d7..1ad473b4ade5 100644
>--- a/drivers/net/bonding/bond_alb.h
>+++ b/drivers/net/bonding/bond_alb.h
>@@ -139,19 +139,11 @@ struct tlb_slave_info {
> */
> };
>
>-struct tlb_up_slave {
>- unsigned int count;
>- struct rcu_head rcu;
>- struct slave *arr[0];
>-};
>-
> struct alb_bond_info {
> struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */
> u32 unbalanced_load;
> int tx_rebalance_counter;
> int lp_counter;
>- /* -------- non-dynamic tlb mode only ---------*/
>- struct tlb_up_slave __rcu *slave_arr; /* Up slaves */
> /* -------- rlb parameters -------- */
> int rlb_enabled;
> struct rlb_client_info *rx_hashtbl; /* Receive hash table */
>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index c2adc2755ff6..6f79f495b01a 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -210,6 +210,7 @@ static int bond_init(struct net_device *bond_dev);
> static void bond_uninit(struct net_device *bond_dev);
> static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,
> struct rtnl_link_stats64 *stats);
>+static void bond_slave_arr_handler(struct work_struct *work);
>
> /*---------------------------- General routines -----------------------------*/
>
>@@ -1551,6 +1552,9 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
> unblock_netpoll_tx();
> }
>
>+ if (bond_mode_uses_xmit_hash(bond))
>+ bond_update_slave_arr(bond, NULL);
>+
> netdev_info(bond_dev, "Enslaving %s as %s interface with %s link\n",
> slave_dev->name,
> bond_is_active_slave(new_slave) ? "an active" : "a backup",
>@@ -1668,6 +1672,9 @@ static int __bond_release_one(struct net_device *bond_dev,
> if (BOND_MODE(bond) == BOND_MODE_8023AD)
> bond_3ad_unbind_slave(slave);
>
>+ if (bond_mode_uses_xmit_hash(bond))
>+ bond_update_slave_arr(bond, slave);
>+
> netdev_info(bond_dev, "Releasing %s interface %s\n",
> bond_is_active_slave(slave) ? "active" : "backup",
> slave_dev->name);
>@@ -1970,6 +1977,9 @@ static void bond_miimon_commit(struct bonding *bond)
> bond_alb_handle_link_change(bond, slave,
> BOND_LINK_UP);
>
>+ if (BOND_MODE(bond) == BOND_MODE_XOR)
>+ bond_update_slave_arr(bond, NULL);
>+
> if (!bond->curr_active_slave || slave == primary)
> goto do_failover;
>
>@@ -1997,6 +2007,9 @@ static void bond_miimon_commit(struct bonding *bond)
> bond_alb_handle_link_change(bond, slave,
> BOND_LINK_DOWN);
>
>+ if (BOND_MODE(bond) == BOND_MODE_XOR)
>+ bond_update_slave_arr(bond, NULL);
>+
> if (slave == rcu_access_pointer(bond->curr_active_slave))
> goto do_failover;
>
>@@ -2453,6 +2466,8 @@ static void bond_loadbalance_arp_mon(struct work_struct *work)
>
> if (slave_state_changed) {
> bond_slave_state_change(bond);
>+ if (BOND_MODE(bond) == BOND_MODE_XOR)
>+ bond_update_slave_arr(bond, NULL);
> } else if (do_failover) {
> block_netpoll_tx();
> bond_select_active_slave(bond);
>@@ -2829,8 +2844,20 @@ static int bond_slave_netdev_event(unsigned long event,
> if (old_duplex != slave->duplex)
> bond_3ad_adapter_duplex_changed(slave);
> }
>+ /* Refresh slave-array if applicable!
>+ * If the setup does not use miimon or arpmon (mode-specific!),
>+ * then these events will not cause the slave-array to be
>+ * refreshed. This will cause xmit to use a slave that is not
>+ * usable. Avoid such situation by refeshing the array at these
>+ * events. If these (miimon/arpmon) parameters are configured
>+ * then array gets refreshed twice and that should be fine!
>+ */
>+ if (bond_mode_uses_xmit_hash(bond))
>+ bond_update_slave_arr(bond, NULL);
> break;
> case NETDEV_DOWN:
>+ if (bond_mode_uses_xmit_hash(bond))
>+ bond_update_slave_arr(bond, NULL);
> break;
> case NETDEV_CHANGEMTU:
> /* TODO: Should slaves be allowed to
>@@ -3010,6 +3037,7 @@ static void bond_work_init_all(struct bonding *bond)
> else
> INIT_DELAYED_WORK(&bond->arp_work, bond_loadbalance_arp_mon);
> INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler);
>+ INIT_DELAYED_WORK(&bond->slave_arr_work, bond_slave_arr_handler);
> }
>
> static void bond_work_cancel_all(struct bonding *bond)
>@@ -3019,6 +3047,7 @@ static void bond_work_cancel_all(struct bonding *bond)
> cancel_delayed_work_sync(&bond->alb_work);
> cancel_delayed_work_sync(&bond->ad_work);
> cancel_delayed_work_sync(&bond->mcast_work);
>+ cancel_delayed_work_sync(&bond->slave_arr_work);
> }
>
> static int bond_open(struct net_device *bond_dev)
>@@ -3068,6 +3097,9 @@ static int bond_open(struct net_device *bond_dev)
> bond_3ad_initiate_agg_selection(bond, 1);
> }
>
>+ if (bond_mode_uses_xmit_hash(bond))
>+ bond_update_slave_arr(bond, NULL);
>+
> return 0;
> }
>
>@@ -3573,20 +3605,148 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d
> return NETDEV_TX_OK;
> }
>
>-/* In bond_xmit_xor() , we determine the output device by using a pre-
>- * determined xmit_hash_policy(), If the selected device is not enabled,
>- * find the next active slave.
>+/* Use this to update slave_array when (a) it's not appropriate to update
>+ * slave_array right away (note that update_slave_array() may sleep)
>+ * and / or (b) RTNL is not held.
> */
>-static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev)
>+void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay)
> {
>- struct bonding *bond = netdev_priv(bond_dev);
>- int slave_cnt = ACCESS_ONCE(bond->slave_cnt);
>+ queue_delayed_work(bond->wq, &bond->slave_arr_work, delay);
>+}
>
>- if (likely(slave_cnt))
>- bond_xmit_slave_id(bond, skb,
>- bond_xmit_hash(bond, skb) % slave_cnt);
>- else
>+/* Slave array work handler. Holds only RTNL */
>+static void bond_slave_arr_handler(struct work_struct *work)
>+{
>+ struct bonding *bond = container_of(work, struct bonding,
>+ slave_arr_work.work);
>+ int ret;
>+
>+ if (!rtnl_trylock())
>+ goto err;
>+
>+ ret = bond_update_slave_arr(bond, NULL);
>+ rtnl_unlock();
>+ if (ret) {
>+ pr_warn_ratelimited("Failed to update slave array from WT\n");
>+ goto err;
>+ }
>+ return;
>+
>+err:
>+ bond_slave_arr_work_rearm(bond, 1);
>+}
>+
>+/* Build the usable slaves array in control path for modes that use xmit-hash
>+ * to determine the slave interface -
>+ * (a) BOND_MODE_8023AD
>+ * (b) BOND_MODE_XOR
>+ * (c) BOND_MODE_TLB && tlb_dynamic_lb == 0
>+ *
>+ * The caller is expected to hold RTNL only and NO other lock!
>+ */
>+int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
>+{
>+ struct slave *slave;
>+ struct list_head *iter;
>+ struct bond_up_slave *new_arr, *old_arr;
>+ int slaves_in_agg;
>+ int agg_id = 0;
>+ int ret = 0;
>+
>+#ifdef CONFIG_LOCKDEP
>+ WARN_ON(lockdep_is_held(&bond->mode_lock));
>+#endif
>+
>+ new_arr = kzalloc(offsetof(struct bond_up_slave, arr[bond->slave_cnt]),
>+ GFP_KERNEL);
>+ if (!new_arr) {
>+ ret = -ENOMEM;
>+ pr_err("Failed to build slave-array.\n");
>+ goto out;
>+ }
>+ if (BOND_MODE(bond) == BOND_MODE_8023AD) {
>+ struct ad_info ad_info;
>+
>+ if (bond_3ad_get_active_agg_info(bond, &ad_info)) {
>+ pr_debug("bond_3ad_get_active_agg_info failed\n");
>+ kfree_rcu(new_arr, rcu);
>+ /* No active aggragator means its not safe to use
>+ * the previous array.
>+ */
>+ old_arr = rtnl_dereference(bond->slave_arr);
>+ if (old_arr) {
>+ RCU_INIT_POINTER(bond->slave_arr, NULL);
>+ kfree_rcu(old_arr, rcu);
>+ }
>+ goto out;
>+ }
>+ slaves_in_agg = ad_info.ports;
>+ agg_id = ad_info.aggregator_id;
>+ }
>+ bond_for_each_slave(bond, slave, iter) {
>+ if (BOND_MODE(bond) == BOND_MODE_8023AD) {
>+ struct aggregator *agg;
>+
>+ agg = SLAVE_AD_INFO(slave)->port.aggregator;
>+ if (!agg || agg->aggregator_identifier != agg_id)
>+ continue;
>+ }
>+ if (!bond_slave_can_tx(slave))
>+ continue;
>+ if (skipslave == slave)
>+ continue;
>+ new_arr->arr[new_arr->count++] = slave;
>+ }
>+
>+ old_arr = rtnl_dereference(bond->slave_arr);
>+ rcu_assign_pointer(bond->slave_arr, new_arr);
>+ if (old_arr)
>+ kfree_rcu(old_arr, rcu);
>+out:
>+ if (ret != 0 && skipslave) {
>+ int idx;
>+
>+ /* Rare situation where caller has asked to skip a specific
>+ * slave but allocation failed (most likely!). BTW this is
>+ * only possible when the call is initiated from
>+ * __bond_release_one(). In this situation; overwrite the
>+ * skipslave entry in the array with the last entry from the
>+ * array to avoid a situation where the xmit path may choose
>+ * this to-be-skipped slave to send a packet out.
>+ */
>+ old_arr = rtnl_dereference(bond->slave_arr);
>+ for (idx = 0; idx < old_arr->count; idx++) {
>+ if (skipslave == old_arr->arr[idx]) {
>+ old_arr->arr[idx] =
>+ old_arr->arr[old_arr->count-1];
>+ old_arr->count--;
>+ break;
>+ }
>+ }
>+ }
>+ return ret;
>+}
>+
>+/* Use this Xmit function for 3AD as well as XOR modes. The current
>+ * usable slave array is formed in the control path. The xmit function
>+ * just calculates hash and sends the packet out.
>+ */
>+int bond_3ad_xor_xmit(struct sk_buff *skb, struct net_device *dev)
>+{
>+ struct bonding *bond = netdev_priv(dev);
>+ struct slave *slave;
>+ struct bond_up_slave *slaves;
>+ unsigned int count;
>+
>+ slaves = rcu_dereference(bond->slave_arr);
>+ count = slaves ? ACCESS_ONCE(slaves->count) : 0;
>+ if (likely(count)) {
>+ slave = slaves->arr[bond_xmit_hash(bond, skb) % count];
>+ bond_dev_queue_xmit(bond, skb, slave->dev);
>+ } else {
> dev_kfree_skb_any(skb);
>+ atomic_long_inc(&dev->tx_dropped);
>+ }
>
> return NETDEV_TX_OK;
> }
>@@ -3682,12 +3842,11 @@ static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev
> return bond_xmit_roundrobin(skb, dev);
> case BOND_MODE_ACTIVEBACKUP:
> return bond_xmit_activebackup(skb, dev);
>+ case BOND_MODE_8023AD:
> case BOND_MODE_XOR:
>- return bond_xmit_xor(skb, dev);
>+ return bond_3ad_xor_xmit(skb, dev);
> case BOND_MODE_BROADCAST:
> return bond_xmit_broadcast(skb, dev);
>- case BOND_MODE_8023AD:
>- return bond_3ad_xmit_xor(skb, dev);
> case BOND_MODE_ALB:
> return bond_alb_xmit(skb, dev);
> case BOND_MODE_TLB:
>@@ -3861,6 +4020,7 @@ static void bond_uninit(struct net_device *bond_dev)
> struct bonding *bond = netdev_priv(bond_dev);
> struct list_head *iter;
> struct slave *slave;
>+ struct bond_up_slave *arr;
>
> bond_netpoll_cleanup(bond_dev);
>
>@@ -3869,6 +4029,12 @@ static void bond_uninit(struct net_device *bond_dev)
> __bond_release_one(bond_dev, slave->dev, true);
> netdev_info(bond_dev, "Released all slaves\n");
>
>+ arr = rtnl_dereference(bond->slave_arr);
>+ if (arr) {
>+ kfree_rcu(arr, rcu);
>+ RCU_INIT_POINTER(bond->slave_arr, NULL);
>+ }
>+
> list_del(&bond->bond_list);
>
> bond_debug_unregister(bond);
>diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
>index 5b022da9cad2..10920f0686e2 100644
>--- a/drivers/net/bonding/bonding.h
>+++ b/drivers/net/bonding/bonding.h
>@@ -179,6 +179,12 @@ struct slave {
> struct rtnl_link_stats64 slave_stats;
> };
>
>+struct bond_up_slave {
>+ unsigned int count;
>+ struct rcu_head rcu;
>+ struct slave *arr[0];
>+};
>+
> /*
> * Link pseudo-state only used internally by monitors
> */
>@@ -193,6 +199,7 @@ struct bonding {
> struct slave __rcu *curr_active_slave;
> struct slave __rcu *current_arp_slave;
> struct slave __rcu *primary_slave;
>+ struct bond_up_slave __rcu *slave_arr; /* Array of usable slaves */
> bool force_primary;
> s32 slave_cnt; /* never change this value outside the attach/detach wrappers */
> int (*recv_probe)(const struct sk_buff *, struct bonding *,
>@@ -222,6 +229,7 @@ struct bonding {
> struct delayed_work alb_work;
> struct delayed_work ad_work;
> struct delayed_work mcast_work;
>+ struct delayed_work slave_arr_work;
> #ifdef CONFIG_DEBUG_FS
> /* debugging support via debugfs */
> struct dentry *debug_dir;
>@@ -534,6 +542,8 @@ const char *bond_slave_link_status(s8 link);
> struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev,
> struct net_device *end_dev,
> int level);
>+int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave);
>+void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay);
>
> #ifdef CONFIG_PROC_FS
> void bond_create_proc_entry(struct bonding *bond);
>--
>2.1.0.rc2.206.gedb03e5
---
-Jay Vosburgh, jay.vosburgh@...onical.com
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists