[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAF2d9jioDfj+qmvrQBQcaHnvXbL+Fnsh3-jxwn2KU+zvV=xgJQ@mail.gmail.com>
Date: Thu, 2 Oct 2014 08:26:20 +0530
From: Mahesh Bandewar <maheshb@...gle.com>
To: Jay Vosburgh <jay.vosburgh@...onical.com>
Cc: Veaceslav Falico <vfalico@...hat.com>,
Andy Gospodarek <andy@...yhouse.net>,
David Miller <davem@...emloft.net>,
netdev <netdev@...r.kernel.org>,
Eric Dumazet <edumazet@...gle.com>,
Maciej Zenczykowski <maze@...gle.com>
Subject: Re: [PATCH net-next v6 2/2] bonding: Simplify the xmit function for
modes that use xmit_hash
On Wed, Oct 1, 2014 at 9:49 PM, Jay Vosburgh <jay.vosburgh@...onical.com> wrote:
> Mahesh Bandewar <maheshb@...gle.com> wrote:
>
>>Earlier change to use usable slave array for TLB mode had an additional
>>performance advantage. So extending the same logic to all other modes
>>that use xmit-hash for slave selection (viz 802.3AD, and XOR modes).
>>Also consolidating this with the earlier TLB change.
>>
>>The main idea is to build the usable slaves array in the control path
>>and use that array for slave selection during xmit operation.
>>
>>Measured performance in a setup with a bond of 4x1G NICs with 200
>>instances of netperf for the modes involved (3ad, xor, tlb)
>>cmd: netperf -t TCP_RR -H <TargetHost> -l 60 -s 5
>>
>>Mode TPS-Before TPS-After
>>
>>802.3ad : 468,694 493,101
>>TLB (lb=0): 392,583 392,965
>>XOR : 475,696 484,517
>>
>>Signed-off-by: Mahesh Bandewar <maheshb@...gle.com>
>>---
>>v1:
>> (a) If bond_update_slave_arr() fails to allocate memory, it will overwrite
>> the slave that need to be removed.
>> (b) Freeing of array will assign NULL (to handle bond->down to bond->up
>> transition gracefully.
>> (c) Change from pr_debug() to pr_err() if bond_update_slave_arr() returns
>> failure.
>> (d) XOR: bond_update_slave_arr() will consider mii-mon, arp-mon cases and
>> will populate the array even if these parameters are not used.
>> (e) 3AD: Should handle the ad_agg_selection_logic correctly.
>>v2:
>> (a) Removed rcu_read_{un}lock() calls from array manipulation code.
>> (b) Slave link-events now refresh array for all these modes.
>> (c) Moved free-array call from bond_close() to bond_uninit().
>>v3:
>> (a) Fixed null pointer dereference.
>> (b) Removed bond->lock lockdep dependency.
>>v4:
>> (a) Made to changes to comply with Nikolay's locking changes
>> (b) Added a work-queue to refresh slave-array when RTNL is not held
>> (c) Array refresh happens ONLY with RTNL now.
>> (d) alloc changed from GFP_ATOMIC to GFP_KERNEL
>>v5:
>> (a) Consolidated all delayed slave-array updates at one place in
>> 3ad_state_machine_handler()
>>v6:
>> (a) Free slave array when there is no active aggregator
>>
>> drivers/net/bonding/bond_3ad.c | 140 +++++++++++------------------
>> drivers/net/bonding/bond_alb.c | 51 ++---------
>> drivers/net/bonding/bond_alb.h | 8 --
>> drivers/net/bonding/bond_main.c | 192 +++++++++++++++++++++++++++++++++++++---
>> drivers/net/bonding/bonding.h | 10 +++
>> 5 files changed, 249 insertions(+), 152 deletions(-)
>>
>>diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
>>index 7e9e522fd476..2110215f3528 100644
>>--- a/drivers/net/bonding/bond_3ad.c
>>+++ b/drivers/net/bonding/bond_3ad.c
>>@@ -102,17 +102,20 @@ static const u8 lacpdu_mcast_addr[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
>> /* ================= main 802.3ad protocol functions ================== */
>> static int ad_lacpdu_send(struct port *port);
>> static int ad_marker_send(struct port *port, struct bond_marker *marker);
>>-static void ad_mux_machine(struct port *port);
>>+static void ad_mux_machine(struct port *port, bool *update_slave_arr);
>> static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port);
>> static void ad_tx_machine(struct port *port);
>> static void ad_periodic_machine(struct port *port);
>>-static void ad_port_selection_logic(struct port *port);
>>-static void ad_agg_selection_logic(struct aggregator *aggregator);
>>+static void ad_port_selection_logic(struct port *port, bool *update_slave_arr);
>>+static void ad_agg_selection_logic(struct aggregator *aggregator,
>>+ bool *update_slave_arr);
>> static void ad_clear_agg(struct aggregator *aggregator);
>> static void ad_initialize_agg(struct aggregator *aggregator);
>> static void ad_initialize_port(struct port *port, int lacp_fast);
>>-static void ad_enable_collecting_distributing(struct port *port);
>>-static void ad_disable_collecting_distributing(struct port *port);
>>+static void ad_enable_collecting_distributing(struct port *port,
>>+ bool *update_slave_arr);
>>+static void ad_disable_collecting_distributing(struct port *port,
>>+ bool *update_slave_arr);
>> static void ad_marker_info_received(struct bond_marker *marker_info,
>> struct port *port);
>> static void ad_marker_response_received(struct bond_marker *marker,
>>@@ -796,8 +799,9 @@ static int ad_marker_send(struct port *port, struct bond_marker *marker)
>> /**
>> * ad_mux_machine - handle a port's mux state machine
>> * @port: the port we're looking at
>>+ * @update_slave_arr: Does slave array need update?
>> */
>>-static void ad_mux_machine(struct port *port)
>>+static void ad_mux_machine(struct port *port, bool *update_slave_arr)
>> {
>> mux_states_t last_state;
>>
>>@@ -901,7 +905,8 @@ static void ad_mux_machine(struct port *port)
>> switch (port->sm_mux_state) {
>> case AD_MUX_DETACHED:
>> port->actor_oper_port_state &= ~AD_STATE_SYNCHRONIZATION;
>>- ad_disable_collecting_distributing(port);
>>+ ad_disable_collecting_distributing(port,
>>+ update_slave_arr);
>> port->actor_oper_port_state &= ~AD_STATE_COLLECTING;
>> port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING;
>> port->ntt = true;
>>@@ -913,13 +918,15 @@ static void ad_mux_machine(struct port *port)
>> port->actor_oper_port_state |= AD_STATE_SYNCHRONIZATION;
>> port->actor_oper_port_state &= ~AD_STATE_COLLECTING;
>> port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING;
>>- ad_disable_collecting_distributing(port);
>>+ ad_disable_collecting_distributing(port,
>>+ update_slave_arr);
>> port->ntt = true;
>> break;
>> case AD_MUX_COLLECTING_DISTRIBUTING:
>> port->actor_oper_port_state |= AD_STATE_COLLECTING;
>> port->actor_oper_port_state |= AD_STATE_DISTRIBUTING;
>>- ad_enable_collecting_distributing(port);
>>+ ad_enable_collecting_distributing(port,
>>+ update_slave_arr);
>> port->ntt = true;
>> break;
>> default:
>>@@ -1187,12 +1194,13 @@ static void ad_periodic_machine(struct port *port)
>> /**
>> * ad_port_selection_logic - select aggregation groups
>> * @port: the port we're looking at
>>+ * @update_slave_arr: Does slave array need update?
>> *
>> * Select aggregation groups, and assign each port for it's aggregetor. The
>> * selection logic is called in the inititalization (after all the handshkes),
>> * and after every lacpdu receive (if selected is off).
>> */
>>-static void ad_port_selection_logic(struct port *port)
>>+static void ad_port_selection_logic(struct port *port, bool *update_slave_arr)
>
> Since this function is void, why not have it return a value
> instead of the bool *update_slave_arr? That would eliminate the need
> for some call sites to pass a "dummy" to the function. This comment
> applies to ad_agg_selection_logic and ad_enable_collecting_distributing
> as well.
>
Yes, I had similar discussion with Nik earlier and overloading the
return value did not feel clean and future-proof and hence decided to
take this approach.
> -J
>
>> {
>> struct aggregator *aggregator, *free_aggregator = NULL, *temp_aggregator;
>> struct port *last_port = NULL, *curr_port;
>>@@ -1347,7 +1355,7 @@ static void ad_port_selection_logic(struct port *port)
>> __agg_ports_are_ready(port->aggregator));
>>
>> aggregator = __get_first_agg(port);
>>- ad_agg_selection_logic(aggregator);
>>+ ad_agg_selection_logic(aggregator, update_slave_arr);
>> }
>>
>> /* Decide if "agg" is a better choice for the new active aggregator that
>>@@ -1435,6 +1443,7 @@ static int agg_device_up(const struct aggregator *agg)
>> /**
>> * ad_agg_selection_logic - select an aggregation group for a team
>> * @aggregator: the aggregator we're looking at
>>+ * @update_slave_arr: Does slave array need update?
>> *
>> * It is assumed that only one aggregator may be selected for a team.
>> *
>>@@ -1457,7 +1466,8 @@ static int agg_device_up(const struct aggregator *agg)
>> * __get_active_agg() won't work correctly. This function should be better
>> * called with the bond itself, and retrieve the first agg from it.
>> */
>>-static void ad_agg_selection_logic(struct aggregator *agg)
>>+static void ad_agg_selection_logic(struct aggregator *agg,
>>+ bool *update_slave_arr)
>> {
>> struct aggregator *best, *active, *origin;
>> struct bonding *bond = agg->slave->bond;
>>@@ -1550,6 +1560,8 @@ static void ad_agg_selection_logic(struct aggregator *agg)
>> __disable_port(port);
>> }
>> }
>>+ /* Slave array needs update. */
>>+ *update_slave_arr = true;
>> }
>>
>> /* if the selected aggregator is of join individuals
>>@@ -1678,24 +1690,30 @@ static void ad_initialize_port(struct port *port, int lacp_fast)
>> /**
>> * ad_enable_collecting_distributing - enable a port's transmit/receive
>> * @port: the port we're looking at
>>+ * @update_slave_arr: Does slave array need update?
>> *
>> * Enable @port if it's in an active aggregator
>> */
>>-static void ad_enable_collecting_distributing(struct port *port)
>>+static void ad_enable_collecting_distributing(struct port *port,
>>+ bool *update_slave_arr)
>> {
>> if (port->aggregator->is_active) {
>> pr_debug("Enabling port %d(LAG %d)\n",
>> port->actor_port_number,
>> port->aggregator->aggregator_identifier);
>> __enable_port(port);
>>+ /* Slave array needs update */
>>+ *update_slave_arr = true;
>> }
>> }
>>
>> /**
>> * ad_disable_collecting_distributing - disable a port's transmit/receive
>> * @port: the port we're looking at
>>+ * @update_slave_arr: Does slave array need update?
>> */
>>-static void ad_disable_collecting_distributing(struct port *port)
>>+static void ad_disable_collecting_distributing(struct port *port,
>>+ bool *update_slave_arr)
>> {
>> if (port->aggregator &&
>> !MAC_ADDRESS_EQUAL(&(port->aggregator->partner_system),
>>@@ -1704,6 +1722,8 @@ static void ad_disable_collecting_distributing(struct port *port)
>> port->actor_port_number,
>> port->aggregator->aggregator_identifier);
>> __disable_port(port);
>>+ /* Slave array needs an update */
>>+ *update_slave_arr = true;
>> }
>> }
>>
>>@@ -1868,6 +1888,7 @@ void bond_3ad_unbind_slave(struct slave *slave)
>> struct bonding *bond = slave->bond;
>> struct slave *slave_iter;
>> struct list_head *iter;
>>+ bool dummy_slave_update; /* Ignore this value as caller updates array */
>>
>> /* Sync against bond_3ad_state_machine_handler() */
>> spin_lock_bh(&bond->mode_lock);
>>@@ -1951,7 +1972,8 @@ void bond_3ad_unbind_slave(struct slave *slave)
>> ad_clear_agg(aggregator);
>>
>> if (select_new_active_agg)
>>- ad_agg_selection_logic(__get_first_agg(port));
>>+ ad_agg_selection_logic(__get_first_agg(port),
>>+ &dummy_slave_update);
>> } else {
>> netdev_warn(bond->dev, "unbinding aggregator, and could not find a new aggregator for its ports\n");
>> }
>>@@ -1966,7 +1988,8 @@ void bond_3ad_unbind_slave(struct slave *slave)
>> /* select new active aggregator */
>> temp_aggregator = __get_first_agg(port);
>> if (temp_aggregator)
>>- ad_agg_selection_logic(temp_aggregator);
>>+ ad_agg_selection_logic(temp_aggregator,
>>+ &dummy_slave_update);
>> }
>> }
>> }
>>@@ -1996,7 +2019,8 @@ void bond_3ad_unbind_slave(struct slave *slave)
>> if (select_new_active_agg) {
>> netdev_info(bond->dev, "Removing an active aggregator\n");
>> /* select new active aggregator */
>>- ad_agg_selection_logic(__get_first_agg(port));
>>+ ad_agg_selection_logic(__get_first_agg(port),
>>+ &dummy_slave_update);
>> }
>> }
>> break;
>>@@ -2031,6 +2055,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
>> struct slave *slave;
>> struct port *port;
>> bool should_notify_rtnl = BOND_SLAVE_NOTIFY_LATER;
>>+ bool update_slave_arr = false;
>>
>> /* Lock to protect data accessed by all (e.g., port->sm_vars) and
>> * against running with bond_3ad_unbind_slave. ad_rx_machine may run
>>@@ -2058,7 +2083,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
>> }
>>
>> aggregator = __get_first_agg(port);
>>- ad_agg_selection_logic(aggregator);
>>+ ad_agg_selection_logic(aggregator, &update_slave_arr);
>> }
>> bond_3ad_set_carrier(bond);
>> }
>>@@ -2074,8 +2099,8 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
>>
>> ad_rx_machine(NULL, port);
>> ad_periodic_machine(port);
>>- ad_port_selection_logic(port);
>>- ad_mux_machine(port);
>>+ ad_port_selection_logic(port, &update_slave_arr);
>>+ ad_mux_machine(port, &update_slave_arr);
>> ad_tx_machine(port);
>>
>> /* turn off the BEGIN bit, since we already handled it */
>>@@ -2093,6 +2118,9 @@ re_arm:
>> rcu_read_unlock();
>> spin_unlock_bh(&bond->mode_lock);
>>
>>+ if (update_slave_arr)
>>+ bond_slave_arr_work_rearm(bond, 0);
>>+
>> if (should_notify_rtnl && rtnl_trylock()) {
>> bond_slave_state_notify(bond);
>> rtnl_unlock();
>>@@ -2283,6 +2311,11 @@ void bond_3ad_handle_link_change(struct slave *slave, char link)
>> port->sm_vars |= AD_PORT_BEGIN;
>>
>> spin_unlock_bh(&slave->bond->mode_lock);
>>+
>>+ /* RTNL is held and mode_lock is released so it's safe
>>+ * to update slave_array here.
>>+ */
>>+ bond_update_slave_arr(slave->bond, NULL);
>> }
>>
>> /**
>>@@ -2377,73 +2410,6 @@ int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info)
>> return ret;
>> }
>>
>>-int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
>>-{
>>- struct bonding *bond = netdev_priv(dev);
>>- struct slave *slave, *first_ok_slave;
>>- struct aggregator *agg;
>>- struct ad_info ad_info;
>>- struct list_head *iter;
>>- int slaves_in_agg;
>>- int slave_agg_no;
>>- int agg_id;
>>-
>>- if (__bond_3ad_get_active_agg_info(bond, &ad_info)) {
>>- netdev_dbg(dev, "__bond_3ad_get_active_agg_info failed\n");
>>- goto err_free;
>>- }
>>-
>>- slaves_in_agg = ad_info.ports;
>>- agg_id = ad_info.aggregator_id;
>>-
>>- if (slaves_in_agg == 0) {
>>- netdev_dbg(dev, "active aggregator is empty\n");
>>- goto err_free;
>>- }
>>-
>>- slave_agg_no = bond_xmit_hash(bond, skb) % slaves_in_agg;
>>- first_ok_slave = NULL;
>>-
>>- bond_for_each_slave_rcu(bond, slave, iter) {
>>- agg = SLAVE_AD_INFO(slave)->port.aggregator;
>>- if (!agg || agg->aggregator_identifier != agg_id)
>>- continue;
>>-
>>- if (slave_agg_no >= 0) {
>>- if (!first_ok_slave && bond_slave_can_tx(slave))
>>- first_ok_slave = slave;
>>- slave_agg_no--;
>>- continue;
>>- }
>>-
>>- if (bond_slave_can_tx(slave)) {
>>- bond_dev_queue_xmit(bond, skb, slave->dev);
>>- goto out;
>>- }
>>- }
>>-
>>- if (slave_agg_no >= 0) {
>>- netdev_err(dev, "Couldn't find a slave to tx on for aggregator ID %d\n",
>>- agg_id);
>>- goto err_free;
>>- }
>>-
>>- /* we couldn't find any suitable slave after the agg_no, so use the
>>- * first suitable found, if found.
>>- */
>>- if (first_ok_slave)
>>- bond_dev_queue_xmit(bond, skb, first_ok_slave->dev);
>>- else
>>- goto err_free;
>>-
>>-out:
>>- return NETDEV_TX_OK;
>>-err_free:
>>- /* no suitable interface, frame not sent */
>>- dev_kfree_skb_any(skb);
>>- goto out;
>>-}
>>-
>> int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond,
>> struct slave *slave)
>> {
>>diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
>>index 615f3bebd019..d2eadab787c5 100644
>>--- a/drivers/net/bonding/bond_alb.c
>>+++ b/drivers/net/bonding/bond_alb.c
>>@@ -177,7 +177,6 @@ static int tlb_initialize(struct bonding *bond)
>> static void tlb_deinitialize(struct bonding *bond)
>> {
>> struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>>- struct tlb_up_slave *arr;
>>
>> spin_lock_bh(&bond->mode_lock);
>>
>>@@ -185,10 +184,6 @@ static void tlb_deinitialize(struct bonding *bond)
>> bond_info->tx_hashtbl = NULL;
>>
>> spin_unlock_bh(&bond->mode_lock);
>>-
>>- arr = rtnl_dereference(bond_info->slave_arr);
>>- if (arr)
>>- kfree_rcu(arr, rcu);
>> }
>>
>> static long long compute_gap(struct slave *slave)
>>@@ -1336,39 +1331,9 @@ out:
>> return NETDEV_TX_OK;
>> }
>>
>>-static int bond_tlb_update_slave_arr(struct bonding *bond,
>>- struct slave *skipslave)
>>-{
>>- struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>>- struct slave *tx_slave;
>>- struct list_head *iter;
>>- struct tlb_up_slave *new_arr, *old_arr;
>>-
>>- new_arr = kzalloc(offsetof(struct tlb_up_slave, arr[bond->slave_cnt]),
>>- GFP_ATOMIC);
>>- if (!new_arr)
>>- return -ENOMEM;
>>-
>>- bond_for_each_slave(bond, tx_slave, iter) {
>>- if (!bond_slave_can_tx(tx_slave))
>>- continue;
>>- if (skipslave == tx_slave)
>>- continue;
>>- new_arr->arr[new_arr->count++] = tx_slave;
>>- }
>>-
>>- old_arr = rtnl_dereference(bond_info->slave_arr);
>>- rcu_assign_pointer(bond_info->slave_arr, new_arr);
>>- if (old_arr)
>>- kfree_rcu(old_arr, rcu);
>>-
>>- return 0;
>>-}
>>-
>> int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
>> {
>> struct bonding *bond = netdev_priv(bond_dev);
>>- struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
>> struct ethhdr *eth_data;
>> struct slave *tx_slave = NULL;
>> u32 hash_index;
>>@@ -1389,12 +1354,14 @@ int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
>> hash_index & 0xFF,
>> skb->len);
>> } else {
>>- struct tlb_up_slave *slaves;
>>+ struct bond_up_slave *slaves;
>>+ unsigned int count;
>>
>>- slaves = rcu_dereference(bond_info->slave_arr);
>>- if (slaves && slaves->count)
>>+ slaves = rcu_dereference(bond->slave_arr);
>>+ count = slaves ? ACCESS_ONCE(slaves->count) : 0;
>>+ if (likely(count))
>> tx_slave = slaves->arr[hash_index %
>>- slaves->count];
>>+ count];
>> }
>> break;
>> }
>>@@ -1641,10 +1608,6 @@ void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave)
>> rlb_clear_slave(bond, slave);
>> }
>>
>>- if (bond_is_nondyn_tlb(bond))
>>- if (bond_tlb_update_slave_arr(bond, slave))
>>- pr_err("Failed to build slave-array for TLB mode.\n");
>>-
>> }
>>
>> void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link)
>>@@ -1669,7 +1632,7 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char
>> }
>>
>> if (bond_is_nondyn_tlb(bond)) {
>>- if (bond_tlb_update_slave_arr(bond, NULL))
>>+ if (bond_update_slave_arr(bond, NULL))
>> pr_err("Failed to build slave-array for TLB mode.\n");
>> }
>> }
>>diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h
>>index 3c6a7ff974d7..1ad473b4ade5 100644
>>--- a/drivers/net/bonding/bond_alb.h
>>+++ b/drivers/net/bonding/bond_alb.h
>>@@ -139,19 +139,11 @@ struct tlb_slave_info {
>> */
>> };
>>
>>-struct tlb_up_slave {
>>- unsigned int count;
>>- struct rcu_head rcu;
>>- struct slave *arr[0];
>>-};
>>-
>> struct alb_bond_info {
>> struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */
>> u32 unbalanced_load;
>> int tx_rebalance_counter;
>> int lp_counter;
>>- /* -------- non-dynamic tlb mode only ---------*/
>>- struct tlb_up_slave __rcu *slave_arr; /* Up slaves */
>> /* -------- rlb parameters -------- */
>> int rlb_enabled;
>> struct rlb_client_info *rx_hashtbl; /* Receive hash table */
>>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>>index c2adc2755ff6..6f79f495b01a 100644
>>--- a/drivers/net/bonding/bond_main.c
>>+++ b/drivers/net/bonding/bond_main.c
>>@@ -210,6 +210,7 @@ static int bond_init(struct net_device *bond_dev);
>> static void bond_uninit(struct net_device *bond_dev);
>> static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,
>> struct rtnl_link_stats64 *stats);
>>+static void bond_slave_arr_handler(struct work_struct *work);
>>
>> /*---------------------------- General routines -----------------------------*/
>>
>>@@ -1551,6 +1552,9 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
>> unblock_netpoll_tx();
>> }
>>
>>+ if (bond_mode_uses_xmit_hash(bond))
>>+ bond_update_slave_arr(bond, NULL);
>>+
>> netdev_info(bond_dev, "Enslaving %s as %s interface with %s link\n",
>> slave_dev->name,
>> bond_is_active_slave(new_slave) ? "an active" : "a backup",
>>@@ -1668,6 +1672,9 @@ static int __bond_release_one(struct net_device *bond_dev,
>> if (BOND_MODE(bond) == BOND_MODE_8023AD)
>> bond_3ad_unbind_slave(slave);
>>
>>+ if (bond_mode_uses_xmit_hash(bond))
>>+ bond_update_slave_arr(bond, slave);
>>+
>> netdev_info(bond_dev, "Releasing %s interface %s\n",
>> bond_is_active_slave(slave) ? "active" : "backup",
>> slave_dev->name);
>>@@ -1970,6 +1977,9 @@ static void bond_miimon_commit(struct bonding *bond)
>> bond_alb_handle_link_change(bond, slave,
>> BOND_LINK_UP);
>>
>>+ if (BOND_MODE(bond) == BOND_MODE_XOR)
>>+ bond_update_slave_arr(bond, NULL);
>>+
>> if (!bond->curr_active_slave || slave == primary)
>> goto do_failover;
>>
>>@@ -1997,6 +2007,9 @@ static void bond_miimon_commit(struct bonding *bond)
>> bond_alb_handle_link_change(bond, slave,
>> BOND_LINK_DOWN);
>>
>>+ if (BOND_MODE(bond) == BOND_MODE_XOR)
>>+ bond_update_slave_arr(bond, NULL);
>>+
>> if (slave == rcu_access_pointer(bond->curr_active_slave))
>> goto do_failover;
>>
>>@@ -2453,6 +2466,8 @@ static void bond_loadbalance_arp_mon(struct work_struct *work)
>>
>> if (slave_state_changed) {
>> bond_slave_state_change(bond);
>>+ if (BOND_MODE(bond) == BOND_MODE_XOR)
>>+ bond_update_slave_arr(bond, NULL);
>> } else if (do_failover) {
>> block_netpoll_tx();
>> bond_select_active_slave(bond);
>>@@ -2829,8 +2844,20 @@ static int bond_slave_netdev_event(unsigned long event,
>> if (old_duplex != slave->duplex)
>> bond_3ad_adapter_duplex_changed(slave);
>> }
>>+ /* Refresh slave-array if applicable!
>>+ * If the setup does not use miimon or arpmon (mode-specific!),
>>+ * then these events will not cause the slave-array to be
>>+ * refreshed. This will cause xmit to use a slave that is not
>>+ * usable. Avoid such situation by refeshing the array at these
>>+ * events. If these (miimon/arpmon) parameters are configured
>>+ * then array gets refreshed twice and that should be fine!
>>+ */
>>+ if (bond_mode_uses_xmit_hash(bond))
>>+ bond_update_slave_arr(bond, NULL);
>> break;
>> case NETDEV_DOWN:
>>+ if (bond_mode_uses_xmit_hash(bond))
>>+ bond_update_slave_arr(bond, NULL);
>> break;
>> case NETDEV_CHANGEMTU:
>> /* TODO: Should slaves be allowed to
>>@@ -3010,6 +3037,7 @@ static void bond_work_init_all(struct bonding *bond)
>> else
>> INIT_DELAYED_WORK(&bond->arp_work, bond_loadbalance_arp_mon);
>> INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler);
>>+ INIT_DELAYED_WORK(&bond->slave_arr_work, bond_slave_arr_handler);
>> }
>>
>> static void bond_work_cancel_all(struct bonding *bond)
>>@@ -3019,6 +3047,7 @@ static void bond_work_cancel_all(struct bonding *bond)
>> cancel_delayed_work_sync(&bond->alb_work);
>> cancel_delayed_work_sync(&bond->ad_work);
>> cancel_delayed_work_sync(&bond->mcast_work);
>>+ cancel_delayed_work_sync(&bond->slave_arr_work);
>> }
>>
>> static int bond_open(struct net_device *bond_dev)
>>@@ -3068,6 +3097,9 @@ static int bond_open(struct net_device *bond_dev)
>> bond_3ad_initiate_agg_selection(bond, 1);
>> }
>>
>>+ if (bond_mode_uses_xmit_hash(bond))
>>+ bond_update_slave_arr(bond, NULL);
>>+
>> return 0;
>> }
>>
>>@@ -3573,20 +3605,148 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d
>> return NETDEV_TX_OK;
>> }
>>
>>-/* In bond_xmit_xor() , we determine the output device by using a pre-
>>- * determined xmit_hash_policy(), If the selected device is not enabled,
>>- * find the next active slave.
>>+/* Use this to update slave_array when (a) it's not appropriate to update
>>+ * slave_array right away (note that update_slave_array() may sleep)
>>+ * and / or (b) RTNL is not held.
>> */
>>-static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev)
>>+void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay)
>> {
>>- struct bonding *bond = netdev_priv(bond_dev);
>>- int slave_cnt = ACCESS_ONCE(bond->slave_cnt);
>>+ queue_delayed_work(bond->wq, &bond->slave_arr_work, delay);
>>+}
>>
>>- if (likely(slave_cnt))
>>- bond_xmit_slave_id(bond, skb,
>>- bond_xmit_hash(bond, skb) % slave_cnt);
>>- else
>>+/* Slave array work handler. Holds only RTNL */
>>+static void bond_slave_arr_handler(struct work_struct *work)
>>+{
>>+ struct bonding *bond = container_of(work, struct bonding,
>>+ slave_arr_work.work);
>>+ int ret;
>>+
>>+ if (!rtnl_trylock())
>>+ goto err;
>>+
>>+ ret = bond_update_slave_arr(bond, NULL);
>>+ rtnl_unlock();
>>+ if (ret) {
>>+ pr_warn_ratelimited("Failed to update slave array from WT\n");
>>+ goto err;
>>+ }
>>+ return;
>>+
>>+err:
>>+ bond_slave_arr_work_rearm(bond, 1);
>>+}
>>+
>>+/* Build the usable slaves array in control path for modes that use xmit-hash
>>+ * to determine the slave interface -
>>+ * (a) BOND_MODE_8023AD
>>+ * (b) BOND_MODE_XOR
>>+ * (c) BOND_MODE_TLB && tlb_dynamic_lb == 0
>>+ *
>>+ * The caller is expected to hold RTNL only and NO other lock!
>>+ */
>>+int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
>>+{
>>+ struct slave *slave;
>>+ struct list_head *iter;
>>+ struct bond_up_slave *new_arr, *old_arr;
>>+ int slaves_in_agg;
>>+ int agg_id = 0;
>>+ int ret = 0;
>>+
>>+#ifdef CONFIG_LOCKDEP
>>+ WARN_ON(lockdep_is_held(&bond->mode_lock));
>>+#endif
>>+
>>+ new_arr = kzalloc(offsetof(struct bond_up_slave, arr[bond->slave_cnt]),
>>+ GFP_KERNEL);
>>+ if (!new_arr) {
>>+ ret = -ENOMEM;
>>+ pr_err("Failed to build slave-array.\n");
>>+ goto out;
>>+ }
>>+ if (BOND_MODE(bond) == BOND_MODE_8023AD) {
>>+ struct ad_info ad_info;
>>+
>>+ if (bond_3ad_get_active_agg_info(bond, &ad_info)) {
>>+ pr_debug("bond_3ad_get_active_agg_info failed\n");
>>+ kfree_rcu(new_arr, rcu);
>>+ /* No active aggragator means its not safe to use
>>+ * the previous array.
>>+ */
>>+ old_arr = rtnl_dereference(bond->slave_arr);
>>+ if (old_arr) {
>>+ RCU_INIT_POINTER(bond->slave_arr, NULL);
>>+ kfree_rcu(old_arr, rcu);
>>+ }
>>+ goto out;
>>+ }
>>+ slaves_in_agg = ad_info.ports;
>>+ agg_id = ad_info.aggregator_id;
>>+ }
>>+ bond_for_each_slave(bond, slave, iter) {
>>+ if (BOND_MODE(bond) == BOND_MODE_8023AD) {
>>+ struct aggregator *agg;
>>+
>>+ agg = SLAVE_AD_INFO(slave)->port.aggregator;
>>+ if (!agg || agg->aggregator_identifier != agg_id)
>>+ continue;
>>+ }
>>+ if (!bond_slave_can_tx(slave))
>>+ continue;
>>+ if (skipslave == slave)
>>+ continue;
>>+ new_arr->arr[new_arr->count++] = slave;
>>+ }
>>+
>>+ old_arr = rtnl_dereference(bond->slave_arr);
>>+ rcu_assign_pointer(bond->slave_arr, new_arr);
>>+ if (old_arr)
>>+ kfree_rcu(old_arr, rcu);
>>+out:
>>+ if (ret != 0 && skipslave) {
>>+ int idx;
>>+
>>+ /* Rare situation where caller has asked to skip a specific
>>+ * slave but allocation failed (most likely!). BTW this is
>>+ * only possible when the call is initiated from
>>+ * __bond_release_one(). In this situation; overwrite the
>>+ * skipslave entry in the array with the last entry from the
>>+ * array to avoid a situation where the xmit path may choose
>>+ * this to-be-skipped slave to send a packet out.
>>+ */
>>+ old_arr = rtnl_dereference(bond->slave_arr);
>>+ for (idx = 0; idx < old_arr->count; idx++) {
>>+ if (skipslave == old_arr->arr[idx]) {
>>+ old_arr->arr[idx] =
>>+ old_arr->arr[old_arr->count-1];
>>+ old_arr->count--;
>>+ break;
>>+ }
>>+ }
>>+ }
>>+ return ret;
>>+}
>>+
>>+/* Use this Xmit function for 3AD as well as XOR modes. The current
>>+ * usable slave array is formed in the control path. The xmit function
>>+ * just calculates hash and sends the packet out.
>>+ */
>>+int bond_3ad_xor_xmit(struct sk_buff *skb, struct net_device *dev)
>>+{
>>+ struct bonding *bond = netdev_priv(dev);
>>+ struct slave *slave;
>>+ struct bond_up_slave *slaves;
>>+ unsigned int count;
>>+
>>+ slaves = rcu_dereference(bond->slave_arr);
>>+ count = slaves ? ACCESS_ONCE(slaves->count) : 0;
>>+ if (likely(count)) {
>>+ slave = slaves->arr[bond_xmit_hash(bond, skb) % count];
>>+ bond_dev_queue_xmit(bond, skb, slave->dev);
>>+ } else {
>> dev_kfree_skb_any(skb);
>>+ atomic_long_inc(&dev->tx_dropped);
>>+ }
>>
>> return NETDEV_TX_OK;
>> }
>>@@ -3682,12 +3842,11 @@ static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev
>> return bond_xmit_roundrobin(skb, dev);
>> case BOND_MODE_ACTIVEBACKUP:
>> return bond_xmit_activebackup(skb, dev);
>>+ case BOND_MODE_8023AD:
>> case BOND_MODE_XOR:
>>- return bond_xmit_xor(skb, dev);
>>+ return bond_3ad_xor_xmit(skb, dev);
>> case BOND_MODE_BROADCAST:
>> return bond_xmit_broadcast(skb, dev);
>>- case BOND_MODE_8023AD:
>>- return bond_3ad_xmit_xor(skb, dev);
>> case BOND_MODE_ALB:
>> return bond_alb_xmit(skb, dev);
>> case BOND_MODE_TLB:
>>@@ -3861,6 +4020,7 @@ static void bond_uninit(struct net_device *bond_dev)
>> struct bonding *bond = netdev_priv(bond_dev);
>> struct list_head *iter;
>> struct slave *slave;
>>+ struct bond_up_slave *arr;
>>
>> bond_netpoll_cleanup(bond_dev);
>>
>>@@ -3869,6 +4029,12 @@ static void bond_uninit(struct net_device *bond_dev)
>> __bond_release_one(bond_dev, slave->dev, true);
>> netdev_info(bond_dev, "Released all slaves\n");
>>
>>+ arr = rtnl_dereference(bond->slave_arr);
>>+ if (arr) {
>>+ kfree_rcu(arr, rcu);
>>+ RCU_INIT_POINTER(bond->slave_arr, NULL);
>>+ }
>>+
>> list_del(&bond->bond_list);
>>
>> bond_debug_unregister(bond);
>>diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
>>index 5b022da9cad2..10920f0686e2 100644
>>--- a/drivers/net/bonding/bonding.h
>>+++ b/drivers/net/bonding/bonding.h
>>@@ -179,6 +179,12 @@ struct slave {
>> struct rtnl_link_stats64 slave_stats;
>> };
>>
>>+struct bond_up_slave {
>>+ unsigned int count;
>>+ struct rcu_head rcu;
>>+ struct slave *arr[0];
>>+};
>>+
>> /*
>> * Link pseudo-state only used internally by monitors
>> */
>>@@ -193,6 +199,7 @@ struct bonding {
>> struct slave __rcu *curr_active_slave;
>> struct slave __rcu *current_arp_slave;
>> struct slave __rcu *primary_slave;
>>+ struct bond_up_slave __rcu *slave_arr; /* Array of usable slaves */
>> bool force_primary;
>> s32 slave_cnt; /* never change this value outside the attach/detach wrappers */
>> int (*recv_probe)(const struct sk_buff *, struct bonding *,
>>@@ -222,6 +229,7 @@ struct bonding {
>> struct delayed_work alb_work;
>> struct delayed_work ad_work;
>> struct delayed_work mcast_work;
>>+ struct delayed_work slave_arr_work;
>> #ifdef CONFIG_DEBUG_FS
>> /* debugging support via debugfs */
>> struct dentry *debug_dir;
>>@@ -534,6 +542,8 @@ const char *bond_slave_link_status(s8 link);
>> struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev,
>> struct net_device *end_dev,
>> int level);
>>+int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave);
>>+void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay);
>>
>> #ifdef CONFIG_PROC_FS
>> void bond_create_proc_entry(struct bonding *bond);
>>--
>>2.1.0.rc2.206.gedb03e5
>
> ---
> -Jay Vosburgh, jay.vosburgh@...onical.com
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists