[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAJYOGF9TY8WtUscsfJ=qduAw7_1BwU+4iE+eL6cidM=LBL9w+A@mail.gmail.com>
Date: Wed, 25 Sep 2019 14:01:50 +0300
From: Aleksei Zakharov <zaharov@...ectel.ru>
To: Jay Vosburgh <jay.vosburgh@...onical.com>
Cc: netdev@...r.kernel.org, "zhangsha (A)" <zhangsha.zhang@...wei.com>
Subject: Re: Fwd: [PATCH] bonding/802.3ad: fix slave initialization states race
ср, 25 сент. 2019 г. в 03:31, Jay Vosburgh <jay.vosburgh@...onical.com>:
>
> Алексей Захаров wrote:
> [...]
> >Right after reboot one of the slaves hangs with actor port state 71
> >and partner port state 1.
> >It doesn't send lacpdu and seems to be broken.
> >Setting link down and up again fixes slave state.
> [...]
>
> I think I see what failed in the first patch, could you test the
> following patch? This one is for net-next, so you'd need to again swap
> slave_err / netdev_err for the Ubuntu 4.15 kernel.
>
I've tested new patch. It seems to work. I can't reproduce the bug
with this patch.
There are two types of messages when link becomes up:
First:
bond-san: EVENT 1 llu 4294895911 slave eth2
8021q: adding VLAN 0 to HW filter on device eth2
bond-san: link status definitely down for interface eth2, disabling it
mlx4_en: eth2: Link Up
bond-san: EVENT 4 llu 4294895911 slave eth2
bond-san: link status up for interface eth2, enabling it in 500 ms
bond-san: invalid new link 3 on slave eth2
bond-san: link status definitely up for interface eth2, 10000 Mbps full duplex
Second:
bond-san: EVENT 1 llu 4295147594 slave eth2
8021q: adding VLAN 0 to HW filter on device eth2
mlx4_en: eth2: Link Up
bond-san: EVENT 4 llu 4295147594 slave eth2
bond-san: link status up again after 0 ms for interface eth2
bond-san: link status definitely up for interface eth2, 10000 Mbps full duplex
These messages (especially "invalid new link") look a bit unclear from
sysadmin point of view.
But lacp seems to work fine, thank you!
> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> index 931d9d935686..5e248588259a 100644
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c
> @@ -1617,6 +1617,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
> if (bond->params.miimon) {
> if (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS) {
> if (bond->params.updelay) {
> +/*XXX*/slave_info(bond_dev, slave_dev, "BOND_LINK_BACK initial state\n");
> bond_set_slave_link_state(new_slave,
> BOND_LINK_BACK,
> BOND_SLAVE_NOTIFY_NOW);
> @@ -2086,8 +2087,7 @@ static int bond_miimon_inspect(struct bonding *bond)
> ignore_updelay = !rcu_dereference(bond->curr_active_slave);
>
> bond_for_each_slave_rcu(bond, slave, iter) {
> - slave->new_link = BOND_LINK_NOCHANGE;
> - slave->link_new_state = slave->link;
> + bond_propose_link_state(slave, BOND_LINK_NOCHANGE);
>
> link_state = bond_check_dev_link(bond, slave->dev, 0);
>
> @@ -2096,8 +2096,6 @@ static int bond_miimon_inspect(struct bonding *bond)
> if (link_state)
> continue;
>
> - bond_propose_link_state(slave, BOND_LINK_FAIL);
> - commit++;
> slave->delay = bond->params.downdelay;
> if (slave->delay) {
> slave_info(bond->dev, slave->dev, "link status down for %sinterface, disabling it in %d ms\n",
> @@ -2106,6 +2104,7 @@ static int bond_miimon_inspect(struct bonding *bond)
> (bond_is_active_slave(slave) ?
> "active " : "backup ") : "",
> bond->params.downdelay * bond->params.miimon);
> + slave->link = BOND_LINK_FAIL;
> }
> /*FALLTHRU*/
> case BOND_LINK_FAIL:
> @@ -2121,7 +2120,7 @@ static int bond_miimon_inspect(struct bonding *bond)
> }
>
> if (slave->delay <= 0) {
> - slave->new_link = BOND_LINK_DOWN;
> + bond_propose_link_state(slave, BOND_LINK_DOWN);
> commit++;
> continue;
> }
> @@ -2133,15 +2132,13 @@ static int bond_miimon_inspect(struct bonding *bond)
> if (!link_state)
> continue;
>
> - bond_propose_link_state(slave, BOND_LINK_BACK);
> - commit++;
> slave->delay = bond->params.updelay;
> -
> if (slave->delay) {
> slave_info(bond->dev, slave->dev, "link status up, enabling it in %d ms\n",
> ignore_updelay ? 0 :
> bond->params.updelay *
> bond->params.miimon);
> + slave->link = BOND_LINK_BACK;
> }
> /*FALLTHRU*/
> case BOND_LINK_BACK:
> @@ -2158,7 +2155,7 @@ static int bond_miimon_inspect(struct bonding *bond)
> slave->delay = 0;
>
> if (slave->delay <= 0) {
> - slave->new_link = BOND_LINK_UP;
> + bond_propose_link_state(slave, BOND_LINK_UP);
> commit++;
> ignore_updelay = false;
> continue;
> @@ -2196,7 +2193,7 @@ static void bond_miimon_commit(struct bonding *bond)
> struct slave *slave, *primary;
>
> bond_for_each_slave(bond, slave, iter) {
> - switch (slave->new_link) {
> + switch (slave->link_new_state) {
> case BOND_LINK_NOCHANGE:
> /* For 802.3ad mode, check current slave speed and
> * duplex again in case its port was disabled after
> @@ -2268,8 +2265,8 @@ static void bond_miimon_commit(struct bonding *bond)
>
> default:
> slave_err(bond->dev, slave->dev, "invalid new link %d on slave\n",
> - slave->new_link);
> - slave->new_link = BOND_LINK_NOCHANGE;
> + slave->link_new_state);
> + bond_propose_link_state(slave, BOND_LINK_NOCHANGE);
>
> continue;
> }
> @@ -2677,13 +2674,13 @@ static void bond_loadbalance_arp_mon(struct bonding *bond)
> bond_for_each_slave_rcu(bond, slave, iter) {
> unsigned long trans_start = dev_trans_start(slave->dev);
>
> - slave->new_link = BOND_LINK_NOCHANGE;
> + bond_propose_link_state(slave, BOND_LINK_NOCHANGE);
>
> if (slave->link != BOND_LINK_UP) {
> if (bond_time_in_interval(bond, trans_start, 1) &&
> bond_time_in_interval(bond, slave->last_rx, 1)) {
>
> - slave->new_link = BOND_LINK_UP;
> + bond_propose_link_state(slave, BOND_LINK_UP);
> slave_state_changed = 1;
>
> /* primary_slave has no meaning in round-robin
> @@ -2708,7 +2705,7 @@ static void bond_loadbalance_arp_mon(struct bonding *bond)
> if (!bond_time_in_interval(bond, trans_start, 2) ||
> !bond_time_in_interval(bond, slave->last_rx, 2)) {
>
> - slave->new_link = BOND_LINK_DOWN;
> + bond_propose_link_state(slave, BOND_LINK_DOWN);
> slave_state_changed = 1;
>
> if (slave->link_failure_count < UINT_MAX)
> @@ -2739,8 +2736,8 @@ static void bond_loadbalance_arp_mon(struct bonding *bond)
> goto re_arm;
>
> bond_for_each_slave(bond, slave, iter) {
> - if (slave->new_link != BOND_LINK_NOCHANGE)
> - slave->link = slave->new_link;
> + if (slave->link_new_state != BOND_LINK_NOCHANGE)
> + slave->link = slave->link_new_state;
> }
>
> if (slave_state_changed) {
> @@ -2763,9 +2760,9 @@ static void bond_loadbalance_arp_mon(struct bonding *bond)
> }
>
> /* Called to inspect slaves for active-backup mode ARP monitor link state
> - * changes. Sets new_link in slaves to specify what action should take
> - * place for the slave. Returns 0 if no changes are found, >0 if changes
> - * to link states must be committed.
> + * changes. Sets proposed link state in slaves to specify what action
> + * should take place for the slave. Returns 0 if no changes are found, >0
> + * if changes to link states must be committed.
> *
> * Called with rcu_read_lock held.
> */
> @@ -2777,12 +2774,12 @@ static int bond_ab_arp_inspect(struct bonding *bond)
> int commit = 0;
>
> bond_for_each_slave_rcu(bond, slave, iter) {
> - slave->new_link = BOND_LINK_NOCHANGE;
> + bond_propose_link_state(slave, BOND_LINK_NOCHANGE);
> last_rx = slave_last_rx(bond, slave);
>
> if (slave->link != BOND_LINK_UP) {
> if (bond_time_in_interval(bond, last_rx, 1)) {
> - slave->new_link = BOND_LINK_UP;
> + bond_propose_link_state(slave, BOND_LINK_UP);
> commit++;
> }
> continue;
> @@ -2810,7 +2807,7 @@ static int bond_ab_arp_inspect(struct bonding *bond)
> if (!bond_is_active_slave(slave) &&
> !rcu_access_pointer(bond->current_arp_slave) &&
> !bond_time_in_interval(bond, last_rx, 3)) {
> - slave->new_link = BOND_LINK_DOWN;
> + bond_propose_link_state(slave, BOND_LINK_DOWN);
> commit++;
> }
>
> @@ -2823,7 +2820,7 @@ static int bond_ab_arp_inspect(struct bonding *bond)
> if (bond_is_active_slave(slave) &&
> (!bond_time_in_interval(bond, trans_start, 2) ||
> !bond_time_in_interval(bond, last_rx, 2))) {
> - slave->new_link = BOND_LINK_DOWN;
> + bond_propose_link_state(slave, BOND_LINK_DOWN);
> commit++;
> }
> }
> @@ -2843,7 +2840,7 @@ static void bond_ab_arp_commit(struct bonding *bond)
> struct slave *slave;
>
> bond_for_each_slave(bond, slave, iter) {
> - switch (slave->new_link) {
> + switch (slave->link_new_state) {
> case BOND_LINK_NOCHANGE:
> continue;
>
> @@ -2893,8 +2890,9 @@ static void bond_ab_arp_commit(struct bonding *bond)
> continue;
>
> default:
> - slave_err(bond->dev, slave->dev, "impossible: new_link %d on slave\n",
> - slave->new_link);
> + slave_err(bond->dev, slave->dev,
> + "impossible: link_new_state %d on slave\n",
> + slave->link_new_state);
> continue;
> }
>
> @@ -3133,6 +3131,7 @@ static int bond_slave_netdev_event(unsigned long event,
> * let link-monitoring (miimon) set it right when correct
> * speeds/duplex are available.
> */
> +/*XXX*/slave_info(bond_dev, slave_dev, "EVENT %lu llu %lu\n", event, slave->last_link_up);
> if (bond_update_speed_duplex(slave) &&
> BOND_MODE(bond) == BOND_MODE_8023AD) {
> if (slave->last_link_up)
> diff --git a/include/net/bonding.h b/include/net/bonding.h
> index f7fe45689142..d416af72404b 100644
> --- a/include/net/bonding.h
> +++ b/include/net/bonding.h
> @@ -159,7 +159,6 @@ struct slave {
> unsigned long target_last_arp_rx[BOND_MAX_ARP_TARGETS];
> s8 link; /* one of BOND_LINK_XXXX */
> s8 link_new_state; /* one of BOND_LINK_XXXX */
> - s8 new_link;
> u8 backup:1, /* indicates backup slave. Value corresponds with
> BOND_STATE_ACTIVE and BOND_STATE_BACKUP */
> inactive:1, /* indicates inactive slave */
> @@ -549,7 +548,7 @@ static inline void bond_propose_link_state(struct slave *slave, int state)
>
> static inline void bond_commit_link_state(struct slave *slave, bool notify)
> {
> - if (slave->link == slave->link_new_state)
> + if (slave->link_new_state == BOND_LINK_NOCHANGE)
> return;
>
> slave->link = slave->link_new_state;
>
>
> ---
> -Jay Vosburgh, jay.vosburgh@...onical.com
--
Best Regards,
Aleksei Zakharov
System administrator
Selectel - hosting for professionals
tel: +7 (812) 677-80-36
www.selectel.com
Powered by blists - more mailing lists