[<prev] [next>] [day] [month] [year] [list]
Message-ID: <87497cc383e249538c692e61f486e8fe@huawei.com>
Date: Mon, 24 May 2021 15:21:19 +0000
From: liaichun <liaichun@...wei.com>
To: Jay Vosburgh <jay.vosburgh@...onical.com>
CC: liaichun <liaichun@...wei.com>,
"davem@...emloft.net" <davem@...emloft.net>,
"kuba@...nel.org" <kuba@...nel.org>,
"vfalico@...il.com" <vfalico@...il.com>,
"andy@...yhouse.net" <andy@...yhouse.net>,
"netdev@...r.kernel.org" <netdev@...r.kernel.org>,
"Chenxiang (EulerOS)" <rose.chen@...wei.com>,
moyufeng <moyufeng@...wei.com>
Subject: Re: [PATCH net v2]bonding: check port and aggregator when select
Hi Jay:
I repeated the "did not find a suitable aggregator" issue by repeatedly hot-plugging the NIC device.
https://lore.kernel.org/netdev/52630cba-cc60-a024-8dd0-8319e5245044@huawei.com/
Add the hot swap operation of network adapters:
while true; do
ETS_SSHCMD "echo 1 > /sys/bus/pci/devices/ enp3s0 /remove" &
ETS_SSHCMD "echo 1 > /sys/bus/pci/devices/ enp4s0 /remove" &
ETS_SSHCMD "echo 1 > /sys/bus/pci/devices/ enp5s0 /remove" &
ETS_SSHCMD "echo 1 > /sys/bus/pci/devices/ enp6s0 /remove" &
sleep 10
ETS_SSHCMD "echo 1 > /sys/bus/pci/rescan"
sleep 100
done &
The numbers of port_params updated, and then can not find a suitable aggregator.
The detailed code analysis is as follows:
bond_3ad_lacpdu_recv ->bond_3ad_rx_indication-> ad_rx_machine-> __record_pdu
static void __record_pdu(struct lacpdu *lacpdu, struct port *port)
{
if (lacpdu && port) {
struct port_params *partner = &port->partner_oper;
__choose_matched(lacpdu, port);
/* record the new parameter values for the partner
* operational
*/
partner->port_number = ntohs(lacpdu->actor_port);
partner->port_priority = ntohs(lacpdu->actor_port_priority);
partner->system = lacpdu->actor_system;
partner->system_priority = ntohs(lacpdu->actor_system_priority);
partner->key = ntohs(lacpdu->actor_key);
partner->port_state = lacpdu->actor_state;
--------analysis: Update the member of the partner here.
/* set actor_oper_port_state.defaulted to FALSE */
port->actor_oper_port_state &= ~AD_STATE_DEFAULTED;
/* set the partner sync. to on if the partner is sync,
* and the port is matched
*/
if ((port->sm_vars & AD_PORT_MATCHED) &&
(lacpdu->actor_state & AD_STATE_SYNCHRONIZATION)) {
partner->port_state |= AD_STATE_SYNCHRONIZATION;
pr_debug("%s partner sync=1\n", port->slave->dev->name);
} else {
partner->port_state &= ~AD_STATE_SYNCHRONIZATION;
pr_debug("%s partner sync=0\n", port->slave->dev->name);
}
}
}
static void ad_port_selection_logic(struct port *port, bool *update_slave_arr)
{
[...]
/* search on all aggregators for a suitable aggregator for this port */
bond_for_each_slave(bond, slave, iter) {
aggregator = &(SLAVE_AD_INFO(slave)->aggregator);
/* keep a free aggregator for later use(if needed) */
if (!aggregator->lag_ports) {
if (!free_aggregator)
free_aggregator = aggregator;
continue;
}
/* check if current aggregator suits us */
----analysis: here check the number of aggregator's partner, but it was updated by bond_3ad_lacpdu_recv
if (((aggregator->actor_oper_aggregator_key == port->actor_oper_port_key) && /* if all parameters match AND */
MAC_ADDRESS_EQUAL(&(aggregator->partner_system), &(port->partner_oper.system)) &&
(aggregator->partner_system_priority == port->partner_oper.system_priority) &&
(aggregator->partner_oper_aggregator_key == port->partner_oper.key)
) &&
((!MAC_ADDRESS_EQUAL(&(port->partner_oper.system), &(null_mac_addr)) && /* partner answers */
!aggregator->is_individual) /* but is not individual OR */
)
) {
/* attach to the founded aggregator */
port->aggregator = aggregator;
port->actor_port_aggregator_identifier =
port->aggregator->aggregator_identifier;
port->next_port_in_aggregator = aggregator->lag_ports;
port->aggregator->num_of_ports++;
aggregator->lag_ports = port;
netdev_dbg(bond->dev, "Port %d joined LAG %d(existing LAG)\n",
port->actor_port_number,
port->aggregator->aggregator_identifier);
/* mark this port as selected */
port->sm_vars |= AD_PORT_SELECTED;
found = 1;
----analysis: found is 0
break;
}
}
/* the port couldn't find an aggregator - attach it to a new
* aggregator
*/
if (!found) {
----analysis: No proper free_aggregator is found.
if (free_aggregator) {
/* assign port a new aggregator */
port->aggregator = free_aggregator;
port->actor_port_aggregator_identifier =
port->aggregator->aggregator_identifier;
/* update the new aggregator's parameters
* if port was responsed from the end-user
*/
if (port->actor_oper_port_key & AD_DUPLEX_KEY_MASKS)
/* if port is full duplex */
port->aggregator->is_individual = false;
else
port->aggregator->is_individual = true;
port->aggregator->actor_admin_aggregator_key =
port->actor_admin_port_key;
port->aggregator->actor_oper_aggregator_key =
port->actor_oper_port_key;
port->aggregator->partner_system =
port->partner_oper.system;
port->aggregator->partner_system_priority =
port->partner_oper.system_priority;
port->aggregator->partner_oper_aggregator_key = port->partner_oper.key;
port->aggregator->receive_state = 1;
port->aggregator->transmit_state = 1;
port->aggregator->lag_ports = port;
port->aggregator->num_of_ports++;
/* mark this port as selected */
port->sm_vars |= AD_PORT_SELECTED;
netdev_dbg(bond->dev, "Port %d joined LAG %d(new LAG)\n",
port->actor_port_number,
port->aggregator->aggregator_identifier);
} else {
----analysis: The fault scenario goes to this branch, and port->aggregator remains NULL.
netdev_err(bond->dev, "Port %d (on %s) did not find a suitable aggregator\n",
port->actor_port_number, port->slave->dev->name);
}
}
/* if all aggregator's ports are READY_N == TRUE, set ready=TRUE
* in all aggregator's ports, else set ready=FALSE in all
* aggregator's ports
*/
__set_agg_ports_ready(port->aggregator,
__agg_ports_are_ready(port->aggregator));
----analysis: port->aggregator is still NULL, which causes problem.
[...]
}
In this case ,I want to use old aggregator, but init it's number.And I don't know if that's correct.
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index cfb0060fc..bd66f3f48 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -1450,7 +1450,6 @@ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr)
/* clear the port's relations to this
* aggregator
*/
- port->aggregator = NULL;
port->next_port_in_aggregator = NULL;
port->actor_port_aggregator_identifier = 0;
@@ -1521,43 +1520,47 @@ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr)
if (free_aggregator) {
/* assign port a new aggregator */
port->aggregator = free_aggregator;
- port->actor_port_aggregator_identifier =
- port->aggregator->aggregator_identifier;
-
- /* update the new aggregator's parameters
- * if port was responsed from the end-user
- */
- if (port->actor_oper_port_key & AD_DUPLEX_KEY_MASKS)
- /* if port is full duplex */
- port->aggregator->is_individual = false;
- else
- port->aggregator->is_individual = true;
-
- port->aggregator->actor_admin_aggregator_key =
- port->actor_admin_port_key;
- port->aggregator->actor_oper_aggregator_key =
- port->actor_oper_port_key;
- port->aggregator->partner_system =
- port->partner_oper.system;
- port->aggregator->partner_system_priority =
- port->partner_oper.system_priority;
- port->aggregator->partner_oper_aggregator_key = port->partner_oper.key;
- port->aggregator->receive_state = 1;
- port->aggregator->transmit_state = 1;
- port->aggregator->lag_ports = port;
- port->aggregator->num_of_ports++;
-
- /* mark this port as selected */
- port->sm_vars |= AD_PORT_SELECTED;
-
- netdev_dbg(bond->dev, "Port %d joined LAG %d(new LAG)\n",
- port->actor_port_number,
- port->aggregator->aggregator_identifier);
} else {
+ /* use old aggregator */
netdev_err(bond->dev, "Port %d (on %s) did not find a suitable aggregator\n",
- port->actor_port_number, port->slave->dev->name);
- return;
+ port->actor_port_number, port->slave->dev->name);
+ if (port->aggregator == NULL) {
+ netdev_err(bond->dev, "old aggregator is still NULL\n");
+ return;
+ }
}
+ port->actor_port_aggregator_identifier =
+ port->aggregator->aggregator_identifier;
+
+ /* update the new aggregator's parameters
+ * if port was responsed from the end-user
+ */
+ if (port->actor_oper_port_key & AD_DUPLEX_KEY_MASKS)
+ /* if port is full duplex */
+ port->aggregator->is_individual = false;
+ else
+ port->aggregator->is_individual = true;
+
+ port->aggregator->actor_admin_aggregator_key =
+ port->actor_admin_port_key;
+ port->aggregator->actor_oper_aggregator_key =
+ port->actor_oper_port_key;
+ port->aggregator->partner_system =
+ port->partner_oper.system;
+ port->aggregator->partner_system_priority =
+ port->partner_oper.system_priority;
+ port->aggregator->partner_oper_aggregator_key = port->partner_oper.key;
+ port->aggregator->receive_state = 1;
+ port->aggregator->transmit_state = 1;
+ port->aggregator->lag_ports = port;
+ port->aggregator->num_of_ports++;
+
+ /* mark this port as selected */
+ port->sm_vars |= AD_PORT_SELECTED;
+
+ netdev_dbg(bond->dev, "Port %d joined LAG %d(new LAG)\n",
+ port->actor_port_number,
+ port->aggregator->aggregator_identifier);
}
/* if all aggregator's ports are READY_N == TRUE, set ready=TRUE
* in all aggregator's ports, else set ready=FALSE in all
Thanks.
Powered by blists - more mailing lists