[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20100929184413.GY7497@gospo.rdu.redhat.com>
Date: Wed, 29 Sep 2010 14:44:13 -0400
From: Andy Gospodarek <andy@...yhouse.net>
To: Flavio Leitner <fleitner@...hat.com>
Cc: netdev@...r.kernel.org
Subject: Re: [PATCH] bonding: rejoin multicast groups on VLANs
On Wed, Sep 29, 2010 at 04:12:24AM -0300, Flavio Leitner wrote:
> It fixes bonding to rejoin multicast groups added
> to VLAN devices on top of bonding when a failover
> happens.
>
> The first packet may be discarded, so the timer
> assure that at least 3 Reports are sent.
>
Good find, Flavio. Clearly the fact that multicast membership is broken
needs to be fixed, but I would rather not see timers used at all. We
worked hard in the past to eliminate timers for several reasons, so I
would rather see a workqueue used.
I also don't like retransmitting the membership report 3 times when it
may not be needed. Though many switches can handle it, the cost of
receiving and processing what might be a large list of multicast
addresses every 200ms for 600ms doesn't seem ideal. It also feels like
a hack. :)
If retransmission of the membership reports is a requirement for some
users, I would rather see it as a configuration option.
Maybe something like this?
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 3b16f62..b7b4a74 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -109,6 +109,7 @@ static char *arp_validate;
static char *fail_over_mac;
static int all_slaves_active = 0;
static struct bond_params bonding_defaults;
+static int resend_igmp = BOND_DEFAULT_RESEND_IGMP;
module_param(max_bonds, int, 0);
MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");
@@ -163,6 +164,8 @@ module_param(all_slaves_active, int, 0);
MODULE_PARM_DESC(all_slaves_active, "Keep all frames received on an interface"
"by setting active flag for all slaves. "
"0 for never (default), 1 for always.");
+module_param(resend_igmp, int, 0);
+MODULE_PARM_DESC(resend_igmp, "Number of IGMP membership reports to send on link failure");
/*----------------------------- Global variables ----------------------------*/
@@ -865,18 +868,14 @@ static void bond_mc_del(struct bonding *bond, void *addr)
}
-/*
- * Retrieve the list of registered multicast addresses for the bonding
- * device and retransmit an IGMP JOIN request to the current active
- * slave.
- */
-static void bond_resend_igmp_join_requests(struct bonding *bond)
+static void __bond_resend_igmp_join_requests(struct net_device *dev)
{
struct in_device *in_dev;
struct ip_mc_list *im;
rcu_read_lock();
- in_dev = __in_dev_get_rcu(bond->dev);
+
+ in_dev = __in_dev_get_rcu(dev);
if (in_dev) {
for (im = in_dev->mc_list; im; im = im->next)
ip_mc_rejoin_group(im);
@@ -885,6 +884,48 @@ static void bond_resend_igmp_join_requests(struct bonding *bond)
rcu_read_unlock();
}
+
+/*
+ * Retrieve the list of registered multicast addresses for the bonding
+ * device and retransmit an IGMP JOIN request to the current active
+ * slave.
+ */
+static void bond_resend_igmp_join_requests(struct bonding *bond)
+{
+ struct net_device *vlan_dev;
+ struct vlan_entry *vlan;
+
+ read_lock(&bond->lock);
+ if (bond->kill_timers)
+ goto out;
+
+ /* rejoin all groups on bond device */
+ __bond_resend_igmp_join_requests(bond->dev);
+
+ /* rejoin all groups on vlan devices */
+ if (bond->vlgrp) {
+ list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
+ vlan_dev = vlan_group_get_device(bond->vlgrp,
+ vlan->vlan_id);
+ if (vlan_dev)
+ __bond_resend_igmp_join_requests(vlan_dev);
+ }
+ }
+
+ if (--bond->igmp_retrans > 0)
+ queue_delayed_work(bond->wq, &bond->mcast_work, HZ/5);
+
+out:
+ read_unlock(&bond->lock);
+}
+
+void bond_resend_igmp_join_requests_delayed(struct work_struct *work)
+{
+ struct bonding *bond = container_of(work, struct bonding,
+ mcast_work.work);
+ bond_resend_igmp_join_requests(bond);
+}
+
/*
* flush all members of flush->mc_list from device dev->mc_list
*/
@@ -944,7 +985,10 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active,
netdev_for_each_mc_addr(ha, bond->dev)
dev_mc_add(new_active->dev, ha->addr);
- bond_resend_igmp_join_requests(bond);
+
+ /* rejoin multicast groups */
+ bond->igmp_retrans = bond->params.resend_igmp;
+ queue_delayed_work(bond->wq, &bond->mcast_work, 0);
}
}
@@ -3744,6 +3788,9 @@ static int bond_open(struct net_device *bond_dev)
bond->kill_timers = 0;
+ /* multicast retrans */
+ INIT_DELAYED_WORK(&bond->mcast_work, bond_resend_igmp_join_requests_delayed);
+
if (bond_is_lb(bond)) {
/* bond_alb_initialize must be called before the timer
* is started.
@@ -3828,6 +3875,8 @@ static int bond_close(struct net_device *bond_dev)
break;
}
+ if (delayed_work_pending(&bond->mcast_work))
+ cancel_delayed_work(&bond->ad_work);
if (bond_is_lb(bond)) {
/* Must be called only after all
@@ -4699,6 +4748,9 @@ static void bond_work_cancel_all(struct bonding *bond)
if (bond->params.mode == BOND_MODE_8023AD &&
delayed_work_pending(&bond->ad_work))
cancel_delayed_work(&bond->ad_work);
+
+ if (delayed_work_pending(&bond->mcast_work))
+ cancel_delayed_work(&bond->ad_work);
}
/*
@@ -4891,6 +4943,12 @@ static int bond_check_params(struct bond_params *params)
all_slaves_active = 0;
}
+ if (resend_igmp < 0 || resend_igmp > 255) {
+ pr_warning("Warning: resend_igmp (%d) should be between "
+ "0 and 255, resetting to %d\n",
+ resend_igmp, BOND_DEFAULT_RESEND_IGMP);
+ resend_igmp = BOND_DEFAULT_RESEND_IGMP;
+ }
/* reset values for TLB/ALB */
if ((bond_mode == BOND_MODE_TLB) ||
(bond_mode == BOND_MODE_ALB)) {
@@ -5063,6 +5121,7 @@ static int bond_check_params(struct bond_params *params)
params->fail_over_mac = fail_over_mac_value;
params->tx_queues = tx_queues;
params->all_slaves_active = all_slaves_active;
+ params->resend_igmp = resend_igmp;
if (primary) {
strncpy(params->primary, primary, IFNAMSIZ);
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index c6fdd85..c49bdb0 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -136,6 +136,7 @@ struct bond_params {
__be32 arp_targets[BOND_MAX_ARP_TARGETS];
int tx_queues;
int all_slaves_active;
+ int resend_igmp;
};
struct bond_parm_tbl {
@@ -198,6 +199,7 @@ struct bonding {
s32 slave_cnt; /* never change this value outside the attach/detach wrappers */
rwlock_t lock;
rwlock_t curr_slave_lock;
+ s8 igmp_retrans;
s8 kill_timers;
s8 send_grat_arp;
s8 send_unsol_na;
@@ -223,6 +225,7 @@ struct bonding {
struct delayed_work arp_work;
struct delayed_work alb_work;
struct delayed_work ad_work;
+ struct delayed_work mcast_work;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
struct in6_addr master_ipv6;
#endif
diff --git a/include/linux/if_bonding.h b/include/linux/if_bonding.h
index 2c79943..d2f78b7 100644
--- a/include/linux/if_bonding.h
+++ b/include/linux/if_bonding.h
@@ -84,6 +84,9 @@
#define BOND_DEFAULT_MAX_BONDS 1 /* Default maximum number of devices to support */
#define BOND_DEFAULT_TX_QUEUES 16 /* Default number of tx queues per device */
+
+#define BOND_DEFAULT_RESEND_IGMP 1 /* Default number of IGMP membership reports
+ to resend on link failure. */
/* hashing types */
#define BOND_XMIT_POLICY_LAYER2 0 /* layer 2 (MAC only), default */
#define BOND_XMIT_POLICY_LAYER34 1 /* layer 3+4 (IP ^ (TCP || UDP)) */
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists