netdev - Re: [PATCH] bonding: rejoin multicast groups on VLANs

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20100929193539.GC2864@redhat.com>
Date:	Wed, 29 Sep 2010 16:35:39 -0300
From:	Flavio Leitner <fbl@...hat.com>
To:	Andy Gospodarek <andy@...yhouse.net>
Cc:	netdev@...r.kernel.org
Subject: Re: [PATCH] bonding: rejoin multicast groups on VLANs

On Wed, Sep 29, 2010 at 02:44:13PM -0400, Andy Gospodarek wrote:
> On Wed, Sep 29, 2010 at 04:12:24AM -0300, Flavio Leitner wrote:
> > It fixes bonding to rejoin multicast groups added
> > to VLAN devices on top of bonding when a failover
> > happens.
> > 
> > The first packet may be discarded, so the timer
> > assure that at least 3 Reports are sent.
> > 
> 
> Good find, Flavio.  Clearly the fact that multicast membership is broken
> needs to be fixed, but I would rather not see timers used at all.  We
> worked hard in the past to eliminate timers for several reasons, so I
> would rather see a workqueue used.

I noticed that the code is using workqueues now, just thought a
simple thing which may run couple times would fit perfectly with
a simple timer.


> I also don't like retransmitting the membership report 3 times when it
> may not be needed.  Though many switches can handle it, the cost of
> receiving and processing what might be a large list of multicast
> addresses every 200ms for 600ms doesn't seem ideal.  It also feels like
> a hack. :)

Definitely a parameter is much better, but I wasn't sure about
the patch approach so I was expecting a review like this and then
do the refinements needed. Better to post early, right? :)

I see your point to change the default to one membership report,
but we can't assure during a failover if everything has been
received. Also, it isn't supposed to keep failing flooding the
network, so I would rather have couple membership reports being
send than watch an important multicast application failing.

Perhaps 3 is too much, but one sounds too few to me.

what you think?

Flavio

> 
> If retransmission of the membership reports is a requirement for some
> users, I would rather see it as a configuration option.
> 
> Maybe something like this?
> 
> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> index 3b16f62..b7b4a74 100644
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c
> @@ -109,6 +109,7 @@ static char *arp_validate;
>  static char *fail_over_mac;
>  static int all_slaves_active = 0;
>  static struct bond_params bonding_defaults;
> +static int resend_igmp = BOND_DEFAULT_RESEND_IGMP;
>  
>  module_param(max_bonds, int, 0);
>  MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");
> @@ -163,6 +164,8 @@ module_param(all_slaves_active, int, 0);
>  MODULE_PARM_DESC(all_slaves_active, "Keep all frames received on an interface"
>  				     "by setting active flag for all slaves.  "
>  				     "0 for never (default), 1 for always.");
> +module_param(resend_igmp, int, 0);
> +MODULE_PARM_DESC(resend_igmp, "Number of IGMP membership reports to send on link failure");
>  
>  /*----------------------------- Global variables ----------------------------*/
>  
> @@ -865,18 +868,14 @@ static void bond_mc_del(struct bonding *bond, void *addr)
>  }
>  
>  
> -/*
> - * Retrieve the list of registered multicast addresses for the bonding
> - * device and retransmit an IGMP JOIN request to the current active
> - * slave.
> - */
> -static void bond_resend_igmp_join_requests(struct bonding *bond)
> +static void __bond_resend_igmp_join_requests(struct net_device *dev)
>  {
>  	struct in_device *in_dev;
>  	struct ip_mc_list *im;
>  
>  	rcu_read_lock();
> -	in_dev = __in_dev_get_rcu(bond->dev);
> +
> +	in_dev = __in_dev_get_rcu(dev);
>  	if (in_dev) {
>  		for (im = in_dev->mc_list; im; im = im->next)
>  			ip_mc_rejoin_group(im);
> @@ -885,6 +884,48 @@ static void bond_resend_igmp_join_requests(struct bonding *bond)
>  	rcu_read_unlock();
>  }
>  
> +
> +/*
> + * Retrieve the list of registered multicast addresses for the bonding
> + * device and retransmit an IGMP JOIN request to the current active
> + * slave.
> + */
> +static void bond_resend_igmp_join_requests(struct bonding *bond)
> +{
> +	struct net_device *vlan_dev;
> +	struct vlan_entry *vlan;
> +
> +	read_lock(&bond->lock);
> +	if (bond->kill_timers)
> +		goto out;
> +
> +	/* rejoin all groups on bond device */
> +	__bond_resend_igmp_join_requests(bond->dev);
> +
> +	/* rejoin all groups on vlan devices */
> +	if (bond->vlgrp) {
> +		list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
> +			vlan_dev = vlan_group_get_device(bond->vlgrp,
> +							 vlan->vlan_id);
> +			if (vlan_dev)
> +				__bond_resend_igmp_join_requests(vlan_dev);
> +		}
> +	}
> +
> +	if (--bond->igmp_retrans > 0)
> +		queue_delayed_work(bond->wq, &bond->mcast_work, HZ/5);
> +
> +out:
> +	read_unlock(&bond->lock);
> +}
> +
> +void bond_resend_igmp_join_requests_delayed(struct work_struct *work)
> +{
> +	struct bonding *bond = container_of(work, struct bonding,
> +					    mcast_work.work);
> +	bond_resend_igmp_join_requests(bond);
> +}
> +
>  /*
>   * flush all members of flush->mc_list from device dev->mc_list
>   */
> @@ -944,7 +985,10 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active,
>  
>  		netdev_for_each_mc_addr(ha, bond->dev)
>  			dev_mc_add(new_active->dev, ha->addr);
> -		bond_resend_igmp_join_requests(bond);
> +
> +		/* rejoin multicast groups */
> +		bond->igmp_retrans = bond->params.resend_igmp;
> +		queue_delayed_work(bond->wq, &bond->mcast_work, 0);
>  	}
>  }
>  
> @@ -3744,6 +3788,9 @@ static int bond_open(struct net_device *bond_dev)
>  
>  	bond->kill_timers = 0;
>  
> +	/* multicast retrans */
> +	INIT_DELAYED_WORK(&bond->mcast_work, bond_resend_igmp_join_requests_delayed);
> +
>  	if (bond_is_lb(bond)) {
>  		/* bond_alb_initialize must be called before the timer
>  		 * is started.
> @@ -3828,6 +3875,8 @@ static int bond_close(struct net_device *bond_dev)
>  		break;
>  	}
>  
> +	if (delayed_work_pending(&bond->mcast_work))
> +		cancel_delayed_work(&bond->ad_work);
>  
>  	if (bond_is_lb(bond)) {
>  		/* Must be called only after all
> @@ -4699,6 +4748,9 @@ static void bond_work_cancel_all(struct bonding *bond)
>  	if (bond->params.mode == BOND_MODE_8023AD &&
>  	    delayed_work_pending(&bond->ad_work))
>  		cancel_delayed_work(&bond->ad_work);
> +
> +	if (delayed_work_pending(&bond->mcast_work))
> +		cancel_delayed_work(&bond->ad_work);
>  }
>  
>  /*
> @@ -4891,6 +4943,12 @@ static int bond_check_params(struct bond_params *params)
>  		all_slaves_active = 0;
>  	}
>  
> +	if (resend_igmp < 0 || resend_igmp > 255) {
> +		pr_warning("Warning: resend_igmp (%d) should be between "
> +			   "0 and 255, resetting to %d\n",
> +			   resend_igmp, BOND_DEFAULT_RESEND_IGMP);
> +		resend_igmp = BOND_DEFAULT_RESEND_IGMP;
> +	}
>  	/* reset values for TLB/ALB */
>  	if ((bond_mode == BOND_MODE_TLB) ||
>  	    (bond_mode == BOND_MODE_ALB)) {
> @@ -5063,6 +5121,7 @@ static int bond_check_params(struct bond_params *params)
>  	params->fail_over_mac = fail_over_mac_value;
>  	params->tx_queues = tx_queues;
>  	params->all_slaves_active = all_slaves_active;
> +	params->resend_igmp = resend_igmp;
>  
>  	if (primary) {
>  		strncpy(params->primary, primary, IFNAMSIZ);
> diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
> index c6fdd85..c49bdb0 100644
> --- a/drivers/net/bonding/bonding.h
> +++ b/drivers/net/bonding/bonding.h
> @@ -136,6 +136,7 @@ struct bond_params {
>  	__be32 arp_targets[BOND_MAX_ARP_TARGETS];
>  	int tx_queues;
>  	int all_slaves_active;
> +	int resend_igmp;
>  };
>  
>  struct bond_parm_tbl {
> @@ -198,6 +199,7 @@ struct bonding {
>  	s32      slave_cnt; /* never change this value outside the attach/detach wrappers */
>  	rwlock_t lock;
>  	rwlock_t curr_slave_lock;
> +	s8       igmp_retrans;
>  	s8       kill_timers;
>  	s8	 send_grat_arp;
>  	s8	 send_unsol_na;
> @@ -223,6 +225,7 @@ struct bonding {
>  	struct   delayed_work arp_work;
>  	struct   delayed_work alb_work;
>  	struct   delayed_work ad_work;
> +	struct   delayed_work mcast_work;
>  #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
>  	struct   in6_addr master_ipv6;
>  #endif
> diff --git a/include/linux/if_bonding.h b/include/linux/if_bonding.h
> index 2c79943..d2f78b7 100644
> --- a/include/linux/if_bonding.h
> +++ b/include/linux/if_bonding.h
> @@ -84,6 +84,9 @@
>  #define BOND_DEFAULT_MAX_BONDS  1   /* Default maximum number of devices to support */
>  
>  #define BOND_DEFAULT_TX_QUEUES 16   /* Default number of tx queues per device */
> +
> +#define BOND_DEFAULT_RESEND_IGMP	1 /* Default number of IGMP membership reports
> +					     to resend on link failure. */
>  /* hashing types */
>  #define BOND_XMIT_POLICY_LAYER2		0 /* layer 2 (MAC only), default */
>  #define BOND_XMIT_POLICY_LAYER34	1 /* layer 3+4 (IP ^ (TCP || UDP)) */

-- 
Flavio
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html