lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Wed, 03 Jun 2015 16:03:48 +0200
From:	Hannes Frederic Sowa <hannes@...essinduktion.org>
To:	Andy Gospodarek <gospo@...ulusnetworks.com>
Cc:	netdev@...r.kernel.org, davem@...emloft.net,
	ddutt@...ulusnetworks.com
Subject: Re: [PATCH net-next] net: change fib behavior based on interface
 link status

On Di, 2015-06-02 at 23:07 -0400, Andy Gospodarek wrote:
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 6f5f71f..5bd953c 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -2986,6 +2986,7 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
>  bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb);
>  
>  extern int		netdev_budget;
> +extern int		kill_routes_on_linkdown;

Would it make sense to make it per netns?

>  
>  /* Called by rtnetlink.c:rtnl_unlock() */
>  void netdev_run_todo(void);
> diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
> index 6d67383..4fbfda5 100644
> --- a/include/net/fib_rules.h
> +++ b/include/net/fib_rules.h
> @@ -37,6 +37,7 @@ struct fib_lookup_arg {
>  	struct fib_rule		*rule;
>  	int			flags;
>  #define FIB_LOOKUP_NOREF	1
> +#define FIB_LOOKUP_ALLOWDEAD	2
>  };
>  
>  struct fib_rules_ops {
> diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
> index 54271ed..efb195b 100644
> --- a/include/net/ip_fib.h
> +++ b/include/net/ip_fib.h
> @@ -250,6 +250,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id);
>  struct fib_table *fib_get_table(struct net *net, u32 id);
>  
>  int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res);
> +int fib_lookup_flags(struct net *n, struct flowi4 *flp, struct fib_result *res, int flags);
>  
>  static inline int fib_lookup(struct net *net, struct flowi4 *flp,
>  			     struct fib_result *res)
> diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
> index 17fb02f..bc264d4 100644
> --- a/include/uapi/linux/rtnetlink.h
> +++ b/include/uapi/linux/rtnetlink.h
> @@ -338,6 +338,7 @@ struct rtnexthop {
>  #define RTNH_F_PERVASIVE	2	/* Do recursive gateway lookup	*/
>  #define RTNH_F_ONLINK		4	/* Gateway is forced on link	*/
>  #define RTNH_F_OFFLOAD		8	/* offloaded route */
> +#define RTNH_F_DEAD_LINKDOWN	16	/* Route dead due to carrier down */
>  
>  /* Macros to handle hexthops */
>  

-- >8 --

> diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h
> index 0956373..b6632dc 100644
> --- a/include/uapi/linux/sysctl.h
> +++ b/include/uapi/linux/sysctl.h
> @@ -276,6 +276,7 @@ enum
>  	NET_CORE_AEVENT_ETIME=20,
>  	NET_CORE_AEVENT_RSEQTH=21,
>  	NET_CORE_WARNINGS=22,
> +	NET_CORE_KILL_ROUTES_ON_LINKDOWN=23,
>  };
>  
>  /* /proc/sys/net/ethernet */
> diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
> index 7e7746a..f319116 100644
> --- a/kernel/sysctl_binary.c
> +++ b/kernel/sysctl_binary.c
> @@ -196,6 +196,7 @@ static const struct bin_table bin_net_core_table[] = {
>  	{ CTL_INT,	NET_CORE_AEVENT_ETIME,	"xfrm_aevent_etime" },
>  	{ CTL_INT,	NET_CORE_AEVENT_RSEQTH,	"xfrm_aevent_rseqth" },
>  	{ CTL_INT,	NET_CORE_WARNINGS,	"warnings" },
> +	{ CTL_INT,	NET_CORE_KILL_ROUTES_ON_LINKDOWN,	"kill_routes_on_linkdown" },
>  	{},
>  };

-- >8 --

I don't think we need this hunk any more as we don't update binary
sysctls anymore.

>  
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 0602e91..50dc19c 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -3167,6 +3167,8 @@ EXPORT_SYMBOL(netdev_max_backlog);
>  int netdev_tstamp_prequeue __read_mostly = 1;
>  int netdev_budget __read_mostly = 300;
>  int weight_p __read_mostly = 64;            /* old backlog weight */
> +int kill_routes_on_linkdown = 0;
> +EXPORT_SYMBOL(kill_routes_on_linkdown);
>  
>  /* Called with irq disabled */
>  static inline void ____napi_schedule(struct softnet_data *sd,
> diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
> index 95b6139..fef1804 100644
> --- a/net/core/sysctl_net_core.c
> +++ b/net/core/sysctl_net_core.c
> @@ -392,6 +392,13 @@ static struct ctl_table net_core_table[] = {
>  		.mode		= 0644,
>  		.proc_handler	= proc_dointvec
>  	},
> +	{
> +		.procname	= "kill_routes_on_linkdown",
> +		.data		= &kill_routes_on_linkdown,
> +		.maxlen		= sizeof(int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_dointvec
> +	},
>  	{ }
>  };
>  
> diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
> index 872494e..94348c7 100644
> --- a/net/ipv4/fib_frontend.c
> +++ b/net/ipv4/fib_frontend.c
> @@ -1107,6 +1107,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
>  	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
>  	struct in_device *in_dev;
>  	struct net *net = dev_net(dev);
> +	unsigned flags;
>  
>  	if (event == NETDEV_UNREGISTER) {
>  		fib_disable_ip(dev, 2);
> @@ -1130,10 +1131,17 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
>  		rt_cache_flush(net);
>  		break;
>  	case NETDEV_DOWN:
> -		fib_disable_ip(dev, 0);
> +		fib_disable_ip(dev, 1);

Shouldn't this depend on the sysctl?

>  		break;
> -	case NETDEV_CHANGEMTU:
>  	case NETDEV_CHANGE:
> +		if (kill_routes_on_linkdown) {
> +			flags = dev_get_flags(dev);
> +			if (flags & (IFF_RUNNING|IFF_LOWER_UP))
> +				fib_sync_up(dev);
> +			else
> +				fib_sync_down_dev(dev, 0);
> +		}
> +	case NETDEV_CHANGEMTU:
>  		rt_cache_flush(net);
>  		break;
>  	}
> diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
> index 5615198..d135ec9 100644
> --- a/net/ipv4/fib_rules.c
> +++ b/net/ipv4/fib_rules.c
> @@ -49,9 +49,14 @@ struct fib4_rule {
>  
>  int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res)
>  {
> +	return fib_lookup_flags(net, flp, res, 0);
> +}
>
> +int fib_lookup_flags(struct net *net, struct flowi4 *flp, struct fib_result *res, int flags)
> +{
>  	struct fib_lookup_arg arg = {
>  		.result = res,
> -		.flags = FIB_LOOKUP_NOREF,
> +		.flags = FIB_LOOKUP_NOREF | flags,
>  	};
>  	int err;

Can fib_lookup take the flags argument always without adding a new
wrapper?
 
> diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
> index 28ec3c1..c0874ee 100644
> --- a/net/ipv4/fib_semantics.c
> +++ b/net/ipv4/fib_semantics.c
> @@ -266,7 +266,7 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
>  #ifdef CONFIG_IP_ROUTE_CLASSID
>  		    nh->nh_tclassid != onh->nh_tclassid ||
>  #endif
> -		    ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD))
> +		    ((nh->nh_flags ^ onh->nh_flags) & ~(RTNH_F_DEAD|RTNH_F_DEAD_LINKDOWN)))
>  			return -1;
>  		onh++;
>  	} endfor_nexthops(fi);
> @@ -318,7 +318,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
>  		    nfi->fib_type == fi->fib_type &&
>  		    memcmp(nfi->fib_metrics, fi->fib_metrics,
>  			   sizeof(u32) * RTAX_MAX) == 0 &&
> -		    ((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 &&
> +		    ((nfi->fib_flags ^ fi->fib_flags) & ~(RTNH_F_DEAD|RTNH_F_DEAD_LINKDOWN)) == 0 &&
>  		    (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
>  			return fi;
>  	}
> @@ -604,6 +604,8 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
>  				return -ENODEV;
>  			if (!(dev->flags & IFF_UP))
>  				return -ENETDOWN;
> +			if (!netif_carrier_ok(dev) && kill_routes_on_linkdown)
> +				nh->nh_flags |= RTNH_F_DEAD;
>  			nh->nh_dev = dev;
>  			dev_hold(dev);
>  			nh->nh_scope = RT_SCOPE_LINK;
> @@ -621,7 +623,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
>  			/* It is not necessary, but requires a bit of thinking */
>  			if (fl4.flowi4_scope < RT_SCOPE_LINK)
>  				fl4.flowi4_scope = RT_SCOPE_LINK;
> -			err = fib_lookup(net, &fl4, &res);
> +			err = fib_lookup_flags(net, &fl4, &res, FIB_LOOKUP_ALLOWDEAD);
>  			if (err) {
>  				rcu_read_unlock();
>  				return err;
> @@ -636,6 +638,8 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
>  		if (!dev)
>  			goto out;
>  		dev_hold(dev);
> +		if (!netif_carrier_ok(dev) && kill_routes_on_linkdown)
> +			nh->nh_flags |= RTNH_F_DEAD;
>  		err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN;
>  	} else {
>  		struct in_device *in_dev;
> @@ -654,6 +658,8 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
>  		nh->nh_dev = in_dev->dev;
>  		dev_hold(nh->nh_dev);
>  		nh->nh_scope = RT_SCOPE_HOST;
> +		if (!netif_carrier_ok(nh->nh_dev) && kill_routes_on_linkdown)
> +			nh->nh_flags |= RTNH_F_DEAD;
>  		err = 0;
>  	}
>  out:
> @@ -760,6 +766,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
>  	struct fib_info *ofi;
>  	int nhs = 1;
>  	struct net *net = cfg->fc_nlinfo.nl_net;
> +	int dead;
>  
>  	if (cfg->fc_type > RTN_MAX)
>  		goto err_inval;
> @@ -920,11 +927,18 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
>  		if (!nh->nh_dev)
>  			goto failure;
>  	} else {
> +		dead = 0;
>  		change_nexthops(fi) {
>  			err = fib_check_nh(cfg, fi, nexthop_nh);
>  			if (err != 0)
>  				goto failure;
> +			if (nexthop_nh->nh_flags & RTNH_F_DEAD)
> +				dead++;
>  		} endfor_nexthops(fi)
> +		if ((dead == fi->fib_nhs)) {
> +			fi->fib_flags |= RTNH_F_DEAD;
> +			fi->fib_flags |= RTNH_F_DEAD_LINKDOWN;
> +		}
>  	}
>  
>  	if (fi->fib_prefsrc) {
> @@ -1097,6 +1111,8 @@ int fib_sync_down_addr(struct net *net, __be32 local)
>  			continue;
>  		if (fi->fib_prefsrc == local) {
>  			fi->fib_flags |= RTNH_F_DEAD;
> +			/* Addr is gone, more serious than a linkdown */
> +			fi->fib_flags &= ~RTNH_F_DEAD_LINKDOWN;
>  			ret++;
>  		}
>  	}
> @@ -1112,7 +1128,7 @@ int fib_sync_down_dev(struct net_device *dev, int force)
>  	struct hlist_head *head = &fib_info_devhash[hash];
>  	struct fib_nh *nh;
>  
> -	if (force)
> +	if (force > 1)
>  		scope = -1;

Why was this changed? I think force can be made a boolean.

>  
>  	hlist_for_each_entry(nh, head, nh_hash) {
> @@ -1147,6 +1163,11 @@ int fib_sync_down_dev(struct net_device *dev, int force)
>  		} endfor_nexthops(fi)
>  		if (dead == fi->fib_nhs) {
>  			fi->fib_flags |= RTNH_F_DEAD;
> +			/* force marks route down due to admin down and device removal. */
> +			if (!force)
> +				fi->fib_flags |= RTNH_F_DEAD_LINKDOWN;
> +			else
> +				fi->fib_flags &= ~RTNH_F_DEAD_LINKDOWN;
>  			ret++;
>  		}
>  	}
> @@ -1223,10 +1244,12 @@ int fib_sync_up(struct net_device *dev)
>  	struct hlist_head *head;
>  	struct fib_nh *nh;
>  	int ret;
> +	int link_up;
>  
>  	if (!(dev->flags & IFF_UP))
>  		return 0;
>  
> +	link_up = netif_carrier_ok(dev);
>  	prev_fi = NULL;
>  	hash = fib_devindex_hashfn(dev->ifindex);
>  	head = &fib_info_devhash[hash];
> @@ -1253,16 +1276,27 @@ int fib_sync_up(struct net_device *dev)
>  			if (nexthop_nh->nh_dev != dev ||
>  			    !__in_dev_get_rtnl(dev))
>  				continue;
> -			alive++;
> +			if (link_up) {
> +				/* Link is up, so mark NH as alive */
> +				nexthop_nh->nh_flags &= ~RTNH_F_DEAD;
> +				alive++;
> +			} else
> +				nexthop_nh->nh_flags |= RTNH_F_DEAD;
> +
>  			spin_lock_bh(&fib_multipath_lock);
>  			nexthop_nh->nh_power = 0;
> -			nexthop_nh->nh_flags &= ~RTNH_F_DEAD;
>  			spin_unlock_bh(&fib_multipath_lock);
>  		} endfor_nexthops(fi)
>  
>  		if (alive > 0) {
> +			/* Some NHs are alive, unmark the route as dead */
> +			fi->fib_flags &= ~RTNH_F_DEAD_LINKDOWN;
>  			fi->fib_flags &= ~RTNH_F_DEAD;
>  			ret++;
> +		} else {
> +			/* No NHs are alive, mark the route as dead */
> +			fi->fib_flags |= RTNH_F_DEAD_LINKDOWN;
> +			fi->fib_flags |= RTNH_F_DEAD;
>  		}
>  	}
>  
> diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
> index 01bce15..eedf287 100644
> --- a/net/ipv4/fib_trie.c
> +++ b/net/ipv4/fib_trie.c
> @@ -1401,12 +1401,18 @@ found:
>  #endif
>  			return err;
>  		}
> -		if (fi->fib_flags & RTNH_F_DEAD)
> -			continue;
> +		if (!(fib_flags & FIB_LOOKUP_ALLOWDEAD)) {
> +			/* if route is dead and link is down, keep looking  */
> +			if ((fi->fib_flags & RTNH_F_DEAD) &&
> +			    (fi->fib_flags & RTNH_F_DEAD_LINKDOWN))
> +				continue;
> +		}
>  		for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
>  			const struct fib_nh *nh = &fi->fib_nh[nhsel];
>  
> -			if (nh->nh_flags & RTNH_F_DEAD)
> +			/* allow next-hop to be added if link is down */
> +			if ((nh->nh_flags & RTNH_F_DEAD) &&
> +			    !(fi->fib_flags & RTNH_F_DEAD_LINKDOWN))
>  				continue;
>  			if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif)
>  				continue;
> @@ -1829,7 +1835,12 @@ int fib_table_flush(struct fib_table *tb)
>  		hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
>  			struct fib_info *fi = fa->fa_info;
>  
> -			if (!fi || !(fi->fib_flags & RTNH_F_DEAD)) {
> +			/* DEAD and DEAD_LINKDOWN will not both be set
> +			 * with IFF_UP is cleared, so do not flush
> +			 * entries with only DEAD set
> +			 */
> +			if (!fi || !((fi->fib_flags & RTNH_F_DEAD) &&
> +			    !(fi->fib_flags & RTNH_F_DEAD_LINKDOWN))) {
>  				slen = fa->fa_slen;
>  				continue;
>  			}

Otherwise looks good, thanks!

Bye,
Hannes


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ