lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <55E71FCA.1060006@cumulusnetworks.com>
Date:	Wed, 02 Sep 2015 09:11:54 -0700
From:	roopa <roopa@...ulusnetworks.com>
To:	davem@...emloft.net
CC:	Mazziesaccount@...il.com, hannes@...essinduktion.org,
	kuznet@....inr.ac.ru, jmorris@...ei.org, yoshfuji@...ux-ipv6.org,
	netdev@...r.kernel.org
Subject: Re: [PATCH net-next] ipv6: fix multipath route replace error recovery

On 9/1/15, 10:54 PM, Roopa Prabhu wrote:
> From: Roopa Prabhu <roopa@...ulusnetworks.com>
>
> Problem:
> The ecmp route replace support for ipv6 in the kernel, deletes the
> existing ecmp route too early, ie when it installs the first nexthop.
> If there is an error in installing the subsequent nexthops, its too late
> to recover the already deleted existing route
>
> This patch fixes the problem with the following:
> a) Changes the existing multipath route add code to a two stage process:
>    build rt6_infos + insert them
> 	ip6_route_add rt6_info creation code is moved into
> 	ip6_route_info_create.
> b) This ensures that all errors are caught during building rt6_infos
>    and we fail early
> c) Separates multipath add and del code. Because add needs the special
>    two stage mode in a) and delete essentially does not care.
> d) In any event if the code fails during inserting a route again, a
>    warning is printed (This should be unlikely)
>
> Before the patch:
> $ip -6 route show
> 3000:1000:1000:1000::2 via fe80::202:ff:fe00:b dev swp49s0 metric 1024
> 3000:1000:1000:1000::2 via fe80::202:ff:fe00:d dev swp49s1 metric 1024
> 3000:1000:1000:1000::2 via fe80::202:ff:fe00:f dev swp49s2 metric 1024
>
> /* Try replacing the route with a duplicate nexthop */
> $ip -6 route change 3000:1000:1000:1000::2/128 nexthop via
> fe80::202:ff:fe00:b dev swp49s0 nexthop via fe80::202:ff:fe00:d dev
> swp49s1 nexthop via fe80::202:ff:fe00:d dev swp49s1
> RTNETLINK answers: File exists
>
> $ip -6 route show
> /* previously added ecmp route 3000:1000:1000:1000::2 dissappears from
>   * kernel */
>
> After the patch:
> $ip -6 route show
> 3000:1000:1000:1000::2 via fe80::202:ff:fe00:b dev swp49s0 metric 1024
> 3000:1000:1000:1000::2 via fe80::202:ff:fe00:d dev swp49s1 metric 1024
> 3000:1000:1000:1000::2 via fe80::202:ff:fe00:f dev swp49s2 metric 1024
>
> /* Try replacing the route with a duplicate nexthop */
> $ip -6 route change 3000:1000:1000:1000::2/128 nexthop via
> fe80::202:ff:fe00:b dev swp49s0 nexthop via fe80::202:ff:fe00:d dev
> swp49s1 nexthop via fe80::202:ff:fe00:d dev swp49s1
> RTNETLINK answers: File exists
>
> $ip -6 route show
> 3000:1000:1000:1000::2 via fe80::202:ff:fe00:b dev swp49s0 metric 1024
> 3000:1000:1000:1000::2 via fe80::202:ff:fe00:d dev swp49s1 metric 1024
> 3000:1000:1000:1000::2 via fe80::202:ff:fe00:f dev swp49s2 metric 1024
>
> Fixes: 4a287eba2de3 ("IPv6 routing, NLM_F_* flag support: REPLACE and EXCL flags support, warn about missing CREATE flag")
> Signed-off-by: Roopa Prabhu <roopa@...ulusnetworks.com>
> ---
> This bug is present in 4.1 kernel and 4.2 too.
> Since 4.2 is out or almost out, I am submitting the patch against net-next.
> I can respin against net if needed. The part of the patch that I would appreciate
> more eyes on is the cleanup of the rt6_infos in ip_route_multipath_add. And
> I have tried to keep the changes local to route.c closer to the netlink
> message handling. Most of the code changes are moving code into separate
> functions.
>
>   net/ipv6/route.c | 205 ++++++++++++++++++++++++++++++++++++++++++++++++-------
>   1 file changed, 179 insertions(+), 26 deletions(-)
>
> diff --git a/net/ipv6/route.c b/net/ipv6/route.c
> index f45cac6..b1b8c96 100644
> --- a/net/ipv6/route.c
> +++ b/net/ipv6/route.c
> @@ -1748,7 +1748,7 @@ static int ip6_convert_metrics(struct mx6_config *mxc,
>   	return -EINVAL;
>   }
>   
> -int ip6_route_add(struct fib6_config *cfg)
> +int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
>   {
>   	int err;
>   	struct net *net = cfg->fc_nlinfo.nl_net;
> @@ -1756,7 +1756,6 @@ int ip6_route_add(struct fib6_config *cfg)
>   	struct net_device *dev = NULL;
>   	struct inet6_dev *idev = NULL;
>   	struct fib6_table *table;
> -	struct mx6_config mxc = { .mx = NULL, };
>   	int addr_type;
>   
>   	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
> @@ -1981,6 +1980,32 @@ install_route:
>   
>   	cfg->fc_nlinfo.nl_net = dev_net(dev);
>   
> +	*rt_ret = rt;
> +
> +	return 0;
> +out:
> +	if (dev)
> +		dev_put(dev);
> +	if (idev)
> +		in6_dev_put(idev);
> +	if (rt)
> +		dst_free(&rt->dst);
> +
> +	*rt_ret = NULL;
> +
> +	return err;
> +}
> +
> +int ip6_route_add(struct fib6_config *cfg)
> +{
> +	struct mx6_config mxc = { .mx = NULL, };
> +	struct rt6_info *rt = NULL;
> +	int err;
> +
> +	err = ip6_route_info_create(cfg, &rt);
> +	if (err)
> +		goto out;
> +
>   	err = ip6_convert_metrics(&mxc, cfg);
>   	if (err)
>   		goto out;
> @@ -1988,14 +2013,12 @@ install_route:
>   	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
>   
>   	kfree(mxc.mx);
> +
>   	return err;
>   out:
> -	if (dev)
> -		dev_put(dev);
> -	if (idev)
> -		in6_dev_put(idev);
>   	if (rt)
>   		dst_free(&rt->dst);
> +
>   	return err;
>   }
>   
> @@ -2776,19 +2799,79 @@ errout:
>   	return err;
>   }
>   
> -static int ip6_route_multipath(struct fib6_config *cfg, int add)
> +struct rt6_nh {
> +	struct rt6_info *rt6_info;
> +	struct fib6_config r_cfg;
> +	struct mx6_config mxc;
> +	struct list_head next;
> +};
> +
> +static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
> +{
> +	struct rt6_nh *nh;
> +	char *errstr = "IPV6: unexpected error replacing route";
> +
> +	list_for_each_entry(nh, rt6_nh_list, next) {
> +		printk(KERN_WARNING "%s: %pI6 nexthop %pI6 ifi %d\n",
> +		       errstr, &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
> +		       nh->r_cfg.fc_ifindex);
> +	}
> +}
> +
> +static int ip6_route_info_append(struct list_head *rt6_nh_list,
> +				 struct rt6_info *rt, struct fib6_config *r_cfg)
> +{
> +	struct rt6_nh *nh;
> +	struct rt6_info *rtnh;
> +	int err = -EEXIST;
> +
> +	list_for_each_entry(nh, rt6_nh_list, next) {
> +		/* check if rt6_info already exists */
> +		rtnh = nh->rt6_info;
> +
> +		if (rtnh->dst.dev == rt->dst.dev &&
> +		    rtnh->rt6i_idev == rt->rt6i_idev &&
> +		    ipv6_addr_equal(&rtnh->rt6i_gateway,
> +				    &rt->rt6i_gateway))
> +			return err;
> +	}
> +
> +	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
> +	if (!nh)
> +		return -ENOMEM;
> +	nh->rt6_info = rt;
> +	err = ip6_convert_metrics(&nh->mxc, r_cfg);
> +	if (err) {
> +		kfree(nh);
> +		return err;
> +	}
> +	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
> +	list_add_tail(&nh->next, rt6_nh_list);
> +
> +	return 0;
> +}
> +
> +static int ip6_route_multipath_add(struct fib6_config *cfg)
>   {
>   	struct fib6_config r_cfg;
>   	struct rtnexthop *rtnh;
> +	struct rt6_info *rt;
> +	struct rt6_nh *err_nh;
> +	struct rt6_nh *nh, *nh_safe;
>   	int remaining;
>   	int attrlen;
> -	int err = 0, last_err = 0;
> +	int err = 1;
> +	int nhn = 0;
> +	int replace = (cfg->fc_nlinfo.nlh &&
> +		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
> +	LIST_HEAD(rt6_nh_list);
>   
>   	remaining = cfg->fc_mp_len;
> -beginning:
>   	rtnh = (struct rtnexthop *)cfg->fc_mp;
>   
> -	/* Parse a Multipath Entry */
> +	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
> +	 * rt6_info structs per nexthop
> +	 */
>   	while (rtnh_ok(rtnh, remaining)) {
>   		memcpy(&r_cfg, cfg, sizeof(*cfg));
>   		if (rtnh->rtnh_ifindex)
> @@ -2808,22 +2891,30 @@ beginning:
>   			if (nla)
>   				r_cfg.fc_encap_type = nla_get_u16(nla);
>   		}
> -		err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
> +
> +		err = ip6_route_info_create(&r_cfg, &rt);
> +		if (err)
> +			goto cleanup;
> +
> +		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
> +		if (err)
realized that rt needs to be freed here. Will spin v2 soon...
> +			goto cleanup;
> +
> +		rtnh = rtnh_next(rtnh, &remaining);
> +	}
> +
> +	err_nh = NULL;
> +	list_for_each_entry(nh, &rt6_nh_list, next) {
> +		err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
> +		/* nh->rt6_info is used or freed at this point, reset to NULL*/
> +		nh->rt6_info = NULL;
>   		if (err) {
> -			last_err = err;
> -			/* If we are trying to remove a route, do not stop the
> -			 * loop when ip6_route_del() fails (because next hop is
> -			 * already gone), we should try to remove all next hops.
> -			 */
> -			if (add) {
> -				/* If add fails, we should try to delete all
> -				 * next hops that have been already added.
> -				 */
> -				add = 0;
> -				remaining = cfg->fc_mp_len - remaining;
> -				goto beginning;
> -			}
> +			if (replace && nhn)
> +				ip6_print_replace_route_err(&rt6_nh_list);
> +			err_nh = nh;
> +			goto add_errout;
>   		}
> +
>   		/* Because each route is added like a single route we remove
>   		 * these flags after the first nexthop: if there is a collision,
>   		 * we have already failed to add the first nexthop:
> @@ -2833,6 +2924,68 @@ beginning:
>   		 */
>   		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
>   						     NLM_F_REPLACE);
> +		nhn++;
> +	}
> +
> +	goto cleanup;
> +
> +add_errout:
> +	/* Delete routes that were already added */
> +	list_for_each_entry(nh, &rt6_nh_list, next) {
> +		if (err_nh == nh)
> +			break;
> +		ip6_route_del(&nh->r_cfg);
> +	}
> +
> +cleanup:
> +	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
> +		if (nh->rt6_info) {
> +			struct rt6_info *r = nh->rt6_info;
> +
> +			if (r->rt6i_idev)
> +				in6_dev_put(r->rt6i_idev);
> +			dst_free(&r->dst);
> +		}
> +		if (nh->mxc.mx)
> +			kfree(nh->mxc.mx);
> +		list_del(&nh->next);
> +		kfree(nh);
> +	}
> +
> +	return err;
> +}
> +
> +static int ip6_route_multipath_del(struct fib6_config *cfg)
> +{
> +	struct fib6_config r_cfg;
> +	struct rtnexthop *rtnh;
> +	int remaining;
> +	int attrlen;
> +	int err = 1, last_err = 0;
> +
> +	remaining = cfg->fc_mp_len;
> +	rtnh = (struct rtnexthop *)cfg->fc_mp;
> +
> +	/* Parse a Multipath Entry */
> +	while (rtnh_ok(rtnh, remaining)) {
> +		memcpy(&r_cfg, cfg, sizeof(*cfg));
> +		if (rtnh->rtnh_ifindex)
> +			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
> +
> +		attrlen = rtnh_attrlen(rtnh);
> +		if (attrlen > 0) {
> +			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
> +
> +			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
> +			if (nla) {
> +				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
> +				r_cfg.fc_flags |= RTF_GATEWAY;
> +			}
> +		}
> +		err = ip6_route_del(&r_cfg);
> +		if (err)
> +			last_err = err;
> +
>   		rtnh = rtnh_next(rtnh, &remaining);
>   	}
>   
> @@ -2849,7 +3002,7 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
>   		return err;
>   
>   	if (cfg.fc_mp)
> -		return ip6_route_multipath(&cfg, 0);
> +		return ip6_route_multipath_del(&cfg);
>   	else
>   		return ip6_route_del(&cfg);
>   }
> @@ -2864,7 +3017,7 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
>   		return err;
>   
>   	if (cfg.fc_mp)
> -		return ip6_route_multipath(&cfg, 1);
> +		return ip6_route_multipath_add(&cfg);
>   	else
>   		return ip6_route_add(&cfg);
>   }

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ