lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20180228184356.duh7cxr3db7buq6d@kafai-mbp.dhcp.thefacebook.com>
Date:   Wed, 28 Feb 2018 10:44:11 -0800
From:   Martin KaFai Lau <kafai@...com>
To:     David Ahern <dsahern@...il.com>
CC:     <netdev@...r.kernel.org>, <davem@...emloft.net>,
        <idosch@...sch.org>, <roopa@...ulusnetworks.com>,
        <eric.dumazet@...il.com>, <weiwan@...gle.com>,
        <yoshfuji@...ux-ipv6.org>
Subject: Re: [PATCH RFC net-next 18/20] net/ipv6: separate handling of FIB
 entries from dst based routes

On Sun, Feb 25, 2018 at 11:47:28AM -0800, David Ahern wrote:
> Signed-off-by: David Ahern <dsahern@...il.com>
> ---
>  include/net/ip6_fib.h   |   4 +-
>  include/net/ip6_route.h |   3 +-
>  net/ipv6/addrconf.c     |  31 ++++++---
>  net/ipv6/anycast.c      |   7 +-
>  net/ipv6/ip6_fib.c      |  50 +++++++++------
>  net/ipv6/ip6_output.c   |   3 +-
>  net/ipv6/ndisc.c        |   6 +-
>  net/ipv6/route.c        | 167 +++++++++++++++++-------------------------------
>  8 files changed, 121 insertions(+), 150 deletions(-)
> 
> diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
> index 70978deac538..ff16e3d571a2 100644
> --- a/include/net/ip6_fib.h
> +++ b/include/net/ip6_fib.h
> @@ -315,9 +315,7 @@ static inline u32 rt6_get_cookie(const struct rt6_info *rt)
>  
>  	if (rt->rt6i_flags & RTF_PCPU ||
>  	    (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
> -		rt = rt->from;
> -
> -	rt6_get_cookie_safe(rt, &cookie);
> +		rt6_get_cookie_safe(rt->from, &cookie);
>  
>  	return cookie;
>  }
> diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
> index 24c78fb6ac36..fcda09a58193 100644
> --- a/include/net/ip6_route.h
> +++ b/include/net/ip6_route.h
> @@ -113,8 +113,7 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
>  				      unsigned int prefs,
>  				      struct in6_addr *saddr)
>  {
> -	struct inet6_dev *idev =
> -			rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
> +	struct inet6_dev *idev = rt ? rt->rt6i_idev : NULL;
>  	int err = 0;
>  
>  	if (rt && rt->rt6i_prefsrc.plen)
> diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
> index 2a032b932922..4dd7b4e9de4c 100644
> --- a/net/ipv6/addrconf.c
> +++ b/net/ipv6/addrconf.c
> @@ -927,7 +927,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
>  		pr_warn("Freeing alive inet6 address %p\n", ifp);
>  		return;
>  	}
> -	ip6_rt_put(ifp->rt);
> +	fib6_info_release(ifp->rt);
>  
>  	kfree_rcu(ifp, rcu);
>  }
> @@ -1080,6 +1080,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
>  	ifa->cstamp = ifa->tstamp = jiffies;
>  	ifa->tokenized = false;
>  
> +	fib6_info_hold(rt);
Did fib6_info_alloc() already bump the refcnt?  Why is
another fib6_info_hold() needed?  A comment would be
useful here.

>  	ifa->rt = rt;
>  
>  	ifa->idev = idev;
> @@ -1114,8 +1115,12 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
>  	inet6addr_notifier_call_chain(NETDEV_UP, ifa);
>  out:
>  	if (unlikely(err < 0)) {
> -		if (rt)
> -			ip6_rt_put(rt);
> +		/* one release for the hold taken when rt is set in ifa
> +		 * and a second release for the hold taken on rt create
> +		 */
> +		fib6_info_release(rt);
> +		fib6_info_release(rt);
Does the extra release correspond to the fib6_info_hold() above?

> +
>  		if (ifa) {
>  			if (ifa->idev)
>  				in6_dev_put(ifa->idev);
> @@ -1203,7 +1208,7 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_r
>  		else {
>  			if (!(rt->rt6i_flags & RTF_EXPIRES))
>  				fib6_set_expires(rt, expires);
> -			ip6_rt_put(rt);
> +			fib6_info_release(rt);
>  		}
>  	}
>  }
> @@ -2350,8 +2355,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
>  			continue;
>  		if ((rt->rt6i_flags & noflags) != 0)
>  			continue;
> -		if (!dst_hold_safe(&rt->dst))
> -			rt = NULL;
> +		fib6_info_hold(rt);
>  		break;
>  	}
>  out:
> @@ -2663,7 +2667,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
>  			addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
>  					      dev, expires, flags, GFP_ATOMIC);
>  		}
> -		ip6_rt_put(rt);
> +		fib6_info_release(rt);
>  	}
>  
>  	/* Try to figure out our local address for this prefix */
> @@ -3330,9 +3334,14 @@ static int fixup_permanent_addr(struct net *net,
>  		spin_lock(&ifp->lock);
>  		prev = ifp->rt;
>  		ifp->rt = rt;
> +		fib6_info_hold(rt);
>  		spin_unlock(&ifp->lock);
>  
> -		ip6_rt_put(prev);
> +		/* one release for the hold taken when rt is set in ifa
> +		 * and a second release for the hold taken on rt create
> +		 */
> +		fib6_info_release(prev);
> +		fib6_info_release(prev);
>  	}
>  
>  	if (!(ifp->flags & IFA_F_NOPREFIXROUTE)) {
> @@ -3706,6 +3715,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
>  
>  			rt = ifa->rt;
>  			ifa->rt = NULL;
> +			fib6_info_release(rt);
>  		} else {
>  			state = ifa->state;
>  			ifa->state = INET6_IFADDR_STATE_DEAD;
> @@ -5600,8 +5610,9 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
>  				ip6_del_rt(net, rt);
>  		}
>  		if (ifp->rt) {
> -			if (dst_hold_safe(&ifp->rt->dst))
> -				ip6_del_rt(net, ifp->rt);
> +			ip6_del_rt(net, ifp->rt);
> +			fib6_info_release(ifp->rt);
> +			ifp->rt = NULL;
>  		}
>  		rt_genid_bump_ipv6(net);
>  		break;
> diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
> index f719c980dfad..7a6cd048d095 100644
> --- a/net/ipv6/anycast.c
> +++ b/net/ipv6/anycast.c
> @@ -210,7 +210,7 @@ static void aca_put(struct ifacaddr6 *ac)
>  {
>  	if (refcount_dec_and_test(&ac->aca_refcnt)) {
>  		in6_dev_put(ac->aca_idev);
> -		dst_release(&ac->aca_rt->dst);
> +		fib6_info_release(ac->aca_rt);
>  		kfree(ac);
>  	}
>  }
> @@ -228,6 +228,7 @@ static struct ifacaddr6 *aca_alloc(struct rt6_info *rt,
>  	aca->aca_addr = *addr;
>  	in6_dev_hold(idev);
>  	aca->aca_idev = idev;
> +	fib6_info_hold(rt);
>  	aca->aca_rt = rt;
>  	aca->aca_users = 1;
>  	/* aca_tstamp should be updated upon changes */
> @@ -271,7 +272,7 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
>  	}
>  	aca = aca_alloc(rt, addr);
>  	if (!aca) {
> -		ip6_rt_put(rt);
> +		fib6_info_release(rt);
>  		err = -ENOMEM;
>  		goto out;
>  	}
> @@ -327,7 +328,6 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
>  	write_unlock_bh(&idev->lock);
>  	addrconf_leave_solict(idev, &aca->aca_addr);
>  
> -	dst_hold(&aca->aca_rt->dst);
>  	ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
>  
>  	aca_put(aca);
> @@ -355,7 +355,6 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev)
>  
>  		addrconf_leave_solict(idev, &aca->aca_addr);
>  
> -		dst_hold(&aca->aca_rt->dst);
>  		ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
>  
>  		aca_put(aca);
> diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
> index 6553550bd09b..25065472c51b 100644
> --- a/net/ipv6/ip6_fib.c
> +++ b/net/ipv6/ip6_fib.c
> @@ -695,7 +695,7 @@ static struct fib6_node *fib6_add_1(struct net *net,
>  			/* clean up an intermediate node */
>  			if (!(fn->fn_flags & RTN_RTINFO)) {
>  				RCU_INIT_POINTER(fn->leaf, NULL);
> -				rt6_release(leaf);
> +				fib6_info_release(leaf);
>  			/* remove null_entry in the root node */
>  			} else if (fn->fn_flags & RTN_TL_ROOT &&
>  				   rcu_access_pointer(fn->leaf) ==
> @@ -879,12 +879,32 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
>  			if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) {
>  				new_leaf = fib6_find_prefix(net, table, fn);
>  				atomic_inc(&new_leaf->rt6i_ref);
> +
>  				rcu_assign_pointer(fn->leaf, new_leaf);
> -				rt6_release(rt);
> +				fib6_info_release(rt);
>  			}
>  			fn = rcu_dereference_protected(fn->parent,
>  				    lockdep_is_held(&table->tb6_lock));
>  		}
> +
> +		if (rt->rt6i_pcpu) {
> +			int cpu;
> +
> +			/* release the reference to this fib entry from
> +			 * all of its cached pcpu routes
> +			 */
> +			for_each_possible_cpu(cpu) {
> +				struct rt6_info **ppcpu_rt;
> +				struct rt6_info *pcpu_rt;
> +
> +				ppcpu_rt = per_cpu_ptr(rt->rt6i_pcpu, cpu);
> +				pcpu_rt = *ppcpu_rt;
> +				if (pcpu_rt) {
> +					fib6_info_release(pcpu_rt->from);
> +					pcpu_rt->from = NULL;
> +				}
> +			}
> +		}
>  	}
>  }
>  
> @@ -1071,7 +1091,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
>  		fib6_purge_rt(iter, fn, info->nl_net);
>  		if (rcu_access_pointer(fn->rr_ptr) == iter)
>  			fn->rr_ptr = NULL;
> -		rt6_release(iter);
> +		fib6_info_release(iter);
>  
>  		if (nsiblings) {
>  			/* Replacing an ECMP route, remove all siblings */
> @@ -1087,7 +1107,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
>  					fib6_purge_rt(iter, fn, info->nl_net);
>  					if (rcu_access_pointer(fn->rr_ptr) == iter)
>  						fn->rr_ptr = NULL;
> -					rt6_release(iter);
> +					fib6_info_release(iter);
>  					nsiblings--;
>  					info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
>  				} else {
> @@ -1155,9 +1175,6 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
>  	int replace_required = 0;
>  	int sernum = fib6_new_sernum(info->nl_net);
>  
> -	if (WARN_ON_ONCE(!atomic_read(&rt->dst.__refcnt)))
> -		return -EINVAL;
> -
>  	if (info->nlh) {
>  		if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
>  			allow_create = 0;
> @@ -1272,7 +1289,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
>  			if (pn_leaf == rt) {
>  				pn_leaf = NULL;
>  				RCU_INIT_POINTER(pn->leaf, NULL);
> -				atomic_dec(&rt->rt6i_ref);
> +				fib6_info_release(rt);
>  			}
>  			if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
>  				pn_leaf = fib6_find_prefix(info->nl_net, table,
> @@ -1284,7 +1301,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
>  					    info->nl_net->ipv6.fib6_null_entry;
>  				}
>  #endif
> -				atomic_inc(&pn_leaf->rt6i_ref);
> +				fib6_info_hold(pn_leaf);
>  				rcu_assign_pointer(pn->leaf, pn_leaf);
>  			}
>  		}
> @@ -1306,10 +1323,6 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
>  	     (fn->fn_flags & RTN_TL_ROOT &&
>  	      !rcu_access_pointer(fn->leaf))))
>  		fib6_repair_tree(info->nl_net, table, fn);
> -	/* Always release dst as dst->__refcnt is guaranteed
> -	 * to be taken before entering this function
> -	 */
> -	dst_release_immediate(&rt->dst);
>  	return err;
>  }
>  
> @@ -1609,7 +1622,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
>  				new_fn_leaf = net->ipv6.fib6_null_entry;
>  			}
>  #endif
> -			atomic_inc(&new_fn_leaf->rt6i_ref);
> +			fib6_info_hold(new_fn_leaf);
>  			rcu_assign_pointer(fn->leaf, new_fn_leaf);
>  			return pn;
>  		}
> @@ -1665,7 +1678,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
>  			return pn;
>  
>  		RCU_INIT_POINTER(pn->leaf, NULL);
> -		rt6_release(pn_leaf);
> +		fib6_info_release(pn_leaf);
>  		fn = pn;
>  	}
>  }
> @@ -1735,7 +1748,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
>  	call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL);
>  	if (!info->skip_notify)
>  		inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
> -	rt6_release(rt);
> +	fib6_info_release(rt);
>  }
>  
>  /* Need to own table->tb6_lock */
> @@ -2234,9 +2247,8 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v)
>  
>  	dev = rt->fib6_nh.nh_dev;
>  	seq_printf(seq, " %08x %08x %08x %08x %8s\n",
> -		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
> -		   rt->dst.__use, rt->rt6i_flags,
> -		   dev ? dev->name : "");
> +		   rt->rt6i_metric, atomic_read(&rt->rt6i_ref), 0,
> +		   rt->rt6i_flags, dev ? dev->name : "");
>  	iter->w.leaf = NULL;
>  	return 0;
>  }
> diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
> index 997c7f19ad62..ce728ff06514 100644
> --- a/net/ipv6/ip6_output.c
> +++ b/net/ipv6/ip6_output.c
> @@ -969,7 +969,8 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
>  		if (!had_dst)
>  			*dst = ip6_route_output(net, sk, fl6);
>  		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
> -		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
> +		err = ip6_route_get_saddr(net, rt ? rt->from : NULL,
> +					  &fl6->daddr,
>  					  sk ? inet6_sk(sk)->srcprefs : 0,
>  					  &fl6->saddr);
>  		if (err)
> diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
> index 023da106b682..a921b9bb921f 100644
> --- a/net/ipv6/ndisc.c
> +++ b/net/ipv6/ndisc.c
> @@ -1283,7 +1283,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
>  			ND_PRINTK(0, err,
>  				  "RA: %s got default router without neighbour\n",
>  				  __func__);
> -			ip6_rt_put(rt);
> +			fib6_info_release(rt);
>  			return;
>  		}
>  	}
> @@ -1313,7 +1313,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
>  			ND_PRINTK(0, err,
>  				  "RA: %s got default router without neighbour\n",
>  				  __func__);
> -			ip6_rt_put(rt);
> +			fib6_info_release(rt);
>  			return;
>  		}
>  		neigh->flags |= NTF_ROUTER;
> @@ -1502,7 +1502,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
>  		ND_PRINTK(2, warn, "RA: invalid RA options\n");
>  	}
>  out:
> -	ip6_rt_put(rt);
> +	fib6_info_release(rt);
>  	if (neigh)
>  		neigh_release(neigh);
>  }
> diff --git a/net/ipv6/route.c b/net/ipv6/route.c
> index 19b91c60ee55..e2225588e31b 100644
> --- a/net/ipv6/route.c
> +++ b/net/ipv6/route.c
> @@ -351,13 +351,11 @@ static void rt6_info_init(struct rt6_info *rt)
>  	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
>  	INIT_LIST_HEAD(&rt->rt6i_siblings);
>  	INIT_LIST_HEAD(&rt->rt6i_uncached);
> -	rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
>  }
>  
>  /* allocate dst with ip6_dst_ops */
> -static struct rt6_info *__ip6_dst_alloc(struct net *net,
> -					struct net_device *dev,
> -					int flags)
> +struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
> +			       int flags)
>  {
>  	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
>  					1, DST_OBSOLETE_FORCE_CHK, flags);
> @@ -369,35 +367,15 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net,
>  
>  	return rt;
>  }
> -
> -struct rt6_info *ip6_dst_alloc(struct net *net,
> -			       struct net_device *dev,
> -			       int flags)
> -{
> -	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
> -
> -	if (rt) {
> -		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
> -		if (!rt->rt6i_pcpu) {
> -			dst_release_immediate(&rt->dst);
> -			return NULL;
> -		}
> -	}
> -
> -	return rt;
> -}
>  EXPORT_SYMBOL(ip6_dst_alloc);
>  
>  static void ip6_dst_destroy(struct dst_entry *dst)
>  {
>  	struct rt6_info *rt = (struct rt6_info *)dst;
> -	struct rt6_exception_bucket *bucket;
>  	struct rt6_info *from = rt->from;
>  	struct inet6_dev *idev;
> -	struct dst_metrics *m;
>  
>  	dst_destroy_metrics_generic(dst);
> -	free_percpu(rt->rt6i_pcpu);
>  	rt6_uncached_list_del(rt);
>  
>  	idev = rt->rt6i_idev;
> @@ -405,18 +383,9 @@ static void ip6_dst_destroy(struct dst_entry *dst)
>  		rt->rt6i_idev = NULL;
>  		in6_dev_put(idev);
>  	}
> -	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
> -	if (bucket) {
> -		rt->rt6i_exception_bucket = NULL;
> -		kfree(bucket);
> -	}
> -
> -	m = rt->fib6_metrics;
> -	if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
> -		kfree(m);
>  
>  	rt->from = NULL;
> -	dst_release(&from->dst);
> +	fib6_info_release(from);
>  }
>  
>  static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
> @@ -889,7 +858,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
>  		else
>  			fib6_set_expires(rt, jiffies + HZ * lifetime);
>  
> -		ip6_rt_put(rt);
> +		fib6_info_release(rt);
>  	}
>  	return 0;
>  }
> @@ -1008,10 +977,8 @@ static void ip6_rt_init_dst(struct rt6_info *rt, struct rt6_info *ort)
>  
>  static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
>  {
> -	BUG_ON(from->from);
> -
>  	rt->rt6i_flags &= ~RTF_EXPIRES;
> -	dst_hold(&from->dst);
> +	fib6_info_hold(from);
>  	rt->from = from;
>  	dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
>  	if (from->fib6_metrics != &dst_default_metrics) {
> @@ -1083,7 +1050,7 @@ static struct rt6_info *ip6_create_rt_rcu(struct rt6_info *rt)
>  	struct rt6_info *nrt;
>  
>  	dev = ip6_rt_get_dev_rcu(rt);
> -	nrt = __ip6_dst_alloc(dev_net(dev), dev, flags);
> +	nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
>  	if (nrt)
>  		ip6_rt_copy_init(nrt, rt);
>  
> @@ -1189,8 +1156,6 @@ int ip6_ins_rt(struct net *net, struct rt6_info *rt)
>  {
>  	struct nl_info info = {	.nl_net = net, };
>  
> -	/* Hold dst to account for the reference from the fib6 tree */
> -	dst_hold(&rt->dst);
>  	return __ip6_ins_rt(rt, &info, NULL);
>  }
>  
> @@ -1207,7 +1172,7 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
>  
>  	rcu_read_lock();
>  	dev = ip6_rt_get_dev_rcu(ort);
> -	rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
> +	rt = ip6_dst_alloc(dev_net(dev), dev, 0);
>  	rcu_read_unlock();
>  	if (!rt)
>  		return NULL;
> @@ -1242,7 +1207,7 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
>  
>  	rcu_read_lock();
>  	dev = ip6_rt_get_dev_rcu(rt);
> -	pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, flags);
> +	pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
>  	rcu_read_unlock();
>  	if (!pcpu_rt)
>  		return NULL;
> @@ -1303,7 +1268,7 @@ static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
>  	net = dev_net(rt6_ex->rt6i->dst.dev);
>  	rt6_ex->rt6i->rt6i_node = NULL;
>  	hlist_del_rcu(&rt6_ex->hlist);
> -	rt6_release(rt6_ex->rt6i);
> +	ip6_rt_put(rt6_ex->rt6i);
>  	kfree_rcu(rt6_ex, rcu);
>  	WARN_ON_ONCE(!bucket->depth);
>  	bucket->depth--;
> @@ -1868,17 +1833,11 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
>  
>  		struct rt6_info *uncached_rt;
>  
> -		if (ip6_hold_safe(net, &f6i, true)) {
> -			dst_use_noref(&f6i->dst, jiffies);
> -		} else {
> -			rcu_read_unlock();
> -			uncached_rt = f6i;
> -			goto uncached_rt_out;
> -		}
> +		fib6_info_hold(f6i);
>  		rcu_read_unlock();
>  
>  		uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
> -		dst_release(&rt->dst);
> +		fib6_info_release(f6i);
>  
>  		if (uncached_rt) {
>  			/* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
> @@ -1891,7 +1850,6 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
>  			dst_hold(&uncached_rt->dst);
>  		}
>  
> -uncached_rt_out:
>  		trace_fib6_table_lookup(net, uncached_rt, table, fl6);
>  		return uncached_rt;
>  
> @@ -1900,24 +1858,12 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
>  
>  		struct rt6_info *pcpu_rt;
>  
> -		dst_use_noref(&f6i->dst, jiffies);
>  		local_bh_disable();
>  		pcpu_rt = rt6_get_pcpu_route(f6i);
>  
> -		if (!pcpu_rt) {
> -			/* atomic_inc_not_zero() is needed when using rcu */
> -			if (atomic_inc_not_zero(&f6i->rt6i_ref)) {
> -				/* No dst_hold() on rt is needed because grabbing
> -				 * rt->rt6i_ref makes sure rt can't be released.
> -				 */
> -				pcpu_rt = rt6_make_pcpu_route(net, f6i);
> -				rt6_release(f6i);
> -			} else {
> -				/* rt is already removed from tree */
> -				pcpu_rt = net->ipv6.ip6_null_entry;
> -				dst_hold(&pcpu_rt->dst);
> -			}
> -		}
> +		if (!pcpu_rt)
> +			pcpu_rt = rt6_make_pcpu_route(net, f6i);
> +
>  		local_bh_enable();
>  		rcu_read_unlock();
>  		trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
> @@ -2088,11 +2034,26 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
>   *	Destination cache support functions
>   */
>  
> +static bool fib6_check(struct rt6_info *f6i, u32 cookie)
> +{
> +	u32 rt_cookie = 0;
> +
> +	if ((f6i && !rt6_get_cookie_safe(f6i, &rt_cookie)) ||
> +	     rt_cookie != cookie)
> +		return false;
> +
> +	if (fib6_check_expired(f6i))
> +		return false;
> +
> +	return true;
> +}
> +
>  static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
>  {
>  	u32 rt_cookie = 0;
>  
> -	if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
> +	if ((rt->from && !rt6_get_cookie_safe(rt->from, &rt_cookie)) ||
> +	    rt_cookie != cookie)
>  		return NULL;
>  
>  	if (rt6_check_expired(rt))
> @@ -2105,7 +2066,7 @@ static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
>  {
>  	if (!__rt6_check_expired(rt) &&
>  	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
> -	    rt6_check(rt->from, cookie))
> +	    fib6_check(rt->from, cookie))
>  		return &rt->dst;
>  	else
>  		return NULL;
> @@ -2136,7 +2097,7 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
>  	if (rt) {
>  		if (rt->rt6i_flags & RTF_CACHE) {
>  			if (rt6_check_expired(rt)) {
> -				ip6_del_rt(dev_net(dst->dev), rt);
> +				rt6_remove_exception_rt(rt);
>  				dst = NULL;
>  			}
>  		} else {
> @@ -2157,12 +2118,12 @@ static void ip6_link_failure(struct sk_buff *skb)
>  	if (rt) {
>  		if (rt->rt6i_flags & RTF_CACHE) {
>  			if (dst_hold_safe(&rt->dst))
> -				ip6_del_rt(dev_net(rt->dst.dev), rt);
> -		} else {
> +				rt6_remove_exception_rt(rt);
> +		} else if (rt->from) {
>  			struct fib6_node *fn;
>  
>  			rcu_read_lock();
> -			fn = rcu_dereference(rt->rt6i_node);
> +			fn = rcu_dereference(rt->from->rt6i_node);
>  			if (fn && (rt->rt6i_flags & RTF_DEFAULT))
>  				fn->fn_sernum = -1;
>  			rcu_read_unlock();
> @@ -2752,13 +2713,13 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
>  	if (!table)
>  		goto out;
>  
> -	rt = ip6_dst_alloc(net, NULL,
> -			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
> -
> -	if (!rt) {
> -		err = -ENOMEM;
> +	err = -ENOMEM;
> +	rt = fib6_info_alloc(gfp_flags);
> +	if (!rt)
>  		goto out;
> -	}
> +
> +	if (cfg->fc_flags & RTF_ADDRCONF)
> +		rt->dst_nocount = true;
>  
>  	err = ip6_convert_metrics(net, rt, cfg);
>  	if (err < 0)
> @@ -2915,7 +2876,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
>  	    !netif_carrier_ok(dev))
>  		rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
>  	rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
> -	rt->fib6_nh.nh_dev = rt->dst.dev = dev;
> +	rt->fib6_nh.nh_dev = dev;
>  	rt->rt6i_idev = idev;
>  	rt->rt6i_table = table;
>  
> @@ -2927,9 +2888,8 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
>  		dev_put(dev);
>  	if (idev)
>  		in6_dev_put(idev);
> -	if (rt)
> -		dst_release_immediate(&rt->dst);
>  
> +	fib6_info_release(rt);
>  	return ERR_PTR(err);
>  }
>  
> @@ -2944,6 +2904,7 @@ int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
>  		return PTR_ERR(rt);
>  
>  	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
> +	fib6_info_release(rt);
>  
>  	return err;
>  }
> @@ -2965,7 +2926,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
>  	spin_unlock_bh(&table->tb6_lock);
>  
>  out:
> -	ip6_rt_put(rt);
> +	fib6_info_release(rt);
>  	return err;
>  }
>  
> @@ -3019,7 +2980,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
>  out_unlock:
>  	spin_unlock_bh(&table->tb6_lock);
>  out_put:
> -	ip6_rt_put(rt);
> +	fib6_info_release(rt);
>  
>  	if (skb) {
>  		rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
> @@ -3090,8 +3051,7 @@ static int ip6_route_del(struct fib6_config *cfg,
>  				continue;
>  			if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
>  				continue;
> -			if (!dst_hold_safe(&rt->dst))
> -				break;
> +			fib6_info_hold(rt);
>  			rcu_read_unlock();
>  
>  			/* if gateway was specified only delete the one hop */
> @@ -3359,12 +3319,9 @@ static void __rt6_purge_dflt_routers(struct net *net,
>  	for_each_fib6_node_rt_rcu(&table->tb6_root) {
>  		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
>  		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
> -			if (dst_hold_safe(&rt->dst)) {
> -				rcu_read_unlock();
> -				ip6_del_rt(net, rt);
> -			} else {
> -				rcu_read_unlock();
> -			}
> +			fib6_info_hold(rt);
> +			rcu_read_unlock();
> +			ip6_del_rt(net, rt);
>  			goto restart;
>  		}
>  	}
> @@ -3514,7 +3471,7 @@ struct rt6_info *addrconf_dst_alloc(struct net *net,
>  	struct net_device *dev = idev->dev;
>  	struct rt6_info *rt;
>  
> -	rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
> +	rt = fib6_info_alloc(gfp_flags);
>  	if (!rt)
>  		return ERR_PTR(-ENOMEM);
>  
> @@ -3535,8 +3492,8 @@ struct rt6_info *addrconf_dst_alloc(struct net *net,
>  	}
>  
>  	rt->fib6_nh.nh_gw = *addr;
> +	dev_hold(dev);
>  	rt->fib6_nh.nh_dev = dev;
> -	rt->rt6i_gateway  = *addr;
>  	rt->rt6i_dst.addr = *addr;
>  	rt->rt6i_dst.plen = 128;
>  	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
> @@ -4181,7 +4138,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
>  		err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
>  					    rt, &r_cfg);
>  		if (err) {
> -			dst_release_immediate(&rt->dst);
> +			fib6_info_release(rt);
>  			goto cleanup;
>  		}
>  
> @@ -4198,6 +4155,8 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
>  	list_for_each_entry(nh, &rt6_nh_list, next) {
>  		rt_last = nh->rt6_info;
>  		err = __ip6_ins_rt(nh->rt6_info, info, extack);
> +		fib6_info_release(nh->rt6_info);
> +
>  		/* save reference to first route for notification */
>  		if (!rt_notif && !err)
>  			rt_notif = nh->rt6_info;
> @@ -4245,7 +4204,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
>  cleanup:
>  	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
>  		if (nh->rt6_info)
> -			dst_release_immediate(&nh->rt6_info->dst);
> +			fib6_info_release(nh->rt6_info);
>  		list_del(&nh->next);
>  		kfree(nh);
>  	}
> @@ -4669,14 +4628,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
>  		goto errout;
>  	}
>  
> -	if (fibmatch && rt->from) {
> -		struct rt6_info *ort = rt->from;
> -
> -		dst_hold(&ort->dst);
> -		ip6_rt_put(rt);
> -		rt = ort;
> -	}
> -
>  	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
>  	if (!skb) {
>  		ip6_rt_put(rt);
> @@ -4686,12 +4637,12 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
>  
>  	skb_dst_set(skb, &rt->dst);
>  	if (fibmatch)
> -		err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, iif,
> +		err = rt6_fill_node(net, skb, rt->from, NULL, NULL, NULL, iif,
>  				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
>  				    nlh->nlmsg_seq, 0);
>  	else
> -		err = rt6_fill_node(net, skb, rt, dst, &fl6.daddr, &fl6.saddr,
> -				    iif, RTM_NEWROUTE,
> +		err = rt6_fill_node(net, skb, rt->from, dst,
> +				    &fl6.daddr, &fl6.saddr, iif, RTM_NEWROUTE,
>  				    NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
>  				    0);
>  	if (err < 0) {
> -- 
> 2.11.0
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ