[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20180228184356.duh7cxr3db7buq6d@kafai-mbp.dhcp.thefacebook.com>
Date: Wed, 28 Feb 2018 10:44:11 -0800
From: Martin KaFai Lau <kafai@...com>
To: David Ahern <dsahern@...il.com>
CC: <netdev@...r.kernel.org>, <davem@...emloft.net>,
<idosch@...sch.org>, <roopa@...ulusnetworks.com>,
<eric.dumazet@...il.com>, <weiwan@...gle.com>,
<yoshfuji@...ux-ipv6.org>
Subject: Re: [PATCH RFC net-next 18/20] net/ipv6: separate handling of FIB
entries from dst based routes
On Sun, Feb 25, 2018 at 11:47:28AM -0800, David Ahern wrote:
> Signed-off-by: David Ahern <dsahern@...il.com>
> ---
> include/net/ip6_fib.h | 4 +-
> include/net/ip6_route.h | 3 +-
> net/ipv6/addrconf.c | 31 ++++++---
> net/ipv6/anycast.c | 7 +-
> net/ipv6/ip6_fib.c | 50 +++++++++------
> net/ipv6/ip6_output.c | 3 +-
> net/ipv6/ndisc.c | 6 +-
> net/ipv6/route.c | 167 +++++++++++++++++-------------------------------
> 8 files changed, 121 insertions(+), 150 deletions(-)
>
> diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
> index 70978deac538..ff16e3d571a2 100644
> --- a/include/net/ip6_fib.h
> +++ b/include/net/ip6_fib.h
> @@ -315,9 +315,7 @@ static inline u32 rt6_get_cookie(const struct rt6_info *rt)
>
> if (rt->rt6i_flags & RTF_PCPU ||
> (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
> - rt = rt->from;
> -
> - rt6_get_cookie_safe(rt, &cookie);
> + rt6_get_cookie_safe(rt->from, &cookie);
>
> return cookie;
> }
> diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
> index 24c78fb6ac36..fcda09a58193 100644
> --- a/include/net/ip6_route.h
> +++ b/include/net/ip6_route.h
> @@ -113,8 +113,7 @@ static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
> unsigned int prefs,
> struct in6_addr *saddr)
> {
> - struct inet6_dev *idev =
> - rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
> + struct inet6_dev *idev = rt ? rt->rt6i_idev : NULL;
> int err = 0;
>
> if (rt && rt->rt6i_prefsrc.plen)
> diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
> index 2a032b932922..4dd7b4e9de4c 100644
> --- a/net/ipv6/addrconf.c
> +++ b/net/ipv6/addrconf.c
> @@ -927,7 +927,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
> pr_warn("Freeing alive inet6 address %p\n", ifp);
> return;
> }
> - ip6_rt_put(ifp->rt);
> + fib6_info_release(ifp->rt);
>
> kfree_rcu(ifp, rcu);
> }
> @@ -1080,6 +1080,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
> ifa->cstamp = ifa->tstamp = jiffies;
> ifa->tokenized = false;
>
> + fib6_info_hold(rt);
Did fib6_info_alloc() already bump the refcnt? Why is
another fib6_info_hold() needed? A comment would be
useful here.
> ifa->rt = rt;
>
> ifa->idev = idev;
> @@ -1114,8 +1115,12 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
> inet6addr_notifier_call_chain(NETDEV_UP, ifa);
> out:
> if (unlikely(err < 0)) {
> - if (rt)
> - ip6_rt_put(rt);
> + /* one release for the hold taken when rt is set in ifa
> + * and a second release for the hold taken on rt create
> + */
> + fib6_info_release(rt);
> + fib6_info_release(rt);
Does the extra release correspond to the above fib6_info_hold()?
> +
> if (ifa) {
> if (ifa->idev)
> in6_dev_put(ifa->idev);
> @@ -1203,7 +1208,7 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_r
> else {
> if (!(rt->rt6i_flags & RTF_EXPIRES))
> fib6_set_expires(rt, expires);
> - ip6_rt_put(rt);
> + fib6_info_release(rt);
> }
> }
> }
> @@ -2350,8 +2355,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
> continue;
> if ((rt->rt6i_flags & noflags) != 0)
> continue;
> - if (!dst_hold_safe(&rt->dst))
> - rt = NULL;
> + fib6_info_hold(rt);
> break;
> }
> out:
> @@ -2663,7 +2667,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
> addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
> dev, expires, flags, GFP_ATOMIC);
> }
> - ip6_rt_put(rt);
> + fib6_info_release(rt);
> }
>
> /* Try to figure out our local address for this prefix */
> @@ -3330,9 +3334,14 @@ static int fixup_permanent_addr(struct net *net,
> spin_lock(&ifp->lock);
> prev = ifp->rt;
> ifp->rt = rt;
> + fib6_info_hold(rt);
> spin_unlock(&ifp->lock);
>
> - ip6_rt_put(prev);
> + /* one release for the hold taken when rt is set in ifa
> + * and a second release for the hold taken on rt create
> + */
> + fib6_info_release(prev);
> + fib6_info_release(prev);
> }
>
> if (!(ifp->flags & IFA_F_NOPREFIXROUTE)) {
> @@ -3706,6 +3715,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
>
> rt = ifa->rt;
> ifa->rt = NULL;
> + fib6_info_release(rt);
> } else {
> state = ifa->state;
> ifa->state = INET6_IFADDR_STATE_DEAD;
> @@ -5600,8 +5610,9 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
> ip6_del_rt(net, rt);
> }
> if (ifp->rt) {
> - if (dst_hold_safe(&ifp->rt->dst))
> - ip6_del_rt(net, ifp->rt);
> + ip6_del_rt(net, ifp->rt);
> + fib6_info_release(ifp->rt);
> + ifp->rt = NULL;
> }
> rt_genid_bump_ipv6(net);
> break;
> diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
> index f719c980dfad..7a6cd048d095 100644
> --- a/net/ipv6/anycast.c
> +++ b/net/ipv6/anycast.c
> @@ -210,7 +210,7 @@ static void aca_put(struct ifacaddr6 *ac)
> {
> if (refcount_dec_and_test(&ac->aca_refcnt)) {
> in6_dev_put(ac->aca_idev);
> - dst_release(&ac->aca_rt->dst);
> + fib6_info_release(ac->aca_rt);
> kfree(ac);
> }
> }
> @@ -228,6 +228,7 @@ static struct ifacaddr6 *aca_alloc(struct rt6_info *rt,
> aca->aca_addr = *addr;
> in6_dev_hold(idev);
> aca->aca_idev = idev;
> + fib6_info_hold(rt);
> aca->aca_rt = rt;
> aca->aca_users = 1;
> /* aca_tstamp should be updated upon changes */
> @@ -271,7 +272,7 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
> }
> aca = aca_alloc(rt, addr);
> if (!aca) {
> - ip6_rt_put(rt);
> + fib6_info_release(rt);
> err = -ENOMEM;
> goto out;
> }
> @@ -327,7 +328,6 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
> write_unlock_bh(&idev->lock);
> addrconf_leave_solict(idev, &aca->aca_addr);
>
> - dst_hold(&aca->aca_rt->dst);
> ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
>
> aca_put(aca);
> @@ -355,7 +355,6 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev)
>
> addrconf_leave_solict(idev, &aca->aca_addr);
>
> - dst_hold(&aca->aca_rt->dst);
> ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
>
> aca_put(aca);
> diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
> index 6553550bd09b..25065472c51b 100644
> --- a/net/ipv6/ip6_fib.c
> +++ b/net/ipv6/ip6_fib.c
> @@ -695,7 +695,7 @@ static struct fib6_node *fib6_add_1(struct net *net,
> /* clean up an intermediate node */
> if (!(fn->fn_flags & RTN_RTINFO)) {
> RCU_INIT_POINTER(fn->leaf, NULL);
> - rt6_release(leaf);
> + fib6_info_release(leaf);
> /* remove null_entry in the root node */
> } else if (fn->fn_flags & RTN_TL_ROOT &&
> rcu_access_pointer(fn->leaf) ==
> @@ -879,12 +879,32 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
> if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) {
> new_leaf = fib6_find_prefix(net, table, fn);
> atomic_inc(&new_leaf->rt6i_ref);
> +
> rcu_assign_pointer(fn->leaf, new_leaf);
> - rt6_release(rt);
> + fib6_info_release(rt);
> }
> fn = rcu_dereference_protected(fn->parent,
> lockdep_is_held(&table->tb6_lock));
> }
> +
> + if (rt->rt6i_pcpu) {
> + int cpu;
> +
> + /* release the reference to this fib entry from
> + * all of its cached pcpu routes
> + */
> + for_each_possible_cpu(cpu) {
> + struct rt6_info **ppcpu_rt;
> + struct rt6_info *pcpu_rt;
> +
> + ppcpu_rt = per_cpu_ptr(rt->rt6i_pcpu, cpu);
> + pcpu_rt = *ppcpu_rt;
> + if (pcpu_rt) {
> + fib6_info_release(pcpu_rt->from);
> + pcpu_rt->from = NULL;
> + }
> + }
> + }
> }
> }
>
> @@ -1071,7 +1091,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
> fib6_purge_rt(iter, fn, info->nl_net);
> if (rcu_access_pointer(fn->rr_ptr) == iter)
> fn->rr_ptr = NULL;
> - rt6_release(iter);
> + fib6_info_release(iter);
>
> if (nsiblings) {
> /* Replacing an ECMP route, remove all siblings */
> @@ -1087,7 +1107,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
> fib6_purge_rt(iter, fn, info->nl_net);
> if (rcu_access_pointer(fn->rr_ptr) == iter)
> fn->rr_ptr = NULL;
> - rt6_release(iter);
> + fib6_info_release(iter);
> nsiblings--;
> info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
> } else {
> @@ -1155,9 +1175,6 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
> int replace_required = 0;
> int sernum = fib6_new_sernum(info->nl_net);
>
> - if (WARN_ON_ONCE(!atomic_read(&rt->dst.__refcnt)))
> - return -EINVAL;
> -
> if (info->nlh) {
> if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
> allow_create = 0;
> @@ -1272,7 +1289,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
> if (pn_leaf == rt) {
> pn_leaf = NULL;
> RCU_INIT_POINTER(pn->leaf, NULL);
> - atomic_dec(&rt->rt6i_ref);
> + fib6_info_release(rt);
> }
> if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
> pn_leaf = fib6_find_prefix(info->nl_net, table,
> @@ -1284,7 +1301,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
> info->nl_net->ipv6.fib6_null_entry;
> }
> #endif
> - atomic_inc(&pn_leaf->rt6i_ref);
> + fib6_info_hold(pn_leaf);
> rcu_assign_pointer(pn->leaf, pn_leaf);
> }
> }
> @@ -1306,10 +1323,6 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
> (fn->fn_flags & RTN_TL_ROOT &&
> !rcu_access_pointer(fn->leaf))))
> fib6_repair_tree(info->nl_net, table, fn);
> - /* Always release dst as dst->__refcnt is guaranteed
> - * to be taken before entering this function
> - */
> - dst_release_immediate(&rt->dst);
> return err;
> }
>
> @@ -1609,7 +1622,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
> new_fn_leaf = net->ipv6.fib6_null_entry;
> }
> #endif
> - atomic_inc(&new_fn_leaf->rt6i_ref);
> + fib6_info_hold(new_fn_leaf);
> rcu_assign_pointer(fn->leaf, new_fn_leaf);
> return pn;
> }
> @@ -1665,7 +1678,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
> return pn;
>
> RCU_INIT_POINTER(pn->leaf, NULL);
> - rt6_release(pn_leaf);
> + fib6_info_release(pn_leaf);
> fn = pn;
> }
> }
> @@ -1735,7 +1748,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
> call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL);
> if (!info->skip_notify)
> inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
> - rt6_release(rt);
> + fib6_info_release(rt);
> }
>
> /* Need to own table->tb6_lock */
> @@ -2234,9 +2247,8 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v)
>
> dev = rt->fib6_nh.nh_dev;
> seq_printf(seq, " %08x %08x %08x %08x %8s\n",
> - rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
> - rt->dst.__use, rt->rt6i_flags,
> - dev ? dev->name : "");
> + rt->rt6i_metric, atomic_read(&rt->rt6i_ref), 0,
> + rt->rt6i_flags, dev ? dev->name : "");
> iter->w.leaf = NULL;
> return 0;
> }
> diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
> index 997c7f19ad62..ce728ff06514 100644
> --- a/net/ipv6/ip6_output.c
> +++ b/net/ipv6/ip6_output.c
> @@ -969,7 +969,8 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
> if (!had_dst)
> *dst = ip6_route_output(net, sk, fl6);
> rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
> - err = ip6_route_get_saddr(net, rt, &fl6->daddr,
> + err = ip6_route_get_saddr(net, rt ? rt->from : NULL,
> + &fl6->daddr,
> sk ? inet6_sk(sk)->srcprefs : 0,
> &fl6->saddr);
> if (err)
> diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
> index 023da106b682..a921b9bb921f 100644
> --- a/net/ipv6/ndisc.c
> +++ b/net/ipv6/ndisc.c
> @@ -1283,7 +1283,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
> ND_PRINTK(0, err,
> "RA: %s got default router without neighbour\n",
> __func__);
> - ip6_rt_put(rt);
> + fib6_info_release(rt);
> return;
> }
> }
> @@ -1313,7 +1313,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
> ND_PRINTK(0, err,
> "RA: %s got default router without neighbour\n",
> __func__);
> - ip6_rt_put(rt);
> + fib6_info_release(rt);
> return;
> }
> neigh->flags |= NTF_ROUTER;
> @@ -1502,7 +1502,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
> ND_PRINTK(2, warn, "RA: invalid RA options\n");
> }
> out:
> - ip6_rt_put(rt);
> + fib6_info_release(rt);
> if (neigh)
> neigh_release(neigh);
> }
> diff --git a/net/ipv6/route.c b/net/ipv6/route.c
> index 19b91c60ee55..e2225588e31b 100644
> --- a/net/ipv6/route.c
> +++ b/net/ipv6/route.c
> @@ -351,13 +351,11 @@ static void rt6_info_init(struct rt6_info *rt)
> memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
> INIT_LIST_HEAD(&rt->rt6i_siblings);
> INIT_LIST_HEAD(&rt->rt6i_uncached);
> - rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
> }
>
> /* allocate dst with ip6_dst_ops */
> -static struct rt6_info *__ip6_dst_alloc(struct net *net,
> - struct net_device *dev,
> - int flags)
> +struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
> + int flags)
> {
> struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
> 1, DST_OBSOLETE_FORCE_CHK, flags);
> @@ -369,35 +367,15 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net,
>
> return rt;
> }
> -
> -struct rt6_info *ip6_dst_alloc(struct net *net,
> - struct net_device *dev,
> - int flags)
> -{
> - struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
> -
> - if (rt) {
> - rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
> - if (!rt->rt6i_pcpu) {
> - dst_release_immediate(&rt->dst);
> - return NULL;
> - }
> - }
> -
> - return rt;
> -}
> EXPORT_SYMBOL(ip6_dst_alloc);
>
> static void ip6_dst_destroy(struct dst_entry *dst)
> {
> struct rt6_info *rt = (struct rt6_info *)dst;
> - struct rt6_exception_bucket *bucket;
> struct rt6_info *from = rt->from;
> struct inet6_dev *idev;
> - struct dst_metrics *m;
>
> dst_destroy_metrics_generic(dst);
> - free_percpu(rt->rt6i_pcpu);
> rt6_uncached_list_del(rt);
>
> idev = rt->rt6i_idev;
> @@ -405,18 +383,9 @@ static void ip6_dst_destroy(struct dst_entry *dst)
> rt->rt6i_idev = NULL;
> in6_dev_put(idev);
> }
> - bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
> - if (bucket) {
> - rt->rt6i_exception_bucket = NULL;
> - kfree(bucket);
> - }
> -
> - m = rt->fib6_metrics;
> - if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
> - kfree(m);
>
> rt->from = NULL;
> - dst_release(&from->dst);
> + fib6_info_release(from);
> }
>
> static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
> @@ -889,7 +858,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
> else
> fib6_set_expires(rt, jiffies + HZ * lifetime);
>
> - ip6_rt_put(rt);
> + fib6_info_release(rt);
> }
> return 0;
> }
> @@ -1008,10 +977,8 @@ static void ip6_rt_init_dst(struct rt6_info *rt, struct rt6_info *ort)
>
> static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
> {
> - BUG_ON(from->from);
> -
> rt->rt6i_flags &= ~RTF_EXPIRES;
> - dst_hold(&from->dst);
> + fib6_info_hold(from);
> rt->from = from;
> dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
> if (from->fib6_metrics != &dst_default_metrics) {
> @@ -1083,7 +1050,7 @@ static struct rt6_info *ip6_create_rt_rcu(struct rt6_info *rt)
> struct rt6_info *nrt;
>
> dev = ip6_rt_get_dev_rcu(rt);
> - nrt = __ip6_dst_alloc(dev_net(dev), dev, flags);
> + nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
> if (nrt)
> ip6_rt_copy_init(nrt, rt);
>
> @@ -1189,8 +1156,6 @@ int ip6_ins_rt(struct net *net, struct rt6_info *rt)
> {
> struct nl_info info = { .nl_net = net, };
>
> - /* Hold dst to account for the reference from the fib6 tree */
> - dst_hold(&rt->dst);
> return __ip6_ins_rt(rt, &info, NULL);
> }
>
> @@ -1207,7 +1172,7 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
>
> rcu_read_lock();
> dev = ip6_rt_get_dev_rcu(ort);
> - rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
> + rt = ip6_dst_alloc(dev_net(dev), dev, 0);
> rcu_read_unlock();
> if (!rt)
> return NULL;
> @@ -1242,7 +1207,7 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
>
> rcu_read_lock();
> dev = ip6_rt_get_dev_rcu(rt);
> - pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, flags);
> + pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
> rcu_read_unlock();
> if (!pcpu_rt)
> return NULL;
> @@ -1303,7 +1268,7 @@ static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
> net = dev_net(rt6_ex->rt6i->dst.dev);
> rt6_ex->rt6i->rt6i_node = NULL;
> hlist_del_rcu(&rt6_ex->hlist);
> - rt6_release(rt6_ex->rt6i);
> + ip6_rt_put(rt6_ex->rt6i);
> kfree_rcu(rt6_ex, rcu);
> WARN_ON_ONCE(!bucket->depth);
> bucket->depth--;
> @@ -1868,17 +1833,11 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
>
> struct rt6_info *uncached_rt;
>
> - if (ip6_hold_safe(net, &f6i, true)) {
> - dst_use_noref(&f6i->dst, jiffies);
> - } else {
> - rcu_read_unlock();
> - uncached_rt = f6i;
> - goto uncached_rt_out;
> - }
> + fib6_info_hold(f6i);
> rcu_read_unlock();
>
> uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
> - dst_release(&rt->dst);
> + fib6_info_release(f6i);
>
> if (uncached_rt) {
> /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
> @@ -1891,7 +1850,6 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
> dst_hold(&uncached_rt->dst);
> }
>
> -uncached_rt_out:
> trace_fib6_table_lookup(net, uncached_rt, table, fl6);
> return uncached_rt;
>
> @@ -1900,24 +1858,12 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
>
> struct rt6_info *pcpu_rt;
>
> - dst_use_noref(&f6i->dst, jiffies);
> local_bh_disable();
> pcpu_rt = rt6_get_pcpu_route(f6i);
>
> - if (!pcpu_rt) {
> - /* atomic_inc_not_zero() is needed when using rcu */
> - if (atomic_inc_not_zero(&f6i->rt6i_ref)) {
> - /* No dst_hold() on rt is needed because grabbing
> - * rt->rt6i_ref makes sure rt can't be released.
> - */
> - pcpu_rt = rt6_make_pcpu_route(net, f6i);
> - rt6_release(f6i);
> - } else {
> - /* rt is already removed from tree */
> - pcpu_rt = net->ipv6.ip6_null_entry;
> - dst_hold(&pcpu_rt->dst);
> - }
> - }
> + if (!pcpu_rt)
> + pcpu_rt = rt6_make_pcpu_route(net, f6i);
> +
> local_bh_enable();
> rcu_read_unlock();
> trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
> @@ -2088,11 +2034,26 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
> * Destination cache support functions
> */
>
> +static bool fib6_check(struct rt6_info *f6i, u32 cookie)
> +{
> + u32 rt_cookie = 0;
> +
> + if ((f6i && !rt6_get_cookie_safe(f6i, &rt_cookie)) ||
> + rt_cookie != cookie)
> + return false;
> +
> + if (fib6_check_expired(f6i))
> + return false;
> +
> + return true;
> +}
> +
> static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
> {
> u32 rt_cookie = 0;
>
> - if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
> + if ((rt->from && !rt6_get_cookie_safe(rt->from, &rt_cookie)) ||
> + rt_cookie != cookie)
> return NULL;
>
> if (rt6_check_expired(rt))
> @@ -2105,7 +2066,7 @@ static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
> {
> if (!__rt6_check_expired(rt) &&
> rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
> - rt6_check(rt->from, cookie))
> + fib6_check(rt->from, cookie))
> return &rt->dst;
> else
> return NULL;
> @@ -2136,7 +2097,7 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
> if (rt) {
> if (rt->rt6i_flags & RTF_CACHE) {
> if (rt6_check_expired(rt)) {
> - ip6_del_rt(dev_net(dst->dev), rt);
> + rt6_remove_exception_rt(rt);
> dst = NULL;
> }
> } else {
> @@ -2157,12 +2118,12 @@ static void ip6_link_failure(struct sk_buff *skb)
> if (rt) {
> if (rt->rt6i_flags & RTF_CACHE) {
> if (dst_hold_safe(&rt->dst))
> - ip6_del_rt(dev_net(rt->dst.dev), rt);
> - } else {
> + rt6_remove_exception_rt(rt);
> + } else if (rt->from) {
> struct fib6_node *fn;
>
> rcu_read_lock();
> - fn = rcu_dereference(rt->rt6i_node);
> + fn = rcu_dereference(rt->from->rt6i_node);
> if (fn && (rt->rt6i_flags & RTF_DEFAULT))
> fn->fn_sernum = -1;
> rcu_read_unlock();
> @@ -2752,13 +2713,13 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
> if (!table)
> goto out;
>
> - rt = ip6_dst_alloc(net, NULL,
> - (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
> -
> - if (!rt) {
> - err = -ENOMEM;
> + err = -ENOMEM;
> + rt = fib6_info_alloc(gfp_flags);
> + if (!rt)
> goto out;
> - }
> +
> + if (cfg->fc_flags & RTF_ADDRCONF)
> + rt->dst_nocount = true;
>
> err = ip6_convert_metrics(net, rt, cfg);
> if (err < 0)
> @@ -2915,7 +2876,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
> !netif_carrier_ok(dev))
> rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
> rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
> - rt->fib6_nh.nh_dev = rt->dst.dev = dev;
> + rt->fib6_nh.nh_dev = dev;
> rt->rt6i_idev = idev;
> rt->rt6i_table = table;
>
> @@ -2927,9 +2888,8 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
> dev_put(dev);
> if (idev)
> in6_dev_put(idev);
> - if (rt)
> - dst_release_immediate(&rt->dst);
>
> + fib6_info_release(rt);
> return ERR_PTR(err);
> }
>
> @@ -2944,6 +2904,7 @@ int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
> return PTR_ERR(rt);
>
> err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
> + fib6_info_release(rt);
>
> return err;
> }
> @@ -2965,7 +2926,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
> spin_unlock_bh(&table->tb6_lock);
>
> out:
> - ip6_rt_put(rt);
> + fib6_info_release(rt);
> return err;
> }
>
> @@ -3019,7 +2980,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
> out_unlock:
> spin_unlock_bh(&table->tb6_lock);
> out_put:
> - ip6_rt_put(rt);
> + fib6_info_release(rt);
>
> if (skb) {
> rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
> @@ -3090,8 +3051,7 @@ static int ip6_route_del(struct fib6_config *cfg,
> continue;
> if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
> continue;
> - if (!dst_hold_safe(&rt->dst))
> - break;
> + fib6_info_hold(rt);
> rcu_read_unlock();
>
> /* if gateway was specified only delete the one hop */
> @@ -3359,12 +3319,9 @@ static void __rt6_purge_dflt_routers(struct net *net,
> for_each_fib6_node_rt_rcu(&table->tb6_root) {
> if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
> (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
> - if (dst_hold_safe(&rt->dst)) {
> - rcu_read_unlock();
> - ip6_del_rt(net, rt);
> - } else {
> - rcu_read_unlock();
> - }
> + fib6_info_hold(rt);
> + rcu_read_unlock();
> + ip6_del_rt(net, rt);
> goto restart;
> }
> }
> @@ -3514,7 +3471,7 @@ struct rt6_info *addrconf_dst_alloc(struct net *net,
> struct net_device *dev = idev->dev;
> struct rt6_info *rt;
>
> - rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
> + rt = fib6_info_alloc(gfp_flags);
> if (!rt)
> return ERR_PTR(-ENOMEM);
>
> @@ -3535,8 +3492,8 @@ struct rt6_info *addrconf_dst_alloc(struct net *net,
> }
>
> rt->fib6_nh.nh_gw = *addr;
> + dev_hold(dev);
> rt->fib6_nh.nh_dev = dev;
> - rt->rt6i_gateway = *addr;
> rt->rt6i_dst.addr = *addr;
> rt->rt6i_dst.plen = 128;
> tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
> @@ -4181,7 +4138,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
> err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
> rt, &r_cfg);
> if (err) {
> - dst_release_immediate(&rt->dst);
> + fib6_info_release(rt);
> goto cleanup;
> }
>
> @@ -4198,6 +4155,8 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
> list_for_each_entry(nh, &rt6_nh_list, next) {
> rt_last = nh->rt6_info;
> err = __ip6_ins_rt(nh->rt6_info, info, extack);
> + fib6_info_release(nh->rt6_info);
> +
> /* save reference to first route for notification */
> if (!rt_notif && !err)
> rt_notif = nh->rt6_info;
> @@ -4245,7 +4204,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
> cleanup:
> list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
> if (nh->rt6_info)
> - dst_release_immediate(&nh->rt6_info->dst);
> + fib6_info_release(nh->rt6_info);
> list_del(&nh->next);
> kfree(nh);
> }
> @@ -4669,14 +4628,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
> goto errout;
> }
>
> - if (fibmatch && rt->from) {
> - struct rt6_info *ort = rt->from;
> -
> - dst_hold(&ort->dst);
> - ip6_rt_put(rt);
> - rt = ort;
> - }
> -
> skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
> if (!skb) {
> ip6_rt_put(rt);
> @@ -4686,12 +4637,12 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
>
> skb_dst_set(skb, &rt->dst);
> if (fibmatch)
> - err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, iif,
> + err = rt6_fill_node(net, skb, rt->from, NULL, NULL, NULL, iif,
> RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
> nlh->nlmsg_seq, 0);
> else
> - err = rt6_fill_node(net, skb, rt, dst, &fl6.daddr, &fl6.saddr,
> - iif, RTM_NEWROUTE,
> + err = rt6_fill_node(net, skb, rt->from, dst,
> + &fl6.daddr, &fl6.saddr, iif, RTM_NEWROUTE,
> NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
> 0);
> if (err < 0) {
> --
> 2.11.0
>
Powered by blists - more mailing lists