[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20190330004731.23959-5-dsahern@kernel.org>
Date: Fri, 29 Mar 2019 17:47:27 -0700
From: David Ahern <dsahern@...nel.org>
To: davem@...emloft.net, netdev@...r.kernel.org
Cc: idosch@...lanox.com, David Ahern <dsahern@...il.com>
Subject: [PATCH net-next 4/8] ipv6: Move exceptions to fib_nh_common
From: David Ahern <dsahern@...il.com>
Exceptions are really per device, so move rt6i_exception_bucket to
fib_nh_common as a generic nhc_exceptions pointer. Move the flushed
flag to fib_nh_common as well (nhc_exceptions_flushed). Putting both
in fib_nh_common is a strategic choice
to reduce memory consumption. Moving them to fib6_nh pushes the struct over
256 bytes, which increases the actual allocation of a fib entry to 512 bytes.
Exception flushing when a fib entry is deleted is limited to the exceptions
per nexthop that reference the to-be-deleted fib entry (i.e., 'from' points
to it). When a fib6_nh is released, all exceptions are flushed.
Move the core logic of rt6_flush_exceptions, rt6_remove_exception_rt and
rt6_update_exception_stamp_rt to helpers that can be invoked per fib6_nh.
In fib6_nh_flush_exceptions, an exception is removed only if the 'from'
argument is NULL (i.e., flushing all exceptions) or the exception's
rt6_info->from matches 'from' (fib delete).
Signed-off-by: David Ahern <dsahern@...il.com>
---
include/net/ip6_fib.h | 6 +--
include/net/ip_fib.h | 4 +-
net/ipv6/ip6_fib.c | 7 ---
net/ipv6/route.c | 140 ++++++++++++++++++++++++++++++++------------------
4 files changed, 96 insertions(+), 61 deletions(-)
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 58dbb4e82908..c1d1e32e1a19 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -153,7 +153,6 @@ struct fib6_info {
struct rt6key fib6_prefsrc;
struct rt6_info * __percpu *rt6i_pcpu;
- struct rt6_exception_bucket __rcu *rt6i_exception_bucket;
#ifdef CONFIG_IPV6_ROUTER_PREF
unsigned long last_probe;
@@ -162,12 +161,11 @@ struct fib6_info {
u32 fib6_metric;
u8 fib6_protocol;
u8 fib6_type;
- u8 exception_bucket_flushed:1,
- should_flush:1,
+ u8 should_flush:1,
dst_nocount:1,
dst_nopolicy:1,
dst_host:1,
- unused:3;
+ unused:4;
struct fib6_nh fib6_nh;
struct rcu_head rcu;
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index cce437a1b2ff..063430ca0c6e 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -84,7 +84,8 @@ struct fib_nh_common {
unsigned char nhc_scope;
u8 nhc_family;
u8 nhc_has_gw:1,
- unused:7;
+ nhc_exceptions_flushed:1,
+ unused:6;
union {
__be32 ipv4;
struct in6_addr ipv6;
@@ -96,6 +97,7 @@ struct fib_nh_common {
/* v4 specific, but allows v6 gw with v4 routes */
struct rtable __rcu * __percpu *nhc_pcpu_rth_output;
struct rtable __rcu *nhc_rth_input;
+ void __rcu *nhc_exceptions;
};
struct fib_nh {
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 8c00609a1513..cce976a59a8c 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -170,16 +170,9 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags)
void fib6_info_destroy_rcu(struct rcu_head *head)
{
struct fib6_info *f6i = container_of(head, struct fib6_info, rcu);
- struct rt6_exception_bucket *bucket;
WARN_ON(f6i->fib6_node);
- bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket, 1);
- if (bucket) {
- f6i->rt6i_exception_bucket = NULL;
- kfree(bucket);
- }
-
if (f6i->rt6i_pcpu) {
int cpu;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e0ee30cbd079..c66b9ac37036 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1413,6 +1413,7 @@ static unsigned int fib6_mtu(const struct fib6_info *rt)
static int rt6_insert_exception(struct rt6_info *nrt,
struct fib6_info *ort)
{
+ struct fib_nh_common *nhc = &ort->fib6_nh.nh_common;
struct net *net = dev_net(nrt->dst.dev);
struct rt6_exception_bucket *bucket;
struct in6_addr *src_key = NULL;
@@ -1421,12 +1422,12 @@ static int rt6_insert_exception(struct rt6_info *nrt,
spin_lock_bh(&rt6_exception_lock);
- if (ort->exception_bucket_flushed) {
+ if (nhc->nhc_exceptions_flushed) {
err = -EINVAL;
goto out;
}
- bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
+ bucket = rcu_dereference_protected(nhc->nhc_exceptions,
lockdep_is_held(&rt6_exception_lock));
if (!bucket) {
bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
@@ -1435,7 +1436,7 @@ static int rt6_insert_exception(struct rt6_info *nrt,
err = -ENOMEM;
goto out;
}
- rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
+ rcu_assign_pointer(nhc->nhc_exceptions, bucket);
}
#ifdef CONFIG_IPV6_SUBTREES
@@ -1490,8 +1491,9 @@ static int rt6_insert_exception(struct rt6_info *nrt,
return err;
}
-void rt6_flush_exceptions(struct fib6_info *rt)
+static void fib6_nh_flush_exceptions(struct fib6_nh *nh, struct fib6_info *from)
{
+ struct fib_nh_common *nhc = &nh->nh_common;
struct rt6_exception_bucket *bucket;
struct rt6_exception *rt6_ex;
struct hlist_node *tmp;
@@ -1499,17 +1501,21 @@ void rt6_flush_exceptions(struct fib6_info *rt)
spin_lock_bh(&rt6_exception_lock);
/* Prevent rt6_insert_exception() to recreate the bucket list */
- rt->exception_bucket_flushed = 1;
+ if (!from)
+ nhc->nhc_exceptions_flushed = 1;
- bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+ bucket = rcu_dereference_protected(nhc->nhc_exceptions,
lockdep_is_held(&rt6_exception_lock));
if (!bucket)
goto out;
for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
- hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
- rt6_remove_exception(bucket, rt6_ex);
- WARN_ON_ONCE(bucket->depth);
+ hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist) {
+ if (!from ||
+ rcu_access_pointer(rt6_ex->rt6i->from) == from)
+ rt6_remove_exception(bucket, rt6_ex);
+ }
+ WARN_ON_ONCE(!from && bucket->depth);
bucket++;
}
@@ -1517,6 +1523,11 @@ void rt6_flush_exceptions(struct fib6_info *rt)
spin_unlock_bh(&rt6_exception_lock);
}
+void rt6_flush_exceptions(struct fib6_info *f6i)
+{
+ fib6_nh_flush_exceptions(&f6i->fib6_nh, f6i);
+}
+
/* Find cached rt in the hash table inside passed in rt
* Caller has to hold rcu_read_lock()
*/
@@ -1524,12 +1535,13 @@ static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
struct in6_addr *daddr,
struct in6_addr *saddr)
{
+ struct fib_nh_common *nhc = &rt->fib6_nh.nh_common;
struct rt6_exception_bucket *bucket;
struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
struct rt6_info *res = NULL;
- bucket = rcu_dereference(rt->rt6i_exception_bucket);
+ bucket = rcu_dereference(nhc->nhc_exceptions);
#ifdef CONFIG_IPV6_SUBTREES
/* rt6i_src.plen != 0 indicates rt is in subtree
@@ -1549,25 +1561,20 @@ static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
return res;
}
-/* Remove the passed in cached rt from the hash table that contains it */
-static int rt6_remove_exception_rt(struct rt6_info *rt)
+static int fib6_nh_remove_exception(struct fib6_nh *nh, int plen,
+ const struct rt6_info *rt)
{
+ struct fib_nh_common *nhc = &nh->nh_common;
+ const struct in6_addr *src_key = NULL;
struct rt6_exception_bucket *bucket;
- struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
- struct fib6_info *from;
- int err;
-
- from = rcu_dereference(rt->from);
- if (!from ||
- !(rt->rt6i_flags & RTF_CACHE))
- return -EINVAL;
+ int err = 0;
- if (!rcu_access_pointer(from->rt6i_exception_bucket))
+ if (!rcu_access_pointer(nhc->nhc_exceptions))
return -ENOENT;
spin_lock_bh(&rt6_exception_lock);
- bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
+ bucket = rcu_dereference_protected(nhc->nhc_exceptions,
lockdep_is_held(&rt6_exception_lock));
#ifdef CONFIG_IPV6_SUBTREES
/* rt6i_src.plen != 0 indicates 'from' is in subtree
@@ -1576,39 +1583,43 @@ static int rt6_remove_exception_rt(struct rt6_info *rt)
* Otherwise, the exception table is indexed by
* a hash of only rt6i_dst.
*/
- if (from->fib6_src.plen)
+ if (plen)
src_key = &rt->rt6i_src.addr;
#endif
rt6_ex = __rt6_find_exception_spinlock(&bucket,
&rt->rt6i_dst.addr,
src_key);
- if (rt6_ex) {
+ if (rt6_ex)
rt6_remove_exception(bucket, rt6_ex);
- err = 0;
- } else {
+ else
err = -ENOENT;
- }
spin_unlock_bh(&rt6_exception_lock);
return err;
}
-/* Find rt6_ex which contains the passed in rt cache and
- * refresh its stamp
- */
-static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
+/* Remove the passed in cached rt from the hash table that contains it */
+static int rt6_remove_exception_rt(struct rt6_info *rt)
{
- struct rt6_exception_bucket *bucket;
- struct in6_addr *src_key = NULL;
- struct rt6_exception *rt6_ex;
struct fib6_info *from;
- rcu_read_lock();
from = rcu_dereference(rt->from);
if (!from || !(rt->rt6i_flags & RTF_CACHE))
- goto unlock;
+ return -EINVAL;
- bucket = rcu_dereference(from->rt6i_exception_bucket);
+ return fib6_nh_remove_exception(&from->fib6_nh,
+ from->fib6_src.plen, rt);
+}
+
+static void fib6_nh_update_exception(struct fib6_nh *nh, int plen,
+ const struct rt6_info *rt)
+{
+ struct fib_nh_common *nhc = &nh->nh_common;
+ const struct in6_addr *src_key = NULL;
+ struct rt6_exception_bucket *bucket;
+ struct rt6_exception *rt6_ex;
+
+ bucket = rcu_dereference(nhc->nhc_exceptions);
#ifdef CONFIG_IPV6_SUBTREES
/* rt6i_src.plen != 0 indicates 'from' is in subtree
@@ -1617,15 +1628,28 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
* Otherwise, the exception table is indexed by
* a hash of only rt6i_dst.
*/
- if (from->fib6_src.plen)
+ if (plen)
src_key = &rt->rt6i_src.addr;
#endif
- rt6_ex = __rt6_find_exception_rcu(&bucket,
- &rt->rt6i_dst.addr,
- src_key);
+ rt6_ex = __rt6_find_exception_rcu(&bucket, &rt->rt6i_dst.addr, src_key);
if (rt6_ex)
rt6_ex->stamp = jiffies;
+}
+
+/* Find rt6_ex which contains the passed in rt cache and
+ * refresh its stamp
+ */
+static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
+{
+ struct fib6_info *from;
+
+ rcu_read_lock();
+ from = rcu_dereference(rt->from);
+ if (!from || !(rt->rt6i_flags & RTF_CACHE))
+ goto unlock;
+
+ fib6_nh_update_exception(&from->fib6_nh, from->fib6_src.plen, rt);
unlock:
rcu_read_unlock();
}
@@ -1655,11 +1679,12 @@ static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
struct fib6_info *rt, int mtu)
{
+ struct fib_nh_common *nhc = &rt->fib6_nh.nh_common;
struct rt6_exception_bucket *bucket;
struct rt6_exception *rt6_ex;
int i;
- bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+ bucket = rcu_dereference_protected(nhc->nhc_exceptions,
lockdep_is_held(&rt6_exception_lock));
if (!bucket)
@@ -1686,16 +1711,17 @@ static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
struct in6_addr *gateway)
{
+ struct fib_nh_common *nhc = &rt->fib6_nh.nh_common;
struct rt6_exception_bucket *bucket;
struct rt6_exception *rt6_ex;
struct hlist_node *tmp;
int i;
- if (!rcu_access_pointer(rt->rt6i_exception_bucket))
+ if (!rcu_access_pointer(nhc->nhc_exceptions))
return;
spin_lock_bh(&rt6_exception_lock);
- bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+ bucket = rcu_dereference_protected(nhc->nhc_exceptions,
lockdep_is_held(&rt6_exception_lock));
if (bucket) {
@@ -1768,15 +1794,18 @@ void rt6_age_exceptions(struct fib6_info *rt,
{
struct rt6_exception_bucket *bucket;
struct rt6_exception *rt6_ex;
+ struct fib_nh_common *nhc;
struct hlist_node *tmp;
int i;
- if (!rcu_access_pointer(rt->rt6i_exception_bucket))
- return;
-
rcu_read_lock_bh();
+
+ nhc = &rt->fib6_nh.nh_common;
+ if (!rcu_access_pointer(nhc->nhc_exceptions))
+ goto out;
+
spin_lock(&rt6_exception_lock);
- bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+ bucket = rcu_dereference_protected(nhc->nhc_exceptions,
lockdep_is_held(&rt6_exception_lock));
if (bucket) {
@@ -1790,6 +1819,7 @@ void rt6_age_exceptions(struct fib6_info *rt,
}
}
spin_unlock(&rt6_exception_lock);
+out:
rcu_read_unlock_bh();
}
@@ -2596,6 +2626,7 @@ static unsigned int ip6_mtu(const struct dst_entry *dst)
u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
struct in6_addr *saddr)
{
+ struct fib_nh_common *nhc = &f6i->fib6_nh.nh_common;
struct rt6_exception_bucket *bucket;
struct rt6_exception *rt6_ex;
struct in6_addr *src_key;
@@ -2614,7 +2645,7 @@ u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
src_key = saddr;
#endif
- bucket = rcu_dereference(f6i->rt6i_exception_bucket);
+ bucket = rcu_dereference(nhc->nhc_exceptions);
rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
@@ -3011,6 +3042,17 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
void fib6_nh_release(struct fib6_nh *fib6_nh)
{
+ struct fib_nh_common *nhc = &fib6_nh->nh_common;
+ struct rt6_exception_bucket *bucket;
+
+ fib6_nh_flush_exceptions(fib6_nh, NULL);
+
+ bucket = rcu_dereference_protected(nhc->nhc_exceptions, 1);
+ if (bucket) {
+ rcu_assign_pointer(nhc->nhc_exceptions, NULL);
+ kfree(bucket);
+ }
+
fib_nh_common_release(&fib6_nh->nh_common);
}
--
2.11.0
Powered by blists - more mailing lists