lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20190330004731.23959-5-dsahern@kernel.org>
Date:   Fri, 29 Mar 2019 17:47:27 -0700
From:   David Ahern <dsahern@...nel.org>
To:     davem@...emloft.net, netdev@...r.kernel.org
Cc:     idosch@...lanox.com, David Ahern <dsahern@...il.com>
Subject: [PATCH net-next 4/8] ipv6: Move exceptions to fib_nh_common

From: David Ahern <dsahern@...il.com>

Exceptions are really per device, so move rt6i_exception_bucket to
fib_nh_common as a generic nhc_exceptions pointer. Move the flushed
flag to fib_nh_common as well (nhc_exceptions_flushed). Using
fib_nh_common for both is a strategic choice to reduce memory
consumption: moving them to fib6_nh instead pushes the struct over 256
bytes, which increases the actual allocation of a fib entry to 512 bytes.

Exception flushing when a fib entry is deleted is limited to the exceptions
per nexthop that reference the to-be-deleted fib entry (i.e., 'from' points
to it). When a fib6_nh is released, all of its exceptions are flushed.

Move the core logic of rt6_flush_exceptions, rt6_remove_exception_rt and
rt6_update_exception_stamp_rt to helpers that can be invoked per fib6_nh.
For fib6_nh_flush_exceptions, only remove an exception if 'from' is NULL
(i.e., flushing all exceptions) or the rt6_info->from matches (fib delete).

Signed-off-by: David Ahern <dsahern@...il.com>
---
 include/net/ip6_fib.h |   6 +--
 include/net/ip_fib.h  |   4 +-
 net/ipv6/ip6_fib.c    |   7 ---
 net/ipv6/route.c      | 140 ++++++++++++++++++++++++++++++++------------------
 4 files changed, 96 insertions(+), 61 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 58dbb4e82908..c1d1e32e1a19 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -153,7 +153,6 @@ struct fib6_info {
 	struct rt6key			fib6_prefsrc;
 
 	struct rt6_info * __percpu	*rt6i_pcpu;
-	struct rt6_exception_bucket __rcu *rt6i_exception_bucket;
 
 #ifdef CONFIG_IPV6_ROUTER_PREF
 	unsigned long			last_probe;
@@ -162,12 +161,11 @@ struct fib6_info {
 	u32				fib6_metric;
 	u8				fib6_protocol;
 	u8				fib6_type;
-	u8				exception_bucket_flushed:1,
-					should_flush:1,
+	u8				should_flush:1,
 					dst_nocount:1,
 					dst_nopolicy:1,
 					dst_host:1,
-					unused:3;
+					unused:4;
 
 	struct fib6_nh			fib6_nh;
 	struct rcu_head			rcu;
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index cce437a1b2ff..063430ca0c6e 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -84,7 +84,8 @@ struct fib_nh_common {
 	unsigned char		nhc_scope;
 	u8			nhc_family;
 	u8			nhc_has_gw:1,
-				unused:7;
+				nhc_exceptions_flushed:1,
+				unused:6;
 	union {
 		__be32          ipv4;
 		struct in6_addr ipv6;
@@ -96,6 +97,7 @@ struct fib_nh_common {
 	/* v4 specific, but allows v6 gw with v4 routes */
 	struct rtable __rcu * __percpu *nhc_pcpu_rth_output;
 	struct rtable __rcu     *nhc_rth_input;
+	void __rcu		*nhc_exceptions;
 };
 
 struct fib_nh {
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 8c00609a1513..cce976a59a8c 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -170,16 +170,9 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags)
 void fib6_info_destroy_rcu(struct rcu_head *head)
 {
 	struct fib6_info *f6i = container_of(head, struct fib6_info, rcu);
-	struct rt6_exception_bucket *bucket;
 
 	WARN_ON(f6i->fib6_node);
 
-	bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket, 1);
-	if (bucket) {
-		f6i->rt6i_exception_bucket = NULL;
-		kfree(bucket);
-	}
-
 	if (f6i->rt6i_pcpu) {
 		int cpu;
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e0ee30cbd079..c66b9ac37036 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1413,6 +1413,7 @@ static unsigned int fib6_mtu(const struct fib6_info *rt)
 static int rt6_insert_exception(struct rt6_info *nrt,
 				struct fib6_info *ort)
 {
+	struct fib_nh_common *nhc = &ort->fib6_nh.nh_common;
 	struct net *net = dev_net(nrt->dst.dev);
 	struct rt6_exception_bucket *bucket;
 	struct in6_addr *src_key = NULL;
@@ -1421,12 +1422,12 @@ static int rt6_insert_exception(struct rt6_info *nrt,
 
 	spin_lock_bh(&rt6_exception_lock);
 
-	if (ort->exception_bucket_flushed) {
+	if (nhc->nhc_exceptions_flushed) {
 		err = -EINVAL;
 		goto out;
 	}
 
-	bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
+	bucket = rcu_dereference_protected(nhc->nhc_exceptions,
 					lockdep_is_held(&rt6_exception_lock));
 	if (!bucket) {
 		bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
@@ -1435,7 +1436,7 @@ static int rt6_insert_exception(struct rt6_info *nrt,
 			err = -ENOMEM;
 			goto out;
 		}
-		rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
+		rcu_assign_pointer(nhc->nhc_exceptions, bucket);
 	}
 
 #ifdef CONFIG_IPV6_SUBTREES
@@ -1490,8 +1491,9 @@ static int rt6_insert_exception(struct rt6_info *nrt,
 	return err;
 }
 
-void rt6_flush_exceptions(struct fib6_info *rt)
+static void fib6_nh_flush_exceptions(struct fib6_nh *nh, struct fib6_info *from)
 {
+	struct fib_nh_common *nhc = &nh->nh_common;
 	struct rt6_exception_bucket *bucket;
 	struct rt6_exception *rt6_ex;
 	struct hlist_node *tmp;
@@ -1499,17 +1501,21 @@ void rt6_flush_exceptions(struct fib6_info *rt)
 
 	spin_lock_bh(&rt6_exception_lock);
 	/* Prevent rt6_insert_exception() to recreate the bucket list */
-	rt->exception_bucket_flushed = 1;
+	if (!from)
+		nhc->nhc_exceptions_flushed = 1;
 
-	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+	bucket = rcu_dereference_protected(nhc->nhc_exceptions,
 				    lockdep_is_held(&rt6_exception_lock));
 	if (!bucket)
 		goto out;
 
 	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
-		hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
-			rt6_remove_exception(bucket, rt6_ex);
-		WARN_ON_ONCE(bucket->depth);
+		hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist) {
+			if (!from ||
+			    rcu_access_pointer(rt6_ex->rt6i->from) == from)
+				rt6_remove_exception(bucket, rt6_ex);
+		}
+		WARN_ON_ONCE(!from && bucket->depth);
 		bucket++;
 	}
 
@@ -1517,6 +1523,11 @@ void rt6_flush_exceptions(struct fib6_info *rt)
 	spin_unlock_bh(&rt6_exception_lock);
 }
 
+void rt6_flush_exceptions(struct fib6_info *f6i)
+{
+	fib6_nh_flush_exceptions(&f6i->fib6_nh, f6i);
+}
+
 /* Find cached rt in the hash table inside passed in rt
  * Caller has to hold rcu_read_lock()
  */
@@ -1524,12 +1535,13 @@ static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
 					   struct in6_addr *daddr,
 					   struct in6_addr *saddr)
 {
+	struct fib_nh_common *nhc = &rt->fib6_nh.nh_common;
 	struct rt6_exception_bucket *bucket;
 	struct in6_addr *src_key = NULL;
 	struct rt6_exception *rt6_ex;
 	struct rt6_info *res = NULL;
 
-	bucket = rcu_dereference(rt->rt6i_exception_bucket);
+	bucket = rcu_dereference(nhc->nhc_exceptions);
 
 #ifdef CONFIG_IPV6_SUBTREES
 	/* rt6i_src.plen != 0 indicates rt is in subtree
@@ -1549,25 +1561,20 @@ static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
 	return res;
 }
 
-/* Remove the passed in cached rt from the hash table that contains it */
-static int rt6_remove_exception_rt(struct rt6_info *rt)
+static int fib6_nh_remove_exception(struct fib6_nh *nh, int plen,
+				    const struct rt6_info *rt)
 {
+	struct fib_nh_common *nhc = &nh->nh_common;
+	const struct in6_addr *src_key = NULL;
 	struct rt6_exception_bucket *bucket;
-	struct in6_addr *src_key = NULL;
 	struct rt6_exception *rt6_ex;
-	struct fib6_info *from;
-	int err;
-
-	from = rcu_dereference(rt->from);
-	if (!from ||
-	    !(rt->rt6i_flags & RTF_CACHE))
-		return -EINVAL;
+	int err = 0;
 
-	if (!rcu_access_pointer(from->rt6i_exception_bucket))
+	if (!rcu_access_pointer(nhc->nhc_exceptions))
 		return -ENOENT;
 
 	spin_lock_bh(&rt6_exception_lock);
-	bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
+	bucket = rcu_dereference_protected(nhc->nhc_exceptions,
 				    lockdep_is_held(&rt6_exception_lock));
 #ifdef CONFIG_IPV6_SUBTREES
 	/* rt6i_src.plen != 0 indicates 'from' is in subtree
@@ -1576,39 +1583,43 @@ static int rt6_remove_exception_rt(struct rt6_info *rt)
 	 * Otherwise, the exception table is indexed by
 	 * a hash of only rt6i_dst.
 	 */
-	if (from->fib6_src.plen)
+	if (plen)
 		src_key = &rt->rt6i_src.addr;
 #endif
 	rt6_ex = __rt6_find_exception_spinlock(&bucket,
 					       &rt->rt6i_dst.addr,
 					       src_key);
-	if (rt6_ex) {
+	if (rt6_ex)
 		rt6_remove_exception(bucket, rt6_ex);
-		err = 0;
-	} else {
+	else
 		err = -ENOENT;
-	}
 
 	spin_unlock_bh(&rt6_exception_lock);
 	return err;
 }
 
-/* Find rt6_ex which contains the passed in rt cache and
- * refresh its stamp
- */
-static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
+/* Remove the passed in cached rt from the hash table that contains it */
+static int rt6_remove_exception_rt(struct rt6_info *rt)
 {
-	struct rt6_exception_bucket *bucket;
-	struct in6_addr *src_key = NULL;
-	struct rt6_exception *rt6_ex;
 	struct fib6_info *from;
 
-	rcu_read_lock();
 	from = rcu_dereference(rt->from);
 	if (!from || !(rt->rt6i_flags & RTF_CACHE))
-		goto unlock;
+		return -EINVAL;
 
-	bucket = rcu_dereference(from->rt6i_exception_bucket);
+	return fib6_nh_remove_exception(&from->fib6_nh,
+					from->fib6_src.plen, rt);
+}
+
+static void fib6_nh_update_exception(struct fib6_nh *nh, int plen,
+				     const struct rt6_info *rt)
+{
+	struct fib_nh_common *nhc = &nh->nh_common;
+	const struct in6_addr *src_key = NULL;
+	struct rt6_exception_bucket *bucket;
+	struct rt6_exception *rt6_ex;
+
+	bucket = rcu_dereference(nhc->nhc_exceptions);
 
 #ifdef CONFIG_IPV6_SUBTREES
 	/* rt6i_src.plen != 0 indicates 'from' is in subtree
@@ -1617,15 +1628,28 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
 	 * Otherwise, the exception table is indexed by
 	 * a hash of only rt6i_dst.
 	 */
-	if (from->fib6_src.plen)
+	if (plen)
 		src_key = &rt->rt6i_src.addr;
 #endif
-	rt6_ex = __rt6_find_exception_rcu(&bucket,
-					  &rt->rt6i_dst.addr,
-					  src_key);
+	rt6_ex = __rt6_find_exception_rcu(&bucket, &rt->rt6i_dst.addr, src_key);
 	if (rt6_ex)
 		rt6_ex->stamp = jiffies;
+}
+
+/* Find rt6_ex which contains the passed in rt cache and
+ * refresh its stamp
+ */
+static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
+{
+	struct fib6_info *from;
+
+	rcu_read_lock();
 
+	from = rcu_dereference(rt->from);
+	if (!from || !(rt->rt6i_flags & RTF_CACHE))
+		goto unlock;
+
+	fib6_nh_update_exception(&from->fib6_nh, from->fib6_src.plen, rt);
 unlock:
 	rcu_read_unlock();
 }
@@ -1655,11 +1679,12 @@ static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
 static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
 				       struct fib6_info *rt, int mtu)
 {
+	struct fib_nh_common *nhc = &rt->fib6_nh.nh_common;
 	struct rt6_exception_bucket *bucket;
 	struct rt6_exception *rt6_ex;
 	int i;
 
-	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+	bucket = rcu_dereference_protected(nhc->nhc_exceptions,
 					lockdep_is_held(&rt6_exception_lock));
 
 	if (!bucket)
@@ -1686,16 +1711,17 @@ static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
 static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
 					struct in6_addr *gateway)
 {
+	struct fib_nh_common *nhc = &rt->fib6_nh.nh_common;
 	struct rt6_exception_bucket *bucket;
 	struct rt6_exception *rt6_ex;
 	struct hlist_node *tmp;
 	int i;
 
-	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
+	if (!rcu_access_pointer(nhc->nhc_exceptions))
 		return;
 
 	spin_lock_bh(&rt6_exception_lock);
-	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+	bucket = rcu_dereference_protected(nhc->nhc_exceptions,
 				     lockdep_is_held(&rt6_exception_lock));
 
 	if (bucket) {
@@ -1768,15 +1794,18 @@ void rt6_age_exceptions(struct fib6_info *rt,
 {
 	struct rt6_exception_bucket *bucket;
 	struct rt6_exception *rt6_ex;
+	struct fib_nh_common *nhc;
 	struct hlist_node *tmp;
 	int i;
 
-	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
-		return;
-
 	rcu_read_lock_bh();
+
+	nhc = &rt->fib6_nh.nh_common;
+	if (!rcu_access_pointer(nhc->nhc_exceptions))
+		goto out;
+
 	spin_lock(&rt6_exception_lock);
-	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+	bucket = rcu_dereference_protected(nhc->nhc_exceptions,
 				    lockdep_is_held(&rt6_exception_lock));
 
 	if (bucket) {
@@ -1790,6 +1819,7 @@ void rt6_age_exceptions(struct fib6_info *rt,
 		}
 	}
 	spin_unlock(&rt6_exception_lock);
+out:
 	rcu_read_unlock_bh();
 }
 
@@ -2596,6 +2626,7 @@ static unsigned int ip6_mtu(const struct dst_entry *dst)
 u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
 		      struct in6_addr *saddr)
 {
+	struct fib_nh_common *nhc = &f6i->fib6_nh.nh_common;
 	struct rt6_exception_bucket *bucket;
 	struct rt6_exception *rt6_ex;
 	struct in6_addr *src_key;
@@ -2614,7 +2645,7 @@ u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
 		src_key = saddr;
 #endif
 
-	bucket = rcu_dereference(f6i->rt6i_exception_bucket);
+	bucket = rcu_dereference(nhc->nhc_exceptions);
 	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
 	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
 		mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
@@ -3011,6 +3042,17 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
 
 void fib6_nh_release(struct fib6_nh *fib6_nh)
 {
+	struct fib_nh_common *nhc = &fib6_nh->nh_common;
+	struct rt6_exception_bucket *bucket;
+
+	fib6_nh_flush_exceptions(fib6_nh, NULL);
+
+	bucket = rcu_dereference_protected(nhc->nhc_exceptions, 1);
+	if (bucket) {
+		rcu_assign_pointer(nhc->nhc_exceptions, NULL);
+		kfree(bucket);
+	}
+
 	fib_nh_common_release(&fib6_nh->nh_common);
 }
 
-- 
2.11.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ