lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20180702220312.14590-1-dsahern@kernel.org>
Date:   Mon,  2 Jul 2018 15:03:12 -0700
From:   dsahern@...nel.org
To:     netdev@...r.kernel.org
Cc:     idosch@...lanox.com, sharpd@...ulusnetworks.com,
        Thomas.Winter@...iedtelesis.co.nz, David Ahern <dsahern@...il.com>
Subject: [PATCH net] net/ipv6: Revert attempt to simplify route replace and append

From: David Ahern <dsahern@...il.com>

NetworkManager likes to manage linklocal prefix routes and does so with
the NLM_F_APPEND flag, breaking attempts to simplify the IPv6 route
code and by extension enable multipath routes with device only nexthops.

Revert f34436a43092 and its followup
6eba08c3626b ("ipv6: Only emit append events for appended routes").
Update the test cases to reflect the old behavior.

Fixes: f34436a43092 ("net/ipv6: Simplify route replace and appending into multipath route")
Signed-off-by: David Ahern <dsahern@...il.com>
---
Ido: I left 5a15a1b07c51. FIB_EVENT_ENTRY_APPEND is not generated for
     IPv6, so no harm in leaving it.

 include/net/ip6_route.h                  |   6 ++
 net/ipv6/ip6_fib.c                       | 156 +++++++++++++++++--------------
 net/ipv6/route.c                         |   3 +-
 tools/testing/selftests/net/fib_tests.sh |  41 --------
 4 files changed, 93 insertions(+), 113 deletions(-)

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 59656fc580df..7b9c82de11cc 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -66,6 +66,12 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr)
 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
 }
 
+static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i)
+{
+	return (f6i->fib6_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
+	       RTF_GATEWAY;
+}
+
 void ip6_route_input(struct sk_buff *skb);
 struct dst_entry *ip6_route_input_lookup(struct net *net,
 					 struct net_device *dev,
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 1fb2f3118d60..d212738e9d10 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -935,20 +935,19 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
 {
 	struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
 				    lockdep_is_held(&rt->fib6_table->tb6_lock));
-	enum fib_event_type event = FIB_EVENT_ENTRY_ADD;
-	struct fib6_info *iter = NULL, *match = NULL;
+	struct fib6_info *iter = NULL;
 	struct fib6_info __rcu **ins;
+	struct fib6_info __rcu **fallback_ins = NULL;
 	int replace = (info->nlh &&
 		       (info->nlh->nlmsg_flags & NLM_F_REPLACE));
-	int append = (info->nlh &&
-		       (info->nlh->nlmsg_flags & NLM_F_APPEND));
 	int add = (!info->nlh ||
 		   (info->nlh->nlmsg_flags & NLM_F_CREATE));
 	int found = 0;
+	bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
 	u16 nlflags = NLM_F_EXCL;
 	int err;
 
-	if (append)
+	if (info->nlh && (info->nlh->nlmsg_flags & NLM_F_APPEND))
 		nlflags |= NLM_F_APPEND;
 
 	ins = &fn->leaf;
@@ -970,8 +969,13 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
 
 			nlflags &= ~NLM_F_EXCL;
 			if (replace) {
-				found++;
-				break;
+				if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) {
+					found++;
+					break;
+				}
+				if (rt_can_ecmp)
+					fallback_ins = fallback_ins ?: ins;
+				goto next_iter;
 			}
 
 			if (rt6_duplicate_nexthop(iter, rt)) {
@@ -986,51 +990,71 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
 				fib6_metric_set(iter, RTAX_MTU, rt->fib6_pmtu);
 				return -EEXIST;
 			}
-
-			/* first route that matches */
-			if (!match)
-				match = iter;
+			/* If we have the same destination and the same metric,
+			 * but not the same gateway, then the route we try to
+			 * add is sibling to this route, increment our counter
+			 * of siblings, and later we will add our route to the
+			 * list.
+			 * Only static routes (which don't have flag
+			 * RTF_EXPIRES) are used for ECMPv6.
+			 *
+			 * To avoid long list, we only had siblings if the
+			 * route have a gateway.
+			 */
+			if (rt_can_ecmp &&
+			    rt6_qualify_for_ecmp(iter))
+				rt->fib6_nsiblings++;
 		}
 
 		if (iter->fib6_metric > rt->fib6_metric)
 			break;
 
+next_iter:
 		ins = &iter->fib6_next;
 	}
 
+	if (fallback_ins && !found) {
+		/* No ECMP-able route found, replace first non-ECMP one */
+		ins = fallback_ins;
+		iter = rcu_dereference_protected(*ins,
+				    lockdep_is_held(&rt->fib6_table->tb6_lock));
+		found++;
+	}
+
 	/* Reset round-robin state, if necessary */
 	if (ins == &fn->leaf)
 		fn->rr_ptr = NULL;
 
 	/* Link this route to others same route. */
-	if (append && match) {
+	if (rt->fib6_nsiblings) {
+		unsigned int fib6_nsiblings;
 		struct fib6_info *sibling, *temp_sibling;
 
-		if (rt->fib6_flags & RTF_REJECT) {
-			NL_SET_ERR_MSG(extack,
-				       "Can not append a REJECT route");
-			return -EINVAL;
-		} else if (match->fib6_flags & RTF_REJECT) {
-			NL_SET_ERR_MSG(extack,
-				       "Can not append to a REJECT route");
-			return -EINVAL;
+		/* Find the first route that have the same metric */
+		sibling = leaf;
+		while (sibling) {
+			if (sibling->fib6_metric == rt->fib6_metric &&
+			    rt6_qualify_for_ecmp(sibling)) {
+				list_add_tail(&rt->fib6_siblings,
+					      &sibling->fib6_siblings);
+				break;
+			}
+			sibling = rcu_dereference_protected(sibling->fib6_next,
+				    lockdep_is_held(&rt->fib6_table->tb6_lock));
 		}
-		event = FIB_EVENT_ENTRY_APPEND;
-		rt->fib6_nsiblings = match->fib6_nsiblings;
-		list_add_tail(&rt->fib6_siblings, &match->fib6_siblings);
-		match->fib6_nsiblings++;
-
 		/* For each sibling in the list, increment the counter of
 		 * siblings. BUG() if counters does not match, list of siblings
 		 * is broken!
 		 */
+		fib6_nsiblings = 0;
 		list_for_each_entry_safe(sibling, temp_sibling,
-					 &match->fib6_siblings, fib6_siblings) {
+					 &rt->fib6_siblings, fib6_siblings) {
 			sibling->fib6_nsiblings++;
-			BUG_ON(sibling->fib6_nsiblings != match->fib6_nsiblings);
+			BUG_ON(sibling->fib6_nsiblings != rt->fib6_nsiblings);
+			fib6_nsiblings++;
 		}
-
-		rt6_multipath_rebalance(match);
+		BUG_ON(fib6_nsiblings != rt->fib6_nsiblings);
+		rt6_multipath_rebalance(temp_sibling);
 	}
 
 	/*
@@ -1043,8 +1067,9 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
 add:
 		nlflags |= NLM_F_CREATE;
 
-		err = call_fib6_entry_notifiers(info->nl_net, event, rt,
-						extack);
+		err = call_fib6_entry_notifiers(info->nl_net,
+						FIB_EVENT_ENTRY_ADD,
+						rt, extack);
 		if (err)
 			return err;
 
@@ -1062,7 +1087,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
 		}
 
 	} else {
-		struct fib6_info *tmp;
+		int nsiblings;
 
 		if (!found) {
 			if (add)
@@ -1077,57 +1102,48 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
 		if (err)
 			return err;
 
-		/* if route being replaced has siblings, set tmp to
-		 * last one, otherwise tmp is current route. this is
-		 * used to set fib6_next for new route
-		 */
-		if (iter->fib6_nsiblings)
-			tmp = list_last_entry(&iter->fib6_siblings,
-					      struct fib6_info,
-					      fib6_siblings);
-		else
-			tmp = iter;
-
-		/* insert new route */
 		atomic_inc(&rt->fib6_ref);
 		rcu_assign_pointer(rt->fib6_node, fn);
-		rt->fib6_next = tmp->fib6_next;
+		rt->fib6_next = iter->fib6_next;
 		rcu_assign_pointer(*ins, rt);
-
 		if (!info->skip_notify)
 			inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
 		if (!(fn->fn_flags & RTN_RTINFO)) {
 			info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
 			fn->fn_flags |= RTN_RTINFO;
 		}
+		nsiblings = iter->fib6_nsiblings;
+		iter->fib6_node = NULL;
+		fib6_purge_rt(iter, fn, info->nl_net);
+		if (rcu_access_pointer(fn->rr_ptr) == iter)
+			fn->rr_ptr = NULL;
+		fib6_info_release(iter);
 
-		/* delete old route */
-		rt = iter;
-
-		if (rt->fib6_nsiblings) {
-			struct fib6_info *tmp;
-
+		if (nsiblings) {
 			/* Replacing an ECMP route, remove all siblings */
-			list_for_each_entry_safe(iter, tmp, &rt->fib6_siblings,
-						 fib6_siblings) {
-				iter->fib6_node = NULL;
-				fib6_purge_rt(iter, fn, info->nl_net);
-				if (rcu_access_pointer(fn->rr_ptr) == iter)
-					fn->rr_ptr = NULL;
-				fib6_info_release(iter);
-
-				rt->fib6_nsiblings--;
-				info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
+			ins = &rt->fib6_next;
+			iter = rcu_dereference_protected(*ins,
+				    lockdep_is_held(&rt->fib6_table->tb6_lock));
+			while (iter) {
+				if (iter->fib6_metric > rt->fib6_metric)
+					break;
+				if (rt6_qualify_for_ecmp(iter)) {
+					*ins = iter->fib6_next;
+					iter->fib6_node = NULL;
+					fib6_purge_rt(iter, fn, info->nl_net);
+					if (rcu_access_pointer(fn->rr_ptr) == iter)
+						fn->rr_ptr = NULL;
+					fib6_info_release(iter);
+					nsiblings--;
+					info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
+				} else {
+					ins = &iter->fib6_next;
+				}
+				iter = rcu_dereference_protected(*ins,
+					lockdep_is_held(&rt->fib6_table->tb6_lock));
 			}
+			WARN_ON(nsiblings != 0);
 		}
-
-		WARN_ON(rt->fib6_nsiblings != 0);
-
-		rt->fib6_node = NULL;
-		fib6_purge_rt(rt, fn, info->nl_net);
-		if (rcu_access_pointer(fn->rr_ptr) == rt)
-			fn->rr_ptr = NULL;
-		fib6_info_release(rt);
 	}
 
 	return 0;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 86a0e4333d42..63f99411f0de 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3842,7 +3842,7 @@ static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
 			lockdep_is_held(&rt->fib6_table->tb6_lock));
 	while (iter) {
 		if (iter->fib6_metric == rt->fib6_metric &&
-		    iter->fib6_nsiblings)
+		    rt6_qualify_for_ecmp(iter))
 			return iter;
 		iter = rcu_dereference_protected(iter->fib6_next,
 				lockdep_is_held(&rt->fib6_table->tb6_lock));
@@ -4439,7 +4439,6 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
 		 */
 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
 						     NLM_F_REPLACE);
-		cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_APPEND;
 		nhn++;
 	}
 
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 78245d60d8bc..0f45633bd634 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -740,13 +740,6 @@ ipv6_rt_add()
 	run_cmd "$IP -6 ro add unreachable 2001:db8:104::/64"
 	log_test $? 2 "Attempt to add duplicate route - reject route"
 
-	# iproute2 prepend only sets NLM_F_CREATE
-	# - adds a new route; does NOT convert existing route to ECMP
-	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
-	run_cmd "$IP -6 ro prepend 2001:db8:104::/64 via 2001:db8:103::2"
-	check_route6 "2001:db8:104::/64 via 2001:db8:101::2 dev veth1 metric 1024 2001:db8:104::/64 via 2001:db8:103::2 dev veth3 metric 1024"
-	log_test $? 0 "Add new route for existing prefix (w/o NLM_F_EXCL)"
-
 	# route append with same prefix adds a new route
 	# - iproute2 sets NLM_F_CREATE | NLM_F_APPEND
 	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
@@ -754,27 +747,6 @@ ipv6_rt_add()
 	check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
 	log_test $? 0 "Append nexthop to existing route - gw"
 
-	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
-	run_cmd "$IP -6 ro append 2001:db8:104::/64 dev veth3"
-	check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop dev veth3 weight 1"
-	log_test $? 0 "Append nexthop to existing route - dev only"
-
-	# multipath route can not have a nexthop that is a reject route
-	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
-	run_cmd "$IP -6 ro append unreachable 2001:db8:104::/64"
-	log_test $? 2 "Append nexthop to existing route - reject route"
-
-	# reject route can not be converted to multipath route
-	run_cmd "$IP -6 ro flush 2001:db8:104::/64"
-	run_cmd "$IP -6 ro add unreachable 2001:db8:104::/64"
-	run_cmd "$IP -6 ro append 2001:db8:104::/64 via 2001:db8:103::2"
-	log_test $? 2 "Append nexthop to existing reject route - gw"
-
-	run_cmd "$IP -6 ro flush 2001:db8:104::/64"
-	run_cmd "$IP -6 ro add unreachable 2001:db8:104::/64"
-	run_cmd "$IP -6 ro append 2001:db8:104::/64 dev veth3"
-	log_test $? 2 "Append nexthop to existing reject route - dev only"
-
 	# insert mpath directly
 	add_route6 "2001:db8:104::/64" "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
 	check_route6  "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
@@ -819,13 +791,6 @@ ipv6_rt_replace_single()
 	check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::3 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
 	log_test $? 0 "Single path with multipath"
 
-	# single path with reject
-	#
-	add_initial_route6 "nexthop via 2001:db8:101::2"
-	run_cmd "$IP -6 ro replace unreachable 2001:db8:104::/64"
-	check_route6 "unreachable 2001:db8:104::/64 dev lo metric 1024"
-	log_test $? 0 "Single path with reject route"
-
 	# single path with single path using MULTIPATH attribute
 	#
 	add_initial_route6 "via 2001:db8:101::2"
@@ -873,12 +838,6 @@ ipv6_rt_replace_mpath()
 	check_route6 "2001:db8:104::/64 via 2001:db8:101::3 dev veth1 metric 1024"
 	log_test $? 0 "Multipath with single path via multipath attribute"
 
-	# multipath with reject
-	add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
-	run_cmd "$IP -6 ro replace unreachable 2001:db8:104::/64"
-	check_route6 "unreachable 2001:db8:104::/64 dev lo metric 1024"
-	log_test $? 0 "Multipath with reject route"
-
 	# route replace fails - invalid nexthop 1
 	add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
 	run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:111::3 nexthop via 2001:db8:103::3"
-- 
2.11.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ