lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <1437146248-21311-1-git-send-email-fw@strlen.de>
Date:	Fri, 17 Jul 2015 17:17:28 +0200
From:	Florian Westphal <fw@...len.de>
To:	<netdev@...r.kernel.org>
Cc:	Florian Westphal <fw@...len.de>,
	Alexander Duyck <alexander.h.duyck@...hat.com>
Subject: [PATCH -next] net: fib: use fib result when zero-length prefix aliases exist

default route selection is not deterministic when TOS keys are used:

ip route del default

ip route add tos 0x00 via 10.2.100.100
ip route add tos 0x04 via 10.2.100.101
ip route add tos 0x08 via 10.2.100.102
ip route add tos 0x0C via 10.2.100.103
ip route add tos 0x10 via 10.2.100.104

[ i.e. 5 routes with prefix length 0, differentiated via TOS key ]

ip route get 10.3.1.1 tos 0x4
-> 10.2.100.101
ip route get 10.3.1.1 tos 0x8
-> 10.2.100.102
ip route get tos 0x0C
-> 10.2.100.103

But for 0x10, we'll get round-robin results among all the aliases.
Repeated queries return .100, 101, 102, etc. in turn.

This behaviour is not new  -- fib_select_default can be traced back to
fn_hash_select_default in CVS history.

Routing cache made 'round-robin' behaviour less visible.

This changes fib_select_default to not change the FIB chosen result EXCEPT
if this nexthop appears to be unreachable.

fib_detect_death() logic is reversed -- we consider a nexthop 'dead' only
if it has a neigh entry in unreachable state.

Only then we search fib_aliases for an alternative and use one of these in
a round-robin fashion.  If all are believed to be unreachable, no change is
made and fib-chosen nh_gw is used.

Reported-by: Hagen Paul Pfeifer <hagen@...u.net>
Cc: Alexander Duyck <alexander.h.duyck@...hat.com>
Signed-off-by: Florian Westphal <fw@...len.de>
---
 net/ipv4/fib_semantics.c | 71 ++++++++++++++++++++++++------------------------
 1 file changed, 36 insertions(+), 35 deletions(-)

diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index c7358ea..83b485b 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -410,28 +410,24 @@ errout:
 		rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
 }
 
-static int fib_detect_death(struct fib_info *fi, int order,
-			    struct fib_info **last_resort, int *last_idx,
-			    int dflt)
+static bool fib_nud_is_unreach(const struct fib_info *fi)
 {
 	struct neighbour *n;
 	int state = NUD_NONE;
 
-	n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
-	if (n) {
+	local_bh_disable();
+
+	n = __neigh_lookup_noref(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
+	if (n)
 		state = n->nud_state;
-		neigh_release(n);
-	}
-	if (state == NUD_REACHABLE)
-		return 0;
-	if ((state & NUD_VALID) && order != dflt)
-		return 0;
-	if ((state & NUD_VALID) ||
-	    (*last_idx < 0 && order > dflt)) {
-		*last_resort = fi;
-		*last_idx = order;
-	}
-	return 1;
+
+	local_bh_enable();
+
+	/* Caller might be able to find alternate (reachable) nexthop */
+	if (state & (NUD_INCOMPLETE | NUD_FAILED))
+		return true;
+
+	return false;
 }
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -1204,12 +1200,17 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event)
 /* Must be invoked inside of an RCU protected region.  */
 void fib_select_default(struct fib_result *res)
 {
-	struct fib_info *fi = NULL, *last_resort = NULL;
 	struct hlist_head *fa_head = res->fa_head;
+	struct fib_info *last_resort = NULL;
 	struct fib_table *tb = res->table;
 	int order = -1, last_idx = -1;
 	struct fib_alias *fa;
+	bool unreach = fib_nud_is_unreach(res->fi);
 
+	if (likely(!unreach))
+		return;
+
+	/* attempt to pick another nexthop */
 	hlist_for_each_entry_rcu(fa, fa_head, fa_list) {
 		struct fib_info *next_fi = fa->fa_info;
 
@@ -1223,33 +1224,33 @@ void fib_select_default(struct fib_result *res)
 		    next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
 			continue;
 
+		order++;
+
+		if (next_fi == res->fi) /* already tested, not reachable */
+			continue;
+
 		fib_alias_accessed(fa);
 
-		if (!fi) {
-			if (next_fi != res->fi)
+		unreach = fib_nud_is_unreach(next_fi);
+		if (unreach)
+			continue;
+
+		/* try to round-robin among all fa_aliases in case
+		 * res->fi nexthop is unreachable.
+		 */
+		if (last_idx < 0 || order > tb->tb_default) {
+			last_resort = next_fi;
+			last_idx = order;
+			if (order > tb->tb_default)
 				break;
-		} else if (!fib_detect_death(fi, order, &last_resort,
-					     &last_idx, tb->tb_default)) {
-			fib_result_assign(res, fi);
-			tb->tb_default = order;
-			goto out;
 		}
-		fi = next_fi;
-		order++;
 	}
 
-	if (order <= 0 || !fi) {
+	if (order < 0) {
 		tb->tb_default = -1;
 		goto out;
 	}
 
-	if (!fib_detect_death(fi, order, &last_resort, &last_idx,
-				tb->tb_default)) {
-		fib_result_assign(res, fi);
-		tb->tb_default = order;
-		goto out;
-	}
-
 	if (last_idx >= 0)
 		fib_result_assign(res, last_resort);
 	tb->tb_default = last_idx;
-- 
2.0.5

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ