lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220120235028.9040-1-tmshlvck@gmail.com>
Date:   Fri, 21 Jan 2022 00:50:29 +0100
From:   Tomas Hlavacek <tmshlvck@...il.com>
To:     "David S . Miller" <davem@...emloft.net>,
        Jakub Kicinski <kuba@...nel.org>,
        Hideaki YOSHIFUJI <yoshfuji@...ux-ipv6.org>,
        David Ahern <dsahern@...nel.org>
Cc:     netdev@...r.kernel.org, Tomas Hlavacek <tmshlvck@...il.com>
Subject: [RFC PATCH] ipv4: fix fnhe dump record multiplication

Fix the multiplication of the FNHE records during dump: Check in
fnhe_dump_bucket() that the dumped record destination address falls
within the key (prefix, prefixlen) of the FIB leaf that is being dumped.

FNHE table records can be dumped multiple times to netlink on RTM_GETROUTE
command with NLM_F_DUMP flag - either to "ip route show cache" or to any
routing daemon. The multiplication is substantial under specific
conditions - it can produce over 120M netlink messages in one dump.
It happens if there is one shared struct fib_nh linked through
struct fib_info (->fib_nh) from many leafs in FIB over struct fib_alias.

This situation can be triggered by importing a full BGP table over GRE
tunnel. In this case there are ~800k routes that translates to ~120k leafs
in FIB that all ulimately links the same next-hop (the other end of
the GRE tunnel). The GRE tunnel creates one FNHE record for each
destination IP that is routed to the tunnel because of PMTU. In my case
I had around 1000 PMTU records after a few minutes in a lab connected to
the public internet so the FNHE dump produced 120M records that easily
stalled BIRD routing daemon as described here:
http://trubka.network.cz/pipermail/bird-users/2022-January/015897.html
(There is a work-around already committed to BIRD that removes unnecessary
dumps of FNHE.)

Signed-off-by: Tomas Hlavacek <tmshlvck@...il.com>
---
 include/net/route.h |  3 ++-
 net/ipv4/fib_trie.c |  3 ++-
 net/ipv4/route.c    | 25 ++++++++++++++++++++++---
 3 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/include/net/route.h b/include/net/route.h
index 2e6c0e153e3a..066eab9c5d99 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -244,7 +244,8 @@ void rt_del_uncached_list(struct rtable *rt);
 
 int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb,
 		       u32 table_id, struct fib_info *fi,
-		       int *fa_index, int fa_start, unsigned int flags);
+		       int *fa_index, int fa_start, unsigned int flags,
+		       __be32 prefix, unsigned char prefixlen);
 
 static inline void ip_rt_put(struct rtable *rt)
 {
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 8060524f4256..7a42db70f46d 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2313,7 +2313,8 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb,
 
 		if (filter->dump_exceptions) {
 			err = fib_dump_info_fnhe(skb, cb, tb->tb_id, fi,
-						 &i_fa, s_fa, flags);
+						 &i_fa, s_fa, flags, xkey,
+						 (KEYLENGTH - fa->fa_slen));
 			if (err < 0)
 				goto stop;
 		}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 0b4103b1e622..bc882c85228d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -3049,10 +3049,25 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
 	return -EMSGSIZE;
 }
 
+static int fnhe_daddr_check(__be32 daddr, struct net *net, u32 table_id,
+			    __be32 prefix, unsigned char prefixlen)
+{
+	struct flowi4 fl4 = { .daddr = daddr };
+	struct fib_table *tb = fib_get_table(net, table_id);
+	struct fib_result res;
+	int err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
+
+	if (!err && res.prefix == prefix && res.prefixlen == prefixlen)
+		return 1;
+
+	return 0;
+}
+
 static int fnhe_dump_bucket(struct net *net, struct sk_buff *skb,
 			    struct netlink_callback *cb, u32 table_id,
 			    struct fnhe_hash_bucket *bucket, int genid,
-			    int *fa_index, int fa_start, unsigned int flags)
+			    int *fa_index, int fa_start, unsigned int flags,
+			    __be32 prefix, unsigned char prefixlen)
 {
 	int i;
 
@@ -3067,6 +3082,9 @@ static int fnhe_dump_bucket(struct net *net, struct sk_buff *skb,
 			if (*fa_index < fa_start)
 				goto next;
 
+			if (!fnhe_daddr_check(fnhe->fnhe_daddr, net, table_id, prefix, prefixlen))
+				goto next;
+
 			if (fnhe->fnhe_genid != genid)
 				goto next;
 
@@ -3096,7 +3114,8 @@ static int fnhe_dump_bucket(struct net *net, struct sk_buff *skb,
 
 int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb,
 		       u32 table_id, struct fib_info *fi,
-		       int *fa_index, int fa_start, unsigned int flags)
+		       int *fa_index, int fa_start, unsigned int flags,
+		       __be32 prefix, unsigned char prefixlen)
 {
 	struct net *net = sock_net(cb->skb->sk);
 	int nhsel, genid = fnhe_genid(net);
@@ -3115,7 +3134,7 @@ int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb,
 		if (bucket)
 			err = fnhe_dump_bucket(net, skb, cb, table_id, bucket,
 					       genid, fa_index, fa_start,
-					       flags);
+					       flags, prefix, prefixlen);
 		rcu_read_unlock();
 		if (err)
 			return err;
-- 
2.25.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ