netdev - Re: [PATCH RFC] ipv6: use stronger hash for reassembly queue hash table

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20130313012715.GE14801@order.stressinduktion.org>
Date:	Wed, 13 Mar 2013 02:27:15 +0100
From:	Hannes Frederic Sowa <hannes@...essinduktion.org>
To:	Eric Dumazet <eric.dumazet@...il.com>
Cc:	netdev@...r.kernel.org, yoshfuji@...ux-ipv6.org, brouer@...hat.com
Subject: Re: [PATCH RFC] ipv6: use stronger hash for reassembly queue hash table

[cc'ing Jesper, too]

On Fri, Mar 08, 2013 at 07:23:39AM -0800, Eric Dumazet wrote:
> On Fri, 2013-03-08 at 16:08 +0100, Hannes Frederic Sowa wrote:
> > On Fri, Mar 08, 2013 at 06:53:06AM -0800, Eric Dumazet wrote:
> > > No matter how you hash, a hacker can easily fill your defrag unit with
> > > not complete datagrams, so what's the point ?
> > 
> > I want to harden reassembly logic against all fragments being put in
> > the same hash bucket because of malicious traffic and thus creating
> > long list traversals in the fragment queue hash table.
> 
> Note that the long traversal was a real issue with TCP (thats why I
> introduced ipv6_addr_jhash()), as a single ehash slot could contains
> thousand of sockets.
> 
> But with fragments, we should just limit the depth of any particular
> slot, and drop above a particular threshold.

[PATCH net-next RFC] inet: add max_depth to limit list length in inet_frags hash

This implements a trivial drop for fragments whose hash queue length
is above some limit.

I calculate the limit as follows:

I averaged the following formula

max_depth = max_threshold / INETFRAGS_HASHSZ / rounded up (SKB_TRUELEN(0) +
            sizeof(struct ipq or struct frag_queue))

to

max_threshold >> 15

So we start with a maximum list length of 128. I think we could halve
this value to 64, but because I have no real performance data I left it
at this higher value for now.

This patch only protects IPv6 (and not netfilter IPv6 defragmentation)
and switches off limit checking if max_depth is zero. I'll rewrite
the check if we agree that this simple solution (simple drop) is the way
to go, and will clamp the minimum value to 1 as soon as I have also
migrated IPv4 and netfilter to the new sysctl handler.

When testing this patch:

Disable netfilter defragmentation for IPv6 on your machine if you test
this patch, otherwise you won't see the improvement. The machine now runs
smoothly under a fragmentation DoS.

Is it OK if I target this patch at net-next, since the hashing changes
are already in there?

Signed-off-by: Hannes Frederic Sowa <hannes@...essinduktion.org>
---
 include/net/inet_frag.h  | 13 +++++++++++++
 net/ipv4/inet_fragment.c | 25 ++++++++++++++++++++++++-
 net/ipv6/reassembly.c    |  6 +++++-
 3 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 76c3fe5..9ba6ada 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -17,6 +17,7 @@ struct netns_frags {
 	int			timeout;
 	int			high_thresh;
 	int			low_thresh;
+	int			max_depth;
 };
 
 struct inet_frag_queue {
@@ -43,6 +44,11 @@ struct inet_frag_queue {
 
 #define INETFRAGS_HASHSZ		64
 
+/* max_depth = max_threshold / INETFRAGS_HASHSZ / rounded up (SKB_TRUELEN(0) +
+ *	       sizeof(struct ipq or struct frag_queue))
+ */
+#define INETFRAGS_MAXDEPTH_SHIFT	15
+
 struct inet_frags {
 	struct hlist_head	hash[INETFRAGS_HASHSZ];
 	/* This rwlock is a global lock (seperate per IPv4, IPv6 and
@@ -144,4 +150,11 @@ static inline void inet_frag_lru_add(struct netns_frags *nf,
 	list_add_tail(&q->lru_list, &nf->lru_list);
 	spin_unlock(&nf->lru_lock);
 }
+
+#ifdef CONFIG_SYSCTL
+int inet_frag_update_high_thresh(struct ctl_table *table, int write,
+				 void __user *buffer, size_t *lenp,
+				 loff_t *ppos);
+#endif
+
 #endif
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 245ae07..92f1fdd 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -277,6 +277,7 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
 	__releases(&f->lock)
 {
 	struct inet_frag_queue *q;
+	int depth = 0;
 
 	hlist_for_each_entry(q, &f->hash[hash], list) {
 		if (q->net == nf && f->match(q, key)) {
@@ -284,9 +285,31 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
 			read_unlock(&f->lock);
 			return q;
 		}
+		depth++;
 	}
 	read_unlock(&f->lock);
 
-	return inet_frag_create(nf, f, key);
+	if (!nf->max_depth || depth <= nf->max_depth)
+		return inet_frag_create(nf, f, key);
+	else
+		return NULL;
 }
 EXPORT_SYMBOL(inet_frag_find);
+
+#ifdef CONFIG_SYSCTL
+int inet_frag_update_high_thresh(struct ctl_table *table, int write,
+				 void __user *buffer, size_t *lenp,
+				 loff_t *ppos)
+{
+	int ret;
+	ret = proc_dointvec(table, write, buffer, lenp, ppos);
+
+	if (!ret && write && table->extra1) {
+		int *data = table->data;
+		int *max_depth = table->extra1;
+		*max_depth = *data >> INETFRAGS_MAXDEPTH_SHIFT;
+	}
+	return ret;
+}
+EXPORT_SYMBOL(inet_frag_update_high_thresh);
+#endif
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 3c6a772..84b35f6 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -558,7 +558,8 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = {
 		.data		= &init_net.ipv6.frags.high_thresh,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= inet_frag_update_high_thresh,
+		.extra1		= &init_net.ipv6.frags.max_depth
 	},
 	{
 		.procname	= "ip6frag_low_thresh",
@@ -600,6 +601,7 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
 			goto err_alloc;
 
 		table[0].data = &net->ipv6.frags.high_thresh;
+		table[0].extra1 = &net->ipv6.frags.max_depth;
 		table[1].data = &net->ipv6.frags.low_thresh;
 		table[2].data = &net->ipv6.frags.timeout;
 
@@ -670,6 +672,8 @@ static int __net_init ipv6_frags_init_net(struct net *net)
 	net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
 	net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
 	net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
+	net->ipv6.frags.max_depth =
+		IPV6_FRAG_HIGH_THRESH >> INETFRAGS_MAXDEPTH_SHIFT;
 
 	inet_frags_init_net(&net->ipv6.frags);
 
-- 
1.8.1.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html