lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 12 Dec 2017 14:09:28 +0100
From:   Paolo Abeni <pabeni@...hat.com>
To:     netdev@...r.kernel.org
Cc:     "David S. Miller" <davem@...emloft.net>,
        Craig Gallek <kraig@...gle.com>,
        Eric Dumazet <edumazet@...gle.com>
Subject: [RFC PATCH] reuseport: compute the ehash only if needed

When a reuseport socket group is using a BPF filter to distribute
the packets among the sockets, we don't need to compute any hash
value, but the current reuseport_select_sock() requires the
caller to compute such hash in advance.

This patch reworks reuseport_select_sock() to compute the hash value
only if needed - missing or failing BPF filter. Since different
hash functions have different argument types - ipv4 addresses vs ipv6
ones - to avoid over-complicate the interface, reuseport_select_sock()
is now a macro.

Additionally, the sk_reuseport test is move inside reuseport_select_sock,
to avoid some code duplication.

Overall this gives small but measurable performance improvement
under UDP flood while using SO_REUSEPORT + BPF.

Signed-off-by: Paolo Abeni <pabeni@...hat.com>
---
 include/net/sock_reuseport.h | 32 ++++++++++++++++++++++++++++----
 net/core/sock_reuseport.c    | 34 +++++++++++++++-------------------
 net/ipv4/inet_hashtables.c   | 28 ++++++++++------------------
 net/ipv4/udp.c               | 30 ++++++++++++------------------
 net/ipv6/inet6_hashtables.c  | 28 ++++++++++------------------
 net/ipv6/udp.c               | 31 ++++++++++++-------------------
 6 files changed, 87 insertions(+), 96 deletions(-)

diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index 0054b3a9b923..e7d71d22dca7 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -16,13 +16,37 @@ struct sock_reuseport {
 	struct sock		*socks[0];	/* array of sock pointers */
 };
 
+struct reuseport_info {
+	struct sock_reuseport *reuse;
+	struct sock *sk;
+	u16 socks;
+};
+
 extern int reuseport_alloc(struct sock *sk);
 extern int reuseport_add_sock(struct sock *sk, struct sock *sk2);
 extern void reuseport_detach_sock(struct sock *sk);
-extern struct sock *reuseport_select_sock(struct sock *sk,
-					  u32 hash,
-					  struct sk_buff *skb,
-					  int hdr_len);
+bool __reuseport_get_info(struct sock *sk, struct sk_buff *skb, int hdr_len,
+			  struct reuseport_info *info);
+static inline struct sock *__reuseport_select_sock(struct reuseport_info *info,
+						   u32 hash)
+{
+	return info->reuse->socks[reciprocal_scale(hash, info->socks)];
+}
+
+#define reuseport_select_sock(sk, skb, net, hlen, fn, saddr, sport, daddr, dport) \
+({									      \
+	struct reuseport_info info;					      \
+	info.sk = NULL;							      \
+	if (sk->sk_reuseport) {						      \
+		rcu_read_lock();					      \
+		if (__reuseport_get_info(sk, skb, hlen, &info) && !info.sk)   \
+			info.sk = __reuseport_select_sock(&info,	      \
+					 fn(net, daddr, hnum, saddr, sport)); \
+		rcu_read_unlock();					      \
+	}								      \
+	info.sk;							      \
+})
+
 extern struct bpf_prog *reuseport_attach_prog(struct sock *sk,
 					      struct bpf_prog *prog);
 
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index c5bb52bc73a1..8d66e66239a2 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -201,31 +201,30 @@ static struct sock *run_bpf(struct sock_reuseport *reuse, u16 socks,
 }
 
 /**
- *  reuseport_select_sock - Select a socket from an SO_REUSEPORT group.
+ *  __reuseport_get_info - Retrieve information for reuseport socket selection
  *  @sk: First socket in the group.
- *  @hash: When no BPF filter is available, use this hash to select.
  *  @skb: skb to run through BPF filter.
  *  @hdr_len: BPF filter expects skb data pointer at payload data.  If
  *    the skb does not yet point at the payload, this parameter represents
  *    how far the pointer needs to advance to reach the payload.
- *  Returns a socket that should receive the packet (or NULL on error).
+ *  @info: reuseport information, filled only if return value is true
+ *  Returns true if @sk is a reuseport socket, and fill @info accordingly.
+ *  if @info.sk is NULL, the caller must retrieve the selected reuseport socket
+ *  calling __reuseport_select_sock(). The caller must hold the RCU lock.
  */
-struct sock *reuseport_select_sock(struct sock *sk,
-				   u32 hash,
-				   struct sk_buff *skb,
-				   int hdr_len)
+bool __reuseport_get_info(struct sock *sk, struct sk_buff *skb, int hdr_len,
+			  struct reuseport_info *info)
 {
 	struct sock_reuseport *reuse;
 	struct bpf_prog *prog;
-	struct sock *sk2 = NULL;
 	u16 socks;
 
-	rcu_read_lock();
+	info->sk = NULL;
 	reuse = rcu_dereference(sk->sk_reuseport_cb);
 
 	/* if memory allocation failed or add call is not yet complete */
 	if (!reuse)
-		goto out;
+		return false;
 
 	prog = rcu_dereference(reuse->prog);
 	socks = READ_ONCE(reuse->num_socks);
@@ -234,18 +233,15 @@ struct sock *reuseport_select_sock(struct sock *sk,
 		smp_rmb();
 
 		if (prog && skb)
-			sk2 = run_bpf(reuse, socks, prog, skb, hdr_len);
+			info->sk = run_bpf(reuse, socks, prog, skb, hdr_len);
 
-		/* no bpf or invalid bpf result: fall back to hash usage */
-		if (!sk2)
-			sk2 = reuse->socks[reciprocal_scale(hash, socks)];
+		info->reuse = reuse;
+		info->socks = socks;
+		return true;
 	}
-
-out:
-	rcu_read_unlock();
-	return sk2;
+	return false;
 }
-EXPORT_SYMBOL(reuseport_select_sock);
+EXPORT_SYMBOL(__reuseport_get_info);
 
 struct bpf_prog *
 reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index f6f58108b4c5..eed48aab05f5 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -273,21 +273,17 @@ static struct sock *inet_lhash2_lookup(struct net *net,
 	struct inet_connection_sock *icsk;
 	struct sock *sk, *result = NULL;
 	int score, hiscore = 0;
-	u32 phash = 0;
 
 	inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
 		sk = (struct sock *)icsk;
 		score = compute_score(sk, net, hnum, daddr,
 				      dif, sdif, exact_dif);
 		if (score > hiscore) {
-			if (sk->sk_reuseport) {
-				phash = inet_ehashfn(net, daddr, hnum,
-						     saddr, sport);
-				result = reuseport_select_sock(sk, phash,
-							       skb, doff);
-				if (result)
-					return result;
-			}
+			result = reuseport_select_sock(sk, skb, net, doff,
+						       inet_ehashfn, saddr,
+						       sport, daddr, hnum);
+			if (result)
+				return result;
 			result = sk;
 			hiscore = score;
 		}
@@ -310,7 +306,6 @@ struct sock *__inet_lookup_listener(struct net *net,
 	struct sock *sk, *result = NULL;
 	int score, hiscore = 0;
 	unsigned int hash2;
-	u32 phash = 0;
 
 	if (ilb->count <= 10 || !hashinfo->lhash2)
 		goto port_lookup;
@@ -346,14 +341,11 @@ struct sock *__inet_lookup_listener(struct net *net,
 		score = compute_score(sk, net, hnum, daddr,
 				      dif, sdif, exact_dif);
 		if (score > hiscore) {
-			if (sk->sk_reuseport) {
-				phash = inet_ehashfn(net, daddr, hnum,
-						     saddr, sport);
-				result = reuseport_select_sock(sk, phash,
-							       skb, doff);
-				if (result)
-					return result;
-			}
+			result = reuseport_select_sock(sk, skb, net, doff,
+						       inet_ehashfn, saddr,
+						       sport, daddr, hnum);
+			if (result)
+				return result;
 			result = sk;
 			hiscore = score;
 		}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index e9c0d1e1772e..8072755bb5fc 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -440,7 +440,6 @@ static struct sock *udp4_lib_lookup2(struct net *net,
 {
 	struct sock *sk, *result;
 	int score, badness;
-	u32 hash = 0;
 
 	result = NULL;
 	badness = 0;
@@ -448,14 +447,12 @@ static struct sock *udp4_lib_lookup2(struct net *net,
 		score = compute_score(sk, net, saddr, sport,
 				      daddr, hnum, dif, sdif, exact_dif);
 		if (score > badness) {
-			if (sk->sk_reuseport) {
-				hash = udp_ehashfn(net, daddr, hnum,
-						   saddr, sport);
-				result = reuseport_select_sock(sk, hash, skb,
-							sizeof(struct udphdr));
-				if (result)
-					return result;
-			}
+			result = reuseport_select_sock(sk, skb, net,
+						       sizeof(struct udphdr),
+						       udp_ehashfn, saddr,
+						       sport, daddr, hnum);
+			if (result)
+				return result;
 			badness = score;
 			result = sk;
 		}
@@ -476,7 +473,6 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
 	struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
 	bool exact_dif = udp_lib_exact_dif_match(net, skb);
 	int score, badness;
-	u32 hash = 0;
 
 	if (hslot->count > 10) {
 		hash2 = ipv4_portaddr_hash(net, daddr, hnum);
@@ -513,14 +509,12 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
 		score = compute_score(sk, net, saddr, sport,
 				      daddr, hnum, dif, sdif, exact_dif);
 		if (score > badness) {
-			if (sk->sk_reuseport) {
-				hash = udp_ehashfn(net, daddr, hnum,
-						   saddr, sport);
-				result = reuseport_select_sock(sk, hash, skb,
-							sizeof(struct udphdr));
-				if (result)
-					return result;
-			}
+			result = reuseport_select_sock(sk, skb, net,
+						       sizeof(struct udphdr),
+						       udp_ehashfn, saddr,
+						       sport, daddr, hnum);
+			if (result)
+				return result;
 			result = sk;
 			badness = score;
 		}
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 2febe26de6a1..f6167e647672 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -136,21 +136,17 @@ static struct sock *inet6_lhash2_lookup(struct net *net,
 	struct inet_connection_sock *icsk;
 	struct sock *sk, *result = NULL;
 	int score, hiscore = 0;
-	u32 phash = 0;
 
 	inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
 		sk = (struct sock *)icsk;
 		score = compute_score(sk, net, hnum, daddr, dif, sdif,
 				      exact_dif);
 		if (score > hiscore) {
-			if (sk->sk_reuseport) {
-				phash = inet6_ehashfn(net, daddr, hnum,
-						      saddr, sport);
-				result = reuseport_select_sock(sk, phash,
-							       skb, doff);
-				if (result)
-					return result;
-			}
+			result = reuseport_select_sock(sk, skb, net, doff,
+						       inet6_ehashfn, saddr,
+						       sport, daddr, hnum);
+			if (result)
+				return result;
 			result = sk;
 			hiscore = score;
 		}
@@ -173,7 +169,6 @@ struct sock *inet6_lookup_listener(struct net *net,
 	struct sock *sk, *result = NULL;
 	int score, hiscore = 0;
 	unsigned int hash2;
-	u32 phash = 0;
 
 	if (ilb->count <= 10 || !hashinfo->lhash2)
 		goto port_lookup;
@@ -208,14 +203,11 @@ struct sock *inet6_lookup_listener(struct net *net,
 	sk_for_each(sk, &ilb->head) {
 		score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif);
 		if (score > hiscore) {
-			if (sk->sk_reuseport) {
-				phash = inet6_ehashfn(net, daddr, hnum,
-						      saddr, sport);
-				result = reuseport_select_sock(sk, phash,
-							       skb, doff);
-				if (result)
-					return result;
-			}
+			result = reuseport_select_sock(sk, skb, net, doff,
+						       inet6_ehashfn, saddr,
+						       sport, daddr, hnum);
+			if (result)
+				return result;
 			result = sk;
 			hiscore = score;
 		}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index eecf9f0faf29..936c2a5c7147 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -169,7 +169,6 @@ static struct sock *udp6_lib_lookup2(struct net *net,
 {
 	struct sock *sk, *result;
 	int score, badness;
-	u32 hash = 0;
 
 	result = NULL;
 	badness = -1;
@@ -177,15 +176,12 @@ static struct sock *udp6_lib_lookup2(struct net *net,
 		score = compute_score(sk, net, saddr, sport,
 				      daddr, hnum, dif, sdif, exact_dif);
 		if (score > badness) {
-			if (sk->sk_reuseport) {
-				hash = udp6_ehashfn(net, daddr, hnum,
-						    saddr, sport);
-
-				result = reuseport_select_sock(sk, hash, skb,
-							sizeof(struct udphdr));
-				if (result)
-					return result;
-			}
+			result = reuseport_select_sock(sk, skb, net,
+						       sizeof(struct udphdr),
+						       udp6_ehashfn, saddr,
+						       sport, daddr, hnum);
+			if (result)
+				return result;
 			result = sk;
 			badness = score;
 		}
@@ -206,7 +202,6 @@ struct sock *__udp6_lib_lookup(struct net *net,
 	struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
 	bool exact_dif = udp6_lib_exact_dif_match(net, skb);
 	int score, badness;
-	u32 hash = 0;
 
 	if (hslot->count > 10) {
 		hash2 = ipv6_portaddr_hash(net, daddr, hnum);
@@ -244,14 +239,12 @@ struct sock *__udp6_lib_lookup(struct net *net,
 		score = compute_score(sk, net, saddr, sport, daddr, hnum, dif,
 				      sdif, exact_dif);
 		if (score > badness) {
-			if (sk->sk_reuseport) {
-				hash = udp6_ehashfn(net, daddr, hnum,
-						    saddr, sport);
-				result = reuseport_select_sock(sk, hash, skb,
-							sizeof(struct udphdr));
-				if (result)
-					return result;
-			}
+			result = reuseport_select_sock(sk, skb, net,
+						       sizeof(struct udphdr),
+						       udp6_ehashfn, saddr,
+						       sport, daddr, hnum);
+			if (result)
+				return result;
 			result = sk;
 			badness = score;
 		}
-- 
2.14.3

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ