[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date: Tue, 12 Dec 2017 14:09:28 +0100
From: Paolo Abeni <pabeni@...hat.com>
To: netdev@...r.kernel.org
Cc: "David S. Miller" <davem@...emloft.net>,
Craig Gallek <kraig@...gle.com>,
Eric Dumazet <edumazet@...gle.com>
Subject: [RFC PATCH] reuseport: compute the ehash only if needed
When a reuseport socket group is using a BPF filter to distribute
the packets among the sockets, we don't need to compute any hash
value, but the current reuseport_select_sock() requires the
caller to compute such hash in advance.
This patch reworks reuseport_select_sock() to compute the hash value
only if needed - i.e. when the BPF filter is missing or fails. Since
different hash functions have different argument types - ipv4 addresses
vs ipv6 ones - reuseport_select_sock() is now a macro, to avoid
over-complicating the interface.
Additionally, the sk_reuseport test is moved inside reuseport_select_sock(),
to avoid some code duplication.
Overall this gives a small but measurable performance improvement
under UDP flood while using SO_REUSEPORT + BPF.
Signed-off-by: Paolo Abeni <pabeni@...hat.com>
---
include/net/sock_reuseport.h | 32 ++++++++++++++++++++++++++++----
net/core/sock_reuseport.c | 34 +++++++++++++++-------------------
net/ipv4/inet_hashtables.c | 28 ++++++++++------------------
net/ipv4/udp.c | 30 ++++++++++++------------------
net/ipv6/inet6_hashtables.c | 28 ++++++++++------------------
net/ipv6/udp.c | 31 ++++++++++++-------------------
6 files changed, 87 insertions(+), 96 deletions(-)
diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index 0054b3a9b923..e7d71d22dca7 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -16,13 +16,37 @@ struct sock_reuseport {
struct sock *socks[0]; /* array of sock pointers */
};
+struct reuseport_info { /* selection state filled by __reuseport_get_info() */
+ struct sock_reuseport *reuse; /* group read from sk->sk_reuseport_cb */
+ struct sock *sk; /* run_bpf() result, or NULL if none was produced */
+ u16 socks; /* value of reuse->num_socks read via READ_ONCE() */
+};
+
extern int reuseport_alloc(struct sock *sk);
extern int reuseport_add_sock(struct sock *sk, struct sock *sk2);
extern void reuseport_detach_sock(struct sock *sk);
-extern struct sock *reuseport_select_sock(struct sock *sk,
- u32 hash,
- struct sk_buff *skb,
- int hdr_len);
+bool __reuseport_get_info(struct sock *sk, struct sk_buff *skb, int hdr_len,
+ struct reuseport_info *info);
+/*
+ * Hash-based selection: return the entry of info->reuse->socks[] whose
+ * index is reciprocal_scale(hash, info->socks). @info is expected to have
+ * been filled by a successful __reuseport_get_info() call.
+ */
+static inline struct sock *__reuseport_select_sock(struct reuseport_info *info,
+						   u32 hash)
+{
+	u32 slot = reciprocal_scale(hash, info->socks);
+
+	return info->reuse->socks[slot];
+}
+
+/*
+ * reuseport_select_sock - select a socket from an SO_REUSEPORT group.
+ *
+ * Evaluates to NULL when @sk does not have sk_reuseport set or when
+ * __reuseport_get_info() returns false. Otherwise it evaluates to the
+ * socket __reuseport_get_info() stored in the info (the run_bpf() result)
+ * or, when that is NULL, to the socket chosen by __reuseport_select_sock()
+ * using @fn(net, daddr, dport, saddr, sport) as the hash. @fn is invoked
+ * only in that last case. The RCU read lock is taken and released here.
+ *
+ * @sk is evaluated exactly once, and @dport (not a caller-local variable)
+ * is what gets forwarded to @fn.
+ */
+#define reuseport_select_sock(sk, skb, net, hlen, fn, saddr, sport, daddr, dport) \
+({									\
+	struct sock *__rsk = (sk);					\
+	struct reuseport_info __rinfo;					\
+	__rinfo.sk = NULL;						\
+	if (__rsk->sk_reuseport) {					\
+		rcu_read_lock();					\
+		if (__reuseport_get_info(__rsk, (skb), (hlen),		\
+					 &__rinfo) && !__rinfo.sk)	\
+			__rinfo.sk = __reuseport_select_sock(&__rinfo,	\
+				fn((net), (daddr), (dport),		\
+				   (saddr), (sport)));			\
+		rcu_read_unlock();					\
+	}								\
+	__rinfo.sk;							\
+})
+
extern struct bpf_prog *reuseport_attach_prog(struct sock *sk,
struct bpf_prog *prog);
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index c5bb52bc73a1..8d66e66239a2 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -201,31 +201,30 @@ static struct sock *run_bpf(struct sock_reuseport *reuse, u16 socks,
}
/**
- * reuseport_select_sock - Select a socket from an SO_REUSEPORT group.
+ * __reuseport_get_info - Retrieve information for reuseport socket selection
* @sk: First socket in the group.
- * @hash: When no BPF filter is available, use this hash to select.
* @skb: skb to run through BPF filter.
* @hdr_len: BPF filter expects skb data pointer at payload data. If
* the skb does not yet point at the payload, this parameter represents
* how far the pointer needs to advance to reach the payload.
- * Returns a socket that should receive the packet (or NULL on error).
+ * @info: reuseport information, filled only if return value is true
+ * Returns true if @sk is a reuseport socket, and fills @info accordingly.
+ * If @info.sk is NULL, the caller must retrieve the selected reuseport socket
+ * by calling __reuseport_select_sock(). The caller must hold the RCU lock.
*/
-struct sock *reuseport_select_sock(struct sock *sk,
- u32 hash,
- struct sk_buff *skb,
- int hdr_len)
+bool __reuseport_get_info(struct sock *sk, struct sk_buff *skb, int hdr_len,
+ struct reuseport_info *info)
{
struct sock_reuseport *reuse;
struct bpf_prog *prog;
- struct sock *sk2 = NULL;
u16 socks;
- rcu_read_lock();
+ info->sk = NULL;
reuse = rcu_dereference(sk->sk_reuseport_cb);
/* if memory allocation failed or add call is not yet complete */
if (!reuse)
- goto out;
+ return false;
prog = rcu_dereference(reuse->prog);
socks = READ_ONCE(reuse->num_socks);
@@ -234,18 +233,15 @@ struct sock *reuseport_select_sock(struct sock *sk,
smp_rmb();
if (prog && skb)
- sk2 = run_bpf(reuse, socks, prog, skb, hdr_len);
+ info->sk = run_bpf(reuse, socks, prog, skb, hdr_len);
- /* no bpf or invalid bpf result: fall back to hash usage */
- if (!sk2)
- sk2 = reuse->socks[reciprocal_scale(hash, socks)];
+ info->reuse = reuse;
+ info->socks = socks;
+ return true;
}
-
-out:
- rcu_read_unlock();
- return sk2;
+ return false;
}
-EXPORT_SYMBOL(reuseport_select_sock);
+EXPORT_SYMBOL(__reuseport_get_info);
struct bpf_prog *
reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index f6f58108b4c5..eed48aab05f5 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -273,21 +273,17 @@ static struct sock *inet_lhash2_lookup(struct net *net,
struct inet_connection_sock *icsk;
struct sock *sk, *result = NULL;
int score, hiscore = 0;
- u32 phash = 0;
inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
sk = (struct sock *)icsk;
score = compute_score(sk, net, hnum, daddr,
dif, sdif, exact_dif);
if (score > hiscore) {
- if (sk->sk_reuseport) {
- phash = inet_ehashfn(net, daddr, hnum,
- saddr, sport);
- result = reuseport_select_sock(sk, phash,
- skb, doff);
- if (result)
- return result;
- }
+ result = reuseport_select_sock(sk, skb, net, doff,
+ inet_ehashfn, saddr,
+ sport, daddr, hnum);
+ if (result)
+ return result;
result = sk;
hiscore = score;
}
@@ -310,7 +306,6 @@ struct sock *__inet_lookup_listener(struct net *net,
struct sock *sk, *result = NULL;
int score, hiscore = 0;
unsigned int hash2;
- u32 phash = 0;
if (ilb->count <= 10 || !hashinfo->lhash2)
goto port_lookup;
@@ -346,14 +341,11 @@ struct sock *__inet_lookup_listener(struct net *net,
score = compute_score(sk, net, hnum, daddr,
dif, sdif, exact_dif);
if (score > hiscore) {
- if (sk->sk_reuseport) {
- phash = inet_ehashfn(net, daddr, hnum,
- saddr, sport);
- result = reuseport_select_sock(sk, phash,
- skb, doff);
- if (result)
- return result;
- }
+ result = reuseport_select_sock(sk, skb, net, doff,
+ inet_ehashfn, saddr,
+ sport, daddr, hnum);
+ if (result)
+ return result;
result = sk;
hiscore = score;
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index e9c0d1e1772e..8072755bb5fc 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -440,7 +440,6 @@ static struct sock *udp4_lib_lookup2(struct net *net,
{
struct sock *sk, *result;
int score, badness;
- u32 hash = 0;
result = NULL;
badness = 0;
@@ -448,14 +447,12 @@ static struct sock *udp4_lib_lookup2(struct net *net,
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif, exact_dif);
if (score > badness) {
- if (sk->sk_reuseport) {
- hash = udp_ehashfn(net, daddr, hnum,
- saddr, sport);
- result = reuseport_select_sock(sk, hash, skb,
- sizeof(struct udphdr));
- if (result)
- return result;
- }
+ result = reuseport_select_sock(sk, skb, net,
+ sizeof(struct udphdr),
+ udp_ehashfn, saddr,
+ sport, daddr, hnum);
+ if (result)
+ return result;
badness = score;
result = sk;
}
@@ -476,7 +473,6 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
bool exact_dif = udp_lib_exact_dif_match(net, skb);
int score, badness;
- u32 hash = 0;
if (hslot->count > 10) {
hash2 = ipv4_portaddr_hash(net, daddr, hnum);
@@ -513,14 +509,12 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif, exact_dif);
if (score > badness) {
- if (sk->sk_reuseport) {
- hash = udp_ehashfn(net, daddr, hnum,
- saddr, sport);
- result = reuseport_select_sock(sk, hash, skb,
- sizeof(struct udphdr));
- if (result)
- return result;
- }
+ result = reuseport_select_sock(sk, skb, net,
+ sizeof(struct udphdr),
+ udp_ehashfn, saddr,
+ sport, daddr, hnum);
+ if (result)
+ return result;
result = sk;
badness = score;
}
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 2febe26de6a1..f6167e647672 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -136,21 +136,17 @@ static struct sock *inet6_lhash2_lookup(struct net *net,
struct inet_connection_sock *icsk;
struct sock *sk, *result = NULL;
int score, hiscore = 0;
- u32 phash = 0;
inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
sk = (struct sock *)icsk;
score = compute_score(sk, net, hnum, daddr, dif, sdif,
exact_dif);
if (score > hiscore) {
- if (sk->sk_reuseport) {
- phash = inet6_ehashfn(net, daddr, hnum,
- saddr, sport);
- result = reuseport_select_sock(sk, phash,
- skb, doff);
- if (result)
- return result;
- }
+ result = reuseport_select_sock(sk, skb, net, doff,
+ inet6_ehashfn, saddr,
+ sport, daddr, hnum);
+ if (result)
+ return result;
result = sk;
hiscore = score;
}
@@ -173,7 +169,6 @@ struct sock *inet6_lookup_listener(struct net *net,
struct sock *sk, *result = NULL;
int score, hiscore = 0;
unsigned int hash2;
- u32 phash = 0;
if (ilb->count <= 10 || !hashinfo->lhash2)
goto port_lookup;
@@ -208,14 +203,11 @@ struct sock *inet6_lookup_listener(struct net *net,
sk_for_each(sk, &ilb->head) {
score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif);
if (score > hiscore) {
- if (sk->sk_reuseport) {
- phash = inet6_ehashfn(net, daddr, hnum,
- saddr, sport);
- result = reuseport_select_sock(sk, phash,
- skb, doff);
- if (result)
- return result;
- }
+ result = reuseport_select_sock(sk, skb, net, doff,
+ inet6_ehashfn, saddr,
+ sport, daddr, hnum);
+ if (result)
+ return result;
result = sk;
hiscore = score;
}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index eecf9f0faf29..936c2a5c7147 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -169,7 +169,6 @@ static struct sock *udp6_lib_lookup2(struct net *net,
{
struct sock *sk, *result;
int score, badness;
- u32 hash = 0;
result = NULL;
badness = -1;
@@ -177,15 +176,12 @@ static struct sock *udp6_lib_lookup2(struct net *net,
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif, exact_dif);
if (score > badness) {
- if (sk->sk_reuseport) {
- hash = udp6_ehashfn(net, daddr, hnum,
- saddr, sport);
-
- result = reuseport_select_sock(sk, hash, skb,
- sizeof(struct udphdr));
- if (result)
- return result;
- }
+ result = reuseport_select_sock(sk, skb, net,
+ sizeof(struct udphdr),
+ udp6_ehashfn, saddr,
+ sport, daddr, hnum);
+ if (result)
+ return result;
result = sk;
badness = score;
}
@@ -206,7 +202,6 @@ struct sock *__udp6_lib_lookup(struct net *net,
struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
bool exact_dif = udp6_lib_exact_dif_match(net, skb);
int score, badness;
- u32 hash = 0;
if (hslot->count > 10) {
hash2 = ipv6_portaddr_hash(net, daddr, hnum);
@@ -244,14 +239,12 @@ struct sock *__udp6_lib_lookup(struct net *net,
score = compute_score(sk, net, saddr, sport, daddr, hnum, dif,
sdif, exact_dif);
if (score > badness) {
- if (sk->sk_reuseport) {
- hash = udp6_ehashfn(net, daddr, hnum,
- saddr, sport);
- result = reuseport_select_sock(sk, hash, skb,
- sizeof(struct udphdr));
- if (result)
- return result;
- }
+ result = reuseport_select_sock(sk, skb, net,
+ sizeof(struct udphdr),
+ udp6_ehashfn, saddr,
+ sport, daddr, hnum);
+ if (result)
+ return result;
result = sk;
badness = score;
}
--
2.14.3
Powered by blists - more mailing lists