[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <87wo2vwxq6.fsf@cloudflare.com>
Date: Wed, 22 Jul 2020 14:17:05 +0200
From: Jakub Sitnicki <jakub@...udflare.com>
To: Stephen Rothwell <sfr@...b.auug.org.au>
Cc: Daniel Borkmann <daniel@...earbox.net>,
Alexei Starovoitov <ast@...nel.org>,
Networking <netdev@...r.kernel.org>,
David Miller <davem@...emloft.net>,
Linux Next Mailing List <linux-next@...r.kernel.org>,
"Linux Kernel Mailing List" <linux-kernel@...r.kernel.org>,
Kuniyuki Iwashima <kuniyu@...zon.co.jp>,
kernel-team@...udflare.com, Willem de Bruijn <willemb@...gle.com>
Subject: Re: linux-next: manual merge of the bpf-next tree with the net tree
On Wed, Jul 22, 2020 at 05:21 AM CEST, Stephen Rothwell wrote:
> Hi all,
>
> Today's linux-next merge of the bpf-next tree got conflicts in:
>
> net/ipv4/udp.c
> net/ipv6/udp.c
>
> between commit:
>
> efc6b6f6c311 ("udp: Improve load balancing for SO_REUSEPORT.")
>
> from the net tree and commits:
>
> 7629c73a1466 ("udp: Extract helper for selecting socket from reuseport group")
> 2a08748cd384 ("udp6: Extract helper for selecting socket from reuseport group")
>
> from the bpf-next tree.
>
> I fixed it up (I wasn't sure how to proceed, so I used the latter
> version) and can carry the fix as necessary. This is now fixed as far
> as linux-next is concerned, but any non trivial conflicts should be
> mentioned to your upstream maintainer when your tree is submitted for
> merging. You may also want to consider cooperating with the maintainer
> of the conflicting tree to minimise any particularly complex conflicts.
This one is a bit tricky.
Looking at how code in udp[46]_lib_lookup2 evolved, first:
acdcecc61285 ("udp: correct reuseport selection with connected sockets")
1) exluded connected UDP sockets from reuseport group during lookup, and
2) limited fast reuseport return to groups with no connected sockets,
The second change had an uninteded side-effect of discarding reuseport
socket selection when reuseport group contained connected sockets.
Then, recent
efc6b6f6c311 ("udp: Improve load balancing for SO_REUSEPORT.")
rectified it by recording reuseport socket selection as lookup result
candidate, in case fast reuseport return did not happen because
reuseport group had connected sockets.
I belive that changes in commit efc6b6f6c311 can be rewritten as below
to the same effect, by realizing that we are always setting the 'result'
if 'score > badness'. Either to what reuseport_select_sock() returned or
to 'sk' that scored higher than current 'badness' threshold.
---8<---
static struct sock *udp4_lib_lookup2(struct net *net,
__be32 saddr, __be16 sport,
__be32 daddr, unsigned int hnum,
int dif, int sdif,
struct udp_hslot *hslot2,
struct sk_buff *skb)
{
struct sock *sk, *result;
int score, badness;
u32 hash = 0;
result = NULL;
badness = 0;
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif);
if (score > badness) {
result = NULL;
if (sk->sk_reuseport &&
sk->sk_state != TCP_ESTABLISHED) {
hash = udp_ehashfn(net, daddr, hnum,
saddr, sport);
result = reuseport_select_sock(sk, hash, skb,
sizeof(struct udphdr));
if (result && !reuseport_has_conns(sk, false))
return result;
}
if (!result)
result = sk;
badness = score;
}
}
return result;
}
---8<---
>From there, it is now easier to resolve the conflict with
7629c73a1466 ("udp: Extract helper for selecting socket from reuseport group")
2a08748cd384 ("udp6: Extract helper for selecting socket from reuseport group")
which extract the 'if (sk->sk_reuseport && sk->sk_state !=
TCP_ESTABLISHED)' block into a helper called lookup_reuseport().
To merge the two, we need to pull the reuseport_has_conns() check up
from lookup_reuseport() and back into udp[46]_lib_lookup2(), because now
we want to record reuseport socket selection even if reuseport group has
connections.
The only other call site of lookup_reuseport() is in
udp[46]_lookup_run_bpf(). We don't want to discard the reuseport
selected socket if group has connections there either, so no changes are
needed. And, now that I think about it, the current behavior in
udp[46]_lookup_run_bpf() is not right.
The end result for udp4 will look like:
---8<---
static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk,
struct sk_buff *skb,
__be32 saddr, __be16 sport,
__be32 daddr, unsigned short hnum)
{
struct sock *reuse_sk = NULL;
u32 hash;
if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) {
hash = udp_ehashfn(net, daddr, hnum, saddr, sport);
reuse_sk = reuseport_select_sock(sk, hash, skb,
sizeof(struct udphdr));
}
return reuse_sk;
}
/* called with rcu_read_lock() */
static struct sock *udp4_lib_lookup2(struct net *net,
__be32 saddr, __be16 sport,
__be32 daddr, unsigned int hnum,
int dif, int sdif,
struct udp_hslot *hslot2,
struct sk_buff *skb)
{
struct sock *sk, *result;
int score, badness;
result = NULL;
badness = 0;
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif);
if (score > badness) {
result = lookup_reuseport(net, sk, skb,
saddr, sport, daddr, hnum);
if (result && !reuseport_has_conns(sk, false))
return result;
if (!result)
result = sk;
badness = score;
}
}
return result;
}
---8<---
I will submit a patch that pulls the reuseport_has_conns() check from
lookup_reuseport() to bpf-next. That should bring the two sides of the
merge closer. Please let me know if I can help in any other way.
Also, please take a look at the 3-way diff below from my attempt to
merge net tree into bpf-next tree taking the described approach.
Thanks,
-jkbs
--
diff --cc net/ipv4/udp.c
index b738c63d7a77,4077d589b72e..f5297ea376de
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@@ -408,25 -408,6 +408,22 @@@ static u32 udp_ehashfn(const struct ne
udp_ehash_secret + net_hash_mix(net));
}
+static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk,
+ struct sk_buff *skb,
+ __be32 saddr, __be16 sport,
+ __be32 daddr, unsigned short hnum)
+{
+ struct sock *reuse_sk = NULL;
+ u32 hash;
+
+ if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) {
+ hash = udp_ehashfn(net, daddr, hnum, saddr, sport);
+ reuse_sk = reuseport_select_sock(sk, hash, skb,
+ sizeof(struct udphdr));
- /* Fall back to scoring if group has connections */
- if (reuseport_has_conns(sk, false))
- return NULL;
+ }
+ return reuse_sk;
+}
+
/* called with rcu_read_lock() */
static struct sock *udp4_lib_lookup2(struct net *net,
__be32 saddr, __be16 sport,
@@@ -444,13 -426,20 +441,13 @@@
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif);
if (score > badness) {
- reuseport_result = NULL;
-
- if (sk->sk_reuseport &&
- sk->sk_state != TCP_ESTABLISHED) {
- hash = udp_ehashfn(net, daddr, hnum,
- saddr, sport);
- reuseport_result = reuseport_select_sock(sk, hash, skb,
- sizeof(struct udphdr));
- if (reuseport_result && !reuseport_has_conns(sk, false))
- return reuseport_result;
- }
-
- result = reuseport_result ? : sk;
+ result = lookup_reuseport(net, sk, skb,
+ saddr, sport, daddr, hnum);
- if (result)
++ if (result && !reuseport_has_conns(sk, false))
+ return result;
-
++ if (!result)
++ result = sk;
badness = score;
- result = sk;
}
}
return result;
diff --cc net/ipv6/udp.c
index ff8be202726a,a8d74f44056a..ca50fcdf0776
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@@ -141,27 -141,6 +141,24 @@@ static int compute_score(struct sock *s
return score;
}
+static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk,
+ struct sk_buff *skb,
+ const struct in6_addr *saddr,
+ __be16 sport,
+ const struct in6_addr *daddr,
+ unsigned int hnum)
+{
+ struct sock *reuse_sk = NULL;
+ u32 hash;
+
+ if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) {
+ hash = udp6_ehashfn(net, daddr, hnum, saddr, sport);
+ reuse_sk = reuseport_select_sock(sk, hash, skb,
+ sizeof(struct udphdr));
- /* Fall back to scoring if group has connections */
- if (reuseport_has_conns(sk, false))
- return NULL;
+ }
+ return reuse_sk;
+}
+
/* called with rcu_read_lock() */
static struct sock *udp6_lib_lookup2(struct net *net,
const struct in6_addr *saddr, __be16 sport,
@@@ -178,12 -158,20 +175,12 @@@
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif);
if (score > badness) {
- reuseport_result = NULL;
-
- if (sk->sk_reuseport &&
- sk->sk_state != TCP_ESTABLISHED) {
- hash = udp6_ehashfn(net, daddr, hnum,
- saddr, sport);
-
- reuseport_result = reuseport_select_sock(sk, hash, skb,
- sizeof(struct udphdr));
- if (reuseport_result && !reuseport_has_conns(sk, false))
- return reuseport_result;
- }
-
- result = reuseport_result ? : sk;
+ result = lookup_reuseport(net, sk, skb,
+ saddr, sport, daddr, hnum);
- if (result)
++ if (result && !reuseport_has_conns(sk, false))
+ return result;
-
- result = sk;
++ if (!result)
++ result = sk;
badness = score;
}
}
Powered by blists - more mailing lists