[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220729161932.2543584-1-marek@cloudflare.com>
Date: Fri, 29 Jul 2022 18:19:32 +0200
From: Marek Majkowski <marek@...udflare.com>
To: netdev@...r.kernel.org
Cc: kernel-team@...udflare.com, edumazet@...gle.com,
davem@...emloft.net, yoshfuji@...ux-ipv6.org, pabeni@...hat.com,
kuba@...nel.org, Marek Majkowski <marek@...udflare.com>
Subject: [PATCH net-next] When using syscookies initial receive window calculation uses wrong MSS
When using syncookies we get the `mss` variable from the two bits
encoded in the ACK number. This MSS though, represents the
client/sender MSS.
When calculating initial receive window with
`tcp_select_initial_window`, we feed it MSS so that the receive
buffer can be MSS aligned. But this MSS is supposed to be our
receiver/server advertised MSS, not MSS from the remote
party. The code got it wrong.
Usually it's not a big deal but could yield weird/wrong
rcv_ssthresh initial values. Consider this:
$ sysctl net.ipv4.tcp_syncookies=2
$ ip route change local 127.0.0.1/32 dev lo initrwnd 1
$ ip -6 route change local ::1 dev lo initrwnd 1
Flags [S], seq #, win 32767, options [mss 65495,nop,nop,sackOK,nop,wscale 10], length 0
Flags [S.], seq #, ack #, win 128, options [mss 128], length 0
Flags [.], ack 1, win 32767, length 0
> client_tcpi_snd_wnd:128 server_tcpi_rcv_ssthresh:128
Flags [P.], seq 1:2, ack 1, win 536, length 1
Flags [.], ack 2, win 32767, length 0
> client_snd_wnd:536 server_rcv_ssthresh:128
After the handshake we see the client reporting send window=128
and this is visible as tcpi_snd_wnd on client and rcv_ssthresh on
server. Then, after 1 byte exchange, to get an ACK from server,
we see the window to surprisingly grow to 536 bytes. This is
surprising, since the server window should not be related to
syncookie-derived MSS from the client. What is even more
surprising, the window is 536 but the rcv_ssthresh on server is
still 128.
Flags [S], seq #, win 32767, options [mss 65476,nop,nop,sackOK,nop,wscale 10], length 0
Flags [S.], seq #, ack #, win 128, options [mss 128], length 0
Flags [.], ack 1, win 32767, length 0
> client_snd_wnd:128 server_rcv_ssthresh:128
Flags [P.], seq 1:2, ack 1, win 1220, length 1
Flags [.], ack 2, win 32767, length 0
> client_tcpi_snd_wnd:1220 server_tcpi_rcv_ssthresh:128
Identical situation for IPv6.
After the patch we get more sensible values, and sender window is
kept in sync with rcv_ssthresh:
Flags [S], seq #, win 65495, options [mss 65495,nop,nop,sackOK,nop,wscale 4], length 0
Flags [S.], seq #, ack #, win 128, options [mss 128], length 0
Flags [.], ack 1, win 65495, length 0
> client_snd_wnd:128 server_rcv_ssthresh:128
Flags [P.], seq 1:2, ack 1, win 128, length 1
Flags [.], ack 2, win 65494, length 0
> client_snd_wnd:128 server_rcv_ssthresh:128
Flags [S], seq #, win 65476, options [mss 65476,nop,nop,sackOK,nop,wscale 4], length 0
Flags [S.], seq #, ack #, win 128, options [mss 128], length 0
Flags [.], ack 1, win 65476, length 0
> client_snd_wnd:128 server_rcv_ssthresh:128
Flags [P.], seq 1:2, ack 1, win 128, length 1
Flags [.], ack 2, win 65475, length 0
> client_snd_wnd:128 server_rcv_ssthresh:128
Signed-off-by: Marek Majkowski <marek@...udflare.com>
---
net/ipv4/syncookies.c | 7 ++++++-
net/ipv6/syncookies.c | 7 ++++++-
2 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 942d2dfa1115..c6701c490528 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -338,6 +338,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
struct rtable *rt;
__u8 rcv_wscale;
struct flowi4 fl4;
+ u32 adj_mss;
+ u32 advmss;
u32 tsoff = 0;
if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) ||
@@ -430,6 +432,9 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
goto out;
}
+ advmss = tcp_mss_clamp(tp, dst_metric_advmss(&rt->dst));
+ adj_mss = advmss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0);
+
/* Try to redo what tcp_v4_send_synack did. */
req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
/* limit the window selection if the user enforce a smaller rx buffer */
@@ -438,7 +443,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
(req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0))
req->rsk_window_clamp = full_space;
- tcp_select_initial_window(sk, full_space, req->mss,
+ tcp_select_initial_window(sk, full_space, adj_mss,
&req->rsk_rcv_wnd, &req->rsk_window_clamp,
ireq->wscale_ok, &rcv_wscale,
dst_metric(&rt->dst, RTAX_INITRWND));
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 5014aa663452..8175043c0bb9 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -139,6 +139,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
int full_space, mss;
struct dst_entry *dst;
__u8 rcv_wscale;
+ u32 adj_mss;
+ u32 advmss;
u32 tsoff = 0;
if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) ||
@@ -249,7 +251,10 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
(req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0))
req->rsk_window_clamp = full_space;
- tcp_select_initial_window(sk, full_space, req->mss,
+ advmss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
+ adj_mss = advmss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0);
+
+ tcp_select_initial_window(sk, full_space, adj_mss,
&req->rsk_rcv_wnd, &req->rsk_window_clamp,
ireq->wscale_ok, &rcv_wscale,
dst_metric(dst, RTAX_INITRWND));
--
2.25.1
Powered by blists - more mailing lists