[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250224110654.707639-1-edumazet@google.com>
Date: Mon, 24 Feb 2025 11:06:53 +0000
From: Eric Dumazet <edumazet@...gle.com>
To: "David S . Miller" <davem@...emloft.net>, Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>, Neal Cardwell <ncardwell@...gle.com>
Cc: Kuniyuki Iwashima <kuniyu@...zon.com>, Simon Horman <horms@...nel.org>, Florian Westphal <fw@...len.de>,
netdev@...r.kernel.org, eric.dumazet@...il.com,
Eric Dumazet <edumazet@...gle.com>, Yong-Hao Zou <yonghaoz1994@...il.com>
Subject: [PATCH net-next] tcp: be less liberal in tsecr received while in
SYN_RECV state
Yong-Hao Zou mentioned that linux was not strict as other OS in 3WHS,
for flows using TCP TS option (RFC 7323)
As hinted by an old comment in tcp_check_req(),
we can check the TSecr value in the incoming packet corresponds
to one of the SYNACK TSval values we have sent.
In this patch, I record the oldest and most recent values
that SYNACK packets have used.
Send a challenge ACK if we receive a TSecr outside
of this range, and increase a new SNMP counter.
nstat -az | grep TcpExtTSECR_Rejected
TcpExtTSECR_Rejected 0 0.0
Reported-by: Yong-Hao Zou <yonghaoz1994@...il.com>
Signed-off-by: Eric Dumazet <edumazet@...gle.com>
---
.../networking/net_cachelines/snmp.rst | 1 +
include/linux/tcp.h | 2 ++
include/uapi/linux/snmp.h | 1 +
net/ipv4/proc.c | 1 +
net/ipv4/tcp_minisocks.c | 25 +++++++++++--------
net/ipv4/tcp_output.c | 3 +++
6 files changed, 22 insertions(+), 11 deletions(-)
diff --git a/Documentation/networking/net_cachelines/snmp.rst b/Documentation/networking/net_cachelines/snmp.rst
index 90ca2d92547d44fa5b4d28cb9d00820662c3f0fd..bc96efc92cf5b888c1e441412c78f3974be1f587 100644
--- a/Documentation/networking/net_cachelines/snmp.rst
+++ b/Documentation/networking/net_cachelines/snmp.rst
@@ -36,6 +36,7 @@ unsigned_long LINUX_MIB_TIMEWAITRECYCLED
unsigned_long LINUX_MIB_TIMEWAITKILLED
unsigned_long LINUX_MIB_PAWSACTIVEREJECTED
unsigned_long LINUX_MIB_PAWSESTABREJECTED
+unsigned_long LINUX_MIB_TSECR_REJECTED
unsigned_long LINUX_MIB_DELAYEDACKLOST
unsigned_long LINUX_MIB_LISTENOVERFLOWS
unsigned_long LINUX_MIB_LISTENDROPS
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index f88daaa76d836654b2a2e217d0d744d3713d368e..159b2c59eb6271030dc2c8d58b43229ebef10ea5 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -160,6 +160,8 @@ struct tcp_request_sock {
u32 rcv_isn;
u32 snt_isn;
u32 ts_off;
+ u32 snt_tsval_first;
+ u32 snt_tsval_last;
u32 last_oow_ack_time; /* last SYNACK */
u32 rcv_nxt; /* the ack # by SYNACK. For
* FastOpen it's the seq#
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 848c7784e684c03bdf743e42594317f3d889d83f..b85dd84dda5c13471e2f62c3a4ffb11b22f787f8 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -186,6 +186,7 @@ enum
LINUX_MIB_TIMEWAITKILLED, /* TimeWaitKilled */
LINUX_MIB_PAWSACTIVEREJECTED, /* PAWSActiveRejected */
LINUX_MIB_PAWSESTABREJECTED, /* PAWSEstabRejected */
+ LINUX_MIB_TSECR_REJECTED, /* TSECR_Rejected */
LINUX_MIB_PAWS_OLD_ACK, /* PAWSOldAck */
LINUX_MIB_DELAYEDACKS, /* DelayedACKs */
LINUX_MIB_DELAYEDACKLOCKED, /* DelayedACKLocked */
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index affd21a0f57281947f88c6563be3d99aae613baf..2f0d2cf7cae45d824f8c506df3f83c175e794a0a 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -189,6 +189,7 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TWKilled", LINUX_MIB_TIMEWAITKILLED),
SNMP_MIB_ITEM("PAWSActive", LINUX_MIB_PAWSACTIVEREJECTED),
SNMP_MIB_ITEM("PAWSEstab", LINUX_MIB_PAWSESTABREJECTED),
+ SNMP_MIB_ITEM("TSECR_Rejected", LINUX_MIB_TSECR_REJECTED),
SNMP_MIB_ITEM("PAWSOldAck", LINUX_MIB_PAWS_OLD_ACK),
SNMP_MIB_ITEM("DelayedACKs", LINUX_MIB_DELAYEDACKS),
SNMP_MIB_ITEM("DelayedACKLocked", LINUX_MIB_DELAYEDACKLOCKED),
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 1eccc518b957eb9b81cab8b288cb6a5bca931e5a..a87ab5c693b524aa6a324afe5bf5ff0498e528cc 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -663,6 +663,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
struct sock *child;
const struct tcphdr *th = tcp_hdr(skb);
__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
+ bool tsecr_reject = false;
bool paws_reject = false;
bool own_req;
@@ -672,8 +673,12 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
if (tmp_opt.saw_tstamp) {
tmp_opt.ts_recent = READ_ONCE(req->ts_recent);
- if (tmp_opt.rcv_tsecr)
+ if (tmp_opt.rcv_tsecr) {
+ tsecr_reject = !between(tmp_opt.rcv_tsecr,
+ tcp_rsk(req)->snt_tsval_first,
+ READ_ONCE(tcp_rsk(req)->snt_tsval_last));
tmp_opt.rcv_tsecr -= tcp_rsk(req)->ts_off;
+ }
/* We do not store true stamp, but it is not required,
* it can be estimated (approximately)
* from another data.
@@ -788,18 +793,14 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
tcp_rsk(req)->snt_isn + 1))
return sk;
- /* Also, it would be not so bad idea to check rcv_tsecr, which
- * is essentially ACK extension and too early or too late values
- * should cause reset in unsynchronized states.
- */
-
/* RFC793: "first check sequence number". */
- if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq,
- TCP_SKB_CB(skb)->end_seq,
- tcp_rsk(req)->rcv_nxt,
- tcp_rsk(req)->rcv_nxt +
- tcp_synack_window(req))) {
+ if (paws_reject || tsecr_reject ||
+ !tcp_in_window(TCP_SKB_CB(skb)->seq,
+ TCP_SKB_CB(skb)->end_seq,
+ tcp_rsk(req)->rcv_nxt,
+ tcp_rsk(req)->rcv_nxt +
+ tcp_synack_window(req))) {
/* Out of window: send ACK and drop. */
if (!(flg & TCP_FLAG_RST) &&
!tcp_oow_rate_limited(sock_net(sk), skb,
@@ -808,6 +809,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
req->rsk_ops->send_ack(sk, skb, req);
if (paws_reject)
NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
+ else if (tsecr_reject)
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TSECR_REJECTED);
return NULL;
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9a3cf51eab787859ec82432ee6eb9f94e709b292..485ca131091e58616b4f3076acc2ad7a478de89d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -943,6 +943,9 @@ static unsigned int tcp_synack_options(const struct sock *sk,
opts->options |= OPTION_TS;
opts->tsval = tcp_skb_timestamp_ts(tcp_rsk(req)->req_usec_ts, skb) +
tcp_rsk(req)->ts_off;
+ if (!req->num_timeout)
+ tcp_rsk(req)->snt_tsval_first = opts->tsval;
+ WRITE_ONCE(tcp_rsk(req)->snt_tsval_last, opts->tsval);
opts->tsecr = READ_ONCE(req->ts_recent);
remaining -= TCPOLEN_TSTAMP_ALIGNED;
}
--
2.48.1.601.g30ceb7b040-goog
Powered by blists - more mailing lists