[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20251030143435.13003-12-chia-yu.chang@nokia-bell-labs.com>
Date: Thu, 30 Oct 2025 15:34:32 +0100
From: chia-yu.chang@...ia-bell-labs.com
To: pabeni@...hat.com,
edumazet@...gle.com,
parav@...dia.com,
linux-doc@...r.kernel.org,
corbet@....net,
horms@...nel.org,
dsahern@...nel.org,
kuniyu@...gle.com,
bpf@...r.kernel.org,
netdev@...r.kernel.org,
dave.taht@...il.com,
jhs@...atatu.com,
kuba@...nel.org,
stephen@...workplumber.org,
xiyou.wangcong@...il.com,
jiri@...nulli.us,
davem@...emloft.net,
andrew+netdev@...n.ch,
donald.hunter@...il.com,
ast@...erby.net,
liuhangbin@...il.com,
shuah@...nel.org,
linux-kselftest@...r.kernel.org,
ij@...nel.org,
ncardwell@...gle.com,
koen.de_schepper@...ia-bell-labs.com,
g.white@...lelabs.com,
ingemar.s.johansson@...csson.com,
mirja.kuehlewind@...csson.com,
cheshire@...le.com,
rs.ietf@....at,
Jason_Livingood@...cast.com,
vidhi_goel@...le.com
Cc: Chia-Yu Chang <chia-yu.chang@...ia-bell-labs.com>
Subject: [PATCH v5 net-next 11/14] tcp: accecn: unset ECT if ACE=0 is received or sent in AccECN negotiation
From: Chia-Yu Chang <chia-yu.chang@...ia-bell-labs.com>
Based on specification:
https://tools.ietf.org/id/draft-ietf-tcpm-accurate-ecn-28.txt
Based on Section 3.1.5 of AccECN spec (RFC9768), a TCP Server in
AccECN mode MUST NOT set ECT on any packet for the rest of the connection
if it has received or sent at least one valid SYN or acceptable SYN/ACK
with (AE,CWR,ECE) = (0,0,0) during the handshake.
In addition, a host in AccECN mode that is feeding back the IP-ECN
field on a SYN or SYN/ACK MUST feed back the IP-ECN field on the
latest valid SYN or acceptable SYN/ACK to arrive.
Signed-off-by: Chia-Yu Chang <chia-yu.chang@...ia-bell-labs.com>
---
include/net/tcp_ecn.h | 4 +++-
net/ipv4/tcp_input.c | 2 ++
net/ipv4/tcp_minisocks.c | 33 +++++++++++++++++++++++----------
net/ipv4/tcp_output.c | 8 +++++---
4 files changed, 33 insertions(+), 14 deletions(-)
diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h
index 99d095ed01b3..88a328e7bcde 100644
--- a/include/net/tcp_ecn.h
+++ b/include/net/tcp_ecn.h
@@ -649,7 +649,8 @@ static inline void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb)
}
static inline void
-tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th)
+tcp_ecn_make_synack(struct sock *sk, const struct request_sock *req,
+ struct tcphdr *th)
{
if (!req->num_retrans || !req->num_timeout) {
if (tcp_rsk(req)->accecn_ok)
@@ -660,6 +661,7 @@ tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th)
th->ae = 0;
th->cwr = 0;
th->ece = 0;
+ tcp_accecn_fail_mode_set(tcp_sk(sk), TCP_ACCECN_ACE_FAIL_SEND);
}
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 6b10333fedd1..cc39056d446f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6213,6 +6213,8 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
if (th->syn) {
if (tcp_ecn_mode_accecn(tp)) {
accecn_reflector = true;
+ tp->syn_ect_rcv = TCP_SKB_CB(skb)->ip_dsfield &
+ INET_ECN_MASK;
if (tp->rx_opt.accecn &&
tp->saw_accecn_opt < TCP_ACCECN_OPT_COUNTER_SEEN) {
u8 saw_opt = tcp_accecn_option_init(skb, tp->rx_opt.accecn);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 512920b23968..4a9190df0668 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -749,16 +749,29 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
*/
if (!tcp_oow_rate_limited(sock_net(sk), skb,
LINUX_MIB_TCPACKSKIPPEDSYNRECV,
- &tcp_rsk(req)->last_oow_ack_time) &&
-
- !tcp_rtx_synack(sk, req)) {
- unsigned long expires = jiffies;
-
- expires += reqsk_timeout(req, TCP_RTO_MAX);
- if (!fastopen)
- mod_timer_pending(&req->rsk_timer, expires);
- else
- req->rsk_timer.expires = expires;
+ &tcp_rsk(req)->last_oow_ack_time)) {
+ if (tcp_rsk(req)->accecn_ok) {
+ u8 ect_rcv = TCP_SKB_CB(skb)->ip_dsfield &
+ INET_ECN_MASK;
+
+ tcp_rsk(req)->syn_ect_rcv = ect_rcv;
+ if (tcp_accecn_ace(tcp_hdr(skb)) == 0x0) {
+ u8 fail_mode = TCP_ACCECN_ACE_FAIL_RECV;
+
+ tcp_accecn_fail_mode_set(tcp_sk(sk),
+ fail_mode);
+ }
+ }
+ if (!tcp_rtx_synack(sk, req)) {
+ unsigned long expires = jiffies;
+
+ expires += reqsk_timeout(req, TCP_RTO_MAX);
+ if (!fastopen)
+ mod_timer_pending(&req->rsk_timer,
+ expires);
+ else
+ req->rsk_timer.expires = expires;
+ }
}
return NULL;
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 37c04da4cfb1..d52229d32b4d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -334,11 +334,13 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb,
return;
ecn_ect_1 = tp->ecn_flags & TCP_ECN_ECT_1;
- if (ecn_ect_1 && !tcp_accecn_ace_fail_recv(tp))
+ if (ecn_ect_1 && !tcp_accecn_ace_fail_recv(tp) &&
+ !tcp_accecn_ace_fail_send(tp))
__INET_ECN_xmit(sk, true);
if (tcp_ecn_mode_accecn(tp)) {
- if (!ecn_ect_1 && !tcp_accecn_ace_fail_recv(tp))
+ if (!ecn_ect_1 && !tcp_accecn_ace_fail_recv(tp) &&
+ !tcp_accecn_ace_fail_send(tp))
INET_ECN_xmit(sk);
tcp_accecn_set_ace(tp, skb, th);
skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ACCECN;
@@ -4006,7 +4008,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
memset(th, 0, sizeof(struct tcphdr));
th->syn = 1;
th->ack = 1;
- tcp_ecn_make_synack(req, th);
+ tcp_ecn_make_synack((struct sock *)sk, req, th);
th->source = htons(ireq->ir_num);
th->dest = ireq->ir_rmt_port;
skb->mark = ireq->ir_mark;
--
2.34.1
Powered by blists - more mailing lists