[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20241015102940.26157-32-chia-yu.chang@nokia-bell-labs.com>
Date: Tue, 15 Oct 2024 12:29:27 +0200
From: chia-yu.chang@...ia-bell-labs.com
To: netdev@...r.kernel.org, ij@...nel.org, ncardwell@...gle.com,
koen.de_schepper@...ia-bell-labs.com, g.white@...leLabs.com,
ingemar.s.johansson@...csson.com, mirja.kuehlewind@...csson.com,
cheshire@...le.com, rs.ietf@....at, Jason_Livingood@...cast.com,
vidhi_goel@...le.com
Cc: Chia-Yu Chang <chia-yu.chang@...ia-bell-labs.com>,
Olivier Tilmans <olivier.tilmans@...ia.com>
Subject: [PATCH net-next 31/44] tcp: L4S ECT(1) identifier for CC modules
From: Chia-Yu Chang <chia-yu.chang@...ia-bell-labs.com>
When ECN is successfully negotiated for a TCP flow, it defaults to
always use ECT(0) in the IP header. L4S service, however, needs to
use ECT(1).
This patch enables congestion control algorithms to control whether
ECT(0) or ECT(1) should be used on a per-segment basis. A new
CA module flag (TCP_CONG_WANTS_ECT_1) defines the behavior
expected by the CA when not-yet initialized for the connection.
As such, it implicitly assumes that the CA also has the
TCP_CONG_NEEDS_ECN set.
Co-developed-by: Olivier Tilmans <olivier.tilmans@...ia.com>
Signed-off-by: Olivier Tilmans <olivier.tilmans@...ia.com>
Signed-off-by: Ilpo Järvinen <ij@...nel.org>
Signed-off-by: Chia-Yu Chang <chia-yu.chang@...ia-bell-labs.com>
---
include/net/inet_ecn.h | 20 +++++++++++++++++---
include/net/tcp.h | 8 ++++++++
net/ipv4/tcp_cong.c | 9 ++++++---
net/ipv4/tcp_output.c | 7 ++++---
4 files changed, 35 insertions(+), 9 deletions(-)
diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h
index ea32393464a2..3c64d32a32b0 100644
--- a/include/net/inet_ecn.h
+++ b/include/net/inet_ecn.h
@@ -51,11 +51,25 @@ static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner)
return outer;
}
+/* Set the ECN field of the socket's IPv4 tos, and of its IPv6 tclass when
+ * inet6_sk(sk) is non-NULL, to ECT(1) if use_ect_1 is true, else to ECT(0).
+ */
+static inline void __INET_ECN_xmit(struct sock *sk, bool use_ect_1)
+{
+ __u8 ect = use_ect_1 ? INET_ECN_ECT_1 : INET_ECN_ECT_0;
+
+ /* Clear only the INET_ECN_MASK bits before OR-ing in the new codepoint,
+ * in case the connection alternates between ECT(0) and ECT(1); the
+ * remaining bits of tos/tclass are left untouched.
+ */
+ inet_sk(sk)->tos &= ~INET_ECN_MASK;
+ inet_sk(sk)->tos |= ect;
+ if (inet6_sk(sk) != NULL) {
+ inet6_sk(sk)->tclass &= ~INET_ECN_MASK;
+ inet6_sk(sk)->tclass |= ect;
+ }
+}
+
static inline void INET_ECN_xmit(struct sock *sk)
{
- inet_sk(sk)->tos |= INET_ECN_ECT_0;
- if (inet6_sk(sk) != NULL)
- inet6_sk(sk)->tclass |= INET_ECN_ECT_0;
+ __INET_ECN_xmit(sk, false);
}
static inline void INET_ECN_dontxmit(struct sock *sk)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 822ae5ceb235..cecbec887508 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -426,6 +426,7 @@ static inline void tcp_dec_quickack_mode(struct sock *sk)
#define TCP_ECN_DEMAND_CWR BIT(2)
#define TCP_ECN_SEEN BIT(3)
#define TCP_ECN_MODE_ACCECN BIT(4)
+#define TCP_ECN_ECT_1 BIT(5)
#define TCP_ECN_DISABLED 0
#define TCP_ECN_MODE_PENDING (TCP_ECN_MODE_RFC3168|TCP_ECN_MODE_ACCECN)
@@ -1253,6 +1254,8 @@ enum tcp_ca_ack_event_flags {
#define TCP_CONG_NEEDS_ECN BIT(1)
/* Require successfully negotiated AccECN capability */
#define TCP_CONG_NEEDS_ACCECN BIT(2)
+/* Use ECT(1) instead of ECT(0) while the CA is uninitialized */
+#define TCP_CONG_WANTS_ECT_1 (TCP_CONG_NEEDS_ECN | TCP_CONG_NEEDS_ACCECN)
#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN | \
TCP_CONG_NEEDS_ACCECN)
@@ -1394,6 +1397,11 @@ static inline bool tcp_ca_needs_accecn(const struct sock *sk)
return icsk->icsk_ca_ops->flags & TCP_CONG_NEEDS_ACCECN;
}
+/* Return true when the socket's congestion control module asks for ECT(1)
+ * instead of ECT(0).
+ *
+ * TCP_CONG_WANTS_ECT_1 is a multi-bit mask
+ * (TCP_CONG_NEEDS_ECN | TCP_CONG_NEEDS_ACCECN), so all of its bits must be
+ * set: a plain "flags & mask" test would also be true for a CA that sets
+ * only TCP_CONG_NEEDS_ECN and would move it from ECT(0) to ECT(1).
+ */
+static inline bool tcp_ca_wants_ect_1(const struct sock *sk)
+{
+ return (inet_csk(sk)->icsk_ca_ops->flags & TCP_CONG_WANTS_ECT_1) ==
+ TCP_CONG_WANTS_ECT_1;
+}
+
static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 0306d257fa64..7be5fb14428b 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -227,7 +227,7 @@ void tcp_assign_congestion_control(struct sock *sk)
memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
if (ca->flags & TCP_CONG_NEEDS_ECN)
- INET_ECN_xmit(sk);
+ __INET_ECN_xmit(sk, tcp_ca_wants_ect_1(sk));
else
INET_ECN_dontxmit(sk);
}
@@ -240,7 +240,10 @@ void tcp_init_congestion_control(struct sock *sk)
if (icsk->icsk_ca_ops->init)
icsk->icsk_ca_ops->init(sk);
if (tcp_ca_needs_ecn(sk))
- INET_ECN_xmit(sk);
+ /* The CA is already initialized, expect it to set the
+ * appropriate flag to select ECT(1).
+ */
+ __INET_ECN_xmit(sk, tcp_sk(sk)->ecn_flags & TCP_ECN_ECT_1);
else
INET_ECN_dontxmit(sk);
icsk->icsk_ca_initialized = 1;
@@ -257,7 +260,7 @@ static void tcp_reinit_congestion_control(struct sock *sk,
memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
if (ca->flags & TCP_CONG_NEEDS_ECN)
- INET_ECN_xmit(sk);
+ __INET_ECN_xmit(sk, tcp_ca_wants_ect_1(sk));
else
INET_ECN_dontxmit(sk);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 663cdea1b87b..ec10785f6d00 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -326,7 +326,7 @@ static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
else if (tcp_ca_needs_ecn(sk) ||
tcp_bpf_ca_needs_ecn(sk))
- INET_ECN_xmit(sk);
+ __INET_ECN_xmit(sk, tcp_ca_wants_ect_1(sk));
if (tp->ecn_flags & TCP_ECN_MODE_ACCECN) {
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ACE;
@@ -366,7 +366,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
if (use_ecn) {
if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
- INET_ECN_xmit(sk);
+ __INET_ECN_xmit(sk, tcp_ca_wants_ect_1(sk));
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
if (use_accecn) {
@@ -435,7 +435,8 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb,
return;
if (!tcp_accecn_ace_fail_recv(tp))
- INET_ECN_xmit(sk);
+ /* The CCA could change the ECT codepoint on the fly, reset it*/
+ __INET_ECN_xmit(sk, tp->ecn_flags & TCP_ECN_ECT_1);
if (tcp_ecn_mode_accecn(tp)) {
tcp_accecn_set_ace(tp, skb, th);
skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ACCECN;
--
2.34.1
Powered by blists - more mailing lists