[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250417230029.21905-6-chia-yu.chang@nokia-bell-labs.com>
Date: Fri, 18 Apr 2025 01:00:19 +0200
From: chia-yu.chang@...ia-bell-labs.com
To: dsahern@...nel.org,
kuniyu@...zon.com,
bpf@...r.kernel.org,
netdev@...r.kernel.org,
dave.taht@...il.com,
pabeni@...hat.com,
jhs@...atatu.com,
kuba@...nel.org,
stephen@...workplumber.org,
xiyou.wangcong@...il.com,
jiri@...nulli.us,
davem@...emloft.net,
edumazet@...gle.com,
horms@...nel.org,
andrew+netdev@...n.ch,
donald.hunter@...il.com,
ast@...erby.net,
liuhangbin@...il.com,
shuah@...nel.org,
linux-kselftest@...r.kernel.org,
ij@...nel.org,
ncardwell@...gle.com,
koen.de_schepper@...ia-bell-labs.com,
g.white@...lelabs.com,
ingemar.s.johansson@...csson.com,
mirja.kuehlewind@...csson.com,
cheshire@...le.com,
rs.ietf@....at,
Jason_Livingood@...cast.com,
vidhi_goel@...le.com
Cc: Chia-Yu Chang <chia-yu.chang@...ia-bell-labs.com>
Subject: [PATCH v4 net-next 05/15] tcp: accecn: add AccECN rx byte counters
From: Ilpo Järvinen <ij@...nel.org>
These counters track, per IP ECN field codepoint, the sum of
payload bytes of all arriving (acceptable) packets. The AccECN
option (added by a later patch in the series) echoes these
counters back to the sender side.
Signed-off-by: Ilpo Järvinen <ij@...nel.org>
Signed-off-by: Neal Cardwell <ncardwell@...gle.com>
Signed-off-by: Chia-Yu Chang <chia-yu.chang@...ia-bell-labs.com>
---
include/linux/tcp.h | 1 +
include/net/tcp.h | 18 +++++++++++++++++-
net/ipv4/tcp.c | 3 ++-
net/ipv4/tcp_input.c | 13 +++++++++----
net/ipv4/tcp_minisocks.c | 3 ++-
5 files changed, 31 insertions(+), 7 deletions(-)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index af38fff24aa4..9cbfefd693e3 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -303,6 +303,7 @@ struct tcp_sock {
u32 delivered; /* Total data packets delivered incl. rexmits */
u32 delivered_ce; /* Like the above but only ECE marked packets */
u32 received_ce; /* Like the above but for rcvd CE marked pkts */
+ u32 received_ecn_bytes[3];
u8 received_ce_pending:4, /* Not yet transmit cnt of received_ce */
unused2:4;
u32 app_limited; /* limited until "delivered" reaches this val */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index f36a1a3d538f..6ffa4ae085db 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -467,7 +467,8 @@ static inline int tcp_accecn_extract_syn_ect(u8 ace)
bool tcp_accecn_validate_syn_feedback(struct sock *sk, u8 ace, u8 sent_ect);
void tcp_accecn_third_ack(struct sock *sk, const struct sk_buff *skb,
u8 syn_ect_snt);
-void tcp_ecn_received_counters(struct sock *sk, const struct sk_buff *skb);
+void tcp_ecn_received_counters(struct sock *sk, const struct sk_buff *skb,
+ u32 payload_len);
enum tcp_tw_status {
TCP_TW_SUCCESS = 0,
@@ -1035,11 +1036,26 @@ static inline u32 tcp_rsk_tsval(const struct tcp_request_sock *treq)
* See draft-ietf-tcpm-accurate-ecn for the latest values.
*/
#define TCP_ACCECN_CEP_INIT_OFFSET 5
+#define TCP_ACCECN_E1B_INIT_OFFSET 1
+#define TCP_ACCECN_E0B_INIT_OFFSET 1
+#define TCP_ACCECN_CEB_INIT_OFFSET 0
+
+/* Reset the three per-codepoint AccECN byte counters to zero.
+ *
+ * @counter_array: array of three u32 counters indexed by
+ *                 (IP ECN field value - 1): [0] = INET_ECN_ECT_1,
+ *                 [1] = INET_ECN_ECT_0, [2] = INET_ECN_CE.
+ *
+ * The parameter is u32 * to match the u32 counter array it is called
+ * with (tp->received_ecn_bytes); an int * here would be a pointer
+ * signedness mismatch.
+ *
+ * The TCP_ACCECN_{E1B,E0B,CEB}_INIT_OFFSET values are not applied here;
+ * the counters simply start from zero.
+ */
+static inline void __tcp_accecn_init_bytes_counters(u32 *counter_array)
+{
+ /* The "- 1" indexing below relies on these exact codepoint values. */
+ BUILD_BUG_ON(INET_ECN_ECT_1 != 0x1);
+ BUILD_BUG_ON(INET_ECN_ECT_0 != 0x2);
+ BUILD_BUG_ON(INET_ECN_CE != 0x3);
+
+ counter_array[INET_ECN_ECT_1 - 1] = 0;
+ counter_array[INET_ECN_ECT_0 - 1] = 0;
+ counter_array[INET_ECN_CE - 1] = 0;
+}
static inline void tcp_accecn_init_counters(struct tcp_sock *tp)
{
tp->received_ce = 0;
tp->received_ce_pending = 0;
+ __tcp_accecn_init_bytes_counters(tp->received_ecn_bytes);
}
/* State flags for sacked in struct tcp_skb_cb */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 73f8cc715bff..1e21bdf43f23 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -5092,6 +5092,7 @@ static void __init tcp_struct_check(void)
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, delivered);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, delivered_ce);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, received_ce);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, received_ecn_bytes);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, app_limited);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_wnd);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rx_opt);
@@ -5099,7 +5100,7 @@ static void __init tcp_struct_check(void)
/* 32bit arches with 8byte alignment on u64 fields might need padding
* before tcp_clock_cache.
*/
- CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_txrx, 97 + 7);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_txrx, 109 + 7);
/* RX read-write hotpath cache lines */
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_received);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index cc34664805f8..c017e342f092 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6106,7 +6106,8 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *t
}
/* Updates Accurate ECN received counters from the received IP ECN field */
-void tcp_ecn_received_counters(struct sock *sk, const struct sk_buff *skb)
+void tcp_ecn_received_counters(struct sock *sk, const struct sk_buff *skb,
+ u32 payload_len)
{
u8 ecnfield = TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK;
u8 is_ce = INET_ECN_is_ce(ecnfield);
@@ -6121,6 +6122,9 @@ void tcp_ecn_received_counters(struct sock *sk, const struct sk_buff *skb)
tp->received_ce += pcount;
tp->received_ce_pending = min(tp->received_ce_pending + pcount,
0xfU);
+
+ if (payload_len > 0)
+ tp->received_ecn_bytes[ecnfield - 1] += payload_len;
}
}
@@ -6398,7 +6402,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
flag |= __tcp_replace_ts_recent(tp,
delta);
- tcp_ecn_received_counters(sk, skb);
+ tcp_ecn_received_counters(sk, skb, 0);
/* We know that such packets are checksummed
* on entry.
@@ -6444,7 +6448,8 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
/* Bulk data transfer: receiver */
tcp_cleanup_skb(skb);
__skb_pull(skb, tcp_header_len);
- tcp_ecn_received_counters(sk, skb);
+ tcp_ecn_received_counters(sk, skb,
+ len - tcp_header_len);
eaten = tcp_queue_rcv(sk, skb, &fragstolen);
tcp_event_data_recv(sk, skb);
@@ -6491,7 +6496,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
tcp_accecn_third_ack(sk, skb, tp->syn_ect_snt);
tcp_fast_path_on(tp);
}
- tcp_ecn_received_counters(sk, skb);
+ tcp_ecn_received_counters(sk, skb, len - th->doff * 4);
reason = tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT);
if ((int)reason < 0) {
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 779a206a5ca6..3f8225bae49f 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -497,10 +497,11 @@ static void tcp_ecn_openreq_child(struct sock *sk,
struct tcp_sock *tp = tcp_sk(sk);
if (treq->accecn_ok) {
+ const struct tcphdr *th = (const struct tcphdr *)skb->data;
tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN);
tp->syn_ect_snt = treq->syn_ect_snt;
tcp_accecn_third_ack(sk, skb, treq->syn_ect_snt);
- tcp_ecn_received_counters(sk, skb);
+ tcp_ecn_received_counters(sk, skb, skb->len - th->doff * 4);
} else {
tcp_ecn_mode_set(tp, inet_rsk(req)->ecn_ok ?
TCP_ECN_MODE_RFC3168 :
--
2.34.1
Powered by blists - more mailing lists