[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250417230029.21905-16-chia-yu.chang@nokia-bell-labs.com>
Date: Fri, 18 Apr 2025 01:00:29 +0200
From: chia-yu.chang@...ia-bell-labs.com
To: dsahern@...nel.org,
kuniyu@...zon.com,
bpf@...r.kernel.org,
netdev@...r.kernel.org,
dave.taht@...il.com,
pabeni@...hat.com,
jhs@...atatu.com,
kuba@...nel.org,
stephen@...workplumber.org,
xiyou.wangcong@...il.com,
jiri@...nulli.us,
davem@...emloft.net,
edumazet@...gle.com,
horms@...nel.org,
andrew+netdev@...n.ch,
donald.hunter@...il.com,
ast@...erby.net,
liuhangbin@...il.com,
shuah@...nel.org,
linux-kselftest@...r.kernel.org,
ij@...nel.org,
ncardwell@...gle.com,
koen.de_schepper@...ia-bell-labs.com,
g.white@...lelabs.com,
ingemar.s.johansson@...csson.com,
mirja.kuehlewind@...csson.com,
cheshire@...le.com,
rs.ietf@....at,
Jason_Livingood@...cast.com,
vidhi_goel@...le.com
Cc: Chia-Yu Chang <chia-yu.chang@...ia-bell-labs.com>
Subject: [PATCH v4 net-next 15/15] tcp: try to avoid safer when ACKs are thinned
From: Ilpo Järvinen <ij@...nel.org>
Add newly acked pkts EWMA. When ACK thinning occurs, select
between safer and unsafe cep delta in AccECN processing based
on it. If the packets ACKed per ACK tends to be large, don't
conservatively assume ACE field overflow.
Signed-off-by: Ilpo Järvinen <ij@...nel.org>
Signed-off-by: Chia-Yu Chang <chia-yu.chang@...ia-bell-labs.com>
---
include/linux/tcp.h | 1 +
net/ipv4/tcp.c | 4 +++-
net/ipv4/tcp_input.c | 20 +++++++++++++++++++-
3 files changed, 23 insertions(+), 2 deletions(-)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 782e4dd58bf7..230f55b22a51 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -313,6 +313,7 @@ struct tcp_sock {
prev_ecnfield:2,/* ECN bits from the previous segment */
accecn_opt_demand:2,/* Demand AccECN option for n next ACKs */
est_ecnfield:2;/* ECN field for AccECN delivered estimates */
+ u16 pkts_acked_ewma;/* Pkts acked EWMA for AccECN cep heuristic */
u64 accecn_opt_tstamp; /* Last AccECN option sent timestamp */
u32 app_limited; /* limited until "delivered" reaches this val */
u32 rcv_wnd; /* Current receiver window */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8e3582c1b5bb..56b6467df18b 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3371,6 +3371,7 @@ int tcp_disconnect(struct sock *sk, int flags)
tcp_accecn_init_counters(tp);
tp->prev_ecnfield = 0;
tp->accecn_opt_tstamp = 0;
+ tp->pkts_acked_ewma = 0;
if (icsk->icsk_ca_initialized && icsk->icsk_ca_ops->release)
icsk->icsk_ca_ops->release(sk);
memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
@@ -5109,6 +5110,7 @@ static void __init tcp_struct_check(void)
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, delivered_ecn_bytes);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, received_ce);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, received_ecn_bytes);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, pkts_acked_ewma);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, accecn_opt_tstamp);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, app_limited);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_wnd);
@@ -5117,7 +5119,7 @@ static void __init tcp_struct_check(void)
/* 32bit arches with 8byte alignment on u64 fields might need padding
* before tcp_clock_cache.
*/
- CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_txrx, 130 + 6);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_txrx, 132 + 8);
/* RX read-write hotpath cache lines */
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_received);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c6dac3c2d47a..5bdd82d3c201 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -689,6 +689,10 @@ static void tcp_count_delivered(struct tcp_sock *tp, u32 delivered,
tcp_count_delivered_ce(tp, delivered);
}
+#define PKTS_ACKED_WEIGHT 6
+#define PKTS_ACKED_PREC 6
+#define ACK_COMP_THRESH 4
+
/* Returns the ECN CE delta */
static u32 __tcp_accecn_process(struct sock *sk, const struct sk_buff *skb,
u32 delivered_pkts, u32 delivered_bytes,
@@ -708,6 +712,19 @@ static u32 __tcp_accecn_process(struct sock *sk, const struct sk_buff *skb,
opt_deltas_valid = tcp_accecn_process_option(tp, skb,
delivered_bytes, flag);
+ if (delivered_pkts) {
+ if (!tp->pkts_acked_ewma) {
+ tp->pkts_acked_ewma = delivered_pkts << PKTS_ACKED_PREC;
+ } else {
+ u32 ewma = tp->pkts_acked_ewma;
+
+ ewma = (((ewma << PKTS_ACKED_WEIGHT) - ewma) +
+ (delivered_pkts << PKTS_ACKED_PREC)) >>
+ PKTS_ACKED_WEIGHT;
+ tp->pkts_acked_ewma = min_t(u32, ewma, 0xFFFFU);
+ }
+ }
+
if (!(flag & FLAG_SLOWPATH)) {
/* AccECN counter might overflow on large ACKs */
if (delivered_pkts <= TCP_ACCECN_CEP_ACE_MASK)
@@ -756,7 +773,8 @@ static u32 __tcp_accecn_process(struct sock *sk, const struct sk_buff *skb,
if (d_ceb <
safe_delta * tp->mss_cache >> TCP_ACCECN_SAFETY_SHIFT)
return delta;
- }
+ } else if (tp->pkts_acked_ewma > (ACK_COMP_THRESH << PKTS_ACKED_PREC))
+ return delta;
return safe_delta;
}
--
2.34.1
Powered by blists - more mailing lists