[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20240715033118.32322-1-kerneljasonxing@gmail.com>
Date: Mon, 15 Jul 2024 11:31:18 +0800
From: Jason Xing <kerneljasonxing@...il.com>
To: edumazet@...gle.com,
kuba@...nel.org,
pabeni@...hat.com,
davem@...emloft.net,
dsahern@...nel.org,
ncardwell@...gle.com,
corbet@....net
Cc: netdev@...r.kernel.org,
kerneljasonxing@...il.com,
Jason Xing <kernelxing@...cent.com>
Subject: [PATCH net-next] tcp: introduce rto_max_us sysctl knob
From: Jason Xing <kernelxing@...cent.com>
As we all know, the RTO algorithm uses exponential backoff, as the RFC
defined a long time ago. After several rounds of retransmitting a lost
skb, the RTO timer expiry can grow beyond 1 second while still staying
within the upper bound (120s).
Waiting more than one second to retransmit is unacceptable for some
latency-sensitive applications nowadays, so I decided to introduce a
sysctl knob that allows users to tune it. Still, the maximum value is
120 seconds.
Signed-off-by: Jason Xing <kernelxing@...cent.com>
---
Documentation/networking/ip-sysctl.rst | 10 ++++++++++
include/net/inet_connection_sock.h | 10 +++++++++-
include/net/netns/ipv4.h | 1 +
net/ipv4/sysctl_net_ipv4.c | 8 ++++++++
net/ipv4/tcp_ipv4.c | 1 +
5 files changed, 29 insertions(+), 1 deletion(-)
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index 3616389c8c2d..32a1907ca95d 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -1223,6 +1223,16 @@ tcp_rto_min_us - INTEGER
Default: 200000
+tcp_rto_max_us - INTEGER
+ Maximum TCP retransmission timeout (in microseconds).
+
+ The recommended practice is to use a value less than or equal to 120000000
+ microseconds.
+
+ Possible Values: 1 - INT_MAX
+
+ Default: 120000000
+
UDP variables
=============
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index c0deaafebfdc..a0abbafcab9e 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -217,10 +217,18 @@ static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
*/
static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what,
unsigned long when,
- const unsigned long max_when)
+ unsigned long max_when)
{
struct inet_connection_sock *icsk = inet_csk(sk);
+ if (what == ICSK_TIME_RETRANS) {
+ int rto_max_us = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rto_max_us);
+ unsigned int rto_max = usecs_to_jiffies(rto_max_us);
+
+ if (rto_max < max_when)
+ max_when = rto_max;
+ }
+
if (when > max_when) {
pr_debug("reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n",
sk, what, when, (void *)_THIS_IP_);
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 5fcd61ada622..09a28a5c94d2 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -178,6 +178,7 @@ struct netns_ipv4 {
u8 sysctl_tcp_window_scaling;
u8 sysctl_tcp_timestamps;
int sysctl_tcp_rto_min_us;
+ int sysctl_tcp_rto_max_us;
u8 sysctl_tcp_recovery;
u8 sysctl_tcp_thin_linear_timeouts;
u8 sysctl_tcp_slow_start_after_idle;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 9140d20eb2d4..304f173837bc 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1573,6 +1573,14 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ONE,
},
+ {
+ .procname = "tcp_rto_max_us",
+ .data = &init_net.ipv4.sysctl_tcp_rto_max_us,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ONE,
+ },
};
static __net_init int ipv4_sysctl_init_net(struct net *net)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index fd17f25ff288..f06859be5942 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -3506,6 +3506,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_pingpong_thresh = 1;
net->ipv4.sysctl_tcp_rto_min_us = jiffies_to_usecs(TCP_RTO_MIN);
+ net->ipv4.sysctl_tcp_rto_max_us = jiffies_to_usecs(TCP_RTO_MAX);
return 0;
}
--
2.37.3
Powered by blists - more mailing lists