[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230804144616.3938718-3-edumazet@google.com>
Date: Fri, 4 Aug 2023 14:46:12 +0000
From: Eric Dumazet <edumazet@...gle.com>
To: "David S . Miller" <davem@...emloft.net>, Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>
Cc: netdev@...r.kernel.org, eric.dumazet@...il.com,
Soheil Hassas Yeganeh <soheil@...gle.com>, Eric Dumazet <edumazet@...gle.com>
Subject: [PATCH net-next 2/6] tcp: set TCP_USER_TIMEOUT locklessly
icsk->icsk_user_timeout can be set locklessly,
if all read sides use READ_ONCE().
Signed-off-by: Eric Dumazet <edumazet@...gle.com>
---
include/linux/tcp.h | 2 +-
net/ipv4/tcp.c | 23 ++++++++++-------------
net/ipv4/tcp_timer.c | 37 ++++++++++++++++++++++---------------
3 files changed, 33 insertions(+), 29 deletions(-)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index d16abdb3541a6c07a5d7db59915089f74ee25092..3c5efeeb024f651c90ae4a9ca704dcf16e4adb11 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -564,6 +564,6 @@ void __tcp_sock_set_nodelay(struct sock *sk, bool on);
void tcp_sock_set_nodelay(struct sock *sk);
void tcp_sock_set_quickack(struct sock *sk, int val);
int tcp_sock_set_syncnt(struct sock *sk, int val);
-void tcp_sock_set_user_timeout(struct sock *sk, u32 val);
+int tcp_sock_set_user_timeout(struct sock *sk, int val);
#endif /* _LINUX_TCP_H */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bcbb33a8c152abe2a060abd644689b54bcca1daa..34c2a40b024779866216402e1d1de1802f8dfde4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3296,11 +3296,16 @@ int tcp_sock_set_syncnt(struct sock *sk, int val)
}
EXPORT_SYMBOL(tcp_sock_set_syncnt);
-void tcp_sock_set_user_timeout(struct sock *sk, u32 val)
+int tcp_sock_set_user_timeout(struct sock *sk, int val)
{
- lock_sock(sk);
+ /* Cap the max time in ms TCP will retry or probe the window
+ * before giving up and aborting (ETIMEDOUT) a connection.
+ */
+ if (val < 0)
+ return -EINVAL;
+
WRITE_ONCE(inet_csk(sk)->icsk_user_timeout, val);
- release_sock(sk);
+ return 0;
}
EXPORT_SYMBOL(tcp_sock_set_user_timeout);
@@ -3464,6 +3469,8 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
switch (optname) {
case TCP_SYNCNT:
return tcp_sock_set_syncnt(sk, val);
+ case TCP_USER_TIMEOUT:
+ return tcp_sock_set_user_timeout(sk, val);
}
sockopt_lock_sock(sk);
@@ -3611,16 +3618,6 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
err = tp->af_specific->md5_parse(sk, optname, optval, optlen);
break;
#endif
- case TCP_USER_TIMEOUT:
- /* Cap the max time in ms TCP will retry or probe the window
- * before giving up and aborting (ETIMEDOUT) a connection.
- */
- if (val < 0)
- err = -EINVAL;
- else
- WRITE_ONCE(icsk->icsk_user_timeout, val);
- break;
-
case TCP_FASTOPEN:
if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
TCPF_LISTEN))) {
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 66040ab457d46ffa2fac62f875b636f567157793..f99e2d06ae7cae72efcafe2bd664545fac8f3fee 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -26,14 +26,15 @@
static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
- u32 elapsed, start_ts;
+ u32 elapsed, start_ts, user_timeout;
s32 remaining;
start_ts = tcp_sk(sk)->retrans_stamp;
- if (!icsk->icsk_user_timeout)
+ user_timeout = READ_ONCE(icsk->icsk_user_timeout);
+ if (!user_timeout)
return icsk->icsk_rto;
elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
- remaining = icsk->icsk_user_timeout - elapsed;
+ remaining = user_timeout - elapsed;
if (remaining <= 0)
return 1; /* user timeout has passed; fire ASAP */
@@ -43,16 +44,17 @@ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when)
{
struct inet_connection_sock *icsk = inet_csk(sk);
- u32 remaining;
+ u32 remaining, user_timeout;
s32 elapsed;
- if (!icsk->icsk_user_timeout || !icsk->icsk_probes_tstamp)
+ user_timeout = READ_ONCE(icsk->icsk_user_timeout);
+ if (!user_timeout || !icsk->icsk_probes_tstamp)
return when;
elapsed = tcp_jiffies32 - icsk->icsk_probes_tstamp;
if (unlikely(elapsed < 0))
elapsed = 0;
- remaining = msecs_to_jiffies(icsk->icsk_user_timeout) - elapsed;
+ remaining = msecs_to_jiffies(user_timeout) - elapsed;
remaining = max_t(u32, remaining, TCP_TIMEOUT_MIN);
return min_t(u32, remaining, when);
@@ -270,7 +272,7 @@ static int tcp_write_timeout(struct sock *sk)
}
if (!expired)
expired = retransmits_timed_out(sk, retry_until,
- icsk->icsk_user_timeout);
+ READ_ONCE(icsk->icsk_user_timeout));
tcp_fastopen_active_detect_blackhole(sk, expired);
if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTO_CB_FLAG))
@@ -384,13 +386,16 @@ static void tcp_probe_timer(struct sock *sk)
* corresponding system limit. We also implement similar policy when
* we use RTO to probe window in tcp_retransmit_timer().
*/
- if (!icsk->icsk_probes_tstamp)
+ if (!icsk->icsk_probes_tstamp) {
icsk->icsk_probes_tstamp = tcp_jiffies32;
- else if (icsk->icsk_user_timeout &&
- (s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >=
- msecs_to_jiffies(icsk->icsk_user_timeout))
- goto abort;
+ } else {
+ u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout);
+ if (user_timeout &&
+ (s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >=
+ msecs_to_jiffies(user_timeout))
+ goto abort;
+ }
max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2);
if (sock_flag(sk, SOCK_DEAD)) {
const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;
@@ -734,13 +739,15 @@ static void tcp_keepalive_timer (struct timer_list *t)
elapsed = keepalive_time_elapsed(tp);
if (elapsed >= keepalive_time_when(tp)) {
+ u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout);
+
/* If the TCP_USER_TIMEOUT option is enabled, use that
* to determine when to timeout instead.
*/
- if ((icsk->icsk_user_timeout != 0 &&
- elapsed >= msecs_to_jiffies(icsk->icsk_user_timeout) &&
+ if ((user_timeout != 0 &&
+ elapsed >= msecs_to_jiffies(user_timeout) &&
icsk->icsk_probes_out > 0) ||
- (icsk->icsk_user_timeout == 0 &&
+ (user_timeout == 0 &&
icsk->icsk_probes_out >= keepalive_probes(tp))) {
tcp_send_active_reset(sk, GFP_ATOMIC);
tcp_write_err(sk);
--
2.41.0.640.ga95def55d0-goog
Powered by blists - more mailing lists