lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CACSApvYiV71CN3O8DgEsDNmKOmNPX4PHHqvBRQVE3pztKSPjxQ@mail.gmail.com>
Date: Fri, 4 Aug 2023 11:49:37 -0400
From: Soheil Hassas Yeganeh <soheil@...gle.com>
To: Eric Dumazet <edumazet@...gle.com>
Cc: "David S . Miller" <davem@...emloft.net>, Jakub Kicinski <kuba@...nel.org>, 
	Paolo Abeni <pabeni@...hat.com>, netdev@...r.kernel.org, eric.dumazet@...il.com
Subject: Re: [PATCH net-next 2/6] tcp: set TCP_USER_TIMEOUT locklessly

On Fri, Aug 4, 2023 at 10:46 AM Eric Dumazet <edumazet@...gle.com> wrote:
>
> icsk->icsk_user_timeout can be set locklessly,
> if all read sides use READ_ONCE().
>
> Signed-off-by: Eric Dumazet <edumazet@...gle.com>

Acked-by: Soheil Hassas Yeganeh <soheil@...gle.com>

> ---
>  include/linux/tcp.h  |  2 +-
>  net/ipv4/tcp.c       | 23 ++++++++++-------------
>  net/ipv4/tcp_timer.c | 37 ++++++++++++++++++++++---------------
>  3 files changed, 33 insertions(+), 29 deletions(-)
>
> diff --git a/include/linux/tcp.h b/include/linux/tcp.h
> index d16abdb3541a6c07a5d7db59915089f74ee25092..3c5efeeb024f651c90ae4a9ca704dcf16e4adb11 100644
> --- a/include/linux/tcp.h
> +++ b/include/linux/tcp.h
> @@ -564,6 +564,6 @@ void __tcp_sock_set_nodelay(struct sock *sk, bool on);
>  void tcp_sock_set_nodelay(struct sock *sk);
>  void tcp_sock_set_quickack(struct sock *sk, int val);
>  int tcp_sock_set_syncnt(struct sock *sk, int val);
> -void tcp_sock_set_user_timeout(struct sock *sk, u32 val);
> +int tcp_sock_set_user_timeout(struct sock *sk, int val);
>
>  #endif /* _LINUX_TCP_H */
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index bcbb33a8c152abe2a060abd644689b54bcca1daa..34c2a40b024779866216402e1d1de1802f8dfde4 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -3296,11 +3296,16 @@ int tcp_sock_set_syncnt(struct sock *sk, int val)
>  }
>  EXPORT_SYMBOL(tcp_sock_set_syncnt);
>
> -void tcp_sock_set_user_timeout(struct sock *sk, u32 val)
> +int tcp_sock_set_user_timeout(struct sock *sk, int val)
>  {
> -       lock_sock(sk);
> +       /* Cap the max time in ms TCP will retry or probe the window
> +        * before giving up and aborting (ETIMEDOUT) a connection.
> +        */
> +       if (val < 0)
> +               return -EINVAL;
> +
>         WRITE_ONCE(inet_csk(sk)->icsk_user_timeout, val);
> -       release_sock(sk);
> +       return 0;
>  }
>  EXPORT_SYMBOL(tcp_sock_set_user_timeout);
>
> @@ -3464,6 +3469,8 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
>         switch (optname) {
>         case TCP_SYNCNT:
>                 return tcp_sock_set_syncnt(sk, val);
> +       case TCP_USER_TIMEOUT:
> +               return tcp_sock_set_user_timeout(sk, val);
>         }
>
>         sockopt_lock_sock(sk);
> @@ -3611,16 +3618,6 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
>                 err = tp->af_specific->md5_parse(sk, optname, optval, optlen);
>                 break;
>  #endif
> -       case TCP_USER_TIMEOUT:
> -               /* Cap the max time in ms TCP will retry or probe the window
> -                * before giving up and aborting (ETIMEDOUT) a connection.
> -                */
> -               if (val < 0)
> -                       err = -EINVAL;
> -               else
> -                       WRITE_ONCE(icsk->icsk_user_timeout, val);
> -               break;
> -
>         case TCP_FASTOPEN:
>                 if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
>                     TCPF_LISTEN))) {
> diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
> index 66040ab457d46ffa2fac62f875b636f567157793..f99e2d06ae7cae72efcafe2bd664545fac8f3fee 100644
> --- a/net/ipv4/tcp_timer.c
> +++ b/net/ipv4/tcp_timer.c
> @@ -26,14 +26,15 @@
>  static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
>  {
>         struct inet_connection_sock *icsk = inet_csk(sk);
> -       u32 elapsed, start_ts;
> +       u32 elapsed, start_ts, user_timeout;
>         s32 remaining;
>
>         start_ts = tcp_sk(sk)->retrans_stamp;
> -       if (!icsk->icsk_user_timeout)
> +       user_timeout = READ_ONCE(icsk->icsk_user_timeout);
> +       if (!user_timeout)
>                 return icsk->icsk_rto;
>         elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
> -       remaining = icsk->icsk_user_timeout - elapsed;
> +       remaining = user_timeout - elapsed;
>         if (remaining <= 0)
>                 return 1; /* user timeout has passed; fire ASAP */
>
> @@ -43,16 +44,17 @@ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
>  u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when)
>  {
>         struct inet_connection_sock *icsk = inet_csk(sk);
> -       u32 remaining;
> +       u32 remaining, user_timeout;
>         s32 elapsed;
>
> -       if (!icsk->icsk_user_timeout || !icsk->icsk_probes_tstamp)
> +       user_timeout = READ_ONCE(icsk->icsk_user_timeout);
> +       if (!user_timeout || !icsk->icsk_probes_tstamp)
>                 return when;
>
>         elapsed = tcp_jiffies32 - icsk->icsk_probes_tstamp;
>         if (unlikely(elapsed < 0))
>                 elapsed = 0;
> -       remaining = msecs_to_jiffies(icsk->icsk_user_timeout) - elapsed;
> +       remaining = msecs_to_jiffies(user_timeout) - elapsed;
>         remaining = max_t(u32, remaining, TCP_TIMEOUT_MIN);
>
>         return min_t(u32, remaining, when);
> @@ -270,7 +272,7 @@ static int tcp_write_timeout(struct sock *sk)
>         }
>         if (!expired)
>                 expired = retransmits_timed_out(sk, retry_until,
> -                                               icsk->icsk_user_timeout);
> +                                               READ_ONCE(icsk->icsk_user_timeout));
>         tcp_fastopen_active_detect_blackhole(sk, expired);
>
>         if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTO_CB_FLAG))
> @@ -384,13 +386,16 @@ static void tcp_probe_timer(struct sock *sk)
>          * corresponding system limit. We also implement similar policy when
>          * we use RTO to probe window in tcp_retransmit_timer().
>          */
> -       if (!icsk->icsk_probes_tstamp)
> +       if (!icsk->icsk_probes_tstamp) {
>                 icsk->icsk_probes_tstamp = tcp_jiffies32;
> -       else if (icsk->icsk_user_timeout &&
> -                (s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >=
> -                msecs_to_jiffies(icsk->icsk_user_timeout))
> -               goto abort;
> +       } else {
> +               u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout);
>
> +               if (user_timeout &&
> +                   (s32)(tcp_jiffies32 - icsk->icsk_probes_tstamp) >=
> +                    msecs_to_jiffies(user_timeout))
> +               goto abort;
> +       }
>         max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2);
>         if (sock_flag(sk, SOCK_DEAD)) {
>                 const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;
> @@ -734,13 +739,15 @@ static void tcp_keepalive_timer (struct timer_list *t)
>         elapsed = keepalive_time_elapsed(tp);
>
>         if (elapsed >= keepalive_time_when(tp)) {
> +               u32 user_timeout = READ_ONCE(icsk->icsk_user_timeout);
> +
>                 /* If the TCP_USER_TIMEOUT option is enabled, use that
>                  * to determine when to timeout instead.
>                  */
> -               if ((icsk->icsk_user_timeout != 0 &&
> -                   elapsed >= msecs_to_jiffies(icsk->icsk_user_timeout) &&
> +               if ((user_timeout != 0 &&
> +                   elapsed >= msecs_to_jiffies(user_timeout) &&
>                     icsk->icsk_probes_out > 0) ||
> -                   (icsk->icsk_user_timeout == 0 &&
> +                   (user_timeout == 0 &&
>                     icsk->icsk_probes_out >= keepalive_probes(tp))) {
>                         tcp_send_active_reset(sk, GFP_ATOMIC);
>                         tcp_write_err(sk);
> --
> 2.41.0.640.ga95def55d0-goog
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ