lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CACSApvbprQpd=+ShSG6YkC3T_XkcoCv8adu0CjC2axSAMGkKuQ@mail.gmail.com>
Date:   Thu, 11 Apr 2019 09:47:41 -0400
From:   Soheil Hassas Yeganeh <soheil@...gle.com>
To:     Eric Dumazet <edumazet@...gle.com>
Cc:     "David S . Miller" <davem@...emloft.net>,
        netdev <netdev@...r.kernel.org>,
        Eric Dumazet <eric.dumazet@...il.com>,
        Yuchung Cheng <ycheng@...gle.com>,
        Neal Cardwell <ncardwell@...gle.com>,
        Soheil Hassas Yeganeh <soheil@...gle.com>,
        Florian Westphal <fw@...len.de>,
        Daniel Borkmann <daniel@...earbox.net>,
        Lawrence Brakmo <brakmo@...com>,
        Abdul Kabbani <akabbani@...gle.com>
Subject: Re: [PATCH net] dctcp: more accurate tracking of packets delivery

On Thu, Apr 11, 2019 at 8:55 AM Eric Dumazet <edumazet@...gle.com> wrote:
>
> After commit e21db6f69a95 ("tcp: track total bytes delivered with ECN CE marks")
> core TCP stack does a very good job tracking ECN signals.
>
> The "sender's best estimate of CE information" Yuchung mentioned in his
> patch is indeed the best we can do.
>
> DCTCP can use tp->delivered_ce and tp->delivered to not duplicate the logic,
> and use the existing best estimate.
>
> This solves some problems, since current DCTCP logic does not deal with losses
> and/or GRO or ack aggregation very well.
>
> This also removes a dubious use of inet_csk(sk)->icsk_ack.rcv_mss
> (this should have been tp->mss_cache), and a 64 bit divide.
>
> Finally, we can see that the DCTCP logic, calling dctcp_update_alpha() for
> every ACK could be done differently, calling it only once per RTT.
>
> Signed-off-by: Eric Dumazet <edumazet@...gle.com>
> Cc: Yuchung Cheng <ycheng@...gle.com>
> Cc: Neal Cardwell <ncardwell@...gle.com>
> Cc: Soheil Hassas Yeganeh <soheil@...gle.com>
> Cc: Florian Westphal <fw@...len.de>
> Cc: Daniel Borkmann <daniel@...earbox.net>
> Cc: Lawrence Brakmo <brakmo@...com>
> Cc: Abdul Kabbani <akabbani@...gle.com>

Acked-by: Soheil Hassas Yeganeh <soheil@...gle.com>

Thank you! Nice idea!

> ---
>  net/ipv4/tcp_dctcp.c | 45 +++++++++++++++++---------------------------
>  1 file changed, 17 insertions(+), 28 deletions(-)
>
> diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
> index 359da68d7c0628360d5f1b727c878f678e28d39a..477cb4aa456c11c70185a982cbadafba857d3619 100644
> --- a/net/ipv4/tcp_dctcp.c
> +++ b/net/ipv4/tcp_dctcp.c
> @@ -49,9 +49,8 @@
>  #define DCTCP_MAX_ALPHA        1024U
>
>  struct dctcp {
> -       u32 acked_bytes_ecn;
> -       u32 acked_bytes_total;
> -       u32 prior_snd_una;
> +       u32 old_delivered;
> +       u32 old_delivered_ce;
>         u32 prior_rcv_nxt;
>         u32 dctcp_alpha;
>         u32 next_seq;
> @@ -73,8 +72,8 @@ static void dctcp_reset(const struct tcp_sock *tp, struct dctcp *ca)
>  {
>         ca->next_seq = tp->snd_nxt;
>
> -       ca->acked_bytes_ecn = 0;
> -       ca->acked_bytes_total = 0;
> +       ca->old_delivered = tp->delivered;
> +       ca->old_delivered_ce = tp->delivered_ce;
>  }
>
>  static void dctcp_init(struct sock *sk)
> @@ -86,7 +85,6 @@ static void dctcp_init(struct sock *sk)
>              sk->sk_state == TCP_CLOSE)) {
>                 struct dctcp *ca = inet_csk_ca(sk);
>
> -               ca->prior_snd_una = tp->snd_una;
>                 ca->prior_rcv_nxt = tp->rcv_nxt;
>
>                 ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
> @@ -118,37 +116,25 @@ static void dctcp_update_alpha(struct sock *sk, u32 flags)
>  {
>         const struct tcp_sock *tp = tcp_sk(sk);
>         struct dctcp *ca = inet_csk_ca(sk);
> -       u32 acked_bytes = tp->snd_una - ca->prior_snd_una;
> -
> -       /* If ack did not advance snd_una, count dupack as MSS size.
> -        * If ack did update window, do not count it at all.
> -        */
> -       if (acked_bytes == 0 && !(flags & CA_ACK_WIN_UPDATE))
> -               acked_bytes = inet_csk(sk)->icsk_ack.rcv_mss;
> -       if (acked_bytes) {
> -               ca->acked_bytes_total += acked_bytes;
> -               ca->prior_snd_una = tp->snd_una;
> -
> -               if (flags & CA_ACK_ECE)
> -                       ca->acked_bytes_ecn += acked_bytes;
> -       }
>
>         /* Expired RTT */
>         if (!before(tp->snd_una, ca->next_seq)) {
> -               u64 bytes_ecn = ca->acked_bytes_ecn;
> +               u32 delivered_ce = tp->delivered_ce - ca->old_delivered_ce;
>                 u32 alpha = ca->dctcp_alpha;
>
>                 /* alpha = (1 - g) * alpha + g * F */
>
>                 alpha -= min_not_zero(alpha, alpha >> dctcp_shift_g);
> -               if (bytes_ecn) {
> +               if (delivered_ce) {
> +                       u32 delivered = tp->delivered - ca->old_delivered;
> +
>                         /* If dctcp_shift_g == 1, a 32bit value would overflow
> -                        * after 8 Mbytes.
> +                        * after 8 M packets.
>                          */
> -                       bytes_ecn <<= (10 - dctcp_shift_g);
> -                       do_div(bytes_ecn, max(1U, ca->acked_bytes_total));
> +                       delivered_ce <<= (10 - dctcp_shift_g);
> +                       delivered_ce /= max(1U, delivered);
>
> -                       alpha = min(alpha + (u32)bytes_ecn, DCTCP_MAX_ALPHA);
> +                       alpha = min(alpha + delivered_ce, DCTCP_MAX_ALPHA);
>                 }
>                 /* dctcp_alpha can be read from dctcp_get_info() without
>                  * synchro, so we ask compiler to not use dctcp_alpha
> @@ -200,6 +186,7 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr,
>                              union tcp_cc_info *info)
>  {
>         const struct dctcp *ca = inet_csk_ca(sk);
> +       const struct tcp_sock *tp = tcp_sk(sk);
>
>         /* Fill it also in case of VEGASINFO due to req struct limits.
>          * We can still correctly retrieve it later.
> @@ -211,8 +198,10 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr,
>                         info->dctcp.dctcp_enabled = 1;
>                         info->dctcp.dctcp_ce_state = (u16) ca->ce_state;
>                         info->dctcp.dctcp_alpha = ca->dctcp_alpha;
> -                       info->dctcp.dctcp_ab_ecn = ca->acked_bytes_ecn;
> -                       info->dctcp.dctcp_ab_tot = ca->acked_bytes_total;
> +                       info->dctcp.dctcp_ab_ecn = tp->mss_cache *
> +                                                  (tp->delivered_ce - ca->old_delivered_ce);
> +                       info->dctcp.dctcp_ab_tot = tp->mss_cache *
> +                                                  (tp->delivered - ca->old_delivered);
>                 }
>
>                 *attr = INET_DIAG_DCTCPINFO;
> --
> 2.21.0.392.gf8f6787159e-goog
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ