Message-ID: <CAK6E8=cummBFwoMEmh24HoT+qVP6vOPVeWqe7138SNtW4z-C6g@mail.gmail.com>
Date: Thu, 30 Apr 2015 10:35:18 -0700
From: Yuchung Cheng <ycheng@...gle.com>
To: Kenneth Klette Jonassen <kennetkl@....uio.no>
Cc: netdev <netdev@...r.kernel.org>, Eric Dumazet <edumazet@...gle.com>
Subject: Re: [PATCH net-next 2/3] tcp: improve RTT from SACK for CC
On Thu, Apr 30, 2015 at 9:23 AM, Kenneth Klette Jonassen
<kennetkl@....uio.no> wrote:
> tcp_sacktag_one() always picks the earliest sequence SACKed for RTT.
> This might not make sense for congestion control in cases where:
>
> 1. ACKs are lost, i.e. a SACK following a lost SACK covers both
> new and old segments at the receiver.
> 2. The receiver disregards the RFC 5681 recommendation to immediately
> ACK out-of-order segments.
>
> Give congestion control an RTT for the latest segment SACKed, which is the
> most accurate RTT estimate, but preserve the conservative RTT for RTO.
>
> Removes the call to skb_mstamp_get() in tcp_sacktag_one().
>
> Cc: Yuchung Cheng <ycheng@...gle.com>
> Cc: Eric Dumazet <edumazet@...gle.com>
> Signed-off-by: Kenneth Klette Jonassen <kennetkl@....uio.no>
> ---
> net/ipv4/tcp_input.c | 42 ++++++++++++++++++++++++------------------
> 1 file changed, 24 insertions(+), 18 deletions(-)
>
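For reference, the bookkeeping the patch adds boils down to something like the
sketch below (simplified userspace code: plain microsecond longs stand in for
struct skb_mstamp, and the names are illustrative rather than the kernel's):

#include <stdio.h>

/* Remember the transmit timestamps of the earliest and the latest
 * never-retransmitted segment covered by SACKs in one incoming ACK.
 * Zero doubles as "not set yet", matching the first_sackt.v64 check
 * in the patch.
 */
struct sack_rtt_state {
	long first_sackt_us;
	long last_sackt_us;
};

static void sack_record(struct sack_rtt_state *st, long xmit_time_us)
{
	if (!st->first_sackt_us)
		st->first_sackt_us = xmit_time_us;	/* earliest SACKed segment */
	st->last_sackt_us = xmit_time_us;		/* latest SACKed segment */
}

int main(void)
{
	struct sack_rtt_state st = { 0, 0 };
	long now_us = 240000;	/* time the ACK is processed */

	/* Segments newly SACKed by this ACK, walked in ascending
	 * sequence order as tcp_sacktag_walk() does.
	 */
	sack_record(&st, 100000);
	sack_record(&st, 120000);
	sack_record(&st, 140000);

	printf("RTO RTT %ld us (earliest segment, conservative)\n",
	       now_us - st.first_sackt_us);
	printf("CC  RTT %ld us (latest segment, freshest sample)\n",
	       now_us - st.last_sackt_us);
	return 0;
}

The first value feeds tcp_ack_update_rtt() and thus the RTO; the second is
what pkts_acked() now sees when the ACK carried SACKs.
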
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 9902cf1..32bac6a 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -1130,7 +1130,14 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
> struct tcp_sacktag_state {
> int reord;
> int fack_count;
> - long rtt_us; /* RTT measured by SACKing never-retransmitted data */
> + /* Timestamps for earliest and latest never-retransmitted segment
> + * that was SACKed. RTO needs the earliest RTT to be conservative
> + * against receivers that might delay SACKing (RFC 5681 does not
> + * require ACKing out-of-order segments immediately), but congestion
> + * control should still get an accurate delay signal.
> + */
FWIW this comment is a little misleading. The reason we took the earliest
timestamp for sack RTT was not b/c RFC 5681 uses SHOULD on immediate ACKs.
The reason was to stay conservative against ACK compression effects. AFAIK
major stacks (almost?) always send immediate ACKs on out-of-order packets.
The rest of the patch looks good!
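
To put made-up numbers on that trade-off: suppose segments sent at t=0ms and
t=20ms end up covered by a single SACK that reaches the sender at t=170ms
(the earlier SACK was lost or delayed in a compressed burst). The
earliest-based sample is then 170ms and the latest-based one 150ms. Using
170ms for the RTO errs on the safe side against exactly that kind of
compression, while pkts_acked() gets 150ms, the freshest sample this ACK can
provide.
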
> + struct skb_mstamp first_sackt;
> + struct skb_mstamp last_sackt;
> int flag;
> };
>
> @@ -1233,14 +1240,9 @@ static u8 tcp_sacktag_one(struct sock *sk,
> state->reord);
> if (!after(end_seq, tp->high_seq))
> state->flag |= FLAG_ORIG_SACK_ACKED;
> - /* Pick the earliest sequence sacked for RTT */
> - if (state->rtt_us < 0) {
> - struct skb_mstamp now;
> -
> - skb_mstamp_get(&now);
> - state->rtt_us = skb_mstamp_us_delta(&now,
> - xmit_time);
> - }
> + if (state->first_sackt.v64 == 0)
> + state->first_sackt = *xmit_time;
> + state->last_sackt = *xmit_time;
> }
>
> if (sacked & TCPCB_LOST) {
> @@ -3049,7 +3051,8 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
> * arrived at the other end.
> */
> static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
> - u32 prior_snd_una, long sack_rtt_us)
> + u32 prior_snd_una,
> + struct tcp_sacktag_state *sack)
> {
> const struct inet_connection_sock *icsk = inet_csk(sk);
> struct skb_mstamp first_ackt, last_ackt, now;
> @@ -3057,8 +3060,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
> u32 prior_sacked = tp->sacked_out;
> u32 reord = tp->packets_out;
> bool fully_acked = true;
> - long ca_seq_rtt_us = -1L;
> + long sack_rtt_us = -1L;
> long seq_rtt_us = -1L;
> + long ca_rtt_us = -1L;
> struct sk_buff *skb;
> u32 pkts_acked = 0;
> bool rtt_update;
> @@ -3147,7 +3151,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
> skb_mstamp_get(&now);
> if (likely(first_ackt.v64)) {
> seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt);
> - ca_seq_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
> + ca_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
> + }
> + if (sack->first_sackt.v64) {
> + sack_rtt_us = skb_mstamp_us_delta(&now, &sack->first_sackt);
> + ca_rtt_us = skb_mstamp_us_delta(&now, &sack->last_sackt);
> }
>
> rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us);
> @@ -3178,10 +3186,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
>
> tp->fackets_out -= min(pkts_acked, tp->fackets_out);
>
> - if (ca_ops->pkts_acked) {
> - long rtt_us = min_t(ulong, ca_seq_rtt_us, sack_rtt_us);
> - ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
> - }
> + if (ca_ops->pkts_acked)
> + ca_ops->pkts_acked(sk, pkts_acked, ca_rtt_us);
>
> } else if (skb && rtt_update && sack_rtt_us >= 0 &&
> sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) {
> @@ -3466,7 +3472,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
> const int prior_unsacked = tp->packets_out - tp->sacked_out;
> int acked = 0; /* Number of packets newly acked */
>
> - sack_state.rtt_us = -1L;
> + sack_state.first_sackt.v64 = 0;
>
> /* We very likely will need to access write queue head. */
> prefetchw(sk->sk_write_queue.next);
> @@ -3555,7 +3561,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
> /* See if we can take anything off of the retransmit queue. */
> acked = tp->packets_out;
> flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una,
> - sack_state.rtt_us);
> + &sack_state);
> acked -= tp->packets_out;
>
> /* Advance cwnd if state allows */
> --
> 2.1.0
>