[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAK6E8=ea8JJu2J369vdUa3FPwj8JNRAGeh4d1WC8RcNZpQp=4Q@mail.gmail.com>
Date: Wed, 28 Mar 2018 15:36:57 -0700
From: Yuchung Cheng <ycheng@...gle.com>
To: Ilpo Järvinen <ilpo.jarvinen@...sinki.fi>
Cc: netdev <netdev@...r.kernel.org>,
Neal Cardwell <ncardwell@...gle.com>,
Eric Dumazet <edumazet@...gle.com>,
Sergei Shtylyov <sergei.shtylyov@...entembedded.com>
Subject: Re: [PATCH v3 net 4/5] tcp: prevent bogus undos when SACK is not enabled
On Tue, Mar 13, 2018 at 3:25 AM, Ilpo Järvinen
<ilpo.jarvinen@...sinki.fi> wrote:
> When a cumulative ACK lands to high_seq at the end of loss
> recovery and SACK is not enabled, the sender needs to avoid
> false fast retransmits (RFC6582). The avoidance mechanism is
> implemented by remaining in the loss recovery CA state until
> one additional cumulative ACK arrives. During the operation of
> this avoidance mechanism, there is an internal transient in the
> use of state variables which will always trigger a bogus undo.
Do we have to make undo in non-sack perfect? Can we consider a much
simpler but imperfect fix of
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8d480542aa07..95225d9de0af 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2356,6 +2356,7 @@ static bool tcp_try_undo_recovery(struct sock *sk)
* fast retransmits (RFC2582). SACK TCP is safe. */
if (!tcp_any_retrans_done(sk))
tp->retrans_stamp = 0;
+ tp->undo_marker = 0;
return true;
}
>
> When we enter this transient state in tcp_try_undo_recovery,
> tcp_any_retrans_done is often (always?) false, resulting in
> clearing retrans_stamp. On the next cumulative ACK,
> tcp_try_undo_recovery again executes because CA state still
> remains in the same recovery state and tcp_may_undo will always
> return true because tcp_packet_delayed has this condition:
> return !tp->retrans_stamp || ...
>
> Check if the false fast retransmit transient avoidance is in
> progress in tcp_packet_delayed to avoid bogus undos. Since snd_una
> has advanced already on this ACK but CA state still remains
> unchanged (CA state is updated slightly later than undo is
> checked), prior_snd_una needs to be passed to tcp_packet_delayed
> (instead of tp->snd_una). Passing prior_snd_una around to
> the tcp_packet_delayed makes this change look more involved than
> it really is.
>
> The additional checks done in this change only affect non-SACK
> case, the SACK case remains the same.
>
> Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@...sinki.fi>
> ---
> net/ipv4/tcp_input.c | 42 ++++++++++++++++++++++++++----------------
> 1 file changed, 26 insertions(+), 16 deletions(-)
>
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 72ecfbb..270aa48 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -2241,10 +2241,17 @@ static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp,
> /* Nothing was retransmitted or returned timestamp is less
> * than timestamp of the first retransmission.
> */
> -static inline bool tcp_packet_delayed(const struct tcp_sock *tp)
> +static inline bool tcp_packet_delayed(const struct tcp_sock *tp,
> + const u32 prior_snd_una)
> {
> - return !tp->retrans_stamp ||
> - tcp_tsopt_ecr_before(tp, tp->retrans_stamp);
> + if (!tp->retrans_stamp) {
> + /* Sender will be in a transient state with cleared
> + * retrans_stamp during false fast retransmit prevention
> + * mechanism
> + */
> + return !tcp_false_fast_retrans_possible(tp, prior_snd_una);
> + }
> + return tcp_tsopt_ecr_before(tp, tp->retrans_stamp);
> }
>
> /* Undo procedures. */
> @@ -2334,17 +2341,19 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
> tp->rack.advanced = 1; /* Force RACK to re-exam losses */
> }
>
> -static inline bool tcp_may_undo(const struct tcp_sock *tp)
> +static inline bool tcp_may_undo(const struct tcp_sock *tp,
> + const u32 prior_snd_una)
> {
> - return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
> + return tp->undo_marker &&
> + (!tp->undo_retrans || tcp_packet_delayed(tp, prior_snd_una));
> }
>
> /* People celebrate: "We love our President!" */
> -static bool tcp_try_undo_recovery(struct sock *sk)
> +static bool tcp_try_undo_recovery(struct sock *sk, const u32 prior_snd_una)
> {
> struct tcp_sock *tp = tcp_sk(sk);
>
> - if (tcp_may_undo(tp)) {
> + if (tcp_may_undo(tp, prior_snd_una)) {
> int mib_idx;
>
> /* Happy end! We did not retransmit anything
> @@ -2391,11 +2400,12 @@ static bool tcp_try_undo_dsack(struct sock *sk)
> }
>
> /* Undo during loss recovery after partial ACK or using F-RTO. */
> -static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
> +static bool tcp_try_undo_loss(struct sock *sk, const u32 prior_snd_una,
> + bool frto_undo)
> {
> struct tcp_sock *tp = tcp_sk(sk);
>
> - if (frto_undo || tcp_may_undo(tp)) {
> + if (frto_undo || tcp_may_undo(tp, prior_snd_una)) {
> tcp_undo_cwnd_reduction(sk, true);
>
> DBGUNDO(sk, "partial loss");
> @@ -2628,13 +2638,13 @@ void tcp_enter_recovery(struct sock *sk, bool ece_ack)
> * recovered or spurious. Otherwise retransmits more on partial ACKs.
> */
> static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
> - int *rexmit)
> + int *rexmit, const u32 prior_snd_una)
> {
> struct tcp_sock *tp = tcp_sk(sk);
> bool recovered = !before(tp->snd_una, tp->high_seq);
>
> if ((flag & FLAG_SND_UNA_ADVANCED) &&
> - tcp_try_undo_loss(sk, false))
> + tcp_try_undo_loss(sk, prior_snd_una, false))
> return;
>
> if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */
> @@ -2642,7 +2652,7 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
> * lost, i.e., never-retransmitted data are (s)acked.
> */
> if ((flag & FLAG_ORIG_SACK_ACKED) &&
> - tcp_try_undo_loss(sk, true))
> + tcp_try_undo_loss(sk, prior_snd_una, true))
> return;
>
> if (after(tp->snd_nxt, tp->high_seq)) {
> @@ -2665,7 +2675,7 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
>
> if (recovered) {
> /* F-RTO RFC5682 sec 3.1 step 2.a and 1st part of step 3.a */
> - tcp_try_undo_recovery(sk);
> + tcp_try_undo_recovery(sk, prior_snd_una);
> return;
> }
> if (tcp_is_reno(tp)) {
> @@ -2685,7 +2695,7 @@ static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una)
> {
> struct tcp_sock *tp = tcp_sk(sk);
>
> - if (tp->undo_marker && tcp_packet_delayed(tp)) {
> + if (tp->undo_marker && tcp_packet_delayed(tp, prior_snd_una)) {
> /* Plain luck! Hole if filled with delayed
> * packet, rather than with a retransmit. Check reordering.
> */
> @@ -2788,7 +2798,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
> case TCP_CA_Recovery:
> if (tcp_is_reno(tp))
> tcp_reset_reno_sack(tp);
> - if (tcp_try_undo_recovery(sk))
> + if (tcp_try_undo_recovery(sk, prior_snd_una))
> return;
> tcp_end_cwnd_reduction(sk);
> break;
> @@ -2815,7 +2825,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
> tcp_rack_identify_loss(sk, ack_flag);
> break;
> case TCP_CA_Loss:
> - tcp_process_loss(sk, flag, is_dupack, rexmit);
> + tcp_process_loss(sk, flag, is_dupack, rexmit, prior_snd_una);
> tcp_rack_identify_loss(sk, ack_flag);
> if (!(icsk->icsk_ca_state == TCP_CA_Open ||
> (*ack_flag & FLAG_LOST_RETRANS)))
> --
> 2.7.4
>
Powered by blists - more mailing lists