lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20180630014815.2881895-3-brakmo@fb.com>
Date:   Fri, 29 Jun 2018 18:48:15 -0700
From:   Lawrence Brakmo <brakmo@...com>
To:     netdev <netdev@...r.kernel.org>
CC:     Kernel Team <kernel-team@...com>, Blake Matheny <bmatheny@...com>,
        Alexei Starovoitov <ast@...com>,
        Neal Cardwell <ncardwell@...gle.com>,
        Yuchung Cheng <ycheng@...gle.com>,
        Steve Ibanez <sibanez@...nford.edu>,
        Eric Dumazet <eric.dumazet@...il.com>
Subject: [PATCH net-next 2/2] tcp: ack immediately when a cwr packet arrives

We observed high 99 and 99.9% latencies when doing RPCs with DCTCP. The
problem is triggered when the last packet of a request arrives CE
marked. The reply will carry the ECE mark causing TCP to shrink its cwnd
to 1 (because there are no packets in flight). When the 1st packet of
the next request arrives it was sometimes delayed adding up to 40ms to
the latency.

This patch insures that CWR makred data packets arriving will be acked
immediately.

Signed-off-by: Lawrence Brakmo <brakmo@...com>
---
 net/ipv4/tcp_input.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 76ca88f63b70..b024d36f0d56 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -98,6 +98,8 @@ int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
 #define FLAG_UPDATE_TS_RECENT	0x4000 /* tcp_replace_ts_recent() */
 #define FLAG_NO_CHALLENGE_ACK	0x8000 /* do not call tcp_send_challenge_ack()	*/
 #define FLAG_ACK_MAYBE_DELAYED	0x10000 /* Likely a delayed ACK */
+#define FLAG_OFO_POSSIBLE	0x20000 /* Possible OFO */
+#define FLAG_CWR		0x40000 /* CWR in this ACK */
 
 #define FLAG_ACKED		(FLAG_DATA_ACKED|FLAG_SYN_ACKED)
 #define FLAG_NOT_DUP		(FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
@@ -5087,7 +5089,7 @@ static inline void tcp_data_snd_check(struct sock *sk)
 /*
  * Check if sending an ack is needed.
  */
-static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
+static void __tcp_ack_snd_check(struct sock *sk, int flags)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	unsigned long rtt, delay;
@@ -5102,13 +5104,16 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
 	    (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat ||
 	     __tcp_select_window(sk) >= tp->rcv_wnd)) ||
 	    /* We ACK each frame or... */
-	    tcp_in_quickack_mode(sk)) {
+	    tcp_in_quickack_mode(sk) ||
+	    /* We received CWR */
+	    flags & FLAG_CWR) {
 send_now:
 		tcp_send_ack(sk);
 		return;
 	}
 
-	if (!ofo_possible || RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
+	if (!(flags & FLAG_OFO_POSSIBLE) ||
+	    RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
 		tcp_send_delayed_ack(sk);
 		return;
 	}
@@ -5134,13 +5139,13 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
 		      HRTIMER_MODE_REL_PINNED_SOFT);
 }
 
-static inline void tcp_ack_snd_check(struct sock *sk)
+static inline void tcp_ack_snd_check(struct sock *sk, int flags)
 {
 	if (!inet_csk_ack_scheduled(sk)) {
 		/* We sent a data segment already. */
 		return;
 	}
-	__tcp_ack_snd_check(sk, 1);
+	__tcp_ack_snd_check(sk, flags | FLAG_OFO_POSSIBLE);
 }
 
 /*
@@ -5489,6 +5494,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
 				goto discard;
 			}
 		} else {
+			int flags;
 			int eaten = 0;
 			bool fragstolen = false;
 
@@ -5525,7 +5531,9 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
 					goto no_ack;
 			}
 
-			__tcp_ack_snd_check(sk, 0);
+			flags = (tcp_flag_word(th) & TCP_FLAG_CWR) ?
+				FLAG_CWR : 0;
+			__tcp_ack_snd_check(sk, flags);
 no_ack:
 			if (eaten)
 				kfree_skb_partial(skb, fragstolen);
@@ -5561,7 +5569,8 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
 	tcp_data_queue(sk, skb);
 
 	tcp_data_snd_check(sk);
-	tcp_ack_snd_check(sk);
+	tcp_ack_snd_check(sk, (tcp_flag_word(th) & TCP_FLAG_CWR) ?
+			  FLAG_CWR : 0);
 	return;
 
 csum_error:
@@ -6150,7 +6159,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 	/* tcp_data could move socket to TIME-WAIT */
 	if (sk->sk_state != TCP_CLOSE) {
 		tcp_data_snd_check(sk);
-		tcp_ack_snd_check(sk);
+		tcp_ack_snd_check(sk, 0);
 	}
 
 	if (!queued) {
-- 
2.17.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ