lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <1487091948-7492-1-git-send-email-koen.de_schepper@nokia-bell-labs.com>
Date:   Tue, 14 Feb 2017 18:05:48 +0100
From:   Koen De Schepper <koen0607@...il.com>
To:     netdev@...r.kernel.org
Cc:     Daniel Borkmann <daniel@...earbox.net>,
        Florian Westphal <fw@...len.de>,
        Glenn Judd <glenn.judd@...ganstanley.com>,
        Yuchung Cheng <ycheng@...gle.com>,
        Neal Cardwell <ncardwell@...gle.com>,
        Andrew Shewmaker <agshew@...il.com>,
        Lawrence Brakmo <brakmo@...com>,
        Koen De Schepper <koen.de_schepper@...ia-bell-labs.com>
Subject: [PATCH net-next] DCTCP drop compatibility

 Make DCTCP respond to drops. An additional Reno-like
 response is added when a packet gets lost. The
 clamp_alpha_on_loss implementation was not working.
 The parameter is renamed to dctcp_drop_compatible
 to express the expected result rather than the
 implementation mechanism.

 If the dctcp_drop_compatible parameter is enabled and at
 least one loss occurred in this RTT, the window is
 additionally reduced by half. The motivation is that packet
 loss is an indicator of extreme congestion. In practice,
 this turned out to be beneficial both for overloaded
 datacenter ECN-configured queues and for classically
 configured drop queues (non-ECN), where it supports
 coexistence with classic TCPs such as Cubic and Reno.

 On a PI2 or PIE AQM bottleneck without ECN support in the
 AQM, the following drop probabilities were measured for 4
 competing flows using the same congestion control:

   TCP  |    Drop    |  Link   |   RTT    |    Drop
   CC   | compatible |  speed  | base+AQM | probability
 =======|============|=========|==========|============
  CUBIC |      -     |  40Mbps |  7+20ms  |    0.21%
  RENO  |      -     |         |          |    0.19%
  DCTCP |      1     |         |          |    0.22%
  DCTCP |      0     |         |          |   25.8%
 -------|------------|---------|----------|------------
  CUBIC |      -     | 100Mbps |  7+20ms  |    0.03%
  RENO  |      -     |         |          |    0.02%
  DCTCP |      1     |         |          |    0.04%
  DCTCP |      0     |         |          |   23.3%
 -------|------------|---------|----------|------------
  CUBIC |      -     | 800Mbps |   1+1ms  |    0.04%
  RENO  |      -     |         |          |    0.05%
  DCTCP |      1     |         |          |    0.06%
  DCTCP |      0     |         |          |   18.7%

 These results show that DCTCP drives drop-based queues
 to high levels of loss when dctcp_drop_compatible=0,
 and becomes compatible with Cubic and Reno when
 dctcp_drop_compatible=1.

 The parameter is on by default, to protect network paths
 that do not support ECN. DCTCP in MS-Windows is similarly
 drop compatible by default.

---
 net/ipv4/tcp_dctcp.c | 79 +++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 62 insertions(+), 17 deletions(-)

diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 5f5e593..d1cf111 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -67,10 +67,10 @@ static unsigned int dctcp_alpha_on_init __read_mostly = DCTCP_MAX_ALPHA;
 module_param(dctcp_alpha_on_init, uint, 0644);
 MODULE_PARM_DESC(dctcp_alpha_on_init, "parameter for initial alpha value");
 
-static unsigned int dctcp_clamp_alpha_on_loss __read_mostly;
-module_param(dctcp_clamp_alpha_on_loss, uint, 0644);
-MODULE_PARM_DESC(dctcp_clamp_alpha_on_loss,
-		 "parameter for clamping alpha on loss");
+static unsigned int dctcp_drop_compatible __read_mostly = 1;
+module_param(dctcp_drop_compatible, uint, 0644);
+MODULE_PARM_DESC(dctcp_drop_compatible,
+		 "parameter for classic drop compatibility on loss");
 
 static struct tcp_congestion_ops dctcp_reno;
 
@@ -231,21 +231,62 @@ static void dctcp_update_alpha(struct sock *sk, u32 flags)
 	}
 }
 
-static void dctcp_state(struct sock *sk, u8 new_state)
+static void dctcp_adjust_on_loss(struct sock *sk)
 {
-	if (dctcp_clamp_alpha_on_loss && new_state == TCP_CA_Loss) {
-		struct dctcp *ca = inet_csk_ca(sk);
+	/* If the dctcp_drop_compatible extension is enabled, and
+	 * this RTT at least one loss occurred, the window is reduced
+	 * additionally by half. The motivation is that packet
+	 * loss is an indicator of extreme congestion. In practice,
+	 * this turned out to be both beneficial for overloaded
+	 * datacenter ECN-configured queues, and classic configured
+	 * drop queues (non-ECN) where it supports coexistence with
+	 * classic TCPs such as Cubic and Reno.
+	 *
+	 * On a PI2 or PIE AQM bottleneck without ECN support in the
+	 * AQM, following drop probabilities are measured for 4
+	 * competing flows of the same congestion control:
+	 *
+	 *   TCP  |    Drop    |  Link   |   RTT    |    Drop
+	 *   CC   | compatible |  speed  | base+AQM | probability
+	 * =======|============|=========|==========|============
+	 *  CUBIC |      -     |  40Mbps |  7+20ms  |    0.21%
+	 *  RENO  |      -     |         |          |    0.19%
+	 *  DCTCP |      1     |         |          |    0.22%
+	 *  DCTCP |      0     |         |          |   25.8%
+	 * -------|------------|---------|----------|------------
+	 *  CUBIC |      -     | 100Mbps |  7+20ms  |    0.03%
+	 *  RENO  |      -     |         |          |    0.02%
+	 *  DCTCP |      1     |         |          |    0.04%
+	 *  DCTCP |      0     |         |          |   23.3%
+	 * -------|------------|---------|----------|------------
+	 *  CUBIC |      -     | 800Mbps |   1+1ms  |    0.04%
+	 *  RENO  |      -     |         |          |    0.05%
+	 *  DCTCP |      1     |         |          |    0.06%
+	 *  DCTCP |      0     |         |          |   18.7%
+	 *
+	 * These results show that DCTCP drives drop based queues
+	 * to high levels of loss when dctcp_drop_compatible=0,
+	 * and becomes compatible with Cubic and Reno when
+	 * dctcp_drop_compatible=1.
+	 */
 
-		/* If this extension is enabled, we clamp dctcp_alpha to
-		 * max on packet loss; the motivation is that dctcp_alpha
-		 * is an indicator to the extend of congestion and packet
-		 * loss is an indicator of extreme congestion; setting
-		 * this in practice turned out to be beneficial, and
-		 * effectively assumes total congestion which reduces the
-		 * window by half.
-		 */
-		ca->dctcp_alpha = DCTCP_MAX_ALPHA;
-	}
+	struct dctcp *ca = inet_csk_ca(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	ca->loss_cwnd = tp->snd_cwnd;
+	tp->snd_ssthresh = max(tp->snd_cwnd >> 1U, 2U);
+}
+
+static void dctcp_state(struct sock *sk, u8 new_state)
+{
+	/* TCP_CA_LOSS is better handled via the event call back for 2 reasons:
+	 * - event is called immediately after call to ssthresh
+	 * - state is called also for MTU probe loss, which uses simple_ssthresh
+	 */
+	if (dctcp_drop_compatible &&
+	    (new_state == TCP_CA_Recovery) &&
+	    !(tcp_in_cwnd_reduction(sk)))
+		dctcp_adjust_on_loss(sk);
 }
 
 static void dctcp_update_ack_reserved(struct sock *sk, enum tcp_ca_event ev)
@@ -280,6 +321,10 @@ static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
 	case CA_EVENT_NON_DELAYED_ACK:
 		dctcp_update_ack_reserved(sk, ev);
 		break;
+	case CA_EVENT_LOSS:
+		if (dctcp_drop_compatible)
+			dctcp_adjust_on_loss(sk);
+		break;
 	default:
 		/* Don't care for the rest. */
 		break;
-- 
2.7.4

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ