lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20180117015726.93632-1-ycheng@google.com>
Date:   Tue, 16 Jan 2018 17:57:26 -0800
From:   Yuchung Cheng <ycheng@...gle.com>
To:     davem@...emloft.net
Cc:     netdev@...r.kernel.org, edumazet@...gle.com, ysseung@...gle.com,
        ncardwell@...gle.com, Yuchung Cheng <ycheng@...gle.com>
Subject: [PATCH net-next] tcp: avoid negotitating ECN for BBR

This patch keeps BBR from negotiating ECN if sysctl ECN is
set. Prior to this patch, BBR negotiates ECN if enabled, sends
CWR upon receiving ECE ACKs but does not react to them. This can
cause confusion from the protocol perspective. Therefore this
patch prevents the connection from negotiating ECN if BBR is the
congestion control during the handshake.

Note that after the handshake, the user can still switch to a
different congestion control that supports or even requires ECN
(e.g. DCTCP).  In that case, the connection can not re-negotiate
ECN and has to go with the ECN-free mode in that congestion control.

There are other cases BBR would still respond to ECE ACKs with CWR
but does not react to it like the behavior before this patch. First,
when the user switches to BBR congestion control but the connection
has already negotiated ECN before. Second, the system has configured
the ip route and/or uses eBPF to enable ECN on the connection that
uses BBR congestion control.

Signed-off-by: Yuchung Cheng <ycheng@...gle.com>
Signed-off-by: Neal Cardwell <ncardwell@...gle.com>
Acked-by: Yousuk Seung <ysseung@...gle.com>
Acked-by: Eric Dumazet <edumazet@...gle.com>
---
 include/net/tcp.h     | 7 +++++++
 net/ipv4/tcp_bbr.c    | 2 +-
 net/ipv4/tcp_input.c  | 3 ++-
 net/ipv4/tcp_output.c | 6 ++++--
 4 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 6939e69d3c37..22345132d969 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -925,6 +925,8 @@ enum tcp_ca_ack_event_flags {
 #define TCP_CONG_NON_RESTRICTED 0x1
 /* Requires ECN/ECT set on all packets */
 #define TCP_CONG_NEEDS_ECN	0x2
+/* Does not use or react to ECN */
+#define TCP_CONG_DONT_USE_ECN	0x4
 
 union tcp_cc_info;
 
@@ -1033,6 +1035,11 @@ static inline bool tcp_ca_needs_ecn(const struct sock *sk)
 	return icsk->icsk_ca_ops->flags & TCP_CONG_NEEDS_ECN;
 }
 
+static inline bool tcp_ca_uses_ecn(const struct sock *sk)
+{
+	return !(inet_csk(sk)->icsk_ca_ops->flags & TCP_CONG_DONT_USE_ECN);
+}
+
 static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 8322f26e770e..27456554b113 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -926,7 +926,7 @@ static void bbr_set_state(struct sock *sk, u8 new_state)
 }
 
 static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
-	.flags		= TCP_CONG_NON_RESTRICTED,
+	.flags		= TCP_CONG_NON_RESTRICTED | TCP_CONG_DONT_USE_ECN,
 	.name		= "bbr",
 	.owner		= THIS_MODULE,
 	.init		= bbr_init,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ff71b18d9682..6731d0b9b146 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6090,7 +6090,8 @@ static void tcp_ecn_create_request(struct request_sock *req,
 
 	ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield);
 	ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK);
-	ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst;
+	ecn_ok = ecn_ok_dst ||
+		 (net->ipv4.sysctl_tcp_ecn && tcp_ca_uses_ecn(listen_sk));
 
 	if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||
 	    (ecn_ok_dst & DST_FEATURE_ECN_CA) ||
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 95461f02ac9a..446cb65090f5 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -312,8 +312,10 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
-	bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 ||
-		tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
+	bool use_ecn = tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
+
+	if (sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 && tcp_ca_uses_ecn(sk))
+		use_ecn = true;
 
 	if (!use_ecn) {
 		const struct dst_entry *dst = __sk_dst_get(sk);
-- 
2.16.0.rc1.238.g530d649a79-goog

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ