lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20180518190141.899-2-dbanerje@akamai.com>
Date:   Fri, 18 May 2018 15:01:41 -0400
From:   Debabrata Banerjee <dbanerje@...mai.com>
To:     "David S . Miller" <davem@...emloft.net>, netdev@...r.kernel.org
Cc:     Alexey Kuznetsov <kuznet@....inr.ac.ru>,
        Hideaki YOSHIFUJI <yoshfuji@...ux-ipv6.org>,
        dbanerje@...mai.com
Subject: [PATCH RFC net-next 1/1] tcp: close socket without reset on incoming data

When TCP_CLOSE_NORST is set before a close(), sinking of unwanted
incoming data is offloaded to the kernel at low resource usage, bounded
by the TCP_LINGER2 timeout. The socket will transition to FIN_WAIT1 and
then FIN_WAIT2, where it will ACK data until either the timeout is hit
or a RST or FIN is received.

Signed-off-by: Debabrata Banerjee <dbanerje@...mai.com>
---
 include/linux/tcp.h      |  4 +++-
 include/uapi/linux/tcp.h |  2 +-
 net/ipv4/tcp.c           | 23 +++++++++++++++++++++--
 net/ipv4/tcp_input.c     | 16 ++++++++++++----
 net/ipv4/tcp_minisocks.c | 15 +++++++++++++++
 5 files changed, 52 insertions(+), 8 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 72705eaf4b84..bd44bc99b480 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -226,7 +226,8 @@ struct tcp_sock {
 		fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */
 		fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */
 		is_sack_reneg:1,    /* in recovery from loss with SACK reneg? */
-		unused:2;
+		norst:1,	/* Don't send RST on shutdown() socket */
+		unused:1;
 	u8	nonagle     : 4,/* Disable Nagle algorithm?             */
 		thin_lto    : 1,/* Use linear timeouts for thin streams */
 		recvmsg_inq : 1,/* Indicate # of bytes in queue upon recvmsg */
@@ -429,6 +430,7 @@ struct tcp_timewait_sock {
 #ifdef CONFIG_TCP_MD5SIG
 	struct tcp_md5sig_key	  *tw_md5_key;
 #endif
+	int			  tw_norst;
 };
 
 static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 29eb659aa77a..369f3402b669 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -124,8 +124,8 @@ enum {
 #define TCP_FASTOPEN_NO_COOKIE	34	/* Enable TFO without a TFO cookie */
 #define TCP_ZEROCOPY_RECEIVE	35
 #define TCP_INQ			36	/* Notify bytes available to read as a cmsg on read */
-
 #define TCP_CM_INQ		TCP_INQ
+#define TCP_CLOSE_NORST		37	/* Don't send RST on close()'d socket */
 
 struct tcp_repair_opt {
 	__u32	opt_code;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0a2ea0bbf867..29fe763002e5 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2318,8 +2318,10 @@ void tcp_close(struct sock *sk, long timeout)
 	struct sk_buff *skb;
 	int data_was_unread = 0;
 	int state;
+	struct tcp_sock *tp;
 
 	lock_sock(sk);
+	tp = tcp_sk(sk);
 	sk->sk_shutdown = SHUTDOWN_MASK;
 
 	if (sk->sk_state == TCP_LISTEN) {
@@ -2362,8 +2364,19 @@ void tcp_close(struct sock *sk, long timeout)
 	} else if (data_was_unread) {
 		/* Unread data was tossed, zap the connection. */
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE);
-		tcp_set_state(sk, TCP_CLOSE);
-		tcp_send_active_reset(sk, sk->sk_allocation);
+
+		if (unlikely(tp->norst)) {
+			if (tcp_close_state(sk)) {
+				/* We will discard all new incoming data
+				 * set window to max of current or init.
+				 */
+				tp->rcv_wnd = max(tp->rcv_wnd, MAX_TCP_WINDOW);
+				tcp_send_fin(sk);
+			}
+		} else {
+			tcp_set_state(sk, TCP_CLOSE);
+			tcp_send_active_reset(sk, sk->sk_allocation);
+		}
 	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
 		/* Check zero linger _after_ checking for unread data. */
 		sk->sk_prot->disconnect(sk, 0);
@@ -3040,6 +3053,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		else
 			tp->recvmsg_inq = val;
 		break;
+	case TCP_CLOSE_NORST:
+		tp->norst = !!val;
+		break;
 	default:
 		err = -ENOPROTOOPT;
 		break;
@@ -3523,6 +3539,9 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 		return err;
 	}
 #endif
+	case TCP_CLOSE_NORST:
+		val = tp->norst;
+		break;
 	default:
 		return -ENOPROTOOPT;
 	}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index aebb29ab2fdf..e0aa6e126700 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6054,7 +6054,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 			break;
 		}
 
-		if (tp->linger2 < 0) {
+		if (likely(!tp->norst) && tp->linger2 < 0) {
 			tcp_done(sk);
 			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
 			return 1;
@@ -6064,9 +6064,12 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 			/* Receive out of order FIN after close() */
 			if (tp->syn_fastopen && th->fin)
 				tcp_fastopen_active_disable(sk);
-			tcp_done(sk);
-			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
-			return 1;
+
+			if (likely(!tp->norst)) {
+				tcp_done(sk);
+				NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
+				return 1;
+			}
 		}
 
 		tmo = tcp_fin_time(sk);
@@ -6123,6 +6126,11 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
 		if (sk->sk_shutdown & RCV_SHUTDOWN) {
 			if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
 			    after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
+				if (unlikely(tp->norst)) {
+					tcp_send_ack(sk);
+					goto discard;
+				}
+
 				NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
 				tcp_reset(sk);
 				return 1;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f867658b4b30..48a9d5351478 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -133,6 +133,20 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
 			return TCP_TW_SUCCESS;
 		}
 
+		if (tcptw->tw_norst) {
+			/* ack and discard new data */
+			tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+			if (tmp_opt.saw_tstamp) {
+				tcptw->tw_ts_recent_stamp = get_seconds();
+				tcptw->tw_ts_recent	  = tmp_opt.rcv_tsval;
+			}
+
+			if (th->fin) /* active remote close, we can die now */
+				inet_twsk_deschedule_put(tw);
+
+			return TCP_TW_ACK;
+		}
+
 		/* New data or FIN. If new data arrive after half-duplex close,
 		 * reset.
 		 */
@@ -272,6 +286,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 		tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
 		tcptw->tw_ts_offset	= tp->tsoffset;
 		tcptw->tw_last_oow_ack_time = 0;
+		tcptw->tw_norst		= tp->norst;
 
 #if IS_ENABLED(CONFIG_IPV6)
 		if (tw->tw_family == PF_INET6) {
-- 
2.17.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ