lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <396556a20805301217k293e5718h6bbf02bfe069070@europa>
Date:	Tue, 12 Aug 2008 13:56:47 -0700
From:	"Adam Langley" <agl@...erialviolet.org>
To:	netdev@...r.kernel.org
Subject: [RFC] tcp: Add (limited) SYNACK payload support

This patch implements the draft spec:
http://www.ietf.org/internet-drafts/draft-agl-tcpm-sadata-01.txt

At the moment, this is just an [RFC] patch because an option number hasn't been
assigned by the IETF yet.

It allows listening sockets to be configured with a small (<= 64 bytes)
payload that is included in SYN/ACK packets elicited by SYN packets that
include a special option. See the draft linked to above for motivations.

Additionally, the listening socket can request that the kernel replace 8 bytes
of the payload with random data (that can later be read from the resulting
accepted socket).

The additional header material for the user interface is:

/* Largest payload length, in bytes, that setsockopt(TCP_SADATA) accepts */
#define TCP_SADATA_MAX_PAYLOAD	64

/* Bits for tcp_sadata.tcpsa_flags */

/* Flags shared by setsockopt and getsockopt */
#define TCP_SADATA_REQUEST	(1 << 0)	/* Request SYN/ACK data */
/* Flags for setsockopt */
#define TCP_SADATA_INC_NONCE	(1 << 1)	/* Include nonce in payloads */
/* Flags for getsockopt */
#define TCP_SADATA_SENT		(1 << 2)	/* Was payload sent? */
#define TCP_SADATA_RCVD		(1 << 3)	/* Was payload received? */
#define TCP_SADATA_NONCE	(1 << 4)	/* Was a nonce sent? */

/*
 * Argument of the TCP_SADATA socket option; the same layout is used for
 * both setsockopt and getsockopt.
 */
struct tcp_sadata {
	__u16	tcpsa_flags;		/* TCP_SADATA_*, above */
	__u8	tcpsa_payload_len;	/* Length of payload, in bytes */
	__u8	tcpsa_nonce_offset;	/* If INC_NONCE, its offset in bytes */
	__u32	tcpsa_reserved;
	/*
	 * setsockopt: the payload bytes to send.
	 * getsockopt: if TCP_SADATA_NONCE is set, the first 8 bytes hold
	 * the generated nonce.
	 */
	__u8	tcpsa_payload[TCP_SADATA_MAX_PAYLOAD];
};

A client socket (before connecting) is configured by a setsockopt with flags
equal to TCP_SADATA_REQUEST.

After connecting, a getsockopt will reveal:
  TCP_SADATA_RCVD - SYN/ACK payload received, use recv/read etc to get it
  TCP_SADATA_REQUEST is false - the kernel decided not to actually send the
    request. The kernel is free to do so, although this patch doesn't currently
    make use of it.

A listening socket is configured with a setsockopt with non-zero payload len
and, optionally, TCP_SADATA_INC_NONCE and tcpsa_nonce_offset if the kernel
should include random data.

On a resulting, accepted socket, a getsockopt reveals:
  TCP_SADATA_SENT - a SYN/ACK payload was sent
  TCP_SADATA_NONCE - the 8 random bytes generated are in tcpsa_payload
---

 include/linux/tcp.h      |   53 ++++++++++++++++++++++++++++++++-
 include/net/tcp.h        |   48 ++++++++++++++++++++++++++++++
 net/ipv4/Kconfig         |    9 ++++++
 net/ipv4/tcp.c           |   73 +++++++++++++++++++++++++++++++++++++++++++++-
 net/ipv4/tcp_input.c     |   29 +++++++++++++++++-
 net/ipv4/tcp_ipv4.c      |   36 +++++++++++++++++++++++
 net/ipv4/tcp_minisocks.c |   18 +++++++++--
 net/ipv4/tcp_output.c    |   53 +++++++++++++++++++++++++++++++++
 net/ipv6/tcp_ipv6.c      |   30 +++++++++++++++++++
 9 files changed, 340 insertions(+), 9 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 2e25573..af95ac0 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -96,6 +96,7 @@ enum {
 #define TCP_QUICKACK		12	/* Block/reenable quick acks */
 #define TCP_CONGESTION		13	/* Congestion control algorithm */
 #define TCP_MD5SIG		14	/* TCP MD5 Signature (RFC2385) */
+#define TCP_SADATA		15	/* TCP SYNACK payloads */
 
 #define TCPI_OPT_TIMESTAMPS	1
 #define TCPI_OPT_SACK		2
@@ -170,6 +171,25 @@ struct tcp_md5sig {
 	__u8	tcpm_key[TCP_MD5SIG_MAXKEYLEN];		/* key (binary) */
 };
 
+#define TCP_SADATA_MAX_PAYLOAD	64
+
+/* Flags shared by setsockopt and getsockopt */
+#define TCP_SADATA_REQUEST	(1 << 0)	/* Request SYN/ACK data */
+/* Flags for setsockopt */
+#define TCP_SADATA_INC_NONCE	(1 << 1)	/* Include nonce in payloads */
+/* Flags for getsockopt */
+#define TCP_SADATA_SENT		(1 << 2)	/* Was payload sent? */
+#define TCP_SADATA_RCVD		(1 << 3)	/* Was payload received? */
+#define TCP_SADATA_NONCE	(1 << 4)	/* Was a nonce sent? */
+
+struct tcp_sadata {
+	__u16	tcpsa_flags;		/* TCP_SADATA_*, above */
+	__u8	tcpsa_payload_len;	/* Length of payload, in bytes */
+	__u8	tcpsa_nonce_offset;	/* If INC_NONCE, its offset in bytes */
+	__u32	tcpsa_reserved;
+	__u8	tcpsa_payload[TCP_SADATA_MAX_PAYLOAD];
+};
+
 #ifdef __KERNEL__
 
 #include <linux/skbuff.h>
@@ -222,6 +242,9 @@ struct tcp_options_received {
 	u8	num_sacks;	/* Number of SACK blocks		*/
 	u16	user_mss;  	/* mss requested by user in ioctl */
 	u16	mss_clamp;	/* Maximal mss, negotiated at connection setup */
+#ifdef CONFIG_TCP_SYNACK_PAYLOAD
+	u8	sadata_ok;	/* OK to include data in the SYNACK?	*/
+#endif
 };
 
 /* This is the max number of SACKS that we'll generate and process. It's safe
@@ -230,14 +253,28 @@ struct tcp_options_received {
  * only four options will fit in a standard TCP header */
 #define TCP_NUM_SACKS 4
 
+struct tcp_sadata_payload;
+
 struct tcp_request_sock {
 	struct inet_request_sock 	req;
 #ifdef CONFIG_TCP_MD5SIG
 	/* Only used by TCP MD5 Signature so far. */
 	struct tcp_request_sock_ops	*af_specific;
 #endif
-	u32			 	rcv_isn;
-	u32			 	snt_isn;
+	u32				rcv_isn;
+	u32				snt_isn;
+#ifdef CONFIG_TCP_SYNACK_PAYLOAD
+	/* If sadata_ok is true then sadata_nonce contains valid random bytes.
+	 * This is the second half of the 8 byte nonce. The first is the
+	 * snt_isn in native byte order to save space.
+	 *
+	 * If sadata_ok is true then sadata_payload is non-NULL and this
+	 * object holds a reference to it (sadata_payload->kref)
+	 */
+	struct tcp_sadata_payload	*sadata_payload;
+	u8				sadata_nonce[4]; /* generated nonce */
+	u8				sadata_ok:1;   /* send sadata?    */
+#endif
 };
 
 static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
@@ -411,6 +448,18 @@ struct tcp_sock {
 #endif
 
 	int			linger2;
+
+#ifdef CONFIG_TCP_SYNACK_PAYLOAD
+	union {
+		/* (maybe NULL) the current payload			      */
+		struct tcp_sadata_payload *p;
+		u8 nonce[8];	/* the generated nonce			      */
+	} sadata;
+	u8	sadata_is_nonce : 1,	/* sadata union contains nonce        */
+		sadata_sent : 1,	/* was the SYNACK data sent?          */
+		sadata_rcvd : 1,	/* did we see SYNACK payload data?    */
+		sadata_req : 1;		/* does userland want SYNACK payload? */
+#endif
 };
 
 static inline struct tcp_sock *tcp_sk(const struct sock *sk)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 8983386..163f781 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -30,6 +30,7 @@
 #include <linux/dmaengine.h>
 #include <linux/crypto.h>
 #include <linux/cryptohash.h>
+#include <linux/kref.h>
 
 #include <net/inet_connection_sock.h>
 #include <net/inet_timewait_sock.h>
@@ -166,6 +167,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCPOPT_SACK             5       /* SACK Block */
 #define TCPOPT_TIMESTAMP	8	/* Better RTT estimations/PAWS */
 #define TCPOPT_MD5SIG		19	/* MD5 Signature (RFC2385) */
+#define TCPOPT_SYNACK_PAYLOAD	255	/* Experimental option for now */
 
 /*
  *     TCP option lengths
@@ -176,6 +178,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCPOLEN_SACK_PERM      2
 #define TCPOLEN_TIMESTAMP      10
 #define TCPOLEN_MD5SIG         18
+#define TCPOLEN_SYNACK_PAYLOAD 2
 
 /* But this is what stacks really send out. */
 #define TCPOLEN_TSTAMP_ALIGNED		12
@@ -186,6 +189,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCPOLEN_SACK_PERBLOCK		8
 #define TCPOLEN_MD5SIG_ALIGNED		20
 #define TCPOLEN_MSS_ALIGNED		4
+#define TCPOLEN_SYNACK_PAYLOAD_ALIGNED	4
 
 /* Flags in tp->nonagle */
 #define TCP_NAGLE_OFF		1	/* Nagle's algo is disabled */
@@ -331,7 +335,10 @@ extern void tcp_enter_quickack_mode(struct sock *sk);
 
 static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
 {
- 	rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
+	rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
+#ifdef CONFIG_TCP_SYNACK_PAYLOAD
+	rx_opt->sadata_ok = 0;
+#endif
 }
 
 #define	TCP_ECN_OK		1
@@ -1402,4 +1409,43 @@ struct tcp_request_sock_ops {
 extern void tcp_v4_init(void);
 extern void tcp_init(void);
 
+#ifdef CONFIG_TCP_SYNACK_PAYLOAD
+/**
+ * struct tcp_sadata_payload - a SYN/ACK data payload
+ * @kref: reference count; the last put frees the structure
+ * @len: the length of the trailing data payload
+ * @nonce_off: the offset of the nonce in the payload, if any
+ * @inc_nonce: include nonce iff true
+ * @data: trailing payload data
+ *
+ * This structure holds a constant payload that is to be included in SYNACK
+ * packets when the SYN requests it.
+ *
+ * It is immutable (save for the reference counter) once created. A tcp_sock
+ * points to the current one and request socks take a reference to it.
+ */
+struct tcp_sadata_payload {
+	struct kref	kref;
+	u8		len;
+	u8		nonce_off : 6,
+			inc_nonce : 1;
+	u8		data[0];
+};
+
+static inline void tcp_sadata_payload_release(struct kref *kref)
+{
+	kfree(container_of(kref, struct tcp_sadata_payload, kref));
+}
+
+static inline int tcp_rsk_sadata_len(const struct tcp_request_sock *trsk)
+{
+	return trsk->sadata_ok ? trsk->sadata_payload->len : 0;
+}
+#else
+static inline int tcp_rsk_sadata_len(const struct tcp_request_sock *trsk)
+{
+	return 0;
+}
+#endif
+
 #endif	/* _TCP_H */
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 591ea23..90e612b 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -630,5 +630,14 @@ config TCP_MD5SIG
 
 	  If unsure, say N.
 
+config TCP_SYNACK_PAYLOAD
+	bool "TCP: Enable payloads in SYNACK frames"
+	depends on EXPERIMENTAL
+	---help---
+	  This option enables an experimental, backwards compatible, extension
+	  to TCP where data can be included in the SYNACK frame of a handshake.
+
+	  If unsure, say N.
+
 source "net/ipv4/ipvs/Kconfig"
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1ab341e..f5e2eab 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1990,7 +1990,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 	int val;
 	int err = 0;
 
-	/* This is a string value all the others are int's */
+	/* These are string values, all the others are int's */
 	if (optname == TCP_CONGESTION) {
 		char name[TCP_CA_NAME_MAX];
 
@@ -2008,6 +2008,55 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		release_sock(sk);
 		return err;
 	}
+#ifdef CONFIG_TCP_SYNACK_PAYLOAD
+	else if (optname == TCP_SADATA) {
+		struct tcp_sadata tcpsa;
+		struct tcp_sadata_payload *sadp;
+
+		if (optlen < sizeof(tcpsa))
+			return -EINVAL;
+		if (copy_from_user(&tcpsa, optval, sizeof(tcpsa)))
+			return -EFAULT;
+		if (tcpsa.tcpsa_payload_len > TCP_SADATA_MAX_PAYLOAD)
+			return -EINVAL;
+		if (tcpsa.tcpsa_flags & TCP_SADATA_INC_NONCE &&
+		    tcpsa.tcpsa_nonce_offset > TCP_SADATA_MAX_PAYLOAD - 8)
+			return -EINVAL;
+
+		lock_sock(sk);
+		tp->sadata_req = TCP_SADATA_REQUEST & tcpsa.tcpsa_flags ? 1 : 0;
+
+		if (tcpsa.tcpsa_payload_len == 0) {
+			if (!tp->sadata_is_nonce && tp->sadata.p) {
+				kref_put(&tp->sadata.p->kref,
+					 tcp_sadata_payload_release);
+				tp->sadata.p = NULL;
+			}
+		} else if ((sadp = kmalloc(sizeof(struct tcp_sadata_payload) +
+					   tcpsa.tcpsa_payload_len,
+					   GFP_ATOMIC))) {
+			if (unlikely(tp->sadata_is_nonce)) {
+				tp->sadata_is_nonce = 0;
+			} else if (unlikely(tp->sadata.p)) {
+				kref_put(&tp->sadata.p->kref,
+					 tcp_sadata_payload_release);
+			}
+			kref_init(&sadp->kref);
+			memcpy(sadp->data, tcpsa.tcpsa_payload,
+			       tcpsa.tcpsa_payload_len);
+			sadp->len = tcpsa.tcpsa_payload_len;
+			sadp->nonce_off = tcpsa.tcpsa_nonce_offset;
+			sadp->inc_nonce =
+				TCP_SADATA_INC_NONCE & tcpsa.tcpsa_flags ? 1:0;
+			tp->sadata.p = sadp;
+		} else {
+			err = -ENOMEM;
+		}
+
+		release_sock(sk);
+		return err;
+	}
+#endif
 
 	if (optlen < sizeof(int))
 		return -EINVAL;
@@ -2269,6 +2318,28 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 	if (get_user(len, optlen))
 		return -EFAULT;
 
+#ifdef CONFIG_TCP_SYNACK_PAYLOAD
+	/* This is a string argument, all the rest are ints */
+	if (optname == TCP_SADATA) {
+		struct tcp_sadata tcpsa;
+
+		if (len < sizeof(tcpsa))
+			return -EINVAL;
+		tcpsa.tcpsa_flags = (tp->sadata_sent ? TCP_SADATA_SENT : 0) |
+				    (tp->sadata_rcvd ? TCP_SADATA_RCVD : 0) |
+				    (tp->sadata_req ? TCP_SADATA_REQUEST : 0);
+		if (tp->sadata_is_nonce) {
+			tcpsa.tcpsa_flags |= TCP_SADATA_NONCE;
+			memcpy(tcpsa.tcpsa_payload, tp->sadata.nonce, 8);
+		}
+		if (copy_to_user(optval, &tcpsa, sizeof(tcpsa)))
+			return -EFAULT;
+		if (put_user(sizeof(tcpsa), optlen))
+			return -EFAULT;
+		return 0;
+	}
+#endif
+
 	len = min_t(unsigned int, len, sizeof(int));
 
 	if (len < 0)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 67ccce2..d76ad9b 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3418,7 +3418,11 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 					tcp_sack_reset(opt_rx);
 				}
 				break;
-
+#ifdef CONFIG_TCP_SYNACK_PAYLOAD
+			case TCPOPT_SYNACK_PAYLOAD:
+				opt_rx->sadata_ok = 1;
+				break;
+#endif
 			case TCPOPT_SACK:
 				if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
 				   !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
@@ -4975,6 +4979,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	int saved_clamp = tp->rx_opt.mss_clamp;
+	char queued = 0;
 
 	tcp_parse_options(skb, &tp->rx_opt, 0);
 
@@ -5073,6 +5078,22 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		 * Change state from SYN-SENT only after copied_seq
 		 * is initialized. */
 		tp->copied_seq = tp->rcv_nxt;
+
+#ifdef CONFIG_TCP_SYNACK_PAYLOAD
+		if (skb->len > (th->doff << 2) && tp->sadata_req &&
+		    tp->rx_opt.sadata_ok) {
+			__skb_pull(skb, th->doff << 2);
+			__skb_queue_tail(&sk->sk_receive_queue, skb);
+			skb_set_owner_r(skb, sk);
+			sk->sk_data_ready(sk, 0);
+			tp->sadata_rcvd = 1;
+			queued = 1;
+			tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+			tp->rcv_wup = TCP_SKB_CB(skb)->end_seq;
+			tp->copied_seq = TCP_SKB_CB(skb)->seq + 1;
+		}
+#endif
+
 		smp_mb();
 		tcp_set_state(sk, TCP_ESTABLISHED);
 
@@ -5124,11 +5145,15 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 						  TCP_DELACK_MAX, TCP_RTO_MAX);
 
 discard:
-			__kfree_skb(skb);
+			if (!queued)
+				__kfree_skb(skb);
 			return 0;
 		} else {
 			tcp_send_ack(sk);
 		}
+
+		if (queued)
+			return 0;
 		return -1;
 	}
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 44c1e93..677121c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -744,6 +744,13 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req)
  */
 static void tcp_v4_reqsk_destructor(struct request_sock *req)
 {
+#ifdef CONFIG_TCP_SYNACK_PAYLOAD
+	if (tcp_rsk(req)->sadata_ok) {
+		kref_put(&tcp_rsk(req)->sadata_payload->kref,
+			 tcp_sadata_payload_release);
+	}
+#endif
+
 	kfree(inet_rsk(req)->opt);
 }
 
@@ -1302,6 +1309,15 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	}
 	tcp_rsk(req)->snt_isn = isn;
 
+#ifdef CONFIG_TCP_SYNACK_PAYLOAD
+	tcp_rsk(req)->sadata_ok = tmp_opt.sadata_ok;
+	if (tmp_opt.sadata_ok) {
+		tcp_rsk(req)->sadata_payload = tcp_sk(sk)->sadata.p;
+		kref_get(&tcp_sk(sk)->sadata.p->kref);
+		get_random_bytes(&tcp_rsk(req)->sadata_nonce, 4);
+	}
+#endif
+
 	if (__tcp_v4_send_synack(sk, req, dst) || want_cookie)
 		goto drop_and_free;
 
@@ -1354,6 +1370,13 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	newinet->saddr	      = ireq->loc_addr;
 	newinet->opt	      = ireq->opt;
 	ireq->opt	      = NULL;
+#ifdef CONFIG_TCP_SYNACK_PAYLOAD
+	if (tcp_rsk(req)->sadata_ok) {
+		kref_put(&tcp_rsk(req)->sadata_payload->kref,
+			 tcp_sadata_payload_release);
+		tcp_rsk(req)->sadata_ok = 0;
+	}
+#endif
 	newinet->mc_index     = inet_iif(skb);
 	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
@@ -1792,6 +1815,12 @@ static int tcp_v4_init_sock(struct sock *sk)
 	tp->af_specific = &tcp_sock_ipv4_specific;
 #endif
 
+#ifdef CONFIG_TCP_SYNACK_PAYLOAD
+	tp->sadata.p = NULL;
+	tp->sadata_sent = tp->sadata_rcvd = tp->sadata_req = 0;
+	tp->sadata_is_nonce = 0;
+#endif
+
 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
 
@@ -1843,6 +1872,13 @@ void tcp_v4_destroy_sock(struct sock *sk)
 		sk->sk_sndmsg_page = NULL;
 	}
 
+#ifdef CONFIG_TCP_SYNACK_PAYLOAD
+	if (!tp->sadata_is_nonce && tp->sadata.p) {
+		kref_put(&tp->sadata.p->kref, tcp_sadata_payload_release);
+		tp->sadata.p = NULL;
+	}
+#endif
+
 	atomic_dec(&tcp_sockets_allocated);
 }
 
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f976fc5..1e42355 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -394,7 +394,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		newtp = tcp_sk(newsk);
 		newtp->pred_flags = 0;
 		newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1;
-		newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = treq->snt_isn + 1;
+		newtp->snd_sml = newtp->snd_una = newtp->snd_nxt =
+			treq->snt_isn + 1 + tcp_rsk_sadata_len(treq);
 
 		tcp_prequeue_init(newtp);
 
@@ -427,7 +428,17 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		tcp_set_ca_state(newsk, TCP_CA_Open);
 		tcp_init_xmit_timers(newsk);
 		skb_queue_head_init(&newtp->out_of_order_queue);
-		newtp->write_seq = treq->snt_isn + 1;
+		newtp->write_seq = treq->snt_isn + 1 + tcp_rsk_sadata_len(treq);
+#ifdef CONFIG_TCP_SYNACK_PAYLOAD
+		newtp->sadata_sent = treq->sadata_ok;
+		if (treq->sadata_ok && treq->sadata_payload->inc_nonce) {
+			memcpy(newtp->sadata.nonce, &treq->snt_isn, 4);
+			memcpy(&newtp->sadata.nonce[4], &treq->sadata_nonce, 4);
+			newtp->sadata_is_nonce = 1;
+		} else {
+			newtp->sadata.p = NULL;
+		}
+#endif
 		newtp->pushed_seq = newtp->write_seq;
 
 		newtp->rx_opt.saw_tstamp = 0;
@@ -595,7 +606,8 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 	 * Invalid ACK: reset will be sent by listening socket
 	 */
 	if ((flg & TCP_FLAG_ACK) &&
-	    (TCP_SKB_CB(skb)->ack_seq != tcp_rsk(req)->snt_isn + 1))
+	    (TCP_SKB_CB(skb)->ack_seq != tcp_rsk(req)->snt_isn +
+					 1 + tcp_rsk_sadata_len(tcp_rsk(req))))
 		return sk;
 
 	/* Also, it would be not so bad idea to check rcv_tsecr, which
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index a00532d..1fb7f0a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -348,6 +348,7 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
 #define OPTION_SACK_ADVERTISE	(1 << 0)
 #define OPTION_TS		(1 << 1)
 #define OPTION_MD5		(1 << 2)
+#define OPTION_SYNACK_PAYLOAD	(1 << 3)
 
 struct tcp_out_options {
 	u8 options;		/* bit field of OPTION_* */
@@ -430,6 +431,15 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
 			tp->rx_opt.eff_sacks--;
 		}
 	}
+
+#ifdef CONFIG_TCP_SYNACK_PAYLOAD
+	if (unlikely(OPTION_SYNACK_PAYLOAD & opts->options)) {
+		*ptr++ = htonl((TCPOPT_NOP << 24) |
+			       (TCPOPT_NOP << 16) |
+			       (TCPOPT_SYNACK_PAYLOAD << 8) |
+			       TCPOLEN_SYNACK_PAYLOAD);
+	}
+#endif
 }
 
 static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
@@ -476,6 +486,14 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 			size += TCPOLEN_SACKPERM_ALIGNED;
 	}
 
+#ifdef CONFIG_TCP_SYNACK_PAYLOAD
+	if (unlikely(tp->sadata_req &&
+	    size + TCPOLEN_SYNACK_PAYLOAD_ALIGNED <= MAX_TCP_OPTION_SPACE)) {
+		opts->options |= OPTION_SYNACK_PAYLOAD;
+		size += TCPOLEN_SYNACK_PAYLOAD_ALIGNED;
+	}
+#endif
+
 	return size;
 }
 
@@ -504,6 +522,25 @@ static unsigned tcp_synack_options(struct sock *sk,
 	   to be unnecessary. */
 	doing_ts = ireq->tstamp_ok && !(*md5 && ireq->sack_ok);
 
+#ifdef CONFIG_TCP_SYNACK_PAYLOAD
+	if (unlikely(tcp_rsk(req)->sadata_ok)) {
+		opts->options |= OPTION_SYNACK_PAYLOAD;
+		size += TCPOLEN_SYNACK_PAYLOAD_ALIGNED;
+
+		/* Consider the pessimal case: all options included. In this
+		 * case the options look like:
+		 *   MD5(20) + MSS(4) + WSCALE(4) + TS(12) + SADATAOK(4) > 40
+		 * Thus, when including both SADATAOK and MD5 we disable TS.
+		 * The reason is that we must be consistent across
+		 * retransmissions in our inclusion of SADATAOK. But a user
+		 * could configure an MD5 option between two retransmissions.
+		 * So, to be safe, we must disable TS rather than SADATAOK
+		 */
+		if (*md5)
+			doing_ts = 0;
+	}
+#endif
+
 	opts->mss = mss;
 	size += TCPOLEN_MSS_ALIGNED;
 
@@ -2285,6 +2322,22 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	 */
 	tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
 			     TCPCB_FLAG_SYN | TCPCB_FLAG_ACK);
+#ifdef CONFIG_TCP_SYNACK_PAYLOAD
+	if (tcp_rsk(req)->sadata_ok) {
+		const struct tcp_sadata_payload *sadp =
+			tcp_rsk(req)->sadata_payload;
+		u8 *buf = skb_put(skb, sadp->len);
+		memcpy(buf, sadp->data, sadp->len);
+		if (sadp->inc_nonce &&
+		    sadp->len >= 8 + sadp->nonce_off) {
+			memcpy(buf + sadp->nonce_off,
+			       &tcp_rsk(req)->snt_isn, 4);
+			memcpy(buf + sadp->nonce_off + 4,
+			       tcp_rsk(req)->sadata_nonce, 4);
+		}
+		TCP_SKB_CB(skb)->end_seq += sadp->len;
+	}
+#endif
 	th->seq = htonl(TCP_SKB_CB(skb)->seq);
 	th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5b90b36..e8387ea 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -532,6 +532,13 @@ static void tcp_v6_reqsk_destructor(struct request_sock *req)
 {
 	if (inet6_rsk(req)->pktopts)
 		kfree_skb(inet6_rsk(req)->pktopts);
+
+#ifdef CONFIG_TCP_SYNACK_PAYLOAD
+	if (tcp_rsk(req)->sadata_ok) {
+		kref_put(&tcp_rsk(req)->sadata_payload->kref,
+			 tcp_sadata_payload_release);
+	}
+#endif
 }
 
 #ifdef CONFIG_TCP_MD5SIG
@@ -1265,6 +1272,15 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 
 	tcp_rsk(req)->snt_isn = isn;
 
+#ifdef CONFIG_TCP_SYNACK_PAYLOAD
+	tcp_rsk(req)->sadata_ok = tmp_opt.sadata_ok;
+	if (tmp_opt.sadata_ok) {
+		tcp_rsk(req)->sadata_payload = tcp_sk(sk)->sadata.p;
+		kref_get(&tcp_sk(sk)->sadata.p->kref);
+		get_random_bytes(&tcp_rsk(req)->sadata_nonce, 4);
+	}
+#endif
+
 	security_inet_conn_request(sk, skb, req);
 
 	if (tcp_v6_send_synack(sk, req))
@@ -1451,6 +1467,14 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
 						     newnp->opt->opt_flen);
 
+#ifdef CONFIG_TCP_SYNACK_PAYLOAD
+	if (tcp_rsk(req)->sadata_ok) {
+		kref_put(&tcp_rsk(req)->sadata_payload->kref,
+			 tcp_sadata_payload_release);
+		tcp_rsk(req)->sadata_ok = 0;
+	}
+#endif
+
 	tcp_mtup_init(newsk);
 	tcp_sync_mss(newsk, dst_mtu(dst));
 	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
@@ -1894,6 +1918,12 @@ static int tcp_v6_init_sock(struct sock *sk)
 	tp->af_specific = &tcp_sock_ipv6_specific;
 #endif
 
+#ifdef CONFIG_TCP_SYNACK_PAYLOAD
+	tp->sadata.p = NULL;
+	tp->sadata_sent = tp->sadata_rcvd = tp->sadata_req = 0;
+	tp->sadata_is_nonce = 0;
+#endif
+
 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
 
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ