Message-id: <20180201000716.69301-6-cpaasch@apple.com>
Date:   Wed, 31 Jan 2018 16:07:07 -0800
From:   Christoph Paasch <cpaasch@...le.com>
To:     netdev@...r.kernel.org
Cc:     Eric Dumazet <edumazet@...gle.com>,
        Mat Martineau <mathew.j.martineau@...ux.intel.com>
Subject: [RFC v2 05/14] tcp: Register handlers for extra TCP options

From: Mat Martineau <mathew.j.martineau@...ux.intel.com>

Allow additional TCP options to be handled by registered hook
functions.

Registered options have a priority that determines the order in which
options are prepared and written. Lower priority numbers are handled
first.

Option parsing will call the provided 'parse' function when a TCP option
number is not recognized by the normal option parsing code.
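
To make the hook concrete, a minimal parse callback for an imaginary
extra option could look like the following sketch. Everything named
'foo', the option kind, and the payload layout are invented for
illustration and are not part of this patch:

struct foo_store {
        struct tcp_extopt_store store;
        u32 peer_value;
        bool required;
};

/* Sketch only: parse 4 bytes of payload from the hypothetical option */
static void foo_parse(int opsize, const unsigned char *opptr,
                      const struct sk_buff *skb,
                      struct tcp_options_received *opt_rx,
                      struct sock *sk, struct tcp_extopt_store *store)
{
        struct foo_store *foo = container_of(store, struct foo_store, store);

        if (opsize != 6) /* kind (1) + length (1) + 4 bytes of payload */
                return;

        foo->peer_value = get_unaligned_be32(opptr + 2);
}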

After parsing, there are two places where we post-process the options.
First, a 'check' callback allows dropping the packet based on the parsed
options (useful, e.g., for TCP MD5SIG). Second, a 'post_process' function
is called after the other validity checks (in-window, PAWS, ...) and can
then update further state for this particular extra option.
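
Continuing the hypothetical 'foo' sketch from above, a check callback
that drops segments missing a required option could be:

/* Sketch only: returning true asks the stack to drop the segment */
static bool foo_check(struct sock *sk, const struct sk_buff *skb,
                      struct tcp_options_received *opt_rx,
                      struct tcp_extopt_store *store)
{
        struct foo_store *foo = container_of(store, struct foo_store, store);

        /* drop if the option was required but the peer did not send it */
        return foo->required && !foo->peer_value;
}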

In the output path, the 'prepare' function determines the required space
for registered options and stores associated data. 'write' then adds the
option to the TCP header.
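
The corresponding output-path callbacks of the sketch might look as
follows; the made-up kind 42 and the 8-byte size are, again, purely
illustrative:

/* Sketch only: reserve 8 aligned bytes if there is room */
static unsigned int foo_prepare(struct sk_buff *skb, u8 flags,
                                unsigned int remaining,
                                struct tcp_out_options *opts,
                                const struct sock *sk,
                                struct tcp_extopt_store *store)
{
        if (remaining < 8)
                return 0;

        return 8; /* 2 NOPs + kind + length + 4 bytes of payload */
}

/* Sketch only: emit exactly the bytes that 'prepare' accounted for */
static __be32 *foo_write(__be32 *ptr, struct sk_buff *skb,
                         struct tcp_out_options *opts, struct sock *sk,
                         struct tcp_extopt_store *store)
{
        struct foo_store *foo = container_of(store, struct foo_store, store);

        *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                       (42 << 8) | 6);
        *ptr++ = htonl(foo->peer_value);

        return ptr;
}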

These additional TCP options are stored in hlists on the TCP socket. To
pass the state and options around during the 3-way handshake and in
time-wait state, the hlists are also present on the tcp_request_sock and
tcp_timewait_sock. The list is copied from the listener to the request
socket (calling into the 'copy' callback), then moved from the request
socket to the TCP socket, and finally to the time-wait socket.
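
Tying the sketch together, a module would fill in a tcp_extopt_ops
(including a 'destroy' callback, since the framework invokes it
unconditionally when the list is torn down) and register its store while
holding the socket lock. Once more, all 'foo' names are hypothetical:

static void foo_destroy(struct tcp_extopt_store *store)
{
        kfree(container_of(store, struct foo_store, store));
}

static const struct tcp_extopt_ops foo_ops = {
        .option_kind    = 42,           /* invented kind, illustration only */
        .priority       = 10,           /* lower numbers are handled first */
        .parse          = foo_parse,
        .check          = foo_check,
        .prepare        = foo_prepare,
        .write          = foo_write,
        .destroy        = foo_destroy,
        .owner          = THIS_MODULE,
};

/* Sketch only: must be called with the socket lock held */
static int foo_attach(struct sock *sk)
{
        struct foo_store *foo = kzalloc(sizeof(*foo), GFP_KERNEL);
        int ret;

        if (!foo)
                return -ENOMEM;

        foo->store.ops = &foo_ops;
        ret = tcp_register_extopt(&foo->store, sk);
        if (ret)
                kfree(foo);

        return ret;
}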

Signed-off-by: Mat Martineau <mathew.j.martineau@...ux.intel.com>
Signed-off-by: Christoph Paasch <cpaasch@...le.com>
---

Notes:
    v2: * Fix a compiler error in tcp_twsk_destructor when TCP_MD5SIG is disabled

 drivers/infiniband/hw/cxgb4/cm.c |   2 +-
 include/linux/tcp.h              |  28 ++++
 include/net/tcp.h                | 110 ++++++++++++-
 net/ipv4/syncookies.c            |   6 +-
 net/ipv4/tcp.c                   | 327 ++++++++++++++++++++++++++++++++++++++-
 net/ipv4/tcp_input.c             |  49 +++++-
 net/ipv4/tcp_ipv4.c              |  98 +++++++++---
 net/ipv4/tcp_minisocks.c         |  34 +++-
 net/ipv4/tcp_output.c            |  40 ++---
 net/ipv6/syncookies.c            |   6 +-
 net/ipv6/tcp_ipv6.c              |  32 ++++
 11 files changed, 677 insertions(+), 55 deletions(-)

diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 21db3b48a617..a1ea5583f07b 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -3746,7 +3746,7 @@ static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos)
 	 */
 	memset(&tmp_opt, 0, sizeof(tmp_opt));
 	tcp_clear_options(&tmp_opt);
-	tcp_parse_options(&init_net, skb, &tmp_opt, 0, NULL);
+	tcp_parse_options(&init_net, skb, &tmp_opt, 0, NULL, NULL);
 
 	req = __skb_push(skb, sizeof(*req));
 	memset(req, 0, sizeof(*req));
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 8f4c54986f97..6e1f0f29bf24 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -115,6 +115,24 @@ static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
 #endif
 }
 
+#define OPTION_SACK_ADVERTISE	(1 << 0)
+#define OPTION_TS		(1 << 1)
+#define OPTION_MD5		(1 << 2)
+#define OPTION_WSCALE		(1 << 3)
+#define OPTION_FAST_OPEN_COOKIE	(1 << 8)
+#define OPTION_SMC		(1 << 9)
+
+struct tcp_out_options {
+	u16 options;		/* bit field of OPTION_* */
+	u16 mss;		/* 0 to disable */
+	u8 ws;			/* window scale, 0 to disable */
+	u8 num_sack_blocks;	/* number of SACK blocks to include */
+	u8 hash_size;		/* bytes in hash_location */
+	__u8 *hash_location;	/* temporary pointer, overloaded */
+	__u32 tsval, tsecr;	/* need to include OPTION_TS */
+	struct tcp_fastopen_cookie *fastopen_cookie;	/* Fast open cookie */
+};
+
 /* This is the max number of SACKS that we'll generate and process. It's safe
  * to increase this, although since:
  *   size = TCPOLEN_SACK_BASE_ALIGNED (4) + n * TCPOLEN_SACK_PERBLOCK (8)
@@ -137,6 +155,7 @@ struct tcp_request_sock {
 						  * FastOpen it's the seq#
 						  * after data-in-SYN.
 						  */
+	struct hlist_head		tcp_option_list;
 };
 
 static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
@@ -384,6 +403,8 @@ struct tcp_sock {
 	 */
 	struct request_sock *fastopen_rsk;
 	u32	*saved_syn;
+
+	struct hlist_head tcp_option_list;
 };
 
 enum tsq_enum {
@@ -411,6 +432,11 @@ static inline struct tcp_sock *tcp_sk(const struct sock *sk)
 	return (struct tcp_sock *)sk;
 }
 
+static inline struct sock *tcp_to_sk(const struct tcp_sock *tp)
+{
+	return (struct sock *)tp;
+}
+
 struct tcp_timewait_sock {
 	struct inet_timewait_sock tw_sk;
 #define tw_rcv_nxt tw_sk.__tw_common.skc_tw_rcv_nxt
@@ -423,6 +449,8 @@ struct tcp_timewait_sock {
 	u32			  tw_last_oow_ack_time;
 
 	long			  tw_ts_recent_stamp;
+
+	struct hlist_head	  tcp_option_list;
 #ifdef CONFIG_TCP_MD5SIG
 	struct tcp_md5sig_key	  *tw_md5_key;
 #endif
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 093e967a2960..be6709e380a6 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -202,6 +202,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCPOLEN_FASTOPEN_BASE  2
 #define TCPOLEN_EXP_FASTOPEN_BASE  4
 #define TCPOLEN_EXP_SMC_BASE   6
+#define TCPOLEN_EXP_BASE       6
 
 /* But this is what stacks really send out. */
 #define TCPOLEN_TSTAMP_ALIGNED		12
@@ -403,7 +404,8 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 		int flags, int *addr_len);
 void tcp_parse_options(const struct net *net, const struct sk_buff *skb,
 		       struct tcp_options_received *opt_rx,
-		       int estab, struct tcp_fastopen_cookie *foc);
+		       int estab, struct tcp_fastopen_cookie *foc,
+		       struct sock *sk);
 const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
 
 /*
@@ -2094,4 +2096,110 @@ static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
 #if IS_ENABLED(CONFIG_SMC)
 extern struct static_key_false tcp_have_smc;
 #endif
+
+struct tcp_extopt_store;
+
+struct tcp_extopt_ops {
+	u32			option_kind;
+	unsigned char		priority;
+	void (*parse)(int opsize, const unsigned char *opptr,
+		      const struct sk_buff *skb,
+		      struct tcp_options_received *opt_rx,
+		      struct sock *sk,
+		      struct tcp_extopt_store *store);
+	bool (*check)(struct sock *sk,
+		      const struct sk_buff *skb,
+		      struct tcp_options_received *opt_rx,
+		      struct tcp_extopt_store *store);
+	void (*post_process)(struct sock *sk,
+			     struct tcp_options_received *opt_rx,
+			     struct tcp_extopt_store *store);
+	/* Return the number of bytes consumed */
+	unsigned int (*prepare)(struct sk_buff *skb, u8 flags,
+				unsigned int remaining,
+				struct tcp_out_options *opts,
+				const struct sock *sk,
+				struct tcp_extopt_store *store);
+	__be32 *(*write)(__be32 *ptr, struct sk_buff *skb,
+			 struct tcp_out_options *opts, struct sock *sk,
+			 struct tcp_extopt_store *store);
+	int (*response_prepare)(struct sk_buff *orig, u8 flags,
+				unsigned int remaining,
+				struct tcp_out_options *opts,
+				const struct sock *sk,
+				struct tcp_extopt_store *store);
+	__be32 *(*response_write)(__be32 *ptr, struct sk_buff *orig,
+				  struct tcphdr *th,
+				  struct tcp_out_options *opts,
+				  const struct sock *sk,
+				  struct tcp_extopt_store *store);
+	int (*add_header_len)(const struct sock *orig,
+			      const struct sock *sk,
+			      struct tcp_extopt_store *store);
+	struct tcp_extopt_store *(*copy)(struct sock *listener,
+					 struct request_sock *req,
+					 struct tcp_options_received *opt,
+					 struct tcp_extopt_store *from);
+	struct tcp_extopt_store *(*move)(struct sock *from, struct sock *to,
+					 struct tcp_extopt_store *store);
+	void (*destroy)(struct tcp_extopt_store *store);
+	struct module		*owner;
+};
+
+/* The tcp_extopt_store is the generic structure that will be added to the
+ * list of TCP extra-options.
+ *
+ * Protocols using the framework can create a wrapper structure around it that
+ * stores protocol-specific state. The tcp_extopt-functions will provide
+ * tcp_extopt_store though, so the protocol can use container_of to get
+ * access to the wrapper structure containing the state.
+ */
+struct tcp_extopt_store {
+	struct hlist_node		list;
+	const struct tcp_extopt_ops	*ops;
+};
+
+struct hlist_head *tcp_extopt_get_list(const struct sock *sk);
+
+struct tcp_extopt_store *tcp_extopt_find_kind(u32 kind, const struct sock *sk);
+
+void tcp_extopt_parse(u32 opcode, int opsize, const unsigned char *opptr,
+		      const struct sk_buff *skb,
+		      struct tcp_options_received *opt_rx, struct sock *sk);
+
+bool tcp_extopt_check(struct sock *sk, const struct sk_buff *skb,
+		      struct tcp_options_received *opt_rx);
+
+void tcp_extopt_post_process(struct sock *sk,
+			     struct tcp_options_received *opt_rx);
+
+unsigned int tcp_extopt_prepare(struct sk_buff *skb, u8 flags,
+				unsigned int remaining,
+				struct tcp_out_options *opts,
+				const struct sock *sk);
+
+void tcp_extopt_write(__be32 *ptr, struct sk_buff *skb,
+		      struct tcp_out_options *opts, struct sock *sk);
+
+int tcp_extopt_response_prepare(struct sk_buff *orig, u8 flags,
+				unsigned int remaining,
+				struct tcp_out_options *opts,
+				const struct sock *sk);
+
+void tcp_extopt_response_write(__be32 *ptr, struct sk_buff *orig,
+			       struct tcphdr *th, struct tcp_out_options *opts,
+			       const struct sock *sk);
+
+int tcp_extopt_add_header(const struct sock *orig, const struct sock *sk);
+
+/* Socket lock must be held when calling this function */
+int tcp_register_extopt(struct tcp_extopt_store *store, struct sock *sk);
+
+void tcp_extopt_copy(struct sock *listener, struct request_sock *req,
+		     struct tcp_options_received *opt);
+
+void tcp_extopt_move(struct sock *from, struct sock *to);
+
+void tcp_extopt_destroy(struct sock *sk);
+
 #endif	/* _TCP_H */
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index fda37f2862c9..8373abf19440 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -313,7 +313,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 
 	/* check for timestamp cookie support */
 	memset(&tcp_opt, 0, sizeof(tcp_opt));
-	tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL);
+	tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL, sk);
 
 	if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
 		tsoff = secure_tcp_ts_off(sock_net(sk),
@@ -325,6 +325,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
 	if (!cookie_timestamp_decode(sock_net(sk), &tcp_opt))
 		goto out;
 
+	if (unlikely(!hlist_empty(&tp->tcp_option_list)) &&
+	    tcp_extopt_check(sk, skb, &tcp_opt))
+		goto out;
+
 	ret = NULL;
 	req = inet_reqsk_alloc(&tcp_request_sock_ops, sk, false); /* for safety */
 	if (!req)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 874c9317b8df..ffb5f4fbd935 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -414,6 +414,7 @@ void tcp_init_sock(struct sock *sk)
 	tcp_init_xmit_timers(sk);
 	INIT_LIST_HEAD(&tp->tsq_node);
 	INIT_LIST_HEAD(&tp->tsorted_sent_queue);
+	INIT_HLIST_HEAD(&tp->tcp_option_list);
 
 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
 	tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
@@ -3506,6 +3507,331 @@ EXPORT_SYMBOL(tcp_md5_hash_key);
 
 #endif
 
+struct hlist_head *tcp_extopt_get_list(const struct sock *sk)
+{
+	if (sk_fullsock(sk))
+		return &tcp_sk(sk)->tcp_option_list;
+	else if (sk->sk_state == TCP_NEW_SYN_RECV)
+		return &tcp_rsk(inet_reqsk(sk))->tcp_option_list;
+	else if (sk->sk_state == TCP_TIME_WAIT)
+		return &tcp_twsk(sk)->tcp_option_list;
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(tcp_extopt_get_list);
+
+/* Caller must ensure that rcu is locked */
+struct tcp_extopt_store *tcp_extopt_find_kind(u32 kind, const struct sock *sk)
+{
+	struct tcp_extopt_store *entry;
+	struct hlist_head *lhead;
+
+	lhead = tcp_extopt_get_list(sk);
+
+	hlist_for_each_entry_rcu(entry, lhead, list) {
+		if (entry->ops->option_kind == kind)
+			return entry;
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(tcp_extopt_find_kind);
+
+void tcp_extopt_parse(u32 opcode, int opsize, const unsigned char *opptr,
+		      const struct sk_buff *skb,
+		      struct tcp_options_received *opt_rx, struct sock *sk)
+{
+	struct tcp_extopt_store *entry;
+
+	rcu_read_lock();
+	entry = tcp_extopt_find_kind(opcode, sk);
+
+	if (entry && entry->ops->parse)
+		entry->ops->parse(opsize, opptr, skb, opt_rx, sk, entry);
+	rcu_read_unlock();
+}
+
+bool tcp_extopt_check(struct sock *sk, const struct sk_buff *skb,
+		      struct tcp_options_received *opt_rx)
+{
+	struct tcp_extopt_store *entry;
+	struct hlist_head *lhead;
+	bool drop = false;
+
+	lhead = tcp_extopt_get_list(sk);
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(entry, lhead, list) {
+		bool ret = false;
+
+		if (entry->ops->check)
+			ret = entry->ops->check(sk, skb, opt_rx, entry);
+
+		if (ret)
+			drop = true;
+	}
+	rcu_read_unlock();
+
+	return drop;
+}
+EXPORT_SYMBOL_GPL(tcp_extopt_check);
+
+void tcp_extopt_post_process(struct sock *sk,
+			     struct tcp_options_received *opt_rx)
+{
+	struct tcp_extopt_store *entry;
+	struct hlist_head *lhead;
+
+	lhead = tcp_extopt_get_list(sk);
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(entry, lhead, list) {
+		if (entry->ops->post_process)
+			entry->ops->post_process(sk, opt_rx, entry);
+	}
+	rcu_read_unlock();
+}
+
+unsigned int tcp_extopt_prepare(struct sk_buff *skb, u8 flags,
+				unsigned int remaining,
+				struct tcp_out_options *opts,
+				const struct sock *sk)
+{
+	struct tcp_extopt_store *entry;
+	struct hlist_head *lhead;
+	unsigned int used = 0;
+
+	if (!sk)
+		return 0;
+
+	lhead = tcp_extopt_get_list(sk);
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(entry, lhead, list) {
+		if (unlikely(!entry->ops->prepare))
+			continue;
+
+		used += entry->ops->prepare(skb, flags, remaining - used, opts,
+					    sk, entry);
+	}
+	rcu_read_unlock();
+
+	return roundup(used, 4);
+}
+
+void tcp_extopt_write(__be32 *ptr, struct sk_buff *skb,
+		      struct tcp_out_options *opts, struct sock *sk)
+{
+	struct tcp_extopt_store *entry;
+	struct hlist_head *lhead;
+
+	if (!sk)
+		return;
+
+	lhead = tcp_extopt_get_list(sk);
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(entry, lhead, list) {
+		if (unlikely(!entry->ops->write))
+			continue;
+
+		ptr = entry->ops->write(ptr, skb, opts, sk, entry);
+	}
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(tcp_extopt_write);
+
+int tcp_extopt_response_prepare(struct sk_buff *orig, u8 flags,
+				unsigned int remaining,
+				struct tcp_out_options *opts,
+				const struct sock *sk)
+{
+	struct tcp_extopt_store *entry;
+	struct hlist_head *lhead;
+	unsigned int used = 0;
+
+	if (!sk)
+		return 0;
+
+	lhead = tcp_extopt_get_list(sk);
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(entry, lhead, list) {
+		int ret;
+
+		if (unlikely(!entry->ops->response_prepare))
+			continue;
+
+		ret = entry->ops->response_prepare(orig, flags,
+						   remaining - used, opts,
+						   sk, entry);
+
+		used += ret;
+	}
+	rcu_read_unlock();
+
+	return roundup(used, 4);
+}
+EXPORT_SYMBOL_GPL(tcp_extopt_response_prepare);
+
+void tcp_extopt_response_write(__be32 *ptr, struct sk_buff *orig,
+			       struct tcphdr *th, struct tcp_out_options *opts,
+			       const struct sock *sk)
+{
+	struct tcp_extopt_store *entry;
+	struct hlist_head *lhead;
+
+	if (!sk)
+		return;
+
+	lhead = tcp_extopt_get_list(sk);
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(entry, lhead, list) {
+		if (unlikely(!entry->ops->response_write))
+			continue;
+
+		ptr = entry->ops->response_write(ptr, orig, th, opts, sk, entry);
+	}
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(tcp_extopt_response_write);
+
+int tcp_extopt_add_header(const struct sock *orig, const struct sock *sk)
+{
+	struct tcp_extopt_store *entry;
+	struct hlist_head *lhead;
+	int tcp_header_len = 0;
+
+	lhead = tcp_extopt_get_list(sk);
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(entry, lhead, list) {
+		if (unlikely(!entry->ops->add_header_len))
+			continue;
+
+		tcp_header_len += entry->ops->add_header_len(orig, sk, entry);
+	}
+	rcu_read_unlock();
+
+	return tcp_header_len;
+}
+
+/* Socket lock must be held when calling this function */
+int tcp_register_extopt(struct tcp_extopt_store *store, struct sock *sk)
+{
+	struct hlist_node *add_before = NULL;
+	struct tcp_extopt_store *entry;
+	struct hlist_head *lhead;
+	int ret = 0;
+
+	lhead = tcp_extopt_get_list(sk);
+
+	if (!store->ops->option_kind)
+		return -EINVAL;
+
+	if (!try_module_get(store->ops->owner))
+		return -ENOENT;
+
+	hlist_for_each_entry_rcu(entry, lhead, list) {
+		if (entry->ops->option_kind == store->ops->option_kind) {
+			pr_notice("Option kind %u already registered\n",
+				  store->ops->option_kind);
+			module_put(store->ops->owner);
+			return -EEXIST;
+		}
+
+		if (entry->ops->priority <= store->ops->priority)
+			add_before = &entry->list;
+	}
+
+	if (add_before)
+		hlist_add_behind_rcu(&store->list, add_before);
+	else
+		hlist_add_head_rcu(&store->list, lhead);
+
+	pr_debug("Option kind %u registered\n", store->ops->option_kind);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(tcp_register_extopt);
+
+void tcp_extopt_copy(struct sock *listener, struct request_sock *req,
+		     struct tcp_options_received *opt)
+{
+	struct tcp_extopt_store *entry;
+	struct hlist_head *from, *to;
+
+	from = tcp_extopt_get_list(listener);
+	to = tcp_extopt_get_list(req_to_sk(req));
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(entry, from, list) {
+		struct tcp_extopt_store *new;
+
+		if (!try_module_get(entry->ops->owner)) {
+			pr_err("%s Module get failed while copying\n", __func__);
+			continue;
+		}
+
+		new = entry->ops->copy(listener, req, opt, entry);
+		if (!new) {
+			module_put(entry->ops->owner);
+			continue;
+		}
+
+		hlist_add_tail_rcu(&new->list, to);
+	}
+	rcu_read_unlock();
+}
+
+void tcp_extopt_move(struct sock *from, struct sock *to)
+{
+	struct tcp_extopt_store *entry;
+	struct hlist_head *lfrom, *lto;
+	struct hlist_node *tmp;
+
+	lfrom = tcp_extopt_get_list(from);
+	lto = tcp_extopt_get_list(to);
+
+	rcu_read_lock();
+	hlist_for_each_entry_safe(entry, tmp, lfrom, list) {
+		hlist_del_rcu(&entry->list);
+
+		if (entry->ops->move) {
+			entry = entry->ops->move(from, to, entry);
+			if (!entry)
+				continue;
+		}
+
+		hlist_add_tail_rcu(&entry->list, lto);
+	}
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(tcp_extopt_move);
+
+void tcp_extopt_destroy(struct sock *sk)
+{
+	struct tcp_extopt_store *entry;
+	struct hlist_head *lhead;
+	struct hlist_node *tmp;
+
+	lhead = tcp_extopt_get_list(sk);
+
+	rcu_read_lock();
+	hlist_for_each_entry_safe(entry, tmp, lhead, list) {
+		struct module *owner = entry->ops->owner;
+
+		hlist_del_rcu(&entry->list);
+
+		entry->ops->destroy(entry);
+
+		module_put(owner);
+	}
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(tcp_extopt_destroy);
+
 void tcp_done(struct sock *sk)
 {
 	struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
@@ -3655,7 +3981,6 @@ void __init tcp_init(void)
 		INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
 	}
 
-
 	cnt = tcp_hashinfo.ehash_mask + 1;
 	sysctl_tcp_max_orphans = cnt / 2;
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 94ba88b2246b..187e3fa761c8 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3716,7 +3716,7 @@ static int smc_parse_options(const struct tcphdr *th,
 void tcp_parse_options(const struct net *net,
 		       const struct sk_buff *skb,
 		       struct tcp_options_received *opt_rx, int estab,
-		       struct tcp_fastopen_cookie *foc)
+		       struct tcp_fastopen_cookie *foc, struct sock *sk)
 {
 	const unsigned char *ptr;
 	const struct tcphdr *th = tcp_hdr(skb);
@@ -3816,9 +3816,18 @@ void tcp_parse_options(const struct net *net,
 					tcp_parse_fastopen_option(opsize -
 						TCPOLEN_EXP_FASTOPEN_BASE,
 						ptr + 2, th->syn, foc, true);
-				else
-					smc_parse_options(th, opt_rx, ptr,
-							  opsize);
+				else if (smc_parse_options(th, opt_rx, ptr,
+							   opsize))
+					break;
+				else if (opsize >= TCPOLEN_EXP_BASE)
+					tcp_extopt_parse(get_unaligned_be32(ptr),
+							 opsize, ptr, skb,
+							 opt_rx, sk);
+				break;
+
+			default:
+				tcp_extopt_parse(opcode, opsize, ptr, skb,
+						 opt_rx, sk);
 				break;
 
 			}
@@ -3869,11 +3878,13 @@ static bool tcp_fast_parse_options(const struct net *net,
 			goto extra_opt_check;
 	}
 
-	tcp_parse_options(net, skb, &tp->rx_opt, 1, NULL);
+	tcp_parse_options(net, skb, &tp->rx_opt, 1, NULL, tcp_to_sk(tp));
 	if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
 		tp->rx_opt.rcv_tsecr -= tp->tsoffset;
 
 extra_opt_check:
+	if (unlikely(!hlist_empty(&tp->tcp_option_list)))
+		return tcp_extopt_check(tcp_to_sk(tp), skb, &tp->rx_opt);
 	return false;
 }
 
@@ -5350,6 +5361,9 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 
 	tp->rx_opt.saw_tstamp = 0;
 
+	if (!hlist_empty(&tp->tcp_option_list))
+		goto slow_path;
+
 	/*	pred_flags is 0xS?10 << 16 + snd_wnd
 	 *	if header_prediction is to be made
 	 *	'S' will always be tp->tcp_header_len >> 2
@@ -5537,7 +5551,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
 		/* Get original SYNACK MSS value if user MSS sets mss_clamp */
 		tcp_clear_options(&opt);
 		opt.user_mss = opt.mss_clamp = 0;
-		tcp_parse_options(sock_net(sk), synack, &opt, 0, NULL);
+		tcp_parse_options(sock_net(sk), synack, &opt, 0, NULL, sk);
 		mss = opt.mss_clamp;
 	}
 
@@ -5600,10 +5614,14 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 	int saved_clamp = tp->rx_opt.mss_clamp;
 	bool fastopen_fail;
 
-	tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc);
+	tcp_parse_options(sock_net(sk), skb, &tp->rx_opt, 0, &foc, sk);
 	if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
 		tp->rx_opt.rcv_tsecr -= tp->tsoffset;
 
+	if (unlikely(!hlist_empty(&tp->tcp_option_list)) &&
+	    tcp_extopt_check(sk, skb, &tp->rx_opt))
+		goto discard;
+
 	if (th->ack) {
 		/* rfc793:
 		 * "If the state is SYN-SENT then
@@ -5686,6 +5704,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 			tp->tcp_header_len = sizeof(struct tcphdr);
 		}
 
+		if (unlikely(!hlist_empty(&tp->tcp_option_list)))
+			tcp_extopt_post_process(sk, &tp->rx_opt);
+
 		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
 		tcp_initialize_rcv_mss(sk);
 
@@ -5779,6 +5800,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 
 		tcp_ecn_rcv_syn(tp, th);
 
+		if (unlikely(!hlist_empty(&tp->tcp_option_list)))
+			tcp_extopt_post_process(sk, &tp->rx_opt);
+
 		tcp_mtup_init(sk);
 		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
 		tcp_initialize_rcv_mss(sk);
@@ -6262,12 +6286,17 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 
 	tcp_rsk(req)->af_specific = af_ops;
 	tcp_rsk(req)->ts_off = 0;
+	INIT_HLIST_HEAD(&tcp_rsk(req)->tcp_option_list);
 
 	tcp_clear_options(&tmp_opt);
 	tmp_opt.mss_clamp = af_ops->mss_clamp;
 	tmp_opt.user_mss  = tp->rx_opt.user_mss;
 	tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0,
-			  want_cookie ? NULL : &foc);
+			  want_cookie ? NULL : &foc, sk);
+
+	if (unlikely(!hlist_empty(&tp->tcp_option_list)) &&
+	    tcp_extopt_check(sk, skb, &tmp_opt))
+		goto drop_and_free;
 
 	if (want_cookie && !tmp_opt.saw_tstamp)
 		tcp_clear_options(&tmp_opt);
@@ -6328,6 +6357,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		tcp_reqsk_record_syn(sk, req, skb);
 		fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
 	}
+
+	if (unlikely(!hlist_empty(&tp->tcp_option_list)))
+		tcp_extopt_copy(sk, req, &tmp_opt);
+
 	if (fastopen_sk) {
 		af_ops->send_synack(fastopen_sk, dst, &fl, req,
 				    &foc, TCP_SYNACK_FASTOPEN);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 95738aa0d8a6..4112594d04be 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -600,10 +600,9 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 	const struct tcphdr *th = tcp_hdr(skb);
 	struct {
 		struct tcphdr th;
-#ifdef CONFIG_TCP_MD5SIG
-		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
-#endif
+		__be32 opt[(MAX_TCP_OPTION_SPACE >> 2)];
 	} rep;
+	struct hlist_head *extopt_list = NULL;
 	struct ip_reply_arg arg;
 #ifdef CONFIG_TCP_MD5SIG
 	struct tcp_md5sig_key *key = NULL;
@@ -613,6 +612,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 	struct sock *sk1 = NULL;
 #endif
 	struct net *net;
+	int offset = 0;
 
 	/* Never send a reset in response to a reset. */
 	if (th->rst)
@@ -624,6 +624,9 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 	if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
 		return;
 
+	if (sk)
+		extopt_list = tcp_extopt_get_list(sk);
+
 	/* Swap the send and the receive. */
 	memset(&rep, 0, sizeof(rep));
 	rep.th.dest   = th->source;
@@ -678,19 +681,44 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
 			goto out;
 
 	}
+#endif
+
+	if (unlikely(extopt_list && !hlist_empty(extopt_list))) {
+		unsigned int remaining;
+		struct tcp_out_options opts;
+		int used;
 
+		remaining = sizeof(rep.opt);
+#ifdef CONFIG_TCP_MD5SIG
+		if (key)
+			remaining -= TCPOLEN_MD5SIG_ALIGNED;
+#endif
+
+		memset(&opts, 0, sizeof(opts));
+
+		used = tcp_extopt_response_prepare(skb, TCPHDR_RST, remaining,
+						   &opts, sk);
+
+		arg.iov[0].iov_len += used;
+		rep.th.doff = arg.iov[0].iov_len / 4;
+
+		tcp_extopt_response_write(&rep.opt[0], skb, &rep.th, &opts, sk);
+		offset += used / 4;
+	}
+
+#ifdef CONFIG_TCP_MD5SIG
 	if (key) {
-		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
-				   (TCPOPT_NOP << 16) |
-				   (TCPOPT_MD5SIG << 8) |
-				   TCPOLEN_MD5SIG);
+		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
+					  (TCPOPT_NOP << 16) |
+					  (TCPOPT_MD5SIG << 8) |
+					  TCPOLEN_MD5SIG);
 		/* Update length and the length the header thinks exists */
 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
 		rep.th.doff = arg.iov[0].iov_len / 4;
 
-		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
-				     key, ip_hdr(skb)->saddr,
-				     ip_hdr(skb)->daddr, &rep.th);
+		tcp_v4_md5_hash_hdr((__u8 *)&rep.opt[offset],
+				    key, ip_hdr(skb)->saddr,
+				    ip_hdr(skb)->daddr, &rep.th);
 	}
 #endif
 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
@@ -742,14 +770,14 @@ static void tcp_v4_send_ack(const struct sock *sk,
 	const struct tcphdr *th = tcp_hdr(skb);
 	struct {
 		struct tcphdr th;
-		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
-#ifdef CONFIG_TCP_MD5SIG
-			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
-#endif
-			];
+		__be32 opt[(MAX_TCP_OPTION_SPACE >> 2)];
 	} rep;
+	struct hlist_head *extopt_list = NULL;
 	struct net *net = sock_net(sk);
 	struct ip_reply_arg arg;
+	int offset = 0;
+
+	extopt_list = tcp_extopt_get_list(sk);
 
 	memset(&rep.th, 0, sizeof(struct tcphdr));
 	memset(&arg, 0, sizeof(arg));
@@ -763,6 +791,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
 		rep.opt[1] = htonl(tsval);
 		rep.opt[2] = htonl(tsecr);
 		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
+		offset += 3;
 	}
 
 	/* Swap the send and the receive. */
@@ -774,22 +803,45 @@ static void tcp_v4_send_ack(const struct sock *sk,
 	rep.th.ack     = 1;
 	rep.th.window  = htons(win);
 
+	if (unlikely(extopt_list && !hlist_empty(extopt_list))) {
+		unsigned int remaining;
+		struct tcp_out_options opts;
+		int used;
+
+		remaining = sizeof(rep.th) + sizeof(rep.opt) - arg.iov[0].iov_len;
+
 #ifdef CONFIG_TCP_MD5SIG
-	if (key) {
-		int offset = (tsecr) ? 3 : 0;
+		if (key)
+			remaining -= TCPOLEN_MD5SIG_ALIGNED;
+#endif
+
+		memset(&opts, 0, sizeof(opts));
+		used = tcp_extopt_response_prepare(skb, TCPHDR_ACK, remaining,
+						   &opts, sk);
+
+		arg.iov[0].iov_len += used;
+		rep.th.doff = arg.iov[0].iov_len / 4;
 
+		tcp_extopt_response_write(&rep.opt[offset], skb, &rep.th, &opts, sk);
+
+		offset += used / 4;
+	}
+
+#ifdef CONFIG_TCP_MD5SIG
+	if (key) {
 		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
 					  (TCPOPT_NOP << 16) |
 					  (TCPOPT_MD5SIG << 8) |
 					  TCPOLEN_MD5SIG);
 		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
-		rep.th.doff = arg.iov[0].iov_len/4;
+		rep.th.doff = arg.iov[0].iov_len / 4;
 
 		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
 				    key, ip_hdr(skb)->saddr,
 				    ip_hdr(skb)->daddr, &rep.th);
 	}
 #endif
+
 	arg.flags = reply_flags;
 	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
 				      ip_hdr(skb)->saddr, /* XXX */
@@ -893,6 +945,9 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
  */
 static void tcp_v4_reqsk_destructor(struct request_sock *req)
 {
+	if (unlikely(!hlist_empty(&tcp_rsk(req)->tcp_option_list)))
+		tcp_extopt_destroy(req_to_sk(req));
+
 	kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
 }
 
@@ -1410,6 +1465,11 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
 	if (likely(*own_req)) {
 		tcp_move_syn(newtp, req);
 		ireq->ireq_opt = NULL;
+
+		if (unlikely(!hlist_empty(&tcp_rsk(req)->tcp_option_list))) {
+			tcp_extopt_move(req_to_sk(req), newsk);
+			INIT_HLIST_HEAD(&tcp_rsk(req)->tcp_option_list);
+		}
 	} else {
 		newinet->inet_opt = NULL;
 	}
@@ -1907,6 +1967,8 @@ void tcp_v4_destroy_sock(struct sock *sk)
 	/* Cleans up our, hopefully empty, out_of_order_queue. */
 	skb_rbtree_purge(&tp->out_of_order_queue);
 
+	if (unlikely(!hlist_empty(&tp->tcp_option_list)))
+		tcp_extopt_destroy(sk);
 #ifdef CONFIG_TCP_MD5SIG
 	/* Clean up the MD5 key list, if any */
 	if (tp->md5sig_info) {
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index a8384b0c11f8..46eb5a33aec1 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -95,9 +95,10 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
 	struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
 	bool paws_reject = false;
 
-	tmp_opt.saw_tstamp = 0;
+	tcp_clear_options(&tmp_opt);
 	if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
-		tcp_parse_options(twsk_net(tw), skb, &tmp_opt, 0, NULL);
+		tcp_parse_options(twsk_net(tw), skb, &tmp_opt, 0, NULL,
+				  (struct sock *)tw);
 
 		if (tmp_opt.saw_tstamp) {
 			if (tmp_opt.rcv_tsecr)
@@ -108,6 +109,10 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
 		}
 	}
 
+	if (unlikely(!hlist_empty(&tcptw->tcp_option_list)) &&
+	    tcp_extopt_check((struct sock *)tw, skb, &tmp_opt))
+		return TCP_TW_SUCCESS;
+
 	if (tw->tw_substate == TCP_FIN_WAIT2) {
 		/* Just repeat all the checks of tcp_rcv_state_process() */
 
@@ -251,7 +256,7 @@ EXPORT_SYMBOL(tcp_timewait_state_process);
 void tcp_time_wait(struct sock *sk, int state, int timeo)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
-	const struct tcp_sock *tp = tcp_sk(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_timewait_sock *tw;
 	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
 
@@ -271,6 +276,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 		tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
 		tcptw->tw_ts_offset	= tp->tsoffset;
 		tcptw->tw_last_oow_ack_time = 0;
+		INIT_HLIST_HEAD(&tcptw->tcp_option_list);
 
 #if IS_ENABLED(CONFIG_IPV6)
 		if (tw->tw_family == PF_INET6) {
@@ -284,6 +290,10 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 		}
 #endif
 
+		if (unlikely(!hlist_empty(&tp->tcp_option_list))) {
+			tcp_extopt_move(sk, (struct sock *)tw);
+			INIT_HLIST_HEAD(&tp->tcp_option_list);
+		}
 #ifdef CONFIG_TCP_MD5SIG
 		/*
 		 * The timewait bucket does not have the key DB from the
@@ -335,12 +345,15 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 
 void tcp_twsk_destructor(struct sock *sk)
 {
-#ifdef CONFIG_TCP_MD5SIG
 	struct tcp_timewait_sock *twsk = tcp_twsk(sk);
 
+#ifdef CONFIG_TCP_MD5SIG
 	if (twsk->tw_md5_key)
 		kfree_rcu(twsk->tw_md5_key, rcu);
 #endif
+
+	if (unlikely(!hlist_empty(&twsk->tcp_option_list)))
+		tcp_extopt_destroy(sk);
 }
 EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
 
@@ -470,6 +483,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 
 		INIT_LIST_HEAD(&newtp->tsq_node);
 		INIT_LIST_HEAD(&newtp->tsorted_sent_queue);
+		INIT_HLIST_HEAD(&newtp->tcp_option_list);
 
 		tcp_init_wl(newtp, treq->rcv_isn);
 
@@ -545,6 +559,9 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 		if (newtp->af_specific->md5_lookup(sk, newsk))
 			newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
 #endif
+		if (unlikely(!hlist_empty(&treq->tcp_option_list)))
+			newtp->tcp_header_len += tcp_extopt_add_header(req_to_sk(req), newsk);
+
 		if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len)
 			newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
 		newtp->rx_opt.mss_clamp = req->mss;
@@ -587,9 +604,10 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	bool paws_reject = false;
 	bool own_req;
 
-	tmp_opt.saw_tstamp = 0;
+	tcp_clear_options(&tmp_opt);
 	if (th->doff > (sizeof(struct tcphdr)>>2)) {
-		tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL);
+		tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL,
+				  req_to_sk(req));
 
 		if (tmp_opt.saw_tstamp) {
 			tmp_opt.ts_recent = req->ts_recent;
@@ -604,6 +622,10 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 		}
 	}
 
+	if (unlikely(!hlist_empty(&tcp_rsk(req)->tcp_option_list)) &&
+	    tcp_extopt_check(req_to_sk(req), skb, &tmp_opt))
+		return NULL;
+
 	/* Check for pure retransmitted SYN. */
 	if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn &&
 	    flg == TCP_FLAG_SYN &&
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e598bf54e3fb..6d418ce06b59 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -398,13 +398,6 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
 	return tp->snd_una != tp->snd_up;
 }
 
-#define OPTION_SACK_ADVERTISE	(1 << 0)
-#define OPTION_TS		(1 << 1)
-#define OPTION_MD5		(1 << 2)
-#define OPTION_WSCALE		(1 << 3)
-#define OPTION_FAST_OPEN_COOKIE	(1 << 8)
-#define OPTION_SMC		(1 << 9)
-
 static void smc_options_write(__be32 *ptr, u16 *options)
 {
 #if IS_ENABLED(CONFIG_SMC)
@@ -420,17 +413,6 @@ static void smc_options_write(__be32 *ptr, u16 *options)
 #endif
 }
 
-struct tcp_out_options {
-	u16 options;		/* bit field of OPTION_* */
-	u16 mss;		/* 0 to disable */
-	u8 ws;			/* window scale, 0 to disable */
-	u8 num_sack_blocks;	/* number of SACK blocks to include */
-	u8 hash_size;		/* bytes in hash_location */
-	__u8 *hash_location;	/* temporary pointer, overloaded */
-	__u32 tsval, tsecr;	/* need to include OPTION_TS */
-	struct tcp_fastopen_cookie *fastopen_cookie;	/* Fast open cookie */
-};
-
 /* Write previously computed TCP options to the packet.
  *
  * Beware: Something in the Internet is very sensitive to the ordering of
@@ -447,12 +429,15 @@ struct tcp_out_options {
 static void tcp_options_write(__be32 *ptr, struct sk_buff *skb, struct sock *sk,
 			      struct tcp_out_options *opts)
 {
+	struct hlist_head *extopt_list;
 	u16 options = opts->options;	/* mungable copy */
 	struct tcp_sock *tp = NULL;
 
 	if (sk_fullsock(sk))
 		tp = tcp_sk(sk);
 
+	extopt_list = tcp_extopt_get_list(sk);
+
 	if (unlikely(OPTION_MD5 & options)) {
 		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
 			       (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
@@ -543,6 +528,9 @@ static void tcp_options_write(__be32 *ptr, struct sk_buff *skb, struct sock *sk,
 	}
 
 	smc_options_write(ptr, &options);
+
+	if (unlikely(!hlist_empty(extopt_list)))
+		tcp_extopt_write(ptr, skb, opts, sk);
 }
 
 static void smc_set_option(const struct tcp_sock *tp,
@@ -645,6 +633,10 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 
 	smc_set_option(tp, opts, &remaining);
 
+	if (unlikely(!hlist_empty(&tp->tcp_option_list)))
+		remaining -= tcp_extopt_prepare(skb, TCPHDR_SYN, remaining,
+						opts, tcp_to_sk(tp));
+
 	return MAX_TCP_OPTION_SPACE - remaining;
 }
 
@@ -708,6 +700,11 @@ static unsigned int tcp_synack_options(const struct sock *sk,
 
 	smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
 
+	if (unlikely(!hlist_empty(&tcp_rsk(req)->tcp_option_list)))
+		remaining -= tcp_extopt_prepare(skb, TCPHDR_SYN | TCPHDR_ACK,
+						remaining, opts,
+						req_to_sk(req));
+
 	return MAX_TCP_OPTION_SPACE - remaining;
 }
 
@@ -741,6 +738,10 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
 		size += TCPOLEN_TSTAMP_ALIGNED;
 	}
 
+	if (unlikely(!hlist_empty(&tp->tcp_option_list)))
+		size += tcp_extopt_prepare(skb, 0, MAX_TCP_OPTION_SPACE - size,
+					   opts, tcp_to_sk(tp));
+
 	eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
 	if (unlikely(eff_sacks)) {
 		const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
@@ -3308,6 +3309,9 @@ static void tcp_connect_init(struct sock *sk)
 		tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
 #endif
 
+	if (unlikely(!hlist_empty(&tp->tcp_option_list)))
+		tp->tcp_header_len += tcp_extopt_add_header(sk, sk);
+
 	/* If user gave his TCP_MAXSEG, record it to clamp */
 	if (tp->rx_opt.user_mss)
 		tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index e7a3a6b6cf56..d0716c7e9390 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -162,7 +162,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 
 	/* check for timestamp cookie support */
 	memset(&tcp_opt, 0, sizeof(tcp_opt));
-	tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL);
+	tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL, sk);
 
 	if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
 		tsoff = secure_tcpv6_ts_off(sock_net(sk),
@@ -174,6 +174,10 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	if (!cookie_timestamp_decode(sock_net(sk), &tcp_opt))
 		goto out;
 
+	if (unlikely(!hlist_empty(&tp->tcp_option_list)) &&
+	    tcp_extopt_check(sk, skb, &tcp_opt))
+		goto out;
+
 	ret = NULL;
 	req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk, false);
 	if (!req)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index a1ab29e2ab3b..202bf011f462 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -499,6 +499,9 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
 
 static void tcp_v6_reqsk_destructor(struct request_sock *req)
 {
+	if (unlikely(!hlist_empty(&tcp_rsk(req)->tcp_option_list)))
+		tcp_extopt_destroy(req_to_sk(req));
+
 	kfree(inet_rsk(req)->ipv6_opt);
 	kfree_skb(inet_rsk(req)->pktopts);
 }
@@ -788,6 +791,8 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
 	unsigned int tot_len = sizeof(struct tcphdr);
 	struct dst_entry *dst;
 	__be32 *topt;
+	struct hlist_head *extopt_list = NULL;
+	struct tcp_out_options extraopts;
 
 	if (tsecr)
 		tot_len += TCPOLEN_TSTAMP_ALIGNED;
@@ -796,6 +801,25 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
 		tot_len += TCPOLEN_MD5SIG_ALIGNED;
 #endif
 
+	if (sk)
+		extopt_list = tcp_extopt_get_list(sk);
+
+	if (unlikely(extopt_list && !hlist_empty(extopt_list))) {
+		unsigned int remaining = MAX_TCP_OPTION_SPACE - tot_len;
+		u8 extraflags = rst ? TCPHDR_RST : 0;
+		int used;
+
+		if (!rst || !th->ack)
+			extraflags |= TCPHDR_ACK;
+
+		memset(&extraopts, 0, sizeof(extraopts));
+
+		used = tcp_extopt_response_prepare(skb, extraflags, remaining,
+						   &extraopts, sk);
+
+		tot_len += used;
+	}
+
 	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
 			 GFP_ATOMIC);
 	if (!buff)
@@ -836,6 +860,9 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
 	}
 #endif
 
+	if (unlikely(extopt_list && !hlist_empty(extopt_list)))
+		tcp_extopt_response_write(topt, skb, t1, &extraopts, sk);
+
 	memset(&fl6, 0, sizeof(fl6));
 	fl6.daddr = ipv6_hdr(skb)->saddr;
 	fl6.saddr = ipv6_hdr(skb)->daddr;
@@ -1230,6 +1257,11 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
 				skb_set_owner_r(newnp->pktoptions, newsk);
 			}
 		}
+
+		if (unlikely(!hlist_empty(&tcp_rsk(req)->tcp_option_list))) {
+			tcp_extopt_move(req_to_sk(req), newsk);
+			INIT_HLIST_HEAD(&tcp_rsk(req)->tcp_option_list);
+		}
 	}
 
 	return newsk;
-- 
2.16.1
