[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1342473410-6265-2-git-send-email-ycheng@google.com>
Date: Mon, 16 Jul 2012 14:16:44 -0700
From: Yuchung Cheng <ycheng@...gle.com>
To: davem@...emloft.net, hkchu@...gle.com, edumazet@...gle.com,
ncardwell@...gle.com
Cc: sivasankar@...ucsd.edu, netdev@...r.kernel.org,
Yuchung Cheng <ycheng@...gle.com>
Subject: [PATCH 1/7] net-tcp: Fast Open base
This patch impelements the common code for both the client and server.
1. TCP Fast Open option processing. Since Fast Open does not have a
option number assigned by IANA yet, it shares the experiment option
code 254 by implementing draft-ietf-tcpm-experimental-options
with a 16 bits magic number 0xF989
2. The new sysctl tcp_fastopen
3. A place holder init function
Signed-off-by: Yuchung Cheng <ycheng@...gle.com>
---
include/linux/tcp.h | 10 ++++++++++
include/net/tcp.h | 9 ++++++++-
net/ipv4/Makefile | 2 +-
net/ipv4/syncookies.c | 2 +-
net/ipv4/sysctl_net_ipv4.c | 7 +++++++
net/ipv4/tcp_fastopen.c | 11 +++++++++++
net/ipv4/tcp_input.c | 26 ++++++++++++++++++++++----
net/ipv4/tcp_ipv4.c | 2 +-
net/ipv4/tcp_minisocks.c | 4 ++--
net/ipv4/tcp_output.c | 25 +++++++++++++++++++++----
net/ipv6/syncookies.c | 2 +-
net/ipv6/tcp_ipv6.c | 2 +-
12 files changed, 86 insertions(+), 16 deletions(-)
create mode 100644 net/ipv4/tcp_fastopen.c
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 1888169..12948f5 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -243,6 +243,16 @@ static inline unsigned int tcp_optlen(const struct sk_buff *skb)
return (tcp_hdr(skb)->doff - 5) * 4;
}
+/* TCP Fast Open */
+#define TCP_FASTOPEN_COOKIE_MIN 4 /* Min Fast Open Cookie size in bytes */
+#define TCP_FASTOPEN_COOKIE_MAX 16 /* Max Fast Open Cookie size in bytes */
+
+/* TCP Fast Open Cookie as stored in memory */
+struct tcp_fastopen_cookie {
+ s8 len;
+ u8 val[TCP_FASTOPEN_COOKIE_MAX];
+};
+
/* This defines a selective acknowledgement block. */
struct tcp_sack_block_wire {
__be32 start_seq;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 439984b..87f486f 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -170,6 +170,11 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */
#define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */
#define TCPOPT_COOKIE 253 /* Cookie extension (experimental) */
+#define TCPOPT_EXP 254 /* Experimental */
+/* Magic number to be after the option value for sharing TCP
+ * experimental options. See draft-ietf-tcpm-experimental-options-00.txt
+ */
+#define TCPOPT_FASTOPEN_MAGIC 0xF989
/*
* TCP option lengths
@@ -180,6 +185,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCPOLEN_SACK_PERM 2
#define TCPOLEN_TIMESTAMP 10
#define TCPOLEN_MD5SIG 18
+#define TCPOLEN_EXP_FASTOPEN_BASE 4
#define TCPOLEN_COOKIE_BASE 2 /* Cookie-less header extension */
#define TCPOLEN_COOKIE_PAIR 3 /* Cookie pair header extension */
#define TCPOLEN_COOKIE_MIN (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MIN)
@@ -222,6 +228,7 @@ extern int sysctl_tcp_retries1;
extern int sysctl_tcp_retries2;
extern int sysctl_tcp_orphan_retries;
extern int sysctl_tcp_syncookies;
+extern int sysctl_tcp_fastopen;
extern int sysctl_tcp_retrans_collapse;
extern int sysctl_tcp_stdurg;
extern int sysctl_tcp_rfc1337;
@@ -417,7 +424,7 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t len, int nonblock, int flags, int *addr_len);
extern void tcp_parse_options(const struct sk_buff *skb,
struct tcp_options_received *opt_rx, const u8 **hvpp,
- int estab);
+ int estab, struct tcp_fastopen_cookie *foc);
extern const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
/*
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 5a23e8b..63e6995 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -7,7 +7,7 @@ obj-y := route.o inetpeer.o protocol.o \
ip_output.o ip_sockglue.o inet_hashtables.o \
inet_timewait_sock.o inet_connection_sock.o \
tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \
- tcp_minisocks.o tcp_cong.o tcp_metrics.o \
+ tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \
datagram.o raw.o udp.o udplite.o \
arp.o icmp.o devinet.o af_inet.o igmp.o \
fib_frontend.o fib_semantics.o fib_trie.o \
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index eab2a7f..650e152 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -293,7 +293,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
/* check for timestamp cookie support */
memset(&tcp_opt, 0, sizeof(tcp_opt));
- tcp_parse_options(skb, &tcp_opt, &hash_location, 0);
+ tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL);
if (!cookie_check_timestamp(&tcp_opt, &ecn_ok))
goto out;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 70730f7..2a946a19 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -367,6 +367,13 @@ static struct ctl_table ipv4_table[] = {
},
#endif
{
+ .procname = "tcp_fastopen",
+ .data = &sysctl_tcp_fastopen,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
.procname = "tcp_tw_recycle",
.data = &tcp_death_row.sysctl_tw_recycle,
.maxlen = sizeof(int),
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
new file mode 100644
index 0000000..a7f729c
--- /dev/null
+++ b/net/ipv4/tcp_fastopen.c
@@ -0,0 +1,11 @@
+#include <linux/init.h>
+#include <linux/kernel.h>
+
+int sysctl_tcp_fastopen;
+
+static int __init tcp_fastopen_init(void)
+{
+ return 0;
+}
+
+late_initcall(tcp_fastopen_init);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 055ac49..d6e16e2 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3729,7 +3729,8 @@ old_ack:
* the fast version below fails.
*/
void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx,
- const u8 **hvpp, int estab)
+ const u8 **hvpp, int estab,
+ struct tcp_fastopen_cookie *foc)
{
const unsigned char *ptr;
const struct tcphdr *th = tcp_hdr(skb);
@@ -3836,8 +3837,25 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o
break;
}
break;
- }
+ case TCPOPT_EXP:
+ /* Fast Open option shares code 254 using a
+ * 16 bits magic number. It's valid only in
+ * SYN or SYN-ACK with an even size.
+ */
+ if (opsize < TCPOLEN_EXP_FASTOPEN_BASE ||
+ get_unaligned_be16(ptr) != TCPOPT_FASTOPEN_MAGIC ||
+ foc == NULL || !th->syn || (opsize & 1))
+ break;
+ foc->len = opsize - TCPOLEN_EXP_FASTOPEN_BASE;
+ if (foc->len >= TCP_FASTOPEN_COOKIE_MIN &&
+ foc->len <= TCP_FASTOPEN_COOKIE_MAX)
+ memcpy(foc->val, ptr + 2, foc->len);
+ else if (foc->len != 0)
+ foc->len = -1;
+ break;
+
+ }
ptr += opsize-2;
length -= opsize;
}
@@ -3879,7 +3897,7 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb,
if (tcp_parse_aligned_timestamp(tp, th))
return true;
}
- tcp_parse_options(skb, &tp->rx_opt, hvpp, 1);
+ tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL);
return true;
}
@@ -5601,7 +5619,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
struct tcp_cookie_values *cvp = tp->cookie_values;
int saved_clamp = tp->rx_opt.mss_clamp;
- tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0);
+ tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, NULL);
if (th->ack) {
/* rfc793:
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 7a0062c..588200e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1314,7 +1314,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
tcp_clear_options(&tmp_opt);
tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
tmp_opt.user_mss = tp->rx_opt.user_mss;
- tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
+ tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
if (tmp_opt.cookie_plus > 0 &&
tmp_opt.saw_tstamp &&
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index c66f2ed..5912ac3 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -97,7 +97,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
tmp_opt.saw_tstamp = 0;
if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
- tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
+ tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
if (tmp_opt.saw_tstamp) {
tmp_opt.ts_recent = tcptw->tw_ts_recent;
@@ -534,7 +534,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
tmp_opt.saw_tstamp = 0;
if (th->doff > (sizeof(struct tcphdr)>>2)) {
- tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
+ tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
if (tmp_opt.saw_tstamp) {
tmp_opt.ts_recent = req->ts_recent;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 15a7c7b..4849be7 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -385,15 +385,17 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
#define OPTION_MD5 (1 << 2)
#define OPTION_WSCALE (1 << 3)
#define OPTION_COOKIE_EXTENSION (1 << 4)
+#define OPTION_FAST_OPEN_COOKIE (1 << 8)
struct tcp_out_options {
- u8 options; /* bit field of OPTION_* */
+ u16 options; /* bit field of OPTION_* */
+ u16 mss; /* 0 to disable */
u8 ws; /* window scale, 0 to disable */
u8 num_sack_blocks; /* number of SACK blocks to include */
u8 hash_size; /* bytes in hash_location */
- u16 mss; /* 0 to disable */
- __u32 tsval, tsecr; /* need to include OPTION_TS */
__u8 *hash_location; /* temporary pointer, overloaded */
+ __u32 tsval, tsecr; /* need to include OPTION_TS */
+ struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */
};
/* The sysctl int routines are generic, so check consistency here.
@@ -442,7 +444,7 @@ static u8 tcp_cookie_size_check(u8 desired)
static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
struct tcp_out_options *opts)
{
- u8 options = opts->options; /* mungable copy */
+ u16 options = opts->options; /* mungable copy */
/* Having both authentication and cookies for security is redundant,
* and there's certainly not enough room. Instead, the cookie-less
@@ -564,6 +566,21 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
tp->rx_opt.dsack = 0;
}
+
+ if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) {
+ struct tcp_fastopen_cookie *foc = opts->fastopen_cookie;
+
+ *ptr++ = htonl((TCPOPT_EXP << 24) |
+ ((TCPOLEN_EXP_FASTOPEN_BASE + foc->len) << 16) |
+ TCPOPT_FASTOPEN_MAGIC);
+
+ memcpy(ptr, foc->val, foc->len);
+ if ((foc->len & 3) == 2) {
+ u8 *align = ((u8 *)ptr) + foc->len;
+ align[0] = align[1] = TCPOPT_NOP;
+ }
+ ptr += (foc->len + 3) >> 2;
+ }
}
/* Compute TCP options for SYN packets. This is not the final
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 7bf3cc4..bb46061 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -177,7 +177,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
/* check for timestamp cookie support */
memset(&tcp_opt, 0, sizeof(tcp_opt));
- tcp_parse_options(skb, &tcp_opt, &hash_location, 0);
+ tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL);
if (!cookie_check_timestamp(&tcp_opt, &ecn_ok))
goto out;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 3071f37..41eaeb5 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1062,7 +1062,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
tcp_clear_options(&tmp_opt);
tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
tmp_opt.user_mss = tp->rx_opt.user_mss;
- tcp_parse_options(skb, &tmp_opt, &hash_location, 0);
+ tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
if (tmp_opt.cookie_plus > 0 &&
tmp_opt.saw_tstamp &&
--
1.7.7.3
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists