diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 2987ee8..05fa9b2 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -247,7 +247,7 @@ struct tcp_options_received { u32 ts_recent; /* Time stamp to echo next */ u32 rcv_tsval; /* Time stamp value */ u32 rcv_tsecr; /* Time stamp echo reply */ - u16 saw_tstamp : 1, /* Saw TIMESTAMP on last packet */ + u16 tstamp64_ok:1, /* Verified with cookie pair */ tstamp_ok : 1, /* TIMESTAMP seen on SYN packet */ dsack : 1, /* D-SACK is scheduled */ wscale_ok : 1, /* Wscale seen on SYN packet */ @@ -260,13 +260,21 @@ struct tcp_options_received { u8 num_sacks; /* Number of SACK blocks */ u16 user_mss; /* mss requested by user in ioctl */ u16 mss_clamp; /* Maximal mss, negotiated at connection setup */ + + /* When the options are extended beyond the standard 40 bytes, + * this holds the additional data offset (up to 1,020 bytes). + */ + u8 extended; /* in 32-bit words */ + u8 saw_tstamp64:1, /* 64-bit TIMESTAMP seen on last packet */ + saw_tstamp:1, /* TIMESTAMP seen on last packet */ + __unused:6; }; static inline void tcp_clear_options(struct tcp_options_received *rx_opt) { + rx_opt->tstamp64_ok = 0; rx_opt->tstamp_ok = rx_opt->sack_ok = 0; rx_opt->wscale_ok = rx_opt->snd_wscale = 0; - rx_opt->cookie_plus = 0; } /* This is the max number of SACKS that we'll generate and process. 
It's safe diff --git a/include/net/tcp.h b/include/net/tcp.h index 4aed0c8..bbbfb93 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -156,9 +156,8 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); /* * TCP option */ - -#define TCPOPT_NOP 1 /* Padding */ #define TCPOPT_EOL 0 /* End of options */ +#define TCPOPT_NOP 1 /* Padding */ #define TCPOPT_MSS 2 /* Segment size negotiating */ #define TCPOPT_WINDOW 3 /* Window scaling */ #define TCPOPT_SACK_PERM 4 /* SACK Permitted */ @@ -166,30 +165,32 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */ #define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */ #define TCPOPT_COOKIE 253 /* Cookie extension (experimental) */ - -/* - * TCP option lengths - */ - -#define TCPOLEN_MSS 4 -#define TCPOLEN_WINDOW 3 -#define TCPOLEN_SACK_PERM 2 -#define TCPOLEN_TIMESTAMP 10 -#define TCPOLEN_MD5SIG 18 -#define TCPOLEN_COOKIE_BASE 2 /* Cookie-less header extension */ -#define TCPOLEN_COOKIE_PAIR 3 /* Cookie pair header extension */ -#define TCPOLEN_COOKIE_MIN (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MIN) -#define TCPOLEN_COOKIE_MAX (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MAX) - -/* But this is what stacks really send out. 
*/ -#define TCPOLEN_TSTAMP_ALIGNED 12 +#define TCPOPT_TSTAMP64 254 /* 64-bit extension (experimental) */ + +/* TCP option lengths (same order as above) */ +#define TCPOLEN_MSS 4 +#define TCPOLEN_WINDOW 3 +#define TCPOLEN_SACK_PERM 2 +#define TCPOLEN_SACK_BASE 2 +#define TCPOLEN_SACK_PERBLOCK 8 +#define TCPOLEN_TIMESTAMP 10 +#define TCPOLEN_MD5SIG 18 +#define TCPOLEN_COOKIE_BASE 2 /* Cookie-less header extension */ +#define TCPOLEN_COOKIE_PAIR 4 /* Cookie pair header extension */ +#define TCPOLEN_COOKIE_MIN (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MIN) +#define TCPOLEN_COOKIE_MAX (TCPOLEN_COOKIE_BASE+TCP_COOKIE_MAX) +#define TCPOLEN_TSTAMP64 3 + +/* TCP options 32-bit aligned (same order as above) */ +#define TCPOLEN_MSS_ALIGNED 4 #define TCPOLEN_WSCALE_ALIGNED 4 #define TCPOLEN_SACKPERM_ALIGNED 4 -#define TCPOLEN_SACK_BASE 2 #define TCPOLEN_SACK_BASE_ALIGNED 4 -#define TCPOLEN_SACK_PERBLOCK 8 +#define TCPOLEN_TSTAMP_ALIGNED 12 #define TCPOLEN_MD5SIG_ALIGNED 20 -#define TCPOLEN_MSS_ALIGNED 4 + +/* TCP option extensions (same order as above) */ +#define TCPOEXT_TSTAMP64 16 /* Flags in tp->nonagle */ #define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index cc6d324..362887e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3725,17 +3725,27 @@ old_ack: /* Look for tcp options. Normally only called on SYN and SYNACK packets. * But, this can also be called on packets in the established flow when * the fast version below fails. 
+ * + * Returns: + * 0 on success + * negative value on failure */ int tcp_parse_options(struct sk_buff *skb, const struct tcphdr *th, struct tcp_options_received *opt_rx, u8 **hvpp, int estab) { + __be32 *xdp = (__be32 *)th + th->doff; unsigned char *ptr = (unsigned char *)(th + 1); + int remaining = skb_headlen(skb) - tcp_header_len_th(th); int length = tcp_option_len_th(th); + int extend = 0; bool syn = th->syn; opt_rx->cookie_plus = 0; + opt_rx->extended = 0; + opt_rx->saw_tstamp64 = 0; /* false */ opt_rx->saw_tstamp = 0; /* false */ +repeat: while (length > 0) { int opsize; int opcode = *ptr++; @@ -3833,26 +3843,104 @@ int tcp_parse_options(struct sk_buff *skb, const struct tcphdr *th, /* not yet implemented */ break; case TCPOLEN_COOKIE_PAIR: { - /* not yet implemented */ + int words = ptr[1] & 0xf; + + if (!syn && + *ptr >= words && + words >= (TCP_COOKIE_MIN / 4) && + words <= (TCP_COOKIE_MAX / 4) && + opt_rx->extended == 0) { + int bytes = words * 4; + + opt_rx->extended = *ptr; + extend = *ptr * 4; + + /* Adjust end_seq, set in + * tcp_v[4,6]_rcv() + */ + TCP_SKB_CB(skb)->end_seq -= extend; + remaining -= extend; + + if (unlikely(remaining < 0)) { + /* missing data!!! 
*/ + return remaining; + } + extend -= bytes; + + opt_rx->cookie_plus = bytes + + TCPOLEN_COOKIE_BASE; + *hvpp = (u8 *)xdp; + xdp += words; + } break; } case TCPOLEN_COOKIE_MIN+0: case TCPOLEN_COOKIE_MIN+2: case TCPOLEN_COOKIE_MIN+4: case TCPOLEN_COOKIE_MIN+6: - case TCPOLEN_COOKIE_MAX: /* 16-bit multiple */ if (syn) { opt_rx->cookie_plus = opsize; *hvpp = ptr; } break; + case TCPOLEN_COOKIE_MAX+0: + /* either cookie or cookie pair */ + if (syn || opt_rx->saw_tstamp64) { + opt_rx->cookie_plus = opsize; + *hvpp = ptr; + } + break; + case TCPOLEN_COOKIE_MAX+4: + case TCPOLEN_COOKIE_MAX+8: + case TCPOLEN_COOKIE_MAX+12: + case TCPOLEN_COOKIE_MAX+TCP_COOKIE_MAX: + /* 32-bit multiple (pair) */ + if (opt_rx->saw_tstamp64) { + opt_rx->cookie_plus = opsize; + *hvpp = ptr; + } + break; default: /* ignore option */ break; }; break; + case TCPOPT_TSTAMP64: + if (opsize == TCPOLEN_TSTAMP64) { + if (!syn && + *ptr >= (TCPOEXT_TSTAMP64 / 4) && + !opt_rx->saw_tstamp && + opt_rx->extended == 0) { + opt_rx->extended = *ptr; + extend = *ptr * 4; + + /* Adjust end_seq, set in + * tcp_v[4,6]_rcv() + */ + TCP_SKB_CB(skb)->end_seq -= extend; + remaining -= extend; + + if (unlikely(remaining < 0)) { + /* missing data!!! */ + return remaining; + } + extend -= TCPOEXT_TSTAMP64; + + /* 64-bits not yet implemented */ + xdp++; + opt_rx->rcv_tsval = ntohl(*xdp); + xdp += 2; + opt_rx->rcv_tsecr = ntohl(*xdp); + xdp++; + + opt_rx->saw_tstamp64 = 1; /* true */ + opt_rx->saw_tstamp = 1; /* true */ + } + } + break; + default: /* skip unrecognized options */ break; @@ -3861,6 +3949,13 @@ int tcp_parse_options(struct sk_buff *skb, const struct tcphdr *th, ptr += opsize - 2; length -= opsize; } + + if (unlikely(extend > 0)) { + ptr = (unsigned char *)xdp; + length = extend; + extend = 0; + goto repeat; + } return 0; } @@ -3887,6 +3982,11 @@ static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th) /* Fast parse options. This hopes to only see timestamps. 
* If it is wrong it falls back on tcp_parse_options(). + * + * Returns: + * 1 on success, fast + * 0 on success, slow + * negative value on failure */ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, struct tcp_sock *tp, u8 **hvpp) @@ -3896,11 +3996,14 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, */ if (th->doff == (sizeof(*th) / 4)) { tp->rx_opt.saw_tstamp = 0; + tp->rx_opt.extended = 0; return 0; - } else if (tp->rx_opt.tstamp_ok && - th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) { - if (tcp_parse_aligned_timestamp(tp, th)) - return 1; + } + if (th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4) && + tp->rx_opt.tstamp_ok && + tcp_parse_aligned_timestamp(tp, th)) { + tp->rx_opt.extended = 0; + return 1; } return tcp_parse_options(skb, th, &tp->rx_opt, hvpp, 1); } @@ -3911,8 +4014,8 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, */ u8 *tcp_parse_md5sig_option(struct tcphdr *th) { - int length = (th->doff << 2) - sizeof (*th); u8 *ptr = (u8*)(th + 1); + int length = tcp_option_len_th(th); /* If the TCP option is too short, we can short cut */ if (length < TCPOLEN_MD5SIG) @@ -4377,7 +4480,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) goto drop; - __skb_pull(skb, th->doff * 4); + __skb_pull(skb, (th->doff + tp->rx_opt.extended) * 4); TCP_ECN_accept_cwr(tp, skb); @@ -5038,8 +5141,8 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th) /* Do we wait for any urgent data? - normally not... */ if (tp->urg_data == TCP_URG_NOTYET) { - u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff * 4) - - th->syn; + u32 ptr = ((th->doff + tp->rx_opt.extended) * 4) + + tp->urg_seq - ntohl(th->seq) - th->syn; /* Is the urgent pointer pointing into this packet? 
*/ if (ptr < skb->len) { diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 0f1b409..2240012 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -109,6 +109,9 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, } } else { /* otherwise initialized by tcp_parse_options() */ + tmp_opt.cookie_plus = 0; + tmp_opt.extended = 0; + tmp_opt.saw_tstamp64 = 0; /* false */ tmp_opt.saw_tstamp = 0; /* false */ } @@ -550,6 +553,9 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, } } else { /* otherwise initialized by tcp_parse_options() */ + tmp_opt.cookie_plus = 0; + tmp_opt.extended = 0; + tmp_opt.saw_tstamp64 = 0; /* false */ tmp_opt.saw_tstamp = 0; /* false */ } -- 1.6.3.3