diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 74728f7..2987ee8 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -301,7 +301,11 @@ struct tcp_sock { /* * Header prediction flags - * 0x5?10 << 16 + snd_wnd in net byte order + * S << 28 + TCP_FLAG_ACK + snd_wnd, in net byte order + * (PSH flag is ignored) + * S is 5 (no options), or 8 (timestamp aligned) + * otherwise, 0 to turn it off -- for instance, when there are + * holes in receive space. */ __be32 pred_flags; diff --git a/include/net/tcp.h b/include/net/tcp.h index 34f5cc2..52dd185 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -310,13 +310,11 @@ extern int tcp_ioctl(struct sock *sk, extern int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, - struct tcphdr *th, - unsigned len); + struct tcphdr *th); extern int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, - struct tcphdr *th, - unsigned len); + struct tcphdr *th); extern void tcp_rcv_space_adjust(struct sock *sk); @@ -533,9 +531,19 @@ static inline u32 __tcp_set_rto(const struct tcp_sock *tp) return (tp->srtt >> 3) + tp->rttvar; } +static inline u8 __tcp_fast_path_header_length(const struct tcp_sock *tp) +{ + return tp->rx_opt.tstamp_ok + ? sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED + : sizeof(struct tcphdr); +} + static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) { - tp->pred_flags = htonl((tp->tcp_header_len << 26) | + /* See linux/tcp.h for pred_flags details. + * 'S' (doff) is 32-bit words, convert from bytes: 26 = 28 - 2. + */ + tp->pred_flags = htonl((__tcp_fast_path_header_length(tp) << 26) | ntohl(TCP_FLAG_ACK) | snd_wnd); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 28e0296..d314170 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -152,7 +152,7 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) * tcp header plus fixed timestamp option length. * Resulting "len" is MSS free of SACK jitter. */ - len -= tcp_sk(sk)->tcp_header_len; + len -= __tcp_fast_path_header_length(tcp_sk(sk)); icsk->icsk_ack.last_seg_size = len; if (len == lss) { icsk->icsk_ack.rcv_mss = len; @@ -5206,7 +5206,7 @@ discard: * tcp_data_queue when everything is OK. */ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, - struct tcphdr *th, unsigned len) + struct tcphdr *th) { struct tcp_sock *tp = tcp_sk(sk); int res; @@ -5224,32 +5224,18 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, * Our current scheme is not silly either but we take the * extra cost of the net_bh soft interrupt processing... * We do checksum and copy also but from device to kernel. + * + * See linux/tcp.h and net/tcp.h for pred_flags details. */ - - tp->rx_opt.saw_tstamp = 0; - - /* pred_flags is 0xS?10 << 16 + snd_wnd - * if header_prediction is to be made - * 'S' will always be tp->tcp_header_len >> 2 - * '?' will be 0 for the fast path, otherwise pred_flags is 0 to - * turn it off (when there are holes in the receive - * space for instance) - * PSH flag is ignored. - */ - if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags && TCP_SKB_CB(skb)->seq == tp->rcv_nxt && !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) { - int tcp_header_len = tp->tcp_header_len; - - /* Timestamp header prediction: tcp_header_len - * is automatically equal to th->doff*4 due to pred_flags - * match. - */ + int tcp_header_len = tcp_header_len_th(th); - /* Check timestamp */ - if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) { - /* No? Slow path! */ + /* Timestamp header prediction */ + if (tcp_header_len != sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) { + tp->rx_opt.saw_tstamp = 0; /* false */ + } else { if (!tcp_parse_aligned_timestamp(tp, th)) goto slow_path; @@ -5264,35 +5250,12 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, */ } - if (len <= tcp_header_len) { - /* Bulk data transfer: sender */ - if (len == tcp_header_len) { - /* Predicted packet is in window by definition. - * seq == rcv_nxt and rcv_wup <= rcv_nxt. - * Hence, check seq<=rcv_wup reduces to: - */ - if (tcp_header_len == - (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) && - tp->rcv_nxt == tp->rcv_wup) - tcp_store_ts_recent(tp); - - /* We know that such packets are checksummed - * on entry. - */ - tcp_ack(sk, skb, 0); - __kfree_skb(skb); - tcp_data_snd_check(sk); - return 0; - } else { /* Header too small */ - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); - goto discard; - } - } else { + if (tcp_header_len < skb->len) { int eaten = 0; int copied_early = 0; if (tp->copied_seq == tp->rcv_nxt && - len - tcp_header_len <= tp->ucopy.len) { + skb->len - tcp_header_len <= tp->ucopy.len) { #ifdef CONFIG_NET_DMA if (tcp_dma_try_early_copy(sk, skb, tcp_header_len)) { copied_early = 1; @@ -5311,9 +5274,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, * seq == rcv_nxt and rcv_wup <= rcv_nxt. * Hence, check seq<=rcv_wup reduces to: */ - if (tcp_header_len == - (sizeof(struct tcphdr) + - TCPOLEN_TSTAMP_ALIGNED) && + if (tp->rx_opt.saw_tstamp && tp->rcv_nxt == tp->rcv_wup) tcp_store_ts_recent(tp); @@ -5334,8 +5295,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, * seq == rcv_nxt and rcv_wup <= rcv_nxt. * Hence, check seq<=rcv_wup reduces to: */ - if (tcp_header_len == - (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) && + if (tp->rx_opt.saw_tstamp && tp->rcv_nxt == tp->rcv_wup) tcp_store_ts_recent(tp); @@ -5376,11 +5336,33 @@ no_ack: else sk->sk_data_ready(sk, 0); return 0; + } else { + /* Bulk data transfer: sender + * + * tcp_header_len > skb->len never happens, + * already checked by tcp_v[4,6]_rcv() + * + * Predicted packet is in window by definition. + * seq == rcv_nxt and rcv_wup <= rcv_nxt. + * Hence, check seq<=rcv_wup reduces to: + */ + if (tp->rx_opt.saw_tstamp && + tp->rcv_nxt == tp->rcv_wup) + tcp_store_ts_recent(tp); + + /* We know that such packets are checksummed + * on entry. + */ + tcp_ack(sk, skb, 0); + __kfree_skb(skb); + tcp_data_snd_check(sk); + return 0; } } slow_path: - if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb)) + /* Assumes header and options unchanged since checksum_init() */ + if (tcp_checksum_complete_user(sk, skb)) goto csum_error; /* @@ -5416,7 +5398,7 @@ discard: } static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, - struct tcphdr *th, unsigned len) + struct tcphdr *th) { u8 *hash_location; struct inet_connection_sock *icsk = inet_csk(sk); @@ -5693,7 +5675,7 @@ reset_and_undo: */ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, - struct tcphdr *th, unsigned len) + struct tcphdr *th) { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); @@ -5740,7 +5722,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, goto discard; case TCP_SYN_SENT: - queued = tcp_rcv_synsent_state_process(sk, skb, th, len); + queued = tcp_rcv_synsent_state_process(sk, skb, th); if (queued >= 0) return queued; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0a76e41..f999e06 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1551,7 +1551,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ TCP_CHECK_TIMER(sk); - if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { + if (tcp_rcv_established(sk, skb, tcp_hdr(skb))) { rsk = sk; goto reset; } @@ -1578,7 +1578,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) } TCP_CHECK_TIMER(sk); - if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { + if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb))) { rsk = sk; goto reset; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f206ee5..37b7536 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -718,8 +718,7 @@ int tcp_child_process(struct sock *parent, struct sock *child, int state = child->sk_state; if (!sock_owned_by_user(child)) { - ret = tcp_rcv_state_process(child, skb, tcp_hdr(skb), - skb->len); + ret = tcp_rcv_state_process(child, skb, tcp_hdr(skb)); /* Wakeup parent, send SIGIO */ if (state == TCP_SYN_RECV && child->sk_state != state) parent->sk_data_ready(parent, 0); diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 9bc805d..de2a32e 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -88,7 +88,7 @@ static inline int tcp_probe_avail(void) * Note: arguments must match tcp_rcv_established()! */ static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, - struct tcphdr *th, unsigned len) + struct tcphdr *th) { const struct tcp_sock *tp = tcp_sk(sk); const struct inet_sock *inet = inet_sk(sk); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b76939a..3d08a4d 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1586,7 +1586,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ TCP_CHECK_TIMER(sk); - if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) + if (tcp_rcv_established(sk, skb, tcp_hdr(skb))) goto reset; TCP_CHECK_TIMER(sk); if (opt_skb) @@ -1618,7 +1618,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) } TCP_CHECK_TIMER(sk); - if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) + if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb))) goto reset; TCP_CHECK_TIMER(sk); if (opt_skb) -- 1.6.3.3