Change 3360379 by cratiu@cratiu on 2011/09/02 09:44:48 *pending* TCP/md5: Switch from a stack var to a percpu var to avoid a crash. tcp_v4_send_ack uses a stack variable to construct the TCP header for the response packet. When using TCP MD5 signatures on mips architecture a crash happens sometimes when the current core is the master core using the initial stack allocated in vmlinux. The reason for this is that the initial stack is mapped in kseg2 so it can't be directly translated to a physical address by virt_to_phys as expected by sg_set_buf from the following call chain: > (optimized: sg_set_buf) > sg_init_one+0x58/0xa4 > tcp_md5_hash_header+0x30/0x64 > tcp_v4_md5_hash_hdr+0xb4/0x134 > tcp_v4_send_ack+0x16c/0x25c > (optimized: tcp_v4_timewait_ack) > tcp_v4_rcv+0x1b3c/0x1e58 As a temporary fix that should not affect performance, the stack variable is converted in a percpu variable allocated at boot time. Affected files ... ... //packages/linux_2.6.32/main/src/include/net/tcp.h#6 edit ... //packages/linux_2.6.32/main/src/net/ipv4/tcp.c#11 edit ... //packages/linux_2.6.32/main/src/net/ipv4/tcp_ipv4.c#15 edit include/net/tcp.h | 10 +++++++++ net/ipv4/tcp.c | 5 ++++ net/ipv4/tcp_ipv4.c | 53 ++++++++++++++++++++++++---------------------------- 3 files changed, 40 insertions(+), 28 deletions(-) Signed-off-by: Cosmin Ratiu --- src/include/net/tcp.h~ +++ src/include/net/tcp.h @@ -1570,5 +1570,15 @@ return skc->skc_net_params->tcp.rmem; } +struct tcp_reply_hdr { + struct tcphdr th; + __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2) +#ifdef CONFIG_TCP_MD5SIG + + (TCPOLEN_MD5SIG_ALIGNED >> 2) +#endif + ]; +}; + +extern struct tcp_reply_hdr *tcp_rep_percpu; #endif /* _TCP_H */ --- src/net/ipv4/tcp.c~ +++ src/net/ipv4/tcp.c @@ -3150,6 +3150,11 @@ tcp_hashinfo.lhash_size); tcp_register_congestion_control(&tcp_reno); + + /* Hack alert: a proper fix should be implemented for the md5 crash */ + tcp_rep_percpu = alloc_percpu(struct tcp_reply_hdr); + if (!tcp_rep_percpu) + panic("Cannot allocate per cpu tcp reply hdr\n"); } EXPORT_SYMBOL(tcp_close); --- src/net/ipv4/tcp_ipv4.c~ +++ src/net/ipv4/tcp_ipv4.c @@ -680,6 +680,8 @@ SOCK_STAT_INC(groupptr, TcpRstSent, skb_get_portid(skb)); } +struct tcp_reply_hdr *tcp_rep_percpu; + /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states outside socket context is ugly, certainly. What can I do? */ @@ -691,53 +693,48 @@ int reply_flags, u32 vlanprio) { struct tcphdr *th = tcp_hdr(skb); - struct { - struct tcphdr th; - __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2) -#ifdef CONFIG_TCP_MD5SIG - + (TCPOLEN_MD5SIG_ALIGNED >> 2) -#endif - ]; - } rep; + struct tcp_reply_hdr *rep; struct ip_reply_arg arg; - memset(&rep.th, 0, sizeof(struct tcphdr)); + rep = per_cpu_ptr(tcp_rep_percpu, get_cpu()); + + memset(&rep->th, 0, sizeof(struct tcphdr)); memset(&arg, 0, sizeof(arg)); - arg.iov[0].iov_base = (unsigned char *)&rep; - arg.iov[0].iov_len = sizeof(rep.th); + arg.iov[0].iov_base = (unsigned char *)rep; + arg.iov[0].iov_len = sizeof(rep->th); if (ts) { - rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | + rep->opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); - rep.opt[1] = htonl(tcp_time_stamp); - rep.opt[2] = htonl(ts); + rep->opt[1] = htonl(tcp_time_stamp); + rep->opt[2] = htonl(ts); arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED; } /* Swap the send and the receive. */ - rep.th.dest = th->source; - rep.th.source = th->dest; - rep.th.doff = arg.iov[0].iov_len / 4; - rep.th.seq = htonl(seq); - rep.th.ack_seq = htonl(ack); - rep.th.ack = 1; - rep.th.window = htons(win); + rep->th.dest = th->source; + rep->th.source = th->dest; + rep->th.doff = arg.iov[0].iov_len / 4; + rep->th.seq = htonl(seq); + rep->th.ack_seq = htonl(ack); + rep->th.ack = 1; + rep->th.window = htons(win); #ifdef CONFIG_TCP_MD5SIG if (key) { int offset = (ts) ? 3 : 0; - rep.opt[offset++] = htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_MD5SIG << 8) | - TCPOLEN_MD5SIG); + rep->opt[offset++] = htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_MD5SIG << 8) | + TCPOLEN_MD5SIG); arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED; - rep.th.doff = arg.iov[0].iov_len/4; + rep->th.doff = arg.iov[0].iov_len/4; - tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset], + tcp_v4_md5_hash_hdr((__u8 *) &rep->opt[offset], key, ip_hdr(skb)->saddr, - ip_hdr(skb)->daddr, &rep.th); + ip_hdr(skb)->daddr, &rep->th); } #endif arg.flags = reply_flags;