>From d2ba1fde42af44fbce361202e9af13daff9e4381 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Mon, 28 May 2012 21:04:12 +0000 Subject: [PATCH] netfilter: nf_ct_tcp: add namespace support This patch adds namespace support for TCP protocol tracker. Acked-by: Eric W. Biederman Signed-off-by: Gao feng Signed-off-by: Pablo Neira Ayuso --- include/net/netns/conntrack.h | 10 ++ net/netfilter/nf_conntrack_proto.c | 2 + net/netfilter/nf_conntrack_proto_tcp.c | 162 ++++++++++++++++++++++++++------ 3 files changed, 145 insertions(+), 29 deletions(-) diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 0ef8592..680d799 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -1,15 +1,16 @@ #ifndef __NETNS_CONNTRACK_H #define __NETNS_CONNTRACK_H #include #include #include +#include struct ctl_table_header; struct nf_conntrack_ecache; struct nf_proto_net { #ifdef CONFIG_SYSCTL struct ctl_table_header *ctl_table_header; struct ctl_table *ctl_table; #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT @@ -19,20 +20,29 @@ struct nf_proto_net { #endif unsigned int users; }; struct nf_generic_net { struct nf_proto_net pn; unsigned int timeout; }; +struct nf_tcp_net { + struct nf_proto_net pn; + unsigned int timeouts[TCP_CONNTRACK_TIMEOUT_MAX]; + unsigned int tcp_loose; + unsigned int tcp_be_liberal; + unsigned int tcp_max_retrans; +}; + struct nf_ip_net { struct nf_generic_net generic; + struct nf_tcp_net tcp; #if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) struct ctl_table_header *ctl_table_header; struct ctl_table *ctl_table; #endif }; struct netns_ct { atomic_t count; unsigned int expect_count; diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index b095b4a..8a71e8b 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -297,18 +297,20 @@ void nf_conntrack_l3proto_unregister(struct net *net, nf_ct_iterate_cleanup(net, kill_l3proto, proto); rtnl_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister); static struct nf_proto_net *nf_ct_l4proto_net(struct net *net, struct nf_conntrack_l4proto *l4proto) { switch (l4proto->l4proto) { + case IPPROTO_TCP: + return (struct nf_proto_net *)&net->ct.nf_ct_proto.tcp; case 255: /* l4proto_generic */ return (struct nf_proto_net *)&net->ct.nf_ct_proto.generic; default: if (l4proto->net_id) return net_generic(net, *l4proto->net_id); else return NULL; } return NULL; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 21ff1a9..a053f67 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -264,18 +264,23 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sTW -> sTW Retransmitted last ACK. * sCL -> sCL */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ /*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL }, /*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } } }; +static inline struct nf_tcp_net *tcp_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.tcp; +} + static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct nf_conntrack_tuple *tuple) { const struct tcphdr *hp; struct tcphdr _hdr; /* Actually only need first 8 bytes. */ hp = skb_header_pointer(skb, dataoff, 8, &_hdr); if (hp == NULL) @@ -510,18 +515,19 @@ static bool tcp_in_window(const struct nf_conn *ct, struct ip_ct_tcp *state, enum ip_conntrack_dir dir, unsigned int index, const struct sk_buff *skb, unsigned int dataoff, const struct tcphdr *tcph, u_int8_t pf) { struct net *net = nf_ct_net(ct); + struct nf_tcp_net *tn = tcp_pernet(net); struct ip_ct_tcp_state *sender = &state->seen[dir]; struct ip_ct_tcp_state *receiver = &state->seen[!dir]; const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; __u32 seq, ack, sack, end, win, swin; s16 receiver_offset; bool res; /* * Get the required data from the packet. @@ -714,19 +720,19 @@ static bool tcp_in_window(const struct nf_conn *ct, state->last_end = end; state->last_win = win; state->retrans = 0; } } res = true; } else { res = false; if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || - nf_ct_tcp_be_liberal) + tn->tcp_be_liberal) res = true; if (!res && LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: %s ", before(seq, sender->td_maxend + 1) ? after(end, sender->td_end - receiver->td_maxwin - 1) ? before(sack, receiver->td_end + 1) ? after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG" : "ACK is under the lower bound (possible overly delayed ACK)" @@ -822,18 +828,19 @@ static unsigned int *tcp_get_timeouts(struct net *net) static int tcp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, u_int8_t pf, unsigned int hooknum, unsigned int *timeouts) { struct net *net = nf_ct_net(ct); + struct nf_tcp_net *tn = tcp_pernet(net); struct nf_conntrack_tuple *tuple; enum tcp_conntrack new_state, old_state; enum ip_conntrack_dir dir; const struct tcphdr *th; struct tcphdr _tcph; unsigned long timeout; unsigned int index; th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); @@ -1014,19 +1021,19 @@ static int tcp_packet(struct nf_conn *ct, (th->syn ? 1 : 0), (th->ack ? 1 : 0), (th->fin ? 1 : 0), (th->rst ? 1 : 0), old_state, new_state); ct->proto.tcp.state = new_state; if (old_state != new_state && new_state == TCP_CONNTRACK_FIN_WAIT) ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; - if (ct->proto.tcp.retrans >= nf_ct_tcp_max_retrans && + if (ct->proto.tcp.retrans >= tn->tcp_max_retrans && timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS]) timeout = timeouts[TCP_CONNTRACK_RETRANS]; else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) & IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED && timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK]) timeout = timeouts[TCP_CONNTRACK_UNACK]; else timeout = timeouts[new_state]; spin_unlock_bh(&ct->lock); @@ -1059,18 +1066,20 @@ static int tcp_packet(struct nf_conn *ct, } /* Called when a new connection for this protocol found. */ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, unsigned int *timeouts) { enum tcp_conntrack new_state; const struct tcphdr *th; struct tcphdr _tcph; + struct net *net = nf_ct_net(ct); + struct nf_tcp_net *tn = tcp_pernet(net); const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0]; const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1]; th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); BUG_ON(th == NULL); /* Don't need lock here: this conntrack not in circulation yet */ new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE]; @@ -1087,19 +1096,19 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th); ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window); if (ct->proto.tcp.seen[0].td_maxwin == 0) ct->proto.tcp.seen[0].td_maxwin = 1; ct->proto.tcp.seen[0].td_maxend = ct->proto.tcp.seen[0].td_end; tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]); - } else if (nf_ct_tcp_loose == 0) { + } else if (tn->tcp_loose == 0) { /* Don't try to pick up connections. */ return false; } else { memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp)); /* * We are in the middle of a connection, * its history is lost for us. * Let's try to use the data from the packet. */ @@ -1354,208 +1363,301 @@ static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = { }; #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #ifdef CONFIG_SYSCTL static unsigned int tcp_sysctl_table_users; static struct ctl_table_header *tcp_sysctl_header; static struct ctl_table tcp_sysctl_table[] = { { .procname = "nf_conntrack_tcp_timeout_syn_sent", - .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_syn_recv", - .data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_established", - .data = &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_fin_wait", - .data = &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_close_wait", - .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_last_ack", - .data = &tcp_timeouts[TCP_CONNTRACK_LAST_ACK], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_time_wait", - .data = &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_close", - .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_max_retrans", - .data = &tcp_timeouts[TCP_CONNTRACK_RETRANS], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_timeout_unacknowledged", - .data = &tcp_timeouts[TCP_CONNTRACK_UNACK], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_tcp_loose", - .data = &nf_ct_tcp_loose, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "nf_conntrack_tcp_be_liberal", - .data = &nf_ct_tcp_be_liberal, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "nf_conntrack_tcp_max_retrans", - .data = &nf_ct_tcp_max_retrans, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, }, { } }; #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT static struct ctl_table tcp_compat_sysctl_table[] = { { .procname = "ip_conntrack_tcp_timeout_syn_sent", - .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_syn_sent2", - .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT2], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_syn_recv", - .data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_established", - .data = &tcp_timeouts[TCP_CONNTRACK_ESTABLISHED], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_fin_wait", - .data = &tcp_timeouts[TCP_CONNTRACK_FIN_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_close_wait", - .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_last_ack", - .data = &tcp_timeouts[TCP_CONNTRACK_LAST_ACK], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_time_wait", - .data = &tcp_timeouts[TCP_CONNTRACK_TIME_WAIT], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_close", - .data = &tcp_timeouts[TCP_CONNTRACK_CLOSE], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_timeout_max_retrans", - .data = &tcp_timeouts[TCP_CONNTRACK_RETRANS], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "ip_conntrack_tcp_loose", - .data = &nf_ct_tcp_loose, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "ip_conntrack_tcp_be_liberal", - .data = &nf_ct_tcp_be_liberal, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "ip_conntrack_tcp_max_retrans", - .data = &nf_ct_tcp_max_retrans, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, }, { } }; #endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ +static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn) +{ +#ifdef CONFIG_SYSCTL + struct nf_tcp_net *tn = (struct nf_tcp_net *)pn; + + if (pn->ctl_table) + return 0; + + pn->ctl_table = kmemdup(tcp_sysctl_table, + sizeof(tcp_sysctl_table), + GFP_KERNEL); + if (!pn->ctl_table) + return -ENOMEM; + + pn->ctl_table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT]; + pn->ctl_table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV]; + pn->ctl_table[2].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED]; + pn->ctl_table[3].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT]; + pn->ctl_table[4].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT]; + pn->ctl_table[5].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK]; + pn->ctl_table[6].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT]; + pn->ctl_table[7].data = &tn->timeouts[TCP_CONNTRACK_CLOSE]; + pn->ctl_table[8].data = &tn->timeouts[TCP_CONNTRACK_RETRANS]; + pn->ctl_table[9].data = &tn->timeouts[TCP_CONNTRACK_UNACK]; + pn->ctl_table[10].data = &tn->tcp_loose; + pn->ctl_table[11].data = &tn->tcp_be_liberal; + pn->ctl_table[12].data = &tn->tcp_max_retrans; +#endif + return 0; +} + +static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn) +{ +#ifdef CONFIG_SYSCTL +#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT + struct nf_tcp_net *tn = (struct nf_tcp_net *)pn; + pn->ctl_compat_table = kmemdup(tcp_compat_sysctl_table, + sizeof(tcp_compat_sysctl_table), + GFP_KERNEL); + if (!pn->ctl_compat_table) + return -ENOMEM; + + pn->ctl_compat_table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT]; + pn->ctl_compat_table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT2]; + pn->ctl_compat_table[2].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV]; + pn->ctl_compat_table[3].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED]; + pn->ctl_compat_table[4].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT]; + pn->ctl_compat_table[5].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT]; + pn->ctl_compat_table[6].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK]; + pn->ctl_compat_table[7].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT]; + pn->ctl_compat_table[8].data = &tn->timeouts[TCP_CONNTRACK_CLOSE]; + pn->ctl_compat_table[9].data = &tn->timeouts[TCP_CONNTRACK_RETRANS]; + pn->ctl_compat_table[10].data = &tn->tcp_loose; + pn->ctl_compat_table[11].data = &tn->tcp_be_liberal; + pn->ctl_compat_table[12].data = &tn->tcp_max_retrans; +#endif +#endif + return 0; +} + +static int tcpv4_init_net(struct net *net) +{ + int i; + int ret = 0; + struct nf_tcp_net *tn = tcp_pernet(net); + struct nf_proto_net *pn = (struct nf_proto_net *)tn; + +#ifdef CONFIG_SYSCTL + if (!pn->ctl_table) { +#else + if (!pn->user++) { +#endif + for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++) + tn->timeouts[i] = tcp_timeouts[i]; + + tn->tcp_loose = nf_ct_tcp_loose; + tn->tcp_be_liberal = nf_ct_tcp_be_liberal; + tn->tcp_max_retrans = nf_ct_tcp_max_retrans; + } + + ret = tcp_kmemdup_compat_sysctl_table(pn); + + if (ret < 0) + return ret; + + ret = tcp_kmemdup_sysctl_table(pn); + +#ifdef CONFIG_SYSCTL +#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT + if (ret < 0) { + kfree(pn->ctl_compat_table); + pn->ctl_compat_table = NULL; + } +#endif +#endif + return ret; +} + +static int tcpv6_init_net(struct net *net) +{ + int i; + struct nf_tcp_net *tn = tcp_pernet(net); + struct nf_proto_net *pn = (struct nf_proto_net *)tn; + +#ifdef CONFIG_SYSCTL + if (!pn->ctl_table) { +#else + if (!pn->user++) { +#endif + for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++) + tn->timeouts[i] = tcp_timeouts[i]; + tn->tcp_loose = nf_ct_tcp_loose; + tn->tcp_be_liberal = nf_ct_tcp_be_liberal; + tn->tcp_max_retrans = nf_ct_tcp_max_retrans; + } + + return tcp_kmemdup_sysctl_table(pn); +} + struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = { .l3proto = PF_INET, .l4proto = IPPROTO_TCP, .name = "tcp", .pkt_to_tuple = tcp_pkt_to_tuple, .invert_tuple = tcp_invert_tuple, .print_tuple = tcp_print_tuple, .print_conntrack = tcp_print_conntrack, @@ -1584,18 +1686,19 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #ifdef CONFIG_SYSCTL .ctl_table_users = &tcp_sysctl_table_users, .ctl_table_header = &tcp_sysctl_header, .ctl_table = tcp_sysctl_table, #ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT .ctl_compat_table = tcp_compat_sysctl_table, #endif #endif + .init_net = tcpv4_init_net, }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4); struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly = { .l3proto = PF_INET6, .l4proto = IPPROTO_TCP, .name = "tcp", .pkt_to_tuple = tcp_pkt_to_tuple, @@ -1624,11 +1727,12 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly = TCP_CONNTRACK_TIMEOUT_MAX, .nla_policy = tcp_timeout_nla_policy, }, #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #ifdef CONFIG_SYSCTL .ctl_table_users = &tcp_sysctl_table_users, .ctl_table_header = &tcp_sysctl_header, .ctl_table = tcp_sysctl_table, #endif + .init_net = tcpv6_init_net, }; EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6); -- 1.7.10