lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <AANLkTim_sgZhrcgvuH0=hFTfE-beMezvcvw6PspXRvHx@mail.gmail.com>
Date:	Fri, 3 Sep 2010 17:29:11 -0300
From:	Fabricio Archanjo <farchanjo@...il.com>
To:	Changli Gao <xiaosuo@...il.com>, netfilter-devel@...r.kernel.org,
	netdev@...r.kernel.org
Subject: Re: [PATCH v3] netfilter: xtables target SYNPROXY

hey all,
is this patch going to be merged into the kernel tree?
It works fine. Yesterday I was under attack; after applying this patch
my problem was solved. It hasn't dropped any real connections. In the past I
sometimes switched to FreeBSD because of the synproxy state in pf.


Thanks,

On Fri, Jul 2, 2010 at 1:19 AM, Changli Gao <xiaosuo@...il.com> wrote:
>
> v3:
> fix the bug that it can't work with a bridge.
>
> netfilter: xtables target SYNPROXY.
>
> This patch implements an xtables target SYNPROXY. As the connection to the
> TCP server won't be established until the ACK from the client is received, it
> can protect the TCP server from the SYN-flood attacks.
>
> It works in the raw table of the PREROUTING chain, before the conntracking system.
> Syncookies are used, so no new state is introduced into the conntracking system.
> In fact, until the first connection is established, the conntracking system doesn't
> see any packets. So when there is a SYN-flood attack, the conntracking system won't
> be busy finding and deleting unassured conntrack entries.
>
> As the SYN-packet of the second connection request is sent locally, the DNAT
> rules which are in the PREROUTING chain should be moved to the OUTPUT chain.
>
> Signed-off-by: Changli Gao <xiaosuo@...il.com>
> ----
>  include/net/netfilter/nf_conntrack.h        |   10
>  include/net/netfilter/nf_conntrack_core.h   |   21
>  include/net/netfilter/nf_conntrack_extend.h |    2
>  include/net/tcp.h                           |    7
>  net/ipv4/syncookies.c                       |   22
>  net/ipv4/tcp_ipv4.c                         |    9
>  net/netfilter/Kconfig                       |   17
>  net/netfilter/Makefile                      |    1
>  net/netfilter/nf_conntrack_core.c           |   45 +
>  net/netfilter/xt_SYNPROXY.c                 |  679 ++++++++++++++++++++++++++++
>  10 files changed, 794 insertions(+), 19 deletions(-)
> diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
> index e624dae..5e6d8e4 100644
> --- a/include/net/netfilter/nf_conntrack.h
> +++ b/include/net/netfilter/nf_conntrack.h
> @@ -311,5 +311,15 @@ do {                                                       \
>  #define MODULE_ALIAS_NFCT_HELPER(helper) \
>         MODULE_ALIAS("nfct-helper-" helper)
>
> +#if defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY) || \
> +    defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY_MODULE)
> +extern unsigned int (*syn_proxy_pre_hook)(struct sk_buff *skb,
> +                                         struct nf_conn *ct,
> +                                         enum ip_conntrack_info ctinfo);
> +
> +extern unsigned int (*syn_proxy_post_hook)(struct sk_buff *skb,
> +                                          struct nf_conn *ct,
> +                                          enum ip_conntrack_info ctinfo);
> +#endif
>  #endif /* __KERNEL__ */
>  #endif /* _NF_CONNTRACK_H */
> diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
> index aced085..637b404 100644
> --- a/include/net/netfilter/nf_conntrack_core.h
> +++ b/include/net/netfilter/nf_conntrack_core.h
> @@ -54,6 +54,23 @@ nf_conntrack_find_get(struct net *net, u16 zone,
>
>  extern int __nf_conntrack_confirm(struct sk_buff *skb);
>
> +static inline unsigned int syn_proxy_post_call(struct sk_buff *skb,
> +                                              struct nf_conn *ct,
> +                                              enum ip_conntrack_info ctinfo)
> +{
> +       unsigned int ret = NF_ACCEPT;
> +#if defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY) || \
> +    defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY_MODULE)
> +       unsigned int (*syn_proxy)(struct sk_buff *, struct nf_conn *,
> +                                 enum ip_conntrack_info);
> +       syn_proxy = rcu_dereference(syn_proxy_post_hook);
> +       if (syn_proxy)
> +               ret = syn_proxy(skb, ct, ctinfo);
> +#endif
> +
> +       return ret;
> +}
> +
>  /* Confirm a connection: returns NF_DROP if packet must be dropped. */
>  static inline int nf_conntrack_confirm(struct sk_buff *skb)
>  {
> @@ -63,8 +80,10 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb)
>        if (ct && !nf_ct_is_untracked(ct)) {
>                if (!nf_ct_is_confirmed(ct))
>                        ret = __nf_conntrack_confirm(skb);
> -               if (likely(ret == NF_ACCEPT))
> +               if (likely(ret == NF_ACCEPT)) {
>                        nf_ct_deliver_cached_events(ct);
> +                       ret = syn_proxy_post_call(skb, ct, skb->nfctinfo);
> +               }
>        }
>        return ret;
>  }
> diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h
> index 32d15bd..b2ae7e9 100644
> --- a/include/net/netfilter/nf_conntrack_extend.h
> +++ b/include/net/netfilter/nf_conntrack_extend.h
> @@ -11,6 +11,7 @@ enum nf_ct_ext_id {
>        NF_CT_EXT_ACCT,
>        NF_CT_EXT_ECACHE,
>        NF_CT_EXT_ZONE,
> +       NF_CT_EXT_SYNPROXY,
>        NF_CT_EXT_NUM,
>  };
>
> @@ -19,6 +20,7 @@ enum nf_ct_ext_id {
>  #define NF_CT_EXT_ACCT_TYPE struct nf_conn_counter
>  #define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache
>  #define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone
> +#define NF_CT_EXT_SYNPROXY_TYPE struct syn_proxy_state
>
>  /* Extensions: optional stuff which isn't permanently in struct. */
>  struct nf_ct_ext {
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index c2f96c2..06f28d3 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -460,8 +460,11 @@ extern int                 tcp_disconnect(struct sock *sk, int flags);
>  extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS];
>  extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
>                                    struct ip_options *opt);
> -extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb,
> -                                    __u16 *mss);
> +extern __u32 __cookie_v4_init_sequence(__be32 saddr, __be32 daddr,
> +                                      __be16 sport, __be16 dport, __u32 seq,
> +                                      __u16 *mssp);
> +extern int cookie_v4_check_sequence(const struct iphdr *iph,
> +                                   const struct tcphdr *th, __u32 cookie);
>
>  extern __u32 cookie_init_timestamp(struct request_sock *req);
>  extern bool cookie_check_timestamp(struct tcp_options_received *opt, bool *);
> diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
> index 650cace..3adcba3 100644
> --- a/net/ipv4/syncookies.c
> +++ b/net/ipv4/syncookies.c
> @@ -159,26 +159,21 @@ static __u16 const msstab[] = {
>  * Generate a syncookie.  mssp points to the mss, which is returned
>  * rounded down to the value encoded in the cookie.
>  */
> -__u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
> +__u32 __cookie_v4_init_sequence(__be32 saddr, __be32 daddr, __be16 sport,
> +                               __be16 dport, __u32 seq, __u16 *mssp)
>  {
> -       const struct iphdr *iph = ip_hdr(skb);
> -       const struct tcphdr *th = tcp_hdr(skb);
>        int mssind;
>        const __u16 mss = *mssp;
>
> -       tcp_synq_overflow(sk);
> -
>        for (mssind = ARRAY_SIZE(msstab) - 1; mssind ; mssind--)
>                if (mss >= msstab[mssind])
>                        break;
>        *mssp = msstab[mssind];
>
> -       NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
> -
> -       return secure_tcp_syn_cookie(iph->saddr, iph->daddr,
> -                                    th->source, th->dest, ntohl(th->seq),
> +       return secure_tcp_syn_cookie(saddr, daddr, sport, dport, seq,
>                                     jiffies / (HZ * 60), mssind);
>  }
> +EXPORT_SYMBOL(__cookie_v4_init_sequence);
>
>  /*
>  * This (misnamed) value is the age of syncookie which is permitted.
> @@ -191,10 +186,9 @@ __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
>  * Check if a ack sequence number is a valid syncookie.
>  * Return the decoded mss if it is, or 0 if not.
>  */
> -static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
> +int cookie_v4_check_sequence(const struct iphdr *iph, const struct tcphdr *th,
> +                            __u32 cookie)
>  {
> -       const struct iphdr *iph = ip_hdr(skb);
> -       const struct tcphdr *th = tcp_hdr(skb);
>        __u32 seq = ntohl(th->seq) - 1;
>        __u32 mssind = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr,
>                                            th->source, th->dest, seq,
> @@ -203,6 +197,7 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
>
>        return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0;
>  }
> +EXPORT_SYMBOL(cookie_v4_check_sequence);
>
>  static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
>                                           struct request_sock *req,
> @@ -282,7 +277,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
>                goto out;
>
>        if (tcp_synq_no_recent_overflow(sk) ||
> -           (mss = cookie_check(skb, cookie)) == 0) {
> +           (mss = cookie_v4_check_sequence(ip_hdr(skb), tcp_hdr(skb),
> +                                           cookie)) == 0) {
>                NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
>                goto out;
>        }
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index 8fa32f5..3b094c7 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -1332,7 +1332,14 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
>                TCP_ECN_create_request(req, tcp_hdr(skb));
>
>        if (want_cookie) {
> -               isn = cookie_v4_init_sequence(sk, skb, &req->mss);
> +               struct tcphdr *th;
> +
> +               tcp_synq_overflow(sk);
> +               NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
> +               th = tcp_hdr(skb);
> +               isn = __cookie_v4_init_sequence(saddr, daddr, th->source,
> +                                               th->dest, ntohl(th->seq),
> +                                               &req->mss);
>                req->cookie_ts = tmp_opt.tstamp_ok;
>        } else if (!isn) {
>                struct inet_peer *peer = NULL;
> diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
> index 413ed24..fd8ad8c 100644
> --- a/net/netfilter/Kconfig
> +++ b/net/netfilter/Kconfig
> @@ -560,6 +560,23 @@ config NETFILTER_XT_TARGET_SECMARK
>
>          To compile it as a module, choose M here.  If unsure, say N.
>
> +config NETFILTER_XT_TARGET_SYNPROXY
> +       tristate '"SYNPROXY" target support (EXPERIMENTAL)'
> +       depends on EXPERIMENTAL
> +       depends on SYN_COOKIES
> +       depends on IP_NF_RAW
> +       depends on NF_CONNTRACK
> +       depends on NETFILTER_ADVANCED
> +       help
> +         The SYNPROXY target allows a raw rule to specify that some TCP
> +         connections are relayed to protect the TCP servers from the SYN-flood
> +         DoS attacks. Syn cookies is used to save the initial state, so no
> +         conntrack is needed until the client side connection is established.
> +         It frees the connection tracking system from creating/deleting
> +         conntracks when SYN-flood DoS attack acts.
> +
> +         To compile it as a module, choose M here.  If unsure, say N.
> +
>  config NETFILTER_XT_TARGET_TCPMSS
>        tristate '"TCPMSS" target support'
>        depends on (IPV6 || IPV6=n)
> diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
> index e28420a..4e32834 100644
> --- a/net/netfilter/Makefile
> +++ b/net/netfilter/Makefile
> @@ -62,6 +62,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o
>  obj-$(CONFIG_NETFILTER_XT_TARGET_TEE) += xt_TEE.o
>  obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o
>  obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o
> +obj-$(CONFIG_NETFILTER_XT_TARGET_SYNPROXY) += xt_SYNPROXY.o
>
>  # matches
>  obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o
> diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
> index 16b41b4..dd85d6f 100644
> --- a/net/netfilter/nf_conntrack_core.c
> +++ b/net/netfilter/nf_conntrack_core.c
> @@ -800,6 +800,26 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
>        return ct;
>  }
>
> +static inline unsigned int syn_proxy_pre_call(int protonum, struct sk_buff *skb,
> +                                             struct nf_conn *ct,
> +                                             enum ip_conntrack_info ctinfo)
> +{
> +       unsigned int ret = NF_ACCEPT;
> +#if defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY) || \
> +    defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY_MODULE)
> +       unsigned int (*syn_proxy)(struct sk_buff *, struct nf_conn *,
> +                                 enum ip_conntrack_info);
> +
> +       if (protonum == IPPROTO_TCP) {
> +               syn_proxy = rcu_dereference(syn_proxy_pre_hook);
> +               if (syn_proxy)
> +                       ret = syn_proxy(skb, ct, ctinfo);
> +       }
> +#endif
> +
> +       return ret;
> +}
> +
>  unsigned int
>  nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
>                struct sk_buff *skb)
> @@ -855,8 +875,9 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
>                               l3proto, l4proto, &set_reply, &ctinfo);
>        if (!ct) {
>                /* Not valid part of a connection */
> -               NF_CT_STAT_INC_ATOMIC(net, invalid);
> -               ret = NF_ACCEPT;
> +               ret = syn_proxy_pre_call(protonum, skb, NULL, ctinfo);
> +               if (ret == NF_ACCEPT)
> +                       NF_CT_STAT_INC_ATOMIC(net, invalid);
>                goto out;
>        }
>
> @@ -869,6 +890,9 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
>
>        NF_CT_ASSERT(skb->nfct);
>
> +       ret = syn_proxy_pre_call(protonum, skb, ct, ctinfo);
> +       if (ret != NF_ACCEPT)
> +               goto out;
>        ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum);
>        if (ret <= 0) {
>                /* Invalid: inverse of the return code tells
> @@ -1476,6 +1500,17 @@ s16 (*nf_ct_nat_offset)(const struct nf_conn *ct,
>                        u32 seq);
>  EXPORT_SYMBOL_GPL(nf_ct_nat_offset);
>
> +#if defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY) || \
> +    defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY_MODULE)
> +unsigned int (*syn_proxy_pre_hook)(struct sk_buff *skb, struct nf_conn *ct,
> +                                  enum ip_conntrack_info ctinfo);
> +EXPORT_SYMBOL(syn_proxy_pre_hook);
> +
> +unsigned int (*syn_proxy_post_hook)(struct sk_buff *skb, struct nf_conn *ct,
> +                                   enum ip_conntrack_info ctinfo);
> +EXPORT_SYMBOL(syn_proxy_post_hook);
> +#endif
> +
>  int nf_conntrack_init(struct net *net)
>  {
>        int ret;
> @@ -1496,6 +1531,12 @@ int nf_conntrack_init(struct net *net)
>
>                /* Howto get NAT offsets */
>                rcu_assign_pointer(nf_ct_nat_offset, NULL);
> +
> +#if defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY) || \
> +    defined(CONFIG_NETFILTER_XT_TARGET_SYNPROXY_MODULE)
> +               rcu_assign_pointer(syn_proxy_pre_hook, NULL);
> +               rcu_assign_pointer(syn_proxy_post_hook, NULL);
> +#endif
>        }
>        return 0;
>
> diff --git a/net/netfilter/xt_SYNPROXY.c b/net/netfilter/xt_SYNPROXY.c
> new file mode 100644
> index 0000000..1a55f33
> --- /dev/null
> +++ b/net/netfilter/xt_SYNPROXY.c
> @@ -0,0 +1,679 @@
> +/* (C) 2010- Changli Gao <xiaosuo@...il.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * It bases on ipt_REJECT.c
> + */
> +#define pr_fmt(fmt) "SYNPROXY: " fmt
> +#include <linux/module.h>
> +#include <linux/skbuff.h>
> +#include <linux/slab.h>
> +#include <linux/ip.h>
> +#include <linux/udp.h>
> +#include <linux/icmp.h>
> +#include <linux/unaligned/access_ok.h>
> +#include <net/icmp.h>
> +#include <net/ip.h>
> +#include <net/tcp.h>
> +#include <net/route.h>
> +#include <net/dst.h>
> +#include <net/netfilter/nf_conntrack.h>
> +#include <net/netfilter/nf_conntrack_extend.h>
> +#include <linux/netfilter/x_tables.h>
> +#include <linux/netfilter_ipv4/ip_tables.h>
> +
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR("Changli Gao <xiaosuo@...il.com>");
> +MODULE_DESCRIPTION("Xtables: \"SYNPROXY\" target for IPv4");
> +MODULE_ALIAS("ipt_SYNPROXY");
> +
> +enum {
> +       TCP_SEND_FLAG_NOTRACE   = 0x1,
> +       TCP_SEND_FLAG_SYNCOOKIE = 0x2,
> +       TCP_SEND_FLAG_ACK2SYN   = 0x4,
> +};
> +
> +struct syn_proxy_state {
> +       u16     seq_inited;
> +       __be16  window;
> +       u32     seq_diff;
> +};
> +
> +static int get_mtu(const struct dst_entry *dst)
> +{
> +       int mtu;
> +
> +       mtu = dst_mtu(dst);
> +       if (mtu)
> +               return mtu;
> +
> +       return dst->dev ? dst->dev->mtu : 0;
> +}
> +
> +static int get_advmss(const struct dst_entry *dst)
> +{
> +       int advmss;
> +
> +       advmss = dst_metric(dst, RTAX_ADVMSS);
> +       if (advmss)
> +               return advmss;
> +       advmss = get_mtu(dst);
> +       if (advmss)
> +               return advmss - (sizeof(struct iphdr) + sizeof(struct tcphdr));
> +
> +       return TCP_MSS_DEFAULT;
> +}
> +
> +static int syn_proxy_route(struct sk_buff *skb, struct net *net, u16 *pmss)
> +{
> +       const struct iphdr *iph = ip_hdr(skb);
> +       struct rtable *rt;
> +       struct flowi fl = {};
> +       unsigned int type;
> +       int flags = 0;
> +       int err;
> +       u16 mss;
> +
> +       type = inet_addr_type(net, iph->saddr);
> +       if (type != RTN_LOCAL) {
> +               type = inet_addr_type(net, iph->daddr);
> +               if (type == RTN_LOCAL)
> +                       flags |= FLOWI_FLAG_ANYSRC;
> +       }
> +
> +       if (type == RTN_LOCAL) {
> +               fl.nl_u.ip4_u.daddr = iph->daddr;
> +               fl.nl_u.ip4_u.saddr = iph->saddr;
> +               fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
> +               fl.flags = flags;
> +               err = ip_route_output_key(net, &rt, &fl);
> +               if (err)
> +                       goto out;
> +
> +               skb_dst_set(skb, &rt->dst);
> +       } else {
> +               /* non-local src, find valid iif to satisfy
> +                * rp-filter when calling ip_route_input. */
> +               fl.nl_u.ip4_u.daddr = iph->saddr;
> +               err = ip_route_output_key(net, &rt, &fl);
> +               if (err)
> +                       goto out;
> +
> +               err = ip_route_input(skb, iph->daddr, iph->saddr,
> +                                    RT_TOS(iph->tos), rt->dst.dev);
> +               if (err) {
> +                       dst_release(&rt->dst);
> +                       goto out;
> +               }
> +               if (pmss) {
> +                       mss = get_advmss(&rt->dst);
> +                       if (*pmss > mss)
> +                               *pmss = mss;
> +               }
> +               dst_release(&rt->dst);
> +       }
> +
> +       err = skb_dst(skb)->error;
> +       if (!err && pmss) {
> +               mss = get_advmss(skb_dst(skb));
> +               if (*pmss > mss)
> +                       *pmss = mss;
> +       }
> +
> +out:
> +       return err;
> +}
> +
> +static int tcp_send(__be32 src, __be32 dst, __be16 sport, __be16 dport,
> +                   u32 seq, u32 ack_seq, __be16 window, u16 mss, u8 tcp_flags,
> +                   u8 tos, struct net_device *dev, int flags,
> +                   struct sk_buff *oskb)
> +{
> +       struct sk_buff *skb;
> +       struct iphdr *iph;
> +       struct tcphdr *th;
> +       int err, len;
> +
> +       len = sizeof(*th);
> +       if (mss)
> +               len += TCPOLEN_MSS;
> +
> +       skb = NULL;
> +       /* caller must give me a large enough oskb */
> +       if (oskb) {
> +               unsigned char *odata = oskb->data;
> +
> +               if (skb_recycle_check(oskb, 0)) {
> +                       oskb->data = odata;
> +                       skb_reset_tail_pointer(oskb);
> +                       skb = oskb;
> +                       pr_debug("recycle skb\n");
> +               }
> +       }
> +       if (!skb) {
> +               skb = alloc_skb(LL_MAX_HEADER + sizeof(*iph) + len, GFP_ATOMIC);
> +               if (!skb) {
> +                       err = -ENOMEM;
> +                       goto out;
> +               }
> +               skb_reserve(skb, LL_MAX_HEADER);
> +       }
> +
> +       skb_reset_network_header(skb);
> +       if (!(flags & TCP_SEND_FLAG_ACK2SYN) || skb != oskb) {
> +               iph = (struct iphdr *)skb_put(skb, sizeof(*iph));
> +               iph->version    = 4;
> +               iph->ihl        = sizeof(*iph) / 4;
> +               iph->tos        = tos;
> +               /* tot_len is set in ip_local_out() */
> +               iph->id         = 0;
> +               iph->frag_off   = htons(IP_DF);
> +               iph->protocol   = IPPROTO_TCP;
> +               iph->saddr      = src;
> +               iph->daddr      = dst;
> +               th = (struct tcphdr *)skb_put(skb, len);
> +               th->source      = sport;
> +               th->dest        = dport;
> +       } else {
> +               iph = (struct iphdr *)skb->data;
> +               iph->id         = 0;
> +               iph->frag_off   = htons(IP_DF);
> +               skb_put(skb, iph->ihl * 4 + len);
> +               th = (struct tcphdr *)(skb->data + iph->ihl * 4);
> +       }
> +
> +       th->seq         = htonl(seq);
> +       th->ack_seq     = htonl(ack_seq);
> +       tcp_flag_byte(th) = tcp_flags;
> +       th->doff        = len / 4;
> +       th->window      = window;
> +       th->urg_ptr     = 0;
> +
> +       skb->protocol = htons(ETH_P_IP);
> +       if ((flags & TCP_SEND_FLAG_SYNCOOKIE) && mss)
> +               err = syn_proxy_route(skb, dev_net(dev), &mss);
> +       else
> +               err = syn_proxy_route(skb, dev_net(dev), NULL);
> +       if (err)
> +               goto err_out;
> +
> +       if ((flags & TCP_SEND_FLAG_SYNCOOKIE)) {
> +               if (mss) {
> +                       th->seq = htonl(__cookie_v4_init_sequence(dst, src,
> +                                                                 dport, sport,
> +                                                                 ack_seq - 1,
> +                                                                 &mss));
> +               } else {
> +                       mss = TCP_MSS_DEFAULT;
> +                       th->seq = htonl(__cookie_v4_init_sequence(dst, src,
> +                                                                 dport, sport,
> +                                                                 ack_seq - 1,
> +                                                                 &mss));
> +                       mss = 0;
> +               }
> +       }
> +
> +       if (mss)
> +               * (__force __be32 *)(th + 1) = htonl((TCPOPT_MSS << 24) |
> +                                                    (TCPOLEN_MSS << 16) |
> +                                                    mss);
> +       skb->ip_summed = CHECKSUM_PARTIAL;
> +       th->check = ~tcp_v4_check(len, src, dst, 0);
> +       skb->csum_start = (unsigned char *)th - skb->head;
> +       skb->csum_offset = offsetof(struct tcphdr, check);
> +
> +       if (!(flags & TCP_SEND_FLAG_ACK2SYN) || skb != oskb)
> +               iph->ttl        = dst_metric(skb_dst(skb), RTAX_HOPLIMIT);
> +
> +       if (skb->len > get_mtu(skb_dst(skb))) {
> +               if (printk_ratelimit())
> +                       pr_warning("%s has smaller mtu: %d\n",
> +                                  skb_dst(skb)->dev->name,
> +                                  get_mtu(skb_dst(skb)));
> +               err = -EINVAL;
> +               goto err_out;
> +       }
> +
> +       if ((flags & TCP_SEND_FLAG_NOTRACE)) {
> +               skb->nfct = &nf_ct_untracked_get()->ct_general;
> +               skb->nfctinfo = IP_CT_NEW;
> +               nf_conntrack_get(skb->nfct);
> +       }
> +
> +       pr_debug("ip_local_out: %pI4n:%hu -> %pI4n:%hu (seq=%u, "
> +                "ack_seq=%u mss=%hu flags=%hhx)\n", &src, ntohs(th->source),
> +                &dst, ntohs(th->dest), ntohl(th->seq), ack_seq, mss,
> +                tcp_flags);
> +
> +       err = ip_local_out(skb);
> +       if (err > 0)
> +               err = net_xmit_errno(err);
> +
> +       pr_debug("ip_local_out: return with %d\n", err);
> +out:
> +       if (oskb && oskb != skb)
> +               kfree_skb(oskb);
> +
> +       return err;
> +
> +err_out:
> +       kfree_skb(skb);
> +       goto out;
> +}
> +
> +static int get_mss(u8 *data, int len)
> +{
> +       u8 olen;
> +
> +       while (len >= TCPOLEN_MSS) {
> +               switch (data[0]) {
> +               case TCPOPT_EOL:
> +                       return 0;
> +               case TCPOPT_NOP:
> +                       data++;
> +                       len--;
> +                       break;
> +               case TCPOPT_MSS:
> +                       if (data[1] != TCPOLEN_MSS)
> +                               return -EINVAL;
> +                       return get_unaligned_be16(data + 2);
> +               default:
> +                       olen = data[1];
> +                       if (olen < 2 || olen > len)
> +                               return -EINVAL;
> +                       data += olen;
> +                       len -= olen;
> +                       break;
> +               }
> +       }
> +
> +       return 0;
> +}
> +
> +static DEFINE_PER_CPU(struct syn_proxy_state, syn_proxy_state);
> +
> +/* syn_proxy_pre isn't under the protection of nf_conntrack_proto_tcp.c */
> +static unsigned int syn_proxy_pre(struct sk_buff *skb, struct nf_conn *ct,
> +                                 enum ip_conntrack_info ctinfo)
> +{
> +       struct syn_proxy_state *state;
> +       struct iphdr *iph;
> +       struct tcphdr *th, _th;
> +
> +       /* only support IPv4 now */
> +       iph = ip_hdr(skb);
> +       if (iph->version != 4)
> +               return NF_ACCEPT;
> +
> +       th = skb_header_pointer(skb, iph->ihl * 4, sizeof(_th), &_th);
> +       if (th == NULL)
> +               return NF_DROP;
> +
> +       if (!ct || !nf_ct_is_confirmed(ct)) {
> +               int ret;
> +
> +               if (!th->syn && th->ack) {
> +                       u16 mss;
> +                       struct sk_buff *rec_skb;
> +
> +                       mss = cookie_v4_check_sequence(iph, th,
> +                                                      ntohl(th->ack_seq) - 1);
> +                       if (!mss)
> +                               return NF_ACCEPT;
> +
> +                       pr_debug("%pI4n:%hu -> %pI4n:%hu(mss=%hu)\n",
> +                                &iph->saddr, ntohs(th->source),
> +                                &iph->daddr, ntohs(th->dest), mss);
> +
> +                       if (skb_tailroom(skb) < TCPOLEN_MSS &&
> +                           skb->len < iph->ihl * 4 + sizeof(*th) + TCPOLEN_MSS)
> +                               rec_skb = NULL;
> +                       else
> +                               rec_skb = skb;
> +
> +                       local_bh_disable();
> +                       state = &__get_cpu_var(syn_proxy_state);
> +                       state->seq_inited = 1;
> +                       state->window = th->window;
> +                       state->seq_diff = ntohl(th->ack_seq) - 1;
> +                       if (rec_skb)
> +                               tcp_send(iph->saddr, iph->daddr, 0, 0,
> +                                        ntohl(th->seq) - 1, 0, th->window,
> +                                        mss, TCPHDR_SYN, 0, skb->dev,
> +                                        TCP_SEND_FLAG_ACK2SYN, rec_skb);
> +                       else
> +                               tcp_send(iph->saddr, iph->daddr, th->source,
> +                                        th->dest, ntohl(th->seq) - 1, 0,
> +                                        th->window, mss, TCPHDR_SYN,
> +                                        iph->tos, skb->dev, 0, NULL);
> +                       state->seq_inited = 0;
> +                       local_bh_enable();
> +
> +                       if (!rec_skb)
> +                               kfree_skb(skb);
> +
> +                       return NF_STOLEN;
> +               }
> +
> +               if (!ct || !th->syn || th->ack)
> +                       return NF_ACCEPT;
> +
> +               ret = NF_ACCEPT;
> +               local_bh_disable();
> +               state = &__get_cpu_var(syn_proxy_state);
> +               if (state->seq_inited) {
> +                       struct syn_proxy_state *nstate;
> +
> +                       nstate = nf_ct_ext_add(ct, NF_CT_EXT_SYNPROXY,
> +                                              GFP_ATOMIC);
> +                       if (nstate != NULL) {
> +                               nstate->seq_inited = 0;
> +                               nstate->window = state->window;
> +                               nstate->seq_diff = state->seq_diff;
> +                               pr_debug("seq_diff: %u\n", nstate->seq_diff);
> +                       } else {
> +                               ret = NF_DROP;
> +                       }
> +               }
> +               local_bh_enable();
> +
> +               return ret;
> +       }
> +
> +       state = nf_ct_ext_find(ct, NF_CT_EXT_SYNPROXY);
> +       if (!state)
> +               return NF_ACCEPT;
> +
> +       if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
> +               __be32 newack;
> +
> +               /* don't need to mangle duplicate SYN packets */
> +               if (th->syn && !th->ack)
> +                       return NF_ACCEPT;
> +               if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*th)))
> +                       return NF_DROP;
> +               th = (struct tcphdr *)(skb->data + ip_hdrlen(skb));
> +               newack = htonl(ntohl(th->ack_seq) - state->seq_diff);
> +               inet_proto_csum_replace4(&th->check, skb, th->ack_seq, newack,
> +                                        0);
> +               pr_debug("alter ack seq: %u -> %u\n",
> +                        ntohl(th->ack_seq), ntohl(newack));
> +               th->ack_seq = newack;
> +       } else {
> +               /* Simultaneous open ? Oh, no. The connection between
> +                * client and us is established. */
> +               if (th->syn && !th->ack)
> +                       return NF_DROP;
> +       }
> +
> +       return NF_ACCEPT;
> +}
> +
> +static unsigned int syn_proxy_mangle_pkt(struct sk_buff *skb, struct iphdr *iph,
> +                                        struct tcphdr *th, u32 seq_diff)
> +{
> +       __be32 new;
> +       int olen;
> +
> +       if (skb->len < (iph->ihl + th->doff) * 4)
> +               return NF_DROP;
> +       if (!skb_make_writable(skb, (iph->ihl + th->doff) * 4))
> +               return NF_DROP;
> +       iph = (struct iphdr *)(skb->data);
> +       th = (struct tcphdr *)(skb->data + iph->ihl * 4);
> +
> +       new = tcp_flag_word(th) & (~TCP_FLAG_SYN);
> +       inet_proto_csum_replace4(&th->check, skb, tcp_flag_word(th), new, 0);
> +       tcp_flag_word(th) = new;
> +
> +       new = htonl(ntohl(th->seq) + seq_diff);
> +       inet_proto_csum_replace4(&th->check, skb, th->seq, new, 0);
> +       pr_debug("alter seq: %u -> %u\n", ntohl(th->seq), ntohl(new));
> +       th->seq = new;
> +
> +       olen = th->doff - sizeof(*th) / 4;
> +       if (olen) {
> +               __be32 *opt;
> +
> +               opt = (__force __be32 *)(th + 1);
> +#define TCPOPT_EOL_WORD ((TCPOPT_EOL << 24) + (TCPOPT_EOL << 16) + \
> +                        (TCPOPT_EOL << 8) + TCPOPT_EOL)
> +               inet_proto_csum_replace4(&th->check, skb, *opt, TCPOPT_EOL_WORD,
> +                                        0);
> +               *opt = TCPOPT_EOL_WORD;
> +       }
> +
> +       return NF_ACCEPT;
> +}
> +
> +/*
> + * Adjust TCP sequence numbers on a packet of connection @ct that carries the
> + * NF_CT_EXT_SYNPROXY extension; connections without it are accepted as is.
> + *
> + * While state->seq_inited is clear:
> + *  - a SYN-ACK finalises state->seq_diff (under ct->lock), triggers
> + *    tcp_send() with TCPHDR_ACK and swapped addresses/ports, and is then
> + *    rewritten by syn_proxy_mangle_pkt();
> + *  - an RST gets its sequence number replaced by seq_diff + 1;
> + *  - any other packet is accepted untouched.
> + * Once seq_inited is set, a repeated SYN-ACK is handled the same way again
> + * (without touching seq_diff), and packets in the reply direction get
> + * seq_diff added to their sequence number.
> + *
> + * Returns NF_DROP if the headers cannot be made writable, the verdict of
> + * syn_proxy_mangle_pkt() for SYN-ACKs, and NF_ACCEPT otherwise.
> + */
> +static unsigned int syn_proxy_post(struct sk_buff *skb, struct nf_conn *ct,
> +                                  enum ip_conntrack_info ctinfo)
> +{
> +       struct syn_proxy_state *state;
> +       struct iphdr *iph;
> +       struct tcphdr *th;
> +
> +       /* untraced packets don't have NF_CT_EXT_SYNPROXY ext, as they don't
> +        * enter syn_proxy_pre() */
> +       state = nf_ct_ext_find(ct, NF_CT_EXT_SYNPROXY);
> +       if (state == NULL)
> +               return NF_ACCEPT;
> +
> +       iph = ip_hdr(skb);
> +       if (!skb_make_writable(skb, iph->ihl * 4 + sizeof(*th)))
> +               return NF_DROP;
> +       /* skb_make_writable() may have moved the header: the pointer taken
> +        * above must not be dereferenced any more, so fetch it again */
> +       iph = ip_hdr(skb);
> +       th = (struct tcphdr *)(skb->data + iph->ihl * 4);
> +       if (!state->seq_inited) {
> +               if (th->syn) {
> +                       /* It must be from original direction, as the ones
> +                        * from the other side are dropped in function
> +                        * syn_proxy_pre() */
> +                       if (!th->ack)
> +                               return NF_ACCEPT;
> +
> +                       pr_debug("SYN-ACK %pI4n:%hu -> %pI4n:%hu "
> +                                "(seq=%u ack_seq=%u)\n",
> +                                &iph->saddr, ntohs(th->source), &iph->daddr,
> +                                ntohs(th->dest), ntohl(th->seq),
> +                                ntohl(th->ack_seq));
> +
> +                       /* SYN-ACK from reply direction with the protection
> +                        * of conntrack */
> +                       spin_lock_bh(&ct->lock);
> +                       /* re-check under the lock so that seq_diff is
> +                        * adjusted only once */
> +                       if (!state->seq_inited) {
> +                               state->seq_inited = 1;
> +                               pr_debug("update seq_diff %u -> %u\n",
> +                                        state->seq_diff,
> +                                        state->seq_diff - ntohl(th->seq));
> +                               state->seq_diff -= ntohl(th->seq);
> +                       }
> +                       spin_unlock_bh(&ct->lock);
> +                       tcp_send(iph->daddr, iph->saddr, th->dest, th->source,
> +                                ntohl(th->ack_seq),
> +                                ntohl(th->seq) + 1 + state->seq_diff,
> +                                state->window, 0, TCPHDR_ACK, iph->tos,
> +                                skb->dev, 0, NULL);
> +
> +                       return syn_proxy_mangle_pkt(skb, iph, th,
> +                                                   state->seq_diff + 1);
> +               } else {
> +                       __be32 newseq;
> +
> +                       if (!th->rst)
> +                               return NF_ACCEPT;
> +                       newseq = htonl(state->seq_diff + 1);
> +                       inet_proto_csum_replace4(&th->check, skb, th->seq,
> +                                                newseq, 0);
> +                       pr_debug("alter RST seq: %u -> %u\n",
> +                                ntohl(th->seq), ntohl(newseq));
> +                       th->seq = newseq;
> +
> +                       return NF_ACCEPT;
> +               }
> +       }
> +
> +       /* seq_diff is already final here. A SYN-ACK can still show up, e.g.
> +        * when the ACK sent above was lost: acknowledge and mangle it
> +        * again. */
> +       if (th->syn) {
> +               if (!th->ack)
> +                       return NF_ACCEPT;
> +
> +               tcp_send(iph->daddr, iph->saddr, th->dest, th->source,
> +                        ntohl(th->ack_seq),
> +                        ntohl(th->seq) + 1 + state->seq_diff,
> +                        state->window, 0, TCPHDR_ACK, iph->tos,
> +                        skb->dev, 0, NULL);
> +
> +               return syn_proxy_mangle_pkt(skb, iph, th, state->seq_diff + 1);
> +       }
> +
> +       /* only the reply direction needs its sequence numbers shifted here */
> +       if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
> +               __be32 newseq;
> +
> +               newseq = htonl(ntohl(th->seq) + state->seq_diff);
> +               inet_proto_csum_replace4(&th->check, skb, th->seq, newseq, 0);
> +               pr_debug("alter seq: %u -> %u\n", ntohl(th->seq),
> +                        ntohl(newseq));
> +               th->seq = newseq;
> +       }
> +
> +       return NF_ACCEPT;
> +}
> +
> +/*
> + * Handle a packet that reached the SYNPROXY target.
> + *
> + * Only a TCP segment with SYN set and FIN, RST and ACK clear is answered;
> + * it must not be a fragment with a non-zero offset, must pass
> + * nf_ip_checksum() and must have a data offset of at least the fixed header
> + * size. The MSS is taken from get_mss() when options are present (0
> + * otherwise) and handed to tcp_send() together with SYN|ACK and the
> + * TCP_SEND_FLAG_NOTRACE | TCP_SEND_FLAG_SYNCOOKIE flags.
> + *
> + * Returns NF_STOLEN once tcp_send() has been called with @skb (presumably
> + * tcp_send() takes over the skb - confirm against its definition), NF_DROP
> + * for everything that fails one of the checks above.
> + */
> +static unsigned int tcp_process(struct sk_buff *skb)
> +{
> +       const struct iphdr *iph;
> +       const struct tcphdr *th;
> +       int err;
> +       u16 mss;
> +
> +       iph = ip_hdr(skb);
> +       if (iph->frag_off & htons(IP_OFFSET))
> +               goto out;
> +       if (!pskb_may_pull(skb, iph->ihl * 4 + sizeof(*th)))
> +               goto out;
> +       /* pskb_may_pull() may reallocate the header, so pointers taken
> +        * before the call must be fetched again */
> +       iph = ip_hdr(skb);
> +       th = (const struct tcphdr *)(skb->data + iph->ihl * 4);
> +       if ((tcp_flag_byte(th) &
> +            (TCPHDR_FIN | TCPHDR_RST | TCPHDR_ACK | TCPHDR_SYN)) != TCPHDR_SYN)
> +               goto out;
> +
> +       if (nf_ip_checksum(skb, NF_INET_PRE_ROUTING, iph->ihl * 4, IPPROTO_TCP))
> +               goto out;
> +       mss = 0;
> +       if (th->doff > sizeof(*th) / 4) {
> +               if (!pskb_may_pull(skb, (iph->ihl + th->doff) * 4))
> +                       goto out;
> +               /* same as above: reload both headers after the pull */
> +               iph = ip_hdr(skb);
> +               th = (const struct tcphdr *)(skb->data + iph->ihl * 4);
> +               err = get_mss((u8 *)(th + 1), th->doff * 4 - sizeof(*th));
> +               if (err < 0)
> +                       goto out;
> +               if (err != 0)
> +                       mss = err;
> +       } else if (th->doff != sizeof(*th) / 4)
> +               goto out;
> +
> +       tcp_send(iph->daddr, iph->saddr, th->dest, th->source, 0,
> +                ntohl(th->seq) + 1, 0, mss, TCPHDR_SYN | TCPHDR_ACK,
> +                iph->tos, skb->dev,
> +                TCP_SEND_FLAG_NOTRACE | TCP_SEND_FLAG_SYNCOOKIE, skb);
> +
> +       return NF_STOLEN;
> +
> +out:
> +       return NF_DROP;
> +}
> +
> +/*
> + * SYNPROXY target entry point.
> + *
> + * Packets that already have a conntrack entry attached continue down the
> + * chain (the original author's note attributes these to packets received
> + * from lo). For the rest, tcp_process() decides the verdict unless this
> + * CPU's syn_proxy_state.seq_inited flag is set, in which case the packet
> + * continues as well. The flag is checked with bottom halves disabled.
> + */
> +static unsigned int synproxy_tg(struct sk_buff *skb,
> +                               const struct xt_action_param *par)
> +{
> +       enum ip_conntrack_info ctinfo;
> +       unsigned int verdict = IPT_CONTINUE;
> +
> +       if (nf_ct_get(skb, &ctinfo) != NULL)
> +               return verdict;
> +
> +       local_bh_disable();
> +       if (!__get_cpu_var(syn_proxy_state).seq_inited)
> +               verdict = tcp_process(skb);
> +       local_bh_enable();
> +
> +       return verdict;
> +}
> +
> +/*
> + * checkentry callback: calls nf_ct_l3proto_try_module_get() for the rule's
> + * family and passes its result back, logging a message when it is negative.
> + */
> +static int synproxy_tg_check(const struct xt_tgchk_param *par)
> +{
> +       int err = nf_ct_l3proto_try_module_get(par->family);
> +
> +       if (err >= 0)
> +               return err;
> +
> +       pr_info("cannot load conntrack support for proto=%u\n",
> +               par->family);
> +       return err;
> +}
> +
> +/* destroy callback, counterpart of synproxy_tg_check(): calls
> + * nf_ct_l3proto_module_put() for the rule's family. */
> +static void synproxy_tg_destroy(const struct xt_tgdtor_param *par)
> +{
> +       nf_ct_l3proto_module_put(par->family);
> +}
> +
> +/* xtables registration of the "SYNPROXY" target: IPv4, TCP only, and
> + * restricted to the PREROUTING hook of the "raw" table. */
> +static struct xt_target synproxy_tg_reg __read_mostly = {
> +       .name           = "SYNPROXY",
> +       .family         = NFPROTO_IPV4,
> +       .target         = synproxy_tg,
> +       .table          = "raw",
> +       .hooks          = 1 << NF_INET_PRE_ROUTING,
> +       .proto          = IPPROTO_TCP,
> +       .checkentry     = synproxy_tg_check,
> +       .destroy        = synproxy_tg_destroy,
> +       .me             = THIS_MODULE,
> +};
> +
> +/* conntrack extension type NF_CT_EXT_SYNPROXY; its payload is one
> + * struct syn_proxy_state. */
> +static struct nf_ct_ext_type syn_proxy_state_ext __read_mostly = {
> +       .len    = sizeof(struct syn_proxy_state),
> +       .align  = __alignof__(struct syn_proxy_state),
> +       .id     = NF_CT_EXT_SYNPROXY,
> +};
> +
> +/*
> + * Module init: publish the pre/post hook pointers, register the conntrack
> + * extension type and then the xtables target. On failure everything done so
> + * far is undone in reverse order and the error code is returned.
> + */
> +static int __init synproxy_tg_init(void)
> +{
> +       int err;
> +
> +       rcu_assign_pointer(syn_proxy_pre_hook, syn_proxy_pre);
> +       rcu_assign_pointer(syn_proxy_post_hook, syn_proxy_post);
> +       err = nf_ct_extend_register(&syn_proxy_state_ext);
> +       if (err)
> +               goto err_out;
> +       err = xt_register_target(&synproxy_tg_reg);
> +       if (err)
> +               goto err_out2;
> +
> +       return err;
> +
> +err_out2:
> +       nf_ct_extend_unregister(&syn_proxy_state_ext);
> +err_out:
> +       rcu_assign_pointer(syn_proxy_post_hook, NULL);
> +       rcu_assign_pointer(syn_proxy_pre_hook, NULL);
> +       /* The hooks were already visible to other CPUs. rcu_barrier() only
> +        * waits for queued callbacks, so wait explicitly for readers that
> +        * may still be running them (assumes the hook pointers are
> +        * dereferenced under rcu_read_lock() - the reader side is not in
> +        * this file section). */
> +       synchronize_rcu();
> +       rcu_barrier();
> +
> +       return err;
> +}
> +
> +/*
> + * Module exit: unregister the target and the conntrack extension type,
> + * clear the hook pointers and wait until nobody can be using them any more.
> + */
> +static void __exit synproxy_tg_exit(void)
> +{
> +       xt_unregister_target(&synproxy_tg_reg);
> +       nf_ct_extend_unregister(&syn_proxy_state_ext);
> +       rcu_assign_pointer(syn_proxy_post_hook, NULL);
> +       rcu_assign_pointer(syn_proxy_pre_hook, NULL);
> +       /* rcu_barrier() only waits for queued callbacks. Readers that
> +        * fetched a hook pointer before it was cleared need a full grace
> +        * period before the module text may go away (assumes the hook
> +        * pointers are dereferenced under rcu_read_lock() - the reader side
> +        * is not in this file section). */
> +       synchronize_rcu();
> +       rcu_barrier();
> +}
> +
> +/* module entry and exit points */
> +module_init(synproxy_tg_init);
> +module_exit(synproxy_tg_exit);
> --
> To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
> the body of a message to majordomo@...r.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ