[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <201206250824.31386.hans.schillstrom@ericsson.com>
Date: Mon, 25 Jun 2012 08:24:23 +0200
From: Hans Schillstrom <hans.schillstrom@...csson.com>
To: Eric Dumazet <eric.dumazet@...il.com>
CC: Vijay Subramanian <subramanian.vijay@...il.com>,
Dave Taht <dave.taht@...il.com>,
netdev <netdev@...r.kernel.org>,
Neal Cardwell <ncardwell@...gle.com>,
Tom Herbert <therbert@...gle.com>,
Jesper Dangaard Brouer <brouer@...hat.com>
Subject: Re: [PATCH v2 net-next] tcp: avoid tx starvation by SYNACK packets
Hi Eric
On Saturday 23 June 2012 10:42:42 Eric Dumazet wrote:
> From: Eric Dumazet <edumazet@...gle.com>
>
> On Sat, 2012-06-23 at 00:34 -0700, Vijay Subramanian wrote:
>
> > This patch ([PATCH net-next] tcp: avoid tx starvation by SYNACK
> > packets) is neither in net/net-next trees nor on patchwork. Maybe it
> > was missed since it was sent during the merge window. Is this not
> > needed anymore or is it being tested currently?
>
> You're right, thanks for the reminder !
We have been runing this patch for a while now,
so I added a "Tested-by:"
>
> [PATCH v2 net-next] tcp: avoid tx starvation by SYNACK packets
>
> pfifo_fast being the default Qdisc, its pretty easy to fill it with
> SYNACK (small) packets while host is under synflood attack.
>
> Packets of established TCP sessions are dropped at Qdisc layer and
> host appears almost dead.
>
> Avoid this problem assigning TC_PRIO_FILLER priority to SYNACK
> generated in SYNCOOKIE mode, so that these packets are enqueued into
> pfifo_fast lowest priority (band 2).
>
> Other packets, queued to band 0 or 1 are dequeued before any SYNACK
> packets waiting in band 2.
>
> If not under synflood, SYNACK priority is as requested by listener
> sk_priority policy.
>
> Reported-by: Hans Schillstrom <hans.schillstrom@...csson.com>
> Signed-off-by: Eric Dumazet <edumazet@...gle.com>
Tested-by: Hans Schillstrom <hans.schillstrom@...csson.com>
> Cc: Jesper Dangaard Brouer <brouer@...hat.com>
> Cc: Neal Cardwell <ncardwell@...gle.com>
> Cc: Tom Herbert <therbert@...gle.com>
> Cc: Vijay Subramanian <subramanian.vijay@...il.com>
> ---
> net/dccp/ipv4.c | 2 ++
> net/ipv4/ip_output.c | 2 +-
> net/ipv4/tcp_ipv4.c | 7 ++++++-
> net/ipv6/inet6_connection_sock.c | 1 +
> net/ipv6/ip6_output.c | 2 +-
> net/ipv6/tcp_ipv6.c | 11 ++++++++---
> 6 files changed, 19 insertions(+), 6 deletions(-)
>
> diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
> index 3eb76b5..045176f 100644
> --- a/net/dccp/ipv4.c
> +++ b/net/dccp/ipv4.c
> @@ -515,6 +515,7 @@ static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
>
> dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->loc_addr,
> ireq->rmt_addr);
> + skb->priority = sk->sk_priority;
> err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
> ireq->rmt_addr,
> ireq->opt);
> @@ -556,6 +557,7 @@ static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
> skb_dst_set(skb, dst_clone(dst));
>
> bh_lock_sock(ctl_sk);
> + skb->priority = ctl_sk->sk_priority;
> err = ip_build_and_send_pkt(skb, ctl_sk,
> rxiph->daddr, rxiph->saddr, NULL);
> bh_unlock_sock(ctl_sk);
> diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
> index 0f3185a..71c6c20 100644
> --- a/net/ipv4/ip_output.c
> +++ b/net/ipv4/ip_output.c
> @@ -155,7 +155,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
> ip_options_build(skb, &opt->opt, daddr, rt, 0);
> }
>
> - skb->priority = sk->sk_priority;
> + /* skb->priority is set by the caller */
> skb->mark = sk->sk_mark;
>
> /* Send it out. */
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index b52934f..5ef4131 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -81,7 +81,7 @@
> #include <linux/stddef.h>
> #include <linux/proc_fs.h>
> #include <linux/seq_file.h>
> -
> +#include <linux/pkt_sched.h>
> #include <linux/crypto.h>
> #include <linux/scatterlist.h>
>
> @@ -821,6 +821,7 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
> * Send a SYN-ACK after having received a SYN.
> * This still operates on a request_sock only, not on a big
> * socket.
> + * nocache is set for SYN-ACK sent in SYNCOOKIE mode
> */
> static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
> struct request_sock *req,
> @@ -843,6 +844,10 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
> __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
>
> skb_set_queue_mapping(skb, queue_mapping);
> +
> + /* SYNACK sent in SYNCOOKIE mode have low priority */
> + skb->priority = nocache ? TC_PRIO_FILLER : sk->sk_priority;
> +
> err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
> ireq->rmt_addr,
> ireq->opt);
> diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
> index e6cee52..5812a74 100644
> --- a/net/ipv6/inet6_connection_sock.c
> +++ b/net/ipv6/inet6_connection_sock.c
> @@ -248,6 +248,7 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
> /* Restore final destination back after routing done */
> fl6.daddr = np->daddr;
>
> + skb->priority = sk->sk_priority;
> res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
> rcu_read_unlock();
> return res;
> diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
> index a233a7c..a93378a 100644
> --- a/net/ipv6/ip6_output.c
> +++ b/net/ipv6/ip6_output.c
> @@ -228,7 +228,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
> hdr->saddr = fl6->saddr;
> hdr->daddr = *first_hop;
>
> - skb->priority = sk->sk_priority;
> + /* skb->priority is set by the caller */
> skb->mark = sk->sk_mark;
>
> mtu = dst_mtu(dst);
> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> index 26a8862..f664452 100644
> --- a/net/ipv6/tcp_ipv6.c
> +++ b/net/ipv6/tcp_ipv6.c
> @@ -43,6 +43,7 @@
> #include <linux/ipv6.h>
> #include <linux/icmpv6.h>
> #include <linux/random.h>
> +#include <linux/pkt_sched.h>
>
> #include <net/tcp.h>
> #include <net/ndisc.h>
> @@ -479,7 +480,8 @@ out:
>
> static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
> struct request_values *rvp,
> - u16 queue_mapping)
> + u16 queue_mapping,
> + bool syncookie)
> {
> struct inet6_request_sock *treq = inet6_rsk(req);
> struct ipv6_pinfo *np = inet6_sk(sk);
> @@ -515,6 +517,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
> if (skb) {
> __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
>
> + skb->priority = syncookie ? TC_PRIO_FILLER : sk->sk_priority;
> fl6.daddr = treq->rmt_addr;
> skb_set_queue_mapping(skb, queue_mapping);
> err = ip6_xmit(sk, skb, &fl6, opt, np->tclass);
> @@ -531,7 +534,7 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req,
> struct request_values *rvp)
> {
> TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
> - return tcp_v6_send_synack(sk, req, rvp, 0);
> + return tcp_v6_send_synack(sk, req, rvp, 0, false);
> }
>
> static void tcp_v6_reqsk_destructor(struct request_sock *req)
> @@ -909,6 +912,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
> dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false);
> if (!IS_ERR(dst)) {
> skb_dst_set(buff, dst);
> + skb->priority = ctl_sk->sk_priority;
> ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass);
> TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
> if (rst)
> @@ -1217,7 +1221,8 @@ have_isn:
>
> if (tcp_v6_send_synack(sk, req,
> (struct request_values *)&tmp_ext,
> - skb_get_queue_mapping(skb)) ||
> + skb_get_queue_mapping(skb),
> + want_cookie) ||
> want_cookie)
> goto drop_and_free;
>
>
>
>
--
Regards
Hans Schillstrom <hans.schillstrom@...csson.com>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists