Message-ID: <1335234012.5205.97.camel@edumazet-glaptop>
Date: Tue, 24 Apr 2012 04:20:12 +0200
From: Eric Dumazet <eric.dumazet@...il.com>
To: David Miller <davem@...emloft.net>
Cc: rick.jones2@...com, netdev@...r.kernel.org, therbert@...gle.com,
ncardwell@...gle.com, maze@...gle.com, ycheng@...gle.com,
ilpo.jarvinen@...sinki.fi
Subject: Re: [PATCH 2/2 net-next] tcp: sk_add_backlog() is too aggressive for TCP

On Mon, 2012-04-23 at 22:37 +0200, Eric Dumazet wrote:
> We could try to coalesce ACKs before backlogging them. I'll work on
> this.
>
I did an experiment, and found that basic coalescing does not work in
case of packet loss and SACK storms.

Doing smart coalescing in this case sounds really complex.

Should we really continue this way?
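
To illustrate the SACK problem with a standalone user-space demo (not
kernel code, option bytes invented): two back-to-back pure ACKs in a
SACK storm typically differ in their SACK option bytes, so the
word-by-word header comparison in the patch below refuses to merge
them.

#include <stdio.h>
#include <string.h>
#include <stdint.h>

int main(void)
{
	/* Option bytes of two consecutive pure ACKs (made-up values):
	 * NOP, NOP, SACK (kind 5, length 10) carrying one block.
	 */
	uint8_t opts1[12] = { 1, 1, 5, 10,
			      0x00, 0x00, 0x10, 0x00,	/* left edge */
			      0x00, 0x00, 0x20, 0x00 };	/* right edge */
	uint8_t opts2[12];

	memcpy(opts2, opts1, sizeof(opts2));
	opts2[10] = 0x30;	/* second ACK reports a grown right edge */

	printf("option bytes identical: %s\n",
	       memcmp(opts1, opts2, sizeof(opts1)) ? "no" : "yes");
	return 0;
}
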
 include/net/tcp.h   |    1 +
 net/ipv4/tcp_ipv4.c |   32 +++++++++++++++++++++++++++++++-
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index fc880e9..de8d847 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1418,6 +1418,7 @@ static inline unsigned int tcp_stream_is_thin(struct tcp_sock *tp)
 	return tp->packets_out < 4 && !tcp_in_initial_slowstart(tp);
 }
 
+extern bool tcp_ack_coalesce(struct sock *sk, struct sk_buff *skb);
 /* /proc */
 enum tcp_seq_states {
 	TCP_SEQ_STATE_LISTENING,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 0883921..b5a3bac 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1670,6 +1670,36 @@ csum_err:
 }
 EXPORT_SYMBOL(tcp_v4_do_rcv);
 
+/* socket is owned by user.
+ * Before queuing this skb into the backlog, try to coalesce it with the previous skb.
+ * We only take care of pure ACKs.
+ */
+bool tcp_ack_coalesce(struct sock *sk, struct sk_buff *skb)
+{
+	struct sk_buff *prev = sk->sk_backlog.tail;
+	const struct tcphdr *th, *thp;
+	unsigned int i, thlen;
+
+	if (TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq ||
+	    !prev ||
+	    TCP_SKB_CB(skb)->seq != TCP_SKB_CB(prev)->end_seq)
+		return false;
+	th = tcp_hdr(skb);
+	thp = tcp_hdr(prev);
+	thlen = th->doff * 4;
+	i = sizeof(th->source) + sizeof(th->dest) +
+	    sizeof(th->seq) + sizeof(th->ack_seq);
+	for (; i < thlen; i += 4) {
+		if (*(u32 *)((u8 *)th + i) != *(u32 *)((u8 *)thp + i))
+			return false;
+	}
+	if (after(TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(prev)->ack_seq))
+		TCP_SKB_CB(prev)->ack_seq = TCP_SKB_CB(skb)->ack_seq;
+	consume_skb(skb);
+	return true;
+}
+EXPORT_SYMBOL(tcp_ack_coalesce);
+
 /*
  * From tcp_input.c
  */
@@ -1752,7 +1782,7 @@ process:
 			if (!tcp_prequeue(sk, skb))
 				ret = tcp_v4_do_rcv(sk, skb);
 		}
-	} else if (unlikely(sk_add_backlog(sk, skb))) {
+	} else if (!tcp_ack_coalesce(sk, skb) && sk_add_backlog(sk, skb)) {
 		bh_unlock_sock(sk);
 		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
 		goto discard_and_relse;
--