[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <1335523113.2775.239.camel@edumazet-glaptop>
Date: Fri, 27 Apr 2012 12:38:33 +0200
From: Eric Dumazet <eric.dumazet@...il.com>
To: David Miller <davem@...emloft.net>
Cc: netdev <netdev@...r.kernel.org>,
Neal Cardwell <ncardwell@...gle.com>,
Jeff Kirsher <jeffrey.t.kirsher@...el.com>,
Tom Herbert <therbert@...gle.com>,
Herbert Xu <herbert@...dor.apana.org.au>,
Ben Hutchings <bhutchings@...arflare.com>,
Matt Carlson <mcarlson@...adcom.com>,
Michael Chan <mchan@...adcom.com>,
Maciej Żenczykowski <maze@...gle.com>,
Ilpo Järvinen <ilpo.jarvinen@...sinki.fi>
Subject: [PATCH 4/4 net-next] tcp: makes tcp_try_coalesce aware of
skb->head_frag
From: Eric Dumazet <edumazet@...gle.com>
TCP coalesce can check if skb to be merged has its skb->head mapped to a
page fragment, instead of a kmalloc() area.
We had to disable coalescing in this case, for performance reasons.
We 'upgrade' skb->head as a fragment in itself.
This reduces the number of cache misses when the user makes its copies,
since fewer sk_buffs are fetched.
This makes the receive and ofo queues shorter and thus reduces cache line
misses in the TCP stack.
This is a followup of patch "net: allow skb->head to be a page fragment"
Tested with tg3 nic, with GRO on or off. We can see "TCPRcvCoalesce"
counter being incremented.
Signed-off-by: Eric Dumazet <edumazet@...gle.com>
Cc: Ilpo Järvinen <ilpo.jarvinen@...sinki.fi>
Cc: Herbert Xu <herbert@...dor.apana.org.au>
Cc: Maciej Żenczykowski <maze@...gle.com>
Cc: Neal Cardwell <ncardwell@...gle.com>
Cc: Tom Herbert <therbert@...gle.com>
Cc: Jeff Kirsher <jeffrey.t.kirsher@...el.com>
Cc: Ben Hutchings <bhutchings@...arflare.com>
Cc: Matt Carlson <mcarlson@...adcom.com>
Cc: Michael Chan <mchan@...adcom.com>
---
net/ipv4/tcp_input.c | 55 ++++++++++++++++++++++++++++++++---------
1 file changed, 43 insertions(+), 12 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c93b0cb..96a631d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4464,10 +4464,12 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
*/
static bool tcp_try_coalesce(struct sock *sk,
struct sk_buff *to,
- struct sk_buff *from)
+ struct sk_buff *from,
+ bool *fragstolen)
{
- int len = from->len;
+ int delta, len = from->len;
+ *fragstolen = false;
if (tcp_hdr(from)->fin)
return false;
if (len <= skb_tailroom(to)) {
@@ -4478,15 +4480,19 @@ merge:
TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
return true;
}
+
+ if (skb_has_frag_list(to) || skb_has_frag_list(from))
+ return false;
+
if (skb_headlen(from) == 0 &&
- !skb_has_frag_list(to) &&
- !skb_has_frag_list(from) &&
(skb_shinfo(to)->nr_frags +
skb_shinfo(from)->nr_frags <= MAX_SKB_FRAGS)) {
- int delta = from->truesize - ksize(from->head) -
- SKB_DATA_ALIGN(sizeof(struct sk_buff));
+ WARN_ON_ONCE(from->head_frag);
+ delta = from->truesize - ksize(from->head) -
+ SKB_DATA_ALIGN(sizeof(struct sk_buff));
WARN_ON_ONCE(delta < len);
+copyfrags:
memcpy(skb_shinfo(to)->frags + skb_shinfo(to)->nr_frags,
skb_shinfo(from)->frags,
skb_shinfo(from)->nr_frags * sizeof(skb_frag_t));
@@ -4499,6 +4505,20 @@ merge:
to->data_len += len;
goto merge;
}
+ if (from->head_frag) {
+ struct page *page;
+ unsigned int offset;
+
+ if (skb_shinfo(to)->nr_frags + skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS)
+ return false;
+ page = virt_to_head_page(from->head);
+ offset = from->data - (unsigned char *)page_address(page);
+ skb_fill_page_desc(to, skb_shinfo(to)->nr_frags,
+ page, offset, skb_headlen(from));
+ *fragstolen = true;
+ delta = len; /* we dont know real truesize... */
+ goto copyfrags;
+ }
return false;
}
@@ -4540,10 +4560,15 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
end_seq = TCP_SKB_CB(skb)->end_seq;
if (seq == TCP_SKB_CB(skb1)->end_seq) {
- if (!tcp_try_coalesce(sk, skb1, skb)) {
+ bool fragstolen;
+
+ if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
} else {
- __kfree_skb(skb);
+ if (fragstolen)
+ kmem_cache_free(skbuff_head_cache, skb);
+ else
+ __kfree_skb(skb);
skb = NULL;
}
@@ -4626,6 +4651,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
const struct tcphdr *th = tcp_hdr(skb);
struct tcp_sock *tp = tcp_sk(sk);
int eaten = -1;
+ bool fragstolen = false;
if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq)
goto drop;
@@ -4672,7 +4698,9 @@ queue_and_out:
goto drop;
tail = skb_peek_tail(&sk->sk_receive_queue);
- eaten = (tail && tcp_try_coalesce(sk, tail, skb)) ? 1 : 0;
+ eaten = (tail &&
+ tcp_try_coalesce(sk, tail, skb,
+ &fragstolen)) ? 1 : 0;
if (eaten <= 0) {
skb_set_owner_r(skb, sk);
__skb_queue_tail(&sk->sk_receive_queue, skb);
@@ -4699,9 +4727,12 @@ queue_and_out:
tcp_fast_path_check(sk);
- if (eaten > 0)
- __kfree_skb(skb);
- else if (!sock_flag(sk, SOCK_DEAD))
+ if (eaten > 0) {
+ if (fragstolen)
+ kmem_cache_free(skbuff_head_cache, skb);
+ else
+ __kfree_skb(skb);
+ } else if (!sock_flag(sk, SOCK_DEAD))
sk->sk_data_ready(sk, 0);
return;
}
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists