[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1335981358.22133.605.camel@edumazet-glaptop>
Date: Wed, 02 May 2012 19:55:58 +0200
From: Eric Dumazet <eric.dumazet@...il.com>
To: Alexander Duyck <alexander.h.duyck@...el.com>
Cc: Alexander Duyck <alexander.duyck@...il.com>,
David Miller <davem@...emloft.net>,
netdev <netdev@...r.kernel.org>,
Neal Cardwell <ncardwell@...gle.com>,
Tom Herbert <therbert@...gle.com>,
Jeff Kirsher <jeffrey.t.kirsher@...el.com>,
Michael Chan <mchan@...adcom.com>,
Matt Carlson <mcarlson@...adcom.com>,
Herbert Xu <herbert@...dor.apana.org.au>,
Ben Hutchings <bhutchings@...arflare.com>,
Ilpo Järvinen <ilpo.jarvinen@...sinki.fi>,
Maciej Żenczykowski <maze@...gle.com>
Subject: [PATCH v2 net-next] net: take care of cloned skbs in
tcp_try_coalesce()
From: Eric Dumazet <edumazet@...gle.com>
Before stealing fragments or skb head, we must make sure skbs are not
cloned.
Alexander was worried about the destination skb being cloned: in bridge
setups, a driver could be fooled if skb->data_len did not match the
skb's nr_frags.
If the source skb is cloned, we must take references on its pages instead of stealing them.
The bug happened when using tcpdump (if not using mmap()).
Introduce a kfree_skb_partial() helper to clean up the code.
Reported-by: Alexander Duyck <alexander.h.duyck@...el.com>
Signed-off-by: Eric Dumazet <edumazet@...gle.com>
---
net/ipv4/tcp_input.c | 42 +++++++++++++++++++++++++++--------------
1 file changed, 28 insertions(+), 14 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 96a631d..f891a5e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4455,6 +4455,7 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
* @sk: socket
* @to: prior buffer
* @from: buffer to add in queue
+ * @fragstolen: pointer to boolean
*
* Before queueing skb @from after @to, try to merge them
* to reduce overall memory use and queue lengths, if cost is small.
@@ -4467,10 +4468,10 @@ static bool tcp_try_coalesce(struct sock *sk,
struct sk_buff *from,
bool *fragstolen)
{
- int delta, len = from->len;
+ int i, delta, len = from->len;
*fragstolen = false;
- if (tcp_hdr(from)->fin)
+ if (tcp_hdr(from)->fin || skb_cloned(to))
return false;
if (len <= skb_tailroom(to)) {
BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len));
@@ -4497,7 +4498,13 @@ copyfrags:
skb_shinfo(from)->frags,
skb_shinfo(from)->nr_frags * sizeof(skb_frag_t));
skb_shinfo(to)->nr_frags += skb_shinfo(from)->nr_frags;
- skb_shinfo(from)->nr_frags = 0;
+
+ if (skb_cloned(from))
+ for (i = 0; i < skb_shinfo(from)->nr_frags; i++)
+ skb_frag_ref(from, i);
+ else
+ skb_shinfo(from)->nr_frags = 0;
+
to->truesize += delta;
atomic_add(delta, &sk->sk_rmem_alloc);
sk_mem_charge(sk, delta);
@@ -4515,13 +4522,26 @@ copyfrags:
offset = from->data - (unsigned char *)page_address(page);
skb_fill_page_desc(to, skb_shinfo(to)->nr_frags,
page, offset, skb_headlen(from));
- *fragstolen = true;
+
+ if (skb_cloned(from))
+ get_page(page);
+ else
+ *fragstolen = true;
+
delta = len; /* we dont know real truesize... */
goto copyfrags;
}
return false;
}
+static void kfree_skb_partial(struct sk_buff *skb, bool head_stolen)
+{
+ if (head_stolen)
+ kmem_cache_free(skbuff_head_cache, skb);
+ else
+ __kfree_skb(skb);
+}
+
static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -4565,10 +4585,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
} else {
- if (fragstolen)
- kmem_cache_free(skbuff_head_cache, skb);
- else
- __kfree_skb(skb);
+ kfree_skb_partial(skb, fragstolen);
skb = NULL;
}
@@ -4727,12 +4744,9 @@ queue_and_out:
tcp_fast_path_check(sk);
- if (eaten > 0) {
- if (fragstolen)
- kmem_cache_free(skbuff_head_cache, skb);
- else
- __kfree_skb(skb);
- } else if (!sock_flag(sk, SOCK_DEAD))
+ if (eaten > 0)
+ kfree_skb_partial(skb, fragstolen);
+ else if (!sock_flag(sk, SOCK_DEAD))
sk->sk_data_ready(sk, 0);
return;
}
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists