[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1629257542-36145-3-git-send-email-linyunsheng@huawei.com>
Date: Wed, 18 Aug 2021 11:32:18 +0800
From: Yunsheng Lin <linyunsheng@...wei.com>
To: <davem@...emloft.net>, <kuba@...nel.org>
CC: <alexander.duyck@...il.com>, <linux@...linux.org.uk>,
<mw@...ihalf.com>, <linuxarm@...neuler.org>,
<yisen.zhuang@...wei.com>, <salil.mehta@...wei.com>,
<thomas.petazzoni@...tlin.com>, <hawk@...nel.org>,
<ilias.apalodimas@...aro.org>, <ast@...nel.org>,
<daniel@...earbox.net>, <john.fastabend@...il.com>,
<akpm@...ux-foundation.org>, <peterz@...radead.org>,
<will@...nel.org>, <willy@...radead.org>, <vbabka@...e.cz>,
<fenghua.yu@...el.com>, <guro@...com>, <peterx@...hat.com>,
<feng.tang@...el.com>, <jgg@...pe.ca>, <mcroce@...rosoft.com>,
<hughd@...gle.com>, <jonathan.lemon@...il.com>, <alobakin@...me>,
<willemb@...gle.com>, <wenxu@...oud.cn>, <cong.wang@...edance.com>,
<haokexin@...il.com>, <nogikh@...gle.com>, <elver@...gle.com>,
<yhs@...com>, <kpsingh@...nel.org>, <andrii@...nel.org>,
<kafai@...com>, <songliubraving@...com>, <netdev@...r.kernel.org>,
<linux-kernel@...r.kernel.org>, <bpf@...r.kernel.org>,
<chenhao288@...ilicon.com>, <edumazet@...gle.com>,
<yoshfuji@...ux-ipv6.org>, <dsahern@...nel.org>,
<memxor@...il.com>, <linux@...pel-privat.de>, <atenart@...nel.org>,
<weiwan@...gle.com>, <ap420073@...il.com>, <arnd@...db.de>,
<mathew.j.martineau@...ux.intel.com>, <aahringo@...hat.com>,
<ceggers@...i.de>, <yangbo.lu@....com>, <fw@...len.de>,
<xiangxia.m.yue@...il.com>, <linmiaohe@...wei.com>
Subject: [PATCH RFC 2/7] skbuff: add interface to manipulate frag count for tx recycling
skb->pp_recycle and page->pp_magic may not be enough to
track whether a frag page is from a page pool after
__skb_frag_ref() has been called, mostly because of a data
race; see commit 2cc3aeb5eccc ("skbuff: Fix a potential race while
recycling page_pool packets").
In the TCP case, fragmenting, coalescing or retransmitting
may lose track of whether a frag page is from a page pool
or not.
So increment the frag count when __skb_frag_ref() is called,
and use bit 0 of frag->bv_page to indicate whether a page is
from a page pool; this bit is automatically passed down to another
frag->bv_page when doing a '*new_frag = *frag' or memcpying
the shinfo.
It seems we could do the trick for rx too if it makes sense.
Signed-off-by: Yunsheng Lin <linyunsheng@...wei.com>
---
include/linux/skbuff.h | 43 ++++++++++++++++++++++++++++++++++++++++---
include/net/page_pool.h | 5 +++++
2 files changed, 45 insertions(+), 3 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6bdb0db..2878d26 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -331,6 +331,11 @@ static inline unsigned int skb_frag_size(const skb_frag_t *frag)
return frag->bv_len;
}
+static inline bool skb_frag_is_pp(const skb_frag_t *frag)
+{
+ return (unsigned long)frag->bv_page & 1UL;
+}
+
/**
* skb_frag_size_set() - Sets the size of a skb fragment
* @frag: skb fragment
@@ -2190,6 +2195,21 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
skb->pfmemalloc = true;
}
+static inline void __skb_fill_pp_page_desc(struct sk_buff *skb, int i,
+ struct page *page, int off,
+ int size)
+{
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ frag->bv_page = (struct page *)((unsigned long)page | 0x1UL);
+ frag->bv_offset = off;
+ skb_frag_size_set(frag, size);
+
+ page = compound_head(page);
+ if (page_is_pfmemalloc(page))
+ skb->pfmemalloc = true;
+}
+
/**
* skb_fill_page_desc - initialise a paged fragment in an skb
* @skb: buffer containing fragment to be initialised
@@ -2211,6 +2231,14 @@ static inline void skb_fill_page_desc(struct sk_buff *skb, int i,
skb_shinfo(skb)->nr_frags = i + 1;
}
+static inline void skb_fill_pp_page_desc(struct sk_buff *skb, int i,
+ struct page *page, int off,
+ int size)
+{
+ __skb_fill_pp_page_desc(skb, i, page, off, size);
+ skb_shinfo(skb)->nr_frags = i + 1;
+}
+
void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
int size, unsigned int truesize);
@@ -3062,7 +3090,10 @@ static inline void skb_frag_off_copy(skb_frag_t *fragto,
*/
static inline struct page *skb_frag_page(const skb_frag_t *frag)
{
- return frag->bv_page;
+ unsigned long page = (unsigned long)frag->bv_page;
+
+ page &= ~1UL;
+ return (struct page *)page;
}
/**
@@ -3073,7 +3104,12 @@ static inline struct page *skb_frag_page(const skb_frag_t *frag)
*/
static inline void __skb_frag_ref(skb_frag_t *frag)
{
- get_page(skb_frag_page(frag));
+ struct page *page = skb_frag_page(frag);
+
+ if (skb_frag_is_pp(frag))
+ page_pool_atomic_inc_frag_count(page);
+ else
+ get_page(page);
}
/**
@@ -3101,7 +3137,8 @@ static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle)
struct page *page = skb_frag_page(frag);
#ifdef CONFIG_PAGE_POOL
- if (recycle && page_pool_return_skb_page(page))
+ if ((recycle || skb_frag_is_pp(frag)) &&
+ page_pool_return_skb_page(page))
return;
#endif
put_page(page);
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index 8d4ae4b..86babb2 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -270,6 +270,11 @@ static inline long page_pool_atomic_sub_frag_count_return(struct page *page,
return ret;
}
+static inline void page_pool_atomic_inc_frag_count(struct page *page)
+{
+ atomic_long_inc(&page->pp_frag_count);
+}
+
static inline bool is_page_pool_compiled_in(void)
{
#ifdef CONFIG_PAGE_POOL
--
2.7.4
Powered by blists - more mailing lists