lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1629257542-36145-3-git-send-email-linyunsheng@huawei.com>
Date:   Wed, 18 Aug 2021 11:32:18 +0800
From:   Yunsheng Lin <linyunsheng@...wei.com>
To:     <davem@...emloft.net>, <kuba@...nel.org>
CC:     <alexander.duyck@...il.com>, <linux@...linux.org.uk>,
        <mw@...ihalf.com>, <linuxarm@...neuler.org>,
        <yisen.zhuang@...wei.com>, <salil.mehta@...wei.com>,
        <thomas.petazzoni@...tlin.com>, <hawk@...nel.org>,
        <ilias.apalodimas@...aro.org>, <ast@...nel.org>,
        <daniel@...earbox.net>, <john.fastabend@...il.com>,
        <akpm@...ux-foundation.org>, <peterz@...radead.org>,
        <will@...nel.org>, <willy@...radead.org>, <vbabka@...e.cz>,
        <fenghua.yu@...el.com>, <guro@...com>, <peterx@...hat.com>,
        <feng.tang@...el.com>, <jgg@...pe.ca>, <mcroce@...rosoft.com>,
        <hughd@...gle.com>, <jonathan.lemon@...il.com>, <alobakin@...me>,
        <willemb@...gle.com>, <wenxu@...oud.cn>, <cong.wang@...edance.com>,
        <haokexin@...il.com>, <nogikh@...gle.com>, <elver@...gle.com>,
        <yhs@...com>, <kpsingh@...nel.org>, <andrii@...nel.org>,
        <kafai@...com>, <songliubraving@...com>, <netdev@...r.kernel.org>,
        <linux-kernel@...r.kernel.org>, <bpf@...r.kernel.org>,
        <chenhao288@...ilicon.com>, <edumazet@...gle.com>,
        <yoshfuji@...ux-ipv6.org>, <dsahern@...nel.org>,
        <memxor@...il.com>, <linux@...pel-privat.de>, <atenart@...nel.org>,
        <weiwan@...gle.com>, <ap420073@...il.com>, <arnd@...db.de>,
        <mathew.j.martineau@...ux.intel.com>, <aahringo@...hat.com>,
        <ceggers@...i.de>, <yangbo.lu@....com>, <fw@...len.de>,
        <xiangxia.m.yue@...il.com>, <linmiaohe@...wei.com>
Subject: [PATCH RFC 2/7] skbuff: add interface to manipulate frag count for tx recycling

skb->pp_recycle and page->pp_magic may not be enough to track
whether a frag page is from a page pool after __skb_frag_ref()
has been called, mostly because of a data race, see:
commit 2cc3aeb5eccc ("skbuff: Fix a potential race while
recycling page_pool packets").

In the TCP case, fragmenting, coalescing or retransmitting
may lose track of whether a frag page is from a page pool
or not.

So increment the frag count when __skb_frag_ref() is called,
and use bit 0 of frag->bv_page to indicate whether a page is
from a page pool; the bit is automatically passed down to another
frag->bv_page when doing a '*new_frag = *frag' or memcpying
the shinfo.

It seems we could do the trick for rx too if it makes sense.

Signed-off-by: Yunsheng Lin <linyunsheng@...wei.com>
---
 include/linux/skbuff.h  | 43 ++++++++++++++++++++++++++++++++++++++++---
 include/net/page_pool.h |  5 +++++
 2 files changed, 45 insertions(+), 3 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6bdb0db..2878d26 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -331,6 +331,11 @@ static inline unsigned int skb_frag_size(const skb_frag_t *frag)
 	return frag->bv_len;
 }
 
+static inline bool skb_frag_is_pp(const skb_frag_t *frag)
+{
+	return (unsigned long)frag->bv_page & 1UL;
+}
+
 /**
  * skb_frag_size_set() - Sets the size of a skb fragment
  * @frag: skb fragment
@@ -2190,6 +2195,21 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
 		skb->pfmemalloc	= true;
 }
 
+static inline void __skb_fill_pp_page_desc(struct sk_buff *skb, int i,
+					   struct page *page, int off,
+					   int size)
+{
+	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+	frag->bv_page = (struct page *)((unsigned long)page | 0x1UL);
+	frag->bv_offset = off;
+	skb_frag_size_set(frag, size);
+
+	page = compound_head(page);
+	if (page_is_pfmemalloc(page))
+		skb->pfmemalloc = true;
+}
+
 /**
  * skb_fill_page_desc - initialise a paged fragment in an skb
  * @skb: buffer containing fragment to be initialised
@@ -2211,6 +2231,14 @@ static inline void skb_fill_page_desc(struct sk_buff *skb, int i,
 	skb_shinfo(skb)->nr_frags = i + 1;
 }
 
+static inline void skb_fill_pp_page_desc(struct sk_buff *skb, int i,
+					 struct page *page, int off,
+					 int size)
+{
+	__skb_fill_pp_page_desc(skb, i, page, off, size);
+	skb_shinfo(skb)->nr_frags = i + 1;
+}
+
 void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
 		     int size, unsigned int truesize);
 
@@ -3062,7 +3090,10 @@ static inline void skb_frag_off_copy(skb_frag_t *fragto,
  */
 static inline struct page *skb_frag_page(const skb_frag_t *frag)
 {
-	return frag->bv_page;
+	unsigned long page = (unsigned long)frag->bv_page;
+
+	page &= ~1UL;
+	return (struct page *)page;
 }
 
 /**
@@ -3073,7 +3104,12 @@ static inline struct page *skb_frag_page(const skb_frag_t *frag)
  */
 static inline void __skb_frag_ref(skb_frag_t *frag)
 {
-	get_page(skb_frag_page(frag));
+	struct page *page = skb_frag_page(frag);
+
+	if (skb_frag_is_pp(frag))
+		page_pool_atomic_inc_frag_count(page);
+	else
+		get_page(page);
 }
 
 /**
@@ -3101,7 +3137,8 @@ static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle)
 	struct page *page = skb_frag_page(frag);
 
 #ifdef CONFIG_PAGE_POOL
-	if (recycle && page_pool_return_skb_page(page))
+	if ((recycle || skb_frag_is_pp(frag)) &&
+	    page_pool_return_skb_page(page))
 		return;
 #endif
 	put_page(page);
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index 8d4ae4b..86babb2 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -270,6 +270,11 @@ static inline long page_pool_atomic_sub_frag_count_return(struct page *page,
 	return ret;
 }
 
+static void page_pool_atomic_inc_frag_count(struct page *page)
+{
+	atomic_long_inc(&page->pp_frag_count);
+}
+
 static inline bool is_page_pool_compiled_in(void)
 {
 #ifdef CONFIG_PAGE_POOL
-- 
2.7.4

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ