Date:   Mon, 30 Aug 2021 09:18:10 +0800
From:   Yunsheng Lin <linyunsheng@...wei.com>
To:     <davem@...emloft.net>, <kuba@...nel.org>
CC:     <netdev@...r.kernel.org>, <linux-kernel@...r.kernel.org>,
        <linuxarm@...neuler.org>, <hawk@...nel.org>,
        <ilias.apalodimas@...aro.org>, <jonathan.lemon@...il.com>,
        <alobakin@...me>, <willemb@...gle.com>, <cong.wang@...edance.com>,
        <pabeni@...hat.com>, <haokexin@...il.com>, <nogikh@...gle.com>,
        <elver@...gle.com>, <memxor@...il.com>, <edumazet@...gle.com>,
        <alexander.duyck@...il.com>, <dsahern@...il.com>
Subject: [PATCH net-next 2/2] skbuff: keep track of pp page when __skb_frag_ref() is called

skb->pp_recycle and page->pp_magic may not be enough to track
whether a frag page comes from the page pool after __skb_frag_ref()
has been called, mostly because of a data race, see:
commit 2cc3aeb5eccc ("skbuff: Fix a potential race while
recycling page_pool packets").

There are also clone-and-expand-head cases that may lose track of
whether a frag page comes from the page pool or not.

So increment the frag count when __skb_frag_ref() is called, and
rely only on page->pp_magic to indicate whether a frag page comes
from the page pool, to avoid the above data race.
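
To illustrate the intended scheme, below is a minimal userspace
sketch (not kernel code: the struct, field and helper names and the
PP_SIGNATURE value are stand-ins): a page pool page is recognized
purely by its pp_magic signature, and taking an extra frag reference
bumps pp_frag_count instead of the ordinary page refcount. The
release side of the sketch follows the next paragraph.

#include <stdatomic.h>
#include <stdbool.h>

#define PP_SIGNATURE	0x40UL		/* stand-in signature value */

struct fake_page {
	unsigned long pp_magic;		/* OR'ed with PP_SIGNATURE by the pool */
	atomic_long refcount;		/* models the normal page refcount */
	atomic_long pp_frag_count;	/* models page->pp_frag_count */
};

/* Recognize a pool page by its signature alone; mask the low two
 * bits, which may carry unrelated flags (compound head / pfmemalloc).
 */
static bool fake_is_pp_page(const struct fake_page *page)
{
	return (page->pp_magic & ~0x3UL) == PP_SIGNATURE;
}

/* Models __skb_frag_ref(): a pool page gets its frag count bumped,
 * any other page gets an ordinary refcount increment.
 */
static void fake_frag_ref(struct fake_page *page)
{
	if (fake_is_pp_page(page))
		atomic_fetch_add(&page->pp_frag_count, 1);
	else
		atomic_fetch_add(&page->refcount, 1);
}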

For 32-bit systems with 64-bit DMA, preserve the original behavior,
as the frag count cannot be used there to track how many times
__skb_frag_ref() has been called on a frag page.
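
Continuing the sketch for the release side (same struct fake_page
and fake_is_pp_page as above; DMA_USES_FRAG_COUNT is an illustrative
stand-in for PAGE_POOL_DMA_USE_PP_FRAG_COUNT): when the frag count
is usable, or recycling was requested as before, a pool page drops a
frag reference and would go back to its pool once the count reaches
zero; otherwise it falls through to a plain refcount decrement.
Presumably this is also why BIAS_MAX is halved below:
__skb_frag_ref() can now increment pp_frag_count after allocation,
so the initial bias needs headroom against overflow.

#include <stdio.h>

/* Stand-in for PAGE_POOL_DMA_USE_PP_FRAG_COUNT, illustrative only:
 * true when the DMA address is wider than an unsigned long.
 */
#define DMA_USES_FRAG_COUNT \
	(sizeof(unsigned long long) > sizeof(unsigned long))

/* Models __skb_frag_unref(): take the page pool path only when the
 * frag count field is available (or recycling was requested, as
 * before); everything else is a plain refcount decrement, i.e. the
 * put_page() path.
 */
static void fake_frag_unref(struct fake_page *page, bool recycle)
{
	if ((!DMA_USES_FRAG_COUNT || recycle) && fake_is_pp_page(page)) {
		if (atomic_fetch_sub(&page->pp_frag_count, 1) == 1) {
			/* last frag reference: a real implementation
			 * would return the page to its pool here
			 */
		}
		return;
	}

	atomic_fetch_sub(&page->refcount, 1);	/* plain put_page() path */
}

int main(void)
{
	struct fake_page page = {
		.pp_magic = PP_SIGNATURE,
		.refcount = 1,
		.pp_frag_count = 1,
	};

	fake_frag_ref(&page);		/* pool page: bumps pp_frag_count */
	fake_frag_unref(&page, false);	/* drops it again on hosts where
					 * DMA_USES_FRAG_COUNT is false
					 */
	printf("pp_frag_count=%ld refcount=%ld\n",
	       atomic_load(&page.pp_frag_count),
	       atomic_load(&page.refcount));
	return 0;
}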

Signed-off-by: Yunsheng Lin <linyunsheng@...wei.com>
---
 include/linux/skbuff.h  | 13 ++++++++++++-
 include/net/page_pool.h | 17 +++++++++++++++++
 net/core/page_pool.c    | 12 ++----------
 3 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6bdb0db..8311482 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3073,6 +3073,16 @@ static inline struct page *skb_frag_page(const skb_frag_t *frag)
  */
 static inline void __skb_frag_ref(skb_frag_t *frag)
 {
+	struct page *page = skb_frag_page(frag);
+
+#ifdef CONFIG_PAGE_POOL
+	if (!PAGE_POOL_DMA_USE_PP_FRAG_COUNT &&
+	    page_pool_is_pp_page(page)) {
+		page_pool_atomic_inc_frag_count(page);
+		return;
+	}
+#endif
+
 	get_page(skb_frag_page(frag));
 }
 
@@ -3101,7 +3111,8 @@ static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle)
 	struct page *page = skb_frag_page(frag);
 
 #ifdef CONFIG_PAGE_POOL
-	if (recycle && page_pool_return_skb_page(page))
+	if ((!PAGE_POOL_DMA_USE_PP_FRAG_COUNT || recycle) &&
+	    page_pool_return_skb_page(page))
 		return;
 #endif
 	put_page(page);
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index 2ad0706..8b43e3d9 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -244,6 +244,23 @@ static inline void page_pool_set_frag_count(struct page *page, long nr)
 	atomic_long_set(&page->pp_frag_count, nr);
 }
 
+static inline void page_pool_atomic_inc_frag_count(struct page *page)
+{
+	atomic_long_inc(&page->pp_frag_count);
+}
+
+static inline bool page_pool_is_pp_page(struct page *page)
+{
+	/* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation
+	 * in order to preserve any existing bits, such as bit 0 for the
+	 * head page of compound page and bit 1 for pfmemalloc page, so
+	 * mask those bits for freeing side when doing below checking,
+	 * and page_is_pfmemalloc() is checked in __page_pool_put_page()
+	 * to avoid recycling the pfmemalloc page.
+	 */
+	return (page->pp_magic & ~0x3UL) == PP_SIGNATURE;
+}
+
 static inline long page_pool_atomic_sub_frag_count_return(struct page *page,
 							  long nr)
 {
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index ba9f14d..442d37b 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -24,7 +24,7 @@
 #define DEFER_TIME (msecs_to_jiffies(1000))
 #define DEFER_WARN_INTERVAL (60 * HZ)
 
-#define BIAS_MAX	LONG_MAX
+#define BIAS_MAX	(LONG_MAX / 2)
 
 static int page_pool_init(struct page_pool *pool,
 			  const struct page_pool_params *params)
@@ -741,15 +741,7 @@ bool page_pool_return_skb_page(struct page *page)
 	struct page_pool *pp;
 
 	page = compound_head(page);
-
-	/* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation
-	 * in order to preserve any existing bits, such as bit 0 for the
-	 * head page of compound page and bit 1 for pfmemalloc page, so
-	 * mask those bits for freeing side when doing below checking,
-	 * and page_is_pfmemalloc() is checked in __page_pool_put_page()
-	 * to avoid recycling the pfmemalloc page.
-	 */
-	if (unlikely((page->pp_magic & ~0x3UL) != PP_SIGNATURE))
+	if (!page_pool_is_pp_page(page))
 		return false;
 
 	pp = page->pp;
-- 
2.7.4
