Message-Id: <20230124124300.94886-1-nbd@nbd.name>
Date: Tue, 24 Jan 2023 13:43:00 +0100
From: Felix Fietkau <nbd@....name>
To: netdev@...r.kernel.org, Jesper Dangaard Brouer <hawk@...nel.org>,
Ilias Apalodimas <ilias.apalodimas@...aro.org>,
"David S. Miller" <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>
Cc: Lorenzo Bianconi <lorenzo@...nel.org>, linux-kernel@...r.kernel.org
Subject: [PATCH] net: page_pool: fix refcounting issues with fragmented allocation
While testing fragmented page_pool allocation in the mt76 driver, I was able
to reliably trigger page refcount underflow issues, which did not occur with
full-page page_pool allocation.
It appears that tracking references in two separate counters
(page->pp_frag_count and the page refcount) is racy when the page refcount
is incremented directly by code dealing with skb fragments, and
page_pool_return_skb_page is called multiple times for the same fragment.
Dropping page->pp_frag_count and relying entirely on the page refcount makes
these underflow issues and crashes go away; a rough sketch of the resulting
single-counter scheme follows the '---' marker below.
Cc: Lorenzo Bianconi <lorenzo@...nel.org>
Signed-off-by: Felix Fietkau <nbd@....name>
---
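Not for the commit message: as a rough illustration of the single-counter
scheme this patch moves to, here is a minimal userspace sketch that models
page_pool_fragment_page()/page_pool_defrag_page() on top of a plain atomic
standing in for the page refcount. All names, the bias value and the
frag_users arithmetic below are illustrative only, not the kernel
implementation.

#include <assert.h>
#include <stdatomic.h>
#include <stdio.h>

struct fake_page {
	atomic_long refcount;	/* stands in for struct page::_refcount */
};

/* models page_pool_fragment_page(): pre-charge the refcount for nr frags */
static void fragment_page(struct fake_page *page, long nr)
{
	atomic_fetch_add(&page->refcount, nr);
}

/* models page_pool_defrag_page(): drop nr references, return the rest */
static long defrag_page(struct fake_page *page, long nr)
{
	long ret;

	if (atomic_load(&page->refcount) == nr)
		return 0;	/* caller holds the only remaining references */

	ret = atomic_fetch_sub(&page->refcount, nr) - nr;
	assert(ret >= 0);
	return ret;
}

int main(void)
{
	struct fake_page page = { .refcount = 1 };	/* allocator's base ref */
	long bias = 4096;	/* stands in for BIAS_MAX(pool) on an order-0 page */
	long frag_users = 3;	/* fragments handed out before the page is retired */

	fragment_page(&page, bias);

	/* code touching skb frags directly may take and drop an extra ref;
	 * with a single counter this cannot get out of sync with a second,
	 * separately managed frag count */
	atomic_fetch_add(&page.refcount, 1);
	atomic_fetch_sub(&page.refcount, 1);

	/* each fragment consumer releases exactly one reference */
	for (long i = 0; i < frag_users; i++)
		defrag_page(&page, 1);

	/* the pool drains the unused part of the bias when retiring the page */
	defrag_page(&page, bias - frag_users);

	/* only the allocator's base reference is left, so it can be recycled */
	printf("remaining refs: %ld\n", atomic_load(&page.refcount));
	return 0;
}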
include/linux/mm_types.h | 17 +++++------------
include/net/page_pool.h | 19 ++++---------------
net/core/page_pool.c | 12 ++++--------
3 files changed, 13 insertions(+), 35 deletions(-)
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 9757067c3053..96ec3b19a86d 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -125,18 +125,11 @@ struct page {
struct page_pool *pp;
unsigned long _pp_mapping_pad;
unsigned long dma_addr;
- union {
- /**
- * dma_addr_upper: might require a 64-bit
- * value on 32-bit architectures.
- */
- unsigned long dma_addr_upper;
- /**
- * For frag page support, not supported in
- * 32-bit architectures with 64-bit DMA.
- */
- atomic_long_t pp_frag_count;
- };
+ /**
+ * dma_addr_upper: might require a 64-bit
+ * value on 32-bit architectures.
+ */
+ unsigned long dma_addr_upper;
};
struct { /* Tail pages of compound page */
unsigned long compound_head; /* Bit zero is set */
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index 813c93499f20..28e1fdbdcd53 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -279,14 +279,14 @@ void page_pool_put_defragged_page(struct page_pool *pool, struct page *page,
static inline void page_pool_fragment_page(struct page *page, long nr)
{
- atomic_long_set(&page->pp_frag_count, nr);
+ page_ref_add(page, nr);
}
static inline long page_pool_defrag_page(struct page *page, long nr)
{
long ret;
- /* If nr == pp_frag_count then we have cleared all remaining
+ /* If nr == page_ref_count then we have cleared all remaining
* references to the page. No need to actually overwrite it, instead
* we can leave this to be overwritten by the calling function.
*
@@ -295,22 +295,14 @@ static inline long page_pool_defrag_page(struct page *page, long nr)
* especially when dealing with a page that may be partitioned
* into only 2 or 3 pieces.
*/
- if (atomic_long_read(&page->pp_frag_count) == nr)
+ if (page_ref_count(page) == nr)
return 0;
- ret = atomic_long_sub_return(nr, &page->pp_frag_count);
+ ret = page_ref_sub_return(page, nr);
WARN_ON(ret < 0);
return ret;
}
-static inline bool page_pool_is_last_frag(struct page_pool *pool,
- struct page *page)
-{
- /* If fragments aren't enabled or count is 0 we were the last user */
- return !(pool->p.flags & PP_FLAG_PAGE_FRAG) ||
- (page_pool_defrag_page(page, 1) == 0);
-}
-
static inline void page_pool_put_page(struct page_pool *pool,
struct page *page,
unsigned int dma_sync_size,
@@ -320,9 +312,6 @@ static inline void page_pool_put_page(struct page_pool *pool,
* allow registering MEM_TYPE_PAGE_POOL, but shield linker.
*/
#ifdef CONFIG_PAGE_POOL
- if (!page_pool_is_last_frag(pool, page))
- return;
-
page_pool_put_defragged_page(pool, page, dma_sync_size, allow_direct);
#endif
}
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 9b203d8660e4..0defcadae225 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -25,7 +25,7 @@
#define DEFER_TIME (msecs_to_jiffies(1000))
#define DEFER_WARN_INTERVAL (60 * HZ)
-#define BIAS_MAX LONG_MAX
+#define BIAS_MAX(pool) (PAGE_SIZE << ((pool)->p.order))
#ifdef CONFIG_PAGE_POOL_STATS
/* alloc_stat_inc is intended to be used in softirq context */
@@ -619,10 +619,6 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
for (i = 0; i < count; i++) {
struct page *page = virt_to_head_page(data[i]);
- /* It is not the last user for the page frag case */
- if (!page_pool_is_last_frag(pool, page))
- continue;
-
page = __page_pool_put_page(pool, page, -1, false);
/* Approved for bulk recycling in ptr_ring cache */
if (page)
@@ -659,7 +655,7 @@ EXPORT_SYMBOL(page_pool_put_page_bulk);
static struct page *page_pool_drain_frag(struct page_pool *pool,
struct page *page)
{
- long drain_count = BIAS_MAX - pool->frag_users;
+ long drain_count = BIAS_MAX(pool) - pool->frag_users;
/* Some user is still using the page frag */
if (likely(page_pool_defrag_page(page, drain_count)))
@@ -678,7 +674,7 @@ static struct page *page_pool_drain_frag(struct page_pool *pool,
static void page_pool_free_frag(struct page_pool *pool)
{
- long drain_count = BIAS_MAX - pool->frag_users;
+ long drain_count = BIAS_MAX(pool) - pool->frag_users;
struct page *page = pool->frag_page;
pool->frag_page = NULL;
@@ -724,7 +720,7 @@ struct page *page_pool_alloc_frag(struct page_pool *pool,
pool->frag_users = 1;
*offset = 0;
pool->frag_offset = size;
- page_pool_fragment_page(page, BIAS_MAX);
+ page_pool_fragment_page(page, BIAS_MAX(pool));
return page;
}
--
2.39.0