lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20231113130041.58124-3-linyunsheng@huawei.com>
Date:   Mon, 13 Nov 2023 21:00:34 +0800
From:   Yunsheng Lin <linyunsheng@...wei.com>
To:     <davem@...emloft.net>, <kuba@...nel.org>, <pabeni@...hat.com>
CC:     <netdev@...r.kernel.org>, <linux-kernel@...r.kernel.org>,
        Mina Almasry <almasrymina@...gle.com>,
        Yunsheng Lin <linyunsheng@...wei.com>,
        Jesper Dangaard Brouer <hawk@...nel.org>,
        Ilias Apalodimas <ilias.apalodimas@...aro.org>,
        Eric Dumazet <edumazet@...gle.com>
Subject: [PATCH RFC 2/8] net: page_pool: create hooks for custom page providers

From: Jakub Kicinski <kuba@...nel.org>

Page providers that try to reuse the same pages need to hold onto
the reference even after a page is released from the pool: releasing
a page from the pp merely transfers the "ownership" reference from
the pp to the provider, and the provider then waits for all other
references to be gone before feeding the page back into the pool.

Signed-off-by: Jakub Kicinski <kuba@...nel.org>
Signed-off-by: Mina Almasry <almasrymina@...gle.com>
Signed-off-by: Yunsheng Lin <linyunsheng@...wei.com>
---
 include/net/page_pool/types.h | 19 ++++++++
 net/core/page_pool.c          | 81 +++++++++++++++++++++++++++++------
 2 files changed, 87 insertions(+), 13 deletions(-)

diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index 6fc5134095ed..5e4fcd45ba50 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -60,6 +60,8 @@ struct page_pool_params {
 	int		nid;
 	struct device	*dev;
 	struct napi_struct *napi;
+	u8		memory_provider;
+	void            *mp_priv;
 	enum dma_data_direction dma_dir;
 	unsigned int	max_len;
 	unsigned int	offset;
@@ -118,6 +120,20 @@ struct page_pool_stats {
 };
 #endif
 
+struct mem_provider;
+
+enum pp_memory_provider_type {
+	__PP_MP_NONE, /* Use system allocator directly */
+};
+
+struct pp_memory_provider_ops {
+	int (*init)(struct page_pool *pool);
+	void (*destroy)(struct page_pool *pool);
+	struct page *(*alloc_pages)(struct page_pool *pool, gfp_t gfp);
+	void (*release_page)(struct page_pool *pool, struct page *page);
+	void (*free_pages)(struct page_pool *pool, struct page *page);
+};
+
 struct page_pool {
 	struct page_pool_params p;
 
@@ -165,6 +181,9 @@ struct page_pool {
 	 */
 	struct ptr_ring ring;
 
+	const struct pp_memory_provider_ops *mp_ops;
+	void *mp_priv;
+
 #ifdef CONFIG_PAGE_POOL_STATS
 	/* recycle stats are per-cpu to avoid locking */
 	struct page_pool_recycle_stats __percpu *recycle_stats;
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 578b6f2eeb46..6c502bea842b 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -23,6 +23,8 @@
 
 #include <trace/events/page_pool.h>
 
+static DEFINE_STATIC_KEY_FALSE(page_pool_mem_providers);
+
 #define DEFER_TIME (msecs_to_jiffies(1000))
 #define DEFER_WARN_INTERVAL (60 * HZ)
 
@@ -172,6 +174,7 @@ static int page_pool_init(struct page_pool *pool,
 			  const struct page_pool_params *params)
 {
 	unsigned int ring_qsize = 1024; /* Default */
+	int err;
 
 	memcpy(&pool->p, params, sizeof(pool->p));
 
@@ -225,10 +228,34 @@ static int page_pool_init(struct page_pool *pool,
 	/* Driver calling page_pool_create() also call page_pool_destroy() */
 	refcount_set(&pool->user_cnt, 1);
 
+	switch (pool->p.memory_provider) {
+	case __PP_MP_NONE:
+		break;
+	default:
+		err = -EINVAL;
+		goto free_ptr_ring;
+	}
+
+	pool->mp_priv = pool->p.mp_priv;
+	if (pool->mp_ops) {
+		err = pool->mp_ops->init(pool);
+		if (err) {
+			pr_warn("%s() mem-provider init failed %d\n",
+				__func__, err);
+			goto free_ptr_ring;
+		}
+
+		static_branch_inc(&page_pool_mem_providers);
+	}
+
 	if (pool->p.flags & PP_FLAG_DMA_MAP)
 		get_device(pool->p.dev);
 
 	return 0;
+
+free_ptr_ring:
+	ptr_ring_cleanup(&pool->ring, NULL);
+	return err;
 }
 
 /**
@@ -257,18 +284,13 @@ EXPORT_SYMBOL(page_pool_create);
 
 static void page_pool_return_page(struct page_pool *pool, struct page *page);
 
-noinline
-static struct page *page_pool_refill_alloc_cache(struct page_pool *pool)
+static bool page_pool_page_in_pref_node(struct page_pool *pool, struct page *page)
 {
-	struct ptr_ring *r = &pool->ring;
-	struct page *page;
 	int pref_nid; /* preferred NUMA node */
 
-	/* Quicker fallback, avoid locks when ring is empty */
-	if (__ptr_ring_empty(r)) {
-		alloc_stat_inc(pool, empty);
-		return NULL;
-	}
+	/* Always assume page is in pref node for mem providers */
+	if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops)
+		return true;
 
 	/* Softirq guarantee CPU and thus NUMA node is stable. This,
 	 * assumes CPU refilling driver RX-ring will also run RX-NAPI.
@@ -280,13 +302,31 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool)
 	pref_nid = numa_mem_id(); /* will be zero like page_to_nid() */
 #endif
 
+	if (page_to_nid(page) == pref_nid)
+		return true;
+
+	return false;
+}
+
+noinline
+static struct page *page_pool_refill_alloc_cache(struct page_pool *pool)
+{
+	struct ptr_ring *r = &pool->ring;
+	struct page *page;
+
+	/* Quicker fallback, avoid locks when ring is empty */
+	if (__ptr_ring_empty(r)) {
+		alloc_stat_inc(pool, empty);
+		return NULL;
+	}
+
 	/* Refill alloc array, but only if NUMA match */
 	do {
 		page = __ptr_ring_consume(r);
 		if (unlikely(!page))
 			break;
 
-		if (likely(page_to_nid(page) == pref_nid)) {
+		if (likely(page_pool_page_in_pref_node(pool, page))) {
 			pool->alloc.cache[pool->alloc.count++] = page;
 		} else {
 			/* NUMA mismatch;
@@ -490,7 +530,10 @@ struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp)
 		return page;
 
 	/* Slow-path: cache empty, do real allocation */
-	page = __page_pool_alloc_pages_slow(pool, gfp);
+	if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops)
+		page = pool->mp_ops->alloc_pages(pool, gfp);
+	else
+		page = __page_pool_alloc_pages_slow(pool, gfp);
 	return page;
 }
 EXPORT_SYMBOL(page_pool_alloc_pages);
@@ -543,7 +586,10 @@ void page_pool_return_page(struct page_pool *pool, struct page *page)
 {
 	int count;
 
-	__page_pool_release_page_dma(pool, page);
+	if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops)
+		pool->mp_ops->release_page(pool, page);
+	else
+		__page_pool_release_page_dma(pool, page);
 
 	page_pool_clear_pp_info(page);
 
@@ -553,7 +599,11 @@ void page_pool_return_page(struct page_pool *pool, struct page *page)
 	count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt);
 	trace_page_pool_state_release(pool, page, count);
 
-	put_page(page);
+	if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops)
+		pool->mp_ops->free_pages(pool, page);
+	else
+		put_page(page);
+
 	/* An optimization would be to call __free_pages(page, pool->p.order)
 	 * knowing page is not part of page-cache (thus avoiding a
 	 * __page_cache_release() call).
@@ -821,6 +871,11 @@ static void __page_pool_destroy(struct page_pool *pool)
 	if (pool->disconnect)
 		pool->disconnect(pool);
 
+	if (pool->mp_ops) {
+		pool->mp_ops->destroy(pool);
+		static_branch_dec(&page_pool_mem_providers);
+	}
+
 	ptr_ring_cleanup(&pool->ring, NULL);
 
 	if (pool->p.flags & PP_FLAG_DMA_MAP)
-- 
2.33.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ