lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251107102853.1082118-4-dtatulea@nvidia.com>
Date: Fri, 7 Nov 2025 12:28:45 +0200
From: Dragos Tatulea <dtatulea@...dia.com>
To: Jakub Kicinski <kuba@...nel.org>, Jesper Dangaard Brouer
	<hawk@...nel.org>, "David S. Miller" <davem@...emloft.net>, Eric Dumazet
	<edumazet@...gle.com>, Paolo Abeni <pabeni@...hat.com>, Simon Horman
	<horms@...nel.org>, Shuah Khan <shuah@...nel.org>
CC: Dragos Tatulea <dtatulea@...dia.com>, Tariq Toukan <tariqt@...dia.com>,
	<netdev@...r.kernel.org>, <linux-kselftest@...r.kernel.org>,
	<linux-kernel@...r.kernel.org>
Subject: [RFC 1/2] page_pool: add benchmarking for napi-based recycling

The code brings back the tasklet based code in order
to be able to run in softirq context.

One additional test is added which benchmarks the
impact of page_pool_napi_local().

Signed-off-by: Dragos Tatulea <dtatulea@...dia.com>
---
 .../bench/page_pool/bench_page_pool_simple.c  | 92 ++++++++++++++++++-
 1 file changed, 90 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c
index cb6468adbda4..84683c547814 100644
--- a/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c
+++ b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c
@@ -9,6 +9,7 @@
 #include <linux/limits.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
+#include <linux/netdevice.h>
 #include <net/page_pool/helpers.h>
 
 #include "time_bench.h"
@@ -16,6 +17,8 @@
 static int verbose = 1;
 #define MY_POOL_SIZE 1024
 
+DEFINE_MUTEX(wait_for_tasklet);
+
 /* Makes tests selectable. Useful for perf-record to analyze a single test.
  * Hint: Bash shells support writing binary number like: $((2#101010)
  *
@@ -31,6 +34,10 @@ enum benchmark_bit {
 	bit_run_bench_no_softirq01,
 	bit_run_bench_no_softirq02,
 	bit_run_bench_no_softirq03,
+	bit_run_bench_tasklet01,
+	bit_run_bench_tasklet02,
+	bit_run_bench_tasklet03,
+	bit_run_bench_tasklet04,
 };
 
 #define bit(b)		(1 << (b))
@@ -120,7 +127,12 @@ static void pp_fill_ptr_ring(struct page_pool *pp, int elems)
 	kfree(array);
 }
 
-enum test_type { type_fast_path, type_ptr_ring, type_page_allocator };
+enum test_type {
+	type_fast_path,
+	type_napi_aware,
+	type_ptr_ring,
+	type_page_allocator,
+};
 
 /* Depends on compile optimizing this function */
 static int time_bench_page_pool(struct time_bench_record *rec, void *data,
@@ -132,6 +144,7 @@ static int time_bench_page_pool(struct time_bench_record *rec, void *data,
 
 	struct page_pool *pp;
 	struct page *page;
+	struct napi_struct napi = {0};
 
 	struct page_pool_params pp_params = {
 		.order = 0,
@@ -141,6 +154,7 @@ static int time_bench_page_pool(struct time_bench_record *rec, void *data,
 		.dev = NULL, /* Only use for DMA mapping */
 		.dma_dir = DMA_BIDIRECTIONAL,
 	};
+	struct page_pool_stats stats = {0};
 
 	pp = page_pool_create(&pp_params);
 	if (IS_ERR(pp)) {
@@ -155,6 +169,11 @@ static int time_bench_page_pool(struct time_bench_record *rec, void *data,
 	else
 		pr_warn("%s(): Cannot use page_pool fast-path\n", func);
 
+	if (type == type_napi_aware) {
+		napi.list_owner = smp_processor_id();
+		page_pool_enable_direct_recycling(pp, &napi);
+	}
+
 	time_bench_start(rec);
 	/** Loop to measure **/
 	for (i = 0; i < rec->loops; i++) {
@@ -173,7 +192,13 @@ static int time_bench_page_pool(struct time_bench_record *rec, void *data,
 			page_pool_recycle_direct(pp, page);
 
 		} else if (type == type_ptr_ring) {
-			/* Normal return path */
+			/* Normal return path, either direct or via ptr_ring */
+			page_pool_put_page(pp, page, -1, false);
+
+		} else if (type == type_napi_aware) {
+			/* NAPI-aware recycling: uses fast-path recycling if
+			 * possible.
+			 */
 			page_pool_put_page(pp, page, -1, false);
 
 		} else if (type == type_page_allocator) {
@@ -188,6 +213,14 @@ static int time_bench_page_pool(struct time_bench_record *rec, void *data,
 		}
 	}
 	time_bench_stop(rec, loops_cnt);
+
+	if (type == type_napi_aware) {
+		page_pool_get_stats(pp, &stats);
+		if (stats.recycle_stats.cached < rec->loops)
+			pr_warn("%s(): NAPI-aware recycling wasn't used\n",
+				func);
+	}
+
 out:
 	page_pool_destroy(pp);
 	return loops_cnt;
@@ -211,6 +244,54 @@ static int time_bench_page_pool03_slow(struct time_bench_record *rec,
 	return time_bench_page_pool(rec, data, type_page_allocator, __func__);
 }
 
+static int time_bench_page_pool04_napi_aware(struct time_bench_record *rec,
+					     void *data)
+{
+	return time_bench_page_pool(rec, data, type_napi_aware, __func__);
+}
+
+/* Testing page_pool requires running under softirq.
+ *
+ * Running under a tasklet satisfy this, as tasklets are built on top of
+ * softirq.
+ */
+static void pp_tasklet_handler(struct tasklet_struct *t)
+{
+	uint32_t nr_loops = loops;
+
+	if (in_serving_softirq())
+		pr_warn("%s(): in_serving_softirq fast-path\n",
+			__func__); // True
+	else
+		pr_warn("%s(): Cannot use page_pool fast-path\n", __func__);
+
+	if (enabled(bit_run_bench_tasklet01))
+		time_bench_loop(nr_loops, 0, "tasklet_page_pool01_fast_path",
+				NULL, time_bench_page_pool01_fast_path);
+
+	if (enabled(bit_run_bench_tasklet02))
+		time_bench_loop(nr_loops, 0, "tasklet_page_pool02_ptr_ring",
+				NULL, time_bench_page_pool02_ptr_ring);
+
+	if (enabled(bit_run_bench_tasklet03))
+		time_bench_loop(nr_loops, 0, "tasklet_page_pool03_slow", NULL,
+				time_bench_page_pool03_slow);
+
+	if (enabled(bit_run_bench_tasklet04))
+		time_bench_loop(nr_loops, 0, "tasklet_page_pool04_napi_aware",
+				NULL, time_bench_page_pool04_napi_aware);
+
+	mutex_unlock(&wait_for_tasklet); /* Module __init waiting on unlock */
+}
+DECLARE_TASKLET_DISABLED(pp_tasklet, pp_tasklet_handler);
+
+static void run_tasklet_tests(void)
+{
+	tasklet_enable(&pp_tasklet);
+	/* "Async" schedule tasklet, which runs on the CPU that schedule it */
+	tasklet_schedule(&pp_tasklet);
+}
+
 static int run_benchmark_tests(void)
 {
 	uint32_t nr_loops = loops;
@@ -251,12 +332,19 @@ static int __init bench_page_pool_simple_module_init(void)
 
 	run_benchmark_tests();
 
+	mutex_lock(&wait_for_tasklet);
+	run_tasklet_tests();
+	/* Sleep on mutex, waiting for tasklet to release */
+	mutex_lock(&wait_for_tasklet);
+
 	return 0;
 }
 module_init(bench_page_pool_simple_module_init);
 
 static void __exit bench_page_pool_simple_module_exit(void)
 {
+	tasklet_kill(&pp_tasklet);
+
 	if (verbose)
 		pr_info("Unloaded\n");
 }
-- 
2.50.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ