lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20250905150641.2078838-4-xukuohai@huaweicloud.com>
Date: Fri,  5 Sep 2025 23:06:41 +0800
From: Xu Kuohai <xukuohai@...weicloud.com>
To: bpf@...r.kernel.org,
	linux-kselftest@...r.kernel.org,
	linux-kernel@...r.kernel.org
Cc: Alexei Starovoitov <ast@...nel.org>,
	Daniel Borkmann <daniel@...earbox.net>,
	Andrii Nakryiko <andrii@...nel.org>,
	Martin KaFai Lau <martin.lau@...ux.dev>,
	Eduard Zingerman <eddyz87@...il.com>,
	Yonghong Song <yhs@...com>,
	Song Liu <song@...nel.org>,
	John Fastabend <john.fastabend@...il.com>,
	KP Singh <kpsingh@...nel.org>,
	Stanislav Fomichev <sdf@...gle.com>,
	Hao Luo <haoluo@...gle.com>,
	Jiri Olsa <jolsa@...nel.org>,
	Mykola Lysenko <mykolal@...com>,
	Shuah Khan <shuah@...nel.org>,
	Stanislav Fomichev <sdf@...ichev.me>,
	Willem de Bruijn <willemb@...gle.com>,
	Jason Xing <kerneljasonxing@...il.com>,
	Paul Chaignon <paul.chaignon@...il.com>,
	Tao Chen <chen.dylane@...ux.dev>,
	Kumar Kartikeya Dwivedi <memxor@...il.com>,
	Martin Kelly <martin.kelly@...wdstrike.com>
Subject: [PATCH bpf-next v2 3/3] selftests/bpf/benchs: Add producer and overwrite bench for ring buffer

From: Xu Kuohai <xukuohai@...wei.com>

Add rb-prod test for bpf ring buffer to bench producer performance
without a consumer thread. Also add the --rb-overwrite option to bench
the ring buffer in overwrite mode.

For reference, below are bench numbers collected from x86_64 and
arm64 CPUs.

- AMD EPYC 9654 (x86_64)

  Ringbuf, overwrite mode with multi-producer contention, no consumer
  ===================================================================
  rb-prod nr_prod 1    32.295 ± 0.004M/s (drops 0.000 ± 0.000M/s)
  rb-prod nr_prod 2    9.591 ± 0.003M/s (drops 0.000 ± 0.000M/s)
  rb-prod nr_prod 3    8.895 ± 0.002M/s (drops 0.000 ± 0.000M/s)
  rb-prod nr_prod 4    9.206 ± 0.003M/s (drops 0.000 ± 0.000M/s)
  rb-prod nr_prod 8    9.220 ± 0.002M/s (drops 0.000 ± 0.000M/s)
  rb-prod nr_prod 12   4.595 ± 0.022M/s (drops 0.000 ± 0.000M/s)
  rb-prod nr_prod 16   4.348 ± 0.016M/s (drops 0.000 ± 0.000M/s)
  rb-prod nr_prod 20   3.957 ± 0.017M/s (drops 0.000 ± 0.000M/s)
  rb-prod nr_prod 24   3.787 ± 0.014M/s (drops 0.000 ± 0.000M/s)
  rb-prod nr_prod 28   3.603 ± 0.011M/s (drops 0.000 ± 0.000M/s)
  rb-prod nr_prod 32   3.707 ± 0.011M/s (drops 0.000 ± 0.000M/s)
  rb-prod nr_prod 36   3.562 ± 0.012M/s (drops 0.000 ± 0.000M/s)
  rb-prod nr_prod 40   3.616 ± 0.012M/s (drops 0.000 ± 0.000M/s)
  rb-prod nr_prod 44   3.598 ± 0.016M/s (drops 0.000 ± 0.000M/s)
  rb-prod nr_prod 48   3.555 ± 0.014M/s (drops 0.000 ± 0.000M/s)
  rb-prod nr_prod 52   3.463 ± 0.020M/s (drops 0.000 ± 0.000M/s)

- HiSilicon Kunpeng 920 (arm64)

  Ringbuf, overwrite mode with multi-producer contention, no consumer
  ===================================================================
  rb-prod nr_prod 1    14.687 ± 0.058M/s (drops 0.000 ± 0.000M/s)
  rb-prod nr_prod 2    22.263 ± 0.007M/s (drops 0.000 ± 0.000M/s)
  rb-prod nr_prod 3    5.736 ± 0.003M/s (drops 0.000 ± 0.000M/s)
  rb-prod nr_prod 4    4.934 ± 0.001M/s (drops 0.000 ± 0.000M/s)
  rb-prod nr_prod 8    4.661 ± 0.001M/s (drops 0.000 ± 0.000M/s)
  rb-prod nr_prod 12   3.753 ± 0.013M/s (drops 0.000 ± 0.000M/s)
  rb-prod nr_prod 16   3.706 ± 0.018M/s (drops 0.000 ± 0.000M/s)
  rb-prod nr_prod 20   3.660 ± 0.015M/s (drops 0.000 ± 0.000M/s)
  rb-prod nr_prod 24   3.610 ± 0.016M/s (drops 0.000 ± 0.000M/s)
  rb-prod nr_prod 28   3.238 ± 0.010M/s (drops 0.000 ± 0.000M/s)
  rb-prod nr_prod 32   3.270 ± 0.018M/s (drops 0.000 ± 0.000M/s)
  rb-prod nr_prod 36   2.892 ± 0.021M/s (drops 0.000 ± 0.000M/s)
  rb-prod nr_prod 40   2.995 ± 0.018M/s (drops 0.000 ± 0.000M/s)
  rb-prod nr_prod 44   2.830 ± 0.019M/s (drops 0.000 ± 0.000M/s)
  rb-prod nr_prod 48   2.877 ± 0.015M/s (drops 0.000 ± 0.000M/s)
  rb-prod nr_prod 52   2.814 ± 0.015M/s (drops 0.000 ± 0.000M/s)

Signed-off-by: Xu Kuohai <xukuohai@...wei.com>
---
 tools/testing/selftests/bpf/bench.c           |  2 +
 .../selftests/bpf/benchs/bench_ringbufs.c     | 95 +++++++++++++++++--
 .../bpf/benchs/run_bench_ringbufs.sh          |  4 +
 .../selftests/bpf/progs/ringbuf_bench.c       | 10 ++
 4 files changed, 103 insertions(+), 8 deletions(-)

diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index bd29bb2e6cb5..a98063f6436a 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -541,6 +541,7 @@ extern const struct bench bench_trig_uretprobe_multi_nop5;
 
 extern const struct bench bench_rb_libbpf;
 extern const struct bench bench_rb_custom;
+extern const struct bench bench_rb_prod;
 extern const struct bench bench_pb_libbpf;
 extern const struct bench bench_pb_custom;
 extern const struct bench bench_bloom_lookup;
@@ -617,6 +618,7 @@ static const struct bench *benchs[] = {
 	/* ringbuf/perfbuf benchmarks */
 	&bench_rb_libbpf,
 	&bench_rb_custom,
+	&bench_rb_prod,
 	&bench_pb_libbpf,
 	&bench_pb_custom,
 	&bench_bloom_lookup,
diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
index e1ee979e6acc..6d58479fac91 100644
--- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
+++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
@@ -19,6 +19,7 @@ static struct {
 	int ringbuf_sz; /* per-ringbuf, in bytes */
 	bool ringbuf_use_output; /* use slower output API */
 	int perfbuf_sz; /* per-CPU size, in pages */
+	bool overwrite;
 } args = {
 	.back2back = false,
 	.batch_cnt = 500,
@@ -27,6 +28,7 @@ static struct {
 	.ringbuf_sz = 512 * 1024,
 	.ringbuf_use_output = false,
 	.perfbuf_sz = 128,
+	.overwrite = false,
 };
 
 enum {
@@ -35,6 +37,7 @@ enum {
 	ARG_RB_BATCH_CNT = 2002,
 	ARG_RB_SAMPLED = 2003,
 	ARG_RB_SAMPLE_RATE = 2004,
+	ARG_RB_OVERWRITE = 2005,
 };
 
 static const struct argp_option opts[] = {
@@ -43,6 +46,7 @@ static const struct argp_option opts[] = {
 	{ "rb-batch-cnt", ARG_RB_BATCH_CNT, "CNT", 0, "Set BPF-side record batch count"},
 	{ "rb-sampled", ARG_RB_SAMPLED, NULL, 0, "Notification sampling"},
 	{ "rb-sample-rate", ARG_RB_SAMPLE_RATE, "RATE", 0, "Notification sample rate"},
+	{ "rb-overwrite", ARG_RB_OVERWRITE, NULL, 0, "Overwrite mode"},
 	{},
 };
 
@@ -72,6 +76,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 			argp_usage(state);
 		}
 		break;
+	case ARG_RB_OVERWRITE:
+		args.overwrite = true;
+		break;
 	default:
 		return ARGP_ERR_UNKNOWN;
 	}
@@ -95,8 +102,30 @@ static inline void bufs_trigger_batch(void)
 
 static void bufs_validate(void)
 {
-	if (env.consumer_cnt != 1) {
-		fprintf(stderr, "rb-libbpf benchmark needs one consumer!\n");
+	bool bench_prod = !strcmp(env.bench_name, "rb-prod");
+
+	if (args.overwrite && !bench_prod) {
+		fprintf(stderr, "overwrite mode only works with benchmark rb-prod!\n");
+		exit(1);
+	}
+
+	if (bench_prod && env.consumer_cnt != 0) {
+		fprintf(stderr, "rb-prod benchmark does not need consumer!\n");
+		exit(1);
+	}
+
+	if (bench_prod && args.back2back) {
+		fprintf(stderr, "back-to-back mode makes no sense for rb-prod!\n");
+		exit(1);
+	}
+
+	if (bench_prod && args.sampled) {
+		fprintf(stderr, "sampling mode makes no sense for rb-prod!\n");
+		exit(1);
+	}
+
+	if (!bench_prod && env.consumer_cnt != 1) {
+		fprintf(stderr, "benchmarks excluding rb-prod need one consumer!\n");
 		exit(1);
 	}
 
@@ -132,8 +161,10 @@ static void ringbuf_libbpf_measure(struct bench_res *res)
 	res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
 }
 
-static struct ringbuf_bench *ringbuf_setup_skeleton(void)
+static struct ringbuf_bench *ringbuf_setup_skeleton(int bench_prod)
 {
+	__u32 flags;
+	struct bpf_map *ringbuf;
 	struct ringbuf_bench *skel;
 
 	setup_libbpf();
@@ -146,12 +177,19 @@ static struct ringbuf_bench *ringbuf_setup_skeleton(void)
 
 	skel->rodata->batch_cnt = args.batch_cnt;
 	skel->rodata->use_output = args.ringbuf_use_output ? 1 : 0;
+	skel->rodata->bench_prod = bench_prod;
 
 	if (args.sampled)
 		/* record data + header take 16 bytes */
 		skel->rodata->wakeup_data_size = args.sample_rate * 16;
 
-	bpf_map__set_max_entries(skel->maps.ringbuf, args.ringbuf_sz);
+	ringbuf = skel->maps.ringbuf;
+	if (args.overwrite) {
+		flags = bpf_map__map_flags(ringbuf) | BPF_F_OVERWRITE;
+		bpf_map__set_map_flags(ringbuf, flags);
+	}
+
+	bpf_map__set_max_entries(ringbuf, args.ringbuf_sz);
 
 	if (ringbuf_bench__load(skel)) {
 		fprintf(stderr, "failed to load skeleton\n");
@@ -171,10 +209,13 @@ static void ringbuf_libbpf_setup(void)
 {
 	struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
 	struct bpf_link *link;
+	int map_fd;
 
-	ctx->skel = ringbuf_setup_skeleton();
-	ctx->ringbuf = ring_buffer__new(bpf_map__fd(ctx->skel->maps.ringbuf),
-					buf_process_sample, NULL, NULL);
+	ctx->skel = ringbuf_setup_skeleton(0);
+
+	map_fd = bpf_map__fd(ctx->skel->maps.ringbuf);
+	ctx->ringbuf = ring_buffer__new(map_fd, buf_process_sample,
+					NULL, NULL);
 	if (!ctx->ringbuf) {
 		fprintf(stderr, "failed to create ringbuf\n");
 		exit(1);
@@ -232,7 +273,7 @@ static void ringbuf_custom_setup(void)
 	void *tmp;
 	int err;
 
-	ctx->skel = ringbuf_setup_skeleton();
+	ctx->skel = ringbuf_setup_skeleton(0);
 
 	ctx->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
 	if (ctx->epoll_fd < 0) {
@@ -277,6 +318,33 @@ static void ringbuf_custom_setup(void)
 	}
 }
 
+/* RINGBUF-PRODUCER benchmark */
+static struct ringbuf_prod_ctx {
+	struct ringbuf_bench *skel;
+} ringbuf_prod_ctx;
+
+static void ringbuf_prod_measure(struct bench_res *res)
+{
+	struct ringbuf_prod_ctx *ctx = &ringbuf_prod_ctx;
+
+	res->hits = atomic_swap(&ctx->skel->bss->hits, 0);
+	res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
+}
+
+static void ringbuf_prod_setup(void)
+{
+	struct ringbuf_prod_ctx *ctx = &ringbuf_prod_ctx;
+	struct bpf_link *link;
+
+	ctx->skel = ringbuf_setup_skeleton(1);
+
+	link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
+	if (!link) {
+		fprintf(stderr, "failed to attach program!\n");
+		exit(1);
+	}
+}
+
 #define RINGBUF_BUSY_BIT (1 << 31)
 #define RINGBUF_DISCARD_BIT (1 << 30)
 #define RINGBUF_META_LEN 8
@@ -540,6 +608,17 @@ const struct bench bench_rb_custom = {
 	.report_final = hits_drops_report_final,
 };
 
+const struct bench bench_rb_prod = {
+	.name = "rb-prod",
+	.argp = &bench_ringbufs_argp,
+	.validate = bufs_validate,
+	.setup = ringbuf_prod_setup,
+	.producer_thread = bufs_sample_producer,
+	.measure = ringbuf_prod_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
 const struct bench bench_pb_libbpf = {
 	.name = "pb-libbpf",
 	.argp = &bench_ringbufs_argp,
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
index 91e3567962ff..84ae66beb0ec 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
@@ -49,3 +49,7 @@ for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
 	summarize "rb-libbpf nr_prod $b" "$($RUN_RB_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)"
 done
 
+header "Ringbuf, overwrite mode with multi-producer contention, no consumer"
+for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
+	summarize "rb-prod nr_prod $b" "$($RUN_BENCH -p$b --rb-batch-cnt 50 --rb-overwrite rb-prod)"
+done
diff --git a/tools/testing/selftests/bpf/progs/ringbuf_bench.c b/tools/testing/selftests/bpf/progs/ringbuf_bench.c
index 6a468496f539..c55282ba4038 100644
--- a/tools/testing/selftests/bpf/progs/ringbuf_bench.c
+++ b/tools/testing/selftests/bpf/progs/ringbuf_bench.c
@@ -14,9 +14,11 @@ struct {
 
 const volatile int batch_cnt = 0;
 const volatile long use_output = 0;
+const volatile long bench_prod = 0;
 
 long sample_val = 42;
 long dropped __attribute__((aligned(128))) = 0;
+long hits __attribute__((aligned(128))) = 0;
 
 const volatile long wakeup_data_size = 0;
 
@@ -24,6 +26,9 @@ static __always_inline long get_flags()
 {
 	long sz;
 
+	if (bench_prod)
+		return BPF_RB_NO_WAKEUP;
+
 	if (!wakeup_data_size)
 		return 0;
 
@@ -47,6 +52,8 @@ int bench_ringbuf(void *ctx)
 				*sample = sample_val;
 				flags = get_flags();
 				bpf_ringbuf_submit(sample, flags);
+				if (bench_prod)
+					__sync_add_and_fetch(&hits, 1);
 			}
 		}
 	} else {
@@ -55,6 +62,9 @@ int bench_ringbuf(void *ctx)
 			if (bpf_ringbuf_output(&ringbuf, &sample_val,
 					       sizeof(sample_val), flags))
 				__sync_add_and_fetch(&dropped, 1);
+			else if (bench_prod)
+				__sync_add_and_fetch(&hits, 1);
+
 		}
 	}
 	return 0;
-- 
2.43.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ