[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20250804022101.2171981-5-xukuohai@huaweicloud.com>
Date: Mon, 4 Aug 2025 10:21:00 +0800
From: Xu Kuohai <xukuohai@...weicloud.com>
To: bpf@...r.kernel.org,
linux-kselftest@...r.kernel.org,
linux-kernel@...r.kernel.org
Cc: Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
Andrii Nakryiko <andrii@...nel.org>,
Martin KaFai Lau <martin.lau@...ux.dev>,
Eduard Zingerman <eddyz87@...il.com>,
Yonghong Song <yhs@...com>,
Song Liu <song@...nel.org>,
John Fastabend <john.fastabend@...il.com>,
KP Singh <kpsingh@...nel.org>,
Stanislav Fomichev <sdf@...gle.com>,
Hao Luo <haoluo@...gle.com>,
Jiri Olsa <jolsa@...nel.org>,
Mykola Lysenko <mykolal@...com>,
Shuah Khan <shuah@...nel.org>,
Stanislav Fomichev <sdf@...ichev.me>,
Willem de Bruijn <willemb@...gle.com>,
Jason Xing <kerneljasonxing@...il.com>,
Paul Chaignon <paul.chaignon@...il.com>,
Tao Chen <chen.dylane@...ux.dev>,
Kumar Kartikeya Dwivedi <memxor@...il.com>,
Martin Kelly <martin.kelly@...wdstrike.com>
Subject: [PATCH bpf-next 4/4] selftests/bpf/benchs: Add overwrite mode bench for rb-libbpf
From: Xu Kuohai <xukuohai@...wei.com>
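Add an overwrite-mode benchmark for the BPF ring buffer. A new --rb-overwrite option, supported only by the rb-libbpf bench, sets BPF_F_OVERWRITE on the ringbuf map before the skeleton is loaded.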
For reference, below are the benchmark numbers collected on one x86_64 machine and one arm64 machine.
- x86_64 (AMD EPYC 9654)
Ringbuf, multi-producer contention, overwrite mode
==================================================
rb-libbpf nr_prod 1 14.970 ± 0.012M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 2 14.064 ± 0.007M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 3 7.493 ± 0.003M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 4 6.575 ± 0.001M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 8 3.696 ± 0.011M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 12 2.612 ± 0.012M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 16 2.335 ± 0.005M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 20 2.079 ± 0.005M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 24 1.965 ± 0.004M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 28 1.846 ± 0.004M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 32 1.790 ± 0.002M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 36 1.735 ± 0.002M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 40 1.701 ± 0.002M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 44 1.669 ± 0.001M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 48 1.749 ± 0.001M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 52 1.709 ± 0.001M/s (drops 0.000 ± 0.000M/s)
- arm64 (HiSilicon Kunpeng 920)
Ringbuf, multi-producer contention, overwrite mode
==================================================
rb-libbpf nr_prod 1 10.319 ± 0.231M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 2 9.219 ± 0.006M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 3 6.699 ± 0.013M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 4 4.608 ± 0.001M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 8 3.905 ± 0.001M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 12 3.282 ± 0.004M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 16 3.182 ± 0.008M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 20 3.029 ± 0.006M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 24 3.116 ± 0.004M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 28 2.869 ± 0.005M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 32 3.075 ± 0.010M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 36 2.795 ± 0.003M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 40 2.947 ± 0.005M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 44 2.748 ± 0.006M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 48 2.767 ± 0.003M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 52 2.858 ± 0.002M/s (drops 0.000 ± 0.000M/s)
Signed-off-by: Xu Kuohai <xukuohai@...wei.com>
---
.../selftests/bpf/benchs/bench_ringbufs.c | 22 ++++++++++++++++++-
.../bpf/benchs/run_bench_ringbufs.sh | 4 ++++
2 files changed, 25 insertions(+), 1 deletion(-)
diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
index e1ee979e6acc..6fdfc61c721b 100644
--- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
+++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
@@ -19,6 +19,7 @@ static struct {
int ringbuf_sz; /* per-ringbuf, in bytes */
bool ringbuf_use_output; /* use slower output API */
int perfbuf_sz; /* per-CPU size, in pages */
+ bool overwrite_mode;
} args = {
.back2back = false,
.batch_cnt = 500,
@@ -27,6 +28,7 @@ static struct {
.ringbuf_sz = 512 * 1024,
.ringbuf_use_output = false,
.perfbuf_sz = 128,
+ .overwrite_mode = false,
};
enum {
@@ -35,6 +37,7 @@ enum {
ARG_RB_BATCH_CNT = 2002,
ARG_RB_SAMPLED = 2003,
ARG_RB_SAMPLE_RATE = 2004,
+ ARG_RB_OVERWRITE = 2005,
};
static const struct argp_option opts[] = {
@@ -43,6 +46,7 @@ static const struct argp_option opts[] = {
{ "rb-batch-cnt", ARG_RB_BATCH_CNT, "CNT", 0, "Set BPF-side record batch count"},
{ "rb-sampled", ARG_RB_SAMPLED, NULL, 0, "Notification sampling"},
{ "rb-sample-rate", ARG_RB_SAMPLE_RATE, "RATE", 0, "Notification sample rate"},
+ { "rb-overwrite", ARG_RB_OVERWRITE, NULL, 0, "overwrite mode"},
{},
};
@@ -72,6 +76,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
argp_usage(state);
}
break;
+ case ARG_RB_OVERWRITE:
+ args.overwrite_mode = true;
+ break;
default:
return ARGP_ERR_UNKNOWN;
}
@@ -104,6 +111,11 @@ static void bufs_validate(void)
fprintf(stderr, "back-to-back mode makes sense only for single-producer case!\n");
exit(1);
}
+
+ if (args.overwrite_mode && strcmp(env.bench_name, "rb-libbpf") != 0) {
+ fprintf(stderr, "rb-overwrite mode only supports rb-libbpf!\n");
+ exit(1);
+ }
}
static void *bufs_sample_producer(void *input)
@@ -134,6 +146,8 @@ static void ringbuf_libbpf_measure(struct bench_res *res)
static struct ringbuf_bench *ringbuf_setup_skeleton(void)
{
+ __u32 flags;
+ struct bpf_map *ringbuf;
struct ringbuf_bench *skel;
setup_libbpf();
@@ -151,7 +165,13 @@ static struct ringbuf_bench *ringbuf_setup_skeleton(void)
/* record data + header take 16 bytes */
skel->rodata->wakeup_data_size = args.sample_rate * 16;
- bpf_map__set_max_entries(skel->maps.ringbuf, args.ringbuf_sz);
+ ringbuf = skel->maps.ringbuf;
+ if (args.overwrite_mode) {
+ flags = bpf_map__map_flags(ringbuf) | BPF_F_OVERWRITE;
+ bpf_map__set_map_flags(ringbuf, flags);
+ }
+
+ bpf_map__set_max_entries(ringbuf, args.ringbuf_sz);
if (ringbuf_bench__load(skel)) {
fprintf(stderr, "failed to load skeleton\n");
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
index 91e3567962ff..4e758bc52b73 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
@@ -49,3 +49,7 @@ for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
summarize "rb-libbpf nr_prod $b" "$($RUN_RB_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)"
done
+header "Ringbuf, multi-producer contention, overwrite mode"
+for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
+ summarize "rb-libbpf nr_prod $b" "$($RUN_RB_BENCH -p$b --rb-overwrite --rb-batch-cnt 50 rb-libbpf)"
+done
--
2.43.0
Powered by blists - more mailing lists