linux-kernel - [PATCH v2 2/3] perf bench sched pipe: add complete graph simulation

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20250405120039.15953-3-dirk@gouders.net>
Date: Sat,  5 Apr 2025 14:00:07 +0200
From: Dirk Gouders <dirk@...ders.net>
To: Namhyung Kim <namhyung@...nel.org>,
        Arnaldo Carvalho de Melo <acme@...nel.org>,
        Ingo Molnar <mingo@...hat.com>, Peter Zijlstra <peterz@...radead.org>
Cc: Dirk Gouders <dirk@...ders.net>, Ian Rogers <irogers@...gle.com>,
        Adrian Hunter <adrian.hunter@...el.com>,
        LKML <linux-kernel@...r.kernel.org>, linux-perf-users@...r.kernel.org
Subject: [PATCH v2 2/3] perf bench sched pipe: add complete graph simulation

Currently, we have only one worker function: the simulation of a ring
for token traversal.

Add another worker to simulate a complete graph (Kn) for token
traversal.  The new option -K/--Kn selects the new worker.

These different workers can be interesting because they produce noticeably
different workloads, as shown by perf-report(1), for example:

(booted with mitigations=off, 6 processes)

Ring simulation:

Samples: 92K of event 'cycles:P', Event count (approx.): 18690208287
Overhead  Command     Shared Object         Symbol
  13.16%  sched-pipe  [kernel.kallsyms]     [k] timerqueue_add
   7.10%  sched-pipe  [kernel.kallsyms]     [k] read_hpet
   3.36%  sched-pipe  [kernel.kallsyms]     [k] _copy_from_iter
   3.23%  sched-pipe  [kernel.kallsyms]     [k] _copy_to_iter
   2.64%  sched-pipe  [kernel.kallsyms]     [k] vfs_write
   2.55%  sched-pipe  [kernel.kallsyms]     [k] vfs_read

Kn simulation:

Samples: 163K of event 'cycles:P', Event count (approx.): 100366721164
Overhead  Command     Shared Object         Symbol
   5.11%  sched-pipe  [kernel.kallsyms]     [k] _copy_from_iter
   4.90%  sched-pipe  [kernel.kallsyms]     [k] queued_spin_lock_slowpath
   3.99%  sched-pipe  [kernel.kallsyms]     [k] _copy_to_iter
   3.35%  sched-pipe  [kernel.kallsyms]     [k] timerqueue_add
   2.80%  sched-pipe  [kernel.kallsyms]     [k] check_preemption_disabled
   2.56%  sched-pipe  [kernel.kallsyms]     [k] vfs_write
   2.40%  sched-pipe  [kernel.kallsyms]     [k] vfs_read

Signed-off-by: Dirk Gouders <dirk@...ders.net>
---
 tools/perf/Documentation/perf-bench.txt |  5 +++
 tools/perf/bench/sched-pipe.c           | 60 +++++++++++++++++++++++--
 2 files changed, 62 insertions(+), 3 deletions(-)

diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt
index 8a651f2fe3aa..6f7df3d47821 100644
--- a/tools/perf/Documentation/perf-bench.txt
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -130,6 +130,11 @@ process).
 
 Options of *pipe*
 ^^^^^^^^^^^^^^^^^
+-K::
+--Kn::
+Simulate a complete graph instead of a ring for sending tokens.
+Each process sends tokens to and receives tokens from every other process.
+
 -l::
 --loop=::
 Specify number of loops.
diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c
index 28dd7f3a11b2..3c76e8249a9b 100644
--- a/tools/perf/bench/sched-pipe.c
+++ b/tools/perf/bench/sched-pipe.c
@@ -50,6 +50,8 @@ static bool			threaded;
 static unsigned int		nr_threads = 2;
 
 static bool			nonblocking;
+static bool			Kn_mode;	/* Toggle for ring mode -> complete graph mode */
+
 static char			*cgrp_names[2];
 static struct cgroup		*cgrps[2];
 
@@ -90,6 +92,7 @@ static const struct option options[] = {
 	OPT_BOOLEAN('n', "nonblocking",	&nonblocking,	"Use non-blocking operations"),
 	OPT_UINTEGER('p', "nprocs",	&nr_threads,    "Number of processes"),
 	OPT_UINTEGER('l', "loop",	&loops,		"Specify number of loops"),
+	OPT_BOOLEAN('K', "Kn",		&Kn_mode,	"Send tokens in a complete graph instead of a ring."),
 	OPT_BOOLEAN('T', "threaded",	&threaded,	"Specify threads/process based task setup"),
 	OPT_CALLBACK('G', "cgroups", NULL, "SEND,RECV",
 		     "Put sender and receivers in given cgroups",
@@ -188,11 +191,55 @@ static inline int read_pipe(struct thread_data *td)
 	return ret;
 }
 
+/*
+ * Worker for complete graph (Kn) mode: in every loop iteration, write one
+ * token to each other worker's pipe_write, then call read_pipe() nr_threads - 1 times.
+ */
+static void *worker_thread_kn(void *__tdata)
+{
+	struct thread_data *this_thread = __tdata;
+	struct thread_data *all_threads = this_thread - this_thread->nr;
+
+	int ret, m = 0;
+	unsigned int i;
+	unsigned int t;
+
+	ret = enter_cgroup(this_thread->nr);
+	if (ret < 0) {
+		this_thread->cgroup_failed = true;
+		return NULL;
+	}
+
+	if (nonblocking) {
+		this_thread->epoll_ev.events = EPOLLIN;
+		this_thread->epoll_fd = epoll_create(1);
+		BUG_ON(this_thread->epoll_fd < 0);
+		BUG_ON(epoll_ctl(this_thread->epoll_fd, EPOLL_CTL_ADD, this_thread->pipe_read, &this_thread->epoll_ev) < 0);
+	}
+
+	for (i = 0; i < loops; i++) {
+		/* First: write one token to every worker except ourselves. */
+		for (t = 0; t < nr_threads; t++)
+			if (t != this_thread->nr) {
+				ret = write(all_threads[t].pipe_write, &m, sizeof(int));
+				BUG_ON(ret != sizeof(int));
+			}
+
+		/* Then: read nr_threads - 1 tokens, one read_pipe() call each. */
+		for (t = 1; t < nr_threads; t++) {
+			ret = read_pipe(this_thread);
+			BUG_ON(ret != sizeof(int));
+		}
+	}
+
+	return NULL;
+}
+
 /*
  * Worker thread for nodes forming a ring, receiving tokens from the left
  * neighbor and sending them to the right one.
  */
-static void *worker_thread(void *__tdata)
+static void *worker_thread_ring(void *__tdata)
 {
 	struct thread_data *this_thread = __tdata;
 	struct thread_data *first_thread = this_thread - this_thread->nr;
@@ -231,6 +278,9 @@ static void *worker_thread(void *__tdata)
 	return NULL;
 }
 
+/* Worker to run: ring by default, worker_thread_kn if -K/--Kn is given. */
+static void * (*worker_thread)(void *) = worker_thread_ring;
+
 static struct thread_data *create_thread_data(void)
 {
 	struct thread_data *threads;
@@ -279,6 +329,9 @@ int bench_sched_pipe(int argc, const char **argv)
 
 	argc = parse_options(argc, argv, options, bench_sched_pipe_usage, 0);
 
+	if (Kn_mode)
+		worker_thread = worker_thread_kn;
+
 	threads = create_thread_data();
 
 	gettimeofday(&start, NULL);
@@ -331,8 +384,9 @@ int bench_sched_pipe(int argc, const char **argv)
 
 	switch (bench_format) {
 	case BENCH_FORMAT_DEFAULT:
-		printf("# Executed %d pipe operations between %u %s\n\n", loops,
-		       nr_threads, threaded ? "threads" : "processes");
+		printf("# Executed %d pipe operations (%s) between %u %s\n\n", loops,
+		       Kn_mode ? "Kn" : "ring", nr_threads,
+		       threaded ? "threads" : "processes");
 
 		result_usec = diff.tv_sec * USEC_PER_SEC;
 		result_usec += diff.tv_usec;
-- 
2.45.3