lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250731132615.938435-3-yuzhuo@google.com>
Date: Thu, 31 Jul 2025 06:26:12 -0700
From: Yuzhuo Jing <yuzhuo@...gle.com>
To: Davidlohr Bueso <dave@...olabs.net>, "Paul E . McKenney" <paulmck@...nel.org>, 
	Josh Triplett <josh@...htriplett.org>, Frederic Weisbecker <frederic@...nel.org>, 
	Neeraj Upadhyay <neeraj.upadhyay@...nel.org>, Joel Fernandes <joelagnelf@...dia.com>, 
	Boqun Feng <boqun.feng@...il.com>, Uladzislau Rezki <urezki@...il.com>, 
	Steven Rostedt <rostedt@...dmis.org>, Mathieu Desnoyers <mathieu.desnoyers@...icios.com>, 
	Lai Jiangshan <jiangshanlai@...il.com>, Zqiang <qiang.zhang@...ux.dev>, 
	Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>, 
	Arnaldo Carvalho de Melo <acme@...nel.org>, Namhyung Kim <namhyung@...nel.org>, 
	Mark Rutland <mark.rutland@....com>, 
	Alexander Shishkin <alexander.shishkin@...ux.intel.com>, Jiri Olsa <jolsa@...nel.org>, 
	Ian Rogers <irogers@...gle.com>, Adrian Hunter <adrian.hunter@...el.com>, 
	Liang Kan <kan.liang@...ux.intel.com>, Yuzhuo Jing <yzj@...ch.edu>, 
	Yuzhuo Jing <yuzhuo@...gle.com>, Sebastian Andrzej Siewior <bigeasy@...utronix.de>, linux-kernel@...r.kernel.org, 
	rcu@...r.kernel.org, linux-perf-users@...r.kernel.org
Subject: [PATCH v1 2/5] perf bench: Implement subprocess execution for 'sync rcu'

Monitoring system state is useful for understanding performance impact.
This patch enables running an external tool during the benchmark.  It
provides semantics similar to 'perf record -- perf bench mem', except
that the order is reversed.

Because the benchmark threads are kthreads created by a kernel module,
perf cannot directly attach to them.  This patch proposes a method to
execute the attach command from a child process, using command line
substitution.

If any of the command strings contains a "{READER,WRITER,KFREE}_TASKS"
placeholder, it is replaced with the real values upon startup.  The
thread ID information comes from
/sys/kernel/debug/rcuscale/{reader,writer,kfree}_tasks.

Example usage of running 'perf stat' to attach kernel threads:

$ ./perf bench sync rcu once  sync nreaders=1 nwriters=1 writer_cpu_offset=1 -- \
          perf stat -e ipi:ipi_send_cpu,rcu:rcu_grace_period \
          -t READER_TASKS,WRITER_TASKS
\# Running 'sync/rcu' benchmark:
Running experiment with options: nreaders=1 nwriters=1 writer_cpu_offset=1
Running child command: perf stat -e ipi:ipi_send_cpu,rcu:rcu_grace_period -t 1682932,1682933

 Performance counter stats for thread id '1682932,1682933':

             20105      ipi:ipi_send_cpu
               702      rcu:rcu_grace_period

      25.023871111 seconds time elapsed

Experiment finished.
Waiting for child process to exit.
Average grace-period duration: 188128.652 microseconds
Minimum grace-period duration: 9000.221
50th percentile grace-period duration: 217996.932
90th percentile grace-period duration: 218001.019
99th percentile grace-period duration: 218153.558
Maximum grace-period duration: 326999.705

Signed-off-by: Yuzhuo Jing <yuzhuo@...gle.com>
---
 tools/perf/bench/sync-rcu.c | 252 +++++++++++++++++++++++++++++++++++-
 1 file changed, 247 insertions(+), 5 deletions(-)

diff --git a/tools/perf/bench/sync-rcu.c b/tools/perf/bench/sync-rcu.c
index ac85841f0b68..934d2416c216 100644
--- a/tools/perf/bench/sync-rcu.c
+++ b/tools/perf/bench/sync-rcu.c
@@ -5,6 +5,7 @@
  * 2025  Yuzhuo Jing <yuzhuo@...gle.com>
  */
 #include <dirent.h>
+#include <ctype.h>
 #include <err.h>
 #include <errno.h>
 #include <inttypes.h>
@@ -32,6 +33,7 @@
 static bool dryrun;
 static unsigned int cooldown = 3;
 static bool show_hist;
+static unsigned int child_delay = 1;
 static const char *debugfs = "/sys/kernel/debug";
 
 static const struct option bench_rcu_options[] = {
@@ -40,6 +42,8 @@ static const struct option bench_rcu_options[] = {
 		"Sleep time between each run (default: 3 seconds)"),
 	OPT_BOOLEAN(0,		"hist",		&show_hist,
 		"Show histogram of writer durations"),
+	OPT_UINTEGER(0,		"child-delay",	&child_delay,
+		"Wait for child startup before starting experiment (default: 1 second)"),
 	OPT_STRING(0,		"debugfs",	&debugfs,	"path",
 		"Debugfs mount point (default: /sys/kernel/debug)"),
 	OPT_END()
@@ -48,13 +52,18 @@ static const struct option bench_rcu_options[] = {
 static const char *const bench_rcu_usage[] = {
 	"RCU benchmark using rcuscale kernel module.",
 	"",
-	"perf bench sync rcu [options..]",
-	"perf bench sync rcu [options..] once  <gp_type> [<param=value>..]",
+	"perf bench sync rcu [options..] [-- <command>..]",
+	"perf bench sync rcu [options..] once  <gp_type> [<param=value>..] [-- <command>..]",
 	"",
 	"  <gp_type>: The type of grace period to use: sync, async, exp (expedited)",
 	"             This sets the gp_exp or gp_async kernel module parameters.",
 	"  <param>:   Any parameter of the rcuscale kernel module, e.g. holdoff=5.",
 	"             Valid options can be found from running `modinfo rcuscale`.",
+	"  <command>: A child command to run during the experiment.  This is useful",
+	"             for running tools that monitor system metrics during the",
+	"             experiment. If the command line string contains",
+	"             {READER,WRITER,KFREE}_TASKS placeholders, they will be substituted",
+	"             with the tasks PIDs, separated by comma.",
 	"",
 	"Notes on param:",
 	"  This benchmark manages gp_exp and gp_async, and sets block_start=1.",
@@ -73,6 +82,10 @@ static const char *const bench_rcu_usage[] = {
 	"  perf bench sync rcu once",
 	"  perf bench sync rcu once  sync nreaders=1 nwriters=1 writer_cpu_offset=1",
 	"",
+	"  perf bench sync rcu once  sync nreaders=1 nwriters=1 writer_cpu_offset=1 -- \\",
+	"      perf stat -e ipi:ipi_send_cpu,rcu:rcu_grace_period \\",
+	"      -t READER_TASKS,WRITER_TASKS",
+	"",
 	"In case perf exited abnormally, user need to unload rcuscale by running:",
 	"  modprobe -r rcuscale torture",
 	"",
@@ -105,6 +118,23 @@ struct modprobe_cmd {
 	}
 #define MODPROBE_REMOVE_CMD "modprobe -r rcuscale torture"
 
+/*
+ * Generated subprocess command.
+ *
+ * Unlike modprobe_cmd, this struct owns the argv array and all strings in
+ * the array.  The only exception is child_cmd_template, whose argv borrows
+ * the part of the benchmark's own argv that follows "--".
+ *
+ * Upon each runonce(), generate_child_command() makes a copy of the strings
+ * in child_cmd_template and substitutes placeholders with actual values.
+ */
+struct child_cmd {
+	int argc;	/* number of command words in argv */
+	char **argv;	/* command words; argv[0] is the program to execute */
+};
+
+static struct child_cmd child_cmd_template;
+
 /*
  * Generic modprobe parameter definition.  This is the storage for an
  * instantiated module parameter.  This may come from parameters directly
@@ -122,6 +152,7 @@ struct modprobe_param {
 static struct modprobe_param simple_params[MAX_OPTS];
 static int simple_params_count;
 
+static pid_t child_pid;
 static bool in_child;
 
 struct durations {
@@ -177,6 +208,12 @@ static void cleanup(void)
 		return;
 
 	unload_module();
+
+	if (child_pid > 0) {	/* skip -1 from a failed fork(): kill(-1) signals everyone */
+		kill(child_pid, SIGTERM);
+		waitpid(child_pid, NULL, 0);
+		child_pid = 0;
+	}
 }
 
 static void signal_handler(int sig)
@@ -407,6 +444,13 @@ static void parse_module_params(int argc, const char *argv[])
 		char *value;
 		char buf[MAX_OPTVALUE] = "";
 
+		/* Handle child command. */
+		if (strcmp(argv[0], "--") == 0) {
+			child_cmd_template.argc = argc - 1;
+			child_cmd_template.argv = (char **)argv + 1;
+			break;
+		}
+
 		if (strnlen(argv[0], MAX_OPTVALUE) >= MAX_OPTVALUE - 1)
 			fail("Module parameter too long: \"%s\"", argv[0]);
 		strlcpy(buf, argv[0], MAX_OPTVALUE);
@@ -434,6 +478,162 @@ static void parse_module_params(int argc, const char *argv[])
 	}
 }
 
+/* ======================== Child Command Handling ========================= */
+
+/*
+ * Read the reader, writer, or kfree task IDs (one entry per line) from the
+ * given rcuscale debugfs file and return them joined by ",".  Caller frees.
+ */
+static char *get_tids(const char *debugfs_filename)
+{
+	char path[PATH_MAX];
+	FILE *fp;
+
+	char *tids = calloc(INIT_CAPACITY, sizeof(char));
+	size_t tids_len = 0;
+	size_t tids_capacity = INIT_CAPACITY;
+
+	char *line = NULL;
+	size_t line_buf_size = 0;
+
+	if (!tids)
+		fail("Failed to allocate memory for substitute string");
+
+	snprintf(path, sizeof(path), "%s/rcuscale/%s", debugfs, debugfs_filename);
+
+	fp = fopen(path, "r");
+	if (!fp)
+		err(EXIT_FAILURE, "Failed to open %s", path);
+
+	while (getline(&line, &line_buf_size, fp) != -1) {
+		size_t line_len = strlen(line);
+
+		/* trim trailing white space; <ctype.h> wants an unsigned char value */
+		while (line_len && isspace((unsigned char)line[line_len - 1]))
+			line[--line_len] = '\0';
+		/* a blank line must not add an empty entry such as "1,,2" */
+		if (!line_len)
+			continue;
+		/* 2 for NUL-terminator and "," */
+		reserve_size(&tids, &tids_capacity, tids_len + line_len + 2);
+		if (tids_len)	/* no "," before the first value */
+			tids[tids_len++] = ',';
+		memcpy(tids + tids_len, line, line_len + 1);
+		tids_len += line_len;
+	}
+
+	free(line);
+	fclose(fp);
+
+	return tids;
+}
+
+/* Replace every occurrence of placeholder in *arg with the task ID list, which
+ * is read from debugfs_filename on first use and cached in *replacement.
+ * *arg is modified in place and is passed to reserve_size() to make room. */
+static void replace_child_arg(char **arg, const char *placeholder,
+				const char *debugfs_filename, char **replacement)
+{
+	size_t str_capacity = strlen(*arg) + 1;
+	size_t placeholder_len = strlen(placeholder);
+
+	while (true) {
+		size_t replacement_len;
+		const char *found = strstr(*arg, placeholder);	/* rescans from the start each pass */
+		size_t placeholder_off, suffix_off;
+
+		if (found == NULL)
+			return;
+
+		placeholder_off = found - *arg;
+		found = NULL;	/* only the offset is used from here on */
+
+		/* Replacement is calculated lazily upon encountering placeholder */
+		if (*replacement == NULL)
+			*replacement = get_tids(debugfs_filename);
+
+		replacement_len = strlen(*replacement);
+
+		reserve_size(arg, &str_capacity,
+			str_capacity - placeholder_len + replacement_len + 1);
+
+		suffix_off = placeholder_off + placeholder_len;
+
+		/* Move:                   v suffix_off
+		 *       PREFIX PLACEHOLDER SUFFIX
+		 *             ^ placeholder_off
+		 * To:   PREFIX _______ SUFFIX
+		 * Or:   PREFIX _______________ SUFFIX
+		 *                             ^ placeholder_off+replacement_len
+		 */
+		memmove(*arg + placeholder_off + replacement_len,
+			*arg + suffix_off, strlen(*arg + suffix_off) + 1);
+		/* Fill in the replacement; the suffix and its NUL were moved above */
+		memcpy(*arg + placeholder_off, *replacement, replacement_len);
+	}
+}
+
+/*
+ * Build the child command for one run: duplicate every template argument and,
+ * unless this is a dry run, expand the {READER,WRITER,KFREE}_TASKS placeholders
+ * into comma-separated task IDs.  Release the result with free_child_command().
+ */
+static struct child_cmd *generate_child_command(void)
+{
+	/* Per-placeholder task ID lists, filled in on first use. */
+	char *reader_tids = NULL, *writer_tids = NULL, *kfree_tids = NULL;
+	const int nr_args = child_cmd_template.argc;
+	struct child_cmd *result = calloc(1, sizeof(*result));
+
+	if (result == NULL)
+		fail("Failed to allocate memory for child command");
+
+	result->argc = nr_args;
+	result->argv = NULL;
+	if (nr_args == 0)
+		return result;
+
+	/* One extra slot for the NULL terminator that execvp() expects. */
+	result->argv = malloc((nr_args + 1) * sizeof(char *));
+	if (result->argv == NULL)
+		fail("Failed to allocate memory for child command");
+
+	for (int n = 0; n < nr_args; n++) {
+		char *word = strdup(child_cmd_template.argv[n]);
+
+		if (word == NULL)
+			fail("Failed to allocate memory for child command");
+
+		/* A dry run keeps the placeholders verbatim. */
+		if (!dryrun) {
+			replace_child_arg(&word, "READER_TASKS", "reader_tasks", &reader_tids);
+			replace_child_arg(&word, "WRITER_TASKS", "writer_tasks", &writer_tids);
+			replace_child_arg(&word, "KFREE_TASKS", "kfree_tasks", &kfree_tids);
+		}
+
+		/* Store only now: replace_child_arg() may update the pointer. */
+		result->argv[n] = word;
+	}
+
+	result->argv[nr_args] = NULL;
+
+	free(reader_tids);
+	free(writer_tids);
+	free(kfree_tids);
+
+	return result;
+}
+
+/* Free a command from generate_child_command(): strings, argv and cmd itself. */
+static void free_child_command(struct child_cmd *cmd)
+{
+	for (int i = 0; i < cmd->argc; i++)
+		free(cmd->argv[i]);
+
+	free(cmd->argv);
+	free(cmd);	/* allocated by generate_child_command() as well */
+}
+
 /* ====================== Experiment Result Handling ====================== */
 
 static void durations_add(struct durations *durations, u64 duration)
@@ -692,18 +892,53 @@ static void print_params(const struct modprobe_cmd *cmd)
 		printf("\n");
 }
 
+static void print_child_command(const struct child_cmd *cmd)
+{
+	if (cmd->argc == 0)	/* no child command requested: stay silent */
+		return;
+	fputs("Running child command:", stdout);
+	for (int idx = 0; idx < cmd->argc; idx++)
+		printf(" %s", cmd->argv[idx]);
+	putchar('\n');
+}
+
 /*
  * Core Experiment function
  */
 static void runonce(const struct modprobe_cmd *modprobe_cmd)
 {
+	struct child_cmd *child_cmd;
 	struct durations *durations;
 
 	print_params(modprobe_cmd);
 	run_modprobe(modprobe_cmd);
 
-	if (dryrun)
+	child_cmd = generate_child_command();
+	print_child_command(child_cmd);
+
+	if (dryrun) {
+		free_child_command(child_cmd);
 		return;
+	}
+
+	if (child_cmd->argc != 0) {
+		// Start command in background
+		child_pid = fork();
+		if (child_pid < 0)
+			err(EXIT_FAILURE, "Failed to fork child process");
+
+		if (child_pid == 0) {
+			execvp(child_cmd->argv[0], child_cmd->argv);
+			in_child = true;
+			err(EXIT_FAILURE, "Failed to execute child command");
+		}
+		// otherwise, parent process
+	}
+	free_child_command(child_cmd);
+	child_cmd = NULL;
+
+	/* Wait for child process to initialize */
+	sleep(child_delay);
 
 	/* Start and wait for experiment */
 	start_experiment();
@@ -717,6 +952,13 @@ static void runonce(const struct modprobe_cmd *modprobe_cmd)
 
 	printf("Experiment finished.\n");
 
+	/* Wait for child to finish */
+	if (child_pid != 0) {
+		printf("Waiting for child process to exit.\n");
+		waitpid(child_pid, NULL, 0);
+		child_pid = 0;
+	}
+
 	/* Print statistics */
 	print_writer_duration_stats(durations);
 	free_durations(durations);
@@ -779,13 +1021,13 @@ int bench_sync_rcu(int argc, const char **argv)
 
 	/* Parse global options first. */
 	argc = parse_options(argc, argv, bench_rcu_options, bench_rcu_usage,
-			     PARSE_OPT_STOP_AT_NON_OPTION);
+			     PARSE_OPT_STOP_AT_NON_OPTION | PARSE_OPT_KEEP_DASHDASH);
 
 	/* The empty case is equivalent to 'once sync'.
 	 * Otherwise, at least two positional options are required:
 	 * once/range/ratio and sync/async/exp
 	 */
-	if (argc == 0) {
+	if (argc == 0 || strcmp(argv[0], "--") == 0) {
 		runmode = "once";
 		gp_type = "sync";
 	} else if (argc < 2) {
-- 
2.50.1.565.gc32cd1483b-goog


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ