[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250731132615.938435-3-yuzhuo@google.com>
Date: Thu, 31 Jul 2025 06:26:12 -0700
From: Yuzhuo Jing <yuzhuo@...gle.com>
To: Davidlohr Bueso <dave@...olabs.net>, "Paul E . McKenney" <paulmck@...nel.org>,
Josh Triplett <josh@...htriplett.org>, Frederic Weisbecker <frederic@...nel.org>,
Neeraj Upadhyay <neeraj.upadhyay@...nel.org>, Joel Fernandes <joelagnelf@...dia.com>,
Boqun Feng <boqun.feng@...il.com>, Uladzislau Rezki <urezki@...il.com>,
Steven Rostedt <rostedt@...dmis.org>, Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
Lai Jiangshan <jiangshanlai@...il.com>, Zqiang <qiang.zhang@...ux.dev>,
Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>,
Arnaldo Carvalho de Melo <acme@...nel.org>, Namhyung Kim <namhyung@...nel.org>,
Mark Rutland <mark.rutland@....com>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>, Jiri Olsa <jolsa@...nel.org>,
Ian Rogers <irogers@...gle.com>, Adrian Hunter <adrian.hunter@...el.com>,
Liang Kan <kan.liang@...ux.intel.com>, Yuzhuo Jing <yzj@...ch.edu>,
Yuzhuo Jing <yuzhuo@...gle.com>, Sebastian Andrzej Siewior <bigeasy@...utronix.de>, linux-kernel@...r.kernel.org,
rcu@...r.kernel.org, linux-perf-users@...r.kernel.org
Subject: [PATCH v1 2/5] perf bench: Implement subprocess execution for 'sync rcu'
Monitoring system state is useful for understanding performance impact.
This patch enables running an external tool during the benchmark. It
provides semantics similar to 'perf record -- perf bench mem', except
that the order is reversed.
Because the benchmark threads are kthreads created by a kernel module, perf
cannot directly attach to them. This patch proposes a method to execute
the attach command from a child process, using command line
substitution.
If any of the command strings contains a "{READER,WRITER,KFREE}_TASKS"
placeholder, it is replaced with the real value upon startup. The
thread ID information comes from
/sys/kernel/debug/rcuscale/{reader,writer,kfree}_tasks.
Example usage of running 'perf stat' attached to the kernel threads:
$ ./perf bench sync rcu once sync nreaders=1 nwriters=1 writer_cpu_offset=1 -- \
perf stat -e ipi:ipi_send_cpu,rcu:rcu_grace_period \
-t READER_TASKS,WRITER_TASKS
# Running 'sync/rcu' benchmark:
Running experiment with options: nreaders=1 nwriters=1 writer_cpu_offset=1
Running child command: perf stat -e ipi:ipi_send_cpu,rcu:rcu_grace_period -t 1682932,1682933
Performance counter stats for thread id '1682932,1682933':
20105 ipi:ipi_send_cpu
702 rcu:rcu_grace_period
25.023871111 seconds time elapsed
Experiment finished.
Waiting for child process to exit.
Average grace-period duration: 188128.652 microseconds
Minimum grace-period duration: 9000.221
50th percentile grace-period duration: 217996.932
90th percentile grace-period duration: 218001.019
99th percentile grace-period duration: 218153.558
Maximum grace-period duration: 326999.705
Signed-off-by: Yuzhuo Jing <yuzhuo@...gle.com>
---
tools/perf/bench/sync-rcu.c | 252 +++++++++++++++++++++++++++++++++++-
1 file changed, 247 insertions(+), 5 deletions(-)
diff --git a/tools/perf/bench/sync-rcu.c b/tools/perf/bench/sync-rcu.c
index ac85841f0b68..934d2416c216 100644
--- a/tools/perf/bench/sync-rcu.c
+++ b/tools/perf/bench/sync-rcu.c
@@ -5,6 +5,7 @@
* 2025 Yuzhuo Jing <yuzhuo@...gle.com>
*/
#include <dirent.h>
+#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <inttypes.h>
@@ -32,6 +33,7 @@
static bool dryrun;
static unsigned int cooldown = 3;
static bool show_hist;
+static unsigned int child_delay = 1;
static const char *debugfs = "/sys/kernel/debug";
static const struct option bench_rcu_options[] = {
@@ -40,6 +42,8 @@ static const struct option bench_rcu_options[] = {
"Sleep time between each run (default: 3 seconds)"),
OPT_BOOLEAN(0, "hist", &show_hist,
"Show histogram of writer durations"),
+ OPT_UINTEGER(0, "child-delay", &child_delay,
+ "Wait for child startup before starting experiment (default: 1 second)"),
OPT_STRING(0, "debugfs", &debugfs, "path",
"Debugfs mount point (default: /sys/kernel/debug)"),
OPT_END()
@@ -48,13 +52,18 @@ static const struct option bench_rcu_options[] = {
static const char *const bench_rcu_usage[] = {
"RCU benchmark using rcuscale kernel module.",
"",
- "perf bench sync rcu [options..]",
- "perf bench sync rcu [options..] once <gp_type> [<param=value>..]",
+ "perf bench sync rcu [options..] [-- <command>..]",
+ "perf bench sync rcu [options..] once <gp_type> [<param=value>..] [-- <command>..]",
"",
" <gp_type>: The type of grace period to use: sync, async, exp (expedited)",
" This sets the gp_exp or gp_async kernel module parameters.",
" <param>: Any parameter of the rcuscale kernel module, e.g. holdoff=5.",
" Valid options can be found from running `modinfo rcuscale`.",
+ " <command>: A child command to run during the experiment. This is useful",
+ " for running tools that monitor system metrics during the",
+ " experiment. If the command line string contains",
+ " {READER,WRITER,KFREE}_TASKS placeholders, they will be substituted",
+ " with the tasks PIDs, separated by comma.",
"",
"Notes on param:",
" This benchmark manages gp_exp and gp_async, and sets block_start=1.",
@@ -73,6 +82,10 @@ static const char *const bench_rcu_usage[] = {
" perf bench sync rcu once",
" perf bench sync rcu once sync nreaders=1 nwriters=1 writer_cpu_offset=1",
"",
+ " perf bench sync rcu once sync nreaders=1 nwriters=1 writer_cpu_offset=1 -- \\",
+ " perf stat -e ipi:ipi_send_cpu,rcu:rcu_grace_period \\",
+ " -t READER_TASKS,WRITER_TASKS",
+ "",
"In case perf exited abnormally, user need to unload rcuscale by running:",
" modprobe -r rcuscale torture",
"",
@@ -105,6 +118,23 @@ struct modprobe_cmd {
}
#define MODPROBE_REMOVE_CMD "modprobe -r rcuscale torture"
+/*
+ * Generated subprocess command.
+ *
+ * Unlike modprobe_cmd, this struct owns the argv array and all strings in
+ * the array. The only exception is child_cmd_template, whose argv points at
+ * the remainder of the command line left over from argument parsing.
+ *
+ * Upon each runonce(), generate_child_command() makes a copy of the strings
+ * in child_cmd_template and substitutes placeholders with their actual values.
+ */
+struct child_cmd {
+ int argc; /* number of arguments in argv */
+ char **argv; /* argument strings */
+};
+
+static struct child_cmd child_cmd_template;
+
/*
* Generic modprobe parameter definition. This is the storage for an
* instantiated module parameter. This may come from parameters directly
@@ -122,6 +152,7 @@ struct modprobe_param {
static struct modprobe_param simple_params[MAX_OPTS];
static int simple_params_count;
+static pid_t child_pid;
static bool in_child;
struct durations {
@@ -177,6 +208,12 @@ static void cleanup(void)
return;
unload_module();
+
+ if (child_pid) {
+ kill(child_pid, SIGTERM);
+ waitpid(child_pid, NULL, 0);
+ child_pid = 0;
+ }
}
static void signal_handler(int sig)
@@ -407,6 +444,13 @@ static void parse_module_params(int argc, const char *argv[])
char *value;
char buf[MAX_OPTVALUE] = "";
+ /* Handle child command. */
+ if (strcmp(argv[0], "--") == 0) {
+ child_cmd_template.argc = argc - 1;
+ child_cmd_template.argv = (char **)argv + 1;
+ break;
+ }
+
if (strnlen(argv[0], MAX_OPTVALUE) >= MAX_OPTVALUE - 1)
fail("Module parameter too long: \"%s\"", argv[0]);
strlcpy(buf, argv[0], MAX_OPTVALUE);
@@ -434,6 +478,162 @@ static void parse_module_params(int argc, const char *argv[])
}
}
+/* ======================== Child Command Handling ========================= */
+
+/*
+ * Read reader, writer, or kfree task IDs from the given rcuscale debugfs
+ * file and return them as a single comma-separated string.
+ *
+ * Each line of the file contributes one entry. Trailing whitespace is
+ * stripped, and lines that are empty after stripping are skipped so that
+ * the result never contains an empty field (no leading, trailing, or
+ * doubled commas).
+ *
+ * Returns a heap-allocated, NUL-terminated string (possibly empty) that
+ * the caller must free(). Allocation failure is reported through fail();
+ * failure to open the file terminates the process through err().
+ */
+static char *get_tids(const char *debugfs_filename)
+{
+	char path[PATH_MAX];
+	FILE *fp;
+	char *tids;
+	size_t tids_len = 0;
+	size_t tids_capacity = INIT_CAPACITY;
+	char *line = NULL;
+	size_t line_buf_size = 0;
+
+	tids = calloc(INIT_CAPACITY, sizeof(char));
+	if (!tids)
+		fail("Failed to allocate memory for substitute string");
+
+	snprintf(path, sizeof(path), "%s/rcuscale/%s", debugfs, debugfs_filename);
+
+	fp = fopen(path, "r");
+	if (!fp)
+		err(EXIT_FAILURE, "Failed to open %s", path);
+
+	while (getline(&line, &line_buf_size, fp) != -1) {
+		size_t line_len = strlen(line);
+
+		/*
+		 * Trim trailing white space and new line characters. <ctype.h>
+		 * functions need an unsigned char value; plain char may be
+		 * negative.
+		 */
+		while (line_len && isspace((unsigned char)line[line_len - 1]))
+			line[--line_len] = '\0';
+
+		/* A blank line would otherwise add a stray separator. */
+		if (line_len == 0)
+			continue;
+
+		/* 2 for NUL-terminator and "," */
+		reserve_size(&tids, &tids_capacity, tids_len + line_len + 2);
+
+		/* Skip "," for the first value. */
+		if (tids_len != 0)
+			tids[tids_len++] = ',';
+
+		/* Copy the entry together with its NUL terminator. */
+		memcpy(tids + tids_len, line, line_len + 1);
+		tids_len += line_len;
+	}
+
+	free(line);
+	fclose(fp);
+
+	return tids;
+}
+
+/*
+ * Substitute every occurrence of @placeholder in the string *@arg with the
+ * task ID list read from @debugfs_filename. The string is edited in place
+ * and *@arg is updated through reserve_size(), so callers must use the
+ * pointer stored in *@arg afterwards.
+ *
+ * *@replacement caches the task ID list: it is filled by get_tids() the
+ * first time a placeholder is actually found and reused after that. It is
+ * left untouched when no placeholder occurs. The caller frees it.
+ */
+static void replace_child_arg(char **arg, const char *placeholder,
+			      const char *debugfs_filename, char **replacement)
+{
+	const size_t old_len = strlen(placeholder);
+	size_t capacity = strlen(*arg) + 1;
+	const char *hit;
+
+	/* Rescan from the start of the string after every substitution. */
+	for (hit = strstr(*arg, placeholder); hit != NULL;
+	     hit = strstr(*arg, placeholder)) {
+		/* Keep offsets only: presumably reserve_size() may move *arg. */
+		size_t head_len = hit - *arg;
+		size_t tail_off = head_len + old_len;
+		size_t new_len;
+
+		/* Only read debugfs once a placeholder is really present. */
+		if (*replacement == NULL)
+			*replacement = get_tids(debugfs_filename);
+		new_len = strlen(*replacement);
+
+		reserve_size(arg, &capacity, capacity - old_len + new_len + 1);
+
+		/*
+		 * Slide the tail (including its NUL) so that it begins right
+		 * after where the replacement will end, then drop the
+		 * replacement into the gap:
+		 *
+		 *   HEAD PLACEHOLDER TAIL  ->  HEAD REPLACEMENT TAIL
+		 */
+		memmove(*arg + head_len + new_len, *arg + tail_off,
+			strlen(*arg + tail_off) + 1);
+		memcpy(*arg + head_len, *replacement, new_len);
+	}
+}
+
+/*
+ * Build the child command for one run from child_cmd_template.
+ *
+ * Every template argument is duplicated, and unless this is a dry run each
+ * {READER,WRITER,KFREE}_TASKS placeholder in it is replaced by the matching
+ * comma-separated task list. The returned argv is NULL-terminated; when the
+ * template is empty, argv is NULL and argc is 0.
+ *
+ * Caller must call free_child_command().
+ */
+static struct child_cmd *generate_child_command(void)
+{
+	/* { placeholder, debugfs file }, applied in this order to each argument. */
+	static const char *const placeholders[][2] = {
+		{ "READER_TASKS", "reader_tasks" },
+		{ "WRITER_TASKS", "writer_tasks" },
+		{ "KFREE_TASKS", "kfree_tasks" },
+	};
+	enum { NR_PLACEHOLDERS = sizeof(placeholders) / sizeof(placeholders[0]) };
+	/* Lazily filled by replace_child_arg(), one cached list per placeholder. */
+	char *tid_lists[NR_PLACEHOLDERS] = { NULL };
+	struct child_cmd *cmd;
+
+	cmd = calloc(1, sizeof(*cmd));
+	if (!cmd)
+		fail("Failed to allocate memory for child command");
+
+	cmd->argc = child_cmd_template.argc;
+	if (cmd->argc == 0) {
+		cmd->argv = NULL;
+		return cmd;
+	}
+
+	/* One extra slot for the terminating NULL expected by execvp(). */
+	cmd->argv = malloc((cmd->argc + 1) * sizeof(char *));
+	if (!cmd->argv)
+		fail("Failed to allocate memory for child command");
+
+	for (int i = 0; i < cmd->argc; ++i) {
+		char *copy = strdup(child_cmd_template.argv[i]);
+
+		if (!copy)
+			fail("Failed to allocate memory for child command");
+
+		/* A dry run keeps the placeholders verbatim. */
+		if (!dryrun) {
+			for (int p = 0; p < NR_PLACEHOLDERS; ++p)
+				replace_child_arg(&copy, placeholders[p][0],
+						  placeholders[p][1], &tid_lists[p]);
+		}
+
+		cmd->argv[i] = copy;
+	}
+	cmd->argv[cmd->argc] = NULL;
+
+	for (int p = 0; p < NR_PLACEHOLDERS; ++p)
+		free(tid_lists[p]);
+
+	return cmd;
+}
+
+/*
+ * Release a command returned by generate_child_command(): every argument
+ * string, the argv array, and the child_cmd struct itself. A NULL @cmd is
+ * ignored.
+ *
+ * Must not be used on child_cmd_template, which does not own its argv.
+ */
+static void free_child_command(struct child_cmd *cmd)
+{
+	if (!cmd)
+		return;
+
+	/* argv is NULL when argc is 0: the loop is skipped and free(NULL) is a no-op. */
+	for (int i = 0; i < cmd->argc; i++)
+		free(cmd->argv[i]);
+	free(cmd->argv);
+
+	/* The struct was calloc()ed by generate_child_command(); not freeing it leaks one per run. */
+	free(cmd);
+}
+
/* ====================== Experiment Result Handling ====================== */
static void durations_add(struct durations *durations, u64 duration)
@@ -692,18 +892,53 @@ static void print_params(const struct modprobe_cmd *cmd)
printf("\n");
}
+/*
+ * Print the child command on a single line, arguments separated by spaces.
+ * Prints nothing when there is no child command (argc is 0).
+ */
+static void print_child_command(const struct child_cmd *cmd)
+{
+	int idx = 0;
+
+	if (!cmd->argc)
+		return;
+
+	printf("Running child command:");
+	while (idx < cmd->argc) {
+		printf(" %s", cmd->argv[idx]);
+		idx++;
+	}
+	printf("\n");
+}
+
/*
* Core Experiment function
*/
static void runonce(const struct modprobe_cmd *modprobe_cmd)
{
+ struct child_cmd *child_cmd;
struct durations *durations;
print_params(modprobe_cmd);
run_modprobe(modprobe_cmd);
- if (dryrun)
+ child_cmd = generate_child_command();
+ print_child_command(child_cmd);
+
+ if (dryrun) {
+ free_child_command(child_cmd);
return;
+ }
+
+ if (child_cmd->argc != 0) {
+ // Start command in background
+ child_pid = fork();
+ if (child_pid < 0)
+ err(EXIT_FAILURE, "Failed to fork child process");
+
+ if (child_pid == 0) {
+ execvp(child_cmd->argv[0], child_cmd->argv);
+ in_child = true;
+ err(EXIT_FAILURE, "Failed to execute child command");
+ }
+ // otherwise, parent process
+ }
+ free_child_command(child_cmd);
+ child_cmd = NULL;
+
+ /* Wait for child process to initialize */
+ sleep(child_delay);
/* Start and wait for experiment */
start_experiment();
@@ -717,6 +952,13 @@ static void runonce(const struct modprobe_cmd *modprobe_cmd)
printf("Experiment finished.\n");
+ /* Wait for child to finish */
+ if (child_pid != 0) {
+ printf("Waiting for child process to exit.\n");
+ waitpid(child_pid, NULL, 0);
+ child_pid = 0;
+ }
+
/* Print statistics */
print_writer_duration_stats(durations);
free_durations(durations);
@@ -779,13 +1021,13 @@ int bench_sync_rcu(int argc, const char **argv)
/* Parse global options first. */
argc = parse_options(argc, argv, bench_rcu_options, bench_rcu_usage,
- PARSE_OPT_STOP_AT_NON_OPTION);
+ PARSE_OPT_STOP_AT_NON_OPTION | PARSE_OPT_KEEP_DASHDASH);
/* The empty case is equivalent to 'once sync'.
* Otherwise, at least two positional options are required:
* once/range/ratio and sync/async/exp
*/
- if (argc == 0) {
+ if (argc == 0 || strcmp(argv[0], "--") == 0) {
runmode = "once";
gp_type = "sync";
} else if (argc < 2) {
--
2.50.1.565.gc32cd1483b-goog
Powered by blists - more mailing lists