[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250730022347.71722-2-yuzhuo@google.com>
Date: Tue, 29 Jul 2025 19:23:44 -0700
From: Yuzhuo Jing <yuzhuo@...gle.com>
To: Ian Rogers <irogers@...gle.com>, Yuzhuo Jing <yzj@...ch.edu>, Jonathan Corbet <corbet@....net>,
Davidlohr Bueso <dave@...olabs.net>, "Paul E . McKenney" <paulmck@...nel.org>,
Josh Triplett <josh@...htriplett.org>, Frederic Weisbecker <frederic@...nel.org>,
Neeraj Upadhyay <neeraj.upadhyay@...nel.org>, Joel Fernandes <joelagnelf@...dia.com>,
Boqun Feng <boqun.feng@...il.com>, Uladzislau Rezki <urezki@...il.com>,
Steven Rostedt <rostedt@...dmis.org>, Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
Lai Jiangshan <jiangshanlai@...il.com>, Zqiang <qiang.zhang@...ux.dev>,
Andrew Morton <akpm@...ux-foundation.org>, Ingo Molnar <mingo@...nel.org>,
Borislav Petkov <bp@...en8.de>, Arnd Bergmann <arnd@...db.de>, Frank van der Linden <fvdl@...gle.com>,
linux-doc@...r.kernel.org, linux-kernel@...r.kernel.org, rcu@...r.kernel.org
Cc: Yuzhuo Jing <yuzhuo@...gle.com>
Subject: [PATCH v1 1/4] rcuscale: Create debugfs file for writer durations
Create an "rcuscale" directory in debugfs and a "writer_durations" file
inside it. The file is in CSV format. Each line represents
one duration record, with columns defined as:
writer_id,duration
Add an option "writer_no_print" to skip printing writer durations to the
kernel ring buffer on cleanup.
This allows external tools to read structured data and also drastically
improves cleanup performance on large core count machines.
On a 256C 512T machine running nreaders=1 nwriters=511:
Before:
$ time modprobe -r rcuscale; modprobe -r torture
real 3m17.349s
user 0m0.000s
sys 3m15.288s
After:
$ time cat /sys/kernel/debug/rcuscale/writer_durations > durations.csv
real 0m0.005s
user 0m0.000s
sys 0m0.005s
$ time modprobe -r rcuscale; modprobe -r torture
real 0m0.388s
user 0m0.000s
sys 0m0.335s
Signed-off-by: Yuzhuo Jing <yuzhuo@...gle.com>
---
.../admin-guide/kernel-parameters.txt | 5 +
kernel/rcu/rcuscale.c | 142 +++++++++++++++++-
2 files changed, 139 insertions(+), 8 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f1f2c0874da9..7b62a84a19d4 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5583,6 +5583,11 @@
periods, but in jiffies. The default of zero
says no holdoff.
+ rcuscale.writer_no_print= [KNL]
+ Do not print writer durations to kernel ring buffer.
+ Instead, users can read them from the
+ rcuscale/writer_durations file in debugfs.
+
rcutorture.fqs_duration= [KNL]
Set duration of force_quiescent_state bursts
in microseconds.
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index b521d0455992..ad10b42be6fc 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -40,6 +40,8 @@
#include <linux/vmalloc.h>
#include <linux/rcupdate_trace.h>
#include <linux/sched/debug.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
#include "rcu.h"
@@ -97,6 +99,7 @@ torture_param(bool, shutdown, RCUSCALE_SHUTDOWN,
torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable");
torture_param(int, writer_holdoff_jiffies, 0, "Holdoff (jiffies) between GPs, zero to disable");
+torture_param(bool, writer_no_print, false, "Do not print writer durations to ring buffer");
torture_param(int, kfree_rcu_test, 0, "Do we run a kfree_rcu() scale test?");
torture_param(int, kfree_mult, 1, "Multiple of kfree_obj size to allocate.");
torture_param(int, kfree_by_call_rcu, 0, "Use call_rcu() to emulate kfree_rcu()?");
@@ -138,6 +141,9 @@ static u64 t_rcu_scale_writer_finished;
static unsigned long b_rcu_gp_test_started;
static unsigned long b_rcu_gp_test_finished;
+static struct dentry *debugfs_dir;
+static struct dentry *debugfs_writer_durations;
+
#define MAX_MEAS 10000
#define MIN_MEAS 100
@@ -607,6 +613,7 @@ rcu_scale_writer(void *arg)
t = ktime_get_mono_fast_ns();
*wdp = t - *wdp;
i_max = i;
+ writer_n_durations[me] = i_max + 1;
if (!started &&
atomic_read(&n_rcu_scale_writer_started) >= nrealwriters)
started = true;
@@ -620,6 +627,7 @@ rcu_scale_writer(void *arg)
nrealwriters) {
schedule_timeout_interruptible(10);
rcu_ftrace_dump(DUMP_ALL);
+ WRITE_ONCE(test_complete, true);
SCALEOUT_STRING("Test complete");
t_rcu_scale_writer_finished = t;
if (gp_exp) {
@@ -666,7 +674,6 @@ rcu_scale_writer(void *arg)
rcu_scale_free(wmbp);
cur_ops->gp_barrier();
}
- writer_n_durations[me] = i_max + 1;
torture_kthread_stopping("rcu_scale_writer");
return 0;
}
@@ -941,6 +948,117 @@ kfree_scale_init(void)
return firsterr;
}
+/*
+ * seq_file iterator over writer_durations: element i of the sequence is
+ * writer_durations + i, i.e. the slot holding writer i's duration array.
+ *
+ * Returns the element at *pos, or NULL when the test has not completed yet
+ * (the file then reads as empty) or *pos is outside [0, nrealwriters).
+ */
+static void *writer_durations_start(struct seq_file *m, loff_t *pos)
+{
+	loff_t writer_id = *pos;
+
+	/*
+	 * test_complete is set with WRITE_ONCE() in rcu_scale_writer(), so
+	 * mark this lockless read with READ_ONCE() to pair with it.
+	 */
+	if (!READ_ONCE(test_complete))
+		return NULL;
+
+	if (writer_id < 0 || writer_id >= nrealwriters)
+		return NULL;
+
+	return writer_durations + writer_id;
+}
+
+/*
+ * ->next callback: step *pos to the following writer and look it up through
+ * writer_durations_start(), which returns NULL once the position is invalid.
+ */
+static void *writer_durations_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+
+	return writer_durations_start(m, pos);
+}
+
+/* ->stop callback: intentionally empty. */
+static void writer_durations_stop(struct seq_file *m, void *v)
+{
+	/* Nothing to undo here. */
+}
+
+/*
+ * ->show callback: emit every recorded duration of one writer.
+ *
+ * @v is the element handed out by ->start/->next, a pointer into the
+ * writer_durations array; its offset from the array base is the writer id.
+ * writer_n_durations[writer_id] lines are printed, one record per line, in
+ * CSV format:
+ *	writer_id,duration
+ *
+ * Always returns 0.
+ */
+static int writer_durations_show(struct seq_file *m, void *v)
+{
+	u64 **durations = v;
+	u64 *records = *durations;
+	loff_t writer_id = durations - writer_durations;
+	int i;
+
+	/*
+	 * Defensive: rcu_scale_cleanup() also tolerates a NULL per-writer
+	 * array, so print nothing for such a writer instead of dereferencing.
+	 */
+	if (!records)
+		return 0;
+
+	/* Durations are u64, so they must be formatted as unsigned (%llu). */
+	for (i = 0; i < writer_n_durations[writer_id]; i++)
+		seq_printf(m, "%lld,%llu\n", writer_id, records[i]);
+
+	return 0;
+}
+
+/* Iterator callbacks backing the writer_durations seq_file. */
+static const struct seq_operations writer_durations_op = {
+	.show	= writer_durations_show,
+	.start	= writer_durations_start,
+	.next	= writer_durations_next,
+	.stop	= writer_durations_stop,
+};
+
+/* ->open handler: attach the writer_durations iterator to @file. */
+static int writer_durations_open(struct inode *inode, struct file *file)
+{
+	const struct seq_operations *ops = &writer_durations_op;
+
+	return seq_open(file, ops);
+}
+
+/*
+ * File operations for writer_durations.  Only the seq_file read/seek/release
+ * helpers are wired up; no write handler is provided.
+ */
+static const struct file_operations writer_durations_fops = {
+	.owner		= THIS_MODULE,
+	.open		= writer_durations_open,
+	.release	= seq_release,
+	.llseek		= seq_lseek,
+	.read		= seq_read,
+};
+
+/*
+ * Create the debugfs "rcuscale" directory and the "writer_durations" file
+ * inside it, exposing run states and results.
+ *
+ * Returns 0 on success, or the negative errno extracted from the debugfs
+ * call that failed.  Nothing is removed here on failure:
+ * unregister_debugfs() is called by rcu_scale_cleanup(), and calling it
+ * from this function as well would run it twice.
+ */
+static int register_debugfs(void)
+{
+	int err;
+
+	debugfs_dir = debugfs_create_dir("rcuscale", NULL);
+	err = PTR_ERR_OR_ZERO(debugfs_dir);
+	if (err)
+		goto fail;
+
+	debugfs_writer_durations = debugfs_create_file("writer_durations", 0444,
+						       debugfs_dir, NULL,
+						       &writer_durations_fops);
+	err = PTR_ERR_OR_ZERO(debugfs_writer_durations);
+	if (err)
+		goto fail;
+
+	return 0;
+
+fail:
+	/* Terminate the message with a newline so the log line is complete. */
+	pr_err("rcu-scale: Failed to create debugfs file.\n");
+	return err;
+}
+
+/*
+ * Remove the debugfs entries created by register_debugfs() and reset the
+ * saved dentry pointers to NULL.  Pointers that are NULL or hold an error
+ * value are not passed to debugfs_remove().
+ */
+static void unregister_debugfs(void)
+{
+	if (!IS_ERR_OR_NULL(debugfs_writer_durations))
+		debugfs_remove(debugfs_writer_durations);
+	debugfs_writer_durations = NULL;
+
+	/* The directory is removed last, after the file it contains. */
+	if (!IS_ERR_OR_NULL(debugfs_dir))
+		debugfs_remove(debugfs_dir);
+	debugfs_dir = NULL;
+}
+
static void
rcu_scale_cleanup(void)
{
@@ -961,6 +1079,8 @@ rcu_scale_cleanup(void)
if (gp_exp && gp_async)
SCALEOUT_ERRSTRING("No expedited async GPs, so went with async!");
+ unregister_debugfs();
+
// If built-in, just report all of the GP kthread's CPU time.
if (IS_BUILTIN(CONFIG_RCU_SCALE_TEST) && !kthread_tp && cur_ops->rso_gp_kthread)
kthread_tp = cur_ops->rso_gp_kthread();
@@ -1020,13 +1140,15 @@ rcu_scale_cleanup(void)
wdpp = writer_durations[i];
if (!wdpp)
continue;
- for (j = 0; j < writer_n_durations[i]; j++) {
- wdp = &wdpp[j];
- pr_alert("%s%s %4d writer-duration: %5d %llu\n",
- scale_type, SCALE_FLAG,
- i, j, *wdp);
- if (j % 100 == 0)
- schedule_timeout_uninterruptible(1);
+ if (!writer_no_print) {
+ for (j = 0; j < writer_n_durations[i]; j++) {
+ wdp = &wdpp[j];
+ pr_alert("%s%s %4d writer-duration: %5d %llu\n",
+ scale_type, SCALE_FLAG,
+ i, j, *wdp);
+ if (j % 100 == 0)
+ schedule_timeout_uninterruptible(1);
+ }
}
kfree(writer_durations[i]);
if (writer_freelists) {
@@ -1202,6 +1324,10 @@ rcu_scale_init(void)
if (torture_init_error(firsterr))
goto unwind;
}
+
+ if (register_debugfs())
+ goto unwind;
+
torture_init_end();
return 0;
--
2.50.1.552.g942d659e1b-goog
Powered by blists - more mailing lists