lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250730022347.71722-2-yuzhuo@google.com>
Date: Tue, 29 Jul 2025 19:23:44 -0700
From: Yuzhuo Jing <yuzhuo@...gle.com>
To: Ian Rogers <irogers@...gle.com>, Yuzhuo Jing <yzj@...ch.edu>, Jonathan Corbet <corbet@....net>, 
	Davidlohr Bueso <dave@...olabs.net>, "Paul E . McKenney" <paulmck@...nel.org>, 
	Josh Triplett <josh@...htriplett.org>, Frederic Weisbecker <frederic@...nel.org>, 
	Neeraj Upadhyay <neeraj.upadhyay@...nel.org>, Joel Fernandes <joelagnelf@...dia.com>, 
	Boqun Feng <boqun.feng@...il.com>, Uladzislau Rezki <urezki@...il.com>, 
	Steven Rostedt <rostedt@...dmis.org>, Mathieu Desnoyers <mathieu.desnoyers@...icios.com>, 
	Lai Jiangshan <jiangshanlai@...il.com>, Zqiang <qiang.zhang@...ux.dev>, 
	Andrew Morton <akpm@...ux-foundation.org>, Ingo Molnar <mingo@...nel.org>, 
	Borislav Petkov <bp@...en8.de>, Arnd Bergmann <arnd@...db.de>, Frank van der Linden <fvdl@...gle.com>, 
	linux-doc@...r.kernel.org, linux-kernel@...r.kernel.org, rcu@...r.kernel.org
Cc: Yuzhuo Jing <yuzhuo@...gle.com>
Subject: [PATCH v1 1/4] rcuscale: Create debugfs file for writer durations

Create an "rcuscale" directory in debugfs containing a "writer_durations"
file.  The file is in CSV format.  Each line represents one duration
record, with columns defined as:

  writer_id,duration

Add a "writer_no_print" option to skip printing writer durations to the
kernel ring buffer on cleanup.

This allows external tools to read structured data and also drastically
improves cleanup performance on large core count machines.

On a 256C 512T machine running nreaders=1 nwriters=511:

Before:
$ time modprobe -r rcuscale; modprobe -r torture
real    3m17.349s
user    0m0.000s
sys     3m15.288s

After:
$ time cat /sys/kernel/debug/rcuscale/writer_durations > durations.csv
real    0m0.005s
user    0m0.000s
sys     0m0.005s
$ time modprobe -r rcuscale; modprobe -r torture
real    0m0.388s
user    0m0.000s
sys     0m0.335s

Signed-off-by: Yuzhuo Jing <yuzhuo@...gle.com>
---
 .../admin-guide/kernel-parameters.txt         |   5 +
 kernel/rcu/rcuscale.c                         | 142 +++++++++++++++++-
 2 files changed, 139 insertions(+), 8 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f1f2c0874da9..7b62a84a19d4 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5583,6 +5583,11 @@
 			periods, but in jiffies.  The default of zero
 			says no holdoff.
 
+	rcuscale.writer_no_print= [KNL]
+			Do not print writer durations to kernel ring buffer.
+			Instead, users can read them from the
+			rcuscale/writer_durations file in debugfs.
+
 	rcutorture.fqs_duration= [KNL]
 			Set duration of force_quiescent_state bursts
 			in microseconds.
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index b521d0455992..ad10b42be6fc 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -40,6 +40,8 @@
 #include <linux/vmalloc.h>
 #include <linux/rcupdate_trace.h>
 #include <linux/sched/debug.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
 
 #include "rcu.h"
 
@@ -97,6 +99,7 @@ torture_param(bool, shutdown, RCUSCALE_SHUTDOWN,
 torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
 torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable");
 torture_param(int, writer_holdoff_jiffies, 0, "Holdoff (jiffies) between GPs, zero to disable");
+torture_param(bool, writer_no_print, false, "Do not print writer durations to ring buffer");
 torture_param(int, kfree_rcu_test, 0, "Do we run a kfree_rcu() scale test?");
 torture_param(int, kfree_mult, 1, "Multiple of kfree_obj size to allocate.");
 torture_param(int, kfree_by_call_rcu, 0, "Use call_rcu() to emulate kfree_rcu()?");
@@ -138,6 +141,9 @@ static u64 t_rcu_scale_writer_finished;
 static unsigned long b_rcu_gp_test_started;
 static unsigned long b_rcu_gp_test_finished;
 
+static struct dentry *debugfs_dir;
+static struct dentry *debugfs_writer_durations;
+
 #define MAX_MEAS 10000
 #define MIN_MEAS 100
 
@@ -607,6 +613,7 @@ rcu_scale_writer(void *arg)
 		t = ktime_get_mono_fast_ns();
 		*wdp = t - *wdp;
 		i_max = i;
+		writer_n_durations[me] = i_max + 1;
 		if (!started &&
 		    atomic_read(&n_rcu_scale_writer_started) >= nrealwriters)
 			started = true;
@@ -620,6 +627,7 @@ rcu_scale_writer(void *arg)
 			    nrealwriters) {
 				schedule_timeout_interruptible(10);
 				rcu_ftrace_dump(DUMP_ALL);
+				WRITE_ONCE(test_complete, true);
 				SCALEOUT_STRING("Test complete");
 				t_rcu_scale_writer_finished = t;
 				if (gp_exp) {
@@ -666,7 +674,6 @@ rcu_scale_writer(void *arg)
 		rcu_scale_free(wmbp);
 		cur_ops->gp_barrier();
 	}
-	writer_n_durations[me] = i_max + 1;
 	torture_kthread_stopping("rcu_scale_writer");
 	return 0;
 }
@@ -941,6 +948,117 @@ kfree_scale_init(void)
 	return firsterr;
 }
 
+/*
+ * seq_file iterator over writer_durations: element i is writer_durations + i.
+ * Empty until test_complete is set; READ_ONCE() pairs with the writer's store.
+ */
+static void *writer_durations_start(struct seq_file *m, loff_t *pos)
+{
+	loff_t writer_id = *pos;
+
+	if (!READ_ONCE(test_complete) || writer_id < 0 || writer_id >= nrealwriters)
+		return NULL;
+
+	return writer_durations + writer_id;
+}
+
+static void *writer_durations_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	++*pos;
+	return writer_durations_start(m, pos);
+}
+
+static void writer_durations_stop(struct seq_file *m, void *v)
+{
+}
+
+/*
+ * Print one writer's records.  v points at that writer's slot in
+ * writer_durations, so the slot index is the writer ID.  Emits
+ * writer_n_durations[writer_id] CSV lines (durations are u64, hence %llu):
+ * writer_id,duration
+ */
+static int writer_durations_show(struct seq_file *m, void *v)
+{
+	u64 **durations = v;
+	loff_t writer_id = durations - writer_durations;
+
+	for (int i = 0; i < writer_n_durations[writer_id]; ++i)
+		seq_printf(m, "%lld,%llu\n", writer_id, (*durations)[i]);
+
+	return 0;
+}
+
+static const struct seq_operations writer_durations_op = {
+	.start = writer_durations_start,
+	.next = writer_durations_next,
+	.stop = writer_durations_stop,
+	.show = writer_durations_show,
+};
+
+static int writer_durations_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &writer_durations_op);
+}
+
+static const struct file_operations writer_durations_fops = {
+	.owner		= THIS_MODULE,
+	.open		= writer_durations_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+};
+
+/*
+ * Create debugfs rcuscale/writer_durations.  Returns 0 or the PTR_ERR() code.
+ */
+static int register_debugfs(void)
+{
+#define try_create_file(variable, name, mode, parent, data, fops)		\
+({										\
+	variable = debugfs_create_file((name), (mode), (parent), (data), (fops)); \
+	err = PTR_ERR_OR_ZERO(variable);					\
+	err;									\
+})
+
+	int err;
+
+	debugfs_dir = debugfs_create_dir("rcuscale", NULL);
+	err = PTR_ERR_OR_ZERO(debugfs_dir);
+	if (err)
+		goto fail;
+
+	if (try_create_file(debugfs_writer_durations, "writer_durations", 0444,
+			debugfs_dir, NULL, &writer_durations_fops))
+		goto fail;
+
+	return 0;
+fail:
+	pr_err("rcu-scale: Failed to create debugfs file.\n");
+	/* No cleanup here: rcu_scale_cleanup() calls unregister_debugfs(),
+	 * so avoid calling it twice.
+	 */
+	return err;
+#undef try_create_file
+}
+
+/* Remove the debugfs entries; debugfs_remove() tolerates NULL/ERR_PTR. */
+static void unregister_debugfs(void)
+{
+#define try_remove(variable)			\
+do {						\
+	debugfs_remove(variable);		\
+	variable = NULL;			\
+} while (0)
+
+	try_remove(debugfs_writer_durations);
+
+	/* Remove directory after files. */
+	try_remove(debugfs_dir);
+
+#undef try_remove
+}
+
 static void
 rcu_scale_cleanup(void)
 {
@@ -961,6 +1079,8 @@ rcu_scale_cleanup(void)
 	if (gp_exp && gp_async)
 		SCALEOUT_ERRSTRING("No expedited async GPs, so went with async!");
 
+	unregister_debugfs();
+
 	// If built-in, just report all of the GP kthread's CPU time.
 	if (IS_BUILTIN(CONFIG_RCU_SCALE_TEST) && !kthread_tp && cur_ops->rso_gp_kthread)
 		kthread_tp = cur_ops->rso_gp_kthread();
@@ -1020,13 +1140,15 @@ rcu_scale_cleanup(void)
 			wdpp = writer_durations[i];
 			if (!wdpp)
 				continue;
-			for (j = 0; j < writer_n_durations[i]; j++) {
-				wdp = &wdpp[j];
-				pr_alert("%s%s %4d writer-duration: %5d %llu\n",
-					scale_type, SCALE_FLAG,
-					i, j, *wdp);
-				if (j % 100 == 0)
-					schedule_timeout_uninterruptible(1);
+			if (!writer_no_print) {
+				for (j = 0; j < writer_n_durations[i]; j++) {
+					wdp = &wdpp[j];
+					pr_alert("%s%s %4d writer-duration: %5d %llu\n",
+						scale_type, SCALE_FLAG,
+						i, j, *wdp);
+					if (j % 100 == 0)
+						schedule_timeout_uninterruptible(1);
+				}
 			}
 			kfree(writer_durations[i]);
 			if (writer_freelists) {
@@ -1202,6 +1324,10 @@ rcu_scale_init(void)
 		if (torture_init_error(firsterr))
 			goto unwind;
 	}
+
+	if (register_debugfs())
+		goto unwind;
+
 	torture_init_end();
 	return 0;
 
-- 
2.50.1.552.g942d659e1b-goog


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ