[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250619175335.2905836-1-joelagnelf@nvidia.com>
Date: Thu, 19 Jun 2025 13:53:35 -0400
From: Joel Fernandes <joelagnelf@...dia.com>
To: linux-kernel@...r.kernel.org,
Davidlohr Bueso <dave@...olabs.net>,
"Paul E. McKenney" <paulmck@...nel.org>,
Josh Triplett <josh@...htriplett.org>,
Frederic Weisbecker <frederic@...nel.org>,
Neeraj Upadhyay <neeraj.upadhyay@...nel.org>,
Joel Fernandes <joelagnelf@...dia.com>,
Boqun Feng <boqun.feng@...il.com>,
Uladzislau Rezki <urezki@...il.com>,
Steven Rostedt <rostedt@...dmis.org>,
Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
Lai Jiangshan <jiangshanlai@...il.com>,
Zqiang <qiang.zhang@...ux.dev>,
Miguel Ojeda <ojeda@...nel.org>,
Alex Gaynor <alex.gaynor@...il.com>,
Gary Guo <gary@...yguo.net>,
Björn Roy Baron <bjorn3_gh@...tonmail.com>,
Benno Lossin <benno.lossin@...ton.me>,
Andreas Hindborg <a.hindborg@...nel.org>,
Alice Ryhl <aliceryhl@...gle.com>,
Trevor Gross <tmgross@...ch.edu>,
Danilo Krummrich <dakr@...nel.org>
Cc: Lyude Paul <lyude@...hat.com>,
rcu@...r.kernel.org,
Joel Fernandes <joel@...lfernandes.org>,
rust-for-linux@...r.kernel.org
Subject: [PATCH v2] refscale: Add tests for local_irq_disable() vs local_interrupt_disable()
Add two new refscale test cases to compare the performance of
traditional local_irq_disable()/local_irq_enable() with the newer
local_interrupt_disable()/local_interrupt_enable() APIs.
The local_interrupt_disable()/local_interrupt_enable() APIs are
introduced to provide a Rust-compatible interface for interrupt
control, as mentioned in:
https://lore.kernel.org/all/20240527222254.565881-1-lyude@redhat.com/
The two new tests are "local_interrupt" for the new API and "local_irq"
for the traditional one. This allows a direct performance comparison
between the two approaches.
Test results on x86 with 4 readers, 5 runs, 10000 loops:
local_irq (traditional API):
Run 1: 1.306 ns
Run 2: 1.306 ns
Run 3: 1.305 ns
Run 4: 1.307 ns
Run 5: 1.085 ns
Average: ~1.26 ns per operation
local_interrupt (new API):
Run 1: 4.594 ns
Run 2: 4.201 ns
Run 3: 4.428 ns
Run 4: 4.905 ns
Run 5: 4.566 ns
Average: ~4.54 ns per operation
The results show higher overhead with local_interrupt_disable()/enable(),
possibly due to the additional state tracking.
To run the module, modprobe refscale scale_type=local_irq (or local_interrupt).
Cc: Lyude Paul <lyude@...hat.com>
Cc: Boqun Feng <boqun.feng@...il.com>
Cc: rcu@...r.kernel.org
Signed-off-by: Joel Fernandes (Google) <joel@...lfernandes.org>
---
kernel/rcu/refscale.c | 73 ++++++++++++++++++++++++++++++++++++++++---
1 file changed, 69 insertions(+), 4 deletions(-)
diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c
index f11a7c2af778..ac6e2391d672 100644
--- a/kernel/rcu/refscale.c
+++ b/kernel/rcu/refscale.c
@@ -71,7 +71,7 @@ MODULE_AUTHOR("Joel Fernandes (Google) <joel@...lfernandes.org>");
static char *scale_type = "rcu";
module_param(scale_type, charp, 0444);
-MODULE_PARM_DESC(scale_type, "Type of test (rcu, srcu, refcnt, rwsem, rwlock.");
+MODULE_PARM_DESC(scale_type, "Type of test (rcu, srcu, refcnt, rwsem, rwlock, local_interrupt, local_irq).");
torture_param(int, verbose, 0, "Enable verbose debugging printk()s");
torture_param(int, verbose_batched, 0, "Batch verbose debugging printk()s");
@@ -524,6 +524,62 @@ static const struct ref_scale_ops lock_irq_ops = {
.name = "lock-irq"
};
+// IRQ disable/enable tests using local_interrupt_disable/enable.
+static void ref_local_interrupt_section(const int nloops)
+{
+ int i;
+
+ for (i = nloops; i >= 0; i--) {
+ local_interrupt_disable();
+ local_interrupt_enable();
+ }
+}
+
+static void ref_local_interrupt_delay_section(const int nloops, const int udl, const int ndl)
+{
+ int i;
+
+ for (i = nloops; i >= 0; i--) {
+ local_interrupt_disable();
+ un_delay(udl, ndl);
+ local_interrupt_enable();
+ }
+}
+
+static const struct ref_scale_ops local_interrupt_ops = {
+ .readsection = ref_local_interrupt_section,
+ .delaysection = ref_local_interrupt_delay_section,
+ .name = "local_interrupt"
+};
+
+// IRQ disable/enable tests using local_irq_disable/enable.
+static void ref_local_irq_section(const int nloops)
+{
+ int i;
+
+ for (i = nloops; i >= 0; i--) {
+ local_irq_disable();
+ local_irq_enable();
+ }
+}
+
+static void ref_local_irq_delay_section(const int nloops, const int udl, const int ndl)
+{
+ int i;
+
+ for (i = nloops; i >= 0; i--) {
+ local_irq_disable();
+ un_delay(udl, ndl);
+ local_irq_enable();
+ }
+}
+
+static const struct ref_scale_ops local_irq_ops = {
+ .readsection = ref_local_irq_section,
+ .delaysection = ref_local_irq_delay_section,
+ .name = "local_irq"
+};
+
// Definitions acquire-release.
static DEFINE_PER_CPU(unsigned long, test_acqrel);
@@ -956,13 +1012,22 @@ ref_scale_reader(void *arg)
rcu_scale_one_reader();
// Also keep interrupts disabled. This also has the effect
// of preventing entries into slow path for rcu_read_unlock().
- local_irq_save(flags);
+ // Exception: for IRQ ops, use preempt_disable instead since we need
+ // to test actual IRQ disable/enable performance.
+ if (cur_ops == &local_interrupt_ops || cur_ops == &local_irq_ops)
+ preempt_disable();
+ else
+ local_irq_save(flags);
start = ktime_get_mono_fast_ns();
rcu_scale_one_reader();
duration = ktime_get_mono_fast_ns() - start;
- local_irq_restore(flags);
+
+ if (cur_ops == &local_interrupt_ops || cur_ops == &local_irq_ops)
+ preempt_enable();
+ else
+ local_irq_restore(flags);
rt->last_duration_ns = WARN_ON_ONCE(duration < 0) ? 0 : duration;
// To reduce runtime-skew noise, do maintain-load invocations until
@@ -1194,7 +1259,7 @@ ref_scale_init(void)
int firsterr = 0;
static const struct ref_scale_ops *scale_ops[] = {
&rcu_ops, &srcu_ops, &srcu_fast_ops, &srcu_lite_ops, RCU_TRACE_OPS RCU_TASKS_OPS
- &refcnt_ops, &rwlock_ops, &rwsem_ops, &lock_ops, &lock_irq_ops,
+ &refcnt_ops, &rwlock_ops, &rwsem_ops, &lock_ops, &lock_irq_ops, &local_interrupt_ops, &local_irq_ops,
&acqrel_ops, &sched_clock_ops, &clock_ops, &jiffies_ops,
&typesafe_ref_ops, &typesafe_lock_ops, &typesafe_seqlock_ops,
};
--
2.43.0
Powered by blists - more mailing lists