[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250624-rust-percpu-v1-4-9c59b07d2a9c@gmail.com>
Date: Tue, 24 Jun 2025 15:10:42 -0700
From: Mitchell Levy <levymitchell0@...il.com>
To: Miguel Ojeda <ojeda@...nel.org>, Alex Gaynor <alex.gaynor@...il.com>,
Boqun Feng <boqun.feng@...il.com>, Gary Guo <gary@...yguo.net>,
Björn Roy Baron <bjorn3_gh@...tonmail.com>,
Andreas Hindborg <a.hindborg@...nel.org>, Alice Ryhl <aliceryhl@...gle.com>,
Trevor Gross <tmgross@...ch.edu>, Andrew Morton <akpm@...ux-foundation.org>,
Dennis Zhou <dennis@...nel.org>, Tejun Heo <tj@...nel.org>,
Christoph Lameter <cl@...ux.com>, Danilo Krummrich <dakr@...nel.org>,
Benno Lossin <lossin@...nel.org>
Cc: linux-kernel@...r.kernel.org, rust-for-linux@...r.kernel.org,
linux-mm@...ck.org, Mitchell Levy <levymitchell0@...il.com>
Subject: [PATCH 4/5] rust: percpu: Add pin-hole optimizations for numerics
The C implementations of `this_cpu_add`, `this_cpu_sub`, etc., are
optimized to save an instruction by avoiding having to compute
`this_cpu_ptr(&x)` for some per-CPU variable `x`. For example, rather
than
u64 *x_ptr = this_cpu_ptr(&x);
*x_ptr += 5;
the implementation of `this_cpu_add` is clever enough to make use of the
fact that per-CPU variables are implemented on x86 via segment
registers, and so we can use only a single instruction (where we assume
`&x` is already in `rax`)
add gs:[rax], 5
Add this optimization via a `PerCpuNumeric` type to enable code-reuse
between `DynamicPerCpu` and `StaticPerCpu`.
Signed-off-by: Mitchell Levy <levymitchell0@...il.com>
---
lib/percpu_test_rust.rs | 36 +++++++++++++
rust/kernel/percpu.rs | 1 +
rust/kernel/percpu/numeric.rs | 117 ++++++++++++++++++++++++++++++++++++++++++
3 files changed, 154 insertions(+)
diff --git a/lib/percpu_test_rust.rs b/lib/percpu_test_rust.rs
index a9652e6ece08..114015435a85 100644
--- a/lib/percpu_test_rust.rs
+++ b/lib/percpu_test_rust.rs
@@ -25,6 +25,26 @@
define_per_cpu!(PERCPU: i64 = 0);
define_per_cpu!(UPERCPU: u64 = 0);
+macro_rules! make_optimization_test {
+ ($ty:ty) => {
+ let mut test: DynamicPerCpu<$ty> = DynamicPerCpu::new(GFP_KERNEL).unwrap();
+ {
+ let _ = CpuGuard::new();
+ // SAFETY: No other usage of `test`
+ unsafe { test.get(CpuGuard::new()) }.with(|val: &mut $ty| *val = 10);
+ test.num().add(1);
+ // SAFETY: No other usage of `test`
+ unsafe { test.get(CpuGuard::new()) }.with(|val: &mut $ty| assert_eq!(*val, 11));
+ test.num().add(10);
+ // SAFETY: No other usage of `test`
+ unsafe { test.get(CpuGuard::new()) }.with(|val: &mut $ty| assert_eq!(*val, 21));
+ test.num().sub(5);
+ // SAFETY: No other usage of `test`
+ unsafe { test.get(CpuGuard::new()) }.with(|val: &mut $ty| assert_eq!(*val, 16));
+ }
+ };
+}
+
impl kernel::Module for PerCpuTestModule {
fn init(_module: &'static ThisModule) -> Result<Self, Error> {
pr_info!("rust percpu test start\n");
@@ -94,6 +114,22 @@ fn init(_module: &'static ThisModule) -> Result<Self, Error> {
pr_info!("rust dynamic percpu test done\n");
+ pr_info!("rust numeric optimizations test start\n");
+
+ make_optimization_test!(u8);
+ make_optimization_test!(u16);
+ make_optimization_test!(u32);
+ make_optimization_test!(u64);
+ make_optimization_test!(usize);
+
+ make_optimization_test!(i8);
+ make_optimization_test!(i16);
+ make_optimization_test!(i32);
+ make_optimization_test!(i64);
+ make_optimization_test!(isize);
+
+ pr_info!("rust numeric optimizations test done\n");
+
// Return Err to unload the module
Result::Err(EINVAL)
}
diff --git a/rust/kernel/percpu.rs b/rust/kernel/percpu.rs
index a912f74349e0..9659bff2d889 100644
--- a/rust/kernel/percpu.rs
+++ b/rust/kernel/percpu.rs
@@ -2,6 +2,7 @@
//! This module contains abstractions for creating and using per-CPU variables from Rust.
//! See the define_per_cpu! macro and the PerCpu<T> type.
pub mod cpu_guard;
+pub mod numeric;
use bindings::{alloc_percpu, free_percpu};
diff --git a/rust/kernel/percpu/numeric.rs b/rust/kernel/percpu/numeric.rs
new file mode 100644
index 000000000000..e4008f872af1
--- /dev/null
+++ b/rust/kernel/percpu/numeric.rs
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+//! Pin-hole optimizations for PerCpu<T> where T is a numeric type.
+
+use crate::percpu::*;
+use core::arch::asm;
+
+/// Represents a per-CPU variable that can be manipulated with machine-intrinsic numeric
+/// operations.
+pub struct PerCpuNumeric<'a, T> {
+ ptr: &'a PerCpuPtr<T>,
+}
+
+macro_rules! impl_ops {
+ ($ty:ty, $reg:tt) => {
+ impl DynamicPerCpu<$ty> {
+ /// Returns a `PerCpuNumeric` that can be used to manipulate the underlying per-CPU variable.
+ pub fn num(&self) -> PerCpuNumeric<'_, $ty> {
+ PerCpuNumeric { ptr: &self.alloc.0 }
+ }
+ }
+ impl StaticPerCpu<$ty> {
+ /// Returns a `PerCpuNumeric` that can be used to manipulate the underlying per-CPU variable.
+ pub fn num(&self) -> PerCpuNumeric<'_, $ty> {
+ PerCpuNumeric { ptr: &self.0 }
+ }
+ }
+
+ impl PerCpuNumeric<'_, $ty> {
+ /// Adds `rhs` to the per-CPU variable.
+ pub fn add(&mut self, rhs: $ty) {
+ // SAFETY: `self.ptr.0` is a valid offset into the per-CPU area (i.e., valid as a
+ // pointer relative to the `gs` segment register) by the invariants of PerCpu.
+ unsafe {
+ asm!(
+ concat!("add gs:[{off}], {val:", $reg, "}"),
+ off = in(reg) self.ptr.0 as *mut $ty,
+ val = in(reg) rhs,
+ );
+ }
+ }
+ }
+ impl PerCpuNumeric<'_, $ty> {
+ /// Subtracts `rhs` from the per-CPU variable.
+ pub fn sub(&mut self, rhs: $ty) {
+ // SAFETY: `self.ptr.0` is a valid offset into the per-CPU area (i.e., valid as a
+ // pointer relative to the `gs` segment register) by the invariants of PerCpu.
+ unsafe {
+ asm!(
+ concat!("sub gs:[{off}], {val:", $reg, "}"),
+ off = in(reg) self.ptr.0 as *mut $ty,
+ val = in(reg) rhs,
+ );
+ }
+ }
+ }
+ };
+}
+
+macro_rules! impl_ops_byte {
+ ($ty:ty) => {
+ impl DynamicPerCpu<$ty> {
+ /// Returns a `PerCpuNumeric` that can be used to manipulate the underlying per-CPU
+ /// variable.
+ pub fn num(&self) -> PerCpuNumeric<'_, $ty> {
+ PerCpuNumeric { ptr: &self.alloc.0 }
+ }
+ }
+ impl StaticPerCpu<$ty> {
+ /// Returns a `PerCpuNumeric` that can be used to manipulate the underlying per-CPU
+ /// variable.
+ pub fn num(&self) -> PerCpuNumeric<'_, $ty> {
+ PerCpuNumeric { ptr: &self.0 }
+ }
+ }
+
+ impl PerCpuNumeric<'_, $ty> {
+ /// Adds `rhs` to the per-CPU variable.
+ pub fn add(&mut self, rhs: $ty) {
+ // SAFETY: `self.ptr.0` is a valid offset into the per-CPU area (i.e., valid as a
+ // pointer relative to the `gs` segment register) by the invariants of PerCpu.
+ unsafe {
+ asm!(
+ concat!("add gs:[{off}], {val}"),
+ off = in(reg) self.ptr.0 as *mut $ty,
+ val = in(reg_byte) rhs,
+ );
+ }
+ }
+ }
+ impl PerCpuNumeric<'_, $ty> {
+ /// Subtracts `rhs` from the per-CPU variable.
+ pub fn sub(&mut self, rhs: $ty) {
+ // SAFETY: `self.ptr.0` is a valid offset into the per-CPU area (i.e., valid as a
+ // pointer relative to the `gs` segment register) by the invariants of PerCpu.
+ unsafe {
+ asm!(
+ concat!("sub gs:[{off}], {val}"),
+ off = in(reg) self.ptr.0 as *mut $ty,
+ val = in(reg_byte) rhs,
+ );
+ }
+ }
+ }
+ };
+}
+
+impl_ops_byte!(i8);
+impl_ops!(i16, "x");
+impl_ops!(i32, "e");
+impl_ops!(i64, "r");
+impl_ops!(isize, "r");
+
+impl_ops_byte!(u8);
+impl_ops!(u16, "x");
+impl_ops!(u32, "e");
+impl_ops!(u64, "r");
+impl_ops!(usize, "r");
--
2.34.1
Powered by blists - more mailing lists