Message-ID: <20260117-upgrade-poll-v1-1-179437b7bd49@google.com>
Date: Sat, 17 Jan 2026 15:25:19 +0000
From: Alice Ryhl <aliceryhl@...gle.com>
To: Christian Brauner <brauner@...nel.org>, Boqun Feng <boqun.feng@...il.com>,
"Paul E. McKenney" <paulmck@...nel.org>
Cc: Gary Guo <gary@...yguo.net>, Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
Carlos Llamas <cmllamas@...gle.com>, linux-fsdevel@...r.kernel.org,
rust-for-linux@...r.kernel.org, linux-kernel@...r.kernel.org,
Alice Ryhl <aliceryhl@...gle.com>
Subject: [PATCH RFC 1/2] rust: poll: make PollCondVar upgradable

The destructor of `PollCondVar` unconditionally calls `synchronize_rcu()`,
even if the condvar was never used with a `PollTable`. Introduce
`UpgradePollCondVar`, a wrapper around `CondVar` that is only upgraded to a
`PollCondVar` on first use with a `PollTable`. Users that never poll avoid
RCU entirely, and users that do poll replace the blocking
`synchronize_rcu()` in the destructor with an asynchronous
`kvfree_call_rcu()`.

Signed-off-by: Alice Ryhl <aliceryhl@...gle.com>
---
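For reviewers, here is a rough sketch of the intended usage. The `MyDevice`
driver below is made up for illustration and is not part of this series:

    use kernel::fs::File;
    use kernel::prelude::*;
    use kernel::sync::poll::{PollTable, UpgradePollCondVar};
    use kernel::sync::SpinLock;
    use kernel::{c_str, new_spinlock, new_upgrade_poll_condvar, static_lock_class};

    struct Inner {
        ready: bool,
    }

    #[pin_data]
    struct MyDevice {
        #[pin]
        inner: SpinLock<Inner>,
        #[pin]
        cv: UpgradePollCondVar,
    }

    impl MyDevice {
        fn new() -> impl PinInit<Self> {
            pin_init!(Self {
                inner <- new_spinlock!(Inner { ready: false }),
                cv <- new_upgrade_poll_condvar!("MyDevice::cv"),
            })
        }

        // Sleeping waiters only ever touch the plain `CondVar` half, so a
        // device that is never polled involves no RCU at all.
        fn wait_until_ready(&self) {
            let mut inner = self.inner.lock();
            while !inner.ready {
                self.cv.wait(&mut inner);
            }
        }

        // The poll path upgrades on first use. Note that `poll()` is passed
        // the same lock that the waiter above uses with `wait()`.
        fn poll(&self, file: &File, table: PollTable<'_>) -> Result<u32> {
            let cv = self
                .cv
                .poll(&self.inner, c_str!("MyDevice::cv"), static_lock_class!())?;
            table.register_wait(file, cv);
            // Mask computation elided; a real driver would derive EPOLLIN etc.
            // from `self.inner.lock().ready`.
            Ok(0)
        }
    }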
rust/kernel/sync/poll.rs | 163 ++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 161 insertions(+), 2 deletions(-)
diff --git a/rust/kernel/sync/poll.rs b/rust/kernel/sync/poll.rs
index 0ec985d560c8d3405c08dbd86e48b14c7c34484d..9fb004c71c78375bb16ded7f518aa282cdcc50f5 100644
--- a/rust/kernel/sync/poll.rs
+++ b/rust/kernel/sync/poll.rs
@@ -5,12 +5,16 @@
//! Utilities for working with `struct poll_table`.
use crate::{
- bindings,
+ alloc::AllocError,
+ bindings, container_of,
fs::File,
prelude::*,
+ sync::atomic::{Acquire, Atomic, Relaxed, Release},
+ sync::lock::{Backend, Lock},
sync::{CondVar, LockClassKey},
+ types::Opaque,
};
-use core::{marker::PhantomData, ops::Deref};
+use core::{marker::PhantomData, ops::Deref, ptr};
/// Creates a [`PollCondVar`] initialiser with the given name and a newly-created lock class.
#[macro_export]
@@ -22,6 +26,17 @@ macro_rules! new_poll_condvar {
};
}
+/// Creates an [`UpgradePollCondVar`] initialiser with the given name and a newly-created lock
+/// class.
+#[macro_export]
+macro_rules! new_upgrade_poll_condvar {
+ ($($name:literal)?) => {
+ $crate::sync::poll::UpgradePollCondVar::new(
+ $crate::optional_name!($($name)?), $crate::static_lock_class!()
+ )
+ };
+}
+
/// Wraps the kernel's `poll_table`.
///
/// # Invariants
@@ -66,6 +81,7 @@ pub fn register_wait(&self, file: &File, cv: &PollCondVar) {
///
/// [`CondVar`]: crate::sync::CondVar
#[pin_data(PinnedDrop)]
+#[repr(transparent)]
pub struct PollCondVar {
#[pin]
inner: CondVar,
@@ -78,6 +94,17 @@ pub fn new(name: &'static CStr, key: Pin<&'static LockClassKey>) -> impl PinInit
inner <- CondVar::new(name, key),
})
}
+
+ /// Use this `CondVar` as a `PollCondVar`.
+ ///
+ /// # Safety
+ ///
+/// After the last use of the returned `&PollCondVar`, `__wake_up_pollfree` must be called on the
+/// underlying `wait_queue_head`, and at least one RCU grace period must pass before the
+/// `CondVar` is destroyed.
+ unsafe fn from_non_poll(c: &CondVar) -> &PollCondVar {
+ // SAFETY: `PollCondVar` is `#[repr(transparent)]` over `CondVar`, so the layouts match. The
+ // caller ensures that all `PollTable` registrations are cleared in time.
+ unsafe { &*ptr::from_ref(c).cast() }
+ }
}
// Make the `CondVar` methods callable on `PollCondVar`.
@@ -104,3 +131,135 @@ fn drop(self: Pin<&mut Self>) {
unsafe { bindings::synchronize_rcu() };
}
}
+
+/// Wrapper around [`CondVar`] that can be upgraded to [`PollCondVar`].
+///
+/// By using this wrapper, you can avoid RCU entirely for cases that don't use [`PollTable`],
+/// and in all cases you can avoid a blocking `synchronize_rcu()` in the destructor.
+///
+/// # Invariants
+///
+/// `active` either points at `simple`, or at the `upgraded` field of a `kmalloc` allocation
+/// holding an `UpgradePollCondVarInner`. In the latter case, the allocation remains valid until
+/// `Self::drop()` plus one RCU grace period.
+#[pin_data(PinnedDrop)]
+pub struct UpgradePollCondVar {
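+ /// The condvar used before any upgrade; `active` points here initially.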
+ #[pin]
+ simple: CondVar,
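+ /// Either points at `simple`, or at the `upgraded` field of a heap-allocated
+ /// `UpgradePollCondVarInner` (see the invariants above).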
+ active: Atomic<*mut CondVar>,
+}
+
+#[pin_data]
+#[repr(C)]
+struct UpgradePollCondVarInner {
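+ /// The upgraded condvar; `active` points here after a successful upgrade.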
+ #[pin]
+ upgraded: CondVar,
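+ /// Used by `UpgradePollCondVar::drop` to free this allocation after an RCU grace period.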
+ #[pin]
+ rcu: Opaque<bindings::callback_head>,
+}
+
+impl UpgradePollCondVar {
+ /// Constructs a new upgradable condvar initialiser.
+ pub fn new(name: &'static CStr, key: Pin<&'static LockClassKey>) -> impl PinInit<Self> {
+ pin_init!(&this in Self {
+ simple <- CondVar::new(name, key),
+ // SAFETY: `this->simple` is in-bounds of the allocation.
+ active: Atomic::new((unsafe { &raw const (*this.as_ptr()).simple }).cast_mut()),
+ })
+ }
+
+ /// Obtain a [`PollCondVar`], upgrading if necessary.
+ ///
+ /// You should pass the same lock that is used with the `wait_*` methods; otherwise, wakeups
+ /// may be missed.
+ pub fn poll<T: ?Sized, B: Backend>(
+ &self,
+ lock: &Lock<T, B>,
+ name: &'static CStr,
+ key: Pin<&'static LockClassKey>,
+ ) -> Result<&PollCondVar, AllocError> {
+ let mut ptr = self.active.load(Acquire).cast_const();
+ if ptr::eq(ptr, &self.simple) {
+ self.upgrade(lock, name, key)?;
+ ptr = self.active.load(Acquire).cast_const();
+ debug_assert_ne!(ptr, ptr::from_ref(&self.simple));
+ }
+ // SAFETY: The returned `&PollCondVar` borrows from `self`, so its last use happens before
+ // drop(), and drop() calls `__wake_up_pollfree` followed by waiting a grace period before the
+ // `CondVar` is destroyed.
+ Ok(unsafe { PollCondVar::from_non_poll(&*ptr) })
+ }
+
+ fn upgrade<T: ?Sized, B: Backend>(
+ &self,
+ lock: &Lock<T, B>,
+ name: &'static CStr,
+ key: Pin<&'static LockClassKey>,
+ ) -> Result<(), AllocError> {
+ let upgraded = KBox::pin_init(
+ pin_init!(UpgradePollCondVarInner {
+ upgraded <- CondVar::new(name, key),
+ rcu: Opaque::uninit(),
+ }),
+ GFP_KERNEL,
+ )
+ .map_err(|_| AllocError)?;
+
+ // SAFETY: The value is treated as pinned.
+ let upgraded = KBox::into_raw(unsafe { Pin::into_inner_unchecked(upgraded) });
+
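+ // ORDERING: The `Release` ordering pairs with the `Acquire` loads in `poll()` and `deref()`,
+ // so a thread that observes the new pointer also observes the initialisation of `upgraded`.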
+ let res = self.active.cmpxchg(
+ ptr::from_ref(&self.simple).cast_mut(),
+ // SAFETY: This operation stays in-bounds of the above allocation.
+ unsafe { &raw mut (*upgraded).upgraded },
+ Release,
+ );
+
+ if res.is_err() {
+ // SAFETY: The cmpxchg failed, so take back ownership of the box.
+ drop(unsafe { KBox::from_raw(upgraded) });
+ return Ok(());
+ }
+
+ // If a normal waiter registers in parallel with us, then either:
+ // * We took the lock first. In that case, the waiter sees the above cmpxchg.
+ // * They took the lock first. In that case, we wake them up below.
+ drop(lock.lock());
+ self.simple.notify_all();
+
+ Ok(())
+ }
+}
+
+// Make the `CondVar` methods callable on `UpgradePollCondVar`.
+impl Deref for UpgradePollCondVar {
+ type Target = CondVar;
+
+ fn deref(&self) -> &CondVar {
+ // SAFETY: By the type invariants, this is either `&self.simple` or references an
+ // allocation that lives until `UpgradePollCondVar::drop`.
+ unsafe { &*self.active.load(Acquire) }
+ }
+}
+
+#[pinned_drop]
+impl PinnedDrop for UpgradePollCondVar {
+ #[inline]
+ fn drop(self: Pin<&mut Self>) {
+ // ORDERING: All calls to `upgrade` happen-before Drop, so no synchronization is required.
+ let ptr = self.active.load(Relaxed);
+ if ptr::eq(ptr, &self.simple) {
+ return;
+ }
+ // SAFETY: When the pointer is not `&self.simple`, it points at the `upgraded` field of an
+ // `UpgradePollCondVarInner`.
+ let ptr = unsafe { container_of!(ptr, UpgradePollCondVarInner, upgraded) };
+ // SAFETY: The pointer points at a valid `wait_queue_head`.
+ unsafe { bindings::__wake_up_pollfree((*ptr).upgraded.wait_queue_head.get()) };
+ // This skips drop of `CondVar`, but that's ok because we reimplemented its drop here.
+ //
+ // SAFETY: `__wake_up_pollfree` ensures that all registered PollTable instances are gone in
+ // one grace period, and this is the destructor so no new PollTable instances can be
+ // registered. Thus, it's safe to RCU-free the `UpgradePollCondVarInner`.
+ unsafe { bindings::kvfree_call_rcu((*ptr).rcu.get(), ptr.cast::<ffi::c_void>()) };
+ }
+}
--
2.52.0.457.g6b5491de43-goog