[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20251119115630.708a124c@fedora>
Date: Wed, 19 Nov 2025 11:56:30 +0100
From: Boris Brezillon <boris.brezillon@...labora.com>
To: Alice Ryhl <aliceryhl@...gle.com>
Cc: Miguel Ojeda <ojeda@...nel.org>, Will Deacon <will@...nel.org>, Daniel
Almeida <daniel.almeida@...labora.com>, Boqun Feng <boqun.feng@...il.com>,
Gary Guo <gary@...yguo.net>, "Björn Roy Baron"
<bjorn3_gh@...tonmail.com>, Benno Lossin <lossin@...nel.org>, Andreas
Hindborg <a.hindborg@...nel.org>, Trevor Gross <tmgross@...ch.edu>, Danilo
Krummrich <dakr@...nel.org>, Joerg Roedel <joro@...tes.org>, Robin Murphy
<robin.murphy@....com>, Lorenzo Stoakes <lorenzo.stoakes@...cle.com>, "Liam
R. Howlett" <Liam.Howlett@...cle.com>, Asahi Lina
<lina+kernel@...hilina.net>, linux-kernel@...r.kernel.org,
rust-for-linux@...r.kernel.org, iommu@...ts.linux.dev, linux-mm@...ck.org
Subject: Re: [PATCH v3] io: add io_pgtable abstraction
On Wed, 12 Nov 2025 10:15:00 +0000
Alice Ryhl <aliceryhl@...gle.com> wrote:
> From: Asahi Lina <lina+kernel@...hilina.net>
>
> This will be used by the Tyr driver to create and modify the page table
> of each address space on the GPU. Each time a mapping gets created or
> removed by userspace, Tyr will call into GPUVM, which will figure out
> which calls to map_pages and unmap_pages are required to map the data in
> question in the page table so that the GPU may access those pages when
> using that address space.
>
> The Rust type wraps the struct using a raw pointer rather than the usual
> Opaque+ARef approach because Opaque+ARef requires the target type to be
> refcounted.
>
> Signed-off-by: Asahi Lina <lina+kernel@...hilina.net>
> Co-Developed-by: Alice Ryhl <aliceryhl@...gle.com>
> Signed-off-by: Alice Ryhl <aliceryhl@...gle.com>
This seems to be enough for what we need in Tyr (which is basically what
we have in Panthor, but translated to Rust).
I'm no Rust expert and I'm not an IOMMU maintainer either, so I'm not
sure how useful this is, but for what it's worth, this is
Acked-by: Boris Brezillon <boris.brezillon@...labora.com>
> ---
> This patch is based on [1] but I have rewritten and simplified large
> parts of it. The Asahi driver no longer uses the io-pgtable abstraction,
> and Nova never planned to (since NVIDIA has its own separate memory).
> Therefore, I have simplified these abstractions to fit the needs of the
> Tyr GPU driver.
>
> This series depends on the PhysAddr typedef [2].
>
> [1]: https://lore.kernel.org/all/20250623-io_pgtable-v2-1-fd72daac75f1@collabora.com/
> [2]: https://lore.kernel.org/all/20251112-resource-phys-typedefs-v2-0-538307384f82@google.com/
> ---
> rust/bindings/bindings_helper.h | 3 +-
> rust/kernel/io.rs | 1 +
> rust/kernel/io/pgtable.rs | 254 ++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 257 insertions(+), 1 deletion(-)
>
> diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h
> index 2e43c66635a2c9f31bd99b9817bd2d6ab89fbcf2..faab6bc9463321c092a8bbcb6281175e490caccd 100644
> --- a/rust/bindings/bindings_helper.h
> +++ b/rust/bindings/bindings_helper.h
> @@ -56,8 +56,9 @@
> #include <linux/fdtable.h>
> #include <linux/file.h>
> #include <linux/firmware.h>
> -#include <linux/interrupt.h>
> #include <linux/fs.h>
> +#include <linux/interrupt.h>
> +#include <linux/io-pgtable.h>
> #include <linux/ioport.h>
> #include <linux/jiffies.h>
> #include <linux/jump_label.h>
> diff --git a/rust/kernel/io.rs b/rust/kernel/io.rs
> index 56a435eb14e3a1ce72dd58b88cbf296041f1703e..5913e240d5a9814ceed52c6dc1a798e64158d567 100644
> --- a/rust/kernel/io.rs
> +++ b/rust/kernel/io.rs
> @@ -8,6 +8,7 @@
> use crate::{bindings, build_assert, ffi::c_void};
>
> pub mod mem;
> +pub mod pgtable;
> pub mod poll;
> pub mod resource;
>
> diff --git a/rust/kernel/io/pgtable.rs b/rust/kernel/io/pgtable.rs
> new file mode 100644
> index 0000000000000000000000000000000000000000..fe05bc1673f9a7741a887a3c9bbad866dd17a2b5
> --- /dev/null
> +++ b/rust/kernel/io/pgtable.rs
> @@ -0,0 +1,254 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +//! IOMMU page table management.
> +//!
> +//! C header: [`include/linux/io-pgtable.h`](srctree/include/linux/io-pgtable.h)
> +
> +use core::{
> + marker::PhantomData,
> + ptr::NonNull, //
> +};
> +
> +use crate::{
> + alloc,
> + bindings,
> + device::{Bound, Device},
> + devres::Devres,
> + error::to_result,
> + io::PhysAddr,
> + prelude::*, //
> +};
> +
> +use bindings::io_pgtable_fmt;
> +
> +/// Protection flags for IOMMU mappings, passed as the `prot` argument of `map_pages`.
> +pub mod prot {
> + /// Read access (C `IOMMU_READ`).
> + pub const READ: u32 = bindings::IOMMU_READ;
> + /// Write access (C `IOMMU_WRITE`).
> + pub const WRITE: u32 = bindings::IOMMU_WRITE;
> + /// Request cache coherency (C `IOMMU_CACHE`).
> + pub const CACHE: u32 = bindings::IOMMU_CACHE;
> + /// Request no-execute permission (C `IOMMU_NOEXEC`).
> + pub const NOEXEC: u32 = bindings::IOMMU_NOEXEC;
> + /// MMIO peripheral mapping (C `IOMMU_MMIO`).
> + pub const MMIO: u32 = bindings::IOMMU_MMIO;
> + /// Privileged mapping (C `IOMMU_PRIV`).
> + pub const PRIV: u32 = bindings::IOMMU_PRIV;
> +}
> +
> +/// Requested `io_pgtable` configuration; `new_raw` copies it into a `struct io_pgtable_cfg`.
> +pub struct Config {
> + /// Quirk bitmask (type-specific).
> + pub quirks: usize,
> + /// Valid page sizes, as a bitmask of powers of two.
> + pub pgsize_bitmap: usize,
> + /// Input address space size in bits.
> + pub ias: u32,
> + /// Output address space size in bits.
> + pub oas: u32,
> + /// Whether the IOMMU uses coherent accesses for page table walks.
> + pub coherent_walk: bool,
> +}
> +
> +/// An I/O page table using the format selected by `F`.
> +///
> +/// # Invariants
> +///
> +/// `ptr` points to the `io_pgtable_ops` of a valid I/O page table.
> +pub struct IoPageTable<F> {
> + ptr: NonNull<bindings::io_pgtable_ops>,
> + _marker: PhantomData<F>,
> +}
> +
> +// SAFETY: `struct io_pgtable_ops` is not restricted to a single thread, so it may be sent.
> +unsafe impl<F> Send for IoPageTable<F> {}
> +// SAFETY: `struct io_pgtable_ops` may be accessed concurrently, so `&IoPageTable` may be shared.
> +unsafe impl<F> Sync for IoPageTable<F> {}
> +
> +/// Selects the page table format used by an `IoPageTable`.
> +pub trait IoPageTableFmt: 'static {
> + /// The `io_pgtable_fmt` value passed to `alloc_io_pgtable_ops` for this format.
> + const FORMAT: io_pgtable_fmt;
> +}
> +
> +impl<F: IoPageTableFmt> IoPageTable<F> {
> + /// Create a new `IoPageTable` wrapped in `Devres` as a device resource.
> + #[inline]
> + pub fn new(
> + dev: &Device<Bound>,
> + config: Config,
> + ) -> impl PinInit<Devres<IoPageTable<F>>, Error> + '_ {
> + // SAFETY: Devres ensures that the value is dropped during device unbind.
> + Devres::new(dev, unsafe { Self::new_raw(dev, config) })
> + }
> +
> + /// Create a new `IoPageTable`, failing with `ENOMEM` if the C allocation returns null.
> + ///
> + /// # Safety
> + ///
> + /// If successful, then the returned value must be dropped before the device is unbound.
> + #[inline]
> + pub unsafe fn new_raw(dev: &Device<Bound>, config: Config) -> Result<IoPageTable<F>> {
> + let mut raw_cfg = bindings::io_pgtable_cfg {
> + quirks: config.quirks,
> + pgsize_bitmap: config.pgsize_bitmap,
> + ias: config.ias,
> + oas: config.oas,
> + coherent_walk: config.coherent_walk,
> + tlb: &raw const NOOP_FLUSH_OPS,
> + iommu_dev: dev.as_raw(),
> + // SAFETY: All zeroes is a valid value for `struct io_pgtable_cfg`.
> + ..unsafe { core::mem::zeroed() }
> + };
> +
> + // SAFETY:
> + // * The raw_cfg pointer is valid for the duration of this call.
> + // * The provided `NOOP_FLUSH_OPS` contains valid function pointers that accept a null
> + // pointer as cookie.
> + // * The caller ensures that the io pgtable does not outlive the device.
> + let ops = unsafe {
> + bindings::alloc_io_pgtable_ops(F::FORMAT, &mut raw_cfg, core::ptr::null_mut())
> + };
> + // INVARIANT: We successfully created a valid page table.
> + Ok(IoPageTable {
> + ptr: NonNull::new(ops).ok_or(ENOMEM)?,
> + _marker: PhantomData,
> + })
> + }
> +
> + /// Obtain a raw pointer to the underlying `struct io_pgtable_ops`.
> + #[inline]
> + pub fn raw_ops(&self) -> *mut bindings::io_pgtable_ops {
> + self.ptr.as_ptr()
> + }
> +
> + /// Obtain a raw pointer to the underlying `struct io_pgtable`.
> + #[inline]
> + pub fn raw_pgtable(&self) -> *mut bindings::io_pgtable {
> + // SAFETY: The io_pgtable_ops of an io-pgtable is always the ops field of an io_pgtable.
> + unsafe { kernel::container_of!(self.raw_ops(), bindings::io_pgtable, ops) }
> + }
> +
> + /// Obtain a raw pointer to the underlying `struct io_pgtable_cfg`.
> + #[inline]
> + pub fn raw_cfg(&self) -> *mut bindings::io_pgtable_cfg {
> + // SAFETY: The `raw_pgtable()` method returns a valid pointer.
> + unsafe { &raw mut (*self.raw_pgtable()).cfg }
> + }
> +
> + /// Map a physically contiguous range of same-size pages; on success returns `mapped` from C.
> + ///
> + /// # Safety
> + ///
> + /// * This page table must not contain any mapping that overlaps with the mapping created by
> + /// this call.
> + /// * If this page table is live, then the caller must ensure that it's okay to access the
> + /// physical address being mapped for the duration in which it is mapped.
> + #[inline]
> + pub unsafe fn map_pages(
> + &self,
> + iova: usize,
> + paddr: PhysAddr,
> + pgsize: usize,
> + pgcount: usize,
> + prot: u32,
> + flags: alloc::Flags,
> + ) -> Result<usize> {
> + let mut mapped: usize = 0;
> +
> + // SAFETY: The `map_pages` function in `io_pgtable_ops` is never null.
> + let map_pages = unsafe { (*self.raw_ops()).map_pages.unwrap_unchecked() };
> +
> + // SAFETY: The safety requirements of this method are sufficient to call `map_pages`.
> + to_result(unsafe {
> + (map_pages)(
> + self.raw_ops(),
> + iova,
> + paddr,
> + pgsize,
> + pgcount,
> + prot as i32,
> + flags.as_raw(),
> + &mut mapped,
> + )
> + })?;
> +
> + Ok(mapped)
> + }
> +
> + /// Unmap virtually contiguous pages of one size; returns the C callback's result.
> + ///
> + /// # Safety
> + ///
> + /// This page table must contain a mapping at `iova` that consists of exactly `pgcount` pages
> + /// of size `pgsize`.
> + #[inline]
> + pub unsafe fn unmap_pages(&self, iova: usize, pgsize: usize, pgcount: usize) -> usize {
> + // SAFETY: The `unmap_pages` function in `io_pgtable_ops` is never null.
> + let unmap_pages = unsafe { (*self.raw_ops()).unmap_pages.unwrap_unchecked() };
> +
> + // SAFETY: The safety requirements of this method are sufficient to call `unmap_pages`.
> + unsafe { (unmap_pages)(self.raw_ops(), iova, pgsize, pgcount, core::ptr::null_mut()) }
> + }
> +}
> +
> +// These bindings are currently designed for use by GPU drivers, which use this page table together
> +// with GPUVM. When using GPUVM, a single mapping operation may be translated into many operations
> +// on the page table, and in that case you generally want to flush the TLB only once per GPUVM
> +// operation. Thus, install no-op callbacks here, as real ones would flush more often than needed.
> +static NOOP_FLUSH_OPS: bindings::iommu_flush_ops = bindings::iommu_flush_ops {
> + tlb_flush_all: Some(rust_tlb_flush_all_noop),
> + tlb_flush_walk: Some(rust_tlb_flush_walk_noop),
> + tlb_add_page: None,
> +};
> +
> +#[no_mangle]
> +extern "C" fn rust_tlb_flush_all_noop(_cookie: *mut core::ffi::c_void) {}
> +
> +#[no_mangle]
> +extern "C" fn rust_tlb_flush_walk_noop(
> + _iova: usize,
> + _size: usize,
> + _granule: usize,
> + _cookie: *mut core::ffi::c_void,
> +) {
> +}
> +
> +impl<F> Drop for IoPageTable<F> {
> +    /// Frees the underlying C page table via `free_io_pgtable_ops`.
> +    fn drop(&mut self) {
> +        // `IoPageTable` has the named fields `ptr` and `_marker`, so the ops pointer is read
> +        // from `self.ptr`. `raw_ops()` is not available here because this impl has no
> +        // `F: IoPageTableFmt` bound.
> +        //
> +        // SAFETY:
> +        // * By the type invariants, `self.ptr` references a valid io page table, which was
> +        //   returned by `alloc_io_pgtable_ops` and has not been freed yet.
> +        // * The caller of `ttbr` promised that the page table is not live when this
> +        //   destructor runs.
> +        unsafe { bindings::free_io_pgtable_ops(self.ptr.as_ptr()) };
> +    }
> +}
> +
> +/// Marker type for the `ARM_64_LPAE_S1` page table format.
> +pub enum ARM64LPAES1 {}
> +
> +impl IoPageTableFmt for ARM64LPAES1 {
> + const FORMAT: io_pgtable_fmt = bindings::io_pgtable_fmt_ARM_64_LPAE_S1 as io_pgtable_fmt;
> +}
> +
> +impl IoPageTable<ARM64LPAES1> {
> + /// Read the `ttbr` field of the `arm_lpae_s1_cfg` configuration.
> + ///
> + /// This is the physical address of the page table, which may be passed to the device that
> + /// needs to use it.
> + ///
> + /// # Safety
> + ///
> + /// The caller must ensure that the device stops using the page table before dropping it.
> + #[inline]
> + pub unsafe fn ttbr(&self) -> u64 {
> + // SAFETY: `raw_cfg()` is valid and `arm_lpae_s1_cfg` is the right cfg type for `ARM64LPAES1`.
> + unsafe { (*self.raw_cfg()).__bindgen_anon_1.arm_lpae_s1_cfg.ttbr }
> + }
> +
> + /// Read the `mair` field of the `arm_lpae_s1_cfg` configuration.
> + #[inline]
> + pub fn mair(&self) -> u64 {
> + // SAFETY: `raw_cfg()` is valid and `arm_lpae_s1_cfg` is the right cfg type for `ARM64LPAES1`.
> + unsafe { (*self.raw_cfg()).__bindgen_anon_1.arm_lpae_s1_cfg.mair }
> + }
> +}
>
> ---
> base-commit: ffee675aceb9f44b0502a8bec912abb0c4f4af62
> change-id: 20251111-io-pgtable-fe0822b4ebdd
> prerequisite-change-id: 20251106-resource-phys-typedefs-6db37927d159:v2
> prerequisite-patch-id: 350421d8dbaf3db51b1243d82077c5eb88f54db5
> prerequisite-patch-id: ac0166fb3cd235de76841789173051191a4d2434
> prerequisite-patch-id: f4bca02c77c40093690b66cdf477f928784bdbf4
> prerequisite-patch-id: 083d1c22b1a7eb0dcae37052b926362543c68e8a
>
> Best regards,
Powered by blists - more mailing lists