linux-kernel - Re: [PATCH 07/31] gpu: nova-core: set DMA mask width based on GPU architecture

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <DFNI82AQBBUV.1I49DFIO8072F@garyguo.net>
Date: Tue, 13 Jan 2026 13:43:37 +0000
From: "Gary Guo" <gary@...yguo.net>
To: "John Hubbard" <jhubbard@...dia.com>, "Danilo Krummrich"
 <dakr@...nel.org>
Cc: "Alexandre Courbot" <acourbot@...dia.com>, "Joel Fernandes"
 <joelagnelf@...dia.com>, "Timur Tabi" <ttabi@...dia.com>, "Alistair Popple"
 <apopple@...dia.com>, "Edwin Peer" <epeer@...dia.com>, "Zhi Wang"
 <zhiw@...dia.com>, "David Airlie" <airlied@...il.com>, "Simona Vetter"
 <simona@...ll.ch>, "Bjorn Helgaas" <bhelgaas@...gle.com>, "Miguel Ojeda"
 <ojeda@...nel.org>, "Alex Gaynor" <alex.gaynor@...il.com>, "Boqun Feng"
 <boqun.feng@...il.com>, "Gary Guo" <gary@...yguo.net>,
 Björn Roy Baron <bjorn3_gh@...tonmail.com>, "Benno Lossin"
 <lossin@...nel.org>, "Andreas Hindborg" <a.hindborg@...nel.org>, "Alice
 Ryhl" <aliceryhl@...gle.com>, "Trevor Gross" <tmgross@...ch.edu>,
 <nouveau@...ts.freedesktop.org>, <rust-for-linux@...r.kernel.org>, "LKML"
 <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH 07/31] gpu: nova-core: set DMA mask width based on GPU
 architecture

On Wed Dec 3, 2025 at 5:58 AM GMT, John Hubbard wrote:
> This removes a "TODO" item in the code, which was hardcoded to work on
> Ampere and Ada GPUs. Hopper/Blackwell+ have a larger width, so do an
> early read of boot42, in order to pick the correct value.
>
> Signed-off-by: John Hubbard <jhubbard@...dia.com>
> ---
>  drivers/gpu/nova-core/driver.rs | 33 +++++++++++++++++----------------
>  drivers/gpu/nova-core/gpu.rs    | 29 ++++++++++++++++++++++++++++-
>  2 files changed, 45 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs
> index d91bbc50cde7..3179a4d47af4 100644
> --- a/drivers/gpu/nova-core/driver.rs
> +++ b/drivers/gpu/nova-core/driver.rs
> @@ -4,8 +4,10 @@
>      auxiliary,
>      c_str,
>      device::Core,
> -    dma::Device,
> -    dma::DmaMask,
> +    dma::{
> +        Device,
> +        DmaMask, //
> +    },
>      pci,
>      pci::{
>          Class,
> @@ -17,7 +19,10 @@
>      sync::Arc, //
>  };
>  
> -use crate::gpu::Gpu;
> +use crate::gpu::{
> +    read_architecture,
> +    Gpu, //
> +};
>  
>  #[pin_data]
>  pub(crate) struct NovaCore {
> @@ -28,14 +33,6 @@ pub(crate) struct NovaCore {
>  
>  const BAR0_SIZE: usize = SZ_16M;
>  
> -// For now we only support Ampere which can use up to 47-bit DMA addresses.
> -//
> -// TODO: Add an abstraction for this to support newer GPUs which may support
> -// larger DMA addresses. Limiting these GPUs to smaller address widths won't
> -// have any adverse affects, unless installed on systems which require larger
> -// DMA addresses. These systems should be quite rare.
> -const GPU_DMA_BITS: u32 = 47;
> -
>  pub(crate) type Bar0 = pci::Bar<BAR0_SIZE>;
>  
>  kernel::pci_device_table!(
> @@ -73,11 +70,6 @@ fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> Result<Pin<KBox<Self
>          pdev.enable_device_mem()?;
>          pdev.set_master();
>  
> -        // SAFETY: No concurrent DMA allocations or mappings can be made because
> -        // the device is still being probed and therefore isn't being used by
> -        // other threads of execution.
> -        unsafe { pdev.dma_set_mask_and_coherent(DmaMask::new::<GPU_DMA_BITS>())? };
> -
>          let devres_bar = Arc::pin_init(
>              pdev.iomap_region_sized::<BAR0_SIZE>(0, c_str!("nova-core/bar0")),
>              GFP_KERNEL,
> @@ -88,6 +80,15 @@ fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> Result<Pin<KBox<Self
>          let bar_clone = Arc::clone(&devres_bar);
>          let bar = bar_clone.access(pdev.as_ref())?;
>  
> +        // Read the GPU architecture early to determine the correct DMA address width.
> +        // Hopper/Blackwell+ support 52-bit DMA addresses, earlier architectures use 47-bit.
> +        let arch = read_architecture(bar)?;
> +
> +        // SAFETY: No concurrent DMA allocations or mappings can be made because
> +        // the device is still being probed and therefore isn't being used by
> +        // other threads of execution.
> +        unsafe { pdev.dma_set_mask_and_coherent(DmaMask::try_new(arch.dma_addr_bits())?)? };
> +
>          let this = KBox::pin_init(
>              try_pin_init!(Self {
>                  gpu <- Gpu::new(pdev, devres_bar, bar),
> diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
> index c21ce91924f5..624bbc2a54e8 100644
> --- a/drivers/gpu/nova-core/gpu.rs
> +++ b/drivers/gpu/nova-core/gpu.rs
> @@ -102,7 +102,7 @@ fn try_from(value: u32) -> Result<Self, Self::Error> {
>  });
>  
>  impl Chipset {
> -    pub(crate) fn arch(&self) -> Architecture {
> +    pub(crate) const fn arch(&self) -> Architecture {
>          match self {
>              Self::TU102 | Self::TU104 | Self::TU106 | Self::TU117 | Self::TU116 => {
>                  Architecture::Turing
> @@ -155,6 +155,19 @@ pub(crate) enum Architecture {
>      Blackwell = 0x1b,
>  }
>  
> +impl Architecture {
> +    /// Returns the number of DMA address bits supported by this architecture.
> +    ///
> +    /// Hopper and Blackwell support 52-bit DMA addresses, while earlier architectures
> +    /// (Turing, Ampere, Ada) support 47-bit DMA addresses.
> +    pub(crate) const fn dma_addr_bits(&self) -> u32 {

How about just returning `DmaMask` from here? This gets rid of the fallible
constructor call of `DmaMask`.

> +        match self {
> +            Self::Turing | Self::Ampere | Self::Ada => 47,
> +            Self::Hopper | Self::Blackwell => 52,
> +        }
> +    }
> +}
> +
>  impl TryFrom<u8> for Architecture {
>      type Error = Error;
>  
> @@ -203,6 +216,20 @@ pub(crate) struct Spec {
>      revision: Revision,
>  }
>  
> +/// Reads the GPU architecture from BAR0 registers.
> +///
> +/// This is a lightweight check used early in probe to determine the correct DMA address width
> +/// before the full [`Spec`] is constructed.
> +pub(crate) fn read_architecture(bar: &Bar0) -> Result<Architecture> {
> +    let boot0 = regs::NV_PMC_BOOT_0::read(bar);
> +
> +    if boot0.is_older_than_fermi() {
> +        return Err(ENODEV);
> +    }
> +
> +    regs::NV_PMC_BOOT_42::read(bar).architecture()

Can this just be `Spec::new`?

Best,
Gary

> +}
> +
>  impl Spec {
>      fn new(dev: &device::Device, bar: &Bar0) -> Result<Spec> {
>          // Some brief notes about boot0 and boot42, in chronological order: