[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251203055923.1247681-8-jhubbard@nvidia.com>
Date: Tue, 2 Dec 2025 21:58:59 -0800
From: John Hubbard <jhubbard@...dia.com>
To: Danilo Krummrich <dakr@...nel.org>
Cc: Alexandre Courbot <acourbot@...dia.com>,
Joel Fernandes <joelagnelf@...dia.com>,
Timur Tabi <ttabi@...dia.com>,
Alistair Popple <apopple@...dia.com>,
Edwin Peer <epeer@...dia.com>,
Zhi Wang <zhiw@...dia.com>,
David Airlie <airlied@...il.com>,
Simona Vetter <simona@...ll.ch>,
Bjorn Helgaas <bhelgaas@...gle.com>,
Miguel Ojeda <ojeda@...nel.org>,
Alex Gaynor <alex.gaynor@...il.com>,
Boqun Feng <boqun.feng@...il.com>,
Gary Guo <gary@...yguo.net>,
Björn Roy Baron <bjorn3_gh@...tonmail.com>,
Benno Lossin <lossin@...nel.org>,
Andreas Hindborg <a.hindborg@...nel.org>,
Alice Ryhl <aliceryhl@...gle.com>,
Trevor Gross <tmgross@...ch.edu>,
nouveau@...ts.freedesktop.org,
rust-for-linux@...r.kernel.org,
LKML <linux-kernel@...r.kernel.org>,
John Hubbard <jhubbard@...dia.com>
Subject: [PATCH 07/31] gpu: nova-core: set DMA mask width based on GPU architecture
This removes a "TODO" item in the code: the DMA mask width was hardcoded
to 47 bits, which is what Ampere and Ada GPUs support. Hopper/Blackwell+
support a wider, 52-bit DMA address, so do an early read of boot42 in
order to pick the correct width for the detected architecture.
Signed-off-by: John Hubbard <jhubbard@...dia.com>
---
drivers/gpu/nova-core/driver.rs | 33 +++++++++++++++++----------------
drivers/gpu/nova-core/gpu.rs | 29 ++++++++++++++++++++++++++++-
2 files changed, 45 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/nova-core/driver.rs b/drivers/gpu/nova-core/driver.rs
index d91bbc50cde7..3179a4d47af4 100644
--- a/drivers/gpu/nova-core/driver.rs
+++ b/drivers/gpu/nova-core/driver.rs
@@ -4,8 +4,10 @@
auxiliary,
c_str,
device::Core,
- dma::Device,
- dma::DmaMask,
+ dma::{
+ Device,
+ DmaMask, //
+ },
pci,
pci::{
Class,
@@ -17,7 +19,10 @@
sync::Arc, //
};
-use crate::gpu::Gpu;
+use crate::gpu::{
+ read_architecture,
+ Gpu, //
+};
#[pin_data]
pub(crate) struct NovaCore {
@@ -28,14 +33,6 @@ pub(crate) struct NovaCore {
const BAR0_SIZE: usize = SZ_16M;
-// For now we only support Ampere which can use up to 47-bit DMA addresses.
-//
-// TODO: Add an abstraction for this to support newer GPUs which may support
-// larger DMA addresses. Limiting these GPUs to smaller address widths won't
-// have any adverse affects, unless installed on systems which require larger
-// DMA addresses. These systems should be quite rare.
-const GPU_DMA_BITS: u32 = 47;
-
pub(crate) type Bar0 = pci::Bar<BAR0_SIZE>;
kernel::pci_device_table!(
@@ -73,11 +70,6 @@ fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> Result<Pin<KBox<Self
pdev.enable_device_mem()?;
pdev.set_master();
- // SAFETY: No concurrent DMA allocations or mappings can be made because
- // the device is still being probed and therefore isn't being used by
- // other threads of execution.
- unsafe { pdev.dma_set_mask_and_coherent(DmaMask::new::<GPU_DMA_BITS>())? };
-
let devres_bar = Arc::pin_init(
pdev.iomap_region_sized::<BAR0_SIZE>(0, c_str!("nova-core/bar0")),
GFP_KERNEL,
@@ -88,6 +80,15 @@ fn probe(pdev: &pci::Device<Core>, _info: &Self::IdInfo) -> Result<Pin<KBox<Self
let bar_clone = Arc::clone(&devres_bar);
let bar = bar_clone.access(pdev.as_ref())?;
+ // Read the GPU architecture early to determine the correct DMA address width.
+ // Hopper/Blackwell+ support 52-bit DMA addresses; earlier architectures use 47-bit.
+ let arch = read_architecture(bar)?;
+
+ // SAFETY: No concurrent DMA allocations or mappings can be made because
+ // the device is still being probed and therefore isn't being used by
+ // other threads of execution.
+ unsafe { pdev.dma_set_mask_and_coherent(DmaMask::try_new(arch.dma_addr_bits())?)? };
+
let this = KBox::pin_init(
try_pin_init!(Self {
gpu <- Gpu::new(pdev, devres_bar, bar),
diff --git a/drivers/gpu/nova-core/gpu.rs b/drivers/gpu/nova-core/gpu.rs
index c21ce91924f5..624bbc2a54e8 100644
--- a/drivers/gpu/nova-core/gpu.rs
+++ b/drivers/gpu/nova-core/gpu.rs
@@ -102,7 +102,7 @@ fn try_from(value: u32) -> Result<Self, Self::Error> {
});
impl Chipset {
- pub(crate) fn arch(&self) -> Architecture {
+ pub(crate) const fn arch(&self) -> Architecture {
match self {
Self::TU102 | Self::TU104 | Self::TU106 | Self::TU117 | Self::TU116 => {
Architecture::Turing
@@ -155,6 +155,19 @@ pub(crate) enum Architecture {
Blackwell = 0x1b,
}
+impl Architecture {
+ /// Returns the number of DMA address bits supported by this architecture.
+ ///
+ /// Hopper and Blackwell support 52-bit DMA addresses, while earlier architectures
+ /// (Turing, Ampere, Ada) support 47-bit DMA addresses.
+ pub(crate) const fn dma_addr_bits(&self) -> u32 {
+ match self {
+ Self::Turing | Self::Ampere | Self::Ada => 47,
+ Self::Hopper | Self::Blackwell => 52,
+ }
+ }
+}
+
impl TryFrom<u8> for Architecture {
type Error = Error;
@@ -203,6 +216,20 @@ pub(crate) struct Spec {
revision: Revision,
}
+/// Reads the GPU architecture from BAR0 registers.
+///
+/// This is a lightweight check used early in probe to determine the correct DMA address width
+/// before the full [`Spec`] is constructed.
+pub(crate) fn read_architecture(bar: &Bar0) -> Result<Architecture> {
+ let boot0 = regs::NV_PMC_BOOT_0::read(bar);
+
+ if boot0.is_older_than_fermi() {
+ return Err(ENODEV);
+ }
+
+ regs::NV_PMC_BOOT_42::read(bar).architecture()
+}
+
impl Spec {
fn new(dev: &device::Device, bar: &Bar0) -> Result<Spec> {
// Some brief notes about boot0 and boot42, in chronological order:
--
2.52.0
Powered by blists - more mailing lists