[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <DDJJ5745OETR.3TXR3ZVYM4E8T@nvidia.com>
Date: Thu, 16 Oct 2025 15:24:17 +0900
From: "Alexandre Courbot" <acourbot@...dia.com>
To: "Alistair Popple" <apopple@...dia.com>,
<rust-for-linux@...r.kernel.org>, <dri-devel@...ts.freedesktop.org>,
<dakr@...nel.org>, <acourbot@...dia.com>
Cc: "Miguel Ojeda" <ojeda@...nel.org>, "Alex Gaynor"
<alex.gaynor@...il.com>, "Boqun Feng" <boqun.feng@...il.com>, "Gary Guo"
<gary@...yguo.net>, Björn Roy Baron
<bjorn3_gh@...tonmail.com>, "Benno Lossin" <lossin@...nel.org>, "Andreas
Hindborg" <a.hindborg@...nel.org>, "Alice Ryhl" <aliceryhl@...gle.com>,
"Trevor Gross" <tmgross@...ch.edu>, "David Airlie" <airlied@...il.com>,
"Simona Vetter" <simona@...ll.ch>, "Maarten Lankhorst"
<maarten.lankhorst@...ux.intel.com>, "Maxime Ripard" <mripard@...nel.org>,
"Thomas Zimmermann" <tzimmermann@...e.de>, "John Hubbard"
<jhubbard@...dia.com>, "Joel Fernandes" <joelagnelf@...dia.com>, "Timur
Tabi" <ttabi@...dia.com>, <linux-kernel@...r.kernel.org>,
<nouveau@...ts.freedesktop.org>
Subject: Re: [PATCH v5 07/14] gpu: nova-core: gsp: Add GSP command queue
handling
On Mon Oct 13, 2025 at 3:20 PM JST, Alistair Popple wrote:
> This commit introduces core infrastructure for handling GSP command and
> message queues in the nova-core driver. The command queue system enables
> bidirectional communication between the host driver and GSP firmware
> through a remote message passing interface.
>
> The interface is based on passing serialised data structures over a ring
> buffer with separate transmit and receive queues. Commands are sent by
> writing to the CPU transmit queue and waiting for completion via the
> receive queue.
>
> To ensure safety mutable or immutable (depending on whether it is a send
> or receive operation) references are taken on the command queue when
> allocating the message to write/read to. This ensures message memory
> remains valid and the command queue can't be mutated whilst an operation
> is in progress.
>
> Currently this is only used by the probe() routine and therefore can
> only used by a single thread of execution. Locking to enable safe access
> from multiple threads will be introduced in a future series when that
> becomes necessary.
>
> Signed-off-by: Alistair Popple <apopple@...dia.com>
>
> ---
>
> Changes for v4:
> - Use read_poll_timeout() instead of wait_on()
> - Switch to using `init!` (Thanks Alex for figuring out the
> required workarounds)
> - Pass the enum type into the RPC bindings instead of a raw u32
> - Fixup the TODOs for extracting/allocating the send command regions
> - Split the sending functions into one taking just a command struct and
> another taking a command struct with extra payload
>
> Changes for v3:
> - Reduce the receive payloads to the correct size
> - Use opaque bindings
> - Clean up of the command queue PTE creation
> - Add an enum for the GSP functions
> - Rename GspCommandToGsp and GspMessageFromGsp
> - Rename GspCmdq
> - Add function to notify GSP of updated queue pointers
> - Inline driver area access functions
> - Fixup receive area calculations
>
> Changes for v2:
> - Rebased on Alex's latest series
> ---
> drivers/gpu/nova-core/gsp.rs | 9 +
> drivers/gpu/nova-core/gsp/cmdq.rs | 493 ++++++++++++++++++++++++++++++
> drivers/gpu/nova-core/regs.rs | 4 +
> drivers/gpu/nova-core/sbuffer.rs | 2 -
> scripts/Makefile.build | 2 +-
> 5 files changed, 507 insertions(+), 3 deletions(-)
> create mode 100644 drivers/gpu/nova-core/gsp/cmdq.rs
>
> diff --git a/drivers/gpu/nova-core/gsp.rs b/drivers/gpu/nova-core/gsp.rs
> index 554eb1a34ee7..1d472c5fad7a 100644
> --- a/drivers/gpu/nova-core/gsp.rs
> +++ b/drivers/gpu/nova-core/gsp.rs
> @@ -2,6 +2,7 @@
>
> mod boot;
>
> +use kernel::alloc::flags::GFP_KERNEL;
This import should not be needed, `GFP_KERNEL` being part of the
prelude.
> use kernel::device;
> use kernel::dma::CoherentAllocation;
> use kernel::dma::DmaAddress;
> @@ -11,6 +12,7 @@
> use kernel::transmute::AsBytes;
>
> use crate::fb::FbLayout;
> +use crate::gsp::cmdq::Cmdq;
>
> pub(crate) use fw::{GspFwWprMeta, LibosParams};
>
> @@ -18,6 +20,8 @@
>
> use fw::LibosMemoryRegionInitArgument;
>
> +pub(crate) mod cmdq;
> +
> pub(crate) const GSP_PAGE_SHIFT: usize = 12;
> pub(crate) const GSP_PAGE_SIZE: usize = 1 << GSP_PAGE_SHIFT;
>
> @@ -31,6 +35,7 @@ pub(crate) struct Gsp {
> loginit: LogBuffer,
> logintr: LogBuffer,
> logrm: LogBuffer,
> + pub(crate) cmdq: Cmdq,
> }
>
> #[repr(C)]
> @@ -110,11 +115,15 @@ pub(crate) fn new(pdev: &pci::Device<device::Bound>) -> Result<impl PinInit<Self
> let logrm = LogBuffer::new(dev)?;
> dma_write!(libos[2] = LibosMemoryRegionInitArgument::new("LOGRM", &logrm.0)?)?;
>
> + // Creates its own PTE array.
> + let cmdq = Cmdq::new(dev)?;
I believe this comment is a leftover.
> +
> Ok(try_pin_init!(Self {
> libos,
> loginit,
> logintr,
> logrm,
> + cmdq,
> }))
> }
> }
> diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs
> new file mode 100644
> index 000000000000..3f8cb7a35922
> --- /dev/null
> +++ b/drivers/gpu/nova-core/gsp/cmdq.rs
> @@ -0,0 +1,493 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +use core::mem::offset_of;
> +use core::sync::atomic::fence;
> +use core::sync::atomic::Ordering;
> +
> +use kernel::alloc::flags::GFP_KERNEL;
This one is not needed either.
> +use kernel::device;
> +use kernel::dma::CoherentAllocation;
> +use kernel::dma_write;
> +use kernel::io::poll::read_poll_timeout;
> +use kernel::prelude::*;
> +use kernel::sync::aref::ARef;
> +use kernel::time::Delta;
> +use kernel::transmute::{AsBytes, FromBytes};
> +
> +use crate::driver::Bar0;
> +use crate::gsp::fw::{GspMsgElement, MsgFunction, MsgqRxHeader, MsgqTxHeader};
> +use crate::gsp::PteArray;
> +use crate::gsp::{GSP_PAGE_SHIFT, GSP_PAGE_SIZE};
> +use crate::regs::NV_PGSP_QUEUE_HEAD;
The custom with registers is to just import the `regs` module and prefix
all registers with `regs::`.
> +use crate::sbuffer::SBufferIter;
> +
> +pub(crate) trait CommandToGsp: Sized + FromBytes + AsBytes {
> + const FUNCTION: MsgFunction;
> +}
> +
> +pub(crate) trait CommandToGspWithPayload: CommandToGsp {}
Unfortunately this hierarchy won't work, because this way a command that
is expected to take a payload can be sent without any.
I'd suggest instead having a `CommandToGspBase` trait that sets the
FUNCTION, and have `CommandToGsp` and `CommandToGspWithPayload` require
it.
> +
> +pub(crate) trait MessageFromGsp: Sized + FromBytes + AsBytes {
> + const FUNCTION: MsgFunction;
> +}
Just a bit of documentation for these new traits please. :)
(also applies to all undocumented types/methods in this file)
<snip>
> +impl DmaGspMem {
> + fn new(dev: &device::Device<device::Bound>) -> Result<Self> {
> + const MSGQ_SIZE: u32 = size_of::<Msgq>() as u32;
> + const RX_HDR_OFF: u32 = offset_of!(Msgq, rx) as u32;
> +
> + let gsp_mem =
> + CoherentAllocation::<GspMem>::alloc_coherent(dev, 1, GFP_KERNEL | __GFP_ZERO)?;
> + dma_write!(gsp_mem[0].ptes = PteArray::new(gsp_mem.dma_handle())?)?;
> + dma_write!(gsp_mem[0].cpuq.tx = MsgqTxHeader::new(MSGQ_SIZE, RX_HDR_OFF, MSGQ_NUM_PAGES))?;
> + dma_write!(gsp_mem[0].cpuq.rx = MsgqRxHeader::new())?;
> +
> + Ok(Self(gsp_mem))
> + }
> +
> + // Allocates the various regions for the command and reduces the payload size
> + // to match what is needed for the command.
Let's also explain what the elements in the returned tuple are.
> + //
> + // # Errors
> + //
> + // Returns `Err(EAGAIN)` if the driver area is too small to hold the
> + // requested command.
> + fn allocate_command_regions<'a, M: CommandToGsp>(
> + &'a mut self,
> + payload_size: usize,
> + ) -> Result<(&'a mut GspMsgElement, &'a mut M, &'a mut [u8], &'a mut [u8])> {
> + let driver_area = self.driver_write_area();
> + let msg_size = size_of::<GspMsgElement>() + size_of::<M>() + payload_size;
> + let driver_area_size = (driver_area.0.len() + driver_area.1.len()) << GSP_PAGE_SHIFT;
> +
> + if msg_size > driver_area_size {
> + return Err(EAGAIN);
> + }
> +
> + #[allow(clippy::incompatible_msrv)]
> + let (msg_header_slice, slice_1) = driver_area
> + .0
> + .as_flattened_mut()
We are going to have a slight problem here. `as_flattened_mut` is only
available since Rust 1.80... and we need to support 1.78.
I suppose we could work this around by defining a custom function tht
just calls `as_flattened_mut` in Rust versions that support it and
provides an alternative implementation for those that don't, but I
wonder whether there is a better way.
> + .split_at_mut(size_of::<GspMsgElement>());
> + let msg_header = GspMsgElement::from_bytes_mut(msg_header_slice).ok_or(EINVAL)?;
> + let (cmd_slice, payload_1) = slice_1.split_at_mut(size_of::<M>());
> + let cmd = M::from_bytes_mut(cmd_slice).ok_or(EINVAL)?;
> +
> + #[allow(clippy::incompatible_msrv)]
> + let payload_2 = driver_area.1.as_flattened_mut();
> +
> + let (payload_1, payload_2) = if payload_1.len() > payload_size {
> + // Payload fits entirely in payload_1
> + (&mut payload_1[..payload_size], &mut payload_2[0..0])
> + } else {
> + // Need all of payload_1 and some of payload_2
> + let payload_2_len = payload_size - payload_1.len();
> + (payload_1, &mut payload_2[..payload_2_len])
> + };
> +
> + Ok((msg_header, cmd, payload_1, payload_2))
> + }
We should probably add a few comments in this function to explain the
steps, otherwise it can be a bit difficult to parse.
> + fn calculate_checksum<T: Iterator<Item = u8>>(it: T) -> u32 {
> + let sum64 = it
> + .enumerate()
> + .map(|(idx, byte)| (((idx % 8) * 8) as u32, byte))
> + .fold(0, |acc, (rol, byte)| acc ^ u64::from(byte).rotate_left(rol));
> +
> + ((sum64 >> 32) as u32) ^ (sum64 as u32)
> + }
> +
> + // Notify GSP that we have updated the command queue pointers.
> + fn notify_gsp(bar: &Bar0) {
> + NV_PGSP_QUEUE_HEAD::default().set_address(0).write(bar);
> + }
Can we move this method to `regs.rs`, as an impl block of
`NV_PGSP_QUEUE_HEAD`? It makes more sense to have it there imho.
> +
> + #[expect(unused)]
> + pub(crate) fn send_gsp_command<M, E>(&mut self, bar: &Bar0, init: impl Init<M, E>) -> Result
> + where
> + M: CommandToGsp,
> + // This allows all error types, including `Infallible`, to be used with `init`. Without
> + // this we cannot use regular stack objects as `init` since their `Init` implementation
> + // does not return any error.
> + Error: From<E>,
> + {
> + #[repr(C)]
> + struct FullCommand<M> {
> + hdr: GspMsgElement,
> + cmd: M,
> + }
> + let (msg_header, cmd, _, _) = self.gsp_mem.allocate_command_regions::<M>(0)?;
> +
> + let seq = self.seq;
> + let initializer = try_init!(FullCommand {
> + hdr <- GspMsgElement::init(seq, size_of::<M>(), M::FUNCTION),
> + cmd <- init,
> + });
> +
> + // Fill the header and command in-place.
> + // SAFETY:
> + // - allocate_command_regions guarantees msg_header points to enough
> + // space in the command queue to contain FullCommand
> + // - __init ensures the command header and struct a fully initialized
> + unsafe {
> + initializer.__init(msg_header.as_bytes_mut().as_mut_ptr().cast())?;
> + }
> +
> + msg_header.set_checksum(Cmdq::calculate_checksum(SBufferIter::new_reader([
> + msg_header.as_bytes(),
> + cmd.as_bytes(),
> + ])));
> +
> + dev_info!(
> + &self.dev,
> + "GSP RPC: send: seq# {}, function=0x{:x} ({}), length=0x{:x}\n",
> + self.seq,
> + msg_header.function_number(),
> + msg_header.function()?,
> + msg_header.length(),
> + );
> +
> + let elem_count = msg_header.element_count();
> + self.seq += 1;
> + self.gsp_mem.advance_cpu_write_ptr(elem_count);
> + Cmdq::notify_gsp(bar);
> +
> + Ok(())
> + }
> +
> + #[expect(unused)]
> + pub(crate) fn send_gsp_command_with_payload<M, E>(
> + &mut self,
> + bar: &Bar0,
> + payload_size: usize,
> + init: impl Init<M, E>,
> + init_payload: impl FnOnce(SBufferIter<core::array::IntoIter<&mut [u8], 2>>) -> Result,
> + ) -> Result
> + where
> + M: CommandToGspWithPayload,
> + // This allows all error types, including `Infallible`, to be used with `init`. Without
> + // this we cannot use regular stack objects as `init` since their `Init` implementation
> + // does not return any error.
> + Error: From<E>,
> + {
> + #[repr(C)]
> + struct FullCommand<M> {
> + hdr: GspMsgElement,
> + cmd: M,
> + }
> + let (msg_header, cmd, payload_1, payload_2) =
> + self.gsp_mem.allocate_command_regions::<M>(payload_size)?;
> +
> + let seq = self.seq;
> + let initializer = try_init!(FullCommand {
> + hdr <- GspMsgElement::init(seq, size_of::<M>() + payload_size, M::FUNCTION),
> + cmd <- init,
> + });
> +
> + // Fill the header and command in-place.
> + // SAFETY:
> + // - allocate_command_regions guarantees msg_header points to enough
> + // space in the command queue to contain FullCommand
> + // - __init ensures the command header and struct a fully initialized
> + unsafe {
> + initializer.__init(msg_header.as_bytes_mut().as_mut_ptr().cast())?;
> + }
> +
> + // Fill the payload
> + let sbuffer = SBufferIter::new_writer([&mut payload_1[..], &mut payload_2[..]]);
> + init_payload(sbuffer)?;
> +
> + msg_header.set_checksum(Cmdq::calculate_checksum(SBufferIter::new_reader([
> + msg_header.as_bytes(),
> + cmd.as_bytes(),
> + payload_1,
> + payload_2,
> + ])));
> +
> + dev_info!(
> + &self.dev,
> + "GSP RPC: send: seq# {}, function=0x{:x} ({}), length=0x{:x}\n",
> + self.seq,
> + msg_header.function_number(),
> + msg_header.function()?,
> + msg_header.length(),
> + );
> +
> + let elem_count = msg_header.element_count();
> + self.seq += 1;
> + self.gsp_mem.advance_cpu_write_ptr(elem_count);
> + Cmdq::notify_gsp(bar);
> +
> + Ok(())
> + }
I think we can factorize most of `send_gsp_command` and
`send_gsp_command_with_payload` into a private method that requires a
`CommandToGspBase` and takes the same parameters as
`send_gsp_command_with_payload`. All it would need to do when called
from `send_gsp_command` is take a closure that doesn't write any
payload.
> +
> + #[expect(unused)]
> + pub(crate) fn receive_msg_from_gsp<M: MessageFromGsp, R>(
> + &mut self,
> + timeout: Delta,
> + init: impl FnOnce(&M, SBufferIter<core::array::IntoIter<&[u8], 2>>) -> Result<R>,
> + ) -> Result<R> {
> + let driver_area = read_poll_timeout(
> + || Ok(self.gsp_mem.driver_read_area()),
> + |driver_area: &(&[[u8; 4096]], &[[u8; 4096]])| !driver_area.0.is_empty(),
> + Delta::from_millis(10),
> + timeout,
> + )?;
> +
> + #[allow(clippy::incompatible_msrv)]
> + let (msg_header_slice, slice_1) = driver_area
> + .0
> + .as_flattened()
> + .split_at(size_of::<GspMsgElement>());
> + let msg_header = GspMsgElement::from_bytes(msg_header_slice).ok_or(EIO)?;
> + if msg_header.length() < size_of::<M>() as u32 {
> + return Err(EIO);
> + }
> +
> + let function: MsgFunction = msg_header.function().map_err(|_| {
> + dev_info!(
> + self.dev,
> + "GSP RPC: receive: seq# {}, bad function=0x{:x}, length=0x{:x}\n",
> + msg_header.sequence(),
> + msg_header.function_number(),
> + msg_header.length(),
> + );
> + EIO
> + })?;
> +
> + // Log RPC receive with message type decoding
> + dev_info!(
> + self.dev,
> + "GSP RPC: receive: seq# {}, function=0x{:x} ({}), length=0x{:x}\n",
> + msg_header.sequence(),
> + msg_header.function_number(),
> + function,
> + msg_header.length(),
> + );
> +
> + let (cmd_slice, payload_1) = slice_1.split_at(size_of::<M>());
> + #[allow(clippy::incompatible_msrv)]
> + let payload_2 = driver_area.1.as_flattened();
> +
> + // Cut the payload slice(s) down to the actual length of the payload.
> + let (cmd_payload_1, cmd_payload_2) =
> + if payload_1.len() > msg_header.length() as usize - size_of::<M>() {
> + (
> + payload_1
> + .split_at(msg_header.length() as usize - size_of::<M>())
> + .0,
> + &payload_2[0..0],
> + )
> + } else {
> + (
> + payload_1,
> + payload_2
> + .split_at(msg_header.length() as usize - size_of::<M>() - payload_1.len())
> + .0,
> + )
> + };
> +
> + if Cmdq::calculate_checksum(SBufferIter::new_reader([
> + msg_header.as_bytes(),
> + cmd_slice,
> + cmd_payload_1,
> + cmd_payload_2,
> + ])) != 0
> + {
> + dev_err!(
> + self.dev,
> + "GSP RPC: receive: Call {} - bad checksum",
> + msg_header.sequence()
> + );
> + return Err(EIO);
> + }
> +
> + let result = if function == M::FUNCTION {
> + let cmd = M::from_bytes(cmd_slice).ok_or(EINVAL)?;
> + let sbuffer = SBufferIter::new_reader([cmd_payload_1, cmd_payload_2]);
> + init(cmd, sbuffer)
> + } else {
> + Err(ERANGE)
> + };
> +
> + self.gsp_mem
> + .advance_cpu_read_ptr(msg_header.length().div_ceil(GSP_PAGE_SIZE as u32));
> + result
> + }
Here we probably also want to comment the different steps a bit.
Powered by blists - more mailing lists