[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251125135247.62878956.alex@shazbot.org>
Date: Tue, 25 Nov 2025 13:52:47 -0700
From: Alex Williamson <alex@...zbot.org>
To: <ankita@...dia.com>
Cc: <jgg@...pe.ca>, <yishaih@...dia.com>, <skolothumtho@...dia.com>,
<kevin.tian@...el.com>, <aniketa@...dia.com>, <vsethi@...dia.com>,
<mochs@...dia.com>, <Yunxiang.Li@....com>, <yi.l.liu@...el.com>,
<zhangdongdong@...incomputing.com>, <avihaih@...dia.com>,
<bhelgaas@...gle.com>, <peterx@...hat.com>, <pstanner@...hat.com>,
<apopple@...dia.com>, <kvm@...r.kernel.org>,
<linux-kernel@...r.kernel.org>, <cjia@...dia.com>, <kwankhede@...dia.com>,
<targupta@...dia.com>, <zhiw@...dia.com>, <danw@...dia.com>,
<dnigam@...dia.com>, <kjaju@...dia.com>
Subject: Re: [PATCH v6 5/6] vfio/nvgrace-gpu: Inform devmem unmapped after
reset
On Tue, 25 Nov 2025 17:30:12 +0000
<ankita@...dia.com> wrote:
> From: Ankit Agrawal <ankita@...dia.com>
>
> Introduce a new flag reset_done to notify that the GPU has just
> been reset and the mapping to the GPU memory is zapped.
>
> Implement the reset_done handler to set this new variable. It
> will be used later in the patches to wait for the GPU memory
> to be ready before doing any mapping or access.
>
> cc: Jason Gunthorpe <jgg@...pe.ca>
> Suggested-by: Alex Williamson <alex@...zbot.org>
> Signed-off-by: Ankit Agrawal <ankita@...dia.com>
> ---
> drivers/vfio/pci/nvgrace-gpu/main.c | 19 ++++++++++++++++++-
> 1 file changed, 18 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/vfio/pci/nvgrace-gpu/main.c b/drivers/vfio/pci/nvgrace-gpu/main.c
> index 2b736cb82f38..7d5544280ed2 100644
> --- a/drivers/vfio/pci/nvgrace-gpu/main.c
> +++ b/drivers/vfio/pci/nvgrace-gpu/main.c
> @@ -58,6 +58,8 @@ struct nvgrace_gpu_pci_core_device {
> /* Lock to control device memory kernel mapping */
> struct mutex remap_lock;
> bool has_mig_hw_bug;
> + /* GPU has just been reset */
> + bool reset_done;
> };
>
> static void nvgrace_gpu_init_fake_bar_emu_regs(struct vfio_device *core_vdev)
> @@ -1047,12 +1049,27 @@ static const struct pci_device_id nvgrace_gpu_vfio_pci_table[] = {
>
> MODULE_DEVICE_TABLE(pci, nvgrace_gpu_vfio_pci_table);
>
/*
* Please add a comment here explaining why this can't use
* lockdep_assert_held_write(), but, in vfio use cases, relies on this
* for serialization against faults and read/write.
*/
Thanks,
Alex
> +static void nvgrace_gpu_vfio_pci_reset_done(struct pci_dev *pdev)
> +{
> + struct vfio_pci_core_device *core_device = dev_get_drvdata(&pdev->dev);
> + struct nvgrace_gpu_pci_core_device *nvdev =
> + container_of(core_device, struct nvgrace_gpu_pci_core_device,
> + core_device);
> +
> + nvdev->reset_done = true;
> +}
> +
> +static const struct pci_error_handlers nvgrace_gpu_vfio_pci_err_handlers = {
> + .reset_done = nvgrace_gpu_vfio_pci_reset_done,
> + .error_detected = vfio_pci_core_aer_err_detected,
> +};
> +
> static struct pci_driver nvgrace_gpu_vfio_pci_driver = {
> .name = KBUILD_MODNAME,
> .id_table = nvgrace_gpu_vfio_pci_table,
> .probe = nvgrace_gpu_probe,
> .remove = nvgrace_gpu_remove,
> - .err_handler = &vfio_pci_core_err_handlers,
> + .err_handler = &nvgrace_gpu_vfio_pci_err_handlers,
> .driver_managed_dma = true,
> };
>
Powered by blists - more mailing lists