[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAPM=9tw-Mkw95g=VDgjvqQha8KaTM7e6Qs2b3bvTaNUdz-Q7Kg@mail.gmail.com>
Date: Wed, 21 Jan 2026 07:36:05 +1000
From: Dave Airlie <airlied@...il.com>
To: Li Chen <me@...ux.beauty>
Cc: Lyude Paul <lyude@...hat.com>, Danilo Krummrich <dakr@...nel.org>,
Maarten Lankhorst <maarten.lankhorst@...ux.intel.com>, Maxime Ripard <mripard@...nel.org>,
Thomas Zimmermann <tzimmermann@...e.de>, Simona Vetter <simona@...ll.ch>, dri-devel@...ts.freedesktop.org,
nouveau@...ts.freedesktop.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH] nouveau: pci: quiesce GPU on shutdown
On Tue, 20 Jan 2026 at 22:15, Li Chen <me@...ux.beauty> wrote:
>
> Kexec reboot does not reset PCI devices.
> Invoking the full DRM/TTM teardown from ->shutdown can trigger WARNs when
> userspace still holds DRM file descriptors.
>
> Quiesce the GPU through the suspend path and then power down the PCI
> function so the next kernel can re-initialize the device from a consistent
> state.
>
> WARNING: drivers/gpu/drm/drm_mode_config.c:578 at drm_mode_config_cleanup+0x2e7/0x300, CPU#2: kexec/1300
> Call Trace:
> <TASK>
> ? srso_return_thunk+0x5/0x5f
> ? enable_work+0x3a/0x100
> nouveau_display_destroy+0x39/0x70 [nouveau c19e0da7fd83583a023f855c510d9a3903808734]
> nouveau_drm_device_fini+0x7b/0x1f0 [nouveau c19e0da7fd83583a023f855c510d9a3903808734]
> nouveau_drm_shutdown+0x52/0xc0 [nouveau c19e0da7fd83583a023f855c510d9a3903808734]
> pci_device_shutdown+0x35/0x60
> device_shutdown+0x11c/0x1b0
> kernel_kexec+0x13a/0x160
> __do_sys_reboot+0x209/0x240
> do_syscall_64+0x81/0x610
> ? srso_return_thunk+0x5/0x5f
> ? __rtnl_unlock+0x37/0x70
> ? srso_return_thunk+0x5/0x5f
> ? netdev_run_todo+0x63/0x570
> ? netif_change_flags+0x54/0x70
> ? srso_return_thunk+0x5/0x5f
> ? devinet_ioctl+0x1e5/0x790
> ? srso_return_thunk+0x5/0x5f
> ? inet_ioctl+0x1e9/0x200
> ? srso_return_thunk+0x5/0x5f
> ? srso_return_thunk+0x5/0x5f
> ? sock_do_ioctl+0x7d/0x130
> ? srso_return_thunk+0x5/0x5f
> ? __x64_sys_ioctl+0x97/0xe0
> ? srso_return_thunk+0x5/0x5f
> ? srso_return_thunk+0x5/0x5f
> ? do_syscall_64+0x23b/0x610
> ? srso_return_thunk+0x5/0x5f
> ? put_user_ifreq+0x7a/0x90
> ? srso_return_thunk+0x5/0x5f
> ? sock_do_ioctl+0x107/0x130
> ? srso_return_thunk+0x5/0x5f
> ? __x64_sys_ioctl+0x97/0xe0
> ? srso_return_thunk+0x5/0x5f
> ? do_syscall_64+0x81/0x610
> ? srso_return_thunk+0x5/0x5f
> ? exc_page_fault+0x7e/0x1a0
> entry_SYSCALL_64_after_hwframe+0x76/0x7e
>
> nouveau 0000:26:00.0: [drm] drm_WARN_ON(!list_empty(&fb->filp_head))
> WARNING: drivers/gpu/drm/drm_framebuffer.c:833 at drm_framebuffer_free+0x73/0xa0, CPU#2: kexec/1300
> Call Trace:
> <TASK>
> drm_mode_config_cleanup+0x248/0x300
> ? __pfx___drm_printfn_dbg+0x10/0x10
> ? drm_mode_config_cleanup+0x1dc/0x300
> nouveau_display_destroy+0x39/0x70 [nouveau c19e0da7fd83583a023f855c510d9a3903808734]
> nouveau_drm_device_fini+0x7b/0x1f0 [nouveau c19e0da7fd83583a023f855c510d9a3903808734]
> nouveau_drm_shutdown+0x52/0xc0 [nouveau c19e0da7fd83583a023f855c510d9a3903808734]
> pci_device_shutdown+0x35/0x60
> device_shutdown+0x11c/0x1b0
> kernel_kexec+0x13a/0x160
> __do_sys_reboot+0x209/0x240
> do_syscall_64+0x81/0x610
> ? srso_return_thunk+0x5/0x5f
> ? __rtnl_unlock+0x37/0x70
> ? srso_return_thunk+0x5/0x5f
> ? netdev_run_todo+0x63/0x570
> ? netif_change_flags+0x54/0x70
> ? srso_return_thunk+0x5/0x5f
> ? devinet_ioctl+0x1e5/0x790
> ? srso_return_thunk+0x5/0x5f
> ? inet_ioctl+0x1e9/0x200
> ? srso_return_thunk+0x5/0x5f
> ? srso_return_thunk+0x5/0x5f
> ? sock_do_ioctl+0x7d/0x130
> ? srso_return_thunk+0x5/0x5f
> ? __x64_sys_ioctl+0x97/0xe0
> ? srso_return_thunk+0x5/0x5f
> ? srso_return_thunk+0x5/0x5f
> ? do_syscall_64+0x23b/0x610
> ? srso_return_thunk+0x5/0x5f
> ? put_user_ifreq+0x7a/0x90
> ? srso_return_thunk+0x5/0x5f
> ? sock_do_ioctl+0x107/0x130
> ? srso_return_thunk+0x5/0x5f
> ? __x64_sys_ioctl+0x97/0xe0
> ? srso_return_thunk+0x5/0x5f
> ? do_syscall_64+0x81/0x610
> ? srso_return_thunk+0x5/0x5f
> ? exc_page_fault+0x7e/0x1a0
> entry_SYSCALL_64_after_hwframe+0x76/0x7e
>
> WARNING: include/drm/ttm/ttm_resource.h:406 at nouveau_ttm_fini+0x257/0x270 [nouveau], CPU#2: kexec/1300
> Call Trace:
> <TASK>
> nouveau_drm_device_fini+0x93/0x1f0 [nouveau c19e0da7fd83583a023f855c510d9a3903808734]
> nouveau_drm_shutdown+0x52/0xc0 [nouveau c19e0da7fd83583a023f855c510d9a3903808734]
> pci_device_shutdown+0x35/0x60
> device_shutdown+0x11c/0x1b0
> kernel_kexec+0x13a/0x160
> __do_sys_reboot+0x209/0x240
> do_syscall_64+0x81/0x610
> ? srso_return_thunk+0x5/0x5f
> ? __rtnl_unlock+0x37/0x70
> ? srso_return_thunk+0x5/0x5f
> ? netdev_run_todo+0x63/0x570
> ? netif_change_flags+0x54/0x70
> ? srso_return_thunk+0x5/0x5f
> ? devinet_ioctl+0x1e5/0x790
> ? srso_return_thunk+0x5/0x5f
> ? inet_ioctl+0x1e9/0x200
> ? srso_return_thunk+0x5/0x5f
> ? srso_return_thunk+0x5/0x5f
> ? sock_do_ioctl+0x7d/0x130
> ? srso_return_thunk+0x5/0x5f
> ? __x64_sys_ioctl+0x97/0xe0
> ? srso_return_thunk+0x5/0x5f
> ? srso_return_thunk+0x5/0x5f
> ? do_syscall_64+0x23b/0x610
> ? srso_return_thunk+0x5/0x5f
> ? put_user_ifreq+0x7a/0x90
> ? srso_return_thunk+0x5/0x5f
> ? sock_do_ioctl+0x107/0x130
> ? srso_return_thunk+0x5/0x5f
> ? __x64_sys_ioctl+0x97/0xe0
> ? srso_return_thunk+0x5/0x5f
> ? do_syscall_64+0x81/0x610
> ? srso_return_thunk+0x5/0x5f
> ? exc_page_fault+0x7e/0x1a0
> entry_SYSCALL_64_after_hwframe+0x76/0x7e
>
> Signed-off-by: Li Chen <me@...ux.beauty>
> ---
> drivers/gpu/drm/nouveau/nouveau_drm.c | 24 ++++++++++++++++++++++++
> 1 file changed, 24 insertions(+)
>
> diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
> index 1527b801f013..50384462723b 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_drm.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
> @@ -1079,6 +1079,29 @@ nouveau_pmops_resume(struct device *dev)
> return ret;
> }
>
> +static void
> +nouveau_drm_shutdown(struct pci_dev *pdev)
> +{
> + struct nouveau_drm *drm = pci_get_drvdata(pdev);
> + int ret;
> +
> + if (!drm)
> + return;
> +
> + if (drm->dev->switch_power_state == DRM_SWITCH_POWER_OFF ||
> + drm->dev->switch_power_state == DRM_SWITCH_POWER_DYNAMIC_OFF)
> + return;
> +
> + ret = nouveau_do_suspend(drm, false);
> + if (ret)
> + NV_ERROR(drm, "shutdown suspend failed with: %d\n", ret);
> +
> + pci_save_state(pdev);
> + pci_disable_device(pdev);
> + pci_set_power_state(pdev, PCI_D3hot);
> + usleep_range(200, 400);
Why is this needed? It at least needs a comment.
Dave.
Powered by blists - more mailing lists