lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <2ac2dc2f-552c-4084-872b-179a1e489156@amd.com>
Date: Mon, 9 Feb 2026 10:35:05 +0100
From: Christian König <christian.koenig@....com>
To: Hamza Mahfooz <someguy@...ective-light.com>,
 dri-devel@...ts.freedesktop.org
Cc: Timur Kristóf <timur.kristof@...il.com>,
 Michel Dänzer <michel.daenzer@...lbox.org>,
 Xaver Hugl <xaver.hugl@....org>, Harry Wentland <harry.wentland@....com>,
 Leo Li <sunpeng.li@....com>, Rodrigo Siqueira <siqueira@...lia.com>,
 Alex Deucher <alexander.deucher@....com>, David Airlie <airlied@...il.com>,
 Simona Vetter <simona@...ll.ch>,
 Maarten Lankhorst <maarten.lankhorst@...ux.intel.com>,
 Maxime Ripard <mripard@...nel.org>, Thomas Zimmermann <tzimmermann@...e.de>,
 Ivan Lipski <ivan.lipski@....com>, Kenneth Feng <kenneth.feng@....com>,
 Alex Hung <alex.hung@....com>, Tom Chung <chiahsuan.chung@....com>,
 Melissa Wen <mwen@...lia.com>, Michel Dänzer
 <mdaenzer@...hat.com>, Fangzhi Zuo <Jerry.Zuo@....com>,
 amd-gfx@...ts.freedesktop.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2 1/2] drm: introduce KMS recovery mechanism

On 2/7/26 00:58, Hamza Mahfooz wrote:
> There should be a mechanism for drivers to respond to flip_done
> timeouts, since, as it stands, it is possible for the display to stall
> indefinitely, necessitating a hard reset. So, introduce a new mechanism
> that tries various methods of recovery with increasing aggression, in
> the following order:
> 
> 1. Force a full modeset (have the compositor reprogram the state from
>    scratch).
> 2. As a last resort, have the driver attempt a vendor specific reset
>    (assuming it provides an implementation of
>    drm_crtc_funcs.page_flip_timeout()).
> 
> Signed-off-by: Hamza Mahfooz <someguy@...ective-light.com>
> ---
> v2: new to the series
> ---
>  drivers/gpu/drm/drm_atomic_helper.c | 36 ++++++++++++++++++++++++++---
>  include/drm/drm_crtc.h              |  9 ++++++++
>  include/drm/drm_device.h            | 24 +++++++++++++++++++
>  3 files changed, 66 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
> index 5840e9cc6f66..f46d68418e32 100644
> --- a/drivers/gpu/drm/drm_atomic_helper.c
> +++ b/drivers/gpu/drm/drm_atomic_helper.c
> @@ -42,6 +42,7 @@
>  #include <drm/drm_gem_atomic_helper.h>
>  #include <drm/drm_panic.h>
>  #include <drm/drm_print.h>
> +#include <drm/drm_probe_helper.h>
>  #include <drm/drm_self_refresh_helper.h>
>  #include <drm/drm_vblank.h>
>  #include <drm/drm_writeback.h>
> @@ -1881,11 +1882,40 @@ void drm_atomic_helper_wait_for_flip_done(struct drm_device *dev,
>  			continue;
>  
>  		ret = wait_for_completion_timeout(&commit->flip_done, 10 * HZ);
> -		if (ret == 0)
> -			drm_err(dev, "[CRTC:%d:%s] flip_done timed out\n",
> -				crtc->base.id, crtc->name);
> +		if (!ret) {
> +			switch (dev->reset_phase) {
> +			case DRM_KMS_RESET_NONE:
> +				drm_err(dev, "[CRTC:%d:%s] flip_done timed out\n",
> +					crtc->base.id, crtc->name);
> +				dev->reset_phase = DRM_KMS_RESET_FORCE_MODESET;
> +				drm_kms_helper_hotplug_event(dev);
> +				break;
> +			case DRM_KMS_RESET_FORCE_MODESET:
> +				drm_err(dev, "[CRTC:%d:%s] force full modeset failed\n",
> +					crtc->base.id, crtc->name);
> +				dev->reset_phase = DRM_KMS_RESET_VENDOR;
> +				if (crtc->funcs->page_flip_timeout)
> +					crtc->funcs->page_flip_timeout(crtc);
> +				break;
> +			case DRM_KMS_RESET_VENDOR:
> +				drm_err(dev, "[CRTC:%d:%s] KMS recovery failed!\n",
> +					crtc->base.id, crtc->name);
> +				dev->reset_phase = DRM_KMS_RESET_GIVE_UP;
> +				break;
> +			default:
> +				break;
> +			}
> +
> +			goto exit;
> +		}
> +	}
> +
> +	if (dev->reset_phase) {
> +		drm_info(dev, "KMS recovery succeeded!\n");
> +		dev->reset_phase = DRM_KMS_RESET_NONE;
>  	}
>  
> +exit:
>  	if (state->fake_commit)
>  		complete_all(&state->fake_commit->flip_done);
>  }
> diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
> index 66278ffeebd6..45dc5a76e915 100644
> --- a/include/drm/drm_crtc.h
> +++ b/include/drm/drm_crtc.h
> @@ -609,6 +609,15 @@ struct drm_crtc_funcs {
>  				uint32_t flags, uint32_t target,
>  				struct drm_modeset_acquire_ctx *ctx);
>  
> +	/**
> +	 * @page_flip_timeout:
> +	 *
> +	 * This optional hook is called if &drm_crtc_commit.flip_done times out,
> +	 * and can be used by drivers to attempt to recover from a page flip
> +	 * timeout.
> +	 */
> +	void (*page_flip_timeout)(struct drm_crtc *crtc);

As far as I can see a callback is clearly not the right approach.

The drm_atomic_helper_wait_for_flip_done() helper is called by the driver, isn't it?

So what we need is just to give an error code back to the driver.

Regards,
Christian.

> +
>  	/**
>  	 * @set_property:
>  	 *
> diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h
> index bc78fb77cc27..1244d7527e7b 100644
> --- a/include/drm/drm_device.h
> +++ b/include/drm/drm_device.h
> @@ -66,6 +66,23 @@ enum switch_power_state {
>  	DRM_SWITCH_POWER_DYNAMIC_OFF = 3,
>  };
>  
> +/**
> + * enum drm_kms_reset_phase - reset phase of drm device
> + */
> +enum drm_kms_reset_phase {
> +	/** @DRM_KMS_RESET_NONE: Not currently attempting recovery */
> +	DRM_KMS_RESET_NONE,
> +
> +	/** @DRM_KMS_RESET_FORCE_MODESET: Force a full modeset */
> +	DRM_KMS_RESET_FORCE_MODESET,
> +
> +	/** @DRM_KMS_RESET_VENDOR: Attempt a vendor reset */
> +	DRM_KMS_RESET_VENDOR,
> +
> +	/** @DRM_KMS_RESET_GIVE_UP: All recovery methods failed */
> +	DRM_KMS_RESET_GIVE_UP,
> +};
> +
>  /**
>   * struct drm_device - DRM device structure
>   *
> @@ -375,6 +392,13 @@ struct drm_device {
>  	 * Root directory for debugfs files.
>  	 */
>  	struct dentry *debugfs_root;
> +
> +	/**
> +	 * @reset_phase:
> +	 *
> +	 * Reset phase that the device is in.
> +	 */
> +	enum drm_kms_reset_phase reset_phase;
>  };
>  
>  void drm_dev_set_dma_dev(struct drm_device *dev, struct device *dma_dev);


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ