linux-kernel - Re: [PATCH 2/2] drm/msm/devfreq: Add 1ms delay before clamping freq

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <0b15a2b6-1fd1-1e52-4896-e588272b25d3@linaro.org>
Date:   Sat, 16 Oct 2021 17:08:59 +0100
From:   Caleb Connolly <caleb.connolly@...aro.org>
To:     Rob Clark <robdclark@...il.com>, dri-devel@...ts.freedesktop.org
Cc:     freedreno@...ts.freedesktop.org, linux-arm-msm@...r.kernel.org,
        Jordan Crouse <jordan@...micpenguin.net>,
        Rob Clark <robdclark@...omium.org>,
        Sean Paul <sean@...rly.run>, David Airlie <airlied@...ux.ie>,
        Daniel Vetter <daniel@...ll.ch>, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 2/2] drm/msm/devfreq: Add 1ms delay before clamping freq


On 28/09/2021 00:04, Rob Clark wrote:
> From: Rob Clark <robdclark@...omium.org>
> 
> Add a short delay before clamping to idle frequency on active->idle
> transition.  It takes ~0.5ms to increase the freq again on the next
> idle->active transition, so this helps avoid extra freq transitions
> on workloads that bounce between CPU and GPU.
> 
> Signed-off-by: Rob Clark <robdclark@...omium.org>
> ---
> Note that this sort of re-introduces the theoretical race solved
> by [1].. but that should not be a problem with something along the
> lines of [2].
> 
> [1] https://patchwork.freedesktop.org/patch/455910/?series=95111&rev=1
> [2] https://patchwork.freedesktop.org/patch/455928/?series=95119&rev=1
> 
>   drivers/gpu/drm/msm/msm_gpu.h         |  7 +++++
>   drivers/gpu/drm/msm/msm_gpu_devfreq.c | 38 +++++++++++++++++++++------
>   2 files changed, 37 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
> index 32a859307e81..2fcb6c195865 100644
> --- a/drivers/gpu/drm/msm/msm_gpu.h
> +++ b/drivers/gpu/drm/msm/msm_gpu.h
> @@ -120,6 +120,13 @@ struct msm_gpu_devfreq {
>   	 * it is inactive.
>   	 */
>   	unsigned long idle_freq;
> +
> +	/**
> +	 * idle_work:
> +	 *
> +	 * Used to delay clamping to idle freq on active->idle transition.
> +	 */
> +	struct msm_hrtimer_work idle_work;
>   };
> 
>   struct msm_gpu {
> diff --git a/drivers/gpu/drm/msm/msm_gpu_devfreq.c b/drivers/gpu/drm/msm/msm_gpu_devfreq.c
> index 15b64f35c0f6..36e1930ee26d 100644
> --- a/drivers/gpu/drm/msm/msm_gpu_devfreq.c
> +++ b/drivers/gpu/drm/msm/msm_gpu_devfreq.c
> @@ -96,8 +96,12 @@ static struct devfreq_dev_profile msm_devfreq_profile = {
>   	.get_cur_freq = msm_devfreq_get_cur_freq,
>   };
> 
> +static void msm_devfreq_idle_work(struct kthread_work *work);
> +
>   void msm_devfreq_init(struct msm_gpu *gpu)
>   {
> +	struct msm_gpu_devfreq *df = &gpu->devfreq;
> +
>   	/* We need target support to do devfreq */
>   	if (!gpu->funcs->gpu_busy)
>   		return;
> @@ -113,25 +117,27 @@ void msm_devfreq_init(struct msm_gpu *gpu)
>   	msm_devfreq_profile.freq_table = NULL;
>   	msm_devfreq_profile.max_state = 0;
> 
> -	gpu->devfreq.devfreq = devm_devfreq_add_device(&gpu->pdev->dev,
> +	df->devfreq = devm_devfreq_add_device(&gpu->pdev->dev,
>   			&msm_devfreq_profile, DEVFREQ_GOV_SIMPLE_ONDEMAND,
>   			NULL);
> 
> -	if (IS_ERR(gpu->devfreq.devfreq)) {
> +	if (IS_ERR(df->devfreq)) {
>   		DRM_DEV_ERROR(&gpu->pdev->dev, "Couldn't initialize GPU devfreq\n");
> -		gpu->devfreq.devfreq = NULL;
> +		df->devfreq = NULL;
>   		return;
>   	}
> 
> -	devfreq_suspend_device(gpu->devfreq.devfreq);
> +	devfreq_suspend_device(df->devfreq);
> 
> -	gpu->cooling = of_devfreq_cooling_register(gpu->pdev->dev.of_node,
> -			gpu->devfreq.devfreq);
> +	gpu->cooling = of_devfreq_cooling_register(gpu->pdev->dev.of_node, df->devfreq);
>   	if (IS_ERR(gpu->cooling)) {
>   		DRM_DEV_ERROR(&gpu->pdev->dev,
>   				"Couldn't register GPU cooling device\n");
>   		gpu->cooling = NULL;
>   	}
> +
> +	msm_hrtimer_work_init(&df->idle_work, gpu->worker, msm_devfreq_idle_work,
> +			      CLOCK_MONOTONIC, HRTIMER_MODE_REL);
>   }
> 
>   void msm_devfreq_cleanup(struct msm_gpu *gpu)
> @@ -179,6 +185,11 @@ void msm_devfreq_active(struct msm_gpu *gpu)
>   	unsigned int idle_time;
>   	unsigned long target_freq = df->idle_freq;
> 
> +	/*
> +	 * Cancel any pending transition to idle frequency:
> +	 */
> +	hrtimer_cancel(&df->idle_work.timer);
> +
>   	/*
>   	 * Hold devfreq lock to synchronize with get_dev_status()/
>   	 * target() callbacks
> @@ -209,9 +220,12 @@ void msm_devfreq_active(struct msm_gpu *gpu)
>   	mutex_unlock(&df->devfreq->lock);
>   }
> 
> -void msm_devfreq_idle(struct msm_gpu *gpu)
> +
> +static void msm_devfreq_idle_work(struct kthread_work *work)
>   {
> -	struct msm_gpu_devfreq *df = &gpu->devfreq;
> +	struct msm_gpu_devfreq *df = container_of(work,
> +			struct msm_gpu_devfreq, idle_work.work);
> +	struct msm_gpu *gpu = container_of(df, struct msm_gpu, devfreq);
>   	unsigned long idle_freq, target_freq = 0;
> 
>   	/*
> @@ -229,3 +243,11 @@ void msm_devfreq_idle(struct msm_gpu *gpu)
> 
>   	mutex_unlock(&df->devfreq->lock);
>   }
> +
> +void msm_devfreq_idle(struct msm_gpu *gpu)
> +{
> +	struct msm_gpu_devfreq *df = &gpu->devfreq;
> +
> +	msm_hrtimer_queue_work(&df->idle_work, ms_to_ktime(1),
> +			       HRTIMER_MODE_ABS);
> +}
> --
> 2.31.1
> 

Hi Rob,

I tested this patch on the OnePlus 6, with it I'm still able to reproduce the crash introduced by
("drm/msm: Devfreq tuning").

Adjusting the delay from 1ms to 5ms seems to help, at least from some very basic testing.

Perhaps the increased power reliability of the external power supply on dev boards is helping to mask the issue (hence 
why it's harder to reproduce on db845c).

-- 
Kind Regards,
Caleb (they/them)