lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <0b15a2b6-1fd1-1e52-4896-e588272b25d3@linaro.org>
Date:   Sat, 16 Oct 2021 17:08:59 +0100
From:   Caleb Connolly <caleb.connolly@...aro.org>
To:     Rob Clark <robdclark@...il.com>, dri-devel@...ts.freedesktop.org
Cc:     freedreno@...ts.freedesktop.org, linux-arm-msm@...r.kernel.org,
        Jordan Crouse <jordan@...micpenguin.net>,
        Rob Clark <robdclark@...omium.org>,
        Sean Paul <sean@...rly.run>, David Airlie <airlied@...ux.ie>,
        Daniel Vetter <daniel@...ll.ch>, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 2/2] drm/msm/devfreq: Add 1ms delay before clamping freq


On 28/09/2021 00:04, Rob Clark wrote:
> From: Rob Clark <robdclark@...omium.org>
> 
> Add a short delay before clamping to idle frequency on active->idle
> transition.  It takes ~0.5ms to increase the freq again on the next
> idle->active transition, so this helps avoid extra freq transitions
> on workloads that bounce between CPU and GPU.
> 
> Signed-off-by: Rob Clark <robdclark@...omium.org>
> ---
> Note that this sort of re-introduces the theoretical race solved
> by [1].. but that should not be a problem with something along the
> lines of [2].
> 
> [1] https://patchwork.freedesktop.org/patch/455910/?series=95111&rev=1
> [2] https://patchwork.freedesktop.org/patch/455928/?series=95119&rev=1
> 
>   drivers/gpu/drm/msm/msm_gpu.h         |  7 +++++
>   drivers/gpu/drm/msm/msm_gpu_devfreq.c | 38 +++++++++++++++++++++------
>   2 files changed, 37 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
> index 32a859307e81..2fcb6c195865 100644
> --- a/drivers/gpu/drm/msm/msm_gpu.h
> +++ b/drivers/gpu/drm/msm/msm_gpu.h
> @@ -120,6 +120,13 @@ struct msm_gpu_devfreq {
>   	 * it is inactive.
>   	 */
>   	unsigned long idle_freq;
> +
> +	/**
> +	 * idle_work:
> +	 *
> +	 * Used to delay clamping to idle freq on active->idle transition.
> +	 */
> +	struct msm_hrtimer_work idle_work;
>   };
> 
>   struct msm_gpu {
> diff --git a/drivers/gpu/drm/msm/msm_gpu_devfreq.c b/drivers/gpu/drm/msm/msm_gpu_devfreq.c
> index 15b64f35c0f6..36e1930ee26d 100644
> --- a/drivers/gpu/drm/msm/msm_gpu_devfreq.c
> +++ b/drivers/gpu/drm/msm/msm_gpu_devfreq.c
> @@ -96,8 +96,12 @@ static struct devfreq_dev_profile msm_devfreq_profile = {
>   	.get_cur_freq = msm_devfreq_get_cur_freq,
>   };
> 
> +static void msm_devfreq_idle_work(struct kthread_work *work);
> +
>   void msm_devfreq_init(struct msm_gpu *gpu)
>   {
> +	struct msm_gpu_devfreq *df = &gpu->devfreq;
> +
>   	/* We need target support to do devfreq */
>   	if (!gpu->funcs->gpu_busy)
>   		return;
> @@ -113,25 +117,27 @@ void msm_devfreq_init(struct msm_gpu *gpu)
>   	msm_devfreq_profile.freq_table = NULL;
>   	msm_devfreq_profile.max_state = 0;
> 
> -	gpu->devfreq.devfreq = devm_devfreq_add_device(&gpu->pdev->dev,
> +	df->devfreq = devm_devfreq_add_device(&gpu->pdev->dev,
>   			&msm_devfreq_profile, DEVFREQ_GOV_SIMPLE_ONDEMAND,
>   			NULL);
> 
> -	if (IS_ERR(gpu->devfreq.devfreq)) {
> +	if (IS_ERR(df->devfreq)) {
>   		DRM_DEV_ERROR(&gpu->pdev->dev, "Couldn't initialize GPU devfreq\n");
> -		gpu->devfreq.devfreq = NULL;
> +		df->devfreq = NULL;
>   		return;
>   	}
> 
> -	devfreq_suspend_device(gpu->devfreq.devfreq);
> +	devfreq_suspend_device(df->devfreq);
> 
> -	gpu->cooling = of_devfreq_cooling_register(gpu->pdev->dev.of_node,
> -			gpu->devfreq.devfreq);
> +	gpu->cooling = of_devfreq_cooling_register(gpu->pdev->dev.of_node, df->devfreq);
>   	if (IS_ERR(gpu->cooling)) {
>   		DRM_DEV_ERROR(&gpu->pdev->dev,
>   				"Couldn't register GPU cooling device\n");
>   		gpu->cooling = NULL;
>   	}
> +
> +	msm_hrtimer_work_init(&df->idle_work, gpu->worker, msm_devfreq_idle_work,
> +			      CLOCK_MONOTONIC, HRTIMER_MODE_REL);
>   }
> 
>   void msm_devfreq_cleanup(struct msm_gpu *gpu)
> @@ -179,6 +185,11 @@ void msm_devfreq_active(struct msm_gpu *gpu)
>   	unsigned int idle_time;
>   	unsigned long target_freq = df->idle_freq;
> 
> +	/*
> +	 * Cancel any pending transition to idle frequency:
> +	 */
> +	hrtimer_cancel(&df->idle_work.timer);
> +
>   	/*
>   	 * Hold devfreq lock to synchronize with get_dev_status()/
>   	 * target() callbacks
> @@ -209,9 +220,12 @@ void msm_devfreq_active(struct msm_gpu *gpu)
>   	mutex_unlock(&df->devfreq->lock);
>   }
> 
> -void msm_devfreq_idle(struct msm_gpu *gpu)
> +
> +static void msm_devfreq_idle_work(struct kthread_work *work)
>   {
> -	struct msm_gpu_devfreq *df = &gpu->devfreq;
> +	struct msm_gpu_devfreq *df = container_of(work,
> +			struct msm_gpu_devfreq, idle_work.work);
> +	struct msm_gpu *gpu = container_of(df, struct msm_gpu, devfreq);
>   	unsigned long idle_freq, target_freq = 0;
> 
>   	/*
> @@ -229,3 +243,11 @@ void msm_devfreq_idle(struct msm_gpu *gpu)
> 
>   	mutex_unlock(&df->devfreq->lock);
>   }
> +
> +void msm_devfreq_idle(struct msm_gpu *gpu)
> +{
> +	struct msm_gpu_devfreq *df = &gpu->devfreq;
> +
> +	msm_hrtimer_queue_work(&df->idle_work, ms_to_ktime(1),
> +			       HRTIMER_MODE_ABS);
> +}
> --
> 2.31.1
> 

Hi Rob,

I tested this patch on the OnePlus 6, with it I'm still able to reproduce the crash introduced by
("drm/msm: Devfreq tuning").

Adjusting the delay from 1ms to 5ms seems to help, at least from some very basic testing.

Perhaps the increased power reliability of the external power supply on dev boards is helping to mask the issue (hence 
why it's harder to reproduce on db845c).

-- 
Kind Regards,
Caleb (they/them)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ