[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ecdd1f2d-7192-4fce-9fb7-854cdfe2ab88@amd.com>
Date: Wed, 29 Oct 2025 09:28:37 -0500
From: Mario Limonciello <mario.limonciello@....com>
To: Lizhi Hou <lizhi.hou@....com>, ogabbay@...nel.org,
 quic_jhugo@...cinc.com, maciej.falkowski@...ux.intel.com,
 dri-devel@...ts.freedesktop.org
Cc: linux-kernel@...r.kernel.org, max.zhen@....com, sonal.santan@....com
Subject: Re: [PATCH] accel/amdxdna: Fix incorrect command state for timed out
 job
On 10/28/25 12:54 PM, Lizhi Hou wrote:
> When a command times out, mark it as ERT_CMD_STATE_TIMEOUT. Any other
> commands that are canceled due to this timeout should be marked as
> ERT_CMD_STATE_ABORT.
> 
> Fixes: aac243092b70 ("accel/amdxdna: Add command execution")
> Signed-off-by: Lizhi Hou <lizhi.hou@....com>
> ---
>   drivers/accel/amdxdna/aie2_ctx.c    | 12 ++++++++++--
>   drivers/accel/amdxdna/amdxdna_ctx.h |  1 +
>   2 files changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
> index c6c473c78352..958a64bb5251 100644
> --- a/drivers/accel/amdxdna/aie2_ctx.c
> +++ b/drivers/accel/amdxdna/aie2_ctx.c
> @@ -204,10 +204,12 @@ aie2_sched_resp_handler(void *handle, void __iomem *data, size_t size)
>   
>   	cmd_abo = job->cmd_bo;
>   
> -	if (unlikely(!data))
> +	if (unlikely(job->job_timeout)) {
> +		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_TIMEOUT);
>   		goto out;
> +	}
>   
> -	if (unlikely(size != sizeof(u32))) {
> +	if (unlikely(!data) || unlikely(size != sizeof(u32))) {
>   		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
>   		ret = -EINVAL;
>   		goto out;
> @@ -258,6 +260,11 @@ aie2_sched_cmdlist_resp_handler(void *handle, void __iomem *data, size_t size)
>   	int ret = 0;
>   
>   	cmd_abo = job->cmd_bo;
> +	if (unlikely(job->job_timeout)) {
> +		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_TIMEOUT);
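Don't you need to set ret here? The ABORT path just below sets ret = -EINVAL before jumping to out, but this timeout path leaves ret at its initial value of 0.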
> +		goto out;
> +	}
> +
>   	if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) {
>   		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
>   		ret = -EINVAL;
> @@ -370,6 +377,7 @@ aie2_sched_job_timedout(struct drm_sched_job *sched_job)
>   
>   	xdna = hwctx->client->xdna;
>   	trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
> +	job->job_timeout = true;
>   	mutex_lock(&xdna->dev_lock);
>   	aie2_hwctx_stop(xdna, hwctx, sched_job);
>   
> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
> index cbe60efbe60b..919c654dfea6 100644
> --- a/drivers/accel/amdxdna/amdxdna_ctx.h
> +++ b/drivers/accel/amdxdna/amdxdna_ctx.h
> @@ -116,6 +116,7 @@ struct amdxdna_sched_job {
>   	/* user can wait on this fence */
>   	struct dma_fence	*out_fence;
>   	bool			job_done;
> +	bool			job_timeout;
>   	u64			seq;
>   	struct amdxdna_drv_cmd	*drv_cmd;
>   	struct amdxdna_gem_obj	*cmd_bo;
Powered by blists - more mailing lists
 
