[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <f3337f36-0dfe-a103-5198-3fb640750adf@amd.com>
Date: Sun, 3 Aug 2025 12:13:19 -0700
From: Lizhi Hou <lizhi.hou@....com>
To: "Falkowski, Maciej" <maciej.falkowski@...ux.intel.com>,
<ogabbay@...nel.org>, <quic_jhugo@...cinc.com>,
<jacek.lawrynowicz@...ux.intel.com>, <dri-devel@...ts.freedesktop.org>
CC: <linux-kernel@...r.kernel.org>, <max.zhen@....com>,
<sonal.santan@....com>, <mario.limonciello@....com>
Subject: Re: [PATCH V1] accel/amdxdna: Unify pm and rpm suspend and resume
callbacks
On 8/1/25 07:48, Falkowski, Maciej wrote:
> On 7/31/2025 6:35 PM, Lizhi Hou wrote:
>
>> The suspend and resume callbacks for pm and runtime pm should be same.
>> During suspending, it needs to stop all hardware contexts first. And
>> the hardware contexts will be restarted after the device is resumed.
>>
>> Signed-off-by: Lizhi Hou <lizhi.hou@....com>
>> ---
>> drivers/accel/amdxdna/aie2_ctx.c | 32 +++++++----
>> drivers/accel/amdxdna/aie2_pci.c | 41 ++++++++++++--
>> drivers/accel/amdxdna/aie2_pci.h | 4 +-
>> drivers/accel/amdxdna/amdxdna_ctx.c | 26 ---------
>> drivers/accel/amdxdna/amdxdna_ctx.h | 2 -
>> drivers/accel/amdxdna/amdxdna_pci_drv.c | 74 +++----------------------
>> drivers/accel/amdxdna/amdxdna_pci_drv.h | 4 +-
>> 7 files changed, 69 insertions(+), 114 deletions(-)
>>
>> diff --git a/drivers/accel/amdxdna/aie2_ctx.c
>> b/drivers/accel/amdxdna/aie2_ctx.c
>> index 2cff5419bd2f..7444117f0e17 100644
>> --- a/drivers/accel/amdxdna/aie2_ctx.c
>> +++ b/drivers/accel/amdxdna/aie2_ctx.c
>> @@ -141,9 +141,11 @@ static void aie2_hwctx_wait_for_idle(struct
>> amdxdna_hwctx *hwctx)
>> dma_fence_put(fence);
>> }
>> -void aie2_hwctx_suspend(struct amdxdna_hwctx *hwctx)
>> +void aie2_hwctx_suspend(struct amdxdna_client *client)
>> {
>> - struct amdxdna_dev *xdna = hwctx->client->xdna;
>> + struct amdxdna_dev *xdna = client->xdna;
>> + struct amdxdna_hwctx *hwctx;
>> + unsigned long hwctx_id;
>> /*
>> * Command timeout is unlikely. But if it happens, it doesn't
>> @@ -151,15 +153,21 @@ void aie2_hwctx_suspend(struct amdxdna_hwctx
>> *hwctx)
>> * and abort all commands.
>> */
>> drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
>> - aie2_hwctx_wait_for_idle(hwctx);
>> - aie2_hwctx_stop(xdna, hwctx, NULL);
>> - hwctx->old_status = hwctx->status;
>> - hwctx->status = HWCTX_STAT_STOP;
>> + mutex_lock(&client->hwctx_lock);
>> + amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
>> + aie2_hwctx_wait_for_idle(hwctx);
>> + aie2_hwctx_stop(xdna, hwctx, NULL);
>> + hwctx->old_status = hwctx->status;
>> + hwctx->status = HWCTX_STAT_STOP;
>
> nit: You might refactor this status shift for maintainability to
> something like
> aie2_hwctx_shift_status(HWCTX_STAT_STOP);
>
> But that is just a nit, otherwise
>
> Reviewed-by: Maciej Falkowski <maciej.falkowski@...ux.intel.com>
Thanks. I will cleanup.
Lizhi
>> + }
>> + mutex_unlock(&client->hwctx_lock);
>> }
>> -void aie2_hwctx_resume(struct amdxdna_hwctx *hwctx)
>> +void aie2_hwctx_resume(struct amdxdna_client *client)
>> {
>> - struct amdxdna_dev *xdna = hwctx->client->xdna;
>> + struct amdxdna_dev *xdna = client->xdna;
>> + struct amdxdna_hwctx *hwctx;
>> + unsigned long hwctx_id;
>> /*
>> * The resume path cannot guarantee that mailbox channel can be
>> @@ -167,8 +175,12 @@ void aie2_hwctx_resume(struct amdxdna_hwctx *hwctx)
>> * mailbox channel, error will return.
>> */
>> drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
>> - hwctx->status = hwctx->old_status;
>> - aie2_hwctx_restart(xdna, hwctx);
>> + mutex_lock(&client->hwctx_lock);
>> + amdxdna_for_each_hwctx(client, hwctx_id, hwctx) {
>> + hwctx->status = hwctx->old_status;
>> + aie2_hwctx_restart(xdna, hwctx);
>> + }
>> + mutex_unlock(&client->hwctx_lock);
>> }
>> static void
>> diff --git a/drivers/accel/amdxdna/aie2_pci.c
>> b/drivers/accel/amdxdna/aie2_pci.c
>> index 3474a8d4e560..8b6f17430d3a 100644
>> --- a/drivers/accel/amdxdna/aie2_pci.c
>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>> @@ -440,6 +440,41 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
>> return ret;
>> }
>> +static int aie2_hw_suspend(struct amdxdna_dev *xdna)
>> +{
>> + struct amdxdna_client *client;
>> +
>> + mutex_lock(&xdna->dev_lock);
>> + list_for_each_entry(client, &xdna->client_list, node)
>> + aie2_hwctx_suspend(client);
>> +
>> + aie2_hw_stop(xdna);
>> + mutex_unlock(&xdna->dev_lock);
>> +
>> + return 0;
>> +}
>> +
>> +static int aie2_hw_resume(struct amdxdna_dev *xdna)
>> +{
>> + struct amdxdna_client *client;
>> + int ret;
>> +
>> + mutex_lock(&xdna->dev_lock);
>> + ret = aie2_hw_start(xdna);
>> + if (ret) {
>> + XDNA_ERR(xdna, "Start hardware failed, %d", ret);
>> + goto unlock;
>> + }
>> +
>> + list_for_each_entry(client, &xdna->client_list, node)
>> + aie2_hwctx_resume(client);
>> +
>> +unlock:
>> + mutex_unlock(&xdna->dev_lock);
>> +
>> + return ret;
>> +}
>> +
>> static int aie2_init(struct amdxdna_dev *xdna)
>> {
>> struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
>> @@ -901,8 +936,8 @@ static int aie2_set_state(struct amdxdna_client
>> *client,
>> const struct amdxdna_dev_ops aie2_ops = {
>> .init = aie2_init,
>> .fini = aie2_fini,
>> - .resume = aie2_hw_start,
>> - .suspend = aie2_hw_stop,
>> + .resume = aie2_hw_resume,
>> + .suspend = aie2_hw_suspend,
>> .get_aie_info = aie2_get_info,
>> .set_aie_state = aie2_set_state,
>> .hwctx_init = aie2_hwctx_init,
>> @@ -910,6 +945,4 @@ const struct amdxdna_dev_ops aie2_ops = {
>> .hwctx_config = aie2_hwctx_config,
>> .cmd_submit = aie2_cmd_submit,
>> .hmm_invalidate = aie2_hmm_invalidate,
>> - .hwctx_suspend = aie2_hwctx_suspend,
>> - .hwctx_resume = aie2_hwctx_resume,
>> };
>> diff --git a/drivers/accel/amdxdna/aie2_pci.h
>> b/drivers/accel/amdxdna/aie2_pci.h
>> index 385914840eaa..6b21909841e0 100644
>> --- a/drivers/accel/amdxdna/aie2_pci.h
>> +++ b/drivers/accel/amdxdna/aie2_pci.h
>> @@ -288,8 +288,8 @@ int aie2_sync_bo(struct amdxdna_hwctx *hwctx,
>> struct amdxdna_sched_job *job,
>> int aie2_hwctx_init(struct amdxdna_hwctx *hwctx);
>> void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx);
>> int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64
>> value, void *buf, u32 size);
>> -void aie2_hwctx_suspend(struct amdxdna_hwctx *hwctx);
>> -void aie2_hwctx_resume(struct amdxdna_hwctx *hwctx);
>> +void aie2_hwctx_suspend(struct amdxdna_client *client);
>> +void aie2_hwctx_resume(struct amdxdna_client *client);
>> int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct
>> amdxdna_sched_job *job, u64 *seq);
>> void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, unsigned long
>> cur_seq);
>> void aie2_restart_ctx(struct amdxdna_client *client);
>> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c
>> b/drivers/accel/amdxdna/amdxdna_ctx.c
>> index be073224bd69..b47a7f8e9017 100644
>> --- a/drivers/accel/amdxdna/amdxdna_ctx.c
>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.c
>> @@ -60,32 +60,6 @@ static struct dma_fence
>> *amdxdna_fence_create(struct amdxdna_hwctx *hwctx)
>> return &fence->base;
>> }
>> -void amdxdna_hwctx_suspend(struct amdxdna_client *client)
>> -{
>> - struct amdxdna_dev *xdna = client->xdna;
>> - struct amdxdna_hwctx *hwctx;
>> - unsigned long hwctx_id;
>> -
>> - drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
>> - mutex_lock(&client->hwctx_lock);
>> - amdxdna_for_each_hwctx(client, hwctx_id, hwctx)
>> - xdna->dev_info->ops->hwctx_suspend(hwctx);
>> - mutex_unlock(&client->hwctx_lock);
>> -}
>> -
>> -void amdxdna_hwctx_resume(struct amdxdna_client *client)
>> -{
>> - struct amdxdna_dev *xdna = client->xdna;
>> - struct amdxdna_hwctx *hwctx;
>> - unsigned long hwctx_id;
>> -
>> - drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
>> - mutex_lock(&client->hwctx_lock);
>> - amdxdna_for_each_hwctx(client, hwctx_id, hwctx)
>> - xdna->dev_info->ops->hwctx_resume(hwctx);
>> - mutex_unlock(&client->hwctx_lock);
>> -}
>> -
>> static void amdxdna_hwctx_destroy_rcu(struct amdxdna_hwctx *hwctx,
>> struct srcu_struct *ss)
>> {
>> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h
>> b/drivers/accel/amdxdna/amdxdna_ctx.h
>> index f0a4a8586d85..c652229547a3 100644
>> --- a/drivers/accel/amdxdna/amdxdna_ctx.h
>> +++ b/drivers/accel/amdxdna/amdxdna_ctx.h
>> @@ -147,8 +147,6 @@ static inline u32 amdxdna_hwctx_col_map(struct
>> amdxdna_hwctx *hwctx)
>> void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job);
>> void amdxdna_hwctx_remove_all(struct amdxdna_client *client);
>> -void amdxdna_hwctx_suspend(struct amdxdna_client *client);
>> -void amdxdna_hwctx_resume(struct amdxdna_client *client);
>> int amdxdna_cmd_submit(struct amdxdna_client *client,
>> u32 cmd_bo_hdls, u32 *arg_bo_hdls, u32 arg_bo_cnt,
>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> index f2bf1d374cc7..fbca94183f96 100644
>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> @@ -343,89 +343,29 @@ static void amdxdna_remove(struct pci_dev *pdev)
>> mutex_unlock(&xdna->dev_lock);
>> }
>> -static int amdxdna_dev_suspend_nolock(struct amdxdna_dev *xdna)
>> -{
>> - if (xdna->dev_info->ops->suspend)
>> - xdna->dev_info->ops->suspend(xdna);
>> -
>> - return 0;
>> -}
>> -
>> -static int amdxdna_dev_resume_nolock(struct amdxdna_dev *xdna)
>> -{
>> - if (xdna->dev_info->ops->resume)
>> - return xdna->dev_info->ops->resume(xdna);
>> -
>> - return 0;
>> -}
>> -
>> static int amdxdna_pmops_suspend(struct device *dev)
>> {
>> struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev));
>> - struct amdxdna_client *client;
>> -
>> - mutex_lock(&xdna->dev_lock);
>> - list_for_each_entry(client, &xdna->client_list, node)
>> - amdxdna_hwctx_suspend(client);
>> - amdxdna_dev_suspend_nolock(xdna);
>> - mutex_unlock(&xdna->dev_lock);
>> + if (!xdna->dev_info->ops->suspend)
>> + return -EOPNOTSUPP;
>> - return 0;
>> + return xdna->dev_info->ops->suspend(xdna);
>> }
>> static int amdxdna_pmops_resume(struct device *dev)
>> {
>> struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev));
>> - struct amdxdna_client *client;
>> - int ret;
>> -
>> - XDNA_INFO(xdna, "firmware resuming...");
>> - mutex_lock(&xdna->dev_lock);
>> - ret = amdxdna_dev_resume_nolock(xdna);
>> - if (ret) {
>> - XDNA_ERR(xdna, "resume NPU firmware failed");
>> - mutex_unlock(&xdna->dev_lock);
>> - return ret;
>> - }
>> - XDNA_INFO(xdna, "hardware context resuming...");
>> - list_for_each_entry(client, &xdna->client_list, node)
>> - amdxdna_hwctx_resume(client);
>> - mutex_unlock(&xdna->dev_lock);
>> -
>> - return 0;
>> -}
>> -
>> -static int amdxdna_rpmops_suspend(struct device *dev)
>> -{
>> - struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev));
>> - int ret;
>> -
>> - mutex_lock(&xdna->dev_lock);
>> - ret = amdxdna_dev_suspend_nolock(xdna);
>> - mutex_unlock(&xdna->dev_lock);
>> -
>> - XDNA_DBG(xdna, "Runtime suspend done ret: %d", ret);
>> - return ret;
>> -}
>> -
>> -static int amdxdna_rpmops_resume(struct device *dev)
>> -{
>> - struct amdxdna_dev *xdna = pci_get_drvdata(to_pci_dev(dev));
>> - int ret;
>> -
>> - mutex_lock(&xdna->dev_lock);
>> - ret = amdxdna_dev_resume_nolock(xdna);
>> - mutex_unlock(&xdna->dev_lock);
>> + if (!xdna->dev_info->ops->resume)
>> + return -EOPNOTSUPP;
>> - XDNA_DBG(xdna, "Runtime resume done ret: %d", ret);
>> - return ret;
>> + return xdna->dev_info->ops->resume(xdna);
>> }
>> static const struct dev_pm_ops amdxdna_pm_ops = {
>> SYSTEM_SLEEP_PM_OPS(amdxdna_pmops_suspend, amdxdna_pmops_resume)
>> - RUNTIME_PM_OPS(amdxdna_rpmops_suspend, amdxdna_rpmops_resume, NULL)
>> + RUNTIME_PM_OPS(amdxdna_pmops_suspend, amdxdna_pmops_resume, NULL)
>> };
>> static struct pci_driver amdxdna_pci_driver = {
>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> b/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> index ab79600911aa..40bbb3c06320 100644
>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> @@ -50,13 +50,11 @@ struct amdxdna_dev_ops {
>> int (*init)(struct amdxdna_dev *xdna);
>> void (*fini)(struct amdxdna_dev *xdna);
>> int (*resume)(struct amdxdna_dev *xdna);
>> - void (*suspend)(struct amdxdna_dev *xdna);
>> + int (*suspend)(struct amdxdna_dev *xdna);
>> int (*hwctx_init)(struct amdxdna_hwctx *hwctx);
>> void (*hwctx_fini)(struct amdxdna_hwctx *hwctx);
>> int (*hwctx_config)(struct amdxdna_hwctx *hwctx, u32 type, u64
>> value, void *buf, u32 size);
>> void (*hmm_invalidate)(struct amdxdna_gem_obj *abo, unsigned
>> long cur_seq);
>> - void (*hwctx_suspend)(struct amdxdna_hwctx *hwctx);
>> - void (*hwctx_resume)(struct amdxdna_hwctx *hwctx);
>> int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct
>> amdxdna_sched_job *job, u64 *seq);
>> int (*get_aie_info)(struct amdxdna_client *client, struct
>> amdxdna_drm_get_info *args);
>> int (*set_aie_state)(struct amdxdna_client *client, struct
>> amdxdna_drm_set_state *args);
> Best regards,
> Maciej
Powered by blists - more mailing lists