[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <b18d6c28-fd7a-43df-b2a5-a8af3050c2a1@amd.com>
Date: Mon, 1 Dec 2025 23:51:59 -0600
From: Mario Limonciello <mario.limonciello@....com>
To: Lizhi Hou <lizhi.hou@....com>, ogabbay@...nel.org,
quic_jhugo@...cinc.com, dri-devel@...ts.freedesktop.org,
maciej.falkowski@...ux.intel.com
Cc: linux-kernel@...r.kernel.org, max.zhen@....com, sonal.santan@....com
Subject: Re: [PATCH V1] accel/amdxdna: Poll MPNPU_PWAITMODE after requesting
firmware suspend
On 12/1/2025 11:37 PM, Lizhi Hou wrote:
> After issuing a firmware suspend request, the driver must ensure that the
> suspend operation has completed before proceeding. Add polling of the
> MPNPU_PWAITMODE register to confirm that the firmware has fully entered
> the suspended state. This prevents race conditions where subsequent
> operations assume the firmware is idle before it has actually completed
> its suspend sequence.
>
> Signed-off-by: Lizhi Hou <lizhi.hou@....com>
> ---
> drivers/accel/amdxdna/aie2_message.c | 9 ++++++++-
> drivers/accel/amdxdna/aie2_pci.h | 2 ++
> drivers/accel/amdxdna/aie2_psp.c | 15 +++++++++++++++
> drivers/accel/amdxdna/npu1_regs.c | 2 ++
> drivers/accel/amdxdna/npu2_regs.c | 2 ++
> drivers/accel/amdxdna/npu4_regs.c | 2 ++
> drivers/accel/amdxdna/npu5_regs.c | 4 +++-
> drivers/accel/amdxdna/npu6_regs.c | 2 ++
> 8 files changed, 36 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
> index d493bb1c3360..fee3b0627aba 100644
> --- a/drivers/accel/amdxdna/aie2_message.c
> +++ b/drivers/accel/amdxdna/aie2_message.c
> @@ -59,8 +59,15 @@ static int aie2_send_mgmt_msg_wait(struct amdxdna_dev_hdl *ndev,
> int aie2_suspend_fw(struct amdxdna_dev_hdl *ndev)
> {
> DECLARE_AIE2_MSG(suspend, MSG_OP_SUSPEND);
> + int ret;
>
> - return aie2_send_mgmt_msg_wait(ndev, &msg);
> + ret = aie2_send_mgmt_msg_wait(ndev, &msg);
> + if (ret) {
> + XDNA_ERR(ndev->xdna, "Failed to suspend fw, ret %d", ret);
> + return ret;
> + }
> +
> + return aie2_psp_waitmode_poll(ndev->psp_hdl);
> }
>
> int aie2_resume_fw(struct amdxdna_dev_hdl *ndev)
> diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
> index a5f9c42155d1..cc9f933f80b2 100644
> --- a/drivers/accel/amdxdna/aie2_pci.h
> +++ b/drivers/accel/amdxdna/aie2_pci.h
> @@ -70,6 +70,7 @@ enum psp_reg_idx {
> PSP_INTR_REG = PSP_NUM_IN_REGS,
> PSP_STATUS_REG,
> PSP_RESP_REG,
> + PSP_PWAITMODE_REG,
> PSP_MAX_REGS /* Keep this at the end */
> };
>
> @@ -290,6 +291,7 @@ int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum amdxdna_power_mode_type
> struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct psp_config *conf);
> int aie2_psp_start(struct psp_device *psp);
> void aie2_psp_stop(struct psp_device *psp);
> +int aie2_psp_waitmode_poll(struct psp_device *psp);
>
> /* aie2_error.c */
> int aie2_error_async_events_alloc(struct amdxdna_dev_hdl *ndev);
> diff --git a/drivers/accel/amdxdna/aie2_psp.c b/drivers/accel/amdxdna/aie2_psp.c
> index f28a060a8810..4bc60f458fd4 100644
> --- a/drivers/accel/amdxdna/aie2_psp.c
> +++ b/drivers/accel/amdxdna/aie2_psp.c
> @@ -76,6 +76,21 @@ static int psp_exec(struct psp_device *psp, u32 *reg_vals)
> return 0;
> }
>
> +int aie2_psp_waitmode_poll(struct psp_device *psp)
> +{
> + int mode_reg = -1, ret;
It seems like from the usage mode_reg shouldn't be a signed integer.
> +
> + ret = readx_poll_timeout(readl, PSP_REG(psp, PSP_PWAITMODE_REG), mode_reg,
> + (mode_reg & 0x1) == 1,
> + PSP_POLL_INTERVAL, PSP_POLL_TIMEOUT);
> + if (ret) {
> + drm_err(psp->ddev, "fw waitmode reg error, ret 0x%x", ret);
Aren't these return codes going to be negative in the case of an error?
IE they shouldn't be 0x%x, they should be %d.
Also shouldn't you be using XDNA_ERR()?
> + return ret;
> + }
> +
> + return 0;
You can probably simplify as:
if (ret)
drm_err();
return ret;
> +}
> +
> void aie2_psp_stop(struct psp_device *psp)
> {
> u32 reg_vals[PSP_NUM_IN_REGS] = { PSP_RELEASE_TMR, };
> diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/amdxdna/npu1_regs.c
> index ec407f3b48fc..ebc6e2802297 100644
> --- a/drivers/accel/amdxdna/npu1_regs.c
> +++ b/drivers/accel/amdxdna/npu1_regs.c
> @@ -13,6 +13,7 @@
> #include "amdxdna_pci_drv.h"
>
> /* Address definition from NPU1 docs */
> +#define MPNPU_PWAITMODE 0x3010034
> #define MPNPU_PUB_SEC_INTR 0x3010090
> #define MPNPU_PUB_PWRMGMT_INTR 0x3010094
> #define MPNPU_PUB_SCRATCH2 0x30100A0
> @@ -92,6 +93,7 @@ static const struct amdxdna_dev_priv npu1_dev_priv = {
> DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU1_PSP, MPNPU_PUB_SEC_INTR),
> DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU1_PSP, MPNPU_PUB_SCRATCH2),
> DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU1_PSP, MPNPU_PUB_SCRATCH3),
> + DEFINE_BAR_OFFSET(PSP_PWAITMODE_REG, NPU1_PSP, MPNPU_PWAITMODE),
> },
> .smu_regs_off = {
> DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU1_SMU, MPNPU_PUB_SCRATCH5),
> diff --git a/drivers/accel/amdxdna/npu2_regs.c b/drivers/accel/amdxdna/npu2_regs.c
> index 86f87d0d1354..ad0743fb06d5 100644
> --- a/drivers/accel/amdxdna/npu2_regs.c
> +++ b/drivers/accel/amdxdna/npu2_regs.c
> @@ -13,6 +13,7 @@
> #include "amdxdna_pci_drv.h"
>
> /* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */
> +#define MPNPU_PWAITMODE 0x301003C
> #define MPNPU_PUB_SEC_INTR 0x3010060
> #define MPNPU_PUB_PWRMGMT_INTR 0x3010064
> #define MPNPU_PUB_SCRATCH0 0x301006C
> @@ -85,6 +86,7 @@ static const struct amdxdna_dev_priv npu2_dev_priv = {
> DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU2_PSP, MP0_C2PMSG_73),
> DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU2_PSP, MP0_C2PMSG_123),
> DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU2_REG, MPNPU_PUB_SCRATCH3),
> + DEFINE_BAR_OFFSET(PSP_PWAITMODE_REG, NPU2_REG, MPNPU_PWAITMODE),
> },
> .smu_regs_off = {
> DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU2_SMU, MP1_C2PMSG_0),
> diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/amdxdna/npu4_regs.c
> index 986a5f28ba24..4ca21db70478 100644
> --- a/drivers/accel/amdxdna/npu4_regs.c
> +++ b/drivers/accel/amdxdna/npu4_regs.c
> @@ -13,6 +13,7 @@
> #include "amdxdna_pci_drv.h"
>
> /* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */
> +#define MPNPU_PWAITMODE 0x301003C
> #define MPNPU_PUB_SEC_INTR 0x3010060
> #define MPNPU_PUB_PWRMGMT_INTR 0x3010064
> #define MPNPU_PUB_SCRATCH0 0x301006C
> @@ -116,6 +117,7 @@ static const struct amdxdna_dev_priv npu4_dev_priv = {
> DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU4_PSP, MP0_C2PMSG_73),
> DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU4_PSP, MP0_C2PMSG_123),
> DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU4_REG, MPNPU_PUB_SCRATCH3),
> + DEFINE_BAR_OFFSET(PSP_PWAITMODE_REG, NPU4_REG, MPNPU_PWAITMODE),
> },
> .smu_regs_off = {
> DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU4_SMU, MP1_C2PMSG_0),
> diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/amdxdna/npu5_regs.c
> index 75ad97f0b937..f761a6661f40 100644
> --- a/drivers/accel/amdxdna/npu5_regs.c
> +++ b/drivers/accel/amdxdna/npu5_regs.c
> @@ -13,6 +13,7 @@
> #include "amdxdna_pci_drv.h"
>
> /* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */
> +#define MPNPU_PWAITMODE 0x301003C
> #define MPNPU_PUB_SEC_INTR 0x3010060
> #define MPNPU_PUB_PWRMGMT_INTR 0x3010064
> #define MPNPU_PUB_SCRATCH0 0x301006C
> @@ -62,7 +63,7 @@
> #define NPU5_SRAM_BAR_BASE MMNPU_APERTURE1_BASE
>
> static const struct amdxdna_dev_priv npu5_dev_priv = {
> - .fw_path = "amdnpu/17f0_11/npu.sbin",
> + .fw_path = "amdnpu/17f0_11/npu.dev.sbin",
This seems like an unintended change.
> .protocol_major = 0x6,
> .protocol_minor = 12,
> .rt_config = npu4_default_rt_cfg,
> @@ -85,6 +86,7 @@ static const struct amdxdna_dev_priv npu5_dev_priv = {
> DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU5_PSP, MP0_C2PMSG_73),
> DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU5_PSP, MP0_C2PMSG_123),
> DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU5_REG, MPNPU_PUB_SCRATCH3),
> + DEFINE_BAR_OFFSET(PSP_PWAITMODE_REG, NPU5_REG, MPNPU_PWAITMODE),
> },
> .smu_regs_off = {
> DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU5_SMU, MP1_C2PMSG_0),
> diff --git a/drivers/accel/amdxdna/npu6_regs.c b/drivers/accel/amdxdna/npu6_regs.c
> index 758dc013fe13..1f71285655b2 100644
> --- a/drivers/accel/amdxdna/npu6_regs.c
> +++ b/drivers/accel/amdxdna/npu6_regs.c
> @@ -13,6 +13,7 @@
> #include "amdxdna_pci_drv.h"
>
> /* NPU Public Registers on MpNPUAxiXbar (refer to Diag npu_registers.h) */
> +#define MPNPU_PWAITMODE 0x301003C
> #define MPNPU_PUB_SEC_INTR 0x3010060
> #define MPNPU_PUB_PWRMGMT_INTR 0x3010064
> #define MPNPU_PUB_SCRATCH0 0x301006C
> @@ -85,6 +86,7 @@ static const struct amdxdna_dev_priv npu6_dev_priv = {
> DEFINE_BAR_OFFSET(PSP_INTR_REG, NPU6_PSP, MP0_C2PMSG_73),
> DEFINE_BAR_OFFSET(PSP_STATUS_REG, NPU6_PSP, MP0_C2PMSG_123),
> DEFINE_BAR_OFFSET(PSP_RESP_REG, NPU6_REG, MPNPU_PUB_SCRATCH3),
> + DEFINE_BAR_OFFSET(PSP_PWAITMODE_REG, NPU6_REG, MPNPU_PWAITMODE),
> },
> .smu_regs_off = {
> DEFINE_BAR_OFFSET(SMU_CMD_REG, NPU6_SMU, MP1_C2PMSG_0),
Powered by blists - more mailing lists