lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <de19aefd-7e46-5653-b4b9-71a811597347@amd.com>
Date: Tue, 9 Dec 2025 11:55:51 -0800
From: Lizhi Hou <lizhi.hou@....com>
To: "Falkowski, Maciej" <maciej.falkowski@...ux.intel.com>,
	<ogabbay@...nel.org>, <quic_jhugo@...cinc.com>,
	<dri-devel@...ts.freedesktop.org>
CC: <linux-kernel@...r.kernel.org>, <max.zhen@....com>,
	<sonal.santan@....com>, <mario.limonciello@....com>
Subject: Re: [PATCH V2] accel/amdxdna: Fix race condition when checking rpm_on

Applied to drm-misc-next.

On 12/9/25 00:28, Falkowski, Maciej wrote:
> Reviewed-by: Maciej Falkowski <maciej.falkowski@...ux.intel.com>
>
> On 12/8/2025 5:53 PM, Lizhi Hou wrote:
>> When autosuspend is triggered, driver rpm_on flag is set to indicate 
>> that
>> a suspend/resume is already in progress. However, when a userspace
>> application submits a command during this narrow window,
>> amdxdna_pm_resume_get() may incorrectly skip the resume operation 
>> because
>> the rpm_on flag is still set. This results in commands being submitted
>> while the device has not actually resumed, causing unexpected behavior.
>>
>> The set_dpm() is called by suspend/resume, it relied on rpm_on flag to
>> avoid calling into rpm suspend/resume recursivly. So to fix this, remove
>> the use of the rpm_on flag entirely. Instead, introduce 
>> aie2_pm_set_dpm()
>> which explicitly resumes the device before invoking set_dpm(). With this
>> change, set_dpm() is called directly inside the suspend or resume 
>> execution
>> path. Otherwise, aie2_pm_set_dpm() is called.
>>
>> Fixes: 063db451832b ("accel/amdxdna: Enhance runtime power management")
>> Signed-off-by: Lizhi Hou <lizhi.hou@....com>
>> ---
>> v2:
>>      Removed drm_WARN_ON() from aie2_send_mgmt_msg_wait().
>>      Revise the description.
>>
>>   drivers/accel/amdxdna/aie2_message.c    |  1 -
>>   drivers/accel/amdxdna/aie2_pci.c        |  2 +-
>>   drivers/accel/amdxdna/aie2_pci.h        |  1 +
>>   drivers/accel/amdxdna/aie2_pm.c         | 17 +++++++++++++++-
>>   drivers/accel/amdxdna/aie2_smu.c        | 27 ++++---------------------
>>   drivers/accel/amdxdna/amdxdna_pci_drv.h |  1 -
>>   drivers/accel/amdxdna/amdxdna_pm.c      | 22 ++------------------
>>   7 files changed, 24 insertions(+), 47 deletions(-)
>>
>> diff --git a/drivers/accel/amdxdna/aie2_message.c 
>> b/drivers/accel/amdxdna/aie2_message.c
>> index fee3b0627aba..a75156800467 100644
>> --- a/drivers/accel/amdxdna/aie2_message.c
>> +++ b/drivers/accel/amdxdna/aie2_message.c
>> @@ -39,7 +39,6 @@ static int aie2_send_mgmt_msg_wait(struct 
>> amdxdna_dev_hdl *ndev,
>>       if (!ndev->mgmt_chann)
>>           return -ENODEV;
>>   -    drm_WARN_ON(&xdna->ddev, xdna->rpm_on && 
>> !mutex_is_locked(&xdna->dev_lock));
>>       ret = xdna_send_msg_wait(xdna, ndev->mgmt_chann, msg);
>>       if (ret == -ETIME) {
>>           xdna_mailbox_stop_channel(ndev->mgmt_chann);
>> diff --git a/drivers/accel/amdxdna/aie2_pci.c 
>> b/drivers/accel/amdxdna/aie2_pci.c
>> index ceef1c502e9e..81a8e4137bfd 100644
>> --- a/drivers/accel/amdxdna/aie2_pci.c
>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>> @@ -321,7 +321,7 @@ static int aie2_xrs_set_dft_dpm_level(struct 
>> drm_device *ddev, u32 dpm_level)
>>       if (ndev->pw_mode != POWER_MODE_DEFAULT || ndev->dpm_level == 
>> dpm_level)
>>           return 0;
>>   -    return ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
>> +    return aie2_pm_set_dpm(ndev, dpm_level);
>>   }
>>     static struct xrs_action_ops aie2_xrs_actions = {
>> diff --git a/drivers/accel/amdxdna/aie2_pci.h 
>> b/drivers/accel/amdxdna/aie2_pci.h
>> index cc9f933f80b2..c6b5cf4ae5c4 100644
>> --- a/drivers/accel/amdxdna/aie2_pci.h
>> +++ b/drivers/accel/amdxdna/aie2_pci.h
>> @@ -286,6 +286,7 @@ int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, 
>> u32 dpm_level);
>>   /* aie2_pm.c */
>>   int aie2_pm_init(struct amdxdna_dev_hdl *ndev);
>>   int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum 
>> amdxdna_power_mode_type target);
>> +int aie2_pm_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
>>     /* aie2_psp.c */
>>   struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct 
>> psp_config *conf);
>> diff --git a/drivers/accel/amdxdna/aie2_pm.c 
>> b/drivers/accel/amdxdna/aie2_pm.c
>> index 426c38fce848..afcd6d4683e5 100644
>> --- a/drivers/accel/amdxdna/aie2_pm.c
>> +++ b/drivers/accel/amdxdna/aie2_pm.c
>> @@ -10,6 +10,7 @@
>>     #include "aie2_pci.h"
>>   #include "amdxdna_pci_drv.h"
>> +#include "amdxdna_pm.h"
>>     #define AIE2_CLK_GATING_ENABLE    1
>>   #define AIE2_CLK_GATING_DISABLE    0
>> @@ -26,6 +27,20 @@ static int aie2_pm_set_clk_gating(struct 
>> amdxdna_dev_hdl *ndev, u32 val)
>>       return 0;
>>   }
>>   +int aie2_pm_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
>> +{
>> +    int ret;
>> +
>> +    ret = amdxdna_pm_resume_get(ndev->xdna);
>> +    if (ret)
>> +        return ret;
>> +
>> +    ret = ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
>> +    amdxdna_pm_suspend_put(ndev->xdna);
>> +
>> +    return ret;
>> +}
>> +
>>   int aie2_pm_init(struct amdxdna_dev_hdl *ndev)
>>   {
>>       int ret;
>> @@ -94,7 +109,7 @@ int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, 
>> enum amdxdna_power_mode_type
>>           return -EOPNOTSUPP;
>>       }
>>   -    ret = ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
>> +    ret = aie2_pm_set_dpm(ndev, dpm_level);
>>       if (ret)
>>           return ret;
>>   diff --git a/drivers/accel/amdxdna/aie2_smu.c 
>> b/drivers/accel/amdxdna/aie2_smu.c
>> index bd94ee96c2bc..2d195e41f83d 100644
>> --- a/drivers/accel/amdxdna/aie2_smu.c
>> +++ b/drivers/accel/amdxdna/aie2_smu.c
>> @@ -11,7 +11,6 @@
>>     #include "aie2_pci.h"
>>   #include "amdxdna_pci_drv.h"
>> -#include "amdxdna_pm.h"
>>     #define SMU_RESULT_OK        1
>>   @@ -67,16 +66,12 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, 
>> u32 dpm_level)
>>       u32 freq;
>>       int ret;
>>   -    ret = amdxdna_pm_resume_get(ndev->xdna);
>> -    if (ret)
>> -        return ret;
>> -
>>       ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ,
>> ndev->priv->dpm_clk_tbl[dpm_level].npuclk, &freq);
>>       if (ret) {
>>           XDNA_ERR(ndev->xdna, "Set npu clock to %d failed, ret %d\n",
>>                ndev->priv->dpm_clk_tbl[dpm_level].npuclk, ret);
>> -        goto suspend_put;
>> +        return ret;
>>       }
>>       ndev->npuclk_freq = freq;
>>   @@ -85,10 +80,9 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, 
>> u32 dpm_level)
>>       if (ret) {
>>           XDNA_ERR(ndev->xdna, "Set h clock to %d failed, ret %d\n",
>>                ndev->priv->dpm_clk_tbl[dpm_level].hclk, ret);
>> -        goto suspend_put;
>> +        return ret;
>>       }
>>   -    amdxdna_pm_suspend_put(ndev->xdna);
>>       ndev->hclk_freq = freq;
>>       ndev->dpm_level = dpm_level;
>>       ndev->max_tops = 2 * ndev->total_col;
>> @@ -98,35 +92,26 @@ int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, 
>> u32 dpm_level)
>>            ndev->npuclk_freq, ndev->hclk_freq);
>>         return 0;
>> -
>> -suspend_put:
>> -    amdxdna_pm_suspend_put(ndev->xdna);
>> -    return ret;
>>   }
>>     int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
>>   {
>>       int ret;
>>   -    ret = amdxdna_pm_resume_get(ndev->xdna);
>> -    if (ret)
>> -        return ret;
>> -
>>       ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HARD_DPMLEVEL, 
>> dpm_level, NULL);
>>       if (ret) {
>>           XDNA_ERR(ndev->xdna, "Set hard dpm level %d failed, ret %d ",
>>                dpm_level, ret);
>> -        goto suspend_put;
>> +        return ret;
>>       }
>>         ret = aie2_smu_exec(ndev, AIE2_SMU_SET_SOFT_DPMLEVEL, 
>> dpm_level, NULL);
>>       if (ret) {
>>           XDNA_ERR(ndev->xdna, "Set soft dpm level %d failed, ret %d",
>>                dpm_level, ret);
>> -        goto suspend_put;
>> +        return ret;
>>       }
>>   -    amdxdna_pm_suspend_put(ndev->xdna);
>>       ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk;
>>       ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk;
>>       ndev->dpm_level = dpm_level;
>> @@ -137,10 +122,6 @@ int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, 
>> u32 dpm_level)
>>            ndev->npuclk_freq, ndev->hclk_freq);
>>         return 0;
>> -
>> -suspend_put:
>> -    amdxdna_pm_suspend_put(ndev->xdna);
>> -    return ret;
>>   }
>>     int aie2_smu_init(struct amdxdna_dev_hdl *ndev)
>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h 
>> b/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> index c99477f5e454..0d50c4c8b353 100644
>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
>> @@ -101,7 +101,6 @@ struct amdxdna_dev {
>>       struct amdxdna_fw_ver        fw_ver;
>>       struct rw_semaphore        notifier_lock; /* for mmu notifier*/
>>       struct workqueue_struct        *notifier_wq;
>> -    bool                rpm_on;
>>   };
>>     /*
>> diff --git a/drivers/accel/amdxdna/amdxdna_pm.c 
>> b/drivers/accel/amdxdna/amdxdna_pm.c
>> index fa38e65d617c..d024d480521c 100644
>> --- a/drivers/accel/amdxdna/amdxdna_pm.c
>> +++ b/drivers/accel/amdxdna/amdxdna_pm.c
>> @@ -15,14 +15,9 @@ int amdxdna_pm_suspend(struct device *dev)
>>   {
>>       struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev));
>>       int ret = -EOPNOTSUPP;
>> -    bool rpm;
>>   -    if (xdna->dev_info->ops->suspend) {
>> -        rpm = xdna->rpm_on;
>> -        xdna->rpm_on = false;
>> +    if (xdna->dev_info->ops->suspend)
>>           ret = xdna->dev_info->ops->suspend(xdna);
>> -        xdna->rpm_on = rpm;
>> -    }
>>         XDNA_DBG(xdna, "Suspend done ret %d", ret);
>>       return ret;
>> @@ -32,14 +27,9 @@ int amdxdna_pm_resume(struct device *dev)
>>   {
>>       struct amdxdna_dev *xdna = to_xdna_dev(dev_get_drvdata(dev));
>>       int ret = -EOPNOTSUPP;
>> -    bool rpm;
>>   -    if (xdna->dev_info->ops->resume) {
>> -        rpm = xdna->rpm_on;
>> -        xdna->rpm_on = false;
>> +    if (xdna->dev_info->ops->resume)
>>           ret = xdna->dev_info->ops->resume(xdna);
>> -        xdna->rpm_on = rpm;
>> -    }
>>         XDNA_DBG(xdna, "Resume done ret %d", ret);
>>       return ret;
>> @@ -50,9 +40,6 @@ int amdxdna_pm_resume_get(struct amdxdna_dev *xdna)
>>       struct device *dev = xdna->ddev.dev;
>>       int ret;
>>   -    if (!xdna->rpm_on)
>> -        return 0;
>> -
>>       ret = pm_runtime_resume_and_get(dev);
>>       if (ret) {
>>           XDNA_ERR(xdna, "Resume failed: %d", ret);
>> @@ -66,9 +53,6 @@ void amdxdna_pm_suspend_put(struct amdxdna_dev *xdna)
>>   {
>>       struct device *dev = xdna->ddev.dev;
>>   -    if (!xdna->rpm_on)
>> -        return;
>> -
>>       pm_runtime_put_autosuspend(dev);
>>   }
>>   @@ -81,14 +65,12 @@ void amdxdna_pm_init(struct amdxdna_dev *xdna)
>>       pm_runtime_use_autosuspend(dev);
>>       pm_runtime_allow(dev);
>>       pm_runtime_put_autosuspend(dev);
>> -    xdna->rpm_on = true;
>>   }
>>     void amdxdna_pm_fini(struct amdxdna_dev *xdna)
>>   {
>>       struct device *dev = xdna->ddev.dev;
>>   -    xdna->rpm_on = false;
>>       pm_runtime_get_noresume(dev);
>>       pm_runtime_forbid(dev);
>>   }

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ