lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <0a1fa486-6922-4369-863e-6d1ab09a6dba@amd.com>
Date: Wed, 11 Dec 2024 14:55:46 -0600
From: Mario Limonciello <mario.limonciello@....com>
To: Lizhi Hou <lizhi.hou@....com>, ogabbay@...nel.org,
 quic_jhugo@...cinc.com, dri-devel@...ts.freedesktop.org
Cc: linux-kernel@...r.kernel.org, min.ma@....com, max.zhen@....com,
 sonal.santan@....com, king.tam@....com,
 Narendra Gutta <VenkataNarendraKumar.Gutta@....com>,
 George Yang <George.Yang@....com>
Subject: Re: [PATCH V2 6/8] accel/amdxdna: Enhance power management settings

On 12/10/2024 23:28, Lizhi Hou wrote:
> 
> On 12/10/24 16:28, Mario Limonciello wrote:
>> On 12/6/2024 15:59, Lizhi Hou wrote:
>>> Add SET_STATE ioctl to configure device power mode for aie2 device.
>>> Three modes are supported initially.
>>>
>>> POWER_MODE_DEFAULT: Enable clock gating and set the DPM (Dynamic Power
>>> Management) level to the value which has been set by the resource solver,
>>> or to the maximum DPM level the device supports.
>>>
>>> POWER_MODE_HIGH: Enable clock gating and set DPM level to maximum DPM
>>> level the device supports.
>>>
>>> POWER_MODE_TURBO: Disable clock gating and set DPM level to maximum DPM
>>> level the device supports.
>>>
>>> Disabling clock gating means all clocks always run at full speed, and
>>> different clock frequencies are used based on the DPM level that has
>>> been set. Initially, the driver sets the power mode to default mode.
>>>
>>> Co-developed-by: Narendra Gutta <VenkataNarendraKumar.Gutta@....com>
>>> Signed-off-by: Narendra Gutta <VenkataNarendraKumar.Gutta@....com>
>>> Co-developed-by: George Yang <George.Yang@....com>
>>> Signed-off-by: George Yang <George.Yang@....com>
>>> Signed-off-by: Lizhi Hou <lizhi.hou@....com>
>>> ---
>>>   drivers/accel/amdxdna/Makefile          |   1 +
>>>   drivers/accel/amdxdna/TODO              |   1 -
>>>   drivers/accel/amdxdna/aie2_ctx.c        |   6 ++
>>>   drivers/accel/amdxdna/aie2_message.c    |   9 +-
>>>   drivers/accel/amdxdna/aie2_pci.c        | 136 +++++++++++++++++++-----
>>>   drivers/accel/amdxdna/aie2_pci.h        |  55 ++++++++--
>>>   drivers/accel/amdxdna/aie2_pm.c         | 108 +++++++++++++++++++
>>>   drivers/accel/amdxdna/aie2_smu.c        |  85 +++++++++------
>>>   drivers/accel/amdxdna/aie2_solver.c     |  59 +++++++++-
>>>   drivers/accel/amdxdna/aie2_solver.h     |   1 +
>>>   drivers/accel/amdxdna/amdxdna_pci_drv.c |  19 ++++
>>>   drivers/accel/amdxdna/amdxdna_pci_drv.h |   2 +
>>>   drivers/accel/amdxdna/npu1_regs.c       |  29 +++--
>>>   drivers/accel/amdxdna/npu2_regs.c       |  15 +--
>>>   drivers/accel/amdxdna/npu4_regs.c       |  32 ++++--
>>>   drivers/accel/amdxdna/npu5_regs.c       |  15 +--
>>>   drivers/accel/amdxdna/npu6_regs.c       |  19 ++--
>>>   include/uapi/drm/amdxdna_accel.h        |  52 +++++++++
>>>   18 files changed, 516 insertions(+), 128 deletions(-)
>>>   create mode 100644 drivers/accel/amdxdna/aie2_pm.c
>>>
>>> diff --git a/drivers/accel/amdxdna/Makefile b/drivers/accel/amdxdna/ 
>>> Makefile
>>> index 6baf181298de..0e9adf6890a0 100644
>>> --- a/drivers/accel/amdxdna/Makefile
>>> +++ b/drivers/accel/amdxdna/Makefile
>>> @@ -5,6 +5,7 @@ amdxdna-y := \
>>>       aie2_error.o \
>>>       aie2_message.o \
>>>       aie2_pci.o \
>>> +    aie2_pm.o \
>>>       aie2_psp.o \
>>>       aie2_smu.o \
>>>       aie2_solver.o \
>>> diff --git a/drivers/accel/amdxdna/TODO b/drivers/accel/amdxdna/TODO
>>> index de4e1dbc8868..5119bccd1917 100644
>>> --- a/drivers/accel/amdxdna/TODO
>>> +++ b/drivers/accel/amdxdna/TODO
>>> @@ -1,4 +1,3 @@
>>>   - Add import and export BO support
>>>   - Add debugfs support
>>>   - Add debug BO support
>>> -- Improve power management
>>> diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/ 
>>> amdxdna/aie2_ctx.c
>>> index 07eecb40767f..6b4e6fcb7794 100644
>>> --- a/drivers/accel/amdxdna/aie2_ctx.c
>>> +++ b/drivers/accel/amdxdna/aie2_ctx.c
>>> @@ -518,6 +518,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
>>>       struct drm_gpu_scheduler *sched;
>>>       struct amdxdna_hwctx_priv *priv;
>>>       struct amdxdna_gem_obj *heap;
>>> +    struct amdxdna_dev_hdl *ndev;
>>>       int i, ret;
>>>         priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL);
>>> @@ -612,6 +613,8 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
>>>       }
>>>         hwctx->status = HWCTX_STAT_INIT;
>>> +    ndev = xdna->dev_handle;
>>> +    ndev->hwctx_num++;
>>>         XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);
>>>   @@ -641,10 +644,13 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
>>>     void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
>>>   {
>>> +    struct amdxdna_dev_hdl *ndev;
>>>       struct amdxdna_dev *xdna;
>>>       int idx;
>>>         xdna = hwctx->client->xdna;
>>> +    ndev = xdna->dev_handle;
>>> +    ndev->hwctx_num--;
>>>       drm_sched_wqueue_stop(&hwctx->priv->sched);
>>>         /* Now, scheduler will not send command to device. */
>>> diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/ 
>>> amdxdna/aie2_message.c
>>> index fc33a158d223..13b5a96f8d25 100644
>>> --- a/drivers/accel/amdxdna/aie2_message.c
>>> +++ b/drivers/accel/amdxdna/aie2_message.c
>>> @@ -70,11 +70,18 @@ int aie2_resume_fw(struct amdxdna_dev_hdl *ndev)
>>>   int aie2_set_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, 
>>> u64 value)
>>>   {
>>>       DECLARE_AIE2_MSG(set_runtime_cfg, MSG_OP_SET_RUNTIME_CONFIG);
>>> +    int ret;
>>>         req.type = type;
>>>       req.value = value;
>>>   -    return aie2_send_mgmt_msg_wait(ndev, &msg);
>>> +    ret = aie2_send_mgmt_msg_wait(ndev, &msg);
>>> +    if (ret) {
>>> +        XDNA_ERR(ndev->xdna, "Failed to set runtime config, ret %d", 
>>> ret);
>>> +        return ret;
>>> +    }
>>> +
>>> +    return 0;
>>>   }
>>>     int aie2_get_runtime_cfg(struct amdxdna_dev_hdl *ndev, u32 type, 
>>> u64 *value)
>>> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/ 
>>> amdxdna/aie2_pci.c
>>> index 83abd16ade11..489744a2e226 100644
>>> --- a/drivers/accel/amdxdna/aie2_pci.c
>>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>>> @@ -109,28 +109,26 @@ static int aie2_get_mgmt_chann_info(struct 
>>> amdxdna_dev_hdl *ndev)
>>>       return 0;
>>>   }
>>>   -static int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev)
>>> +int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
>>> +             enum rt_config_category category, u32 *val)
>>>   {
>>> -    const struct rt_config *cfg = &ndev->priv->rt_config;
>>> -    u64 value;
>>> +    const struct rt_config *cfg;
>>> +    u32 value;
>>>       int ret;
>>>   -    ret = aie2_set_runtime_cfg(ndev, cfg->type, cfg->value);
>>> -    if (ret) {
>>> -        XDNA_ERR(ndev->xdna, "Set runtime type %d value %d failed",
>>> -             cfg->type, cfg->value);
>>> -        return ret;
>>> -    }
>>> +    for (cfg = ndev->priv->rt_config; cfg->type; cfg++) {
>>> +        if (cfg->category != category)
>>> +            continue;
>>>   -    ret = aie2_get_runtime_cfg(ndev, cfg->type, &value);
>>> -    if (ret) {
>>> -        XDNA_ERR(ndev->xdna, "Get runtime cfg failed");
>>> -        return ret;
>>> +        value = val ? *val : cfg->value;
>>> +        ret = aie2_set_runtime_cfg(ndev, cfg->type, value);
>>> +        if (ret) {
>>> +            XDNA_ERR(ndev->xdna, "Set type %d value %d failed",
>>> +                 cfg->type, value);
>>> +            return ret;
>>> +        }
>>>       }
>>>   -    if (value != cfg->value)
>>> -        return -EINVAL;
>>> -
>>>       return 0;
>>>   }
>>>   @@ -163,7 +161,7 @@ static int aie2_mgmt_fw_init(struct 
>>> amdxdna_dev_hdl *ndev)
>>>           return ret;
>>>       }
>>>   -    ret = aie2_runtime_cfg(ndev);
>>> +    ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_INIT, NULL);
>>>       if (ret) {
>>>           XDNA_ERR(ndev->xdna, "Runtime config failed");
>>>           return ret;
>>> @@ -257,9 +255,25 @@ static int aie2_xrs_unload(void *cb_arg)
>>>       return ret;
>>>   }
>>>   +static int aie2_xrs_set_dft_dpm_level(struct drm_device *ddev, u32 
>>> dpm_level)
>>> +{
>>> +    struct amdxdna_dev *xdna = to_xdna_dev(ddev);
>>> +    struct amdxdna_dev_hdl *ndev;
>>> +
>>> +    drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
>>
>> This is a reinvented lockdep_assert_held() no?
>> Or is there some nuance I'm missing?
>>
>> I would suggest switching to lockdep_assert_held().
> 
> lockdep_assert_held() relies on CONFIG_LOCKDEP which might be off.
> 
> And there are similar use cases in drm, e.g.
> 
> In drm_probe_helper.c:
> 
>     drm_WARN_ON(dev, !mutex_is_locked(&dev->mode_config.mutex));
> 

OK thanks. In that case I don't have any concerns.

Reviewed-by: Mario Limonciello <mario.limonciello@....com>

>>
>>> +
>>> +    ndev = xdna->dev_handle;
>>> +    ndev->dft_dpm_level = dpm_level;
>>> +    if (ndev->pw_mode != POWER_MODE_DEFAULT || ndev->dpm_level == 
>>> dpm_level)
>>> +        return 0;
>>> +
>>> +    return ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
>>> +}
>>> +
>>>   static struct xrs_action_ops aie2_xrs_actions = {
>>>       .load = aie2_xrs_load,
>>>       .unload = aie2_xrs_unload,
>>> +    .set_dft_dpm_level = aie2_xrs_set_dft_dpm_level,
>>>   };
>>>     static void aie2_hw_stop(struct amdxdna_dev *xdna)
>>> @@ -354,6 +368,12 @@ static int aie2_hw_start(struct amdxdna_dev *xdna)
>>>           goto stop_psp;
>>>       }
>>>   +    ret = aie2_pm_init(ndev);
>>> +    if (ret) {
>>> +        XDNA_ERR(xdna, "failed to init pm, ret %d", ret);
>>> +        goto destroy_mgmt_chann;
>>> +    }
>>> +
>>>       ret = aie2_mgmt_fw_init(ndev);
>>>       if (ret) {
>>>           XDNA_ERR(xdna, "initial mgmt firmware failed, ret %d", ret);
>>> @@ -480,10 +500,9 @@ static int aie2_init(struct amdxdna_dev *xdna)
>>>       }
>>>       ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
>>>   -    xrs_cfg.clk_list.num_levels = 3;
>>> -    xrs_cfg.clk_list.cu_clk_list[0] = 0;
>>> -    xrs_cfg.clk_list.cu_clk_list[1] = 800;
>>> -    xrs_cfg.clk_list.cu_clk_list[2] = 1000;
>>> +    xrs_cfg.clk_list.num_levels = ndev->max_dpm_level + 1;
>>> +    for (i = 0; i < xrs_cfg.clk_list.num_levels; i++)
>>> +        xrs_cfg.clk_list.cu_clk_list[i] = ndev->priv- 
>>> >dpm_clk_tbl[i].hclk;
>>>       xrs_cfg.sys_eff_factor = 1;
>>>       xrs_cfg.ddev = &xdna->ddev;
>>>       xrs_cfg.actions = &aie2_xrs_actions;
>>> @@ -657,6 +676,22 @@ static int aie2_get_firmware_version(struct 
>>> amdxdna_client *client,
>>>       return 0;
>>>   }
>>>   +static int aie2_get_power_mode(struct amdxdna_client *client,
>>> +                   struct amdxdna_drm_get_info *args)
>>> +{
>>> +    struct amdxdna_drm_get_power_mode mode = {};
>>> +    struct amdxdna_dev *xdna = client->xdna;
>>> +    struct amdxdna_dev_hdl *ndev;
>>> +
>>> +    ndev = xdna->dev_handle;
>>> +    mode.power_mode = ndev->pw_mode;
>>> +
>>> +    if (copy_to_user(u64_to_user_ptr(args->buffer), &mode, 
>>> sizeof(mode)))
>>> +        return -EFAULT;
>>> +
>>> +    return 0;
>>> +}
>>> +
>>>   static int aie2_get_clock_metadata(struct amdxdna_client *client,
>>>                      struct amdxdna_drm_get_info *args)
>>>   {
>>> @@ -670,11 +705,11 @@ static int aie2_get_clock_metadata(struct 
>>> amdxdna_client *client,
>>>       if (!clock)
>>>           return -ENOMEM;
>>>   -    memcpy(clock->mp_npu_clock.name, ndev->mp_npu_clock.name,
>>> -           sizeof(clock->mp_npu_clock.name));
>>> -    clock->mp_npu_clock.freq_mhz = ndev->mp_npu_clock.freq_mhz;
>>> -    memcpy(clock->h_clock.name, ndev->h_clock.name, sizeof(clock- 
>>> >h_clock.name));
>>> -    clock->h_clock.freq_mhz = ndev->h_clock.freq_mhz;
>>> +    snprintf(clock->mp_npu_clock.name, sizeof(clock- 
>>> >mp_npu_clock.name),
>>> +         "MP-NPU Clock");
>>> +    clock->mp_npu_clock.freq_mhz = ndev->npuclk_freq;
>>> +    snprintf(clock->h_clock.name, sizeof(clock->h_clock.name), "H 
>>> Clock");
>>> +    clock->h_clock.freq_mhz = ndev->hclk_freq;
>>>         if (copy_to_user(u64_to_user_ptr(args->buffer), clock, 
>>> sizeof(*clock)))
>>>           ret = -EFAULT;
>>> @@ -772,6 +807,9 @@ static int aie2_get_info(struct amdxdna_client 
>>> *client, struct amdxdna_drm_get_i
>>>       case DRM_AMDXDNA_QUERY_FIRMWARE_VERSION:
>>>           ret = aie2_get_firmware_version(client, args);
>>>           break;
>>> +    case DRM_AMDXDNA_GET_POWER_MODE:
>>> +        ret = aie2_get_power_mode(client, args);
>>> +        break;
>>>       default:
>>>           XDNA_ERR(xdna, "Not supported request parameter %u", args- 
>>> >param);
>>>           ret = -EOPNOTSUPP;
>>> @@ -782,12 +820,58 @@ static int aie2_get_info(struct amdxdna_client 
>>> *client, struct amdxdna_drm_get_i
>>>       return ret;
>>>   }
>>>   +static int aie2_set_power_mode(struct amdxdna_client *client,
>>> +                   struct amdxdna_drm_set_state *args)
>>> +{
>>> +    struct amdxdna_drm_set_power_mode power_state;
>>> +    enum amdxdna_power_mode_type power_mode;
>>> +    struct amdxdna_dev *xdna = client->xdna;
>>> +
>>> +    if (copy_from_user(&power_state, u64_to_user_ptr(args->buffer),
>>> +               sizeof(power_state))) {
>>> +        XDNA_ERR(xdna, "Failed to copy power mode request into 
>>> kernel");
>>> +        return -EFAULT;
>>> +    }
>>> +
>>> +    power_mode = power_state.power_mode;
>>> +    if (power_mode > POWER_MODE_TURBO) {
>>> +        XDNA_ERR(xdna, "Invalid power mode %d", power_mode);
>>> +        return -EINVAL;
>>> +    }
>>> +
>>> +    return aie2_pm_set_mode(xdna->dev_handle, power_mode);
>>> +}
>>> +
>>> +static int aie2_set_state(struct amdxdna_client *client,
>>> +              struct amdxdna_drm_set_state *args)
>>> +{
>>> +    struct amdxdna_dev *xdna = client->xdna;
>>> +    int ret, idx;
>>> +
>>> +    if (!drm_dev_enter(&xdna->ddev, &idx))
>>> +        return -ENODEV;
>>> +
>>> +    switch (args->param) {
>>> +    case DRM_AMDXDNA_SET_POWER_MODE:
>>> +        ret = aie2_set_power_mode(client, args);
>>> +        break;
>>> +    default:
>>> +        XDNA_ERR(xdna, "Not supported request parameter %u", args- 
>>> >param);
>>> +        ret = -EOPNOTSUPP;
>>> +        break;
>>> +    }
>>> +
>>> +    drm_dev_exit(idx);
>>> +    return ret;
>>> +}
>>> +
>>>   const struct amdxdna_dev_ops aie2_ops = {
>>>       .init           = aie2_init,
>>>       .fini           = aie2_fini,
>>>       .resume         = aie2_hw_start,
>>>       .suspend        = aie2_hw_stop,
>>>       .get_aie_info   = aie2_get_info,
>>> +    .set_aie_state    = aie2_set_state,
>>>       .hwctx_init     = aie2_hwctx_init,
>>>       .hwctx_fini     = aie2_hwctx_fini,
>>>       .hwctx_config   = aie2_hwctx_config,
>>> diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/ 
>>> amdxdna/aie2_pci.h
>>> index 1c6f07d9b805..8c17b74654ce 100644
>>> --- a/drivers/accel/amdxdna/aie2_pci.h
>>> +++ b/drivers/accel/amdxdna/aie2_pci.h
>>> @@ -6,6 +6,7 @@
>>>   #ifndef _AIE2_PCI_H_
>>>   #define _AIE2_PCI_H_
>>>   +#include <drm/amdxdna_accel.h>
>>>   #include <linux/semaphore.h>
>>>     #include "amdxdna_mailbox.h"
>>> @@ -48,9 +49,6 @@
>>>       pci_resource_len(NDEV2PDEV(_ndev), (_ndev)->xdna->dev_info- 
>>> >mbox_bar); \
>>>   })
>>>   -#define SMU_MPNPUCLK_FREQ_MAX(ndev) ((ndev)->priv- 
>>> >smu_mpnpuclk_freq_max)
>>> -#define SMU_HCLK_FREQ_MAX(ndev) ((ndev)->priv->smu_hclk_freq_max)
>>> -
>>>   enum aie2_smu_reg_idx {
>>>       SMU_CMD_REG = 0,
>>>       SMU_ARG_REG,
>>> @@ -112,14 +110,20 @@ struct aie_metadata {
>>>       struct aie_tile_metadata shim;
>>>   };
>>>   -struct clock_entry {
>>> -    char name[16];
>>> -    u32 freq_mhz;
>>> +enum rt_config_category {
>>> +    AIE2_RT_CFG_INIT,
>>> +    AIE2_RT_CFG_CLK_GATING,
>>>   };
>>>     struct rt_config {
>>>       u32    type;
>>>       u32    value;
>>> +    u32    category;
>>> +};
>>> +
>>> +struct dpm_clk_freq {
>>> +    u32    npuclk;
>>> +    u32    hclk;
>>>   };
>>>     /*
>>> @@ -150,6 +154,7 @@ struct amdxdna_hwctx_priv {
>>>   };
>>>     enum aie2_dev_status {
>>> +    AIE2_DEV_UNINIT,
>>>       AIE2_DEV_INIT,
>>>       AIE2_DEV_START,
>>>   };
>>> @@ -169,8 +174,15 @@ struct amdxdna_dev_hdl {
>>>       u32                total_col;
>>>       struct aie_version        version;
>>>       struct aie_metadata        metadata;
>>> -    struct clock_entry        mp_npu_clock;
>>> -    struct clock_entry        h_clock;
>>> +
>>> +    /* power management and clock*/
>>> +    enum amdxdna_power_mode_type    pw_mode;
>>> +    u32                dpm_level;
>>> +    u32                dft_dpm_level;
>>> +    u32                max_dpm_level;
>>> +    u32                clk_gating;
>>> +    u32                npuclk_freq;
>>> +    u32                hclk_freq;
>>>         /* Mailbox and the management channel */
>>>       struct mailbox            *mbox;
>>> @@ -178,6 +190,7 @@ struct amdxdna_dev_hdl {
>>>       struct async_events        *async_events;
>>>         enum aie2_dev_status        dev_status;
>>> +    u32                hwctx_num;
>>>   };
>>>     #define DEFINE_BAR_OFFSET(reg_name, bar, reg_addr) \
>>> @@ -188,11 +201,17 @@ struct aie2_bar_off_pair {
>>>       u32    offset;
>>>   };
>>>   +struct aie2_hw_ops {
>>> +    int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
>>> +};
>>> +
>>>   struct amdxdna_dev_priv {
>>>       const char            *fw_path;
>>>       u64                protocol_major;
>>>       u64                protocol_minor;
>>> -    struct rt_config        rt_config;
>>> +    const struct rt_config        *rt_config;
>>> +    const struct dpm_clk_freq    *dpm_clk_tbl;
>>> +
>>>   #define COL_ALIGN_NONE   0
>>>   #define COL_ALIGN_NATURE 1
>>>       u32                col_align;
>>> @@ -203,15 +222,29 @@ struct amdxdna_dev_priv {
>>>       struct aie2_bar_off_pair    sram_offs[SRAM_MAX_INDEX];
>>>       struct aie2_bar_off_pair    psp_regs_off[PSP_MAX_REGS];
>>>       struct aie2_bar_off_pair    smu_regs_off[SMU_MAX_REGS];
>>> -    u32                smu_mpnpuclk_freq_max;
>>> -    u32                smu_hclk_freq_max;
>>> +    struct aie2_hw_ops        hw_ops;
>>>   };
>>>     extern const struct amdxdna_dev_ops aie2_ops;
>>>   +int aie2_runtime_cfg(struct amdxdna_dev_hdl *ndev,
>>> +             enum rt_config_category category, u32 *val);
>>> +
>>> +/* aie2 npu hw config */
>>> +extern const struct dpm_clk_freq npu1_dpm_clk_table[];
>>> +extern const struct dpm_clk_freq npu4_dpm_clk_table[];
>>> +extern const struct rt_config npu1_default_rt_cfg[];
>>> +extern const struct rt_config npu4_default_rt_cfg[];
>>> +
>>>   /* aie2_smu.c */
>>>   int aie2_smu_init(struct amdxdna_dev_hdl *ndev);
>>>   void aie2_smu_fini(struct amdxdna_dev_hdl *ndev);
>>> +int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
>>> +int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
>>> +
>>> +/* aie2_pm.c */
>>> +int aie2_pm_init(struct amdxdna_dev_hdl *ndev);
>>> +int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum 
>>> amdxdna_power_mode_type target);
>>>     /* aie2_psp.c */
>>>   struct psp_device *aie2m_psp_create(struct drm_device *ddev, struct 
>>> psp_config *conf);
>>> diff --git a/drivers/accel/amdxdna/aie2_pm.c b/drivers/accel/amdxdna/ 
>>> aie2_pm.c
>>> new file mode 100644
>>> index 000000000000..426c38fce848
>>> --- /dev/null
>>> +++ b/drivers/accel/amdxdna/aie2_pm.c
>>> @@ -0,0 +1,108 @@
>>> +// SPDX-License-Identifier: GPL-2.0
>>> +/*
>>> + * Copyright (C) 2024, Advanced Micro Devices, Inc.
>>> + */
>>> +
>>> +#include <drm/amdxdna_accel.h>
>>> +#include <drm/drm_device.h>
>>> +#include <drm/drm_print.h>
>>> +#include <drm/gpu_scheduler.h>
>>> +
>>> +#include "aie2_pci.h"
>>> +#include "amdxdna_pci_drv.h"
>>> +
>>> +#define AIE2_CLK_GATING_ENABLE    1
>>> +#define AIE2_CLK_GATING_DISABLE    0
>>> +
>>> +static int aie2_pm_set_clk_gating(struct amdxdna_dev_hdl *ndev, u32 
>>> val)
>>> +{
>>> +    int ret;
>>> +
>>> +    ret = aie2_runtime_cfg(ndev, AIE2_RT_CFG_CLK_GATING, &val);
>>> +    if (ret)
>>> +        return ret;
>>> +
>>> +    ndev->clk_gating = val;
>>> +    return 0;
>>> +}
>>> +
>>> +int aie2_pm_init(struct amdxdna_dev_hdl *ndev)
>>> +{
>>> +    int ret;
>>> +
>>> +    if (ndev->dev_status != AIE2_DEV_UNINIT) {
>>> +        /* Resume device */
>>> +        ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->dpm_level);
>>> +        if (ret)
>>> +            return ret;
>>> +
>>> +        ret = aie2_pm_set_clk_gating(ndev, ndev->clk_gating);
>>> +        if (ret)
>>> +            return ret;
>>> +
>>> +        return 0;
>>> +    }
>>> +
>>> +    while (ndev->priv->dpm_clk_tbl[ndev->max_dpm_level].hclk)
>>> +        ndev->max_dpm_level++;
>>> +    ndev->max_dpm_level--;
>>> +
>>> +    ret = ndev->priv->hw_ops.set_dpm(ndev, ndev->max_dpm_level);
>>> +    if (ret)
>>> +        return ret;
>>> +
>>> +    ret = aie2_pm_set_clk_gating(ndev, AIE2_CLK_GATING_ENABLE);
>>> +    if (ret)
>>> +        return ret;
>>
>> In the event of a failure do you want to try to restore dpm where it was?
> 
> This is an initialization routine. If it fails, that indicates a
> firmware/hardware issue. There may be no need to do more on broken fw/hw.
> 
> And the driver will not probe in this case.
> 
> 
> Thanks,
> 
> Lizhi
> 
>>
>>> +
>>> +    ndev->pw_mode = POWER_MODE_DEFAULT;
>>> +    ndev->dft_dpm_level = ndev->max_dpm_level;
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +int aie2_pm_set_mode(struct amdxdna_dev_hdl *ndev, enum 
>>> amdxdna_power_mode_type target)
>>> +{
>>> +    struct amdxdna_dev *xdna = ndev->xdna;
>>> +    u32 clk_gating, dpm_level;
>>> +    int ret;
>>> +
>>> +    drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
>>
>> lockdep_assert_held()
>>
>>> +
>>> +    if (ndev->pw_mode == target)
>>> +        return 0;
>>> +
>>> +    switch (target) {
>>> +    case POWER_MODE_TURBO:
>>> +        if (ndev->hwctx_num) {
>>> +            XDNA_ERR(xdna, "Can not set turbo when there is active 
>>> hwctx");
>>> +            return -EINVAL;
>>> +        }
>>> +
>>> +        clk_gating = AIE2_CLK_GATING_DISABLE;
>>> +        dpm_level = ndev->max_dpm_level;
>>> +        break;
>>> +    case POWER_MODE_HIGH:
>>> +        clk_gating = AIE2_CLK_GATING_ENABLE;
>>> +        dpm_level = ndev->max_dpm_level;
>>> +        break;
>>> +    case POWER_MODE_DEFAULT:
>>> +        clk_gating = AIE2_CLK_GATING_ENABLE;
>>> +        dpm_level = ndev->dft_dpm_level;
>>> +        break;
>>> +    default:
>>> +        return -EOPNOTSUPP;
>>> +    }
>>> +
>>> +    ret = ndev->priv->hw_ops.set_dpm(ndev, dpm_level);
>>> +    if (ret)
>>> +        return ret;
>>> +
>>> +    ret = aie2_pm_set_clk_gating(ndev, clk_gating);
>>> +    if (ret)
>>> +        return ret;
>>> +
>>> +    ndev->pw_mode = target;
>>> +
>>> +    return 0;
>>> +}
>>> diff --git a/drivers/accel/amdxdna/aie2_smu.c b/drivers/accel/ 
>>> amdxdna/aie2_smu.c
>>> index 91893d438da7..73388443c676 100644
>>> --- a/drivers/accel/amdxdna/aie2_smu.c
>>> +++ b/drivers/accel/amdxdna/aie2_smu.c
>>> @@ -19,8 +19,11 @@
>>>   #define AIE2_SMU_POWER_OFF        0x4
>>>   #define AIE2_SMU_SET_MPNPUCLK_FREQ    0x5
>>>   #define AIE2_SMU_SET_HCLK_FREQ        0x6
>>> +#define AIE2_SMU_SET_SOFT_DPMLEVEL    0x7
>>> +#define AIE2_SMU_SET_HARD_DPMLEVEL    0x8
>>>   -static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 
>>> reg_cmd, u32 reg_arg)
>>> +static int aie2_smu_exec(struct amdxdna_dev_hdl *ndev, u32 reg_cmd,
>>> +             u32 reg_arg, u32 *out)
>>>   {
>>>       u32 resp;
>>>       int ret;
>>> @@ -40,6 +43,9 @@ static int aie2_smu_exec(struct amdxdna_dev_hdl 
>>> *ndev, u32 reg_cmd, u32 reg_arg)
>>>           return ret;
>>>       }
>>>   +    if (out)
>>> +        *out = readl(SMU_REG(ndev, SMU_OUT_REG));
>>> +
>>>       if (resp != SMU_RESULT_OK) {
>>>           XDNA_ERR(ndev->xdna, "smu cmd %d failed, 0x%x", reg_cmd, 
>>> resp);
>>>           return -EINVAL;
>>> @@ -48,63 +54,71 @@ static int aie2_smu_exec(struct amdxdna_dev_hdl 
>>> *ndev, u32 reg_cmd, u32 reg_arg)
>>>       return 0;
>>>   }
>>>   -static int aie2_smu_set_mpnpu_clock_freq(struct amdxdna_dev_hdl 
>>> *ndev, u32 freq_mhz)
>>> +int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
>>>   {
>>> +    u32 freq;
>>>       int ret;
>>>   -    if (!freq_mhz || freq_mhz > SMU_MPNPUCLK_FREQ_MAX(ndev)) {
>>> -        XDNA_ERR(ndev->xdna, "invalid mpnpu clock freq %d", freq_mhz);
>>> -        return -EINVAL;
>>> +    ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ,
>>> + ndev->priv->dpm_clk_tbl[dpm_level].npuclk, &freq);
>>> +    if (ret) {
>>> +        XDNA_ERR(ndev->xdna, "Set npu clock to %d failed, ret %d\n",
>>> +             ndev->priv->dpm_clk_tbl[dpm_level].npuclk, ret);
>>>       }
>>> +    ndev->npuclk_freq = freq;
>>>   -    ndev->mp_npu_clock.freq_mhz = freq_mhz;
>>> -    ret = aie2_smu_exec(ndev, AIE2_SMU_SET_MPNPUCLK_FREQ, freq_mhz);
>>> -    if (!ret)
>>> -        XDNA_INFO_ONCE(ndev->xdna, "set mpnpu_clock = %d mhz", 
>>> freq_mhz);
>>> -
>>> -    return ret;
>>> -}
>>> -
>>> -static int aie2_smu_set_hclock_freq(struct amdxdna_dev_hdl *ndev, 
>>> u32 freq_mhz)
>>> -{
>>> -    int ret;
>>> -
>>> -    if (!freq_mhz || freq_mhz > SMU_HCLK_FREQ_MAX(ndev)) {
>>> -        XDNA_ERR(ndev->xdna, "invalid hclock freq %d", freq_mhz);
>>> -        return -EINVAL;
>>> +    ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HCLK_FREQ,
>>> +                ndev->priv->dpm_clk_tbl[dpm_level].hclk, &freq);
>>> +    if (ret) {
>>> +        XDNA_ERR(ndev->xdna, "Set h clock to %d failed, ret %d\n",
>>> +             ndev->priv->dpm_clk_tbl[dpm_level].hclk, ret);
>>>       }
>>> +    ndev->hclk_freq = freq;
>>> +    ndev->dpm_level = dpm_level;
>>>   -    ndev->h_clock.freq_mhz = freq_mhz;
>>> -    ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HCLK_FREQ, freq_mhz);
>>> -    if (!ret)
>>> -        XDNA_INFO_ONCE(ndev->xdna, "set npu_hclock = %d mhz", 
>>> freq_mhz);
>>> +    XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n",
>>> +         ndev->npuclk_freq, ndev->hclk_freq);
>>>   -    return ret;
>>> +    return 0;
>>>   }
>>>   -int aie2_smu_init(struct amdxdna_dev_hdl *ndev)
>>> +int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
>>>   {
>>>       int ret;
>>>   -    ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_ON, 0);
>>> +    ret = aie2_smu_exec(ndev, AIE2_SMU_SET_HARD_DPMLEVEL, dpm_level, 
>>> NULL);
>>>       if (ret) {
>>> -        XDNA_ERR(ndev->xdna, "Power on failed, ret %d", ret);
>>> +        XDNA_ERR(ndev->xdna, "Set hard dpm level %d failed, ret %d ",
>>> +             dpm_level, ret);
>>>           return ret;
>>>       }
>>>   -    ret = aie2_smu_set_mpnpu_clock_freq(ndev, 
>>> SMU_MPNPUCLK_FREQ_MAX(ndev));
>>> +    ret = aie2_smu_exec(ndev, AIE2_SMU_SET_SOFT_DPMLEVEL, dpm_level, 
>>> NULL);
>>>       if (ret) {
>>> -        XDNA_ERR(ndev->xdna, "Set mpnpu clk freq failed, ret %d", ret);
>>> +        XDNA_ERR(ndev->xdna, "Set soft dpm level %d failed, ret %d",
>>> +             dpm_level, ret);
>>>           return ret;
>>>       }
>>> -    snprintf(ndev->mp_npu_clock.name, sizeof(ndev- 
>>> >mp_npu_clock.name), "MP-NPU Clock");
>>>   -    ret = aie2_smu_set_hclock_freq(ndev, SMU_HCLK_FREQ_MAX(ndev));
>>> +    ndev->npuclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].npuclk;
>>> +    ndev->hclk_freq = ndev->priv->dpm_clk_tbl[dpm_level].hclk;
>>> +    ndev->dpm_level = dpm_level;
>>> +
>>> +    XDNA_DBG(ndev->xdna, "MP-NPU clock %d, H clock %d\n",
>>> +         ndev->npuclk_freq, ndev->hclk_freq);
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +int aie2_smu_init(struct amdxdna_dev_hdl *ndev)
>>> +{
>>> +    int ret;
>>> +
>>> +    ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_ON, 0, NULL);
>>>       if (ret) {
>>> -        XDNA_ERR(ndev->xdna, "Set hclk freq failed, ret %d", ret);
>>> +        XDNA_ERR(ndev->xdna, "Power on failed, ret %d", ret);
>>>           return ret;
>>>       }
>>> -    snprintf(ndev->h_clock.name, sizeof(ndev->h_clock.name), "H 
>>> Clock");
>>>         return 0;
>>>   }
>>> @@ -113,7 +127,8 @@ void aie2_smu_fini(struct amdxdna_dev_hdl *ndev)
>>>   {
>>>       int ret;
>>>   -    ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_OFF, 0);
>>> +    ndev->priv->hw_ops.set_dpm(ndev, 0);
>>> +    ret = aie2_smu_exec(ndev, AIE2_SMU_POWER_OFF, 0, NULL);
>>>       if (ret)
>>>           XDNA_ERR(ndev->xdna, "Power off failed, ret %d", ret);
>>>   }
>>> diff --git a/drivers/accel/amdxdna/aie2_solver.c b/drivers/accel/ 
>>> amdxdna/aie2_solver.c
>>> index a537c66589a4..1939625d6027 100644
>>> --- a/drivers/accel/amdxdna/aie2_solver.c
>>> +++ b/drivers/accel/amdxdna/aie2_solver.c
>>> @@ -25,6 +25,7 @@ struct solver_node {
>>>         struct partition_node    *pt_node;
>>>       void            *cb_arg;
>>> +    u32            dpm_level;
>>>       u32            cols_len;
>>>       u32            start_cols[] __counted_by(cols_len);
>>>   };
>>> @@ -95,6 +96,51 @@ static int sanity_check(struct solver_state *xrs, 
>>> struct alloc_requests *req)
>>>       return 0;
>>>   }
>>>   +static bool is_valid_qos_dpm_params(struct aie_qos *rqos)
>>> +{
>>> +    /*
>>> +     * gops is retrieved from the xmodel, so it's always set
>>> +     * fps and latency are the configurable params from the application
>>> +     */
>>> +    if (rqos->gops > 0 && (rqos->fps > 0 ||  rqos->latency > 0))
>>> +        return true;
>>> +
>>> +    return false;
>>> +}
>>> +
>>> +static int set_dpm_level(struct solver_state *xrs, struct 
>>> alloc_requests *req, u32 *dpm_level)
>>> +{
>>> +    struct solver_rgroup *rgp = &xrs->rgp;
>>> +    struct cdo_parts *cdop = &req->cdo;
>>> +    struct aie_qos *rqos = &req->rqos;
>>> +    u32 freq, max_dpm_level, level;
>>> +    struct solver_node *node;
>>> +
>>> +    max_dpm_level = xrs->cfg.clk_list.num_levels - 1;
>>> +    /* If no QoS parameters are passed, set it to the max DPM level */
>>> +    if (!is_valid_qos_dpm_params(rqos)) {
>>> +        level = max_dpm_level;
>>> +        goto set_dpm;
>>> +    }
>>> +
>>> +    /* Find one CDO group that meet the GOPs requirement. */
>>> +    for (level = 0; level < max_dpm_level; level++) {
>>> +        freq = xrs->cfg.clk_list.cu_clk_list[level];
>>> +        if (!qos_meet(xrs, rqos, cdop->qos_cap.opc * freq / 1000))
>>> +            break;
>>> +    }
>>> +
>>> +    /* set the dpm level which fits all the sessions */
>>> +    list_for_each_entry(node, &rgp->node_list, list) {
>>> +        if (node->dpm_level > level)
>>> +            level = node->dpm_level;
>>> +    }
>>> +
>>> +set_dpm:
>>> +    *dpm_level = level;
>>> +    return xrs->cfg.actions->set_dft_dpm_level(xrs->cfg.ddev, level);
>>> +}
>>> +
>>>   static struct solver_node *rg_search_node(struct solver_rgroup 
>>> *rgp, u64 rid)
>>>   {
>>>       struct solver_node *node;
>>> @@ -159,12 +205,9 @@ static int get_free_partition(struct 
>>> solver_state *xrs,
>>>       pt_node->ncols = ncols;
>>>         /*
>>> -     * Before fully support latency in QoS, if a request
>>> -     * specifies a non-zero latency value, it will not share
>>> -     * the partition with other requests.
>>> +     * Always set exclusive to false for now.
>>>        */
>>> -    if (req->rqos.latency)
>>> -        pt_node->exclusive = true;
>>> +    pt_node->exclusive = false;
>>>         list_add_tail(&pt_node->list, &xrs->rgp.pt_node_list);
>>>       xrs->rgp.npartition_node++;
>>> @@ -257,6 +300,7 @@ int xrs_allocate_resource(void *hdl, struct 
>>> alloc_requests *req, void *cb_arg)
>>>       struct xrs_action_load load_act;
>>>       struct solver_node *snode;
>>>       struct solver_state *xrs;
>>> +    u32 dpm_level;
>>>       int ret;
>>>         xrs = (struct solver_state *)hdl;
>>> @@ -281,6 +325,11 @@ int xrs_allocate_resource(void *hdl, struct 
>>> alloc_requests *req, void *cb_arg)
>>>       if (ret)
>>>           goto free_node;
>>>   +    ret = set_dpm_level(xrs, req, &dpm_level);
>>> +    if (ret)
>>> +        goto free_node;
>>> +
>>> +    snode->dpm_level = dpm_level;
>>>       snode->cb_arg = cb_arg;
>>>         drm_dbg(xrs->cfg.ddev, "start col %d ncols %d\n",
>>> diff --git a/drivers/accel/amdxdna/aie2_solver.h b/drivers/accel/ 
>>> amdxdna/aie2_solver.h
>>> index 9b1847bb46a6..a2e3c52229e9 100644
>>> --- a/drivers/accel/amdxdna/aie2_solver.h
>>> +++ b/drivers/accel/amdxdna/aie2_solver.h
>>> @@ -99,6 +99,7 @@ struct clk_list_info {
>>>   struct xrs_action_ops {
>>>       int (*load)(void *cb_arg, struct xrs_action_load *action);
>>>       int (*unload)(void *cb_arg);
>>> +    int (*set_dft_dpm_level)(struct drm_device *ddev, u32 level);
>>>   };
>>>     /*
>>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/ 
>>> amdxdna/amdxdna_pci_drv.c
>>> index c3541796d189..6bbd437d48d8 100644
>>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
>>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>>> @@ -160,6 +160,24 @@ static int amdxdna_drm_get_info_ioctl(struct 
>>> drm_device *dev, void *data, struct
>>>       return ret;
>>>   }
>>>   +static int amdxdna_drm_set_state_ioctl(struct drm_device *dev, 
>>> void *data, struct drm_file *filp)
>>> +{
>>> +    struct amdxdna_client *client = filp->driver_priv;
>>> +    struct amdxdna_dev *xdna = to_xdna_dev(dev);
>>> +    struct amdxdna_drm_set_state *args = data;
>>> +    int ret;
>>> +
>>> +    if (!xdna->dev_info->ops->set_aie_state)
>>> +        return -EOPNOTSUPP;
>>> +
>>> +    XDNA_DBG(xdna, "Request parameter %u", args->param);
>>> +    mutex_lock(&xdna->dev_lock);
>>> +    ret = xdna->dev_info->ops->set_aie_state(client, args);
>>> +    mutex_unlock(&xdna->dev_lock);
>>> +
>>> +    return ret;
>>> +}
>>> +
>>>   static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
>>>       /* Context */
>>>       DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_HWCTX, 
>>> amdxdna_drm_create_hwctx_ioctl, 0),
>>> @@ -173,6 +191,7 @@ static const struct drm_ioctl_desc 
>>> amdxdna_drm_ioctls[] = {
>>>       DRM_IOCTL_DEF_DRV(AMDXDNA_EXEC_CMD, 
>>> amdxdna_drm_submit_cmd_ioctl, 0),
>>>       /* AIE hardware */
>>>       DRM_IOCTL_DEF_DRV(AMDXDNA_GET_INFO, amdxdna_drm_get_info_ioctl, 
>>> 0),
>>> +    DRM_IOCTL_DEF_DRV(AMDXDNA_SET_STATE, 
>>> amdxdna_drm_set_state_ioctl, DRM_ROOT_ONLY),
>>>   };
>>>     static const struct file_operations amdxdna_fops = {
>>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.h b/drivers/accel/ 
>>> amdxdna/amdxdna_pci_drv.h
>>> index f5b830fb14bb..e2071e31d949 100644
>>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.h
>>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.h
>>> @@ -20,6 +20,7 @@ extern const struct drm_driver amdxdna_drm_drv;
>>>   struct amdxdna_client;
>>>   struct amdxdna_dev;
>>>   struct amdxdna_drm_get_info;
>>> +struct amdxdna_drm_set_state;
>>>   struct amdxdna_gem_obj;
>>>   struct amdxdna_hwctx;
>>>   struct amdxdna_sched_job;
>>> @@ -40,6 +41,7 @@ struct amdxdna_dev_ops {
>>>       void (*hwctx_resume)(struct amdxdna_hwctx *hwctx);
>>>       int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct 
>>> amdxdna_sched_job *job, u64 *seq);
>>>       int (*get_aie_info)(struct amdxdna_client *client, struct 
>>> amdxdna_drm_get_info *args);
>>> +    int (*set_aie_state)(struct amdxdna_client *client, struct 
>>> amdxdna_drm_set_state *args);
>>>   };
>>>     /*
>>> diff --git a/drivers/accel/amdxdna/npu1_regs.c b/drivers/accel/ 
>>> amdxdna/npu1_regs.c
>>> index f00c50461b09..c8f4d1cac65d 100644
>>> --- a/drivers/accel/amdxdna/npu1_regs.c
>>> +++ b/drivers/accel/amdxdna/npu1_regs.c
>>> @@ -44,18 +44,30 @@
>>>   #define NPU1_SMU_BAR_BASE  MPNPU_APERTURE0_BASE
>>>   #define NPU1_SRAM_BAR_BASE MPNPU_APERTURE1_BASE
>>>   -#define NPU1_RT_CFG_TYPE_PDI_LOAD 2
>>> -#define NPU1_RT_CFG_VAL_PDI_LOAD_MGMT 0
>>> -#define NPU1_RT_CFG_VAL_PDI_LOAD_APP 1
>>> +const struct rt_config npu1_default_rt_cfg[] = {
>>> +    { 2, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
>>> +    { 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
>>> +    { 0 },
>>> +};
>>>   -#define NPU1_MPNPUCLK_FREQ_MAX  600
>>> -#define NPU1_HCLK_FREQ_MAX      1024
>>> +const struct dpm_clk_freq npu1_dpm_clk_table[] = {
>>> +    {400, 800},
>>> +    {600, 1024},
>>> +    {600, 1024},
>>> +    {600, 1024},
>>> +    {600, 1024},
>>> +    {720, 1309},
>>> +    {720, 1309},
>>> +    {847, 1600},
>>> +    { 0 }
>>> +};
>>>     const struct amdxdna_dev_priv npu1_dev_priv = {
>>>       .fw_path        = "amdnpu/1502_00/npu.sbin",
>>>       .protocol_major = 0x5,
>>>       .protocol_minor = 0x1,
>>> -    .rt_config    = {NPU1_RT_CFG_TYPE_PDI_LOAD, 
>>> NPU1_RT_CFG_VAL_PDI_LOAD_APP},
>>> +    .rt_config    = npu1_default_rt_cfg,
>>> +    .dpm_clk_tbl    = npu1_dpm_clk_table,
>>>       .col_align    = COL_ALIGN_NONE,
>>>       .mbox_dev_addr  = NPU1_MBOX_BAR_BASE,
>>>       .mbox_size      = 0, /* Use BAR size */
>>> @@ -80,8 +92,9 @@ const struct amdxdna_dev_priv npu1_dev_priv = {
>>>           DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU1_SMU, MPNPU_PUB_SCRATCH6),
>>>           DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU1_SMU, MPNPU_PUB_SCRATCH7),
>>>       },
>>> -    .smu_mpnpuclk_freq_max = NPU1_MPNPUCLK_FREQ_MAX,
>>> -    .smu_hclk_freq_max     = NPU1_HCLK_FREQ_MAX,
>>> +    .hw_ops        = {
>>> +        .set_dpm = npu1_set_dpm,
>>> +    },
>>>   };
>>>     const struct amdxdna_dev_info dev_npu1_info = {
>>> diff --git a/drivers/accel/amdxdna/npu2_regs.c b/drivers/accel/ 
>>> amdxdna/npu2_regs.c
>>> index 00cb381031d2..ac63131f9c7c 100644
>>> --- a/drivers/accel/amdxdna/npu2_regs.c
>>> +++ b/drivers/accel/amdxdna/npu2_regs.c
>>> @@ -61,18 +61,12 @@
>>>   #define NPU2_SMU_BAR_BASE    MMNPU_APERTURE4_BASE
>>>   #define NPU2_SRAM_BAR_BASE    MMNPU_APERTURE1_BASE
>>>   -#define NPU2_RT_CFG_TYPE_PDI_LOAD 5
>>> -#define NPU2_RT_CFG_VAL_PDI_LOAD_MGMT 0
>>> -#define NPU2_RT_CFG_VAL_PDI_LOAD_APP 1
>>> -
>>> -#define NPU2_MPNPUCLK_FREQ_MAX  1267
>>> -#define NPU2_HCLK_FREQ_MAX      1800
>>> -
>>>   const struct amdxdna_dev_priv npu2_dev_priv = {
>>>       .fw_path        = "amdnpu/17f0_00/npu.sbin",
>>>       .protocol_major = 0x6,
>>>       .protocol_minor = 0x1,
>>> -    .rt_config    = {NPU2_RT_CFG_TYPE_PDI_LOAD, 
>>> NPU2_RT_CFG_VAL_PDI_LOAD_APP},
>>> +    .rt_config    = npu4_default_rt_cfg,
>>> +    .dpm_clk_tbl    = npu4_dpm_clk_table,
>>>       .col_align    = COL_ALIGN_NATURE,
>>>       .mbox_dev_addr  = NPU2_MBOX_BAR_BASE,
>>>       .mbox_size      = 0, /* Use BAR size */
>>> @@ -97,8 +91,9 @@ const struct amdxdna_dev_priv npu2_dev_priv = {
>>>           DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU2_SMU, MP1_C2PMSG_61),
>>>           DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU2_SMU, MP1_C2PMSG_60),
>>>       },
>>> -    .smu_mpnpuclk_freq_max = NPU2_MPNPUCLK_FREQ_MAX,
>>> -    .smu_hclk_freq_max     = NPU2_HCLK_FREQ_MAX,
>>> +    .hw_ops    =     {
>>> +        .set_dpm = npu4_set_dpm,
>>> +    },
>>>   };
>>>     const struct amdxdna_dev_info dev_npu2_info = {
>>> diff --git a/drivers/accel/amdxdna/npu4_regs.c b/drivers/accel/ 
>>> amdxdna/npu4_regs.c
>>> index b6dae9667cca..a713ac18adfc 100644
>>> --- a/drivers/accel/amdxdna/npu4_regs.c
>>> +++ b/drivers/accel/amdxdna/npu4_regs.c
>>> @@ -61,18 +61,33 @@
>>>   #define NPU4_SMU_BAR_BASE    MMNPU_APERTURE4_BASE
>>>   #define NPU4_SRAM_BAR_BASE    MMNPU_APERTURE1_BASE
>>>   -#define NPU4_RT_CFG_TYPE_PDI_LOAD 5
>>> -#define NPU4_RT_CFG_VAL_PDI_LOAD_MGMT 0
>>> -#define NPU4_RT_CFG_VAL_PDI_LOAD_APP 1
>>> +const struct rt_config npu4_default_rt_cfg[] = {
>>> +    { 5, 1, AIE2_RT_CFG_INIT }, /* PDI APP LOAD MODE */
>>> +    { 1, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
>>> +    { 2, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
>>> +    { 3, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
>>> +    { 4, 1, AIE2_RT_CFG_CLK_GATING }, /* Clock gating on */
>>> +    { 0 },
>>> +};
>>>   -#define NPU4_MPNPUCLK_FREQ_MAX  1267
>>> -#define NPU4_HCLK_FREQ_MAX      1800
>>> +const struct dpm_clk_freq npu4_dpm_clk_table[] = {
>>> +    {396, 792},
>>> +    {600, 1056},
>>> +    {792, 1152},
>>> +    {975, 1267},
>>> +    {975, 1267},
>>> +    {1056, 1408},
>>> +    {1152, 1584},
>>> +    {1267, 1800},
>>> +    { 0 }
>>> +};
>>>     const struct amdxdna_dev_priv npu4_dev_priv = {
>>>       .fw_path        = "amdnpu/17f0_10/npu.sbin",
>>>       .protocol_major = 0x6,
>>>       .protocol_minor = 0x1,
>>> -    .rt_config    = {NPU4_RT_CFG_TYPE_PDI_LOAD, 
>>> NPU4_RT_CFG_VAL_PDI_LOAD_APP},
>>> +    .rt_config    = npu4_default_rt_cfg,
>>> +    .dpm_clk_tbl    = npu4_dpm_clk_table,
>>>       .col_align    = COL_ALIGN_NATURE,
>>>       .mbox_dev_addr  = NPU4_MBOX_BAR_BASE,
>>>       .mbox_size      = 0, /* Use BAR size */
>>> @@ -97,8 +112,9 @@ const struct amdxdna_dev_priv npu4_dev_priv = {
>>>           DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU4_SMU, MP1_C2PMSG_61),
>>>           DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU4_SMU, MP1_C2PMSG_60),
>>>       },
>>> -    .smu_mpnpuclk_freq_max = NPU4_MPNPUCLK_FREQ_MAX,
>>> -    .smu_hclk_freq_max     = NPU4_HCLK_FREQ_MAX,
>>> +    .hw_ops        = {
>>> +        .set_dpm = npu4_set_dpm,
>>> +    },
>>>   };
>>>     const struct amdxdna_dev_info dev_npu4_info = {
>>> diff --git a/drivers/accel/amdxdna/npu5_regs.c b/drivers/accel/ 
>>> amdxdna/npu5_regs.c
>>> index bed1baf8e160..67a5d5bc8a49 100644
>>> --- a/drivers/accel/amdxdna/npu5_regs.c
>>> +++ b/drivers/accel/amdxdna/npu5_regs.c
>>> @@ -61,18 +61,12 @@
>>>   #define NPU5_SMU_BAR_BASE    MMNPU_APERTURE4_BASE
>>>   #define NPU5_SRAM_BAR_BASE    MMNPU_APERTURE1_BASE
>>>   -#define NPU5_RT_CFG_TYPE_PDI_LOAD 5
>>> -#define NPU5_RT_CFG_VAL_PDI_LOAD_MGMT 0
>>> -#define NPU5_RT_CFG_VAL_PDI_LOAD_APP 1
>>> -
>>> -#define NPU5_MPNPUCLK_FREQ_MAX  1267
>>> -#define NPU5_HCLK_FREQ_MAX      1800
>>> -
>>>   const struct amdxdna_dev_priv npu5_dev_priv = {
>>>       .fw_path        = "amdnpu/17f0_11/npu.sbin",
>>>       .protocol_major = 0x6,
>>>       .protocol_minor = 0x1,
>>> -    .rt_config    = {NPU5_RT_CFG_TYPE_PDI_LOAD, 
>>> NPU5_RT_CFG_VAL_PDI_LOAD_APP},
>>> +    .rt_config    = npu4_default_rt_cfg,
>>> +    .dpm_clk_tbl    = npu4_dpm_clk_table,
>>>       .col_align    = COL_ALIGN_NATURE,
>>>       .mbox_dev_addr  = NPU5_MBOX_BAR_BASE,
>>>       .mbox_size      = 0, /* Use BAR size */
>>> @@ -97,8 +91,9 @@ const struct amdxdna_dev_priv npu5_dev_priv = {
>>>           DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU5_SMU, MP1_C2PMSG_61),
>>>           DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU5_SMU, MP1_C2PMSG_60),
>>>       },
>>> -    .smu_mpnpuclk_freq_max = NPU5_MPNPUCLK_FREQ_MAX,
>>> -    .smu_hclk_freq_max     = NPU5_HCLK_FREQ_MAX,
>>> +    .hw_ops        = {
>>> +        .set_dpm = npu4_set_dpm,
>>> +    },
>>>   };
>>>     const struct amdxdna_dev_info dev_npu5_info = {
>>> diff --git a/drivers/accel/amdxdna/npu6_regs.c b/drivers/accel/ 
>>> amdxdna/npu6_regs.c
>>> index d1168fc55533..f46c760cefc7 100644
>>> --- a/drivers/accel/amdxdna/npu6_regs.c
>>> +++ b/drivers/accel/amdxdna/npu6_regs.c
>>> @@ -61,23 +61,12 @@
>>>   #define NPU6_SMU_BAR_BASE    MMNPU_APERTURE4_BASE
>>>   #define NPU6_SRAM_BAR_BASE    MMNPU_APERTURE1_BASE
>>>   -#define NPU6_RT_CFG_TYPE_PDI_LOAD 5
>>> -#define NPU6_RT_CFG_TYPE_DEBUG_BO 10
>>> -
>>> -#define NPU6_RT_CFG_VAL_PDI_LOAD_MGMT 0
>>> -#define NPU6_RT_CFG_VAL_PDI_LOAD_APP 1
>>> -
>>> -#define NPU6_RT_CFG_VAL_DEBUG_BO_DEFAULT 0
>>> -#define NPU6_RT_CFG_VAL_DEBUG_BO_LARGE   1
>>> -
>>> -#define NPU6_MPNPUCLK_FREQ_MAX  1267
>>> -#define NPU6_HCLK_FREQ_MAX      1800
>>> -
>>>   const struct amdxdna_dev_priv npu6_dev_priv = {
>>>       .fw_path        = "amdnpu/17f0_10/npu.sbin",
>>>       .protocol_major = 0x6,
>>>       .protocol_minor = 12,
>>> -    .rt_config    = {NPU6_RT_CFG_TYPE_PDI_LOAD, 
>>> NPU6_RT_CFG_VAL_PDI_LOAD_APP},
>>> +    .rt_config    = npu4_default_rt_cfg,
>>> +    .dpm_clk_tbl    = npu4_dpm_clk_table,
>>>       .col_align    = COL_ALIGN_NATURE,
>>>       .mbox_dev_addr  = NPU6_MBOX_BAR_BASE,
>>>       .mbox_size      = 0, /* Use BAR size */
>>> @@ -102,6 +91,10 @@ const struct amdxdna_dev_priv npu6_dev_priv = {
>>>           DEFINE_BAR_OFFSET(SMU_RESP_REG, NPU6_SMU, MP1_C2PMSG_61),
>>>           DEFINE_BAR_OFFSET(SMU_OUT_REG,  NPU6_SMU, MP1_C2PMSG_60),
>>>       },
>>> +    .hw_ops         = {
>>> +        .set_dpm = npu4_set_dpm,
>>> +    },
>>> +
>>>   };
>>>     const struct amdxdna_dev_info dev_npu6_info = {
>>> diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/ 
>>> amdxdna_accel.h
>>> index 4f15e53a548d..9af9302baf90 100644
>>> --- a/include/uapi/drm/amdxdna_accel.h
>>> +++ b/include/uapi/drm/amdxdna_accel.h
>>> @@ -33,6 +33,7 @@ enum amdxdna_drm_ioctl_id {
>>>       DRM_AMDXDNA_SYNC_BO,
>>>       DRM_AMDXDNA_EXEC_CMD,
>>>       DRM_AMDXDNA_GET_INFO,
>>> +    DRM_AMDXDNA_SET_STATE,
>>>   };
>>>     /**
>>> @@ -375,6 +376,24 @@ struct amdxdna_drm_query_hwctx {
>>>       __u64 errors;
>>>   };
>>>   +enum amdxdna_power_mode_type {
>>> +    POWER_MODE_DEFAULT, /* Fallback to calculated DPM */
>>> +    POWER_MODE_LOW,     /* Set frequency to lowest DPM */
>>> +    POWER_MODE_MEDIUM,  /* Set frequency to medium DPM */
>>> +    POWER_MODE_HIGH,    /* Set frequency to highest DPM */
>>> +    POWER_MODE_TURBO,   /* Maximum power */
>>> +};
>>> +
>>> +/**
>>> + * struct amdxdna_drm_get_power_mode - Get the configured power mode
>>> + * @power_mode: The mode type from enum amdxdna_power_mode_type
>>> + * @pad: MBZ.
>>> + */
>>> +struct amdxdna_drm_get_power_mode {
>>> +    __u8 power_mode;
>>> +    __u8 pad[7];
>>> +};
>>> +
>>>   /**
>>>    * struct amdxdna_drm_query_firmware_version - Query the firmware 
>>> version
>>>    * @major: The major version number
>>> @@ -397,6 +416,7 @@ enum amdxdna_drm_get_param {
>>>       DRM_AMDXDNA_QUERY_SENSORS,
>>>       DRM_AMDXDNA_QUERY_HW_CONTEXTS,
>>>       DRM_AMDXDNA_QUERY_FIRMWARE_VERSION = 8,
>>> +    DRM_AMDXDNA_GET_POWER_MODE,
>>>   };
>>>     /**
>>> @@ -411,6 +431,34 @@ struct amdxdna_drm_get_info {
>>>       __u64 buffer; /* in/out */
>>>   };
>>>   +enum amdxdna_drm_set_param {
>>> +    DRM_AMDXDNA_SET_POWER_MODE,
>>> +    DRM_AMDXDNA_WRITE_AIE_MEM,
>>> +    DRM_AMDXDNA_WRITE_AIE_REG,
>>> +};
>>> +
>>> +/**
>>> + * struct amdxdna_drm_set_state - Set the state of the AIE hardware.
>>> + * @param: Value in enum amdxdna_drm_set_param.
>>> + * @buffer_size: Size of the input param.
>>> + * @buffer: Input param.
>>> + */
>>> +struct amdxdna_drm_set_state {
>>> +    __u32 param; /* in */
>>> +    __u32 buffer_size; /* in */
>>> +    __u64 buffer; /* in */
>>> +};
>>> +
>>> +/**
>>> + * struct amdxdna_drm_set_power_mode - Set the power mode of the AIE 
>>> hardware
>>> + * @power_mode: The mode type from enum amdxdna_power_mode_type
>>> + * @pad: MBZ.
>>> + */
>>> +struct amdxdna_drm_set_power_mode {
>>> +    __u8 power_mode;
>>> +    __u8 pad[7];
>>> +};
>>> +
>>>   #define DRM_IOCTL_AMDXDNA_CREATE_HWCTX \
>>>       DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_CREATE_HWCTX, \
>>>            struct amdxdna_drm_create_hwctx)
>>> @@ -443,6 +491,10 @@ struct amdxdna_drm_get_info {
>>>       DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_GET_INFO, \
>>>            struct amdxdna_drm_get_info)
>>>   +#define DRM_IOCTL_AMDXDNA_SET_STATE \
>>> +    DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_SET_STATE, \
>>> +         struct amdxdna_drm_set_state)
>>> +
>>>   #if defined(__cplusplus)
>>>   } /* extern c end */
>>>   #endif
>>


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ