[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <88a18f23-04d9-db8c-9c2c-8bf36afa8d63@amd.com>
Date: Thu, 11 Dec 2025 11:06:58 -0800
From: Lizhi Hou <lizhi.hou@....com>
To: Ilpo Järvinen <ilpo.jarvinen@...ux.intel.com>
CC: Hans de Goede <hansg@...nel.org>, <ogabbay@...nel.org>,
<quic_jhugo@...cinc.com>, <maciej.falkowski@...ux.intel.com>, LKML
<linux-kernel@...r.kernel.org>, <max.zhen@....com>, <sonal.santan@....com>,
<mario.limonciello@....com>, <platform-driver-x86@...r.kernel.org>,
<dri-devel@...ts.freedesktop.org>, <Shyam-sundar.S-k@....com>,
<VinitKumar.Shukla@....com>
Subject: Re: [PATCH V2 2/2] accel/amdxdna: Add IOCTL to retrieve realtime NPU
power estimate
On 12/11/25 10:24, Ilpo Järvinen wrote:
> On Thu, 11 Dec 2025, Lizhi Hou wrote:
>
>> The AMD PMF driver provides an interface to obtain realtime power
>> estimates for the NPU. Expose this information to userspace through a
>> new DRM_IOCTL_AMDXDNA_GET_INFO parameter, allowing applications to query
>> the current NPU power level.
>>
>> Signed-off-by: Lizhi Hou <lizhi.hou@....com>
>> ---
>> drivers/accel/amdxdna/aie2_pci.c | 29 +++++++++++++++++++++++++
>> drivers/accel/amdxdna/aie2_pci.h | 18 +++++++++++++++
>> drivers/accel/amdxdna/amdxdna_pci_drv.c | 3 ++-
>> 3 files changed, 49 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
>> index 81a8e4137bfd..4a2c7addcd79 100644
>> --- a/drivers/accel/amdxdna/aie2_pci.c
>> +++ b/drivers/accel/amdxdna/aie2_pci.c
>> @@ -10,6 +10,7 @@
>> #include <drm/drm_managed.h>
>> #include <drm/drm_print.h>
>> #include <drm/gpu_scheduler.h>
>> +#include <linux/amd-pmf-io.h>
>> #include <linux/cleanup.h>
>> #include <linux/errno.h>
>> #include <linux/firmware.h>
>> @@ -777,6 +778,31 @@ static int aie2_get_clock_metadata(struct amdxdna_client *client,
>> return ret;
>> }
>>
>> +static int aie2_get_sensors(struct amdxdna_client *client,
>> + struct amdxdna_drm_get_info *args)
>> +{
>> + struct amdxdna_drm_query_sensor sensor = { 0 };
> = {} is enough to initialize to defaults.
Will fix.
>
>> + int ret;
>> +
>> + if (args->buffer_size < sizeof(sensor))
>> + return -EINVAL;
>> +
>> + ret = AIE2_GET_PMF_NPU_DATA(npu_power, sensor.input);
>> + if (ret)
>> + return ret;
>> + sensor.type = AMDXDNA_SENSOR_TYPE_POWER;
>> + sensor.unitm = -3;
>> + snprintf(sensor.label, sizeof(sensor.label), "Total Power");
>> + snprintf(sensor.units, sizeof(sensor.units), "mW");
> scnprintf() x2
Will fix.
>> +
>> + if (copy_to_user(u64_to_user_ptr(args->buffer), &sensor, sizeof(sensor)))
>> + return -EFAULT;
>> +
>> + args->buffer_size = sizeof(sensor);
>> +
>> + return 0;
>> +}
>> +
>> static int aie2_hwctx_status_cb(struct amdxdna_hwctx *hwctx, void *arg)
>> {
>> struct amdxdna_drm_hwctx_entry *tmp __free(kfree) = NULL;
>> @@ -980,6 +1006,9 @@ static int aie2_get_info(struct amdxdna_client *client, struct amdxdna_drm_get_i
>> case DRM_AMDXDNA_QUERY_CLOCK_METADATA:
>> ret = aie2_get_clock_metadata(client, args);
>> break;
>> + case DRM_AMDXDNA_QUERY_SENSORS:
>> + ret = aie2_get_sensors(client, args);
>> + break;
>> case DRM_AMDXDNA_QUERY_HW_CONTEXTS:
>> ret = aie2_get_hwctx_status(client, args);
>> break;
>> diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
>> index c6b5cf4ae5c4..edf6f2e00dea 100644
>> --- a/drivers/accel/amdxdna/aie2_pci.h
>> +++ b/drivers/accel/amdxdna/aie2_pci.h
>> @@ -46,6 +46,24 @@
>> pci_resource_len(NDEV2PDEV(_ndev), (_ndev)->xdna->dev_info->mbox_bar); \
>> })
>>
>> +#if IS_ENABLED(CONFIG_AMD_PMF)
>> +#define AIE2_GET_PMF_NPU_DATA(field, val) \
>> +({ \
>> + struct amd_pmf_npu_metrics _npu_metrics; \
>> + int _ret; \
>> + _ret = amd_pmf_get_npu_data(&_npu_metrics); \
>> + val = _ret ? U32_MAX : _npu_metrics.field; \
> #include needed for U32_MAX.
Will fix.
>
> Unrelated to this patch, this files is also missing include for at least
> SZ_* (so likely works by chance) and types.h.
Thanks for pointing this out. I will fix it in future patch.
>
>> + (_ret); \
>> +})
> Please try to align the backslashed to right so that they don't mix up
> with the code.
>
> Add an empty lines after variables declaration (with the backslash in
> the end obviously) so these almost looks like "normal" coding style.
Will fix.
Thanks,
Lizhi
>
>> +#else
>> +#define SENSOR_DEFAULT_npu_power U32_MAX
>> +#define AIE2_GET_PMF_NPU_DATA(field, val) \
>> +({ \
>> + val = SENSOR_DEFAULT_##field; \
>> + (-EOPNOTSUPP); \
>> +})
>> +#endif
>> +
>> enum aie2_smu_reg_idx {
>> SMU_CMD_REG = 0,
>> SMU_ARG_REG,
>> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> index 1973ab67721b..643ebd387074 100644
>> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
>> @@ -32,9 +32,10 @@ MODULE_FIRMWARE("amdnpu/17f0_20/npu.sbin");
>> * 0.4: Support getting resource information
>> * 0.5: Support getting telemetry data
>> * 0.6: Support preemption
>> + * 0.7: Support getting power data
>> */
>> #define AMDXDNA_DRIVER_MAJOR 0
>> -#define AMDXDNA_DRIVER_MINOR 6
>> +#define AMDXDNA_DRIVER_MINOR 7
>>
>> /*
>> * Bind the driver base on (vendor_id, device_id) pair and later use the
>>
Powered by blists - more mailing lists