[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <72bba6ba-1d69-44fb-bac5-edd7f383b66f@intel.com>
Date: Thu, 19 Jun 2025 11:24:34 +0530
From: "Nilawar, Badal" <badal.nilawar@...el.com>
To: Daniele Ceraolo Spurio <daniele.ceraolospurio@...el.com>,
<intel-xe@...ts.freedesktop.org>, <dri-devel@...ts.freedesktop.org>,
<linux-kernel@...r.kernel.org>
CC: <anshuman.gupta@...el.com>, <rodrigo.vivi@...el.com>,
<alexander.usyskin@...el.com>, <gregkh@...uxfoundation.org>, <jgg@...dia.com>
Subject: Re: [PATCH v3 05/10] drm/xe/xe_late_bind_fw: Load late binding
firmware
On 19-06-2025 02:27, Daniele Ceraolo Spurio wrote:
>
>
> On 6/18/2025 12:00 PM, Badal Nilawar wrote:
>> Load late binding firmware
>>
>> v2:
>> - s/EAGAIN/EBUSY/
>> - Flush worker in suspend and driver unload (Daniele)
>> v3:
>> - Use retry interval of 6s, in steps of 200ms, to allow
>> other OS components release MEI CL handle (Sasha)
>>
>> Signed-off-by: Badal Nilawar <badal.nilawar@...el.com>
>> ---
>> drivers/gpu/drm/xe/xe_late_bind_fw.c | 113 ++++++++++++++++++++-
>> drivers/gpu/drm/xe/xe_late_bind_fw.h | 1 +
>> drivers/gpu/drm/xe/xe_late_bind_fw_types.h | 5 +
>> 3 files changed, 118 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw.c
>> b/drivers/gpu/drm/xe/xe_late_bind_fw.c
>> index d416d6cc1fa2..54aa08c6bdfd 100644
>> --- a/drivers/gpu/drm/xe/xe_late_bind_fw.c
>> +++ b/drivers/gpu/drm/xe/xe_late_bind_fw.c
>> @@ -16,6 +16,20 @@
>> #include "xe_late_bind_fw.h"
>> #include "xe_pcode.h"
>> #include "xe_pcode_api.h"
>> +#include "xe_pm.h"
>> +
>> +/*
>> + * The component should load quite quickly in most cases, but it
>> could take
>> + * a bit. Using a very big timeout just to cover the worst case
>> scenario
>> + */
>> +#define LB_INIT_TIMEOUT_MS 20000
>> +
>> +/*
>> + * Retry interval set to 6 seconds, in steps of 200 ms, to allow
>> time for
>> + * other OS components to release the MEI CL handle
>> + */
>> +#define LB_FW_LOAD_RETRY_MAXCOUNT 30
>> +#define LB_FW_LOAD_RETRY_PAUSE_MS 200
>> static const u32 fw_id_to_type[] = {
>> [FAN_CONTROL_FW_ID] = CSC_LATE_BINDING_TYPE_FAN_CONTROL,
>> @@ -44,6 +58,85 @@ static int xe_late_bind_fw_num_fans(struct
>> xe_late_bind *late_bind)
>> return 0;
>> }
>> +static void xe_late_bind_wait_for_worker_completion(struct
>> xe_late_bind *late_bind)
>> +{
>> + struct xe_device *xe = late_bind_to_xe(late_bind);
>> + struct xe_late_bind_fw *lbfw;
>> + int fw_id;
>> +
>> + for (fw_id = 0; fw_id < MAX_FW_ID; fw_id++) {
>> + lbfw = &late_bind->late_bind_fw[fw_id];
>> + if (lbfw->valid && late_bind->wq) {
>> + drm_dbg(&xe->drm, "Flush work: load %s firmware\n",
>> + fw_id_to_name[lbfw->id]);
>> + flush_work(&lbfw->work);
>
> incorrect indent here for flush_work
>
>> + }
>> + }
>> +}
>> +
>> +static void late_bind_work(struct work_struct *work)
>> +{
>> + struct xe_late_bind_fw *lbfw = container_of(work, struct
>> xe_late_bind_fw, work);
>> + struct xe_late_bind *late_bind = container_of(lbfw, struct
>> xe_late_bind,
>> + late_bind_fw[lbfw->id]);
>> + struct xe_device *xe = late_bind_to_xe(late_bind);
>> + int retry = LB_FW_LOAD_RETRY_MAXCOUNT;
>> + int ret;
>> + int slept;
>> +
>> + /* we can queue this before the component is bound */
>> + for (slept = 0; slept < LB_INIT_TIMEOUT_MS; slept += 100) {
>> + if (late_bind->component.ops)
>> + break;
>> + msleep(100);
>> + }
>> +
>> + xe_pm_runtime_get(xe);
>> + mutex_lock(&late_bind->mutex);
>> +
>> + if (!late_bind->component.ops) {
>> + drm_err(&xe->drm, "Late bind component not bound\n");
>> + goto out;
>> + }
>> +
>> + drm_dbg(&xe->drm, "Load %s firmware\n", fw_id_to_name[lbfw->id]);
>> +
>> + do {
>> + ret =
>> late_bind->component.ops->push_config(late_bind->component.mei_dev,
>> + lbfw->type, lbfw->flags,
>> + lbfw->payload, lbfw->payload_size);
>> + if (!ret)
>> + break;
>> + msleep(LB_FW_LOAD_RETRY_PAUSE_MS);
>> + } while (--retry && ret == -EBUSY);
>> +
>> + if (ret)
>> + drm_err(&xe->drm, "Load %s firmware failed with err %d\n",
>> + fw_id_to_name[lbfw->id], ret);
>> + else
>> + drm_dbg(&xe->drm, "Load %s firmware successful\n",
>> + fw_id_to_name[lbfw->id]);
>> +out:
>> + mutex_unlock(&late_bind->mutex);
>> + xe_pm_runtime_put(xe);
>> +}
>> +
>> +int xe_late_bind_fw_load(struct xe_late_bind *late_bind)
>> +{
>> + struct xe_late_bind_fw *lbfw;
>> + int fw_id;
>> +
>> + if (!late_bind->component_added)
>> + return -EINVAL;
>
> -ENODEV instead? This should only happen if we don't support late_bind
> or if the component was not built.
Sure.
>
>> +
>> + for (fw_id = 0; fw_id < MAX_FW_ID; fw_id++) {
>> + lbfw = &late_bind->late_bind_fw[fw_id];
>> + if (lbfw->valid)
>> + queue_work(late_bind->wq, &lbfw->work);
>> + }
>> + return 0;
>> +}
>> +
>> static int __xe_late_bind_fw_init(struct xe_late_bind *late_bind,
>> u32 fw_id)
>> {
>> struct xe_device *xe = late_bind_to_xe(late_bind);
>> @@ -93,6 +186,7 @@ static int __xe_late_bind_fw_init(struct
>> xe_late_bind *late_bind, u32 fw_id)
>> memcpy(lb_fw->payload, fw->data, lb_fw->payload_size);
>> release_firmware(fw);
>> + INIT_WORK(&lb_fw->work, late_bind_work);
>> lb_fw->valid = true;
>> return 0;
>> @@ -103,11 +197,16 @@ static int xe_late_bind_fw_init(struct
>> xe_late_bind *late_bind)
>> int ret;
>> int fw_id;
>> + late_bind->wq =
>> alloc_ordered_workqueue("late-bind-ordered-wq", 0);
>> + if (!late_bind->wq)
>> + return -ENOMEM;
>> +
>> for (fw_id = 0; fw_id < MAX_FW_ID; fw_id++) {
>> ret = __xe_late_bind_fw_init(late_bind, fw_id);
>> if (ret)
>> return ret;
>> }
>> +
>
> nit: this newline could be moved to the patch that adds this code.
>
>> return ret;
>> }
>> @@ -131,6 +230,8 @@ static void
>> xe_late_bind_component_unbind(struct device *xe_kdev,
>> struct xe_device *xe = kdev_to_xe_device(xe_kdev);
>> struct xe_late_bind *late_bind = &xe->late_bind;
>> + xe_late_bind_wait_for_worker_completion(late_bind);
>
> I don't see this called for full suspend (not rpm), even if follow up
> patches. Am I just not seeing it, or is it missing?
I missed this, I will address in follow up patch.
Badal
>
> Daniele
>
>> +
>> mutex_lock(&late_bind->mutex);
>> late_bind->component.ops = NULL;
>> mutex_unlock(&late_bind->mutex);
>> @@ -146,8 +247,14 @@ static void xe_late_bind_remove(void *arg)
>> struct xe_late_bind *late_bind = arg;
>> struct xe_device *xe = late_bind_to_xe(late_bind);
>> + xe_late_bind_wait_for_worker_completion(late_bind);
>> +
>> component_del(xe->drm.dev, &xe_late_bind_component_ops);
>> late_bind->component_added = false;
>> + if (late_bind->wq) {
>> + destroy_workqueue(late_bind->wq);
>> + late_bind->wq = NULL;
>> + }
>> mutex_destroy(&late_bind->mutex);
>> }
>> @@ -184,5 +291,9 @@ int xe_late_bind_init(struct xe_late_bind
>> *late_bind)
>> if (err)
>> return err;
>> - return xe_late_bind_fw_init(late_bind);
>> + err = xe_late_bind_fw_init(late_bind);
>> + if (err)
>> + return err;
>> +
>> + return xe_late_bind_fw_load(late_bind);
>> }
>> diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw.h
>> b/drivers/gpu/drm/xe/xe_late_bind_fw.h
>> index 4c73571c3e62..28d56ed2bfdc 100644
>> --- a/drivers/gpu/drm/xe/xe_late_bind_fw.h
>> +++ b/drivers/gpu/drm/xe/xe_late_bind_fw.h
>> @@ -11,5 +11,6 @@
>> struct xe_late_bind;
>> int xe_late_bind_init(struct xe_late_bind *late_bind);
>> +int xe_late_bind_fw_load(struct xe_late_bind *late_bind);
>> #endif
>> diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw_types.h
>> b/drivers/gpu/drm/xe/xe_late_bind_fw_types.h
>> index c6fd33fd5484..d256f53d59e6 100644
>> --- a/drivers/gpu/drm/xe/xe_late_bind_fw_types.h
>> +++ b/drivers/gpu/drm/xe/xe_late_bind_fw_types.h
>> @@ -9,6 +9,7 @@
>> #include <linux/iosys-map.h>
>> #include <linux/mutex.h>
>> #include <linux/types.h>
>> +#include <linux/workqueue.h>
>> #define MAX_PAYLOAD_SIZE (1024 * 4)
>> @@ -38,6 +39,8 @@ struct xe_late_bind_fw {
>> u8 payload[MAX_PAYLOAD_SIZE];
>> /** @late_bind_fw.payload_size: late binding blob payload_size */
>> size_t payload_size;
>> + /** @late_bind_fw.work: worker to upload latebind blob */
>> + struct work_struct work;
>> };
>> /**
>> @@ -66,6 +69,8 @@ struct xe_late_bind {
>> struct mutex mutex;
>> /** @late_bind.late_bind_fw: late binding firmware array */
>> struct xe_late_bind_fw late_bind_fw[MAX_FW_ID];
>> + /** @late_bind.wq: workqueue to submit request to download late
>> bind blob */
>> + struct workqueue_struct *wq;
>> };
>> #endif
>
Powered by blists - more mailing lists