[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <lr75gtj62yktbguw6iv3fmnpjoxuhrwbdj2wkbp3na6uqobwgz@3hm2cgijeavd>
Date: Thu, 11 Sep 2025 13:54:34 +0100
From: Adrián Larumbe <adrian.larumbe@...labora.com>
To: Steven Price <steven.price@....com>
Cc: linux-kernel@...r.kernel.org, dri-devel@...ts.freedesktop.org,
Boris Brezillon <boris.brezillon@...labora.com>, kernel@...labora.com, Rob Herring <robh@...nel.org>,
Maarten Lankhorst <maarten.lankhorst@...ux.intel.com>, Maxime Ripard <mripard@...nel.org>,
Thomas Zimmermann <tzimmermann@...e.de>, David Airlie <airlied@...il.com>,
Simona Vetter <simona@...ll.ch>
Subject: Re: [PATCH v2 2/4] drm/panfrost: Introduce JM contexts for manging
job resources
Hi Steven,
On 10.09.2025 16:42, Steven Price wrote:
> On 04/09/2025 01:08, Adrián Larumbe wrote:
> > From: Boris Brezillon <boris.brezillon@...labora.com>
> >
> > A JM context describes GPU HW resourdce allocation for the jobs bound to
> > it. At the time of writing this, it only holds the JS and queue
> > scheduler priorities.
> >
> > When a context is created, all the scheduler entities created for job
> > slots will have the same priority.
> >
> > Until context creation and destruction and attaching a context ID to
> > a job submission are exposed to UM, all jobs shall be bound to the
> > default Panfrost file context, which has medium priority.
> >
> > Signed-off-by: Boris Brezillon <boris.brezillon@...labora.com>
> > Signed-off-by: Adrián Larumbe <adrian.larumbe@...labora.com>
> > ---
> > drivers/gpu/drm/panfrost/panfrost_device.h | 4 +-
> > drivers/gpu/drm/panfrost/panfrost_drv.c | 19 +-
> > drivers/gpu/drm/panfrost/panfrost_job.c | 203 +++++++++++++++++----
> > drivers/gpu/drm/panfrost/panfrost_job.h | 25 ++-
> > 4 files changed, 210 insertions(+), 41 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
> > index 077525a3ad68..5b164871eb95 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_device.h
> > +++ b/drivers/gpu/drm/panfrost/panfrost_device.h
> > @@ -15,6 +15,7 @@
> > #include <drm/gpu_scheduler.h>
> >
> > #include "panfrost_devfreq.h"
> > +#include "panfrost_job.h"
> >
> > struct panfrost_device;
> > struct panfrost_mmu;
> > @@ -22,7 +23,6 @@ struct panfrost_job_slot;
> > struct panfrost_job;
> > struct panfrost_perfcnt;
> >
> > -#define NUM_JOB_SLOTS 3
> > #define MAX_PM_DOMAINS 5
> >
> > enum panfrost_drv_comp_bits {
> > @@ -206,7 +206,7 @@ struct panfrost_engine_usage {
> > struct panfrost_file_priv {
> > struct panfrost_device *pfdev;
> >
> > - struct drm_sched_entity sched_entity[NUM_JOB_SLOTS];
> > + struct xarray jm_ctxs;
> >
> > struct panfrost_mmu *mmu;
> >
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
> > index 1ea6c509a5d5..398c067457d9 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
> > +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
> > @@ -279,6 +279,7 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
> > struct panfrost_file_priv *file_priv = file->driver_priv;
> > struct drm_panfrost_submit *args = data;
> > struct drm_syncobj *sync_out = NULL;
> > + struct panfrost_jm_ctx *jm_ctx;
> > struct panfrost_job *job;
> > int ret = 0, slot;
> >
> > @@ -294,10 +295,17 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
> > return -ENODEV;
> > }
> >
> > + /* TODO: Use the default JM context until ctx management IOCTLs are exposed */
> > + jm_ctx = panfrost_jm_ctx_from_handle(file, 0);
> > + if (!jm_ctx) {
> > + ret = -EINVAL;
> > + goto out_put_syncout;
> > + }
> > +
> > job = kzalloc(sizeof(*job), GFP_KERNEL);
> > if (!job) {
> > ret = -ENOMEM;
> > - goto out_put_syncout;
> > + goto out_put_jm_ctx;
> > }
> >
> > kref_init(&job->refcount);
> > @@ -307,12 +315,13 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
> > job->requirements = args->requirements;
> > job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev);
> > job->mmu = file_priv->mmu;
> > + job->ctx = panfrost_jm_ctx_get(jm_ctx);
> > job->engine_usage = &file_priv->engine_usage;
> >
> > slot = panfrost_job_get_slot(job);
> >
> > ret = drm_sched_job_init(&job->base,
> > - &file_priv->sched_entity[slot],
> > + &jm_ctx->slots[slot].sched_entity,
> > 1, NULL, file->client_id);
> > if (ret)
> > goto out_put_job;
> > @@ -338,6 +347,8 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
> > drm_sched_job_cleanup(&job->base);
> > out_put_job:
> > panfrost_job_put(job);
> > +out_put_jm_ctx:
> > + panfrost_jm_ctx_put(jm_ctx);
> > out_put_syncout:
> > if (sync_out)
> > drm_syncobj_put(sync_out);
> > @@ -564,7 +575,7 @@ panfrost_open(struct drm_device *dev, struct drm_file *file)
> > goto err_free;
> > }
> >
> > - ret = panfrost_job_open(panfrost_priv);
> > + ret = panfrost_job_open(file);
> > if (ret)
> > goto err_job;
> >
> > @@ -583,7 +594,7 @@ panfrost_postclose(struct drm_device *dev, struct drm_file *file)
> > struct panfrost_file_priv *panfrost_priv = file->driver_priv;
> >
> > panfrost_perfcnt_close(file);
> > - panfrost_job_close(panfrost_priv);
> > + panfrost_job_close(file);
> >
> > panfrost_mmu_ctx_put(panfrost_priv->mmu);
> > kfree(panfrost_priv);
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
> > index 82acabb21b27..b6853add307c 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_job.c
> > +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
> > @@ -8,6 +8,7 @@
> > #include <linux/platform_device.h>
> > #include <linux/pm_runtime.h>
> > #include <linux/dma-resv.h>
> > +#include <drm/drm_auth.h>
> > #include <drm/gpu_scheduler.h>
> > #include <drm/panfrost_drm.h>
> >
> > @@ -22,6 +23,7 @@
> > #include "panfrost_mmu.h"
> > #include "panfrost_dump.h"
> >
> > +#define MAX_JM_CTX_PER_FILE 128
>
> 128 seems like a very large number, do we have a reason to set this so high?
Maybe I can drop it down to 64, to make sure it remains a power of two.
>
> > #define JOB_TIMEOUT_MS 500
> >
> > #define job_write(dev, reg, data) writel(data, dev->iomem + (reg))
> > @@ -222,7 +224,7 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
> >
> > /* start MMU, medium priority, cache clean/flush on end, clean/flush on
> > * start */
> > - cfg |= JS_CONFIG_THREAD_PRI(8) |
> > + cfg |= job->ctx->config |
>
> Note that the thread priority on Midgard/Bifrost is pretty pointless. I
> don't believe kbase ever set this to anything other than 8. In theory it
> should balance priority between different slots but "How the priority
> values affect issue rates from different slots is implementation
> dependent" (which I think is wording for 'not implemented'!).
>
> My main concern is that since kbase never used it there's a possibility
> for undiscovered hardware bugs.
In that case, to avoid unexpected behaviour, I'll remove this part in the next revision.
> > JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE |
> > JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE |
> > panfrost_get_job_chain_flag(job);
> > @@ -359,6 +361,7 @@ static void panfrost_job_cleanup(struct kref *ref)
> > kvfree(job->bos);
> > }
> >
> > + panfrost_jm_ctx_put(job->ctx);
> > kfree(job);
> > }
> >
> > @@ -917,39 +920,184 @@ void panfrost_job_fini(struct panfrost_device *pfdev)
> > destroy_workqueue(pfdev->reset.wq);
> > }
> >
> > -int panfrost_job_open(struct panfrost_file_priv *panfrost_priv)
> > +int panfrost_job_open(struct drm_file *file)
> > +{
> > + struct panfrost_file_priv *panfrost_priv = file->driver_priv;
> > + int ret;
> > +
> > + struct drm_panfrost_jm_ctx_create default_jm_ctx = {
> > + .priority = PANFROST_JM_CTX_PRIORITY_MEDIUM,
> > + };
> > +
> > + xa_init_flags(&panfrost_priv->jm_ctxs, XA_FLAGS_ALLOC);
> > +
> > + ret = panfrost_jm_ctx_create(file, &default_jm_ctx);
> > + if (ret)
> > + return ret;
> > +
> > + /* We expect the default context to be assigned handle 0. */
> > + if (WARN_ON(default_jm_ctx.handle))
> > + return -EINVAL;
> > +
> > + return 0;
> > +}
> > +
> > +void panfrost_job_close(struct drm_file *file)
> > +{
> > + struct panfrost_file_priv *panfrost_priv = file->driver_priv;
> > + struct panfrost_jm_ctx *jm_ctx;
> > + unsigned long i;
> > +
> > + xa_for_each(&panfrost_priv->jm_ctxs, i, jm_ctx)
> > + panfrost_jm_ctx_destroy(file, i);
> > +
> > + xa_destroy(&panfrost_priv->jm_ctxs);
> > +}
> > +
> > +int panfrost_job_is_idle(struct panfrost_device *pfdev)
> > {
> > - struct panfrost_device *pfdev = panfrost_priv->pfdev;
> > struct panfrost_job_slot *js = pfdev->js;
> > - struct drm_gpu_scheduler *sched;
> > - int ret, i;
> > + int i;
> >
> > for (i = 0; i < NUM_JOB_SLOTS; i++) {
> > - sched = &js->queue[i].sched;
> > - ret = drm_sched_entity_init(&panfrost_priv->sched_entity[i],
> > - DRM_SCHED_PRIORITY_NORMAL, &sched,
> > - 1, NULL);
> > - if (WARN_ON(ret))
> > - return ret;
> > + /* If there are any jobs in the HW queue, we're not idle */
> > + if (atomic_read(&js->queue[i].sched.credit_count))
> > + return false;
> > }
> > +
> > + return true;
> > +}
> > +
> > +static void panfrost_jm_ctx_release(struct kref *kref)
> > +{
> > + struct panfrost_jm_ctx *jm_ctx = container_of(kref, struct panfrost_jm_ctx, refcnt);
> > +
> > + kfree(jm_ctx);
> > +}
> > +
> > +void
> > +panfrost_jm_ctx_put(struct panfrost_jm_ctx *jm_ctx)
> > +{
> > + if (jm_ctx)
> > + kref_put(&jm_ctx->refcnt, panfrost_jm_ctx_release);
> > +}
> > +
> > +struct panfrost_jm_ctx *
> > +panfrost_jm_ctx_get(struct panfrost_jm_ctx *jm_ctx)
> > +{
> > + if (jm_ctx)
> > + kref_get(&jm_ctx->refcnt);
> > +
> > + return jm_ctx;
> > +}
> > +
> > +struct panfrost_jm_ctx *
> > +panfrost_jm_ctx_from_handle(struct drm_file *file, u32 handle)
> > +{
> > + struct panfrost_file_priv *priv = file->driver_priv;
> > + struct panfrost_jm_ctx *jm_ctx;
> > +
> > + xa_lock(&priv->jm_ctxs);
> > + jm_ctx = panfrost_jm_ctx_get(xa_load(&priv->jm_ctxs, handle));
> > + xa_unlock(&priv->jm_ctxs);
> > +
> > + return jm_ctx;
> > +}
> > +
> > +static int jm_ctx_prio_to_drm_sched_prio(struct drm_file *file,
> > + enum drm_panfrost_jm_ctx_priority in,
> > + enum drm_sched_priority *out)
> > +{
> > + switch (in) {
> > + case PANFROST_JM_CTX_PRIORITY_LOW:
> > + *out = DRM_SCHED_PRIORITY_LOW;
> > + return 0;
> > + case PANFROST_JM_CTX_PRIORITY_MEDIUM:
> > + *out = DRM_SCHED_PRIORITY_NORMAL;
> > + return 0;
> > + case PANFROST_JM_CTX_PRIORITY_HIGH:
> > + /* Higher priorities require CAP_SYS_NICE or DRM_MASTER */
> > + if (!capable(CAP_SYS_NICE) && !drm_is_current_master(file))
> > + return -EACCES;
> > +
> > + *out = DRM_SCHED_PRIORITY_HIGH;
> > + return 0;
> > + default:
> > + return -EINVAL;
> > + }
> > +}
> > +
> > +int panfrost_jm_ctx_create(struct drm_file *file,
> > + struct drm_panfrost_jm_ctx_create *args)
> > +{
> > + struct panfrost_file_priv *priv = file->driver_priv;
> > + struct panfrost_device *pfdev = priv->pfdev;
> > + enum drm_sched_priority sched_prio;
> > + struct panfrost_jm_ctx *jm_ctx;
> > +
> > + int ret;
> > +
> > + jm_ctx = kzalloc(sizeof(*jm_ctx), GFP_KERNEL);
> > + if (!jm_ctx)
> > + return -ENOMEM;
> > +
> > + kref_init(&jm_ctx->refcnt);
> > +
> > + /* Same priority for all JS within a single context */
> > + jm_ctx->config = JS_CONFIG_THREAD_PRI(args->priority);
> > +
> > + ret = jm_ctx_prio_to_drm_sched_prio(file, args->priority, &sched_prio);
> > + if (ret)
> > + goto err_put_jm_ctx;
> > +
> > + for (u32 i = 0; i < NUM_JOB_SLOTS - 1; i++) {
> > + struct drm_gpu_scheduler *sched = &pfdev->js->queue[i].sched;
> > + struct panfrost_js_ctx *js_ctx = &jm_ctx->slots[i];
> > +
> > + ret = drm_sched_entity_init(&js_ctx->sched_entity, sched_prio,
> > + &sched, 1, NULL);
> > + if (ret)
> > + goto err_put_jm_ctx;
> > +
> > + js_ctx->enabled = true;
> > + }
> > +
> > + ret = xa_alloc(&priv->jm_ctxs, &args->handle, jm_ctx,
> > + XA_LIMIT(0, MAX_JM_CTX_PER_FILE), GFP_KERNEL);
> > + if (ret)
> > + goto err_put_jm_ctx;
>
> On error here we just jump down and call panfrost_jm_ctx_put() which
> will free jm_ctx but won't destroy any of the drm_sched_entities. There
> seems to be something a bit off with the lifetime management here.
>
> Should panfrost_jm_ctx_release() be responsible for tearing down the
> context, and panfrost_jm_ctx_destroy() be nothing more than dropping the
> reference?
>
> Steve
>
> > +
> > return 0;
> > +
> > +err_put_jm_ctx:
> > + panfrost_jm_ctx_put(jm_ctx);
> > + return ret;
> > }
> >
> > -void panfrost_job_close(struct panfrost_file_priv *panfrost_priv)
> > +int panfrost_jm_ctx_destroy(struct drm_file *file, u32 handle)
> > {
> > - struct panfrost_device *pfdev = panfrost_priv->pfdev;
> > - int i;
> > + struct panfrost_file_priv *priv = file->driver_priv;
> > + struct panfrost_device *pfdev = priv->pfdev;
> > + struct panfrost_jm_ctx *jm_ctx;
> >
> > - for (i = 0; i < NUM_JOB_SLOTS; i++)
> > - drm_sched_entity_destroy(&panfrost_priv->sched_entity[i]);
> > + jm_ctx = xa_erase(&priv->jm_ctxs, handle);
> > + if (!jm_ctx)
> > + return -EINVAL;
> > +
> > + for (u32 i = 0; i < ARRAY_SIZE(jm_ctx->slots); i++) {
> > + if (jm_ctx->slots[i].enabled)
> > + drm_sched_entity_destroy(&jm_ctx->slots[i].sched_entity);
> > + }
> >
> > /* Kill in-flight jobs */
> > spin_lock(&pfdev->js->job_lock);
> > - for (i = 0; i < NUM_JOB_SLOTS; i++) {
> > - struct drm_sched_entity *entity = &panfrost_priv->sched_entity[i];
> > - int j;
> > + for (u32 i = 0; i < ARRAY_SIZE(jm_ctx->slots); i++) {
> > + struct drm_sched_entity *entity = &jm_ctx->slots[i].sched_entity;
> > +
> > + if (!jm_ctx->slots[i].enabled)
> > + continue;
> >
> > - for (j = ARRAY_SIZE(pfdev->jobs[0]) - 1; j >= 0; j--) {
> > + for (int j = ARRAY_SIZE(pfdev->jobs[0]) - 1; j >= 0; j--) {
> > struct panfrost_job *job = pfdev->jobs[i][j];
> > u32 cmd;
> >
> > @@ -980,18 +1128,7 @@ void panfrost_job_close(struct panfrost_file_priv *panfrost_priv)
> > }
> > }
> > spin_unlock(&pfdev->js->job_lock);
> > -}
> >
> > -int panfrost_job_is_idle(struct panfrost_device *pfdev)
> > -{
> > - struct panfrost_job_slot *js = pfdev->js;
> > - int i;
> > -
> > - for (i = 0; i < NUM_JOB_SLOTS; i++) {
> > - /* If there are any jobs in the HW queue, we're not idle */
> > - if (atomic_read(&js->queue[i].sched.credit_count))
> > - return false;
> > - }
> > -
> > - return true;
> > + panfrost_jm_ctx_put(jm_ctx);
> > + return 0;
> > }
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_job.h b/drivers/gpu/drm/panfrost/panfrost_job.h
> > index ec581b97852b..f3e5010e6cf5 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_job.h
> > +++ b/drivers/gpu/drm/panfrost/panfrost_job.h
> > @@ -18,6 +18,7 @@ struct panfrost_job {
> >
> > struct panfrost_device *pfdev;
> > struct panfrost_mmu *mmu;
> > + struct panfrost_jm_ctx *ctx;
> >
> > /* Fence to be signaled by IRQ handler when the job is complete. */
> > struct dma_fence *done_fence;
> > @@ -39,10 +40,30 @@ struct panfrost_job {
> > u64 start_cycles;
> > };
> >
> > +struct panfrost_js_ctx {
> > + struct drm_sched_entity sched_entity;
> > + bool enabled;
> > +};
> > +
> > +#define NUM_JOB_SLOTS 3
> > +
> > +struct panfrost_jm_ctx {
> > + struct kref refcnt;
> > + u32 config;
> > + struct panfrost_js_ctx slots[NUM_JOB_SLOTS];
> > +};
> > +
> > +int panfrost_jm_ctx_create(struct drm_file *file,
> > + struct drm_panfrost_jm_ctx_create *args);
> > +int panfrost_jm_ctx_destroy(struct drm_file *file, u32 handle);
> > +void panfrost_jm_ctx_put(struct panfrost_jm_ctx *jm_ctx);
> > +struct panfrost_jm_ctx *panfrost_jm_ctx_get(struct panfrost_jm_ctx *jm_ctx);
> > +struct panfrost_jm_ctx *panfrost_jm_ctx_from_handle(struct drm_file *file, u32 handle);
> > +
> > int panfrost_job_init(struct panfrost_device *pfdev);
> > void panfrost_job_fini(struct panfrost_device *pfdev);
> > -int panfrost_job_open(struct panfrost_file_priv *panfrost_priv);
> > -void panfrost_job_close(struct panfrost_file_priv *panfrost_priv);
> > +int panfrost_job_open(struct drm_file *file);
> > +void panfrost_job_close(struct drm_file *file);
> > int panfrost_job_get_slot(struct panfrost_job *job);
> > int panfrost_job_push(struct panfrost_job *job);
> > void panfrost_job_put(struct panfrost_job *job);
Adrian Larumbe
Powered by blists - more mailing lists