linux-kernel - Re: [PATCH v2 2/4] drm/panfrost: Introduce JM contexts for manging job resources

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <lr75gtj62yktbguw6iv3fmnpjoxuhrwbdj2wkbp3na6uqobwgz@3hm2cgijeavd>
Date: Thu, 11 Sep 2025 13:54:34 +0100
From: Adrián Larumbe <adrian.larumbe@...labora.com>
To: Steven Price <steven.price@....com>
Cc: linux-kernel@...r.kernel.org, dri-devel@...ts.freedesktop.org, 
	Boris Brezillon <boris.brezillon@...labora.com>, kernel@...labora.com, Rob Herring <robh@...nel.org>, 
	Maarten Lankhorst <maarten.lankhorst@...ux.intel.com>, Maxime Ripard <mripard@...nel.org>, 
	Thomas Zimmermann <tzimmermann@...e.de>, David Airlie <airlied@...il.com>, 
	Simona Vetter <simona@...ll.ch>
Subject: Re: [PATCH v2 2/4] drm/panfrost: Introduce JM contexts for manging
 job resources

Hi Steven,

On 10.09.2025 16:42, Steven Price wrote:
> On 04/09/2025 01:08, Adrián Larumbe wrote:
> > From: Boris Brezillon <boris.brezillon@...labora.com>
> >
> > A JM context describes GPU HW resourdce allocation for the jobs bound to
> > it. At the time of writing this, it only holds the JS and queue
> > scheduler priorities.
> >
> > When a context is created, all the scheduler entities created for job
> > slots will have the same priority.
> >
> > Until context creation and destruction and attaching a context ID to
> > a job submission are exposed to UM, all jobs shall be bound to the
> > default Panfrost file context, which has medium priority.
> >
> > Signed-off-by: Boris Brezillon <boris.brezillon@...labora.com>
> > Signed-off-by: Adrián Larumbe <adrian.larumbe@...labora.com>
> > ---
> >  drivers/gpu/drm/panfrost/panfrost_device.h |   4 +-
> >  drivers/gpu/drm/panfrost/panfrost_drv.c    |  19 +-
> >  drivers/gpu/drm/panfrost/panfrost_job.c    | 203 +++++++++++++++++----
> >  drivers/gpu/drm/panfrost/panfrost_job.h    |  25 ++-
> >  4 files changed, 210 insertions(+), 41 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
> > index 077525a3ad68..5b164871eb95 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_device.h
> > +++ b/drivers/gpu/drm/panfrost/panfrost_device.h
> > @@ -15,6 +15,7 @@
> >  #include <drm/gpu_scheduler.h>
> >
> >  #include "panfrost_devfreq.h"
> > +#include "panfrost_job.h"
> >
> >  struct panfrost_device;
> >  struct panfrost_mmu;
> > @@ -22,7 +23,6 @@ struct panfrost_job_slot;
> >  struct panfrost_job;
> >  struct panfrost_perfcnt;
> >
> > -#define NUM_JOB_SLOTS 3
> >  #define MAX_PM_DOMAINS 5
> >
> >  enum panfrost_drv_comp_bits {
> > @@ -206,7 +206,7 @@ struct panfrost_engine_usage {
> >  struct panfrost_file_priv {
> >  	struct panfrost_device *pfdev;
> >
> > -	struct drm_sched_entity sched_entity[NUM_JOB_SLOTS];
> > +	struct xarray jm_ctxs;
> >
> >  	struct panfrost_mmu *mmu;
> >
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
> > index 1ea6c509a5d5..398c067457d9 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
> > +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
> > @@ -279,6 +279,7 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
> >  	struct panfrost_file_priv *file_priv = file->driver_priv;
> >  	struct drm_panfrost_submit *args = data;
> >  	struct drm_syncobj *sync_out = NULL;
> > +	struct panfrost_jm_ctx *jm_ctx;
> >  	struct panfrost_job *job;
> >  	int ret = 0, slot;
> >
> > @@ -294,10 +295,17 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
> >  			return -ENODEV;
> >  	}
> >
> > +	/* TODO: Use the default JM context until ctx management IOCTLs are exposed */
> > +	jm_ctx = panfrost_jm_ctx_from_handle(file, 0);
> > +	if (!jm_ctx) {
> > +		ret = -EINVAL;
> > +		goto out_put_syncout;
> > +	}
> > +
> >  	job = kzalloc(sizeof(*job), GFP_KERNEL);
> >  	if (!job) {
> >  		ret = -ENOMEM;
> > -		goto out_put_syncout;
> > +		goto out_put_jm_ctx;
> >  	}
> >
> >  	kref_init(&job->refcount);
> > @@ -307,12 +315,13 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
> >  	job->requirements = args->requirements;
> >  	job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev);
> >  	job->mmu = file_priv->mmu;
> > +	job->ctx = panfrost_jm_ctx_get(jm_ctx);
> >  	job->engine_usage = &file_priv->engine_usage;
> >
> >  	slot = panfrost_job_get_slot(job);
> >
> >  	ret = drm_sched_job_init(&job->base,
> > -				 &file_priv->sched_entity[slot],
> > +				 &jm_ctx->slots[slot].sched_entity,
> >  				 1, NULL, file->client_id);
> >  	if (ret)
> >  		goto out_put_job;
> > @@ -338,6 +347,8 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data,
> >  		drm_sched_job_cleanup(&job->base);
> >  out_put_job:
> >  	panfrost_job_put(job);
> > +out_put_jm_ctx:
> > +	panfrost_jm_ctx_put(jm_ctx);
> >  out_put_syncout:
> >  	if (sync_out)
> >  		drm_syncobj_put(sync_out);
> > @@ -564,7 +575,7 @@ panfrost_open(struct drm_device *dev, struct drm_file *file)
> >  		goto err_free;
> >  	}
> >
> > -	ret = panfrost_job_open(panfrost_priv);
> > +	ret = panfrost_job_open(file);
> >  	if (ret)
> >  		goto err_job;
> >
> > @@ -583,7 +594,7 @@ panfrost_postclose(struct drm_device *dev, struct drm_file *file)
> >  	struct panfrost_file_priv *panfrost_priv = file->driver_priv;
> >
> >  	panfrost_perfcnt_close(file);
> > -	panfrost_job_close(panfrost_priv);
> > +	panfrost_job_close(file);
> >
> >  	panfrost_mmu_ctx_put(panfrost_priv->mmu);
> >  	kfree(panfrost_priv);
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
> > index 82acabb21b27..b6853add307c 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_job.c
> > +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
> > @@ -8,6 +8,7 @@
> >  #include <linux/platform_device.h>
> >  #include <linux/pm_runtime.h>
> >  #include <linux/dma-resv.h>
> > +#include <drm/drm_auth.h>
> >  #include <drm/gpu_scheduler.h>
> >  #include <drm/panfrost_drm.h>
> >
> > @@ -22,6 +23,7 @@
> >  #include "panfrost_mmu.h"
> >  #include "panfrost_dump.h"
> >
> > +#define MAX_JM_CTX_PER_FILE 128
>
> 128 seems like a very large number, do we have a reason to set this so high?

Maybe I can drop it down to 64, to make sure it remains a power of two.

>
> >  #define JOB_TIMEOUT_MS 500
> >
> >  #define job_write(dev, reg, data) writel(data, dev->iomem + (reg))
> > @@ -222,7 +224,7 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
> >
> >  	/* start MMU, medium priority, cache clean/flush on end, clean/flush on
> >  	 * start */
> > -	cfg |= JS_CONFIG_THREAD_PRI(8) |
> > +	cfg |= job->ctx->config |
>
> Note that the thread priority on Midgard/Bifrost is pretty pointless. I
> don't believe kbase ever set this to anything other than 8. In theory it
> should balance priority between different slots but "How the priority
> values affect issue rates from different slots is implementation
> dependent" (which I think is wording for 'not implemented'!).
>
> My main concern is that since kbase never used it there's a possibility
> for undiscovered hardware bugs.

In that case, to avoid unexpected behaviour, I'll remove this part in the next revision.

> >  		JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE |
> >  		JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE |
> >  		panfrost_get_job_chain_flag(job);
> > @@ -359,6 +361,7 @@ static void panfrost_job_cleanup(struct kref *ref)
> >  		kvfree(job->bos);
> >  	}
> >
> > +	panfrost_jm_ctx_put(job->ctx);
> >  	kfree(job);
> >  }
> >
> > @@ -917,39 +920,184 @@ void panfrost_job_fini(struct panfrost_device *pfdev)
> >  	destroy_workqueue(pfdev->reset.wq);
> >  }
> >
> > -int panfrost_job_open(struct panfrost_file_priv *panfrost_priv)
> > +int panfrost_job_open(struct drm_file *file)
> > +{
> > +	struct panfrost_file_priv *panfrost_priv = file->driver_priv;
> > +	int ret;
> > +
> > +	struct drm_panfrost_jm_ctx_create default_jm_ctx = {
> > +		.priority = PANFROST_JM_CTX_PRIORITY_MEDIUM,
> > +	};
> > +
> > +	xa_init_flags(&panfrost_priv->jm_ctxs, XA_FLAGS_ALLOC);
> > +
> > +	ret = panfrost_jm_ctx_create(file, &default_jm_ctx);
> > +	if (ret)
> > +		return ret;
> > +
> > +	/* We expect the default context to be assigned handle 0. */
> > +	if (WARN_ON(default_jm_ctx.handle))
> > +		return -EINVAL;
> > +
> > +	return 0;
> > +}
> > +
> > +void panfrost_job_close(struct drm_file *file)
> > +{
> > +	struct panfrost_file_priv *panfrost_priv = file->driver_priv;
> > +	struct panfrost_jm_ctx *jm_ctx;
> > +	unsigned long i;
> > +
> > +	xa_for_each(&panfrost_priv->jm_ctxs, i, jm_ctx)
> > +		panfrost_jm_ctx_destroy(file, i);
> > +
> > +	xa_destroy(&panfrost_priv->jm_ctxs);
> > +}
> > +
> > +int panfrost_job_is_idle(struct panfrost_device *pfdev)
> >  {
> > -	struct panfrost_device *pfdev = panfrost_priv->pfdev;
> >  	struct panfrost_job_slot *js = pfdev->js;
> > -	struct drm_gpu_scheduler *sched;
> > -	int ret, i;
> > +	int i;
> >
> >  	for (i = 0; i < NUM_JOB_SLOTS; i++) {
> > -		sched = &js->queue[i].sched;
> > -		ret = drm_sched_entity_init(&panfrost_priv->sched_entity[i],
> > -					    DRM_SCHED_PRIORITY_NORMAL, &sched,
> > -					    1, NULL);
> > -		if (WARN_ON(ret))
> > -			return ret;
> > +		/* If there are any jobs in the HW queue, we're not idle */
> > +		if (atomic_read(&js->queue[i].sched.credit_count))
> > +			return false;
> >  	}
> > +
> > +	return true;
> > +}
> > +
> > +static void panfrost_jm_ctx_release(struct kref *kref)
> > +{
> > +	struct panfrost_jm_ctx *jm_ctx = container_of(kref, struct panfrost_jm_ctx, refcnt);
> > +
> > +	kfree(jm_ctx);
> > +}
> > +
> > +void
> > +panfrost_jm_ctx_put(struct panfrost_jm_ctx *jm_ctx)
> > +{
> > +	if (jm_ctx)
> > +		kref_put(&jm_ctx->refcnt, panfrost_jm_ctx_release);
> > +}
> > +
> > +struct panfrost_jm_ctx *
> > +panfrost_jm_ctx_get(struct panfrost_jm_ctx *jm_ctx)
> > +{
> > +	if (jm_ctx)
> > +		kref_get(&jm_ctx->refcnt);
> > +
> > +	return jm_ctx;
> > +}
> > +
> > +struct panfrost_jm_ctx *
> > +panfrost_jm_ctx_from_handle(struct drm_file *file, u32 handle)
> > +{
> > +	struct panfrost_file_priv *priv = file->driver_priv;
> > +	struct panfrost_jm_ctx *jm_ctx;
> > +
> > +	xa_lock(&priv->jm_ctxs);
> > +	jm_ctx = panfrost_jm_ctx_get(xa_load(&priv->jm_ctxs, handle));
> > +	xa_unlock(&priv->jm_ctxs);
> > +
> > +	return jm_ctx;
> > +}
> > +
> > +static int jm_ctx_prio_to_drm_sched_prio(struct drm_file *file,
> > +					 enum drm_panfrost_jm_ctx_priority in,
> > +					 enum drm_sched_priority *out)
> > +{
> > +	switch (in) {
> > +	case PANFROST_JM_CTX_PRIORITY_LOW:
> > +		*out = DRM_SCHED_PRIORITY_LOW;
> > +		return 0;
> > +	case PANFROST_JM_CTX_PRIORITY_MEDIUM:
> > +		*out = DRM_SCHED_PRIORITY_NORMAL;
> > +		return 0;
> > +	case PANFROST_JM_CTX_PRIORITY_HIGH:
> > +		/* Higher priorities require CAP_SYS_NICE or DRM_MASTER */
> > +		if (!capable(CAP_SYS_NICE) && !drm_is_current_master(file))
> > +			return -EACCES;
> > +
> > +		*out = DRM_SCHED_PRIORITY_HIGH;
> > +		return 0;
> > +	default:
> > +		return -EINVAL;
> > +	}
> > +}
> > +
> > +int panfrost_jm_ctx_create(struct drm_file *file,
> > +			   struct drm_panfrost_jm_ctx_create *args)
> > +{
> > +	struct panfrost_file_priv *priv = file->driver_priv;
> > +	struct panfrost_device *pfdev = priv->pfdev;
> > +	enum drm_sched_priority sched_prio;
> > +	struct panfrost_jm_ctx *jm_ctx;
> > +
> > +	int ret;
> > +
> > +	jm_ctx = kzalloc(sizeof(*jm_ctx), GFP_KERNEL);
> > +	if (!jm_ctx)
> > +		return -ENOMEM;
> > +
> > +	kref_init(&jm_ctx->refcnt);
> > +
> > +	/* Same priority for all JS within a single context */
> > +	jm_ctx->config = JS_CONFIG_THREAD_PRI(args->priority);
> > +
> > +	ret = jm_ctx_prio_to_drm_sched_prio(file, args->priority, &sched_prio);
> > +	if (ret)
> > +		goto err_put_jm_ctx;
> > +
> > +	for (u32 i = 0; i < NUM_JOB_SLOTS - 1; i++) {
> > +		struct drm_gpu_scheduler *sched = &pfdev->js->queue[i].sched;
> > +		struct panfrost_js_ctx *js_ctx = &jm_ctx->slots[i];
> > +
> > +		ret = drm_sched_entity_init(&js_ctx->sched_entity, sched_prio,
> > +					    &sched, 1, NULL);
> > +		if (ret)
> > +			goto err_put_jm_ctx;
> > +
> > +		js_ctx->enabled = true;
> > +	}
> > +
> > +	ret = xa_alloc(&priv->jm_ctxs, &args->handle, jm_ctx,
> > +		       XA_LIMIT(0, MAX_JM_CTX_PER_FILE), GFP_KERNEL);
> > +	if (ret)
> > +		goto err_put_jm_ctx;
>
> On error here we just jump down and call panfrost_jm_ctx_put() which
> will free jm_ctx but won't destroy any of the drm_sched_entities. There
> seems to be something a bit off with the lifetime management here.
>
> Should panfrost_jm_ctx_release() be responsible for tearing down the
> context, and panfrost_jm_ctx_destroy() be nothing more than dropping the
> reference?
>
> Steve
>
> > +
> >  	return 0;
> > +
> > +err_put_jm_ctx:
> > +	panfrost_jm_ctx_put(jm_ctx);
> > +	return ret;
> >  }
> >
> > -void panfrost_job_close(struct panfrost_file_priv *panfrost_priv)
> > +int panfrost_jm_ctx_destroy(struct drm_file *file, u32 handle)
> >  {
> > -	struct panfrost_device *pfdev = panfrost_priv->pfdev;
> > -	int i;
> > +	struct panfrost_file_priv *priv = file->driver_priv;
> > +	struct panfrost_device *pfdev = priv->pfdev;
> > +	struct panfrost_jm_ctx *jm_ctx;
> >
> > -	for (i = 0; i < NUM_JOB_SLOTS; i++)
> > -		drm_sched_entity_destroy(&panfrost_priv->sched_entity[i]);
> > +	jm_ctx = xa_erase(&priv->jm_ctxs, handle);
> > +	if (!jm_ctx)
> > +		return -EINVAL;
> > +
> > +	for (u32 i = 0; i < ARRAY_SIZE(jm_ctx->slots); i++) {
> > +		if (jm_ctx->slots[i].enabled)
> > +			drm_sched_entity_destroy(&jm_ctx->slots[i].sched_entity);
> > +	}
> >
> >  	/* Kill in-flight jobs */
> >  	spin_lock(&pfdev->js->job_lock);
> > -	for (i = 0; i < NUM_JOB_SLOTS; i++) {
> > -		struct drm_sched_entity *entity = &panfrost_priv->sched_entity[i];
> > -		int j;
> > +	for (u32 i = 0; i < ARRAY_SIZE(jm_ctx->slots); i++) {
> > +		struct drm_sched_entity *entity = &jm_ctx->slots[i].sched_entity;
> > +
> > +		if (!jm_ctx->slots[i].enabled)
> > +			continue;
> >
> > -		for (j = ARRAY_SIZE(pfdev->jobs[0]) - 1; j >= 0; j--) {
> > +		for (int j = ARRAY_SIZE(pfdev->jobs[0]) - 1; j >= 0; j--) {
> >  			struct panfrost_job *job = pfdev->jobs[i][j];
> >  			u32 cmd;
> >
> > @@ -980,18 +1128,7 @@ void panfrost_job_close(struct panfrost_file_priv *panfrost_priv)
> >  		}
> >  	}
> >  	spin_unlock(&pfdev->js->job_lock);
> > -}
> >
> > -int panfrost_job_is_idle(struct panfrost_device *pfdev)
> > -{
> > -	struct panfrost_job_slot *js = pfdev->js;
> > -	int i;
> > -
> > -	for (i = 0; i < NUM_JOB_SLOTS; i++) {
> > -		/* If there are any jobs in the HW queue, we're not idle */
> > -		if (atomic_read(&js->queue[i].sched.credit_count))
> > -			return false;
> > -	}
> > -
> > -	return true;
> > +	panfrost_jm_ctx_put(jm_ctx);
> > +	return 0;
> >  }
> > diff --git a/drivers/gpu/drm/panfrost/panfrost_job.h b/drivers/gpu/drm/panfrost/panfrost_job.h
> > index ec581b97852b..f3e5010e6cf5 100644
> > --- a/drivers/gpu/drm/panfrost/panfrost_job.h
> > +++ b/drivers/gpu/drm/panfrost/panfrost_job.h
> > @@ -18,6 +18,7 @@ struct panfrost_job {
> >
> >  	struct panfrost_device *pfdev;
> >  	struct panfrost_mmu *mmu;
> > +	struct panfrost_jm_ctx *ctx;
> >
> >  	/* Fence to be signaled by IRQ handler when the job is complete. */
> >  	struct dma_fence *done_fence;
> > @@ -39,10 +40,30 @@ struct panfrost_job {
> >  	u64 start_cycles;
> >  };
> >
> > +struct panfrost_js_ctx {
> > +	struct drm_sched_entity sched_entity;
> > +	bool enabled;
> > +};
> > +
> > +#define NUM_JOB_SLOTS 3
> > +
> > +struct panfrost_jm_ctx {
> > +	struct kref refcnt;
> > +	u32 config;
> > +	struct panfrost_js_ctx slots[NUM_JOB_SLOTS];
> > +};
> > +
> > +int panfrost_jm_ctx_create(struct drm_file *file,
> > +			   struct drm_panfrost_jm_ctx_create *args);
> > +int panfrost_jm_ctx_destroy(struct drm_file *file, u32 handle);
> > +void panfrost_jm_ctx_put(struct panfrost_jm_ctx *jm_ctx);
> > +struct panfrost_jm_ctx *panfrost_jm_ctx_get(struct panfrost_jm_ctx *jm_ctx);
> > +struct panfrost_jm_ctx *panfrost_jm_ctx_from_handle(struct drm_file *file, u32 handle);
> > +
> >  int panfrost_job_init(struct panfrost_device *pfdev);
> >  void panfrost_job_fini(struct panfrost_device *pfdev);
> > -int panfrost_job_open(struct panfrost_file_priv *panfrost_priv);
> > -void panfrost_job_close(struct panfrost_file_priv *panfrost_priv);
> > +int panfrost_job_open(struct drm_file *file);
> > +void panfrost_job_close(struct drm_file *file);
> >  int panfrost_job_get_slot(struct panfrost_job *job);
> >  int panfrost_job_push(struct panfrost_job *job);
> >  void panfrost_job_put(struct panfrost_job *job);


Adrian Larumbe