[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251128152403.72aedafa@fedora>
Date: Fri, 28 Nov 2025 15:24:03 +0100
From: Boris Brezillon <boris.brezillon@...labora.com>
To: Alice Ryhl <aliceryhl@...gle.com>
Cc: Danilo Krummrich <dakr@...nel.org>, Daniel Almeida
<daniel.almeida@...labora.com>, Matthew Brost <matthew.brost@...el.com>,
"Thomas Hellström" <thomas.hellstrom@...ux.intel.com>,
Maarten Lankhorst <maarten.lankhorst@...ux.intel.com>, Maxime Ripard
<mripard@...nel.org>, Thomas Zimmermann <tzimmermann@...e.de>, David Airlie
<airlied@...il.com>, Simona Vetter <simona@...ll.ch>, Steven Price
<steven.price@....com>, Liviu Dudau <liviu.dudau@....com>, Miguel Ojeda
<ojeda@...nel.org>, Boqun Feng <boqun.feng@...il.com>, Gary Guo
<gary@...yguo.net>, "Björn Roy Baron"
<bjorn3_gh@...tonmail.com>, Benno Lossin <lossin@...nel.org>, Andreas
Hindborg <a.hindborg@...nel.org>, Trevor Gross <tmgross@...ch.edu>, Frank
Binns <frank.binns@...tec.com>, Matt Coster <matt.coster@...tec.com>, Rob
Clark <robin.clark@....qualcomm.com>, Dmitry Baryshkov <lumag@...nel.org>,
Abhinav Kumar <abhinav.kumar@...ux.dev>, Jessica Zhang
<jessica.zhang@....qualcomm.com>, Sean Paul <sean@...rly.run>, Marijn
Suijten <marijn.suijten@...ainline.org>, Lyude Paul <lyude@...hat.com>,
Lucas De Marchi <lucas.demarchi@...el.com>, Rodrigo Vivi
<rodrigo.vivi@...el.com>, Sumit Semwal <sumit.semwal@...aro.org>,
"Christian König" <christian.koenig@....com>,
dri-devel@...ts.freedesktop.org, linux-kernel@...r.kernel.org,
rust-for-linux@...r.kernel.org, linux-arm-msm@...r.kernel.org,
freedreno@...ts.freedesktop.org, nouveau@...ts.freedesktop.org,
intel-xe@...ts.freedesktop.org, linux-media@...r.kernel.org,
linaro-mm-sig@...ts.linaro.org
Subject: Re: [PATCH 1/4] drm/gpuvm: take GEM lock inside
drm_gpuvm_bo_obtain_prealloc()
On Fri, 28 Nov 2025 14:14:15 +0000
Alice Ryhl <aliceryhl@...gle.com> wrote:
> When calling drm_gpuvm_bo_obtain_prealloc() and using immediate mode,
> this may result in a call to ops->vm_bo_free(vm_bo) while holding the
GEM's gpuva mutex. This is a problem if ops->vm_bo_free(vm_bo) performs
> any operations that are not safe in the fence signalling critical path,
> and it turns out that Panthor (the only current user of the method)
> calls drm_gem_shmem_unpin() which takes a resv lock internally.
>
> This constitutes both a violation of signalling safety and lock
inversion. To fix this, we modify the method to internally take the GEM's
> gpuva mutex so that the mutex can be unlocked before freeing the
> preallocated vm_bo.
>
> Note that this modification introduces a requirement that the driver
> uses immediate mode to call drm_gpuvm_bo_obtain_prealloc() as it would
> otherwise take the wrong lock.
>
> Signed-off-by: Alice Ryhl <aliceryhl@...gle.com>
Reviewed-by: Boris Brezillon <boris.brezillon@...labora.com>
Should we add a Fixes tag?
> ---
> drivers/gpu/drm/drm_gpuvm.c | 58 ++++++++++++++++++++++-------------
> drivers/gpu/drm/panthor/panthor_mmu.c | 10 ------
> 2 files changed, 37 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c
> index 936e6c1a60c16ed5a6898546bf99e23a74f6b58b..f08a5cc1d611f971862c1272987e5ecd6d97c163 100644
> --- a/drivers/gpu/drm/drm_gpuvm.c
> +++ b/drivers/gpu/drm/drm_gpuvm.c
> @@ -1601,14 +1601,37 @@ drm_gpuvm_bo_create(struct drm_gpuvm *gpuvm,
> }
> EXPORT_SYMBOL_GPL(drm_gpuvm_bo_create);
>
> +static void
> +drm_gpuvm_bo_destroy_not_in_lists(struct drm_gpuvm_bo *vm_bo)
> +{
> + struct drm_gpuvm *gpuvm = vm_bo->vm;
> + const struct drm_gpuvm_ops *ops = gpuvm->ops;
> + struct drm_gem_object *obj = vm_bo->obj;
> +
> + if (ops && ops->vm_bo_free)
> + ops->vm_bo_free(vm_bo);
> + else
> + kfree(vm_bo);
> +
> + drm_gpuvm_put(gpuvm);
> + drm_gem_object_put(obj);
> +}
> +
> +static void
> +drm_gpuvm_bo_destroy_not_in_lists_kref(struct kref *kref)
> +{
> + struct drm_gpuvm_bo *vm_bo = container_of(kref, struct drm_gpuvm_bo,
> + kref);
> +
> + drm_gpuvm_bo_destroy_not_in_lists(vm_bo);
> +}
> +
> static void
> drm_gpuvm_bo_destroy(struct kref *kref)
> {
> struct drm_gpuvm_bo *vm_bo = container_of(kref, struct drm_gpuvm_bo,
> kref);
> struct drm_gpuvm *gpuvm = vm_bo->vm;
> - const struct drm_gpuvm_ops *ops = gpuvm->ops;
> - struct drm_gem_object *obj = vm_bo->obj;
> bool lock = !drm_gpuvm_resv_protected(gpuvm);
>
> if (!lock)
> @@ -1617,16 +1640,10 @@ drm_gpuvm_bo_destroy(struct kref *kref)
> drm_gpuvm_bo_list_del(vm_bo, extobj, lock);
> drm_gpuvm_bo_list_del(vm_bo, evict, lock);
>
> - drm_gem_gpuva_assert_lock_held(gpuvm, obj);
> + drm_gem_gpuva_assert_lock_held(gpuvm, vm_bo->obj);
> list_del(&vm_bo->list.entry.gem);
>
> - if (ops && ops->vm_bo_free)
> - ops->vm_bo_free(vm_bo);
> - else
> - kfree(vm_bo);
> -
> - drm_gpuvm_put(gpuvm);
> - drm_gem_object_put(obj);
> + drm_gpuvm_bo_destroy_not_in_lists(vm_bo);
> }
>
> /**
> @@ -1744,9 +1761,7 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_bo_put_deferred);
> void
> drm_gpuvm_bo_deferred_cleanup(struct drm_gpuvm *gpuvm)
> {
> - const struct drm_gpuvm_ops *ops = gpuvm->ops;
> struct drm_gpuvm_bo *vm_bo;
> - struct drm_gem_object *obj;
> struct llist_node *bo_defer;
>
> bo_defer = llist_del_all(&gpuvm->bo_defer);
> @@ -1765,14 +1780,7 @@ drm_gpuvm_bo_deferred_cleanup(struct drm_gpuvm *gpuvm)
> while (bo_defer) {
> vm_bo = llist_entry(bo_defer, struct drm_gpuvm_bo, list.entry.bo_defer);
> bo_defer = bo_defer->next;
> - obj = vm_bo->obj;
> - if (ops && ops->vm_bo_free)
> - ops->vm_bo_free(vm_bo);
> - else
> - kfree(vm_bo);
> -
> - drm_gpuvm_put(gpuvm);
> - drm_gem_object_put(obj);
> + drm_gpuvm_bo_destroy_not_in_lists(vm_bo);
> }
> }
> EXPORT_SYMBOL_GPL(drm_gpuvm_bo_deferred_cleanup);
> @@ -1860,6 +1868,9 @@ EXPORT_SYMBOL_GPL(drm_gpuvm_bo_obtain);
> * count is decreased. If not found @__vm_bo is returned without further
> * increase of the reference count.
> *
> + * The provided @__vm_bo must not already be in the gpuva, evict, or extobj
> + * lists prior to calling this method.
> + *
> * A new &drm_gpuvm_bo is added to the GEMs gpuva list.
> *
> * Returns: a pointer to the found &drm_gpuvm_bo or @__vm_bo if no existing
> @@ -1872,14 +1883,19 @@ drm_gpuvm_bo_obtain_prealloc(struct drm_gpuvm_bo *__vm_bo)
> struct drm_gem_object *obj = __vm_bo->obj;
> struct drm_gpuvm_bo *vm_bo;
>
> + drm_WARN_ON(gpuvm->drm, !drm_gpuvm_immediate_mode(gpuvm));
> +
> + mutex_lock(&obj->gpuva.lock);
> vm_bo = drm_gpuvm_bo_find(gpuvm, obj);
> if (vm_bo) {
> - drm_gpuvm_bo_put(__vm_bo);
> + mutex_unlock(&obj->gpuva.lock);
> + kref_put(&__vm_bo->kref, drm_gpuvm_bo_destroy_not_in_lists_kref);
> return vm_bo;
> }
>
> drm_gem_gpuva_assert_lock_held(gpuvm, obj);
> list_add_tail(&__vm_bo->list.entry.gem, &obj->gpuva.list);
> + mutex_unlock(&obj->gpuva.lock);
>
> return __vm_bo;
> }
> diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
> index 9f5f4ddf291024121f3fd5644f2fdeba354fa67c..be8811a70e1a3adec87ca4a85cad7c838f54bebf 100644
> --- a/drivers/gpu/drm/panthor/panthor_mmu.c
> +++ b/drivers/gpu/drm/panthor/panthor_mmu.c
> @@ -1224,17 +1224,7 @@ static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx,
> goto err_cleanup;
> }
>
> - /* drm_gpuvm_bo_obtain_prealloc() will call drm_gpuvm_bo_put() on our
> - * pre-allocated BO if the <BO,VM> association exists. Given we
> - * only have one ref on preallocated_vm_bo, drm_gpuvm_bo_destroy() will
> - * be called immediately, and we have to hold the VM resv lock when
> - * calling this function.
> - */
> - dma_resv_lock(panthor_vm_resv(vm), NULL);
> - mutex_lock(&bo->base.base.gpuva.lock);
> op_ctx->map.vm_bo = drm_gpuvm_bo_obtain_prealloc(preallocated_vm_bo);
> - mutex_unlock(&bo->base.base.gpuva.lock);
> - dma_resv_unlock(panthor_vm_resv(vm));
>
> op_ctx->map.bo_offset = offset;
>
>
Powered by blists - more mailing lists