[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <yq5a4iw45pyk.fsf@kernel.org>
Date: Wed, 25 Jun 2025 11:21:15 +0530
From: Aneesh Kumar K.V <aneesh.kumar@...nel.org>
To: Xu Yilun <yilun.xu@...ux.intel.com>, jgg@...dia.com, jgg@...pe.ca,
kevin.tian@...el.com, will@...nel.org
Cc: iommu@...ts.linux.dev, linux-kernel@...r.kernel.org, joro@...tes.org,
robin.murphy@....com, shuah@...nel.org, nicolinc@...dia.com,
aik@....com, dan.j.williams@...el.com, baolu.lu@...ux.intel.com,
yilun.xu@...ux.intel.com, yilun.xu@...el.com
Subject: Re: [PATCH v2 1/4] iommufd: Add iommufd_object_tombstone_user() helper
Xu Yilun <yilun.xu@...ux.intel.com> writes:
> Add the iommufd_object_tombstone_user() helper, which allows the caller
> to destroy an iommufd object created by userspace.
>
> This is useful on some destroy paths when the kernel caller finds the
> object should have been removed by userspace but is still alive. With
> this helper, the caller destroys the object but leave the object ID
> reserved (so called tombstone). The tombstone prevents repurposing the
> object ID without awareness from the original user.
>
> Since this happens for abnomal userspace behavior, for simplicity, the
> tombstoned object ID would be permanently leaked until
> iommufd_fops_release(). I.e. the original user gets an error when
> calling ioctl(IOMMU_DESTROY) on that ID.
>
> The first use case would be to ensure the iommufd_vdevice can't outlive
> the associated iommufd_device.
>
> Suggested-by: Jason Gunthorpe <jgg@...dia.com>
> Signed-off-by: Xu Yilun <yilun.xu@...ux.intel.com>
> ---
> drivers/iommu/iommufd/iommufd_private.h | 23 +++++++++++++++++-
> drivers/iommu/iommufd/main.c | 31 ++++++++++++++++++-------
> 2 files changed, 45 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
> index 9ccc83341f32..fbc9ef78d81f 100644
> --- a/drivers/iommu/iommufd/iommufd_private.h
> +++ b/drivers/iommu/iommufd/iommufd_private.h
> @@ -187,7 +187,8 @@ void iommufd_object_finalize(struct iommufd_ctx *ictx,
> struct iommufd_object *obj);
>
> enum {
> - REMOVE_WAIT_SHORTTERM = 1,
> + REMOVE_WAIT_SHORTTERM = BIT(0),
> + REMOVE_OBJ_TOMBSTONE = BIT(1),
> };
> int iommufd_object_remove(struct iommufd_ctx *ictx,
> struct iommufd_object *to_destroy, u32 id,
> @@ -213,6 +214,26 @@ static inline void iommufd_object_destroy_user(struct iommufd_ctx *ictx,
> WARN_ON(ret);
> }
>
> +/*
> + * Similar to iommufd_object_destroy_user(), except that the object ID is left
> + * reserved/tombstoned.
> + */
> +static inline void iommufd_object_tombstone_user(struct iommufd_ctx *ictx,
> + struct iommufd_object *obj)
> +{
> + int ret;
> +
> + ret = iommufd_object_remove(ictx, obj, obj->id,
> + REMOVE_WAIT_SHORTTERM | REMOVE_OBJ_TOMBSTONE);
> +
> + /*
> + * If there is a bug and we couldn't destroy the object then we did put
> + * back the caller's users refcount and will eventually try to free it
> + * again during close.
> + */
> + WARN_ON(ret);
> +}
> +
> /*
> * The HWPT allocated by autodomains is used in possibly many devices and
> * is automatically destroyed when its refcount reaches zero.
> diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
> index 3df468f64e7d..5fd75aba068b 100644
> --- a/drivers/iommu/iommufd/main.c
> +++ b/drivers/iommu/iommufd/main.c
> @@ -167,7 +167,7 @@ int iommufd_object_remove(struct iommufd_ctx *ictx,
> goto err_xa;
> }
>
> - xas_store(&xas, NULL);
> + xas_store(&xas, (flags & REMOVE_OBJ_TOMBSTONE) ? XA_ZERO_ENTRY : NULL);
> if (ictx->vfio_ioas == container_of(obj, struct iommufd_ioas, obj))
> ictx->vfio_ioas = NULL;
> xa_unlock(&ictx->objects);
> @@ -238,6 +238,7 @@ static int iommufd_fops_release(struct inode *inode, struct file *filp)
> struct iommufd_ctx *ictx = filp->private_data;
> struct iommufd_sw_msi_map *next;
> struct iommufd_sw_msi_map *cur;
> + XA_STATE(xas, &ictx->objects, 0);
> struct iommufd_object *obj;
>
> /*
> @@ -251,16 +252,30 @@ static int iommufd_fops_release(struct inode *inode, struct file *filp)
> */
> while (!xa_empty(&ictx->objects)) {
> unsigned int destroyed = 0;
> - unsigned long index;
>
> - xa_for_each(&ictx->objects, index, obj) {
> - if (!refcount_dec_if_one(&obj->users))
> - continue;
> + xas_set(&xas, 0);
> + for (;;) {
> + rcu_read_lock();
> + obj = xas_find(&xas, ULONG_MAX);
> + rcu_read_unlock();
>
What is the need for the rcu_read_lock()?
> +
> + if (!obj)
> + break;
> +
> + if (!xa_is_zero(obj)) {
> + if (!refcount_dec_if_one(&obj->users))
> + continue;
> +
> + iommufd_object_ops[obj->type].destroy(obj);
> + kfree(obj);
> + }
> +
> destroyed++;
> - xa_erase(&ictx->objects, index);
> - iommufd_object_ops[obj->type].destroy(obj);
> - kfree(obj);
> + xas_lock(&xas);
> + xas_store(&xas, NULL);
> + xas_unlock(&xas);
is that xas_lock needed?. we can't run a xarray update parallel to this
because neither iommufd ioctl not vfio cdev unbind can happen in parallel.
I have this as an additonal comment added to the function in my change.
/*
* We don't need additional locks because the iommufd_fops_release() function is
* only triggered when the iommufd descriptor is released. At that point, no
* other iommufd-based ioctl operations can be running concurrently.
*
* The destruction of the vdevice via idevice unbind is also safe:
* iommufd_fops_release() can only be called after the idevice has been unbound.
* The idevice bind operation takes a reference to the iommufd descriptor,
* preventing early release.
*/
> }
> +
> /* Bug related to users refcount */
> if (WARN_ON(!destroyed))
> break;
> --
> 2.25.1
-aneesh
Powered by blists - more mailing lists