[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <b5b84208-e43c-483e-838b-c42375d3bada@linux.intel.com>
Date: Mon, 30 Jun 2025 13:08:32 +0800
From: Baolu Lu <baolu.lu@...ux.intel.com>
To: Xu Yilun <yilun.xu@...ux.intel.com>, jgg@...dia.com, jgg@...pe.ca,
kevin.tian@...el.com, will@...nel.org, aneesh.kumar@...nel.org
Cc: iommu@...ts.linux.dev, linux-kernel@...r.kernel.org, joro@...tes.org,
robin.murphy@....com, shuah@...nel.org, nicolinc@...dia.com, aik@....com,
dan.j.williams@...el.com, yilun.xu@...el.com
Subject: Re: [PATCH v3 2/5] iommufd: Destroy vdevice on idevice destroy
On 6/27/25 11:38, Xu Yilun wrote:
> Destroy iommufd_vdevice (vdev) on iommufd_idevice (idev) destroy so
> that vdev can't outlive idev.
>
> iommufd_device (idev) represents the physical device bound to iommufd,
> while the iommufd_vdevice (vdev) represents the virtual instance of the
> physical device in the VM. The lifecycle of the vdev should not be
> longer than idev. This doesn't cause real problem on existing use cases
> cause vdev doesn't impact the physical device, only provides
> virtualization information. But to extend vdev for Confidential
> Computing (CC), there are needs to do secure configuration for the vdev,
> e.g. TSM Bind/Unbind. These configurations should be rolled back on idev
> destroy, or the external driver (VFIO) functionality may be impact.
>
> Building the association between idev & vdev requires the two objects
> pointing each other, but not referencing each other. This requires
> proper locking. This is done by reviving some of Nicolin's patch [1].
>
> There are 3 cases on idev destroy:
>
> 1. vdev is already destroyed by userspace. No extra handling needed.
> 2. vdev is still alive. Use iommufd_object_tombstone_user() to
> destroy vdev and tombstone the vdev ID.
> 3. vdev is being destroyed by userspace. The vdev ID is already freed,
> but vdev destroy handler is not completed. Destroy the vdev
> immediately. To solve the racing with userspace destruction, make
> iommufd_vdevice_abort() reentrant.
>
> [1]:https://lore.kernel.org/
> all/53025c827c44d68edb6469bfd940a8e8bc6147a5.1729897278.git.nicolinc@...dia.com/
>
> Originally-by: Nicolin Chen<nicolinc@...dia.com>
> Suggested-by: Jason Gunthorpe<jgg@...dia.com>
> Co-developed-by: Aneesh Kumar K.V (Arm)<aneesh.kumar@...nel.org>
> Signed-off-by: Aneesh Kumar K.V (Arm)<aneesh.kumar@...nel.org>
> Signed-off-by: Xu Yilun<yilun.xu@...ux.intel.com>
> ---
> drivers/iommu/iommufd/device.c | 42 +++++++++++++++++++++++
> drivers/iommu/iommufd/iommufd_private.h | 11 +++++++
> drivers/iommu/iommufd/main.c | 1 +
> drivers/iommu/iommufd/viommu.c | 44 +++++++++++++++++++++++--
> 4 files changed, 95 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
> index 86244403b532..0937d4989185 100644
> --- a/drivers/iommu/iommufd/device.c
> +++ b/drivers/iommu/iommufd/device.c
> @@ -137,11 +137,53 @@ static struct iommufd_group *iommufd_get_group(struct iommufd_ctx *ictx,
> }
> }
>
> +static void iommufd_device_remove_vdev(struct iommufd_device *idev)
> +{
> + struct iommufd_vdevice *vdev;
> +
> + mutex_lock(&idev->igroup->lock);
> + /* vdev has been completely destroyed by userspace */
> + if (!idev->vdev)
> + goto out_unlock;
> +
> + vdev = iommufd_get_vdevice(idev->ictx, idev->vdev->obj.id);
> + if (IS_ERR(vdev)) {
> + /*
> + * vdev is removed from xarray by userspace, but is not
> + * destroyed/freed. Since iommufd_vdevice_abort() is reentrant,
> + * safe to destroy vdev here.
> + */
> + iommufd_vdevice_abort(&idev->vdev->obj);
> + goto out_unlock;
> + }
> +
> + /* Should never happen */
> + if (WARN_ON(vdev != idev->vdev)) {
> + iommufd_put_object(idev->ictx, &vdev->obj);
> + goto out_unlock;
> + }
> +
> + /*
> + * vdev is still alive. Hold a users refcount to prevent racing with
> + * userspace destruction, then use iommufd_object_tombstone_user() to
> + * destroy it and leave a tombstone.
> + */
> + refcount_inc(&vdev->obj.users);
> + iommufd_put_object(idev->ictx, &vdev->obj);
> + mutex_unlock(&idev->igroup->lock);
> + iommufd_object_tombstone_user(idev->ictx, &vdev->obj);
> + return;
> +
> +out_unlock:
> + mutex_unlock(&idev->igroup->lock);
> +}
> +
> void iommufd_device_destroy(struct iommufd_object *obj)
> {
> struct iommufd_device *idev =
> container_of(obj, struct iommufd_device, obj);
>
> + iommufd_device_remove_vdev(idev);
> iommu_device_release_dma_owner(idev->dev);
> iommufd_put_group(idev->igroup);
> if (!iommufd_selftest_is_mock_dev(idev->dev))
> diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
> index fbc9ef78d81f..f58aa4439c53 100644
> --- a/drivers/iommu/iommufd/iommufd_private.h
> +++ b/drivers/iommu/iommufd/iommufd_private.h
> @@ -446,6 +446,7 @@ struct iommufd_device {
> /* always the physical device */
> struct device *dev;
> bool enforce_cache_coherency;
> + struct iommufd_vdevice *vdev;
> };
>
> static inline struct iommufd_device *
> @@ -621,6 +622,7 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd);
> void iommufd_viommu_destroy(struct iommufd_object *obj);
> int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd);
> void iommufd_vdevice_destroy(struct iommufd_object *obj);
> +void iommufd_vdevice_abort(struct iommufd_object *obj);
>
> struct iommufd_vdevice {
> struct iommufd_object obj;
> @@ -628,8 +630,17 @@ struct iommufd_vdevice {
> struct iommufd_viommu *viommu;
> struct device *dev;
> u64 id; /* per-vIOMMU virtual ID */
> + struct iommufd_device *idev;
> };
>
> +static inline struct iommufd_vdevice *
> +iommufd_get_vdevice(struct iommufd_ctx *ictx, u32 id)
> +{
> + return container_of(iommufd_get_object(ictx, id,
> + IOMMUFD_OBJ_VDEVICE),
> + struct iommufd_vdevice, obj);
> +}
> +
> #ifdef CONFIG_IOMMUFD_TEST
> int iommufd_test(struct iommufd_ucmd *ucmd);
> void iommufd_selftest_destroy(struct iommufd_object *obj);
> diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
> index 620923669b42..64731b4fdbdf 100644
> --- a/drivers/iommu/iommufd/main.c
> +++ b/drivers/iommu/iommufd/main.c
> @@ -529,6 +529,7 @@ static const struct iommufd_object_ops iommufd_object_ops[] = {
> },
> [IOMMUFD_OBJ_VDEVICE] = {
> .destroy = iommufd_vdevice_destroy,
> + .abort = iommufd_vdevice_abort,
> },
> [IOMMUFD_OBJ_VEVENTQ] = {
> .destroy = iommufd_veventq_destroy,
> diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c
> index 01df2b985f02..632d1d7b8fd8 100644
> --- a/drivers/iommu/iommufd/viommu.c
> +++ b/drivers/iommu/iommufd/viommu.c
> @@ -84,16 +84,38 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
> return rc;
> }
>
> -void iommufd_vdevice_destroy(struct iommufd_object *obj)
> +void iommufd_vdevice_abort(struct iommufd_object *obj)
> {
> struct iommufd_vdevice *vdev =
> container_of(obj, struct iommufd_vdevice, obj);
> struct iommufd_viommu *viommu = vdev->viommu;
> + struct iommufd_device *idev = vdev->idev;
> +
> + lockdep_assert_held(&idev->igroup->lock);
> +
> + /*
> + * iommufd_vdevice_abort() could be reentrant, by
> + * iommufd_device_unbind() or by iommufd_destroy(). Cleanup only once.
> + */
> + if (!viommu)
> + return;
>
> /* xa_cmpxchg is okay to fail if alloc failed xa_cmpxchg previously */
> xa_cmpxchg(&viommu->vdevs, vdev->id, vdev, NULL, GFP_KERNEL);
> refcount_dec(&viommu->obj.users);
> put_device(vdev->dev);
> + vdev->viommu = NULL;
> + idev->vdev = NULL;
I feel it makes more sense to reorder the operations like this:
vdev->viommu = NULL;
vdev->idev = NULL;
idev->vdev = NULL;
put_device(vdev->dev);
put_device(vdev->dev) could potentially trigger other code paths that
might attempt to reference vdev or its associated pointers. Therefore,
it's safer to null all the relevant reference pointers before calling
put_device().
Others look good to me,
Reviewed-by: Lu Baolu <baolu.lu@...ux.intel.com>
Thanks,
baolu
Powered by blists - more mailing lists