[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <BN9PR11MB52765B056B71AA14943BE88F8C43A@BN9PR11MB5276.namprd11.prod.outlook.com>
Date: Thu, 3 Jul 2025 04:32:26 +0000
From: "Tian, Kevin" <kevin.tian@...el.com>
To: Jason Gunthorpe <jgg@...dia.com>
CC: Xu Yilun <yilun.xu@...ux.intel.com>, "will@...nel.org" <will@...nel.org>,
"aneesh.kumar@...nel.org" <aneesh.kumar@...nel.org>, "iommu@...ts.linux.dev"
<iommu@...ts.linux.dev>, "linux-kernel@...r.kernel.org"
<linux-kernel@...r.kernel.org>, "joro@...tes.org" <joro@...tes.org>,
"robin.murphy@....com" <robin.murphy@....com>, "shuah@...nel.org"
<shuah@...nel.org>, "nicolinc@...dia.com" <nicolinc@...dia.com>,
"aik@....com" <aik@....com>, "Williams, Dan J" <dan.j.williams@...el.com>,
"baolu.lu@...ux.intel.com" <baolu.lu@...ux.intel.com>, "Xu, Yilun"
<yilun.xu@...el.com>
Subject: RE: [PATCH v3 2/5] iommufd: Destroy vdevice on idevice destroy
> From: Jason Gunthorpe <jgg@...dia.com>
> Sent: Wednesday, July 2, 2025 8:41 PM
>
> On Wed, Jul 02, 2025 at 09:13:50AM +0000, Tian, Kevin wrote:
> > > > Yes, you can't touch idev inside the destroy function at all, under
> > > > any version. idev is only valid if you have a refcount on vdev.
> > > >
> > > > But why are you touching this lock? Arrange things so abort doesn't
> > > > touch the idev??
> > >
> > > idev has a pointer idev->vdev to track the vdev's lifecycle.
> > > idev->igroup->lock protects the pointer. At the end of
> > > iommufd_vdevice_destroy() this pointer should be NULLed so that idev
> > > knows vdev is really destroyed.
>
> Well, that is destroy, not abort, but OK, there is an issue with
> destroy.
>
> > but comparing to that I'd prefer to the original wait approach...
>
> Okay, but lets try to keep the wait hidden inside the refcounting..
>
> The issue here is we don't hold a refcount on idev while working with
> idev. Let's fix that and then things should work properly?
>
> Maybe something like this:
>
> diff --git a/drivers/iommu/iommufd/device.c
> b/drivers/iommu/iommufd/device.c
> index 4e781aa9fc6329..9174fa7c972b80 100644
> --- a/drivers/iommu/iommufd/device.c
> +++ b/drivers/iommu/iommufd/device.c
> @@ -178,12 +178,20 @@ static void iommufd_device_remove_vdev(struct
> iommufd_device *idev)
> mutex_unlock(&idev->igroup->lock);
> }
>
> +void iommufd_device_pre_destroy(struct iommufd_object *obj)
> +{
> + struct iommufd_device *idev =
> + container_of(obj, struct iommufd_device, obj);
> +
> + /* Release the short term users on this */
> + iommufd_device_remove_vdev(idev);
> +}
> +
> void iommufd_device_destroy(struct iommufd_object *obj)
> {
> struct iommufd_device *idev =
> container_of(obj, struct iommufd_device, obj);
>
> - iommufd_device_remove_vdev(idev);
> iommu_device_release_dma_owner(idev->dev);
> iommufd_put_group(idev->igroup);
> if (!iommufd_selftest_is_mock_dev(idev->dev))
> diff --git a/drivers/iommu/iommufd/main.c
> b/drivers/iommu/iommufd/main.c
> index b2e8e106c16158..387c630fdabfbd 100644
> --- a/drivers/iommu/iommufd/main.c
> +++ b/drivers/iommu/iommufd/main.c
> @@ -151,6 +151,9 @@ static int
> iommufd_object_dec_wait_shortterm(struct iommufd_ctx *ictx,
> if (refcount_dec_and_test(&to_destroy->shortterm_users))
> return 0;
>
> + if (iommufd_object_ops[to_destroy->type].pre_destroy)
> + iommufd_object_ops[to_destroy-
> >type].pre_destroy(to_destroy);
> +
> if (wait_event_timeout(ictx->destroy_wait,
> refcount_read(&to_destroy->shortterm_users)
> == 0,
> msecs_to_jiffies(60000)))
> @@ -567,6 +570,7 @@ static const struct iommufd_object_ops
> iommufd_object_ops[] = {
> .destroy = iommufd_access_destroy_object,
> },
> [IOMMUFD_OBJ_DEVICE] = {
> + .pre_destroy = iommufd_device_pre_destroy,
> .destroy = iommufd_device_destroy,
> },
> [IOMMUFD_OBJ_FAULT] = {
> diff --git a/drivers/iommu/iommufd/viommu.c
> b/drivers/iommu/iommufd/viommu.c
> index 9451a311745f7b..cbf99daa7dc25d 100644
> --- a/drivers/iommu/iommufd/viommu.c
> +++ b/drivers/iommu/iommufd/viommu.c
> @@ -135,6 +135,7 @@ void iommufd_vdevice_destroy(struct
> iommufd_object *obj)
> mutex_lock(&vdev->idev->igroup->lock);
> iommufd_vdevice_abort(obj);
> mutex_unlock(&vdev->idev->igroup->lock);
> + iommufd_put_object(vdev->viommu->ictx, &vdev->idev->obj);
> }
>
> int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd)
> @@ -180,13 +181,19 @@ int iommufd_vdevice_alloc_ioctl(struct
> iommufd_ucmd *ucmd)
> vdev->id = virt_id;
> vdev->viommu = viommu;
> refcount_inc(&viommu->obj.users);
> +
> + /*
> + * A reference is held on the idev so long as we have a pointer.
> + * iommufd_device_pre_destroy() will revoke it before the real
> + * destruction.
> + */
> + vdev->idev = idev;
> +
> /*
> * iommufd_device_destroy() waits until idev->vdev is NULL before
> * freeing the idev, which only happens once the vdev is finished
> - * destruction. Thus we do not need refcounting on either idev->vdev
> or
> - * vdev->idev.
> + * destruction.
> */
> - vdev->idev = idev;
> idev->vdev = vdev;
>
> curr = xa_cmpxchg(&viommu->vdevs, virt_id, NULL, vdev,
> GFP_KERNEL);
> @@ -207,7 +214,8 @@ int iommufd_vdevice_alloc_ioctl(struct
> iommufd_ucmd *ucmd)
> out_unlock_igroup:
> mutex_unlock(&idev->igroup->lock);
> out_put_idev:
> - iommufd_put_object(ucmd->ictx, &idev->obj);
> + if (rc)
> + iommufd_put_object(ucmd->ictx, &idev->obj);
but this sounds like a misuse of shortterm_users which is not intended
to be held long beyond ioctl...
Powered by blists - more mailing lists